libavcodec/x86/cabac.h
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of Libav.
00005  *
00006  * Libav is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * Libav is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with Libav; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #ifndef AVCODEC_X86_CABAC_H
00022 #define AVCODEC_X86_CABAC_H
00023 
00024 #include "libavcodec/cabac.h"
00025 #include "libavutil/attributes.h"
00026 #include "libavutil/x86_cpu.h"
00027 #include "config.h"
00028 
00029 #if HAVE_FAST_CMOV
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)
 *
 * Inline-asm template fragment (AT&T syntax). Every argument is an operand
 * string that is pasted into the template; "statep" and "lowword" are
 * accepted but not referenced by either variant of this macro.
 *
 * On entry "tmp" and "range" hold two candidate range values (as used by
 * BRANCHLESS_GET_CABAC: tmp = old range - ff_h264_lps_range entry,
 * range = that entry). With T = tmp << 17 the fragment does, without
 * branching:
 *   - low below T: range = entry value of tmp, low and ret unchanged
 *   - low above T: range keeps its entry value, low -= T, ret = ~ret
 * %ecx is overwritten as scratch, "tmp" is destroyed and the flags change.
 *
 * This variant uses cmova/sbb on an unsigned comparison.
 * NOTE(review): for low == T this variant leaves "range" untouched while the
 * non-cmov variant loads tmp into it; presumably that case cannot occur --
 * confirm. The shift count 17 presumably matches the fixed-point scale of
 * "low" defined in libavcodec/cabac.h -- confirm.
 */
00030 #define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
00031         "mov    "tmp"       , %%ecx     \n\t" /* ecx = tmp (unshifted copy) */\
00032         "shl    $17         , "tmp"     \n\t" /* tmp <<= 17 */\
00033         "cmp    "low"       , "tmp"     \n\t" /* flags from tmp - low */\
00034         "cmova  %%ecx       , "range"   \n\t" /* range = ecx if tmp > low (unsigned) */\
00035         "sbb    %%ecx       , %%ecx     \n\t" /* ecx = -1 if tmp < low (unsigned), else 0 */\
00036         "and    %%ecx       , "tmp"     \n\t" /* tmp = mask ? tmp : 0 */\
00037         "xor    %%ecx       , "ret"     \n\t" /* ret = mask ? ~ret : ret */\
00038         "sub    "tmp"       , "low"     \n\t" /* low -= tmp */
00039 #else /* HAVE_FAST_CMOV */
/*
 * Same contract as the HAVE_FAST_CMOV variant, implemented with plain
 * arithmetic: the selection mask is the sign of the 32-bit difference
 * (tmp << 17) - low, so the comparison is signed here.
 */
00040 #define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
00041         "mov    "tmp"       , %%ecx     \n\t" /* ecx = tmp (unshifted copy) */\
00042         "shl    $17         , "tmp"     \n\t" /* tmp <<= 17 */\
00043         "sub    "low"       , "tmp"     \n\t" /* tmp = (tmp << 17) - low */\
00044         "sar    $31         , "tmp"     \n\t" /* lps_mask: -1 if the difference is negative, else 0 */\
00045         "sub    %%ecx       , "range"   \n\t" /* RangeLPS - range */\
00046         "and    "tmp"       , "range"   \n\t" /* (RangeLPS - range) & lps_mask */\
00047         "add    %%ecx       , "range"   \n\t" /* new range */\
00048         "shl    $17         , %%ecx     \n\t" /* ecx <<= 17 */\
00049         "and    "tmp"       , %%ecx     \n\t" /* ecx = lps_mask ? ecx : 0 */\
00050         "sub    %%ecx       , "low"     \n\t" /* low -= ecx */\
00051         "xor    "tmp"       , "ret"     \n\t" /* ret = lps_mask ? ~ret : ret */
00052 #endif /* HAVE_FAST_CMOV */
00053 
/*
 * BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte)
 *
 * Inline-asm template (AT&T syntax) that performs one table-driven decode
 * step. All arguments are operand strings pasted into the template:
 *   ret      32-bit register; receives the state byte, possibly complemented
 *   statep   memory operand of the state byte (read first, rewritten later)
 *   low      32-bit register holding the "low" value
 *   lowword  16-bit view of the same register as "low"
 *   range    32-bit register holding the "range" value
 *   tmp      32-bit scratch register
 *   tmpbyte  8-bit view of the same register as "tmp"
 *   byte     memory operand holding the input byte pointer
 *
 * Sequence:
 *   1. ret = *statep (zero-extended); tmp = range.
 *   2. range = ff_h264_lps_range[ret + 2 * (range & 0xC0)]; tmp -= range.
 *   3. BRANCHLESS_GET_CABAC_UPDATE selects the new range and updates
 *      low / ret without branching.
 *   4. ecx = ff_h264_norm_shift[range]; range <<= cl; low <<= cl.
 *   5. *statep = (uint8_t) ff_h264_mlps_state[128 + ret].
 *   6. If the low 16 bits of "low" are non-zero, jump to label 1 (done).
 *   7. Otherwise refill: load 16 bits from the address stored in "byte" and
 *      advance that pointer by 2. bswap followed by shr $15 leaves the
 *      byte-swapped 16-bit value shifted left by one; 0xFFFF is subtracted,
 *      the result is shifted left by
 *      7 - ff_h264_norm_shift[((low - 1) ^ low) >> 15] and added to "low".
 *
 * Uses %ecx / REG_c as scratch, modifies the flags and defines the local
 * numeric label "1". MANGLE, OPSIZE and REG_c are not defined in this file;
 * presumably they come from libavutil/x86_cpu.h or config.h -- confirm.
 * NOTE(review): the refill load is not bounds-checked inside this macro.
 */
00054 #define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte) \
00055         "movzbl "statep"    , "ret"                                     \n\t"\
00056         "mov    "range"     , "tmp"                                     \n\t"\
00057         "and    $0xC0       , "range"                                   \n\t"\
00058         "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
00059         "sub    "range"     , "tmp"                                     \n\t"\
00060         BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)   \
00061         "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
00062         "shl    %%cl        , "range"                                   \n\t"\
00063         "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
00064         "shl    %%cl        , "low"                                     \n\t"\
00065         "mov    "tmpbyte"   , "statep"                                  \n\t"\
00066         "test   "lowword"   , "lowword"                                 \n\t"\
00067         " jnz   1f                                                      \n\t"\
00068         "mov    "byte"      , %%"REG_c"                                 \n\t"\
00069         "add"OPSIZE" $2     , "byte"                                    \n\t"\
00070         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
00071         "lea    -1("low")   , %%ecx                                     \n\t"\
00072         "xor    "low"       , %%ecx                                     \n\t"\
00073         "shr    $15         , %%ecx                                     \n\t"\
00074         "bswap  "tmp"                                                   \n\t"\
00075         "shr    $15         , "tmp"                                     \n\t"\
00076         "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
00077         "sub    $0xFFFF     , "tmp"                                     \n\t"\
00078         "neg    %%ecx                                                   \n\t"\
00079         "add    $7          , %%ecx                                     \n\t"\
00080         "shl    %%cl        , "tmp"                                     \n\t"\
00081         "add    "tmp"       , "low"                                     \n\t"\
00082         "1:                                                             \n\t"
00083 
00084 #if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
00085 #define get_cabac_inline get_cabac_inline_x86
/**
 * x86 inline-asm decode of one bin; get_cabac_inline is defined to this name
 * when HAVE_7REGS is set and BROKEN_RELOCATIONS is not defined.
 *
 * Runs BRANCHLESS_GET_CABAC on the decoder state, updating c->low, c->range
 * and c->bytestream, and rewriting the byte that state points to.
 *
 * @param c     decoder context; its low, range and bytestream fields are
 *              read and updated
 * @param state pointer to the state byte used for this bin (read, then
 *              overwritten with the next state)
 * @return bit 0 of the value the asm leaves in its first operand (0 or 1)
 */
00086 static av_always_inline int get_cabac_inline_x86(CABACContext *c,
00087                                                  uint8_t *const state)
00088 {
00089     int bit, tmp;
00090 
00091     __asm__ volatile(
        /* macro args: ret=%0, statep=(%5), low=%1, lowword=%w1, range=%2,
         * tmp=%3, tmpbyte=%b3, byte=%4 */
00092         BRANCHLESS_GET_CABAC("%0", "(%5)", "%1", "%w1", "%2",
00093                              "%3", "%b3", "%4")
        /* outputs: %0 bit, %1 c->low, %2 c->range, %3 tmp ("q": register
         * with an addressable low byte, needed for %b3), %4 c->bytestream */
00094         :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp),
00095          "+m"(c->bytestream)
        /* input: %5 state pointer, dereferenced as (%5) */
00096         :"r"(state)
        /* the template uses REG_c as scratch and accesses memory that is not
         * named by an operand (*state and the bytes at c->bytestream) */
00097         : "%"REG_c, "memory"
00098     );
00099     return bit & 1;
00100 }
00101 #endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
00102 
00103 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
/**
 * x86 inline-asm bypass decode that applies the decoded bit as a sign.
 *
 * Computes d = 2 * c->low - (c->range << 17). If d is negative, the
 * subtraction is undone and val is negated; otherwise val is kept. When the
 * low 16 bits of the resulting low are zero, 16 more bits are loaded from
 * c->bytestream (byte-swapped, shifted left by one, minus 0xFFFF), added to
 * low, and c->bytestream is advanced by 2. The result is stored to c->low.
 *
 * @param c   decoder context; low and bytestream are updated, range is read
 * @param val value whose sign is selected
 * @return val or -val, as described above
 *
 * NOTE(review): the refill load is not bounds-checked in this function.
 */
00104 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
00105 {
00106     x86_reg tmp;
00107     __asm__ volatile(
00108         "movl %4, %k1                           \n\t" /* tmp = c->range */
00109         "movl %2, %%eax                         \n\t" /* eax = c->low */
00110         "shl $17, %k1                           \n\t" /* tmp = range << 17 */
00111         "add %%eax, %%eax                       \n\t" /* eax = 2 * low */
00112         "sub %k1, %%eax                         \n\t" /* eax -= range << 17 */
00113         "cltd                                   \n\t" /* edx = eax < 0 ? -1 : 0 */
00114         "and %%edx, %k1                         \n\t" /* tmp = mask ? tmp : 0 */
00115         "add %k1, %%eax                         \n\t" /* undo the subtraction if negative */
00116         "xor %%edx, %%ecx                       \n\t" /* val = (val ^ mask) ... */
00117         "sub %%edx, %%ecx                       \n\t" /* ... - mask: negate if mask set */
00118         "test %%ax, %%ax                        \n\t" /* low 16 bits non-zero? */
00119         " jnz 1f                                \n\t" /* then no refill needed */
00120         "mov  %3, %1                            \n\t" /* tmp = c->bytestream */
00121         "subl $0xFFFF, %%eax                    \n\t"
00122         "movzwl (%1), %%edx                     \n\t" /* 16-bit load from the stream */
00123         "bswap %%edx                            \n\t" /* swapped bytes now in bits 31..16 */
00124         "shrl $15, %%edx                        \n\t" /* = byte-swapped value << 1 */
00125         "add  $2, %1                            \n\t" /* advance the pointer by 2 */
00126         "addl %%edx, %%eax                      \n\t"
00127         "mov  %1, %3                            \n\t" /* c->bytestream = tmp */
00128         "1:                                     \n\t"
00129         "movl %%eax, %2                         \n\t" /* c->low = eax */
00130 
        /* operands: %0 val (in ecx), %1 tmp, %2 c->low, %3 c->bytestream */
00131         :"+c"(val), "=&r"(tmp), "+m"(c->low), "+m"(c->bytestream)
        /* %4 c->range */
00132         :"m"(c->range)
        /* "memory": the template reads the bytes c->bytestream points to
         * through (%1), which no operand names, so the compiler must not
         * assume that memory is untouched by, or irrelevant to, this asm. */
00133         : "%eax", "%edx", "memory"
00134     );
00135     return val;
00136 }
00137 
00138 #endif /* AVCODEC_X86_CABAC_H */