diff options
| author | Tim Redfern <tim@eclectronics.org> | 2013-12-29 12:19:38 +0000 |
|---|---|---|
| committer | Tim Redfern <tim@eclectronics.org> | 2013-12-29 12:19:38 +0000 |
| commit | f7813a5324be39d13ab536c245d15dfc602a7849 (patch) | |
| tree | fad99148b88823d34a5df2f0a25881a002eb291b /ffmpeg/libavcodec/x86/h264_qpel.c | |
| parent | b7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 (diff) | |
basic type mechanism working
Diffstat (limited to 'ffmpeg/libavcodec/x86/h264_qpel.c')
| -rw-r--r-- | ffmpeg/libavcodec/x86/h264_qpel.c | 60 |
1 files changed, 25 insertions, 35 deletions
diff --git a/ffmpeg/libavcodec/x86/h264_qpel.c b/ffmpeg/libavcodec/x86/h264_qpel.c index 96dec82..fd6068f 100644 --- a/ffmpeg/libavcodec/x86/h264_qpel.c +++ b/ffmpeg/libavcodec/x86/h264_qpel.c @@ -25,24 +25,13 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" -#include "dsputil_mmx.h" +#include "dsputil_x86.h" #if HAVE_YASM -void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h); -void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h); -void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h); -static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, - int line_size, int h) -{ - ff_put_pixels8_mmxext(block, pixels, line_size, h); - ff_put_pixels8_mmxext(block + 8, pixels + 8, line_size, h); -} -static void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, - int line_size, int h) -{ - ff_avg_pixels8_mmxext(block, pixels, line_size, h); - ff_avg_pixels8_mmxext(block + 8, pixels + 8, line_size, h); -} +void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_put_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h); void ff_avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, @@ -55,15 +44,14 @@ void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h); void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h); -void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, - int line_size, int h); -void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, - int line_size, int h); #define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext #define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext +PIXELS16(static, ff_avg, , , _mmxext) +PIXELS16(static, ff_put, , , _mmxext) + #define DEF_QPEL(OPNAME)\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ @@ -209,7 +197,12 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ } -static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){ +static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, + uint8_t *src, + int tmpStride, + int srcStride, + int size) +{ int w = (size+8)>>3; src -= 2*srcStride+2; while(w--){ @@ -221,7 +214,7 @@ static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\ static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ - ff_put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\ + put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\ ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\ }\ static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ @@ -345,7 +338,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t * DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ uint8_t * const halfHV= temp;\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ - assert(((int)temp & 7) == 0);\ + av_assert2(((int)temp & 7) == 0);\ ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ }\ @@ -355,7 +348,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t * DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ uint8_t * const halfHV= temp;\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ - assert(((int)temp & 7) == 0);\ + av_assert2(((int)temp & 7) == 0);\ ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ }\ @@ -365,7 +358,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t * DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ uint8_t * const halfHV= temp;\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ - assert(((int)temp & 7) == 0);\ + av_assert2(((int)temp & 7) == 0);\ ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ }\ @@ -375,7 +368,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t * DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ uint8_t * const halfHV= temp;\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ - assert(((int)temp & 7) == 0);\ + av_assert2(((int)temp & 7) == 0);\ ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ }\ @@ -394,8 +387,6 @@ QPEL(put_, 16,XMM, 16)\ QPEL(avg_, 8, XMM, 16)\ QPEL(avg_, 16,XMM, 16)\ -#undef PAVGB -#define PAVGB "pavgb" QPEL_H264(put_, PUT_OP, mmxext) QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext) QPEL_H264_V_XMM(put_, PUT_OP, sse2) @@ -406,7 +397,6 @@ QPEL_H264_H_XMM(put_, PUT_OP, ssse3) QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3) QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) -#undef PAVGB H264_MC_4816(mmxext) H264_MC_816(H264_MC_V, sse2) @@ -552,9 +542,9 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) { #if HAVE_YASM int high_bit_depth = bit_depth > 8; - int mm_flags = av_get_cpu_flags(); + int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_MMXEXT(mm_flags)) { + if (EXTERNAL_MMXEXT(cpu_flags)) { if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); @@ -574,8 +564,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) } } - if (EXTERNAL_SSE2(mm_flags)) { - if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { + if (EXTERNAL_SSE2(cpu_flags)) { + if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { // these functions are slower than mmx on AMD, but faster on Intel H264_QPEL_FUNCS(0, 0, sse2); } @@ -606,7 +596,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) } } - if (EXTERNAL_SSSE3(mm_flags)) { + if (EXTERNAL_SSSE3(cpu_flags)) { if (!high_bit_depth) { H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 1, ssse3); @@ -629,7 +619,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) } } - if (EXTERNAL_AVX(mm_flags)) { + if (EXTERNAL_AVX(cpu_flags)) { /* AVX implies 64 byte cache lines without the need to avoid unaligned * memory accesses that cross the boundary between two cache lines. * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid |
