basic type mechanism working

author: Tim Redfern <tim@eclectronics.org> 2013-12-29 12:19:38 +0000
committer: Tim Redfern <tim@eclectronics.org> 2013-12-29 12:19:38 +0000
commit: f7813a5324be39d13ab536c245d15dfc602a7849 (patch)
tree: fad99148b88823d34a5df2f0a25881a002eb291b /ffmpeg/libavcodec/x86/h264_qpel.c
parent: b7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 (diff)
1 files changed, 25 insertions, 35 deletions
diff --git a/ffmpeg/libavcodec/x86/h264_qpel.c b/ffmpeg/libavcodec/x86/h264_qpel.c
index 96dec82..fd6068f 100644
--- a/ffmpeg/libavcodec/x86/h264_qpel.c
+++ b/ffmpeg/libavcodec/x86/h264_qpel.c
@@ -25,24 +25,13 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/h264qpel.h"
 #include "libavcodec/mpegvideo.h"
-#include "dsputil_mmx.h"
+#include "dsputil_x86.h"
 
 #if HAVE_YASM
-void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
-                                   int line_size, int h)
-{
-    ff_put_pixels8_mmxext(block,     pixels,     line_size, h);
-    ff_put_pixels8_mmxext(block + 8, pixels + 8, line_size, h);
-}
-static void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
-                                   int line_size, int h)
-{
-    ff_avg_pixels8_mmxext(block,     pixels,     line_size, h);
-    ff_avg_pixels8_mmxext(block + 8, pixels + 8, line_size, h);
-}
+void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h);
+void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h);
 void ff_put_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
 void ff_avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
@@ -55,15 +44,14 @@ void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                int dstStride, int src1Stride, int h);
 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                int dstStride, int src1Stride, int h);
-void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
-                          int line_size, int h);
-void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
-                          int line_size, int h);
 #define ff_put_pixels8_l2_sse2  ff_put_pixels8_l2_mmxext
 #define ff_avg_pixels8_l2_sse2  ff_avg_pixels8_l2_mmxext
 #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
 #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
 
+PIXELS16(static, ff_avg, , , _mmxext)
+PIXELS16(static, ff_put, , , _mmxext)
+
 #define DEF_QPEL(OPNAME)\
 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
@@ -209,7 +197,12 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint
     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
 }
 
-static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
+static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
+                                                                 uint8_t *src,
+                                                                 int tmpStride,
+                                                                 int srcStride,
+                                                                 int size)
+{
     int w = (size+8)>>3;
     src -= 2*srcStride+2;
     while(w--){
@@ -221,7 +214,7 @@ static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp
 
 #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
-    ff_put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
+    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
 }\
 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
@@ -345,7 +338,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
 }\
@@ -355,7 +348,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
 }\
@@ -365,7 +358,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
 }\
@@ -375,7 +368,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
 }\
@@ -394,8 +387,6 @@ QPEL(put_, 16,XMM, 16)\
 QPEL(avg_, 8, XMM, 16)\
 QPEL(avg_, 16,XMM, 16)\
 
-#undef PAVGB
-#define PAVGB "pavgb"
 QPEL_H264(put_,        PUT_OP, mmxext)
 QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
 QPEL_H264_V_XMM(put_,       PUT_OP, sse2)
@@ -406,7 +397,6 @@ QPEL_H264_H_XMM(put_,       PUT_OP, ssse3)
 QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 QPEL_H264_HV_XMM(put_,       PUT_OP, ssse3)
 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
-#undef PAVGB
 
 H264_MC_4816(mmxext)
 H264_MC_816(H264_MC_V, sse2)
@@ -552,9 +542,9 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
 {
 #if HAVE_YASM
     int high_bit_depth = bit_depth > 8;
-    int mm_flags = av_get_cpu_flags();
+    int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_MMXEXT(mm_flags)) {
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
         if (!high_bit_depth) {
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
@@ -574,8 +564,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
         }
     }
 
-    if (EXTERNAL_SSE2(mm_flags)) {
-        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
             // these functions are slower than mmx on AMD, but faster on Intel
             H264_QPEL_FUNCS(0, 0, sse2);
         }
@@ -606,7 +596,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
         }
     }
 
-    if (EXTERNAL_SSSE3(mm_flags)) {
+    if (EXTERNAL_SSSE3(cpu_flags)) {
         if (!high_bit_depth) {
             H264_QPEL_FUNCS(1, 0, ssse3);
             H264_QPEL_FUNCS(1, 1, ssse3);
@@ -629,7 +619,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
         }
     }
 
-    if (EXTERNAL_AVX(mm_flags)) {
+    if (EXTERNAL_AVX(cpu_flags)) {
         /* AVX implies 64 byte cache lines without the need to avoid unaligned
          * memory accesses that cross the boundary between two cache lines.
          * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
author	Tim Redfern <tim@eclectronics.org>	2013-12-29 12:19:38 +0000
committer	Tim Redfern <tim@eclectronics.org>	2013-12-29 12:19:38 +0000
commit	f7813a5324be39d13ab536c245d15dfc602a7849 (patch)
tree	fad99148b88823d34a5df2f0a25881a002eb291b /ffmpeg/libavcodec/x86/h264_qpel.c
parent	b7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 (diff)