summaryrefslogtreecommitdiff
path: root/ffmpeg/libavcodec/x86/hpeldsp_init.c
diff options
context:
space:
mode:
authorTim Redfern <tim@eclectronics.org>2013-12-29 12:19:38 +0000
committerTim Redfern <tim@eclectronics.org>2013-12-29 12:19:38 +0000
commitf7813a5324be39d13ab536c245d15dfc602a7849 (patch)
treefad99148b88823d34a5df2f0a25881a002eb291b /ffmpeg/libavcodec/x86/hpeldsp_init.c
parentb7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 (diff)
basic type mechanism working
Diffstat (limited to 'ffmpeg/libavcodec/x86/hpeldsp_init.c')
-rw-r--r--ffmpeg/libavcodec/x86/hpeldsp_init.c286
1 files changed, 70 insertions, 216 deletions
diff --git a/ffmpeg/libavcodec/x86/hpeldsp_init.c b/ffmpeg/libavcodec/x86/hpeldsp_init.c
index 4b877b8..8ecf909 100644
--- a/ffmpeg/libavcodec/x86/hpeldsp_init.c
+++ b/ffmpeg/libavcodec/x86/hpeldsp_init.c
@@ -24,13 +24,10 @@
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
#include "libavcodec/hpeldsp.h"
-#include "dsputil_mmx.h"
+#include "dsputil_x86.h"
-//#undef NDEBUG
-//#include <assert.h>
-
-#if HAVE_YASM
void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -77,103 +74,45 @@ void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-#endif /* HAVE_YASM */
+#define avg_pixels8_mmx ff_avg_pixels8_mmx
+#define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx
+#define avg_pixels16_mmx ff_avg_pixels16_mmx
+#define avg_pixels8_xy2_mmx ff_avg_pixels8_xy2_mmx
+#define avg_pixels16_xy2_mmx ff_avg_pixels16_xy2_mmx
+#define put_pixels8_mmx ff_put_pixels8_mmx
+#define put_pixels16_mmx ff_put_pixels16_mmx
+#define put_pixels8_xy2_mmx ff_put_pixels8_xy2_mmx
+#define put_pixels16_xy2_mmx ff_put_pixels16_xy2_mmx
+#define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
+#define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx
+#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
#if HAVE_INLINE_ASM
-#define JUMPALIGN() __asm__ volatile (".p2align 3"::)
-#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
-
-#define MOVQ_BFE(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%"#regd", %%"#regd" \n\t" \
- "paddb %%"#regd", %%"#regd" \n\t" ::)
-
-#ifndef PIC
-#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))
-#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
-#else
-// for shared library it's better to use this way for accessing constants
-// pcmpeqd -> -1
-#define MOVQ_BONE(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%"#regd", %%"#regd" \n\t" \
- "psrlw $15, %%"#regd" \n\t" \
- "packuswb %%"#regd", %%"#regd" \n\t" ::)
-
-#define MOVQ_WTWO(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%"#regd", %%"#regd" \n\t" \
- "psrlw $15, %%"#regd" \n\t" \
- "psllw $1, %%"#regd" \n\t"::)
-
-#endif
-
-// using regr as temporary and for the output result
-// first argument is unmodifed and second is trashed
-// regfe is supposed to contain 0xfefefefefefefefe
-#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
- "movq "#rega", "#regr" \n\t" \
- "pand "#regb", "#regr" \n\t" \
- "pxor "#rega", "#regb" \n\t" \
- "pand "#regfe", "#regb" \n\t" \
- "psrlq $1, "#regb" \n\t" \
- "paddb "#regb", "#regr" \n\t"
-
-#define PAVGB_MMX(rega, regb, regr, regfe) \
- "movq "#rega", "#regr" \n\t" \
- "por "#regb", "#regr" \n\t" \
- "pxor "#rega", "#regb" \n\t" \
- "pand "#regfe", "#regb" \n\t" \
- "psrlq $1, "#regb" \n\t" \
- "psubb "#regb", "#regr" \n\t"
-
-// mm6 is supposed to contain 0xfefefefefefefefe
-#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
- "movq "#rega", "#regr" \n\t" \
- "movq "#regc", "#regp" \n\t" \
- "pand "#regb", "#regr" \n\t" \
- "pand "#regd", "#regp" \n\t" \
- "pxor "#rega", "#regb" \n\t" \
- "pxor "#regc", "#regd" \n\t" \
- "pand %%mm6, "#regb" \n\t" \
- "pand %%mm6, "#regd" \n\t" \
- "psrlq $1, "#regb" \n\t" \
- "psrlq $1, "#regd" \n\t" \
- "paddb "#regb", "#regr" \n\t" \
- "paddb "#regd", "#regp" \n\t"
-
-#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
- "movq "#rega", "#regr" \n\t" \
- "movq "#regc", "#regp" \n\t" \
- "por "#regb", "#regr" \n\t" \
- "por "#regd", "#regp" \n\t" \
- "pxor "#rega", "#regb" \n\t" \
- "pxor "#regc", "#regd" \n\t" \
- "pand %%mm6, "#regb" \n\t" \
- "pand %%mm6, "#regd" \n\t" \
- "psrlq $1, "#regd" \n\t" \
- "psrlq $1, "#regb" \n\t" \
- "psubb "#regb", "#regr" \n\t" \
- "psubb "#regd", "#regp" \n\t"
-
/***********************************/
/* MMX no rounding */
-#define NO_RND 1
#define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
#define SET_RND MOVQ_WONE
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
-#define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)
+#define STATIC static
+#include "rnd_template.c"
#include "hpeldsp_rnd_template.c"
#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB
-#undef NO_RND
+#undef STATIC
+
+PIXELS16(static, avg_no_rnd, , _y2, _mmx)
+PIXELS16(static, put_no_rnd, , _y2, _mmx)
+
+PIXELS16(static, avg_no_rnd, , _xy2, _mmx)
+PIXELS16(static, put_no_rnd, , _xy2, _mmx)
+
/***********************************/
/* MMX rounding */
@@ -188,112 +127,29 @@ void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
#undef SET_RND
#undef PAVGBP
#undef PAVGB
-#undef OP_AVG
+
+PIXELS16(static, avg, , _y2, _mmx)
+PIXELS16(static, put, , _y2, _mmx)
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
-#define ff_put_pixels8_mmx ff_put_pixels8_mmxext
-
-/***********************************/
-/* 3Dnow specific */
-#define DEF(x) x ## _3dnow
+#define HPELDSP_AVG_PIXELS16(CPUEXT) \
+ PIXELS16(static, put_no_rnd, ff_, _x2, CPUEXT) \
+ PIXELS16(static, put, ff_, _y2, CPUEXT) \
+ PIXELS16(static, put_no_rnd, ff_, _y2, CPUEXT) \
+ PIXELS16(static, avg, ff_, , CPUEXT) \
+ PIXELS16(static, avg, ff_, _x2, CPUEXT) \
+ PIXELS16(static, avg, ff_, _y2, CPUEXT) \
+ PIXELS16(static, avg, ff_, _xy2, CPUEXT)
-#include "hpeldsp_avg_template.c"
-
-#undef DEF
-
-/***********************************/
-/* MMXEXT specific */
-
-#define DEF(x) x ## _mmxext
-
-#include "hpeldsp_avg_template.c"
-
-#undef DEF
+HPELDSP_AVG_PIXELS16(_3dnow)
+HPELDSP_AVG_PIXELS16(_mmxext)
#endif /* HAVE_YASM */
-
-#if HAVE_INLINE_ASM
-#define put_no_rnd_pixels16_mmx put_pixels16_mmx
-#define put_no_rnd_pixels8_mmx put_pixels8_mmx
-#define put_pixels16_mmxext put_pixels16_mmx
-#define put_pixels8_mmxext put_pixels8_mmx
-#define put_pixels4_mmxext put_pixels4_mmx
-#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
-#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
-
-static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h)
-{
- __asm__ volatile (
- "lea (%3, %3), %%"REG_a" \n\t"
- ".p2align 3 \n\t"
- "1: \n\t"
- "movq (%1 ), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1 ), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r"(pixels), "+r"(block)
- : "r"((x86_reg)line_size)
- : "%"REG_a, "memory"
- );
-}
-
-static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h)
-{
- __asm__ volatile (
- "lea (%3, %3), %%"REG_a" \n\t"
- ".p2align 3 \n\t"
- "1: \n\t"
- "movq (%1 ), %%mm0 \n\t"
- "movq 8(%1 ), %%mm4 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq 8(%1, %3), %%mm5 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm4, 8(%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq %%mm5, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1 ), %%mm0 \n\t"
- "movq 8(%1 ), %%mm4 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq 8(%1, %3), %%mm5 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm4, 8(%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq %%mm5, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r"(pixels), "+r"(block)
- : "r"((x86_reg)line_size)
- : "%"REG_a, "memory"
- );
-}
-#endif /* HAVE_INLINE_ASM */
-
-void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
do { \
c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
@@ -302,9 +158,9 @@ void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
} while (0)
-static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int mm_flags)
+static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
{
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
SET_HPEL_FUNCS(put, [0], 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
SET_HPEL_FUNCS(avg, [0], 16, mmx);
@@ -312,18 +168,18 @@ static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int mm_flags)
SET_HPEL_FUNCS(put, [1], 8, mmx);
SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
SET_HPEL_FUNCS(avg, [1], 8, mmx);
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_MMX_INLINE */
}
-static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
+static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
{
-#if HAVE_YASM
+#if HAVE_MMXEXT_EXTERNAL
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
- c->put_pixels_tab[0][2] = ff_put_pixels16_y2_mmxext;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
- c->avg_pixels_tab[0][0] = ff_avg_pixels16_mmxext;
- c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_mmxext;
- c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_mmxext;
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
@@ -333,17 +189,15 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
if (!(flags & CODEC_FLAG_BITEXACT)) {
- c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_mmxext;
- c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_mmxext;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
- c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_mmxext;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
}
-#endif /* HAVE_YASM */
-#if HAVE_MMXEXT_EXTERNAL
if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
@@ -351,15 +205,15 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
#endif /* HAVE_MMXEXT_EXTERNAL */
}
-static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
+static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
{
-#if HAVE_YASM
+#if HAVE_AMD3DNOW_EXTERNAL
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
- c->put_pixels_tab[0][2] = ff_put_pixels16_y2_3dnow;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
- c->avg_pixels_tab[0][0] = ff_avg_pixels16_3dnow;
- c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_3dnow;
- c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_3dnow;
+ c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
@@ -369,12 +223,12 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
if (!(flags & CODEC_FLAG_BITEXACT)){
- c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_3dnow;
- c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_3dnow;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
- c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_3dnow;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
}
@@ -382,13 +236,13 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
}
-#endif /* HAVE_YASM */
+#endif /* HAVE_AMD3DNOW_EXTERNAL */
}
-static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
+static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags)
{
#if HAVE_SSE2_EXTERNAL
- if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
+ if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
// these functions are slower than mmx on AMD, but faster on Intel
c->put_pixels_tab[0][0] = ff_put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
@@ -399,17 +253,17 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
{
- int mm_flags = av_get_cpu_flags();
+ int cpu_flags = av_get_cpu_flags();
- if (mm_flags & AV_CPU_FLAG_MMX)
- hpeldsp_init_mmx(c, flags, mm_flags);
+ if (INLINE_MMX(cpu_flags))
+ hpeldsp_init_mmx(c, flags, cpu_flags);
- if (mm_flags & AV_CPU_FLAG_MMXEXT)
- hpeldsp_init_mmxext(c, flags, mm_flags);
+ if (EXTERNAL_AMD3DNOW(cpu_flags))
+ hpeldsp_init_3dnow(c, flags, cpu_flags);
- if (mm_flags & AV_CPU_FLAG_3DNOW)
- hpeldsp_init_3dnow(c, flags, mm_flags);
+ if (EXTERNAL_MMXEXT(cpu_flags))
+ hpeldsp_init_mmxext(c, flags, cpu_flags);
- if (mm_flags & AV_CPU_FLAG_SSE2)
- hpeldsp_init_sse2(c, flags, mm_flags);
+ if (EXTERNAL_SSE2(cpu_flags))
+ hpeldsp_init_sse2(c, flags, cpu_flags);
}