| author    | Tim Redfern <tim@eclectronics.org>       | 2013-12-29 12:19:38 +0000 |
|-----------|------------------------------------------|---------------------------|
| committer | Tim Redfern <tim@eclectronics.org>       | 2013-12-29 12:19:38 +0000 |
| commit    | f7813a5324be39d13ab536c245d15dfc602a7849 |                           |
| tree      | fad99148b88823d34a5df2f0a25881a002eb291b | /ffmpeg/libswscale/x86    |
| parent    | b7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 |                           |
basic type mechanism working
Diffstat (limited to 'ffmpeg/libswscale/x86')
| mode       | path                                     | lines changed |
|------------|------------------------------------------|---------------|
| -rw-r--r-- | ffmpeg/libswscale/x86/Makefile           | 6  |
| -rw-r--r-- | ffmpeg/libswscale/x86/input.asm          | 92 |
| -rw-r--r-- | ffmpeg/libswscale/x86/output.asm         | 8  |
| -rw-r--r-- | ffmpeg/libswscale/x86/rgb2rgb.c          | 29 |
| -rw-r--r-- | ffmpeg/libswscale/x86/rgb2rgb_template.c | 89 |
| -rw-r--r-- | ffmpeg/libswscale/x86/scale.asm          | 8  |
| -rw-r--r-- | ffmpeg/libswscale/x86/swscale.c          | 51 |
| -rw-r--r-- | ffmpeg/libswscale/x86/swscale_template.c | 2  |
| -rw-r--r-- | ffmpeg/libswscale/x86/w64xmmtest.c       | 8  |
| -rw-r--r-- | ffmpeg/libswscale/x86/yuv2rgb.c          | 37 |
10 files changed, 201 insertions, 129 deletions
diff --git a/ffmpeg/libswscale/x86/Makefile b/ffmpeg/libswscale/x86/Makefile
index 7d219b4..e767a5c 100644
--- a/ffmpeg/libswscale/x86/Makefile
+++ b/ffmpeg/libswscale/x86/Makefile
@@ -1,11 +1,11 @@
 $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
 
-OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
-
-MMX-OBJS += x86/rgb2rgb.o \
+OBJS += x86/rgb2rgb.o \
         x86/swscale.o \
         x86/yuv2rgb.o \
 
+OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
+
 YASM-OBJS += x86/input.o \
         x86/output.o \
         x86/scale.o \
diff --git a/ffmpeg/libswscale/x86/input.asm b/ffmpeg/libswscale/x86/input.asm
index 9d5a871..0c4f30e 100644
--- a/ffmpeg/libswscale/x86/input.asm
+++ b/ffmpeg/libswscale/x86/input.asm
@@ -4,20 +4,20 @@
 ;* into YUV planes also.
 ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -37,31 +37,57 @@ SECTION_RODATA
 rgb_Yrnd: times 4 dd 0x80100 ; 16.5 << 15
 rgb_UVrnd: times 4 dd 0x400100 ; 128.5 << 15
 
-bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
-bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
-rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
-rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
-bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
-bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
-rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
-rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
-bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
-bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
-rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
-rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
-
-rgba_Ycoeff_rb: times 4 dw RY, BY
-rgba_Ycoeff_br: times 4 dw BY, RY
-rgba_Ycoeff_ga: times 4 dw GY, 0
-rgba_Ycoeff_ag: times 4 dw 0, GY
-rgba_Ucoeff_rb: times 4 dw RU, BU
-rgba_Ucoeff_br: times 4 dw BU, RU
-rgba_Ucoeff_ga: times 4 dw GU, 0
-rgba_Ucoeff_ag: times 4 dw 0, GU
-rgba_Vcoeff_rb: times 4 dw RV, BV
-rgba_Vcoeff_br: times 4 dw BV, RV
-rgba_Vcoeff_ga: times 4 dw GV, 0
-rgba_Vcoeff_ag: times 4 dw 0, GV
+%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq
+%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq
+%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq
+%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq
+%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq
+%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq
+%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq
+%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq
+%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq
+%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq
+%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq
+%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq
+
+%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq
+%define rgba_Ycoeff_br 16*4 + 16*13 + tableq
+%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq
+%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq
+%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq
+%define rgba_Ucoeff_br 16*4 + 16*17 + tableq
+%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq
+%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq
+%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq
+%define rgba_Vcoeff_br 16*4 + 16*21 + tableq
+%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq
+%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq
+
+; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
+; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
+; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
+; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
+; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
+; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
+; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
+; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
+; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
+; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
+; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
+; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
+
+; rgba_Ycoeff_rb: times 4 dw RY, BY
+; rgba_Ycoeff_br: times 4 dw BY, RY
+; rgba_Ycoeff_ga: times 4 dw GY, 0
+; rgba_Ycoeff_ag: times 4 dw 0, GY
+; rgba_Ucoeff_rb: times 4 dw RU, BU
+; rgba_Ucoeff_br: times 4 dw BU, RU
+; rgba_Ucoeff_ga: times 4 dw GU, 0
+; rgba_Ucoeff_ag: times 4 dw 0, GU
+; rgba_Vcoeff_rb: times 4 dw RV, BV
+; rgba_Vcoeff_br: times 4 dw BV, RV
+; rgba_Vcoeff_ga: times 4 dw GV, 0
+; rgba_Vcoeff_ag: times 4 dw 0, GV
 
 shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \
                   6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80
@@ -82,7 +108,7 @@ SECTION .text
 ; %1 = nr. of XMM registers
 ; %2 = rgb or bgr
 %macro RGB24_TO_Y_FN 2-3
-cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3
+cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
 %if mmsize == 8
     mova m5, [%2_Ycoeff_12x4]
     mova m6, [%2_Ycoeff_3x56]
@@ -171,7 +197,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3
 ; %1 = nr. of XMM registers
 ; %2 = rgb or bgr
 %macro RGB24_TO_UV_FN 2-3
-cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
+cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
 %if ARCH_X86_64
     mova m8, [%2_Ucoeff_12x4]
     mova m9, [%2_Ucoeff_3x56]
@@ -311,7 +337,7 @@ RGB24_FUNCS 11, 13
 ; %1 = nr. of XMM registers
 ; %2-5 = rgba, bgra, argb or abgr (in individual characters)
 %macro RGB32_TO_Y_FN 5-6
-cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3
+cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
     mova m5, [rgba_Ycoeff_%2%4]
     mova m6, [rgba_Ycoeff_%3%5]
 %if %0 == 6
@@ -354,7 +380,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3
 ; %1 = nr. of XMM registers
 ; %2-5 = rgba, bgra, argb or abgr (in individual characters)
 %macro RGB32_TO_UV_FN 5-6
-cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
+cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
 %if ARCH_X86_64
     mova m8, [rgba_Ucoeff_%2%4]
     mova m9, [rgba_Ucoeff_%3%5]
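Note: the input.asm hunk above replaces the compile-time RODATA coefficient vectors with `%define`d byte offsets into a table whose base address arrives in the new `table` argument, so the RGB-to-YUV coefficients can be supplied per call instead of being baked into the binary. The C sketch below only models that addressing idea — a record found at `base + 16*4 + 16*n` — with hypothetical names (`CoeffTable`, `COEFF_OFF`, `put_coeff`); it is not the FFmpeg table layout.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical model: four 16-byte header records followed by numbered
 * 16-byte coefficient records, addressed as base + 16*4 + 16*n, mirroring
 * the "%define ... 16*4 + 16*N + tableq" offsets in the hunk above. */
#define RECORD_SIZE    16
#define HEADER_RECORDS 4
#define COEFF_OFF(n)   (RECORD_SIZE * HEADER_RECORDS + RECORD_SIZE * (n))

typedef struct {
    uint8_t bytes[RECORD_SIZE * (HEADER_RECORDS + 24)]; /* 24 coefficient slots */
} CoeffTable;

/* Store one 16-byte record at slot n. */
static void put_coeff(CoeffTable *t, int n, const int16_t v[8])
{
    memcpy(t->bytes + COEFF_OFF(n), v, RECORD_SIZE);
}

/* Load it back, the way the assembly would do a 16-byte load from [table + offset]. */
static void get_coeff(const CoeffTable *t, int n, int16_t v[8])
{
    memcpy(v, t->bytes + COEFF_OFF(n), RECORD_SIZE);
}

int main(void)
{
    CoeffTable t = {{0}};
    const int16_t demo[8] = {1, 2, 3, 4, 5, 6, 7, 8}; /* arbitrary demo values */
    int16_t out[8];

    put_coeff(&t, 0, demo);   /* slot 0, analogous to bgr_Ycoeff_12x4 */
    get_coeff(&t, 0, out);
    printf("slot 0 lives at byte offset %d, first word = %d\n", COEFF_OFF(0), out[0]);
    return 0;
}
```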
diff --git a/ffmpeg/libswscale/x86/output.asm b/ffmpeg/libswscale/x86/output.asm
index f9add35..9ea4af9 100644
--- a/ffmpeg/libswscale/x86/output.asm
+++ b/ffmpeg/libswscale/x86/output.asm
@@ -3,20 +3,20 @@
 ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
 ;*                    Kieran Kunhya <kieran@kunhya.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/ffmpeg/libswscale/x86/rgb2rgb.c b/ffmpeg/libswscale/x86/rgb2rgb.c
index 1e20176..8cc99c6 100644
--- a/ffmpeg/libswscale/x86/rgb2rgb.c
+++ b/ffmpeg/libswscale/x86/rgb2rgb.c
@@ -76,7 +76,6 @@ DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL;
 DECLARE_ASM_CONST(8, uint64_t, mul15_hi)  = 0x0210021002100210ULL;
 DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
 
-#define RGB2YUV_SHIFT 8
 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
 #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
@@ -92,35 +91,45 @@
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_SSE2 0
+#define COMPILE_TEMPLATE_AVX 0
 
 //MMX versions
 #undef RENAME
-#define RENAME(a) a ## _MMX
+#define RENAME(a) a ## _mmx
 #include "rgb2rgb_template.c"
 
 // MMXEXT versions
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMXEXT
+#define RENAME(a) a ## _mmxext
 #include "rgb2rgb_template.c"
 
 //SSE2 versions
 #undef RENAME
 #undef COMPILE_TEMPLATE_SSE2
 #define COMPILE_TEMPLATE_SSE2 1
-#define RENAME(a) a ## _SSE2
+#define RENAME(a) a ## _sse2
+#include "rgb2rgb_template.c"
+
+//AVX versions
+#undef RENAME
+#undef COMPILE_TEMPLATE_AVX
+#define COMPILE_TEMPLATE_AVX 1
+#define RENAME(a) a ## _avx
 #include "rgb2rgb_template.c"
 
 //3DNOW versions
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
+#undef COMPILE_TEMPLATE_AVX
 #undef COMPILE_TEMPLATE_AMD3DNOW
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2 0
+#define COMPILE_TEMPLATE_AVX 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNOW
+#define RENAME(a) a ## _3dnow
 #include "rgb2rgb_template.c"
 
 /*
@@ -138,12 +147,14 @@ av_cold void rgb2rgb_init_x86(void)
     int cpu_flags = av_get_cpu_flags();
 
     if (INLINE_MMX(cpu_flags))
-        rgb2rgb_init_MMX();
+        rgb2rgb_init_mmx();
     if (INLINE_AMD3DNOW(cpu_flags))
-        rgb2rgb_init_3DNOW();
+        rgb2rgb_init_3dnow();
     if (INLINE_MMXEXT(cpu_flags))
-        rgb2rgb_init_MMXEXT();
+        rgb2rgb_init_mmxext();
     if (INLINE_SSE2(cpu_flags))
-        rgb2rgb_init_SSE2();
+        rgb2rgb_init_sse2();
+    if (INLINE_AVX(cpu_flags))
+        rgb2rgb_init_avx();
 #endif /* HAVE_INLINE_ASM */
 }
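Note: the rgb2rgb.c hunks drop the file-local `#define RGB2YUV_SHIFT 8`, so the `BY`/`BV`/`BU`/... macros now pick up the shift shared with the rest of libswscale, and they add an AVX template pass plus the matching `INLINE_AVX` branch in `rgb2rgb_init_x86()`. As a reference for how those macros turn BT.601 fractions into fixed-point integers, here is a small stand-alone computation; the shift is a parameter here because the real value comes from a header that is not part of this diff.

```c
#include <stdio.h>

/* Same rounding scheme as the BY/BV/BU macros shown in the hunk:
 * multiply by 2^shift, add 0.5 and truncate to int. */
static int fix(double coeff, int shift)
{
    return (int)(coeff * (1 << shift) + 0.5);
}

int main(void)
{
    /* BT.601 fractions exactly as they appear in rgb2rgb.c. */
    const double by = 0.098, bv = -0.071, bu = 0.439;
    int shift;

    for (shift = 8; shift <= 15; shift += 7) {
        printf("shift=%2d: BY=%6d  BV=%6d  BU=%6d\n",
               shift, fix(by, shift), fix(bv, shift), fix(bu, shift));
    }
    return 0;
}
```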
diff --git a/ffmpeg/libswscale/x86/rgb2rgb_template.c b/ffmpeg/libswscale/x86/rgb2rgb_template.c
index d802ab4..d58219b 100644
--- a/ffmpeg/libswscale/x86/rgb2rgb_template.c
+++ b/ffmpeg/libswscale/x86/rgb2rgb_template.c
@@ -26,6 +26,8 @@
 #include <stddef.h>
 
+#include "libavutil/attributes.h"
+
 #undef PREFETCH
 #undef MOVNTQ
 #undef EMMS
@@ -1610,10 +1612,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
  * others are ignored in the C version.
  * FIXME: Write HQ version.
  */
+#if HAVE_7REGS
 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                        int width, int height,
-                                       int lumStride, int chromStride, int srcStride)
+                                       int lumStride, int chromStride, int srcStride,
+                                       int32_t *rgb2yuv)
 {
+#define BGR2Y_IDX "16*4+16*32"
+#define BGR2U_IDX "16*4+16*33"
+#define BGR2V_IDX "16*4+16*34"
     int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height-2; y+=2) {
@@ -1621,7 +1628,7 @@
         for (i=0; i<2; i++) {
             __asm__ volatile(
                 "mov %2, %%"REG_a" \n\t"
-                "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
+                "movq "BGR2Y_IDX"(%3), %%mm6 \n\t"
                 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
                 "pxor %%mm7, %%mm7 \n\t"
                 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
@@ -1640,12 +1647,10 @@
                 "pmaddwd %%mm6, %%mm1 \n\t"
                 "pmaddwd %%mm6, %%mm2 \n\t"
                 "pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
                 "psrad $8, %%mm0 \n\t"
                 "psrad $8, %%mm1 \n\t"
                 "psrad $8, %%mm2 \n\t"
                 "psrad $8, %%mm3 \n\t"
-#endif
                 "packssdw %%mm1, %%mm0 \n\t"
                 "packssdw %%mm3, %%mm2 \n\t"
                 "pmaddwd %%mm5, %%mm0 \n\t"
@@ -1665,12 +1670,10 @@
                 "pmaddwd %%mm6, %%mm1 \n\t"
                 "pmaddwd %%mm6, %%mm2 \n\t"
                 "pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
                 "psrad $8, %%mm4 \n\t"
                 "psrad $8, %%mm1 \n\t"
                 "psrad $8, %%mm2 \n\t"
                 "psrad $8, %%mm3 \n\t"
-#endif
                 "packssdw %%mm1, %%mm4 \n\t"
                 "packssdw %%mm3, %%mm2 \n\t"
                 "pmaddwd %%mm5, %%mm4 \n\t"
@@ -1685,7 +1688,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
                 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
                 "add $8, %%"REG_a" \n\t"
                 " js 1b \n\t"
-                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
+                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
                 : "%"REG_a, "%"REG_d
             );
             ydst += lumStride;
@@ -1695,7 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         __asm__ volatile(
             "mov %4, %%"REG_a" \n\t"
             "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
-            "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
+            "movq "BGR2U_IDX"(%5), %%mm6 \n\t"
             "pxor %%mm7, %%mm7 \n\t"
             "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
             "add %%"REG_d", %%"REG_d" \n\t"
@@ -1744,19 +1747,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "psrlw $2, %%mm0 \n\t"
             "psrlw $2, %%mm2 \n\t"
 #endif
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+            "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+            "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
             "pmaddwd %%mm0, %%mm1 \n\t"
             "pmaddwd %%mm2, %%mm3 \n\t"
             "pmaddwd %%mm6, %%mm0 \n\t"
             "pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
             "psrad $8, %%mm0 \n\t"
             "psrad $8, %%mm1 \n\t"
             "psrad $8, %%mm2 \n\t"
             "psrad $8, %%mm3 \n\t"
-#endif
             "packssdw %%mm2, %%mm0 \n\t"
             "packssdw %%mm3, %%mm1 \n\t"
             "pmaddwd %%mm5, %%mm0 \n\t"
@@ -1806,19 +1807,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "psrlw $2, %%mm4 \n\t"
             "psrlw $2, %%mm2 \n\t"
 #endif
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+            "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+            "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
             "pmaddwd %%mm4, %%mm1 \n\t"
             "pmaddwd %%mm2, %%mm3 \n\t"
             "pmaddwd %%mm6, %%mm4 \n\t"
             "pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
             "psrad $8, %%mm4 \n\t"
             "psrad $8, %%mm1 \n\t"
             "psrad $8, %%mm2 \n\t"
             "psrad $8, %%mm3 \n\t"
-#endif
             "packssdw %%mm2, %%mm4 \n\t"
             "packssdw %%mm3, %%mm1 \n\t"
             "pmaddwd %%mm5, %%mm4 \n\t"
@@ -1837,7 +1836,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "movd %%mm0, (%3, %%"REG_a") \n\t"
             "add $4, %%"REG_a" \n\t"
             " js 1b \n\t"
-            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
             : "%"REG_a, "%"REG_d
         );
 
@@ -1850,11 +1849,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             SFENCE" \n\t"
             :::"memory");
 
-    rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
+    ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv);
 }
+#endif /* HAVE_7REGS */
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                     int width, int height, int src1Stride,
                                     int src2Stride, int dstStride)
@@ -1924,7 +1924,35 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
         ::: "memory"
     );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/
+
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+                         const uint8_t *unused0,
+                         const uint8_t *src1,
+                         const uint8_t *src2,
+                         int w, uint32_t *unused);
+static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
+                                      int width, int height, int srcStride,
+                                      int dst1Stride, int dst2Stride)
+{
+    int h;
+
+    for (h=0; h < height; h++) {
+        RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL);
+        src += srcStride;
+        dst1 += dst1Stride;
+        dst2 += dst2Stride;
+    }
+    __asm__(
+            EMMS" \n\t"
+            SFENCE" \n\t"
+            ::: "memory"
+            );
+}
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
 
 #if !COMPILE_TEMPLATE_SSE2
 #if !COMPILE_TEMPLATE_AMD3DNOW
@@ -2354,7 +2382,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2380,7 +2408,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2404,7 +2432,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src+1, ydst, width);
@@ -2430,7 +2458,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src+1, ydst, width);
@@ -2450,7 +2478,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
-static inline void RENAME(rgb2rgb_init)(void)
+static av_cold void RENAME(rgb2rgb_init)(void)
 {
 #if !COMPILE_TEMPLATE_SSE2
 #if !COMPILE_TEMPLATE_AMD3DNOW
@@ -2486,13 +2514,20 @@ static inline void RENAME(rgb2rgb_init)(void)
 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
     planar2x = RENAME(planar2x);
 #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
-    rgb24toyv12 = RENAME(rgb24toyv12);
+#if HAVE_7REGS
+    ff_rgb24toyv12 = RENAME(rgb24toyv12);
+#endif /* HAVE_7REGS */
 
     yuyvtoyuv420 = RENAME(yuyvtoyuv420);
     uyvytoyuv420 = RENAME(uyvytoyuv420);
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
     interleaveBytes = RENAME(interleaveBytes);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+    deinterleaveBytes = RENAME(deinterleaveBytes);
+#endif
+#endif
 }
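Note: several hunks above replace the hand-rolled `-((-width)>>1)` with `FF_CEIL_RSHIFT(width, 1)`; for the non-negative widths used here both evaluate to ceil(width/2), the macro simply states the intent. A quick check of that equivalence, using a local `CEIL_RSHIFT` stand-in rather than the FFmpeg macro itself:

```c
#include <assert.h>
#include <stdio.h>

/* Local stand-in for a ceiling right shift: divide by 2^b, rounding up. */
#define CEIL_RSHIFT(a, b) (((a) + (1 << (b)) - 1) >> (b))

int main(void)
{
    int width;

    for (width = 0; width <= 4096; width++) {
        /* Old form: relies on arithmetic right shift of a negative value. */
        int old_form = -((-width) >> 1);
        /* New form: explicit round-up before shifting. */
        int new_form = CEIL_RSHIFT(width, 1);
        assert(old_form == new_form);
    }
    printf("ceil(width/2): both forms agree for widths 0..4096\n");
    return 0;
}
```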
diff --git a/ffmpeg/libswscale/x86/scale.asm b/ffmpeg/libswscale/x86/scale.asm
index c6dafde..940f357 100644
--- a/ffmpeg/libswscale/x86/scale.asm
+++ b/ffmpeg/libswscale/x86/scale.asm
@@ -2,20 +2,20 @@
 ;* x86-optimized horizontal line scaling functions
 ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/ffmpeg/libswscale/x86/swscale.c b/ffmpeg/libswscale/x86/swscale.c
index 2f67b1b..2f7e4f7 100644
--- a/ffmpeg/libswscale/x86/swscale.c
+++ b/ffmpeg/libswscale/x86/swscale.c
@@ -58,15 +58,10 @@ DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
 DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
 DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
 
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
-#else
 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
+
 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
 DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
@@ -76,7 +71,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
 #if HAVE_MMX_INLINE
 #undef RENAME
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _MMX
+#define RENAME(a) a ## _mmx
 #include "swscale_template.c"
 #endif
 
@@ -85,7 +80,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMXEXT
+#define RENAME(a) a ## _mmxext
 #include "swscale_template.c"
 #endif
 
@@ -211,7 +206,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                           const uint8_t *dither, int offset)
 {
     if(((int)dest) & 15){
-        return yuv2yuvX_MMXEXT(filter, filterSize, src, dest, dstW, dither, offset);
+        return yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
     }
     if (offset) {
         __asm__ volatile("movq (%0), %%xmm3\n\t"
@@ -279,7 +274,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
 #endif /* HAVE_INLINE_ASM */
 
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
-extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
+void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                 SwsContext *c, int16_t *data, \
                                                 int dstW, const uint8_t *src, \
                                                 const int16_t *filter, \
@@ -318,9 +313,9 @@ SCALE_FUNCS_SSE(ssse3);
 SCALE_FUNCS_SSE(sse4);
 
 #define VSCALEX_FUNC(size, opt) \
-extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
-                                               const int16_t **src, uint8_t *dest, int dstW, \
-                                               const uint8_t *dither, int offset)
+void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
+                                        const int16_t **src, uint8_t *dest, int dstW, \
+                                        const uint8_t *dither, int offset)
 #define VSCALEX_FUNCS(opt) \
 VSCALEX_FUNC(8, opt); \
 VSCALEX_FUNC(9, opt); \
@@ -335,8 +330,8 @@ VSCALEX_FUNC(16, sse4);
 VSCALEX_FUNCS(avx);
 
 #define VSCALE_FUNC(size, opt) \
-extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
-                                               const uint8_t *dither, int offset)
+void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
+                                        const uint8_t *dither, int offset)
 #define VSCALE_FUNCS(opt1, opt2) \
 VSCALE_FUNC(8, opt1); \
 VSCALE_FUNC(9, opt2); \
@@ -351,15 +346,15 @@ VSCALE_FUNC(16, sse4);
 VSCALE_FUNCS(avx, avx);
 
 #define INPUT_Y_FUNC(fmt, opt) \
-extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
-                                      const uint8_t *unused1, const uint8_t *unused2, \
-                                      int w, uint32_t *unused)
+void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
+                               const uint8_t *unused1, const uint8_t *unused2, \
+                               int w, uint32_t *unused)
 #define INPUT_UV_FUNC(fmt, opt) \
-extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
-                                       const uint8_t *unused0, \
-                                       const uint8_t *src1, \
-                                       const uint8_t *src2, \
-                                       int w, uint32_t *unused)
+void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
+                                const uint8_t *unused0, \
+                                const uint8_t *src1, \
+                                const uint8_t *src2, \
+                                int w, uint32_t *unused)
 #define INPUT_FUNC(fmt, opt) \
 INPUT_Y_FUNC(fmt, opt); \
 INPUT_UV_FUNC(fmt, opt)
@@ -382,22 +377,22 @@ INPUT_FUNCS(sse2);
 INPUT_FUNCS(ssse3);
 INPUT_FUNCS(avx);
 
-av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
+av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMX)
-        sws_init_swScale_MMX(c);
+        sws_init_swscale_mmx(c);
+#endif
 #if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
-        sws_init_swScale_MMXEXT(c);
+        sws_init_swscale_mmxext(c);
     if (cpu_flags & AV_CPU_FLAG_SSE3){
         if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
             c->yuv2planeX = yuv2yuvX_sse3;
     }
 #endif
-#endif /* HAVE_INLINE_ASM */
 
 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
     if (c->srcBpc == 8) { \
diff --git a/ffmpeg/libswscale/x86/swscale_template.c b/ffmpeg/libswscale/x86/swscale_template.c
index f2567c1..c7a1bb4 100644
--- a/ffmpeg/libswscale/x86/swscale_template.c
+++ b/ffmpeg/libswscale/x86/swscale_template.c
@@ -1640,7 +1640,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
 }
 #endif /* COMPILE_TEMPLATE_MMXEXT */
 
-static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
+static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
 {
     enum AVPixelFormat dstFormat = c->dstFormat;
 
diff --git a/ffmpeg/libswscale/x86/w64xmmtest.c b/ffmpeg/libswscale/x86/w64xmmtest.c
index dd9a2a4..88143d9 100644
--- a/ffmpeg/libswscale/x86/w64xmmtest.c
+++ b/ffmpeg/libswscale/x86/w64xmmtest.c
@@ -2,20 +2,20 @@
  * check XMM registers for clobbers on Win64
  * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
 *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 
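Note: the swscale.c context above shows `yuv2yuvX_sse3()` falling back to the MMXEXT path whenever `dest` is not 16-byte aligned, because aligned SSE stores fault on unaligned addresses. The sketch below shows that dispatch shape in isolation; `fast_path` and `safe_path` are made-up names, not libswscale functions.

```c
#include <stdint.h>
#include <stdio.h>

/* Made-up workers standing in for the SSE3 and MMXEXT code paths. */
static void fast_path(uint8_t *dest, int n) { (void)dest; printf("aligned path, %d bytes\n", n); }
static void safe_path(uint8_t *dest, int n) { (void)dest; printf("unaligned fallback, %d bytes\n", n); }

/* Mirrors the "if (((int)dest) & 15)" test in the hunk, but uses uintptr_t
 * so the pointer-to-integer conversion is well defined on 64-bit targets. */
static void store(uint8_t *dest, int n)
{
    if (((uintptr_t)dest) & 15)
        safe_path(dest, n);   /* destination is not on a 16-byte boundary */
    else
        fast_path(dest, n);
}

int main(void)
{
    uint8_t buf[64];
    store(buf, 32);       /* alignment of buf is whatever the compiler chose */
    store(buf + 1, 32);   /* offset by one byte, so at most one of the two calls is aligned */
    return 0;
}
```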
diff --git a/ffmpeg/libswscale/x86/yuv2rgb.c b/ffmpeg/libswscale/x86/yuv2rgb.c
index 3938e6b..e4315ef 100644
--- a/ffmpeg/libswscale/x86/yuv2rgb.c
+++ b/ffmpeg/libswscale/x86/yuv2rgb.c
@@ -27,7 +27,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <inttypes.h>
-#include <assert.h>
 
 #include "config.h"
 #include "libswscale/rgb2rgb.h"
@@ -54,7 +53,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _MMX
+#define RENAME(a) a ## _mmx
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMX_INLINE */
 
@@ -63,24 +62,24 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMXEXT
+#define RENAME(a) a ## _mmxext
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMXEXT_INLINE */
 
 #endif /* HAVE_INLINE_ASM */
 
-av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
+av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
     int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
         switch (c->dstFormat) {
         case AV_PIX_FMT_RGB24:
-            return yuv420_rgb24_MMXEXT;
+            return yuv420_rgb24_mmxext;
         case AV_PIX_FMT_BGR24:
-            return yuv420_bgr24_MMXEXT;
+            return yuv420_bgr24_mmxext;
         }
     }
 #endif
@@ -90,24 +89,30 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
         case AV_PIX_FMT_RGB32:
             if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-                return yuva420_rgb32_MMX;
+                return yuva420_rgb32_mmx;
 #endif
                 break;
-            } else return yuv420_rgb32_MMX;
+            } else
+                return yuv420_rgb32_mmx;
         case AV_PIX_FMT_BGR32:
             if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-                return yuva420_bgr32_MMX;
+                return yuva420_bgr32_mmx;
 #endif
                 break;
-            } else return yuv420_bgr32_MMX;
-        case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX;
-        case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX;
-        case AV_PIX_FMT_RGB565: return yuv420_rgb16_MMX;
-        case AV_PIX_FMT_RGB555: return yuv420_rgb15_MMX;
+            } else
+                return yuv420_bgr32_mmx;
+        case AV_PIX_FMT_RGB24:
+            return yuv420_rgb24_mmx;
+        case AV_PIX_FMT_BGR24:
+            return yuv420_bgr24_mmx;
+        case AV_PIX_FMT_RGB565:
+            return yuv420_rgb16_mmx;
+        case AV_PIX_FMT_RGB555:
+            return yuv420_rgb15_mmx;
         }
     }
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_MMX_INLINE */
 
     return NULL;
 }
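Note: `ff_yuv2rgb_init_x86()` above selects a converter at runtime from the CPU flags and the destination pixel format, returning NULL when no x86 routine applies so the caller can fall back to the C implementation. A self-contained sketch of that selection pattern follows; the flag bits, format values, and converter names here are invented for illustration and are not the libswscale symbols.

```c
#include <stddef.h>
#include <stdio.h>

/* Invented stand-ins for CPU feature flags and pixel format IDs. */
enum { CPU_MMX = 1, CPU_MMXEXT = 2 };
enum { FMT_RGB24, FMT_BGR24, FMT_RGB565 };

typedef void (*ConvertFn)(void);

static void rgb24_mmx(void)    { puts("rgb24, MMX path"); }
static void rgb24_mmxext(void) { puts("rgb24, MMXEXT path"); }
static void bgr24_mmx(void)    { puts("bgr24, MMX path"); }

/* Try the newer instruction set first, then fall through to the older one,
 * and return NULL so the caller can use a plain C converter. */
static ConvertFn pick_converter(int cpu_flags, int dst_format)
{
    if (cpu_flags & CPU_MMXEXT) {
        if (dst_format == FMT_RGB24)
            return rgb24_mmxext;
    }
    if (cpu_flags & CPU_MMX) {
        switch (dst_format) {
        case FMT_RGB24: return rgb24_mmx;
        case FMT_BGR24: return bgr24_mmx;
        }
    }
    return NULL; /* no accelerated routine for this format */
}

int main(void)
{
    ConvertFn fn = pick_converter(CPU_MMX | CPU_MMXEXT, FMT_RGB24);
    if (fn)
        fn();
    fn = pick_converter(CPU_MMX, FMT_RGB565);
    printf("RGB565 with MMX only: %s\n", fn ? "accelerated" : "C fallback");
    return 0;
}
```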
