path: root/ffmpeg/libswscale/x86
author     Tim Redfern <tim@eclectronics.org>   2013-12-29 12:19:38 +0000
committer  Tim Redfern <tim@eclectronics.org>   2013-12-29 12:19:38 +0000
commit     f7813a5324be39d13ab536c245d15dfc602a7849 (patch)
tree       fad99148b88823d34a5df2f0a25881a002eb291b /ffmpeg/libswscale/x86
parent     b7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 (diff)
basic type mechanism working
Diffstat (limited to 'ffmpeg/libswscale/x86')
-rw-r--r--  ffmpeg/libswscale/x86/Makefile              6
-rw-r--r--  ffmpeg/libswscale/x86/input.asm            92
-rw-r--r--  ffmpeg/libswscale/x86/output.asm            8
-rw-r--r--  ffmpeg/libswscale/x86/rgb2rgb.c            29
-rw-r--r--  ffmpeg/libswscale/x86/rgb2rgb_template.c   89
-rw-r--r--  ffmpeg/libswscale/x86/scale.asm             8
-rw-r--r--  ffmpeg/libswscale/x86/swscale.c            51
-rw-r--r--  ffmpeg/libswscale/x86/swscale_template.c    2
-rw-r--r--  ffmpeg/libswscale/x86/w64xmmtest.c          8
-rw-r--r--  ffmpeg/libswscale/x86/yuv2rgb.c            37
10 files changed, 201 insertions(+), 129 deletions(-)
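
The patch below is best read as three related changes: the RGB-to-YUV coefficient tables move from compile-time constants (ff_bgr2YCoeff and friends, plus the RODATA labels in input.asm) to a table addressed through a pointer supplied at run time; the per-instruction-set function suffixes are renamed from upper case (_MMX, _SSE2, ...) to lower case; and an AVX instantiation of the rgb2rgb template is added alongside the MMX/MMXEXT/SSE2/3DNow ones. The fragment below is an illustrative sketch only, not part of the patch: it shows the addressing idea behind the new BGR2Y_IDX/BGR2U_IDX/BGR2V_IDX offsets, using a made-up helper name.

    /* sketch only -- coeff_at() is a hypothetical helper, not in libswscale */
    #include <stdint.h>

    static inline const void *coeff_at(const int32_t *rgb2yuv, int n)
    {
        /* the converters now receive a rgb2yuv table pointer and load their
           coefficients at fixed byte offsets inside it, instead of from
           global constants selected at compile time */
        return (const uint8_t *)rgb2yuv + 16*4 + 16*n;
    }

    /* e.g. coeff_at(rgb2yuv, 32) is the address the MMX code below loads
       with movq "BGR2Y_IDX"(reg) (BGR2Y_IDX == "16*4+16*32"); n == 33 and
       n == 34 give the U and V coefficient rows, and the %define'd
       offsets in input.asm follow the same 16*4 + 16*N layout. */
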
diff --git a/ffmpeg/libswscale/x86/Makefile b/ffmpeg/libswscale/x86/Makefile
index 7d219b4..e767a5c 100644
--- a/ffmpeg/libswscale/x86/Makefile
+++ b/ffmpeg/libswscale/x86/Makefile
@@ -1,11 +1,11 @@
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
-OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
-
-MMX-OBJS += x86/rgb2rgb.o \
+OBJS += x86/rgb2rgb.o \
x86/swscale.o \
x86/yuv2rgb.o \
+OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
+
YASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o \
diff --git a/ffmpeg/libswscale/x86/input.asm b/ffmpeg/libswscale/x86/input.asm
index 9d5a871..0c4f30e 100644
--- a/ffmpeg/libswscale/x86/input.asm
+++ b/ffmpeg/libswscale/x86/input.asm
@@ -4,20 +4,20 @@
;* into YUV planes also.
;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -37,31 +37,57 @@ SECTION_RODATA
rgb_Yrnd: times 4 dd 0x80100 ; 16.5 << 15
rgb_UVrnd: times 4 dd 0x400100 ; 128.5 << 15
-bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
-bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
-rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
-rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
-bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
-bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
-rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
-rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
-bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
-bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
-rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
-rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
-
-rgba_Ycoeff_rb: times 4 dw RY, BY
-rgba_Ycoeff_br: times 4 dw BY, RY
-rgba_Ycoeff_ga: times 4 dw GY, 0
-rgba_Ycoeff_ag: times 4 dw 0, GY
-rgba_Ucoeff_rb: times 4 dw RU, BU
-rgba_Ucoeff_br: times 4 dw BU, RU
-rgba_Ucoeff_ga: times 4 dw GU, 0
-rgba_Ucoeff_ag: times 4 dw 0, GU
-rgba_Vcoeff_rb: times 4 dw RV, BV
-rgba_Vcoeff_br: times 4 dw BV, RV
-rgba_Vcoeff_ga: times 4 dw GV, 0
-rgba_Vcoeff_ag: times 4 dw 0, GV
+%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq
+%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq
+%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq
+%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq
+%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq
+%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq
+%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq
+%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq
+%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq
+%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq
+%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq
+%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq
+
+%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq
+%define rgba_Ycoeff_br 16*4 + 16*13 + tableq
+%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq
+%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq
+%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq
+%define rgba_Ucoeff_br 16*4 + 16*17 + tableq
+%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq
+%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq
+%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq
+%define rgba_Vcoeff_br 16*4 + 16*21 + tableq
+%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq
+%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq
+
+; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
+; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
+; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
+; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
+; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
+; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
+; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
+; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
+; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
+; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
+; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
+; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
+
+; rgba_Ycoeff_rb: times 4 dw RY, BY
+; rgba_Ycoeff_br: times 4 dw BY, RY
+; rgba_Ycoeff_ga: times 4 dw GY, 0
+; rgba_Ycoeff_ag: times 4 dw 0, GY
+; rgba_Ucoeff_rb: times 4 dw RU, BU
+; rgba_Ucoeff_br: times 4 dw BU, RU
+; rgba_Ucoeff_ga: times 4 dw GU, 0
+; rgba_Ucoeff_ag: times 4 dw 0, GU
+; rgba_Vcoeff_rb: times 4 dw RV, BV
+; rgba_Vcoeff_br: times 4 dw BV, RV
+; rgba_Vcoeff_ga: times 4 dw GV, 0
+; rgba_Vcoeff_ag: times 4 dw 0, GV
shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \
6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80
@@ -82,7 +108,7 @@ SECTION .text
; %1 = nr. of XMM registers
; %2 = rgb or bgr
%macro RGB24_TO_Y_FN 2-3
-cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3
+cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
%if mmsize == 8
mova m5, [%2_Ycoeff_12x4]
mova m6, [%2_Ycoeff_3x56]
@@ -171,7 +197,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3
; %1 = nr. of XMM registers
; %2 = rgb or bgr
%macro RGB24_TO_UV_FN 2-3
-cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
+cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%if ARCH_X86_64
mova m8, [%2_Ucoeff_12x4]
mova m9, [%2_Ucoeff_3x56]
@@ -311,7 +337,7 @@ RGB24_FUNCS 11, 13
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
%macro RGB32_TO_Y_FN 5-6
-cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3
+cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
mova m5, [rgba_Ycoeff_%2%4]
mova m6, [rgba_Ycoeff_%3%5]
%if %0 == 6
@@ -354,7 +380,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
%macro RGB32_TO_UV_FN 5-6
-cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
+cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%if ARCH_X86_64
mova m8, [rgba_Ucoeff_%2%4]
mova m9, [rgba_Ucoeff_%3%5]
diff --git a/ffmpeg/libswscale/x86/output.asm b/ffmpeg/libswscale/x86/output.asm
index f9add35..9ea4af9 100644
--- a/ffmpeg/libswscale/x86/output.asm
+++ b/ffmpeg/libswscale/x86/output.asm
@@ -3,20 +3,20 @@
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;* Kieran Kunhya <kieran@kunhya.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/ffmpeg/libswscale/x86/rgb2rgb.c b/ffmpeg/libswscale/x86/rgb2rgb.c
index 1e20176..8cc99c6 100644
--- a/ffmpeg/libswscale/x86/rgb2rgb.c
+++ b/ffmpeg/libswscale/x86/rgb2rgb.c
@@ -76,7 +76,6 @@ DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL;
DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL;
DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
-#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
@@ -92,35 +91,45 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_SSE2 0
+#define COMPILE_TEMPLATE_AVX 0
//MMX versions
#undef RENAME
-#define RENAME(a) a ## _MMX
+#define RENAME(a) a ## _mmx
#include "rgb2rgb_template.c"
// MMXEXT versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMXEXT
+#define RENAME(a) a ## _mmxext
#include "rgb2rgb_template.c"
//SSE2 versions
#undef RENAME
#undef COMPILE_TEMPLATE_SSE2
#define COMPILE_TEMPLATE_SSE2 1
-#define RENAME(a) a ## _SSE2
+#define RENAME(a) a ## _sse2
+#include "rgb2rgb_template.c"
+
+//AVX versions
+#undef RENAME
+#undef COMPILE_TEMPLATE_AVX
+#define COMPILE_TEMPLATE_AVX 1
+#define RENAME(a) a ## _avx
#include "rgb2rgb_template.c"
//3DNOW versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#undef COMPILE_TEMPLATE_SSE2
+#undef COMPILE_TEMPLATE_AVX
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_SSE2 0
+#define COMPILE_TEMPLATE_AVX 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNOW
+#define RENAME(a) a ## _3dnow
#include "rgb2rgb_template.c"
/*
@@ -138,12 +147,14 @@ av_cold void rgb2rgb_init_x86(void)
int cpu_flags = av_get_cpu_flags();
if (INLINE_MMX(cpu_flags))
- rgb2rgb_init_MMX();
+ rgb2rgb_init_mmx();
if (INLINE_AMD3DNOW(cpu_flags))
- rgb2rgb_init_3DNOW();
+ rgb2rgb_init_3dnow();
if (INLINE_MMXEXT(cpu_flags))
- rgb2rgb_init_MMXEXT();
+ rgb2rgb_init_mmxext();
if (INLINE_SSE2(cpu_flags))
- rgb2rgb_init_SSE2();
+ rgb2rgb_init_sse2();
+ if (INLINE_AVX(cpu_flags))
+ rgb2rgb_init_avx();
#endif /* HAVE_INLINE_ASM */
}
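
For context on the hunks above: the RENAME()/COMPILE_TEMPLATE_* pattern works by including rgb2rgb_template.c once per instruction set, redefining RENAME() in between so each inclusion emits a differently suffixed set of functions. The stand-alone fragment below mimics that pattern with generic names so it compiles on its own (it pastes the suffix with a macro rather than re-including a file); it is not libswscale code.

    #include <stdio.h>

    /* one "template" body, instantiated under several suffixes */
    #define DEFINE_CONVERT(suffix)                       \
        static void convert_ ## suffix(void)             \
        {                                                \
            printf("running convert_%s\n", #suffix);     \
        }

    DEFINE_CONVERT(mmx)   /* rgb2rgb.c does this via #include "rgb2rgb_template.c" */
    DEFINE_CONVERT(sse2)  /* with #define RENAME(a) a ## _sse2 before the include  */
    DEFINE_CONVERT(avx)   /* the new variant added by this patch                   */

    int main(void)
    {
        convert_mmx();
        convert_sse2();
        convert_avx();
        return 0;
    }
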
diff --git a/ffmpeg/libswscale/x86/rgb2rgb_template.c b/ffmpeg/libswscale/x86/rgb2rgb_template.c
index d802ab4..d58219b 100644
--- a/ffmpeg/libswscale/x86/rgb2rgb_template.c
+++ b/ffmpeg/libswscale/x86/rgb2rgb_template.c
@@ -26,6 +26,8 @@
#include <stddef.h>
+#include "libavutil/attributes.h"
+
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
@@ -1610,10 +1612,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
* others are ignored in the C version.
* FIXME: Write HQ version.
*/
+#if HAVE_7REGS
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int width, int height,
- int lumStride, int chromStride, int srcStride)
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv)
{
+#define BGR2Y_IDX "16*4+16*32"
+#define BGR2U_IDX "16*4+16*33"
+#define BGR2V_IDX "16*4+16*34"
int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height-2; y+=2) {
@@ -1621,7 +1628,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
for (i=0; i<2; i++) {
__asm__ volatile(
"mov %2, %%"REG_a" \n\t"
- "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
+ "movq "BGR2Y_IDX"(%3), %%mm6 \n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
@@ -1640,12 +1647,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
@@ -1665,12 +1670,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm1, %%mm4 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
@@ -1685,7 +1688,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
+ : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
: "%"REG_a, "%"REG_d
);
ydst += lumStride;
@@ -1695,7 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
__asm__ volatile(
"mov %4, %%"REG_a" \n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
- "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
+ "movq "BGR2U_IDX"(%5), %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
"add %%"REG_d", %%"REG_d" \n\t"
@@ -1744,19 +1747,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
"pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm2, %%mm0 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
@@ -1806,19 +1807,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
"pmaddwd %%mm4, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm2, %%mm4 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
@@ -1837,7 +1836,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"movd %%mm0, (%3, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+ : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
: "%"REG_a, "%"REG_d
);
@@ -1850,11 +1849,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
SFENCE" \n\t"
:::"memory");
- rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
+ ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv);
}
+#endif /* HAVE_7REGS */
#endif /* !COMPILE_TEMPLATE_SSE2 */
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
int width, int height, int src1Stride,
int src2Stride, int dstStride)
@@ -1924,7 +1924,35 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
::: "memory"
);
}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/
+
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *unused0,
+ const uint8_t *src1,
+ const uint8_t *src2,
+ int w, uint32_t *unused);
+static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
+ int width, int height, int srcStride,
+ int dst1Stride, int dst2Stride)
+{
+ int h;
+
+ for (h=0; h < height; h++) {
+ RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL);
+ src += srcStride;
+ dst1 += dst1Stride;
+ dst2 += dst2Stride;
+ }
+ __asm__(
+ EMMS" \n\t"
+ SFENCE" \n\t"
+ ::: "memory"
+ );
+}
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
#if !COMPILE_TEMPLATE_SSE2
#if !COMPILE_TEMPLATE_AMD3DNOW
@@ -2354,7 +2382,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width);
@@ -2380,7 +2408,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width);
@@ -2404,7 +2432,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width);
@@ -2430,7 +2458,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width);
@@ -2450,7 +2478,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
#endif /* !COMPILE_TEMPLATE_SSE2 */
-static inline void RENAME(rgb2rgb_init)(void)
+static av_cold void RENAME(rgb2rgb_init)(void)
{
#if !COMPILE_TEMPLATE_SSE2
#if !COMPILE_TEMPLATE_AMD3DNOW
@@ -2486,13 +2514,20 @@ static inline void RENAME(rgb2rgb_init)(void)
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
planar2x = RENAME(planar2x);
#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
- rgb24toyv12 = RENAME(rgb24toyv12);
+#if HAVE_7REGS
+ ff_rgb24toyv12 = RENAME(rgb24toyv12);
+#endif /* HAVE_7REGS */
yuyvtoyuv420 = RENAME(yuyvtoyuv420);
uyvytoyuv420 = RENAME(uyvytoyuv420);
#endif /* !COMPILE_TEMPLATE_SSE2 */
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
interleaveBytes = RENAME(interleaveBytes);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+ deinterleaveBytes = RENAME(deinterleaveBytes);
+#endif
+#endif
}
diff --git a/ffmpeg/libswscale/x86/scale.asm b/ffmpeg/libswscale/x86/scale.asm
index c6dafde..940f357 100644
--- a/ffmpeg/libswscale/x86/scale.asm
+++ b/ffmpeg/libswscale/x86/scale.asm
@@ -2,20 +2,20 @@
;* x86-optimized horizontal line scaling functions
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/ffmpeg/libswscale/x86/swscale.c b/ffmpeg/libswscale/x86/swscale.c
index 2f67b1b..2f7e4f7 100644
--- a/ffmpeg/libswscale/x86/swscale.c
+++ b/ffmpeg/libswscale/x86/swscale.c
@@ -58,15 +58,10 @@ DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
-#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
+
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
@@ -76,7 +71,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
#if HAVE_MMX_INLINE
#undef RENAME
#define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _MMX
+#define RENAME(a) a ## _mmx
#include "swscale_template.c"
#endif
@@ -85,7 +80,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMXEXT
+#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
@@ -211,7 +206,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
const uint8_t *dither, int offset)
{
if(((int)dest) & 15){
- return yuv2yuvX_MMXEXT(filter, filterSize, src, dest, dstW, dither, offset);
+ return yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
}
if (offset) {
__asm__ volatile("movq (%0), %%xmm3\n\t"
@@ -279,7 +274,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
#endif /* HAVE_INLINE_ASM */
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
-extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
+void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
SwsContext *c, int16_t *data, \
int dstW, const uint8_t *src, \
const int16_t *filter, \
@@ -318,9 +313,9 @@ SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);
#define VSCALEX_FUNC(size, opt) \
-extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
- const int16_t **src, uint8_t *dest, int dstW, \
- const uint8_t *dither, int offset)
+void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
+ const int16_t **src, uint8_t *dest, int dstW, \
+ const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
VSCALEX_FUNC(8, opt); \
VSCALEX_FUNC(9, opt); \
@@ -335,8 +330,8 @@ VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);
#define VSCALE_FUNC(size, opt) \
-extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
- const uint8_t *dither, int offset)
+void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
+ const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
VSCALE_FUNC(8, opt1); \
VSCALE_FUNC(9, opt2); \
@@ -351,15 +346,15 @@ VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
#define INPUT_Y_FUNC(fmt, opt) \
-extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
- const uint8_t *unused1, const uint8_t *unused2, \
- int w, uint32_t *unused)
+void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
+ const uint8_t *unused1, const uint8_t *unused2, \
+ int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
-extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
- const uint8_t *unused0, \
- const uint8_t *src1, \
- const uint8_t *src2, \
- int w, uint32_t *unused)
+void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *unused0, \
+ const uint8_t *src1, \
+ const uint8_t *src2, \
+ int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
INPUT_Y_FUNC(fmt, opt); \
INPUT_UV_FUNC(fmt, opt)
@@ -382,22 +377,22 @@ INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
-av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
+av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
if (cpu_flags & AV_CPU_FLAG_MMX)
- sws_init_swScale_MMX(c);
+ sws_init_swscale_mmx(c);
+#endif
#if HAVE_MMXEXT_INLINE
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
- sws_init_swScale_MMXEXT(c);
+ sws_init_swscale_mmxext(c);
if (cpu_flags & AV_CPU_FLAG_SSE3){
if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
c->yuv2planeX = yuv2yuvX_sse3;
}
#endif
-#endif /* HAVE_INLINE_ASM */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \
diff --git a/ffmpeg/libswscale/x86/swscale_template.c b/ffmpeg/libswscale/x86/swscale_template.c
index f2567c1..c7a1bb4 100644
--- a/ffmpeg/libswscale/x86/swscale_template.c
+++ b/ffmpeg/libswscale/x86/swscale_template.c
@@ -1640,7 +1640,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
}
#endif /* COMPILE_TEMPLATE_MMXEXT */
-static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
+static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
{
enum AVPixelFormat dstFormat = c->dstFormat;
diff --git a/ffmpeg/libswscale/x86/w64xmmtest.c b/ffmpeg/libswscale/x86/w64xmmtest.c
index dd9a2a4..88143d9 100644
--- a/ffmpeg/libswscale/x86/w64xmmtest.c
+++ b/ffmpeg/libswscale/x86/w64xmmtest.c
@@ -2,20 +2,20 @@
* check XMM registers for clobbers on Win64
* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libswscale/x86/yuv2rgb.c b/ffmpeg/libswscale/x86/yuv2rgb.c
index 3938e6b..e4315ef 100644
--- a/ffmpeg/libswscale/x86/yuv2rgb.c
+++ b/ffmpeg/libswscale/x86/yuv2rgb.c
@@ -27,7 +27,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
-#include <assert.h>
#include "config.h"
#include "libswscale/rgb2rgb.h"
@@ -54,7 +53,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _MMX
+#define RENAME(a) a ## _mmx
#include "yuv2rgb_template.c"
#endif /* HAVE_MMX_INLINE */
@@ -63,24 +62,24 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMXEXT
+#define RENAME(a) a ## _mmxext
#include "yuv2rgb_template.c"
#endif /* HAVE_MMXEXT_INLINE */
#endif /* HAVE_INLINE_ASM */
-av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
+av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
{
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
int cpu_flags = av_get_cpu_flags();
#if HAVE_MMXEXT_INLINE
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB24:
- return yuv420_rgb24_MMXEXT;
+ return yuv420_rgb24_mmxext;
case AV_PIX_FMT_BGR24:
- return yuv420_bgr24_MMXEXT;
+ return yuv420_bgr24_mmxext;
}
}
#endif
@@ -90,24 +89,30 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
case AV_PIX_FMT_RGB32:
if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
- return yuva420_rgb32_MMX;
+ return yuva420_rgb32_mmx;
#endif
break;
- } else return yuv420_rgb32_MMX;
+ } else
+ return yuv420_rgb32_mmx;
case AV_PIX_FMT_BGR32:
if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
- return yuva420_bgr32_MMX;
+ return yuva420_bgr32_mmx;
#endif
break;
- } else return yuv420_bgr32_MMX;
- case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX;
- case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX;
- case AV_PIX_FMT_RGB565: return yuv420_rgb16_MMX;
- case AV_PIX_FMT_RGB555: return yuv420_rgb15_MMX;
+ } else
+ return yuv420_bgr32_mmx;
+ case AV_PIX_FMT_RGB24:
+ return yuv420_rgb24_mmx;
+ case AV_PIX_FMT_BGR24:
+ return yuv420_bgr24_mmx;
+ case AV_PIX_FMT_RGB565:
+ return yuv420_rgb16_mmx;
+ case AV_PIX_FMT_RGB555:
+ return yuv420_rgb15_mmx;
}
}
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_MMX_INLINE */
return NULL;
}