diff options
| author | Tim Redfern <tim@eclectronics.org> | 2013-12-29 12:19:38 +0000 |
|---|---|---|
| committer | Tim Redfern <tim@eclectronics.org> | 2013-12-29 12:19:38 +0000 |
| commit | f7813a5324be39d13ab536c245d15dfc602a7849 (patch) | |
| tree | fad99148b88823d34a5df2f0a25881a002eb291b /ffmpeg/libswscale | |
| parent | b7a5a477b8ff4d4e3028b9dfb9a9df0a41463f92 (diff) | |
basic type mechanism working
Diffstat (limited to 'ffmpeg/libswscale')
32 files changed, 2063 insertions, 894 deletions
diff --git a/ffmpeg/libswscale/Makefile b/ffmpeg/libswscale/Makefile index dd00f7d..ca6e27d 100644 --- a/ffmpeg/libswscale/Makefile +++ b/ffmpeg/libswscale/Makefile @@ -15,5 +15,8 @@ OBJS = input.o \ utils.o \ yuv2rgb.o \ +# Windows resource file +SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o + TESTPROGS = colorspace \ swscale \ diff --git a/ffmpeg/libswscale/bfin/swscale_bfin.c b/ffmpeg/libswscale/bfin/swscale_bfin.c index 2b93858..33c3ec5 100644 --- a/ffmpeg/libswscale/bfin/swscale_bfin.c +++ b/ffmpeg/libswscale/bfin/swscale_bfin.c @@ -23,6 +23,7 @@ #include <stdint.h> #include "config.h" +#include "libavutil/attributes.h" #include "libswscale/swscale_internal.h" #if defined (__FDPIC__) && CONFIG_SRAM @@ -71,16 +72,16 @@ static int yuyvtoyv12_unscaled(SwsContext *c, const uint8_t *src[], return srcSliceH; } -void ff_bfin_get_unscaled_swscale(SwsContext *c) +av_cold void ff_get_unscaled_swscale_bfin(SwsContext *c) { if (c->dstFormat == AV_PIX_FMT_YUV420P && c->srcFormat == AV_PIX_FMT_UYVY422) { av_log(NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); - c->swScale = uyvytoyv12_unscaled; + c->swscale = uyvytoyv12_unscaled; } if (c->dstFormat == AV_PIX_FMT_YUV420P && c->srcFormat == AV_PIX_FMT_YUYV422) { av_log(NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); - c->swScale = yuyvtoyv12_unscaled; + c->swscale = yuyvtoyv12_unscaled; } } diff --git a/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c b/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c index e1b7afa..2a36ad5 100644 --- a/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c +++ b/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c @@ -25,6 +25,7 @@ #include <stdint.h> #include "config.h" +#include "libavutil/attributes.h" #include "libswscale/swscale_internal.h" #if defined(__FDPIC__) && CONFIG_SRAM @@ -168,7 +169,7 @@ static int bfin_yuv420_bgr565(SwsContext *c, const uint8_t **in, int *instrides, outstrides, ff_bfin_yuv2rgb565_line, 0, 565); } -SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) +av_cold SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c) { SwsFunc f; diff --git a/ffmpeg/libswscale/input.c b/ffmpeg/libswscale/input.c index 2def2de..919b232 100644 --- a/ffmpeg/libswscale/input.c +++ b/ffmpeg/libswscale/input.c @@ -18,7 +18,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <assert.h> #include <math.h> #include <stdint.h> #include <stdio.h> @@ -36,17 +35,6 @@ #include "swscale.h" #include "swscale_internal.h" -#define RGB2YUV_SHIFT 15 -#define BY ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BV (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BU ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GY ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GV (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GU (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RY ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RV ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RU (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) - #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) #define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? b_r : r_b) @@ -54,79 +42,84 @@ static av_always_inline void rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, int32_t *rgb2yuv) { + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { unsigned int r_b = input_pixel(&src[i*4+0]); unsigned int g = input_pixel(&src[i*4+1]); unsigned int b_r = input_pixel(&src[i*4+2]); - dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; } } static av_always_inline void rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin) + int width, enum AVPixelFormat origin, int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1==src2); for (i = 0; i < width; i++) { int r_b = input_pixel(&src1[i*4+0]); int g = input_pixel(&src1[i*4+1]); int b_r = input_pixel(&src1[i*4+2]); - dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; } } static av_always_inline void rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin) + int width, enum AVPixelFormat origin, int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1==src2); for (i = 0; i < width; i++) { int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1; int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1; int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1; - dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i]= (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i]= (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; } } #define rgb64funcs(pattern, BE_LE, origin) \ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ - int width, uint32_t *unused) \ + int width, uint32_t *rgb2yuv) \ { \ const uint16_t *src = (const uint16_t *) _src; \ uint16_t *dst = (uint16_t *) _dst; \ - rgb64ToY_c_template(dst, src, width, origin); \ + rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \ } \ \ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *unused) \ + int width, uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } \ \ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *unused) \ + int width, uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE) @@ -134,15 +127,17 @@ rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE) static av_always_inline void rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, + int32_t *rgb2yuv) { + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { unsigned int r_b = input_pixel(&src[i * 3 + 0]); unsigned int g = input_pixel(&src[i * 3 + 1]); unsigned int b_r = input_pixel(&src[i * 3 + 2]); - dst[i] = (RY * r + GY * g + BY * b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -151,17 +146,20 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU, const uint16_t *src1, const uint16_t *src2, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, + int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r_b = input_pixel(&src1[i * 3 + 0]); int g = input_pixel(&src1[i * 3 + 1]); int b_r = input_pixel(&src1[i * 3 + 2]); - dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -170,9 +168,12 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, const uint16_t *src1, const uint16_t *src2, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, + int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r_b = (input_pixel(&src1[6 * i + 0]) + @@ -182,8 +183,8 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1; - dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -196,11 +197,11 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ const uint8_t *_src, \ const uint8_t *unused0, const uint8_t *unused1,\ int width, \ - uint32_t *unused) \ + uint32_t *rgb2yuv) \ { \ const uint16_t *src = (const uint16_t *)_src; \ uint16_t *dst = (uint16_t *)_dst; \ - rgb48ToY_c_template(dst, src, width, origin); \ + rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ @@ -209,13 +210,13 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ - uint32_t *unused) \ + uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *)_src1, \ *src2 = (const uint16_t *)_src2; \ uint16_t *dstU = (uint16_t *)_dstU, \ *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ @@ -224,13 +225,13 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ - uint32_t *unused) \ + uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *)_src1, \ *src2 = (const uint16_t *)_src2; \ uint16_t *dstU = (uint16_t *)_dstU, \ *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE) @@ -254,9 +255,10 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, int shb, int shp, int maskr, int maskg, int maskb, int rsh, - int gsh, int bsh, int S) + int gsh, int bsh, int S, + int32_t *rgb2yuv) { - const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh; + const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh; const unsigned rnd = (32<<((S)-1)) + (1<<(S-7)); int i; @@ -279,10 +281,11 @@ static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU, int shb, int shp, int maskr, int maskg, int maskb, int rsh, - int gsh, int bsh, int S) + int gsh, int bsh, int S, + int32_t *rgb2yuv) { - const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, - rv = RV << rsh, gv = GV << gsh, bv = BV << bsh; + const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh, + rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh; const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7)); int i; @@ -306,10 +309,11 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, int shb, int shp, int maskr, int maskg, int maskb, int rsh, - int gsh, int bsh, int S) + int gsh, int bsh, int S, + int32_t *rgb2yuv) { - const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, - rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, + const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh, + rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh, maskgx = ~(maskr | maskb); const unsigned rnd = (256U<<(S)) + (1<<(S-6)); int i; @@ -343,30 +347,30 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ maskg, maskb, rsh, gsh, bsh, S) \ static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ - int width, uint32_t *unused) \ + int width, uint32_t *tab) \ { \ rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \ - maskr, maskg, maskb, rsh, gsh, bsh, S); \ + maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \ } \ \ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ - int width, uint32_t *unused) \ + int width, uint32_t *tab) \ { \ rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ - maskr, maskg, maskb, rsh, gsh, bsh, S); \ + maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\ } \ \ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ const uint8_t *unused0, const uint8_t *src, \ const uint8_t *dummy, \ - int width, uint32_t *unused) \ + int width, uint32_t *tab) \ { \ rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ maskr, maskg, maskb, \ - rsh, gsh, bsh, S); \ + rsh, gsh, bsh, S, tab); \ } rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) @@ -388,18 +392,21 @@ rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, - int width, uint32_t *unused) + int width, uint32_t *rgb2yuv) { uint16_t *dstU = (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; for (i = 0; i < width; i++) { unsigned int g = gsrc[2*i] + gsrc[2*i+1]; unsigned int b = bsrc[2*i] + bsrc[2*i+1]; unsigned int r = rsrc[2*i] + rsrc[2*i+1]; - dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); - dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); } } @@ -593,274 +600,212 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, - int width, uint32_t *unused) + int width, uint32_t *rgb2yuv) { int16_t *dst = (int16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { int b = src[i * 3 + 0]; int g = src[i * 3 + 1]; int r = src[i * 3 + 2]; - dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); + dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; int i; for (i = 0; i < width; i++) { int b = src1[3 * i + 0]; int g = src1[3 * i + 1]; int r = src1[3 * i + 2]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } av_assert1(src1 == src2); } static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; for (i = 0; i < width; i++) { int b = src1[6 * i + 0] + src1[6 * i + 3]; int g = src1[6 * i + 1] + src1[6 * i + 4]; int r = src1[6 * i + 2] + src1[6 * i + 5]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } av_assert1(src1 == src2); } static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *rgb2yuv) { int16_t *dst = (int16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { int r = src[i * 3 + 0]; int g = src[i * 3 + 1]; int b = src[i * 3 + 2]; - dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); + dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r = src1[3 * i + 0]; int g = src1[3 * i + 1]; int b = src1[3 * i + 2]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r = src1[6 * i + 0] + src1[6 * i + 3]; int g = src1[6 * i + 1] + src1[6 * i + 4]; int b = src1[6 * i + 2] + src1[6 * i + 5]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } } -static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width) +static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv) { uint16_t *dst = (uint16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { int g = src[0][i]; int b = src[1][i]; int r = src[2][i]; - dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dst[i] = (ry*r + gy*g + by*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } -static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width) +static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused) +{ + uint16_t *dst = (uint16_t *)_dst; + int i; + for (i = 0; i < width; i++) + dst[i] = src[3][i] << 6; +} + +static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv) { uint16_t *dstU = (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; int i; for (i = 0; i < width; i++) { int g = src[0][i]; int b = src[1][i]; int r = src[2][i]; - dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); - dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } #define rdpx(src) \ is_be ? AV_RB16(src) : AV_RL16(src) static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4], - int width, int bpc, int is_be) + int width, int bpc, int is_be, int32_t *rgb2yuv) { int i; const uint16_t **src = (const uint16_t **)_src; uint16_t *dst = (uint16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; + int shift = bpc < 16 ? bpc : 14; for (i = 0; i < width; i++) { int g = rdpx(src[0] + i); int b = rdpx(src[1] + i); int r = rdpx(src[2] + i); - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14)); + dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); } } -static void planar_rgb9le_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 9, 0); -} - -static void planar_rgb9be_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 9, 1); -} - -static void planar_rgb10le_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 10, 0); -} - -static void planar_rgb10be_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 10, 1); -} - -static void planar_rgb12le_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 12, 0); -} - -static void planar_rgb12be_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 12, 1); -} - -static void planar_rgb14le_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 14, 0); -} - -static void planar_rgb14be_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 14, 1); -} - -static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 16, 0); -} - -static void planar_rgb16be_to_y(uint8_t *dst, const uint8_t *src[4], int w) -{ - planar_rgb16_to_y(dst, src, w, 16, 1); -} - static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width, - int bpc, int is_be) + int bpc, int is_be, int32_t *rgb2yuv) { int i; const uint16_t **src = (const uint16_t **)_src; uint16_t *dstU = (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int shift = bpc < 16 ? bpc : 14; for (i = 0; i < width; i++) { int g = rdpx(src[0] + i); int b = rdpx(src[1] + i); int r = rdpx(src[2] + i); - dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14); - dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14); + dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); + dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); } } #undef rdpx -static void planar_rgb9le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 9, 0); -} - -static void planar_rgb9be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 9, 1); -} - -static void planar_rgb10le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 10, 0); -} - -static void planar_rgb10be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1); -} - -static void planar_rgb12le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 12, 0); -} - -static void planar_rgb12be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 12, 1); -} - -static void planar_rgb14le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 14, 0); -} - -static void planar_rgb14be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 14, 1); -} - -static void planar_rgb16le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 16, 0); -} - -static void planar_rgb16be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) -{ - planar_rgb16_to_uv(dstU, dstV, src, w, 16, 1); -} +#define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ +static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ + int w, int32_t *rgb2yuv) \ +{ \ + planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv); \ +} \ +static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src[4], int w, int32_t *rgb2yuv) \ +{ \ + planar_rgb16_to_uv(dstU, dstV, src, w, nbits, endian, rgb2yuv); \ +} \ + +#define rgb9plus_planar_funcs(nbits) \ + rgb9plus_planar_funcs_endian(nbits, le, 0) \ + rgb9plus_planar_funcs_endian(nbits, be, 1) + +rgb9plus_planar_funcs(9) +rgb9plus_planar_funcs(10) +rgb9plus_planar_funcs(12) +rgb9plus_planar_funcs(14) +rgb9plus_planar_funcs(16) av_cold void ff_sws_init_input_funcs(SwsContext *c) { @@ -899,6 +844,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GBRP14LE: c->readChrPlanar = planar_rgb14le_to_uv; break; + case AV_PIX_FMT_GBRAP16LE: case AV_PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break; @@ -914,9 +860,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GBRP14BE: c->readChrPlanar = planar_rgb14be_to_uv; break; + case AV_PIX_FMT_GBRAP16BE: case AV_PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break; + case AV_PIX_FMT_GBRAP: case AV_PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break; @@ -1019,7 +967,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; - case AV_PIX_FMT_GBR24P : + case AV_PIX_FMT_GBRAP: + case AV_PIX_FMT_GBRP: c->chrToYV12 = gbr24pToUV_half_c; break; case AV_PIX_FMT_BGR444LE: @@ -1148,6 +1097,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GBRP14LE: c->readLumPlanar = planar_rgb14le_to_y; break; + case AV_PIX_FMT_GBRAP16LE: case AV_PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break; @@ -1163,9 +1113,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GBRP14BE: c->readLumPlanar = planar_rgb14be_to_y; break; + case AV_PIX_FMT_GBRAP16BE: case AV_PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break; + case AV_PIX_FMT_GBRAP: + c->readAlpPlanar = planar_rgb_to_a; case AV_PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break; diff --git a/ffmpeg/libswscale/libswscale.pc b/ffmpeg/libswscale/libswscale.pc index 523b5e4..63e7375 100644 --- a/ffmpeg/libswscale/libswscale.pc +++ b/ffmpeg/libswscale/libswscale.pc @@ -5,10 +5,10 @@ includedir=${prefix}/include Name: libswscale Description: FFmpeg image rescaling library -Version: 2.2.100 +Version: 2.5.101 Requires: -Requires.private: libavutil = 52.22.100 +Requires.private: libavutil = 52.59.100 Conflicts: -Libs: -L${libdir} -lswscale +Libs: -L${libdir} -lswscale Libs.private: -lm Cflags: -I${includedir} diff --git a/ffmpeg/libswscale/options.c b/ffmpeg/libswscale/options.c index fc571ac..9e8703f 100644 --- a/ffmpeg/libswscale/options.c +++ b/ffmpeg/libswscale/options.c @@ -33,8 +33,8 @@ static const char *sws_context_to_name(void *ptr) #define DEFAULT 0 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM -static const AVOption options[] = { - { "sws_flags", "scaler flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = DEFAULT }, 0, UINT_MAX, VE, "sws_flags" }, +static const AVOption swscale_options[] = { + { "sws_flags", "scaler flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = SWS_BICUBIC }, 0, UINT_MAX, VE, "sws_flags" }, { "fast_bilinear", "fast bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FAST_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BICUBIC }, INT_MIN, INT_MAX, VE, "sws_flags" }, @@ -64,13 +64,23 @@ static const AVOption options[] = { { "param0", "scaler param 0", OFFSET(param[0]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE }, { "param1", "scaler param 1", OFFSET(param[1]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE }, + { "src_v_chr_pos", "source vertical chroma position in luma grid/256" , OFFSET(src_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, + { "src_h_chr_pos", "source horizontal chroma position in luma grid/256", OFFSET(src_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, + { "dst_v_chr_pos", "destination vertical chroma position in luma grid/256" , OFFSET(dst_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, + { "dst_h_chr_pos", "destination horizontal chroma position in luma grid/256", OFFSET(dst_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, + + { "sws_dither", "set dithering algorithm", OFFSET(dither), AV_OPT_TYPE_INT, { .i64 = SWS_DITHER_AUTO }, 0, NB_SWS_DITHER, VE, "sws_dither" }, + { "auto", "leave choice to sws", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_AUTO }, INT_MIN, INT_MAX, VE, "sws_dither" }, + { "bayer", "bayer dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_BAYER }, INT_MIN, INT_MAX, VE, "sws_dither" }, + { "ed", "error diffusion", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_ED }, INT_MIN, INT_MAX, VE, "sws_dither" }, + { NULL } }; const AVClass sws_context_class = { .class_name = "SWScaler", .item_name = sws_context_to_name, - .option = options, + .option = swscale_options, .category = AV_CLASS_CATEGORY_SWSCALER, .version = LIBAVUTIL_VERSION_INT, }; diff --git a/ffmpeg/libswscale/output.c b/ffmpeg/libswscale/output.c index d9745fb..ddb0d0c 100644 --- a/ffmpeg/libswscale/output.c +++ b/ffmpeg/libswscale/output.c @@ -18,7 +18,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <assert.h> #include <math.h> #include <stdint.h> #include <stdio.h> @@ -37,19 +36,19 @@ #include "swscale.h" #include "swscale_internal.h" -DECLARE_ALIGNED(8, const uint8_t, dither_2x2_4)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = { { 1, 3, 1, 3, 1, 3, 1, 3, }, { 2, 0, 2, 0, 2, 0, 2, 0, }, { 1, 3, 1, 3, 1, 3, 1, 3, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_2x2_8)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = { { 6, 2, 6, 2, 6, 2, 6, 2, }, { 0, 4, 0, 4, 0, 4, 0, 4, }, { 6, 2, 6, 2, 6, 2, 6, 2, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = { { 8, 4, 11, 7, 8, 4, 11, 7, }, { 2, 14, 1, 13, 2, 14, 1, 13, }, { 10, 6, 9, 5, 10, 6, 9, 5, }, @@ -57,7 +56,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[][8]={ { 8, 4, 11, 7, 8, 4, 11, 7, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = { { 17, 9, 23, 15, 16, 8, 22, 14, }, { 5, 29, 3, 27, 4, 28, 2, 26, }, { 21, 13, 19, 11, 20, 12, 18, 10, }, @@ -69,7 +68,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[][8]={ { 17, 9, 23, 15, 16, 8, 22, 14, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = { { 0, 55, 14, 68, 3, 58, 17, 72, }, { 37, 18, 50, 32, 40, 22, 54, 35, }, { 9, 64, 5, 59, 13, 67, 8, 63, }, @@ -82,7 +81,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[][8]={ }; #if 1 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { {117, 62, 158, 103, 113, 58, 155, 100, }, { 34, 199, 21, 186, 31, 196, 17, 182, }, {144, 89, 131, 76, 141, 86, 127, 72, }, @@ -95,7 +94,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ }; #elif 1 // tries to correct a gamma of 1.5 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { { 0, 143, 18, 200, 2, 156, 25, 215, }, { 78, 28, 125, 64, 89, 36, 138, 74, }, { 10, 180, 3, 161, 16, 195, 8, 175, }, @@ -108,7 +107,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ }; #elif 1 // tries to correct a gamma of 2.0 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { { 0, 124, 8, 193, 0, 140, 12, 213, }, { 55, 14, 104, 42, 66, 19, 119, 52, }, { 3, 168, 1, 145, 6, 187, 3, 162, }, @@ -121,7 +120,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ }; #else // tries to correct a gamma of 2.5 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { { 0, 107, 3, 187, 0, 125, 6, 212, }, { 39, 7, 86, 28, 49, 11, 102, 36, }, { 1, 158, 0, 131, 3, 180, 1, 151, }, @@ -330,7 +329,7 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t **alpSrc, uint8_t *dest, int dstW, int y, enum AVPixelFormat target) { - const uint8_t * const d128=dither_8x8_220[y&7]; + const uint8_t * const d128 = ff_dither_8x8_220[y&7]; int i; unsigned acc = 0; int err = 0; @@ -350,7 +349,7 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); } - if (c->flags & SWS_ERROR_DIFFUSION) { + if (c->dither == SWS_DITHER_ED) { Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; c->dither_error[0][i] = err; acc = 2*acc + (Y1 >= 128); @@ -383,11 +382,11 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], enum AVPixelFormat target) { const int16_t *buf0 = buf[0], *buf1 = buf[1]; - const uint8_t * const d128 = dither_8x8_220[y & 7]; + const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; int yalpha1 = 4096 - yalpha; int i; - if (c->flags & SWS_ERROR_DIFFUSION) { + if (c->dither == SWS_DITHER_ED) { int err = 0; int acc = 0; for (i = 0; i < dstW; i +=2) { @@ -441,10 +440,10 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y, enum AVPixelFormat target) { - const uint8_t * const d128 = dither_8x8_220[y & 7]; + const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; int i; - if (c->flags & SWS_ERROR_DIFFUSION) { + if (c->dither == SWS_DITHER_ED) { int err = 0; int acc = 0; for (i = 0; i < dstW; i +=2) { @@ -674,12 +673,231 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422) } static av_always_inline void +yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int32_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int32_t **chrUSrc, + const int32_t **chrVSrc, int chrFilterSize, + const int32_t **alpSrc, uint16_t *dest, int dstW, + int y, enum AVPixelFormat target, int hasAlpha) +{ + int i; + int A1 = 0xffff<<14, A2 = 0xffff<<14; + + for (i = 0; i < ((dstW + 1) >> 1); i++) { + int j; + int Y1 = -0x40000000; + int Y2 = -0x40000000; + int U = -128 << 23; // 19 + int V = -128 << 23; + int R, G, B; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j]; + Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; + } + for (j = 0; j < chrFilterSize; j++) {; + U += chrUSrc[j][i] * (unsigned)chrFilter[j]; + V += chrVSrc[j][i] * (unsigned)chrFilter[j]; + } + + if (hasAlpha) { + A1 = -0x40000000; + A2 = -0x40000000; + for (j = 0; j < lumFilterSize; j++) { + A1 += alpSrc[j][i * 2] * (unsigned)lumFilter[j]; + A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; + } + A1 >>= 1; + A1 += 0x20002000; + A2 >>= 1; + A2 += 0x20002000; + } + + // 8bit: 12+15=27; 16-bit: 12+19=31 + Y1 >>= 14; // 10 + Y1 += 0x10000; + Y2 >>= 14; + Y2 += 0x10000; + U >>= 14; + V >>= 14; + + // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit + Y1 -= c->yuv2rgb_y_offset; + Y2 -= c->yuv2rgb_y_offset; + Y1 *= c->yuv2rgb_y_coeff; + Y2 *= c->yuv2rgb_y_coeff; + Y1 += 1 << 13; // 21 + Y2 += 1 << 13; + // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit + + R = V * c->yuv2rgb_v2r_coeff; + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; + B = U * c->yuv2rgb_u2b_coeff; + + // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } +} + +static av_always_inline void +yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], + const int32_t *ubuf[2], const int32_t *vbuf[2], + const int32_t *abuf[2], uint16_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum AVPixelFormat target, int hasAlpha) +{ + const int32_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], + *abuf0 = hasAlpha ? abuf[0] : NULL, + *abuf1 = hasAlpha ? abuf[1] : NULL; + int yalpha1 = 4096 - yalpha; + int uvalpha1 = 4096 - uvalpha; + int i; + int A1 = 0xffff<<14, A2 = 0xffff<<14; + + for (i = 0; i < ((dstW + 1) >> 1); i++) { + int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14; + int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14; + int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; + int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; + int R, G, B; + + Y1 -= c->yuv2rgb_y_offset; + Y2 -= c->yuv2rgb_y_offset; + Y1 *= c->yuv2rgb_y_coeff; + Y2 *= c->yuv2rgb_y_coeff; + Y1 += 1 << 13; + Y2 += 1 << 13; + + R = V * c->yuv2rgb_v2r_coeff; + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; + B = U * c->yuv2rgb_u2b_coeff; + + if (hasAlpha) { + A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 1; + A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1; + + A1 += 1 << 13; + A2 += 1 << 13; + } + + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } +} + +static av_always_inline void +yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, + const int32_t *ubuf[2], const int32_t *vbuf[2], + const int32_t *abuf0, uint16_t *dest, int dstW, + int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) +{ + const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; + int i; + int A1 = 0xffff<<14, A2= 0xffff<<14; + + if (uvalpha < 2048) { + for (i = 0; i < ((dstW + 1) >> 1); i++) { + int Y1 = (buf0[i * 2] ) >> 2; + int Y2 = (buf0[i * 2 + 1]) >> 2; + int U = (ubuf0[i] + (-128 << 11)) >> 2; + int V = (vbuf0[i] + (-128 << 11)) >> 2; + int R, G, B; + + Y1 -= c->yuv2rgb_y_offset; + Y2 -= c->yuv2rgb_y_offset; + Y1 *= c->yuv2rgb_y_coeff; + Y2 *= c->yuv2rgb_y_coeff; + Y1 += 1 << 13; + Y2 += 1 << 13; + + if (hasAlpha) { + A1 = abuf0[i * 2 ] << 11; + A2 = abuf0[i * 2 + 1] << 11; + + A1 += 1 << 13; + A2 += 1 << 13; + } + + R = V * c->yuv2rgb_v2r_coeff; + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; + B = U * c->yuv2rgb_u2b_coeff; + + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } + } else { + const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; + int A1 = 0xffff<<14, A2 = 0xffff<<14; + for (i = 0; i < ((dstW + 1) >> 1); i++) { + int Y1 = (buf0[i * 2] ) >> 2; + int Y2 = (buf0[i * 2 + 1]) >> 2; + int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; + int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; + int R, G, B; + + Y1 -= c->yuv2rgb_y_offset; + Y2 -= c->yuv2rgb_y_offset; + Y1 *= c->yuv2rgb_y_coeff; + Y2 *= c->yuv2rgb_y_coeff; + Y1 += 1 << 13; + Y2 += 1 << 13; + + if (hasAlpha) { + A1 = abuf0[i * 2 ] << 11; + A2 = abuf0[i * 2 + 1] << 11; + + A1 += 1 << 13; + A2 += 1 << 13; + } + + R = V * c->yuv2rgb_v2r_coeff; + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; + B = U * c->yuv2rgb_u2b_coeff; + + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } + } +} + +static av_always_inline void yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, const int32_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int32_t **chrUSrc, const int32_t **chrVSrc, int chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target) + int y, enum AVPixelFormat target, int hasAlpha) { int i; @@ -737,7 +955,7 @@ yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int uvalpha, int y, - enum AVPixelFormat target) + enum AVPixelFormat target, int hasAlpha) { const int32_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], @@ -778,7 +996,7 @@ static av_always_inline void yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target) + int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; @@ -845,7 +1063,7 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, #undef r_b #undef b_r -#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \ +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha) \ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ const int16_t **_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -860,7 +1078,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt); \ + alpSrc, dest, dstW, y, fmt, hasAlpha); \ } \ \ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ @@ -874,7 +1092,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ **abuf = (const int32_t **) _abuf; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt); \ + dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ } \ \ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ @@ -888,13 +1106,17 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ *abuf0 = (const int32_t *) _abuf0; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ - dstW, uvalpha, y, fmt); \ + dstW, uvalpha, y, fmt, hasAlpha); \ } -YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE) -YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE) -YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE) -YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE) +YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE, 0) +YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE, 0) +YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE, 0) +YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0) /* * Write out 2 RGB pixels in the target pixel format. This function takes a @@ -926,9 +1148,15 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, if (hasAlpha) { int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; + av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0); dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh); dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh); } else { +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1 + int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; + + av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF); +#endif dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1]; dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2]; } @@ -960,26 +1188,26 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, int dr1, dg1, db1, dr2, dg2, db2; if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) { - dr1 = dither_2x2_8[ y & 1 ][0]; - dg1 = dither_2x2_4[ y & 1 ][0]; - db1 = dither_2x2_8[(y & 1) ^ 1][0]; - dr2 = dither_2x2_8[ y & 1 ][1]; - dg2 = dither_2x2_4[ y & 1 ][1]; - db2 = dither_2x2_8[(y & 1) ^ 1][1]; + dr1 = ff_dither_2x2_8[ y & 1 ][0]; + dg1 = ff_dither_2x2_4[ y & 1 ][0]; + db1 = ff_dither_2x2_8[(y & 1) ^ 1][0]; + dr2 = ff_dither_2x2_8[ y & 1 ][1]; + dg2 = ff_dither_2x2_4[ y & 1 ][1]; + db2 = ff_dither_2x2_8[(y & 1) ^ 1][1]; } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) { - dr1 = dither_2x2_8[ y & 1 ][0]; - dg1 = dither_2x2_8[ y & 1 ][1]; - db1 = dither_2x2_8[(y & 1) ^ 1][0]; - dr2 = dither_2x2_8[ y & 1 ][1]; - dg2 = dither_2x2_8[ y & 1 ][0]; - db2 = dither_2x2_8[(y & 1) ^ 1][1]; + dr1 = ff_dither_2x2_8[ y & 1 ][0]; + dg1 = ff_dither_2x2_8[ y & 1 ][1]; + db1 = ff_dither_2x2_8[(y & 1) ^ 1][0]; + dr2 = ff_dither_2x2_8[ y & 1 ][1]; + dg2 = ff_dither_2x2_8[ y & 1 ][0]; + db2 = ff_dither_2x2_8[(y & 1) ^ 1][1]; } else { - dr1 = dither_4x4_16[ y & 3 ][0]; - dg1 = dither_4x4_16[ y & 3 ][1]; - db1 = dither_4x4_16[(y & 3) ^ 3][0]; - dr2 = dither_4x4_16[ y & 3 ][1]; - dg2 = dither_4x4_16[ y & 3 ][0]; - db2 = dither_4x4_16[(y & 3) ^ 3][1]; + dr1 = ff_dither_4x4_16[ y & 3 ][0]; + dg1 = ff_dither_4x4_16[ y & 3 ][1]; + db1 = ff_dither_4x4_16[(y & 3) ^ 3][0]; + dr2 = ff_dither_4x4_16[ y & 3 ][1]; + dg2 = ff_dither_4x4_16[ y & 3 ][0]; + db2 = ff_dither_4x4_16[(y & 3) ^ 3][1]; } dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; @@ -992,15 +1220,15 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, int dr1, dg1, db1, dr2, dg2, db2; if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) { - const uint8_t * const d64 = dither_8x8_73[y & 7]; - const uint8_t * const d32 = dither_8x8_32[y & 7]; + const uint8_t * const d64 = ff_dither_8x8_73[y & 7]; + const uint8_t * const d32 = ff_dither_8x8_32[y & 7]; dr1 = dg1 = d32[(i * 2 + 0) & 7]; db1 = d64[(i * 2 + 0) & 7]; dr2 = dg2 = d32[(i * 2 + 1) & 7]; db2 = d64[(i * 2 + 1) & 7]; } else { - const uint8_t * const d64 = dither_8x8_73 [y & 7]; - const uint8_t * const d128 = dither_8x8_220[y & 7]; + const uint8_t * const d64 = ff_dither_8x8_73 [y & 7]; + const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; dr1 = db1 = d128[(i * 2 + 0) & 7]; dg1 = d64[(i * 2 + 0) & 7]; dr2 = db2 = d128[(i * 2 + 1) & 7]; @@ -1177,7 +1405,8 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ alpSrc, dest, dstW, y, fmt, hasAlpha); \ } -#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \ + +#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \ YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ const int16_t *ubuf[2], const int16_t *vbuf[2], \ @@ -1186,8 +1415,10 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ { \ name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ -} \ - \ +} + +#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \ +YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \ static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ const int16_t *ubuf[2], const int16_t *vbuf[2], \ const int16_t *abuf0, uint8_t *dest, int dstW, \ @@ -1217,6 +1448,98 @@ YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0) YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0) YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0) +static av_always_inline void yuv2rgb_write_full(SwsContext *c, + uint8_t *dest, int i, int Y, int A, int U, int V, + int y, enum AVPixelFormat target, int hasAlpha, int err[4]) +{ + int R, G, B; + int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8; + + Y -= c->yuv2rgb_y_offset; + Y *= c->yuv2rgb_y_coeff; + Y += 1 << 21; + R = Y + V*c->yuv2rgb_v2r_coeff; + G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; + B = Y + U*c->yuv2rgb_u2b_coeff; + if ((R | G | B) & 0xC0000000) { + R = av_clip_uintp2(R, 30); + G = av_clip_uintp2(G, 30); + B = av_clip_uintp2(B, 30); + } + + switch(target) { + case AV_PIX_FMT_ARGB: + dest[0] = hasAlpha ? A : 255; + dest[1] = R >> 22; + dest[2] = G >> 22; + dest[3] = B >> 22; + break; + case AV_PIX_FMT_RGB24: + dest[0] = R >> 22; + dest[1] = G >> 22; + dest[2] = B >> 22; + break; + case AV_PIX_FMT_RGBA: + dest[0] = R >> 22; + dest[1] = G >> 22; + dest[2] = B >> 22; + dest[3] = hasAlpha ? A : 255; + break; + case AV_PIX_FMT_ABGR: + dest[0] = hasAlpha ? A : 255; + dest[1] = B >> 22; + dest[2] = G >> 22; + dest[3] = R >> 22; + break; + case AV_PIX_FMT_BGR24: + dest[0] = B >> 22; + dest[1] = G >> 22; + dest[2] = R >> 22; + break; + case AV_PIX_FMT_BGRA: + dest[0] = B >> 22; + dest[1] = G >> 22; + dest[2] = R >> 22; + dest[3] = hasAlpha ? A : 255; + break; + case AV_PIX_FMT_BGR4_BYTE: + case AV_PIX_FMT_RGB4_BYTE: + case AV_PIX_FMT_BGR8: + case AV_PIX_FMT_RGB8: + { + int r,g,b; + R >>= 22; + G >>= 22; + B >>= 22; + R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; + G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4; + B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4; + c->dither_error[0][i] = err[0]; + c->dither_error[1][i] = err[1]; + c->dither_error[2][i] = err[2]; + r = R >> (isrgb8 ? 5 : 7); + g = G >> (isrgb8 ? 5 : 6); + b = B >> (isrgb8 ? 6 : 7); + r = av_clip(r, 0, isrgb8 ? 7 : 1); + g = av_clip(g, 0, isrgb8 ? 7 : 3); + b = av_clip(b, 0, isrgb8 ? 3 : 1); + err[0] = R - r*(isrgb8 ? 36 : 255); + err[1] = G - g*(isrgb8 ? 36 : 85); + err[2] = B - b*(isrgb8 ? 85 : 255); + if(target == AV_PIX_FMT_BGR4_BYTE) { + dest[0] = r + 2*g + 8*b; + } else if(target == AV_PIX_FMT_RGB4_BYTE) { + dest[0] = b + 2*g + 8*r; + } else if(target == AV_PIX_FMT_BGR8) { + dest[0] = r + 8*g + 64*b; + } else if(target == AV_PIX_FMT_RGB8) { + dest[0] = b + 4*g + 32*r; + } else + av_assert2(0); + break;} + } +} + static av_always_inline void yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -1228,14 +1551,17 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, int i; int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; int err[4] = {0}; - int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8; + int A = 0; //init to silence warning + + if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE + || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) + step = 1; for (i = 0; i < dstW; i++) { int j; int Y = 1<<9; int U = (1<<9)-(128 << 19); int V = (1<<9)-(128 << 19); - int R, G, B, A; for (j = 0; j < lumFilterSize; j++) { Y += lumSrc[j][i] * lumFilter[j]; @@ -1256,90 +1582,49 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, if (A & 0x100) A = av_clip_uint8(A); } - Y -= c->yuv2rgb_y_offset; - Y *= c->yuv2rgb_y_coeff; - Y += 1 << 21; - R = Y + V*c->yuv2rgb_v2r_coeff; - G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; - B = Y + U*c->yuv2rgb_u2b_coeff; - if ((R | G | B) & 0xC0000000) { - R = av_clip_uintp2(R, 30); - G = av_clip_uintp2(G, 30); - B = av_clip_uintp2(B, 30); - } + yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); + dest += step; + } + c->dither_error[0][i] = err[0]; + c->dither_error[1][i] = err[1]; + c->dither_error[2][i] = err[2]; +} - switch(target) { - case AV_PIX_FMT_ARGB: - dest[0] = hasAlpha ? A : 255; - dest[1] = R >> 22; - dest[2] = G >> 22; - dest[3] = B >> 22; - break; - case AV_PIX_FMT_RGB24: - dest[0] = R >> 22; - dest[1] = G >> 22; - dest[2] = B >> 22; - break; - case AV_PIX_FMT_RGBA: - dest[0] = R >> 22; - dest[1] = G >> 22; - dest[2] = B >> 22; - dest[3] = hasAlpha ? A : 255; - break; - case AV_PIX_FMT_ABGR: - dest[0] = hasAlpha ? A : 255; - dest[1] = B >> 22; - dest[2] = G >> 22; - dest[3] = R >> 22; - break; - case AV_PIX_FMT_BGR24: - dest[0] = B >> 22; - dest[1] = G >> 22; - dest[2] = R >> 22; - break; - case AV_PIX_FMT_BGRA: - dest[0] = B >> 22; - dest[1] = G >> 22; - dest[2] = R >> 22; - dest[3] = hasAlpha ? A : 255; - break; - case AV_PIX_FMT_BGR4_BYTE: - case AV_PIX_FMT_RGB4_BYTE: - case AV_PIX_FMT_BGR8: - case AV_PIX_FMT_RGB8: - { - int r,g,b; - R >>= 22; - G >>= 22; - B >>= 22; - R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; - G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4; - B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4; - c->dither_error[0][i] = err[0]; - c->dither_error[1][i] = err[1]; - c->dither_error[2][i] = err[2]; - r = R >> (isrgb8 ? 5 : 7); - g = G >> (isrgb8 ? 5 : 6); - b = B >> (isrgb8 ? 6 : 7); - r = av_clip(r, 0, isrgb8 ? 7 : 1); - g = av_clip(g, 0, isrgb8 ? 7 : 3); - b = av_clip(b, 0, isrgb8 ? 3 : 1); - err[0] = R - r*(isrgb8 ? 36 : 255); - err[1] = G - g*(isrgb8 ? 36 : 85); - err[2] = B - b*(isrgb8 ? 85 : 255); - if(target == AV_PIX_FMT_BGR4_BYTE) { - dest[0] = r + 2*g + 8*b; - } else if(target == AV_PIX_FMT_RGB4_BYTE) { - dest[0] = b + 2*g + 8*r; - } else if(target == AV_PIX_FMT_BGR8) { - dest[0] = r + 8*g + 64*b; - } else if(target == AV_PIX_FMT_RGB8) { - dest[0] = b + 4*g + 32*r; - } else - av_assert2(0); - step = 1; - break;} +static av_always_inline void +yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum AVPixelFormat target, int hasAlpha) +{ + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], + *abuf0 = hasAlpha ? abuf[0] : NULL, + *abuf1 = hasAlpha ? abuf[1] : NULL; + int yalpha1 = 4096 - yalpha; + int uvalpha1 = 4096 - uvalpha; + int i; + int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; + int err[4] = {0}; + int A = 0; // init to silcene warning + + if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE + || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) + step = 1; + + for (i = 0; i < dstW; i++) { + int Y = ( buf0[i] * yalpha1 + buf1[i] * yalpha ) >> 10; //FIXME rounding + int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10; + int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10; + + if (hasAlpha) { + A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19; + if (A & 0x100) + A = av_clip_uint8(A); } + + yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); dest += step; } c->dither_error[0][i] = err[0]; @@ -1347,30 +1632,86 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, c->dither_error[2][i] = err[2]; } +static av_always_inline void +yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, int y, enum AVPixelFormat target, + int hasAlpha) +{ + const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; + int i; + int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; + int err[4] = {0}; + + if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE + || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) + step = 1; + + if (uvalpha < 2048) { + int A = 0; //init to silence warning + for (i = 0; i < dstW; i++) { + int Y = buf0[i] << 2; + int U = (ubuf0[i] - (128<<7)) << 2; + int V = (vbuf0[i] - (128<<7)) << 2; + + if (hasAlpha) { + A = (abuf0[i] + 64) >> 7; + if (A & 0x100) + A = av_clip_uint8(A); + } + + yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); + dest += step; + } + } else { + const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; + int A = 0; //init to silence warning + for (i = 0; i < dstW; i++) { + int Y = buf0[i] << 2; + int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1; + int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1; + + if (hasAlpha) { + A = (abuf0[i] + 64) >> 7; + if (A & 0x100) + A = av_clip_uint8(A); + } + + yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); + dest += step; + } + } + + c->dither_error[0][i] = err[0]; + c->dither_error[1][i] = err[1]; + c->dither_error[2][i] = err[2]; +} + #if CONFIG_SMALL -YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) +YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) +YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) +YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) +YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) #else #if CONFIG_SWSCALE_ALPHA -YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1) -YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1) -YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1) -YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1) +YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1) +YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1) +YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1) +YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1) #endif -YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0) #endif -YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0) -YUV2RGBWRAPPERX(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0) +YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0) static void yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, @@ -1382,16 +1723,17 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); int i; - int hasAlpha = 0; + int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc; uint16_t **dest16 = (uint16_t**)dest; int SH = 22 + 7 - desc->comp[0].depth_minus1; + int A = 0; // init to silence warning for (i = 0; i < dstW; i++) { int j; int Y = 1 << 9; int U = (1 << 9) - (128 << 19); int V = (1 << 9) - (128 << 19); - int R, G, B, A; + int R, G, B; for (j = 0; j < lumFilterSize; j++) Y += lumSrc[j][i] * lumFilter[j]; @@ -1434,10 +1776,14 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, dest16[0][i] = G >> SH; dest16[1][i] = B >> SH; dest16[2][i] = R >> SH; + if (hasAlpha) + dest16[3][i] = A; } else { dest[0][i] = G >> 22; dest[1][i] = B >> 22; dest[2][i] = R >> 22; + if (hasAlpha) + dest[3][i] = A; } } if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { @@ -1445,6 +1791,8 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, dest16[0][i] = av_bswap16(dest16[0][i]); dest16[1][i] = av_bswap16(dest16[1][i]); dest16[2][i] = av_bswap16(dest16[2][i]); + if (hasAlpha) + dest16[3][i] = av_bswap16(dest16[3][i]); } } } @@ -1491,76 +1839,112 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_RGBA: #if CONFIG_SMALL *yuv2packedX = yuv2rgba32_full_X_c; + *yuv2packed2 = yuv2rgba32_full_2_c; + *yuv2packed1 = yuv2rgba32_full_1_c; #else #if CONFIG_SWSCALE_ALPHA if (c->alpPixBuf) { *yuv2packedX = yuv2rgba32_full_X_c; + *yuv2packed2 = yuv2rgba32_full_2_c; + *yuv2packed1 = yuv2rgba32_full_1_c; } else #endif /* CONFIG_SWSCALE_ALPHA */ { *yuv2packedX = yuv2rgbx32_full_X_c; + *yuv2packed2 = yuv2rgbx32_full_2_c; + *yuv2packed1 = yuv2rgbx32_full_1_c; } #endif /* !CONFIG_SMALL */ break; case AV_PIX_FMT_ARGB: #if CONFIG_SMALL *yuv2packedX = yuv2argb32_full_X_c; + *yuv2packed2 = yuv2argb32_full_2_c; + *yuv2packed1 = yuv2argb32_full_1_c; #else #if CONFIG_SWSCALE_ALPHA if (c->alpPixBuf) { *yuv2packedX = yuv2argb32_full_X_c; + *yuv2packed2 = yuv2argb32_full_2_c; + *yuv2packed1 = yuv2argb32_full_1_c; } else #endif /* CONFIG_SWSCALE_ALPHA */ { *yuv2packedX = yuv2xrgb32_full_X_c; + *yuv2packed2 = yuv2xrgb32_full_2_c; + *yuv2packed1 = yuv2xrgb32_full_1_c; } #endif /* !CONFIG_SMALL */ break; case AV_PIX_FMT_BGRA: #if CONFIG_SMALL *yuv2packedX = yuv2bgra32_full_X_c; + *yuv2packed2 = yuv2bgra32_full_2_c; + *yuv2packed1 = yuv2bgra32_full_1_c; #else #if CONFIG_SWSCALE_ALPHA if (c->alpPixBuf) { *yuv2packedX = yuv2bgra32_full_X_c; + *yuv2packed2 = yuv2bgra32_full_2_c; + *yuv2packed1 = yuv2bgra32_full_1_c; } else #endif /* CONFIG_SWSCALE_ALPHA */ { *yuv2packedX = yuv2bgrx32_full_X_c; + *yuv2packed2 = yuv2bgrx32_full_2_c; + *yuv2packed1 = yuv2bgrx32_full_1_c; } #endif /* !CONFIG_SMALL */ break; case AV_PIX_FMT_ABGR: #if CONFIG_SMALL *yuv2packedX = yuv2abgr32_full_X_c; + *yuv2packed2 = yuv2abgr32_full_2_c; + *yuv2packed1 = yuv2abgr32_full_1_c; #else #if CONFIG_SWSCALE_ALPHA if (c->alpPixBuf) { *yuv2packedX = yuv2abgr32_full_X_c; + *yuv2packed2 = yuv2abgr32_full_2_c; + *yuv2packed1 = yuv2abgr32_full_1_c; } else #endif /* CONFIG_SWSCALE_ALPHA */ { *yuv2packedX = yuv2xbgr32_full_X_c; + *yuv2packed2 = yuv2xbgr32_full_2_c; + *yuv2packed1 = yuv2xbgr32_full_1_c; } #endif /* !CONFIG_SMALL */ break; case AV_PIX_FMT_RGB24: *yuv2packedX = yuv2rgb24_full_X_c; + *yuv2packed2 = yuv2rgb24_full_2_c; + *yuv2packed1 = yuv2rgb24_full_1_c; break; case AV_PIX_FMT_BGR24: *yuv2packedX = yuv2bgr24_full_X_c; + *yuv2packed2 = yuv2bgr24_full_2_c; + *yuv2packed1 = yuv2bgr24_full_1_c; break; case AV_PIX_FMT_BGR4_BYTE: *yuv2packedX = yuv2bgr4_byte_full_X_c; + *yuv2packed2 = yuv2bgr4_byte_full_2_c; + *yuv2packed1 = yuv2bgr4_byte_full_1_c; break; case AV_PIX_FMT_RGB4_BYTE: *yuv2packedX = yuv2rgb4_byte_full_X_c; + *yuv2packed2 = yuv2rgb4_byte_full_2_c; + *yuv2packed1 = yuv2rgb4_byte_full_1_c; break; case AV_PIX_FMT_BGR8: *yuv2packedX = yuv2bgr8_full_X_c; + *yuv2packed2 = yuv2bgr8_full_2_c; + *yuv2packed1 = yuv2bgr8_full_1_c; break; case AV_PIX_FMT_RGB8: *yuv2packedX = yuv2rgb8_full_X_c; + *yuv2packed2 = yuv2rgb8_full_2_c; + *yuv2packed1 = yuv2rgb8_full_1_c; break; case AV_PIX_FMT_GBRP: case AV_PIX_FMT_GBRP9BE: @@ -1573,6 +1957,7 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_GBRP14LE: case AV_PIX_FMT_GBRP16BE: case AV_PIX_FMT_GBRP16LE: + case AV_PIX_FMT_GBRAP: *yuv2anyX = yuv2gbrp_full_X_c; break; } @@ -1581,6 +1966,34 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } else { YUV_PACKED: switch (dstFormat) { + case AV_PIX_FMT_RGBA64LE: +#if CONFIG_SWSCALE_ALPHA + if (c->alpPixBuf) { + *yuv2packed1 = yuv2rgba64le_1_c; + *yuv2packed2 = yuv2rgba64le_2_c; + *yuv2packedX = yuv2rgba64le_X_c; + } else +#endif /* CONFIG_SWSCALE_ALPHA */ + { + *yuv2packed1 = yuv2rgbx64le_1_c; + *yuv2packed2 = yuv2rgbx64le_2_c; + *yuv2packedX = yuv2rgbx64le_X_c; + } + break; + case AV_PIX_FMT_RGBA64BE: +#if CONFIG_SWSCALE_ALPHA + if (c->alpPixBuf) { + *yuv2packed1 = yuv2rgba64be_1_c; + *yuv2packed2 = yuv2rgba64be_2_c; + *yuv2packedX = yuv2rgba64be_X_c; + } else +#endif /* CONFIG_SWSCALE_ALPHA */ + { + *yuv2packed1 = yuv2rgbx64be_1_c; + *yuv2packed2 = yuv2rgbx64be_2_c; + *yuv2packedX = yuv2rgbx64be_X_c; + } + break; case AV_PIX_FMT_RGB48LE: *yuv2packed1 = yuv2rgb48le_1_c; *yuv2packed2 = yuv2rgb48le_2_c; diff --git a/ffmpeg/libswscale/ppc/Makefile b/ffmpeg/libswscale/ppc/Makefile index 018955b..d1b596e 100644 --- a/ffmpeg/libswscale/ppc/Makefile +++ b/ffmpeg/libswscale/ppc/Makefile @@ -1,3 +1,3 @@ -ALTIVEC-OBJS += ppc/swscale_altivec.o \ - ppc/yuv2rgb_altivec.o \ - ppc/yuv2yuv_altivec.o \ +OBJS += ppc/swscale_altivec.o \ + ppc/yuv2rgb_altivec.o \ + ppc/yuv2yuv_altivec.o \ diff --git a/ffmpeg/libswscale/ppc/swscale_altivec.c b/ffmpeg/libswscale/ppc/swscale_altivec.c index 9ca2868..86f40ab 100644 --- a/ffmpeg/libswscale/ppc/swscale_altivec.c +++ b/ffmpeg/libswscale/ppc/swscale_altivec.c @@ -30,6 +30,7 @@ #include "libavutil/cpu.h" #include "yuv2rgb_altivec.h" +#if HAVE_ALTIVEC #define vzero vec_splat_s32(0) #define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do { \ @@ -284,9 +285,11 @@ static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, } } } +#endif /* HAVE_ALTIVEC */ -av_cold void ff_sws_init_swScale_altivec(SwsContext *c) +av_cold void ff_sws_init_swscale_ppc(SwsContext *c) { +#if HAVE_ALTIVEC enum AVPixelFormat dstFormat = c->dstFormat; if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) @@ -325,4 +328,5 @@ av_cold void ff_sws_init_swScale_altivec(SwsContext *c) break; } } +#endif /* HAVE_ALTIVEC */ } diff --git a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c b/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c index a8501d9..25282bf 100644 --- a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c +++ b/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c @@ -100,6 +100,8 @@ #include "libavutil/pixdesc.h" #include "yuv2rgb_altivec.h" +#if HAVE_ALTIVEC + #undef PROFILE_THE_BEAST #undef INC_SCALING @@ -246,8 +248,6 @@ static const vector unsigned char (vector unsigned short) \ vec_max(y, ((vector signed short) { 0 }))) -//#define out_pixels(a, b, c, ptr) vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, a, a, ptr) - static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y, vector signed short U, vector signed short V, vector signed short *R, vector signed short *G, @@ -526,14 +526,17 @@ static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in, return srcSliceH; } +#endif /* HAVE_ALTIVEC */ + /* Ok currently the acceleration routine only supports * inputs of widths a multiple of 16 * and heights a multiple 2 * * So we just fall back to the C codes for this. */ -av_cold SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c) +av_cold SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c) { +#if HAVE_ALTIVEC if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) return NULL; @@ -589,20 +592,26 @@ av_cold SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c) } break; } +#endif /* HAVE_ALTIVEC */ + return NULL; } -av_cold void ff_yuv2rgb_init_tables_altivec(SwsContext *c, - const int inv_table[4], - int brightness, - int contrast, - int saturation) +av_cold void ff_yuv2rgb_init_tables_ppc(SwsContext *c, + const int inv_table[4], + int brightness, + int contrast, + int saturation) { +#if HAVE_ALTIVEC union { DECLARE_ALIGNED(16, signed short, tmp)[8]; vector signed short vec; } buf; + if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) + return; + buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9; // cy buf.tmp[1] = -256 * brightness; // oy buf.tmp[2] = (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16); // crv @@ -618,20 +627,23 @@ av_cold void ff_yuv2rgb_init_tables_altivec(SwsContext *c, c->CGU = vec_splat((vector signed short) buf.vec, 4); c->CGV = vec_splat((vector signed short) buf.vec, 5); return; +#endif /* HAVE_ALTIVEC */ } -static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c, - const int16_t *lumFilter, - const int16_t **lumSrc, - int lumFilterSize, - const int16_t *chrFilter, - const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, - const int16_t **alpSrc, - uint8_t *dest, - int dstW, int dstY, - enum AVPixelFormat target) +#if HAVE_ALTIVEC + +static av_always_inline void yuv2packedX_altivec(SwsContext *c, + const int16_t *lumFilter, + const int16_t **lumSrc, + int lumFilterSize, + const int16_t *chrFilter, + const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, + const int16_t **alpSrc, + uint8_t *dest, + int dstW, int dstY, + enum AVPixelFormat target) { int i, j; vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V; @@ -840,10 +852,10 @@ void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ const int16_t **alpSrc, \ uint8_t *dest, int dstW, int dstY) \ { \ - ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, \ - chrFilterSize, alpSrc, \ - dest, dstW, dstY, pixfmt); \ + yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, \ + chrFilterSize, alpSrc, \ + dest, dstW, dstY, pixfmt); \ } YUV2PACKEDX_WRAPPER(abgr, AV_PIX_FMT_ABGR); @@ -852,3 +864,5 @@ YUV2PACKEDX_WRAPPER(argb, AV_PIX_FMT_ARGB); YUV2PACKEDX_WRAPPER(rgba, AV_PIX_FMT_RGBA); YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24); YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24); + +#endif /* HAVE_ALTIVEC */ diff --git a/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c b/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c index 792deb9..2b1c5dd 100644 --- a/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c +++ b/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c @@ -24,9 +24,12 @@ #include <inttypes.h> #include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" -#include "libavutil/cpu.h" + +#if HAVE_ALTIVEC static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, @@ -179,16 +182,23 @@ static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[], return srcSliceH; } -void ff_swscale_get_unscaled_altivec(SwsContext *c) +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c) { - if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->srcW & 15) && - !(c->flags & SWS_BITEXACT) && c->srcFormat == AV_PIX_FMT_YUV420P) { +#if HAVE_ALTIVEC + if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) + return; + + if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) && + c->srcFormat == AV_PIX_FMT_YUV420P) { enum AVPixelFormat dstFormat = c->dstFormat; // unscaled YV12 -> packed YUV, we want speed if (dstFormat == AV_PIX_FMT_YUYV422) - c->swScale = yv12toyuy2_unscaled_altivec; + c->swscale = yv12toyuy2_unscaled_altivec; else if (dstFormat == AV_PIX_FMT_UYVY422) - c->swScale = yv12touyvy_unscaled_altivec; + c->swscale = yv12touyvy_unscaled_altivec; } +#endif /* HAVE_ALTIVEC */ } diff --git a/ffmpeg/libswscale/rgb2rgb.c b/ffmpeg/libswscale/rgb2rgb.c index 1233a1d..cf877fb 100644 --- a/ffmpeg/libswscale/rgb2rgb.c +++ b/ffmpeg/libswscale/rgb2rgb.c @@ -73,15 +73,19 @@ void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride); -void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, - uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride); +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, int width, int height, int src1Stride, int src2Stride, int dstStride); +void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, + int width, int height, int srcStride, + int dst1Stride, int dst2Stride); void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, @@ -105,7 +109,6 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride); -#define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5)) #define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5)) #define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5)) @@ -129,7 +132,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, av_cold void sws_rgb2rgb_init(void) { rgb2rgb_init_c(); - if (HAVE_MMX) + if (ARCH_X86) rgb2rgb_init_x86(); } diff --git a/ffmpeg/libswscale/rgb2rgb.h b/ffmpeg/libswscale/rgb2rgb.h index e37f0fb..5df5dea 100644 --- a/ffmpeg/libswscale/rgb2rgb.h +++ b/ffmpeg/libswscale/rgb2rgb.h @@ -28,8 +28,8 @@ #include <inttypes.h> -#include "libswscale/swscale.h" #include "libavutil/avutil.h" +#include "swscale.h" /* A full collection of RGB to RGB(BGR) converters */ extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); @@ -76,9 +76,9 @@ void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size); -void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride); +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv); /** * Height should be a multiple of 2 and width should be a multiple of 16. @@ -124,9 +124,10 @@ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uin * Chrominance data is only taken from every second line, others are ignored. * FIXME: Write high quality version. */ -extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride); +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); @@ -134,6 +135,10 @@ extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t int width, int height, int src1Stride, int src2Stride, int dstStride); +extern void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, + int width, int height, int srcStride, + int dst1Stride, int dst2Stride); + extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, diff --git a/ffmpeg/libswscale/rgb2rgb_template.c b/ffmpeg/libswscale/rgb2rgb_template.c index 8753594..56e735f 100644 --- a/ffmpeg/libswscale/rgb2rgb_template.c +++ b/ffmpeg/libswscale/rgb2rgb_template.c @@ -26,6 +26,8 @@ #include <stddef.h> +#include "libavutil/attributes.h" + static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size) { @@ -612,10 +614,13 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, * others are ignored in the C version. * FIXME: Write HQ version. */ -void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride) + int chromStride, int srcStride, int32_t *rgb2yuv) { + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; int y; const int chromWidth = width >> 1; @@ -626,9 +631,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, unsigned int g = src[6 * i + 1]; unsigned int r = src[6 * i + 2]; - unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16; - unsigned int V = ((RV * r + GV * g + BV * b) >> RGB2YUV_SHIFT) + 128; - unsigned int U = ((RU * r + GU * g + BU * b) >> RGB2YUV_SHIFT) + 128; + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; udst[i] = U; vdst[i] = V; @@ -638,7 +643,7 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, g = src[6 * i + 4]; r = src[6 * i + 5]; - Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16; + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ydst[2 * i + 1] = Y; } ydst += lumStride; @@ -652,7 +657,7 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, unsigned int g = src[6 * i + 1]; unsigned int r = src[6 * i + 2]; - unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16; + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ydst[2 * i] = Y; @@ -660,7 +665,7 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, g = src[6 * i + 4]; r = src[6 * i + 5]; - Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16; + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ydst[2 * i + 1] = Y; } udst += chromStride; @@ -688,6 +693,24 @@ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, } } +static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, + int width, int height, int srcStride, + int dst1Stride, int dst2Stride) +{ + int h; + + for (h = 0; h < height; h++) { + int w; + for (w = 0; w < width; w++) { + dst1[w] = src[2 * w + 0]; + dst2[w] = src[2 * w + 1]; + } + src += srcStride; + dst1 += dst1Stride; + dst2 += dst2Stride; + } +} + static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, @@ -818,7 +841,7 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src, ydst, width); @@ -838,7 +861,7 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src, ydst, width); @@ -856,7 +879,7 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src + 1, ydst, width); @@ -876,7 +899,7 @@ static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src + 1, ydst, width); @@ -889,7 +912,7 @@ static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, } } -static inline void rgb2rgb_init_c(void) +static av_cold void rgb2rgb_init_c(void) { rgb15to16 = rgb15to16_c; rgb15tobgr24 = rgb15tobgr24_c; @@ -915,8 +938,9 @@ static inline void rgb2rgb_init_c(void) yuv422ptouyvy = yuv422ptouyvy_c; yuy2toyv12 = yuy2toyv12_c; planar2x = planar2x_c; - rgb24toyv12 = rgb24toyv12_c; + ff_rgb24toyv12 = ff_rgb24toyv12_c; interleaveBytes = interleaveBytes_c; + deinterleaveBytes = deinterleaveBytes_c; vu9_to_vu12 = vu9_to_vu12_c; yvu9_to_yuy2 = yvu9_to_yuy2_c; diff --git a/ffmpeg/libswscale/swscale-test.c b/ffmpeg/libswscale/swscale-test.c index aece61e..e0b887e 100644 --- a/ffmpeg/libswscale/swscale-test.c +++ b/ffmpeg/libswscale/swscale-test.c @@ -305,7 +305,7 @@ static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp, ret = sscanf(buf, " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" - " SSD=%"PRId64 ", %"PRId64 ", %"PRId64 ", %"PRId64 "\n", + " SSD=%"SCNd64 ", %"SCNd64 ", %"SCNd64 ", %"SCNd64 "\n", srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); if (ret != 12) { diff --git a/ffmpeg/libswscale/swscale.c b/ffmpeg/libswscale/swscale.c index bb90819..710dce3 100644 --- a/ffmpeg/libswscale/swscale.c +++ b/ffmpeg/libswscale/swscale.c @@ -18,7 +18,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <assert.h> #include <inttypes.h> #include <math.h> #include <stdio.h> @@ -36,7 +35,7 @@ #include "swscale_internal.h" #include "swscale.h" -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = { +DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = { { 36, 68, 60, 92, 34, 66, 58, 90, }, { 100, 4, 124, 28, 98, 2, 122, 26, }, { 52, 84, 44, 76, 50, 82, 42, 74, }, @@ -45,9 +44,10 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = { { 96, 0, 120, 24, 102, 6, 126, 30, }, { 48, 80, 40, 72, 54, 86, 46, 78, }, { 112, 16, 104, 8, 118, 22, 110, 14, }, + { 36, 68, 60, 92, 34, 66, 58, 90, }, }; -DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = { +DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = { 64, 64, 64, 64, 64, 64, 64, 64 }; @@ -254,7 +254,10 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src = formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { - c->readLumPlanar(formatConvBuffer, src_in, srcW); + c->readLumPlanar(formatConvBuffer, src_in, srcW, c->input_rgb2yuv_table); + src = formatConvBuffer; + } else if (c->readAlpPlanar && isAlpha) { + c->readAlpPlanar(formatConvBuffer, src_in, srcW, NULL); src = formatConvBuffer; } @@ -307,7 +310,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, } else if (c->readChrPlanar) { uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); - c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); + c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW, c->input_rgb2yuv_table); src1 = formatConvBuffer; src2 = buf2; } @@ -328,7 +331,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, if (DEBUG_SWSCALE_BUFFERS) \ av_log(c, AV_LOG_DEBUG, __VA_ARGS__) -static int swScale(SwsContext *c, const uint8_t *src[], +static int swscale(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { @@ -370,8 +373,8 @@ static int swScale(SwsContext *c, const uint8_t *src[], yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packedX_fn yuv2packedX = c->yuv2packedX; yuv2anyX_fn yuv2anyX = c->yuv2anyX; - const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample; - const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample); + const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample; + const int chrSrcSliceH = FF_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample); int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); int lastDstY; @@ -383,6 +386,10 @@ static int swScale(SwsContext *c, const uint8_t *src[], int lastInLumBuf = c->lastInLumBuf; int lastInChrBuf = c->lastInChrBuf; + if (!usePal(c->srcFormat)) { + pal = c->input_rgb2yuv_table; + } + if (isPacked(c->srcFormat)) { src[0] = src[1] = @@ -396,7 +403,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], srcStride[1] <<= c->vChrDrop; srcStride[2] <<= c->vChrDrop; - DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", + DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], dst[0], dstStride[0], dst[1], dstStride[1], @@ -442,7 +449,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], } if (!should_dither) { - c->chrDither8 = c->lumDither8 = ff_sws_pb_64; + c->chrDither8 = c->lumDither8 = sws_pb_64; } lastDstY = dstY; @@ -484,7 +491,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], // Do we have enough lines in this slice to output the dstY line enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && - lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample); + lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample); if (!enough_lines) { lastLumSrcY = srcSliceY + srcSliceH - 1; @@ -551,8 +558,8 @@ static int swScale(SwsContext *c, const uint8_t *src[], lastInLumBuf, lastInChrBuf); #endif if (should_dither) { - c->chrDither8 = dither_8x8_128[chrDstY & 7]; - c->lumDither8 = dither_8x8_128[dstY & 7]; + c->chrDither8 = ff_dither_8x8_128[chrDstY & 7]; + c->lumDither8 = ff_dither_8x8_128[dstY & 7]; } if (dstY >= dstH - 2) { /* hmm looks like we can't use MMX here without overwriting @@ -697,7 +704,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], return dstY - lastDstY; } -static av_cold void sws_init_swScale_c(SwsContext *c) +static av_cold void sws_init_swscale(SwsContext *c) { enum AVPixelFormat srcFormat = c->srcFormat; @@ -750,14 +757,14 @@ static av_cold void sws_init_swScale_c(SwsContext *c) SwsFunc ff_getSwsFunc(SwsContext *c) { - sws_init_swScale_c(c); + sws_init_swscale(c); - if (HAVE_MMX) - ff_sws_init_swScale_mmx(c); - if (HAVE_ALTIVEC) - ff_sws_init_swScale_altivec(c); + if (ARCH_PPC) + ff_sws_init_swscale_ppc(c); + if (ARCH_X86) + ff_sws_init_swscale_x86(c); - return swScale; + return swscale; } static void reset_ptr(const uint8_t *src[], int format) @@ -787,6 +794,118 @@ static int check_image_pointers(const uint8_t * const data[4], enum AVPixelForma return 1; } +static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst, + const uint16_t *src, int stride, int h) +{ + int xp,yp; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); + + for (yp=0; yp<h; yp++) { + for (xp=0; xp+2<stride; xp+=3) { + int x, y, z, r, g, b; + + if (desc->flags & AV_PIX_FMT_FLAG_BE) { + x = AV_RB16(src + xp + 0); + y = AV_RB16(src + xp + 1); + z = AV_RB16(src + xp + 2); + } else { + x = AV_RL16(src + xp + 0); + y = AV_RL16(src + xp + 1); + z = AV_RL16(src + xp + 2); + } + + x = c->xyzgamma[x>>4]; + y = c->xyzgamma[y>>4]; + z = c->xyzgamma[z>>4]; + + // convert from XYZlinear to sRGBlinear + r = c->xyz2rgb_matrix[0][0] * x + + c->xyz2rgb_matrix[0][1] * y + + c->xyz2rgb_matrix[0][2] * z >> 12; + g = c->xyz2rgb_matrix[1][0] * x + + c->xyz2rgb_matrix[1][1] * y + + c->xyz2rgb_matrix[1][2] * z >> 12; + b = c->xyz2rgb_matrix[2][0] * x + + c->xyz2rgb_matrix[2][1] * y + + c->xyz2rgb_matrix[2][2] * z >> 12; + + // limit values to 12-bit depth + r = av_clip_c(r,0,4095); + g = av_clip_c(g,0,4095); + b = av_clip_c(b,0,4095); + + // convert from sRGBlinear to RGB and scale from 12bit to 16bit + if (desc->flags & AV_PIX_FMT_FLAG_BE) { + AV_WB16(dst + xp + 0, c->rgbgamma[r] << 4); + AV_WB16(dst + xp + 1, c->rgbgamma[g] << 4); + AV_WB16(dst + xp + 2, c->rgbgamma[b] << 4); + } else { + AV_WL16(dst + xp + 0, c->rgbgamma[r] << 4); + AV_WL16(dst + xp + 1, c->rgbgamma[g] << 4); + AV_WL16(dst + xp + 2, c->rgbgamma[b] << 4); + } + } + src += stride; + dst += stride; + } +} + +static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst, + const uint16_t *src, int stride, int h) +{ + int xp,yp; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); + + for (yp=0; yp<h; yp++) { + for (xp=0; xp+2<stride; xp+=3) { + int x, y, z, r, g, b; + + if (desc->flags & AV_PIX_FMT_FLAG_BE) { + r = AV_RB16(src + xp + 0); + g = AV_RB16(src + xp + 1); + b = AV_RB16(src + xp + 2); + } else { + r = AV_RL16(src + xp + 0); + g = AV_RL16(src + xp + 1); + b = AV_RL16(src + xp + 2); + } + + r = c->rgbgammainv[r>>4]; + g = c->rgbgammainv[g>>4]; + b = c->rgbgammainv[b>>4]; + + // convert from sRGBlinear to XYZlinear + x = c->rgb2xyz_matrix[0][0] * r + + c->rgb2xyz_matrix[0][1] * g + + c->rgb2xyz_matrix[0][2] * b >> 12; + y = c->rgb2xyz_matrix[1][0] * r + + c->rgb2xyz_matrix[1][1] * g + + c->rgb2xyz_matrix[1][2] * b >> 12; + z = c->rgb2xyz_matrix[2][0] * r + + c->rgb2xyz_matrix[2][1] * g + + c->rgb2xyz_matrix[2][2] * b >> 12; + + // limit values to 12-bit depth + x = av_clip_c(x,0,4095); + y = av_clip_c(y,0,4095); + z = av_clip_c(z,0,4095); + + // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit + if (desc->flags & AV_PIX_FMT_FLAG_BE) { + AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4); + AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4); + AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4); + } else { + AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4); + AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4); + AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4); + } + } + src += stride; + dst += stride; + } +} + /** * swscale wrapper, so we don't need to export the SwsContext. * Assumes planar YUV to be in YUV order instead of YVU. @@ -908,6 +1027,9 @@ int attribute_align_arg sws_scale(struct SwsContext *c, uint8_t *base; int x,y; rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); + if (!rgb0_tmp) + return AVERROR(ENOMEM); + base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; for (y=0; y<srcSliceH; y++){ memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW); @@ -918,7 +1040,19 @@ int attribute_align_arg sws_scale(struct SwsContext *c, src2[0] = base; } - if (!srcSliceY && (c->flags & SWS_BITEXACT) && (c->flags & SWS_ERROR_DIFFUSION) && c->dither_error[0]) + if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) { + uint8_t *base; + rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); + if (!rgb0_tmp) + return AVERROR(ENOMEM); + + base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; + + xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH); + src2[0] = base; + } + + if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0]) for (i = 0; i < 4; i++) memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2)); @@ -938,7 +1072,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, if (srcSliceY + srcSliceH == c->srcH) c->sliceDir = 0; - ret = c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, + ret = c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2); } else { // slices go from bottom to top => we flip the image internally @@ -964,10 +1098,16 @@ int attribute_align_arg sws_scale(struct SwsContext *c, if (!srcSliceY) c->sliceDir = 0; - ret = c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, + ret = c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); } + + if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) { + /* replace on the same data */ + rgb48Toxyz12(c, (uint16_t*)dst2[0], (const uint16_t*)dst2[0], dstStride[0]/2, ret); + } + av_free(rgb0_tmp); return ret; } diff --git a/ffmpeg/libswscale/swscale.h b/ffmpeg/libswscale/swscale.h index 5f6ae0f..25c8b5e 100644 --- a/ffmpeg/libswscale/swscale.h +++ b/ffmpeg/libswscale/swscale.h @@ -23,15 +23,10 @@ /** * @file - * @ingroup lsws + * @ingroup libsws * external API header */ -/** - * @defgroup lsws Libswscale - * @{ - */ - #include <stdint.h> #include "libavutil/avutil.h" @@ -40,6 +35,9 @@ #include "version.h" /** + * @defgroup libsws Color conversion and scaling + * @{ + * * Return the LIBSWSCALE_VERSION_INT constant. */ unsigned swscale_version(void); @@ -147,6 +145,13 @@ int sws_isSupportedInput(enum AVPixelFormat pix_fmt); int sws_isSupportedOutput(enum AVPixelFormat pix_fmt); /** + * @param[in] pix_fmt the pixel format + * @return a positive value if an endianness conversion for pix_fmt is + * supported, 0 otherwise. + */ +int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt); + +/** * Allocate an empty SwsContext. This must be filled and passed to * sws_init_context(). For filling see AVOptions, options.c and * sws_setColorspaceDetails(). diff --git a/ffmpeg/libswscale/swscale_internal.h b/ffmpeg/libswscale/swscale_internal.h index 83d3a00..443615d 100644 --- a/ffmpeg/libswscale/swscale_internal.h +++ b/ffmpeg/libswscale/swscale_internal.h @@ -39,8 +39,6 @@ #define YUVRGB_TABLE_HEADROOM 128 -#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients - #define MAX_FILTER_SIZE 256 #define DITHER1XBPP @@ -63,6 +61,14 @@ struct SwsContext; +typedef enum SwsDither { + SWS_DITHER_NONE = 0, + SWS_DITHER_AUTO, + SWS_DITHER_BAYER, + SWS_DITHER_ED, + NB_SWS_DITHER, +} SwsDither; + typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]); @@ -268,7 +274,7 @@ typedef struct SwsContext { * Note that src, dst, srcStride, dstStride will be copied in the * sws_scale() wrapper so they can be freely modified here. */ - SwsFunc swScale; + SwsFunc swscale; int srcW; ///< Width of source luma/alpha planes. int srcH; ///< Height of source luma/alpha planes. int dstH; ///< Height of destination luma/alpha planes. @@ -356,10 +362,23 @@ typedef struct SwsContext { int dstY; ///< Last destination vertical line output from last slice. int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() + // alignment ensures the offset can be added in a single + // instruction on e.g. ARM + DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM]; uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM]; uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; - int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM]; uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; + DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, teh C vales are always at the XY_IDX points +#define RY_IDX 0 +#define GY_IDX 1 +#define BY_IDX 2 +#define RU_IDX 3 +#define GU_IDX 4 +#define BU_IDX 5 +#define RV_IDX 6 +#define GV_IDX 7 +#define BV_IDX 8 +#define RGB2YUV_SHIFT 15 int *dither_error[4]; @@ -371,6 +390,12 @@ typedef struct SwsContext { int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image). int src0Alpha; int dst0Alpha; + int srcXYZ; + int dstXYZ; + int src_h_chr_pos; + int dst_h_chr_pos; + int src_v_chr_pos; + int dst_v_chr_pos; int yuv2rgb_y_offset; int yuv2rgb_y_coeff; int yuv2rgb_v2r_coeff; @@ -464,7 +489,17 @@ typedef struct SwsContext { #endif int use_mmx_vfilter; - /* function pointers for swScale() */ +/* pre defined color-spaces gamma */ +#define XYZ_GAMMA (2.6f) +#define RGB_GAMMA (2.2f) + int16_t *xyzgamma; + int16_t *rgbgamma; + int16_t *xyzgammainv; + int16_t *rgbgammainv; + int16_t xyz2rgb_matrix[3][4]; + int16_t rgb2xyz_matrix[3][4]; + + /* function pointers for swscale() */ yuv2planar1_fn yuv2plane1; yuv2planarX_fn yuv2planeX; yuv2interleavedX_fn yuv2nv12cX; @@ -486,12 +521,13 @@ typedef struct SwsContext { /** * Functions to read planar input, such as planar RGB, and convert - * internally to Y/UV. + * internally to Y/UV/A. */ /** @{ */ - void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width); + void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], - int width); + int width, int32_t *rgb2yuv); + void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); /** @} */ /** @@ -567,6 +603,8 @@ typedef struct SwsContext { void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); int needs_hcscale; ///< Set if there are chroma planes to be converted. + + SwsDither dither; } SwsContext; //FIXME check init (where 0) @@ -574,17 +612,16 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c); int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation); +void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4], + int brightness, int contrast, int saturation); -void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], - int brightness, int contrast, int saturation); void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf); -SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c); +SwsFunc ff_yuv2rgb_init_x86(SwsContext *c); SwsFunc ff_yuv2rgb_init_vis(SwsContext *c); -SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); -SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); -void ff_bfin_get_unscaled_swscale(SwsContext *c); +SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c); +SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c); #if FF_API_SWS_FORMAT_NAME /** @@ -614,33 +651,33 @@ static av_always_inline int isBE(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return desc->flags & PIX_FMT_BE; + return desc->flags & AV_PIX_FMT_FLAG_BE; } static av_always_inline int isYUV(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return !(desc->flags & PIX_FMT_RGB) && desc->nb_components >= 2; + return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2; } static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return ((desc->flags & PIX_FMT_PLANAR) && isYUV(pix_fmt)); + return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt)); } static av_always_inline int isRGB(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return (desc->flags & PIX_FMT_RGB); + return (desc->flags & AV_PIX_FMT_FLAG_RGB); } #if 0 // FIXME #define isGray(x) \ - (!(av_pix_fmt_desc_get(x)->flags & PIX_FMT_PAL) && \ + (!(av_pix_fmt_desc_get(x)->flags & AV_PIX_FMT_FLAG_PAL) && \ av_pix_fmt_desc_get(x)->nb_components <= 2) #else #define isGray(x) \ @@ -716,23 +753,16 @@ static av_always_inline int isRGB(enum AVPixelFormat pix_fmt) ( \ isRGBinInt(x) || \ isBGRinInt(x) || \ - isRGB(x) || \ - (x)==AV_PIX_FMT_GBRP9LE || \ - (x)==AV_PIX_FMT_GBRP9BE || \ - (x)==AV_PIX_FMT_GBRP10LE || \ - (x)==AV_PIX_FMT_GBRP10BE || \ - (x)==AV_PIX_FMT_GBRP12LE || \ - (x)==AV_PIX_FMT_GBRP12BE || \ - (x)==AV_PIX_FMT_GBRP14LE || \ - (x)==AV_PIX_FMT_GBRP14BE || \ - (x)==AV_PIX_FMT_GBR24P \ + isRGB(x) \ ) static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return desc->flags & PIX_FMT_ALPHA; + if (pix_fmt == AV_PIX_FMT_PAL8) + return 1; + return desc->flags & AV_PIX_FMT_FLAG_ALPHA; } #if 1 @@ -749,7 +779,7 @@ static av_always_inline int isPacked(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return ((desc->nb_components >= 2 && !(desc->flags & PIX_FMT_PLANAR)) || + return ((desc->nb_components >= 2 && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) || pix_fmt == AV_PIX_FMT_PAL8); } @@ -758,46 +788,54 @@ static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return (desc->nb_components >= 2 && (desc->flags & PIX_FMT_PLANAR)); + return (desc->nb_components >= 2 && (desc->flags & AV_PIX_FMT_FLAG_PLANAR)); } static av_always_inline int isPackedRGB(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return ((desc->flags & (PIX_FMT_PLANAR | PIX_FMT_RGB)) == PIX_FMT_RGB); + return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == AV_PIX_FMT_FLAG_RGB); } static av_always_inline int isPlanarRGB(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return ((desc->flags & (PIX_FMT_PLANAR | PIX_FMT_RGB)) == - (PIX_FMT_PLANAR | PIX_FMT_RGB)); + return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == + (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)); } static av_always_inline int usePal(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); av_assert0(desc); - return (desc->flags & PIX_FMT_PAL) || (desc->flags & PIX_FMT_PSEUDOPAL); + return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL); } extern const uint64_t ff_dither4[2]; extern const uint64_t ff_dither8[2]; -extern const uint8_t dithers[8][8][8]; -extern const uint16_t dither_scale[15][16]; +extern const uint8_t ff_dither_2x2_4[3][8]; +extern const uint8_t ff_dither_2x2_8[3][8]; +extern const uint8_t ff_dither_4x4_16[5][8]; +extern const uint8_t ff_dither_8x8_32[9][8]; +extern const uint8_t ff_dither_8x8_73[9][8]; +extern const uint8_t ff_dither_8x8_128[9][8]; +extern const uint8_t ff_dither_8x8_220[9][8]; + +extern const int32_t ff_yuv2rgb_coeffs[8][4]; extern const AVClass sws_context_class; /** - * Set c->swScale to an unscaled converter if one exists for the specific + * Set c->swscale to an unscaled converter if one exists for the specific * source and destination formats, bit depths, flags, etc. */ void ff_get_unscaled_swscale(SwsContext *c); - -void ff_swscale_get_unscaled_altivec(SwsContext *c); +void ff_get_unscaled_swscale_bfin(SwsContext *c); +void ff_get_unscaled_swscale_ppc(SwsContext *c); +void ff_get_unscaled_swscale_arm(SwsContext *c); /** * Return function pointer to fastest main scaler path function depending @@ -814,8 +852,8 @@ void ff_sws_init_output_funcs(SwsContext *c, yuv2packed2_fn *yuv2packed2, yuv2packedX_fn *yuv2packedX, yuv2anyX_fn *yuv2anyX); -void ff_sws_init_swScale_altivec(SwsContext *c); -void ff_sws_init_swScale_mmx(SwsContext *c); +void ff_sws_init_swscale_ppc(SwsContext *c); +void ff_sws_init_swscale_x86(SwsContext *c); static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y, int alpha, int bits, const int big_endian) diff --git a/ffmpeg/libswscale/swscale_unscaled.c b/ffmpeg/libswscale/swscale_unscaled.c index 4a22fca..ccf9980 100644 --- a/ffmpeg/libswscale/swscale_unscaled.c +++ b/ffmpeg/libswscale/swscale_unscaled.c @@ -34,7 +34,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/avassert.h" -DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ +DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={ { { 0, 1, 0, 1, 0, 1, 0, 1,}, { 1, 0, 1, 0, 1, 0, 1, 0,}, @@ -109,7 +109,7 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ { 112, 16,104, 8,118, 22,110, 14,}, }}; -const uint16_t dither_scale[15][16]={ +static const uint16_t dither_scale[15][16]={ { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, @@ -168,10 +168,31 @@ static int planarToNv12Wrapper(SwsContext *c, const uint8_t *src[], if (c->dstFormat == AV_PIX_FMT_NV12) interleaveBytes(src[1], src[2], dst, c->srcW / 2, srcSliceH / 2, - srcStride[1], srcStride[2], dstStride[0]); + srcStride[1], srcStride[2], dstStride[1]); else interleaveBytes(src[2], src[1], dst, c->srcW / 2, srcSliceH / 2, - srcStride[2], srcStride[1], dstStride[0]); + srcStride[2], srcStride[1], dstStride[1]); + + return srcSliceH; +} + +static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, + int srcSliceH, uint8_t *dstParam[], + int dstStride[]) +{ + uint8_t *dst1 = dstParam[1] + dstStride[1] * srcSliceY / 2; + uint8_t *dst2 = dstParam[2] + dstStride[2] * srcSliceY / 2; + + copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, + dstParam[0], dstStride[0]); + + if (c->srcFormat == AV_PIX_FMT_NV12) + deinterleaveBytes(src[1], dst1, dst2,c->srcW / 2, srcSliceH / 2, + srcStride[1], dstStride[1], dstStride[2]); + else + deinterleaveBytes(src[1], dst2, dst1, c->srcW / 2, srcSliceH / 2, + srcStride[1], dstStride[2], dstStride[1]); return srcSliceH; } @@ -321,19 +342,23 @@ static int packed_16bpc_bswap(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int i, j; - int srcstr = srcStride[0] >> 1; - int dststr = dstStride[0] >> 1; - uint16_t *dstPtr = (uint16_t *) dst[0]; - const uint16_t *srcPtr = (const uint16_t *) src[0]; - int min_stride = FFMIN(srcstr, dststr); - - for (i = 0; i < srcSliceH; i++) { - for (j = 0; j < min_stride; j++) { - dstPtr[j] = av_bswap16(srcPtr[j]); + int i, j, p; + + for (p = 0; p < 4; p++) { + int srcstr = srcStride[p] / 2; + int dststr = dstStride[p] / 2; + uint16_t *dstPtr = (uint16_t *) dst[p]; + const uint16_t *srcPtr = (const uint16_t *) src[p]; + int min_stride = FFMIN(FFABS(srcstr), FFABS(dststr)); + if(!dstPtr || !srcPtr) + continue; + for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) { + for (j = 0; j < min_stride; j++) { + dstPtr[j] = av_bswap16(srcPtr[j]); + } + srcPtr += srcstr; + dstPtr += dststr; } - srcPtr += srcstr; - dstPtr += dststr; } return srcSliceH; @@ -385,6 +410,185 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], return srcSliceH; } +static void gbr16ptopacked16(const uint16_t *src[], int srcStride[], + uint8_t *dst, int dstStride, int srcSliceH, + int alpha, int swap, int bpp, int width) +{ + int x, h, i; + int src_alpha = src[3] != NULL; + int scale_high = 16 - bpp, scale_low = (bpp - 8) * 2; + for (h = 0; h < srcSliceH; h++) { + uint16_t *dest = (uint16_t *)(dst + dstStride * h); + uint16_t component; + + switch(swap) { + case 3: + if (alpha && !src_alpha) { + for (x = 0; x < width; x++) { + component = av_bswap16(src[0][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[1][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[2][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + *dest++ = 0xffff; + } + } else if (alpha && src_alpha) { + for (x = 0; x < width; x++) { + component = av_bswap16(src[0][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[1][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[2][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[3][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + } + } else { + for (x = 0; x < width; x++) { + component = av_bswap16(src[0][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[1][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + component = av_bswap16(src[2][x]); + *dest++ = av_bswap16(component << scale_high | component >> scale_low); + } + } + break; + case 2: + if (alpha && !src_alpha) { + for (x = 0; x < width; x++) { + *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); + *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); + *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); + *dest++ = 0xffff; + } + } else if (alpha && src_alpha) { + for (x = 0; x < width; x++) { + *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); + *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); + *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); + *dest++ = av_bswap16(src[3][x] << scale_high | src[3][x] >> scale_low); + } + } else { + for (x = 0; x < width; x++) { + *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); + *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); + *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); + } + } + break; + case 1: + if (alpha && !src_alpha) { + for (x = 0; x < width; x++) { + *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; + *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; + *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; + *dest++ = 0xffff; + } + } else if (alpha && src_alpha) { + for (x = 0; x < width; x++) { + *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; + *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; + *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; + *dest++ = av_bswap16(src[3][x]) << scale_high | av_bswap16(src[3][x]) >> scale_low; + } + } else { + for (x = 0; x < width; x++) { + *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; + *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; + *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; + } + } + break; + default: + if (alpha && !src_alpha) { + for (x = 0; x < width; x++) { + *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; + *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; + *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; + *dest++ = 0xffff; + } + } else if (alpha && src_alpha) { + for (x = 0; x < width; x++) { + *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; + *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; + *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; + *dest++ = src[3][x] << scale_high | src[3][x] >> scale_low; + } + } else { + for (x = 0; x < width; x++) { + *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; + *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; + *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; + } + } + } + for (i = 0; i < 3 + src_alpha; i++) + src[i] += srcStride[i] >> 1; + } +} + +static int planarRgb16ToRgb16Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + const uint16_t *src102[] = { (uint16_t *)src[1], (uint16_t *)src[0], (uint16_t *)src[2], (uint16_t *)src[3] }; + const uint16_t *src201[] = { (uint16_t *)src[2], (uint16_t *)src[0], (uint16_t *)src[1], (uint16_t *)src[3] }; + int stride102[] = { srcStride[1], srcStride[0], srcStride[2], srcStride[3] }; + int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] }; + const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat); + const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat); + int bits_per_sample = src_format->comp[0].depth_minus1 + 1; + int swap = 0; + if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) || + !HAVE_BIGENDIAN && src_format->flags & AV_PIX_FMT_FLAG_BE) + swap++; + if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) || + !HAVE_BIGENDIAN && dst_format->flags & AV_PIX_FMT_FLAG_BE) + swap += 2; + + if ((src_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) != + (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) || + bits_per_sample <= 8) { + av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", + src_format->name, dst_format->name); + return srcSliceH; + } + switch (c->dstFormat) { + case AV_PIX_FMT_BGR48LE: + case AV_PIX_FMT_BGR48BE: + gbr16ptopacked16(src102, stride102, + dst[0] + srcSliceY * dstStride[0], dstStride[0], + srcSliceH, 0, swap, bits_per_sample, c->srcW); + break; + case AV_PIX_FMT_RGB48LE: + case AV_PIX_FMT_RGB48BE: + gbr16ptopacked16(src201, stride201, + dst[0] + srcSliceY * dstStride[0], dstStride[0], + srcSliceH, 0, swap, bits_per_sample, c->srcW); + break; + case AV_PIX_FMT_RGBA64LE: + case AV_PIX_FMT_RGBA64BE: + gbr16ptopacked16(src201, stride201, + dst[0] + srcSliceY * dstStride[0], dstStride[0], + srcSliceH, 1, swap, bits_per_sample, c->srcW); + break; + case AV_PIX_FMT_BGRA64LE: + case AV_PIX_FMT_BGRA64BE: + gbr16ptopacked16(src102, stride102, + dst[0] + srcSliceY * dstStride[0], dstStride[0], + srcSliceH, 1, swap, bits_per_sample, c->srcW); + break; + default: + av_log(c, AV_LOG_ERROR, + "unsupported planar RGB conversion %s -> %s\n", + src_format->name, dst_format->name); + } + + return srcSliceH; +} + static void gbr24ptopacked24(const uint8_t *src[], int srcStride[], uint8_t *dst, int dstStride, int srcSliceH, int width) @@ -488,6 +692,22 @@ static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } +static int planarRgbToplanarRgbWrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, + dst[0], dstStride[0]); + copyPlane(src[1], srcStride[1], srcSliceY, srcSliceH, c->srcW, + dst[1], dstStride[1]); + copyPlane(src[2], srcStride[2], srcSliceY, srcSliceH, c->srcW, + dst[2], dstStride[2]); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + + return srcSliceH; +} + static void packedtogbr24p(const uint8_t *src, int srcStride, uint8_t *dst[], int dstStride[], int srcSliceH, int alpha_first, int inc_size, int width) @@ -595,7 +815,7 @@ static rgbConvFn findRgbConvFn(SwsContext *c) #define IS_NOT_NE(bpp, desc) \ (((bpp + 7) >> 3) == 2 && \ - (!(desc->flags & PIX_FMT_BE) != !HAVE_BIGENDIAN)) + (!(desc->flags & AV_PIX_FMT_FLAG_BE) != !HAVE_BIGENDIAN)) #define CONV_IS(src, dst) (srcFormat == AV_PIX_FMT_##src && dstFormat == AV_PIX_FMT_##dst) @@ -713,8 +933,13 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], srcPtr += ALT32_CORR; if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && - !isRGBA32(srcFormat)) + !isRGBA32(srcFormat)) { + int i; + av_assert0(ALT32_CORR == 1); + for (i = 0; i < srcSliceH; i++) + dstPtr[dstStride[0] * (srcSliceY + i)] = 255; dstPtr += ALT32_CORR; + } if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 && !(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap) @@ -746,13 +971,14 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - rgb24toyv12( + ff_rgb24toyv12( src[0], dst[0] + srcSliceY * dstStride[0], dst[1] + (srcSliceY >> 1) * dstStride[1], dst[2] + (srcSliceY >> 1) * dstStride[2], c->srcW, srcSliceH, - dstStride[0], dstStride[1], srcStride[0]); + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); if (dst[3]) fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); return srcSliceH; @@ -831,9 +1057,9 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); int plane, i, j; for (plane = 0; plane < 4; plane++) { - int length = (plane == 0 || plane == 3) ? c->srcW : -((-c->srcW ) >> c->chrDstHSubSample); - int y = (plane == 0 || plane == 3) ? srcSliceY: -((-srcSliceY) >> c->chrDstVSubSample); - int height = (plane == 0 || plane == 3) ? srcSliceH: -((-srcSliceH) >> c->chrDstVSubSample); + int length = (plane == 0 || plane == 3) ? c->srcW : FF_CEIL_RSHIFT(c->srcW, c->chrDstHSubSample); + int y = (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample); + int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample); const uint8_t *srcPtr = src[plane]; uint8_t *dstPtr = dst[plane] + dstStride[plane] * y; int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0); @@ -886,27 +1112,24 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], srcPtr += srcStride[plane]; } } else if (src_depth <= dst_depth) { - int orig_length = length; for (i = 0; i < height; i++) { + j = 0; if(isBE(c->srcFormat) == HAVE_BIGENDIAN && isBE(c->dstFormat) == HAVE_BIGENDIAN && shiftonly) { unsigned shift = dst_depth - src_depth; - length = orig_length; #if HAVE_FAST_64BIT #define FAST_COPY_UP(shift) \ - for (j = 0; j < length - 3; j += 4) { \ + for (; j < length - 3; j += 4) { \ uint64_t v = AV_RN64A(srcPtr2 + j); \ AV_WN64A(dstPtr2 + j, v << shift); \ - } \ - length &= 3; + } #else #define FAST_COPY_UP(shift) \ - for (j = 0; j < length - 1; j += 2) { \ + for (; j < length - 1; j += 2) { \ uint32_t v = AV_RN32A(srcPtr2 + j); \ AV_WN32A(dstPtr2 + j, v << shift); \ - } \ - length &= 1; + } #endif switch (shift) { @@ -916,12 +1139,12 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], } #define COPY_UP(r,w) \ if(shiftonly){\ - for (j = 0; j < length; j++){ \ + for (; j < length; j++){ \ unsigned int v= r(&srcPtr2[j]);\ w(&dstPtr2[j], v<<(dst_depth-src_depth));\ }\ }else{\ - for (j = 0; j < length; j++){ \ + for (; j < length; j++){ \ unsigned int v= r(&srcPtr2[j]);\ w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ (v>>(2*src_depth-dst_depth)));\ @@ -1008,31 +1231,40 @@ void ff_get_unscaled_swscale(SwsContext *c) /* yv12_to_nv12 */ if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) && (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)) { - c->swScale = planarToNv12Wrapper; + c->swscale = planarToNv12Wrapper; + } + /* nv12_to_yv12 */ + if (dstFormat == AV_PIX_FMT_YUV420P && + (srcFormat == AV_PIX_FMT_NV12 || srcFormat == AV_PIX_FMT_NV21)) { + c->swscale = nv12ToPlanarWrapper; } /* yuv2bgr */ if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P || srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) && - !(flags & (SWS_ACCURATE_RND|SWS_ERROR_DIFFUSION)) && !(dstH & 1)) { - c->swScale = ff_yuv2rgb_get_func_ptr(c); + !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) { + c->swscale = ff_yuv2rgb_get_func_ptr(c); } if (srcFormat == AV_PIX_FMT_YUV410P && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) { - c->swScale = yvu9ToYv12Wrapper; + c->swscale = yvu9ToYv12Wrapper; } /* bgr24toYV12 */ if (srcFormat == AV_PIX_FMT_BGR24 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND)) - c->swScale = bgr24ToYv12Wrapper; + c->swscale = bgr24ToYv12Wrapper; /* RGB/BGR -> RGB/BGR (no dither needed forms) */ if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) - c->swScale= rgbToRgbWrapper; + c->swscale = rgbToRgbWrapper; + + if ((srcFormat == AV_PIX_FMT_GBRP && dstFormat == AV_PIX_FMT_GBRAP) || + (srcFormat == AV_PIX_FMT_GBRAP && dstFormat == AV_PIX_FMT_GBRP)) + c->swscale = planarRgbToplanarRgbWrapper; #define isByteRGB(f) ( \ f == AV_PIX_FMT_RGB32 || \ @@ -1043,11 +1275,23 @@ void ff_get_unscaled_swscale(SwsContext *c) f == AV_PIX_FMT_BGR24) if (srcFormat == AV_PIX_FMT_GBRP && isPlanar(srcFormat) && isByteRGB(dstFormat)) - c->swScale = planarRgbToRgbWrapper; + c->swscale = planarRgbToRgbWrapper; + + if ((srcFormat == AV_PIX_FMT_GBRP9LE || srcFormat == AV_PIX_FMT_GBRP9BE || + srcFormat == AV_PIX_FMT_GBRP16LE || srcFormat == AV_PIX_FMT_GBRP16BE || + srcFormat == AV_PIX_FMT_GBRP10LE || srcFormat == AV_PIX_FMT_GBRP10BE || + srcFormat == AV_PIX_FMT_GBRP12LE || srcFormat == AV_PIX_FMT_GBRP12BE || + srcFormat == AV_PIX_FMT_GBRP14LE || srcFormat == AV_PIX_FMT_GBRP14BE || + srcFormat == AV_PIX_FMT_GBRAP16LE || srcFormat == AV_PIX_FMT_GBRAP16BE) && + (dstFormat == AV_PIX_FMT_RGB48LE || dstFormat == AV_PIX_FMT_RGB48BE || + dstFormat == AV_PIX_FMT_BGR48LE || dstFormat == AV_PIX_FMT_BGR48BE || + dstFormat == AV_PIX_FMT_RGBA64LE || dstFormat == AV_PIX_FMT_RGBA64BE || + dstFormat == AV_PIX_FMT_BGRA64LE || dstFormat == AV_PIX_FMT_BGRA64BE)) + c->swscale = planarRgb16ToRgb16Wrapper; if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth_minus1 == 7 && isPackedRGB(srcFormat) && dstFormat == AV_PIX_FMT_GBRP) - c->swScale = rgbToPlanarRgbWrapper; + c->swscale = rgbToPlanarRgbWrapper; /* bswap 16 bits per pixel/component packed formats */ if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) || @@ -1056,21 +1300,43 @@ void ff_get_unscaled_swscale(SwsContext *c) IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP9) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP10) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP12) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP14) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP16) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565)) - c->swScale = packed_16bpc_bswap; + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_XYZ12) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P9) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P10) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P12) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P14) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P16) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P9) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P10) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P14) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16)) + c->swscale = packed_16bpc_bswap; if (usePal(srcFormat) && isByteRGB(dstFormat)) - c->swScale = palToRgbWrapper; + c->swscale = palToRgbWrapper; if (srcFormat == AV_PIX_FMT_YUV422P) { if (dstFormat == AV_PIX_FMT_YUYV422) - c->swScale = yuv422pToYuy2Wrapper; + c->swscale = yuv422pToYuy2Wrapper; else if (dstFormat == AV_PIX_FMT_UYVY422) - c->swScale = yuv422pToUyvyWrapper; + c->swscale = yuv422pToUyvyWrapper; } /* LQ converters if -sws 0 or -sws 4*/ @@ -1078,21 +1344,21 @@ void ff_get_unscaled_swscale(SwsContext *c) /* yv12_to_yuy2 */ if (srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) { if (dstFormat == AV_PIX_FMT_YUYV422) - c->swScale = planarToYuy2Wrapper; + c->swscale = planarToYuy2Wrapper; else if (dstFormat == AV_PIX_FMT_UYVY422) - c->swScale = planarToUyvyWrapper; + c->swscale = planarToUyvyWrapper; } } if (srcFormat == AV_PIX_FMT_YUYV422 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P)) - c->swScale = yuyvToYuv420Wrapper; + c->swscale = yuyvToYuv420Wrapper; if (srcFormat == AV_PIX_FMT_UYVY422 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P)) - c->swScale = uyvyToYuv420Wrapper; + c->swscale = uyvyToYuv420Wrapper; if (srcFormat == AV_PIX_FMT_YUYV422 && dstFormat == AV_PIX_FMT_YUV422P) - c->swScale = yuyvToYuv422Wrapper; + c->swscale = yuyvToYuv422Wrapper; if (srcFormat == AV_PIX_FMT_UYVY422 && dstFormat == AV_PIX_FMT_YUV422P) - c->swScale = uyvyToYuv422Wrapper; + c->swscale = uyvyToYuv422Wrapper; #define isPlanarGray(x) (isGray(x) && (x) != AV_PIX_FMT_GRAY8A) /* simple copy */ @@ -1109,15 +1375,18 @@ void ff_get_unscaled_swscale(SwsContext *c) srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21)) { if (isPacked(c->srcFormat)) - c->swScale = packedCopyWrapper; + c->swscale = packedCopyWrapper; else /* Planar YUV or gray */ - c->swScale = planarCopyWrapper; + c->swscale = planarCopyWrapper; } if (ARCH_BFIN) - ff_bfin_get_unscaled_swscale(c); - if (HAVE_ALTIVEC) - ff_swscale_get_unscaled_altivec(c); + ff_get_unscaled_swscale_bfin(c); + if (ARCH_PPC) + ff_get_unscaled_swscale_ppc(c); + if (ARCH_ARM) + ff_get_unscaled_swscale_arm(c); + } /* Convert the palette to the same packed 32-bit format as the palette */ diff --git a/ffmpeg/libswscale/utils.c b/ffmpeg/libswscale/utils.c index 932cf94..b8b95a9 100644 --- a/ffmpeg/libswscale/utils.c +++ b/ffmpeg/libswscale/utils.c @@ -46,12 +46,15 @@ #include "libavutil/mathematics.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/ppc/cpu.h" #include "libavutil/x86/asm.h" #include "libavutil/x86/cpu.h" #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" +static void handle_formats(SwsContext *c); + unsigned swscale_version(void) { av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); @@ -72,7 +75,9 @@ const char *swscale_license(void) #define RET 0xC3 // near return opcode for x86 typedef struct FormatEntry { - int is_supported_in, is_supported_out; + uint8_t is_supported_in :1; + uint8_t is_supported_out :1; + uint8_t is_supported_endianness :1; } FormatEntry; static const FormatEntry format_entries[AV_PIX_FMT_NB] = { @@ -89,6 +94,7 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = { [AV_PIX_FMT_MONOBLACK] = { 1, 1 }, [AV_PIX_FMT_PAL8] = { 1, 0 }, [AV_PIX_FMT_YUVJ420P] = { 1, 1 }, + [AV_PIX_FMT_YUVJ411P] = { 1, 1 }, [AV_PIX_FMT_YUVJ422P] = { 1, 1 }, [AV_PIX_FMT_YUVJ444P] = { 1, 1 }, [AV_PIX_FMT_UYVY422] = { 1, 1 }, @@ -136,8 +142,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = { [AV_PIX_FMT_YUVA444P16LE]= { 1, 1 }, [AV_PIX_FMT_RGB48BE] = { 1, 1 }, [AV_PIX_FMT_RGB48LE] = { 1, 1 }, - [AV_PIX_FMT_RGBA64BE] = { 1, 0 }, - [AV_PIX_FMT_RGBA64LE] = { 1, 0 }, + [AV_PIX_FMT_RGBA64BE] = { 1, 1 }, + [AV_PIX_FMT_RGBA64LE] = { 1, 1 }, [AV_PIX_FMT_RGB565BE] = { 1, 1 }, [AV_PIX_FMT_RGB565LE] = { 1, 1 }, [AV_PIX_FMT_RGB555BE] = { 1, 1 }, @@ -196,6 +202,11 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = { [AV_PIX_FMT_GBRP14BE] = { 1, 1 }, [AV_PIX_FMT_GBRP16LE] = { 1, 0 }, [AV_PIX_FMT_GBRP16BE] = { 1, 0 }, + [AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 }, + [AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 }, + [AV_PIX_FMT_GBRAP] = { 1, 1 }, + [AV_PIX_FMT_GBRAP16LE] = { 1, 0 }, + [AV_PIX_FMT_GBRAP16BE] = { 1, 0 }, }; int sws_isSupportedInput(enum AVPixelFormat pix_fmt) @@ -210,7 +221,11 @@ int sws_isSupportedOutput(enum AVPixelFormat pix_fmt) format_entries[pix_fmt].is_supported_out : 0; } -extern const int32_t ff_yuv2rgb_coeffs[8][4]; +int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt) +{ + return (unsigned)pix_fmt < AV_PIX_FMT_NB ? + format_entries[pix_fmt].is_supported_endianness : 0; +} #if FF_API_SWS_FORMAT_NAME const char *sws_format_name(enum AVPixelFormat format) @@ -236,11 +251,41 @@ static double getSplineCoeff(double a, double b, double c, double d, dist - 1.0); } -static int initFilter(int16_t **outFilter, int32_t **filterPos, - int *outFilterSize, int xInc, int srcW, int dstW, - int filterAlign, int one, int flags, int cpu_flags, - SwsVector *srcFilter, SwsVector *dstFilter, - double param[2]) +static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir) +{ + if (pos < 0) { + pos = (128 << chr_subsample) - 128; + } + pos += 128; // relative to ideal left edge + return pos >> chr_subsample; +} + +typedef struct { + int flag; ///< flag associated to the algorithm + const char *description; ///< human-readable description + int size_factor; ///< size factor used when initing the filters +} ScaleAlgorithm; + +static const ScaleAlgorithm scale_algorithms[] = { + { SWS_AREA, "area averaging", 1 /* downscale only, for upscale it is bilinear */ }, + { SWS_BICUBIC, "bicubic", 4 }, + { SWS_BICUBLIN, "luma bicubic / chroma bilinear", -1 }, + { SWS_BILINEAR, "bilinear", 2 }, + { SWS_FAST_BILINEAR, "fast bilinear", -1 }, + { SWS_GAUSS, "Gaussian", 8 /* infinite ;) */ }, + { SWS_LANCZOS, "Lanczos", -1 /* custom */ }, + { SWS_POINT, "nearest neighbor / point", -1 }, + { SWS_SINC, "sinc", 20 /* infinite ;) */ }, + { SWS_SPLINE, "bicubic spline", 20 /* infinite :)*/ }, + { SWS_X, "experimental", 8 }, +}; + +static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos, + int *outFilterSize, int xInc, int srcW, + int dstW, int filterAlign, int one, + int flags, int cpu_flags, + SwsVector *srcFilter, SwsVector *dstFilter, + double param[2], int srcPos, int dstPos) { int i; int filterSize; @@ -256,7 +301,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail); - if (FFABS(xInc - 0x10000) < 10) { // unscaled + if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled int i; filterSize = 1; FF_ALLOCZ_OR_GOTO(NULL, filter, @@ -273,7 +318,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, FF_ALLOC_OR_GOTO(NULL, filter, dstW * sizeof(*filter) * filterSize, fail); - xDstInSrc = xInc / 2 - 0x8000; + xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); for (i = 0; i < dstW; i++) { int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; @@ -289,7 +334,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, FF_ALLOC_OR_GOTO(NULL, filter, dstW * sizeof(*filter) * filterSize, fail); - xDstInSrc = xInc / 2 - 0x8000; + xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); for (i = 0; i < dstW; i++) { int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; int j; @@ -307,27 +352,17 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, } } else { int64_t xDstInSrc; - int sizeFactor; - - if (flags & SWS_BICUBIC) - sizeFactor = 4; - else if (flags & SWS_X) - sizeFactor = 8; - else if (flags & SWS_AREA) - sizeFactor = 1; // downscale only, for upscale it is bilinear - else if (flags & SWS_GAUSS) - sizeFactor = 8; // infinite ;) - else if (flags & SWS_LANCZOS) - sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6; - else if (flags & SWS_SINC) - sizeFactor = 20; // infinite ;) - else if (flags & SWS_SPLINE) - sizeFactor = 20; // infinite ;) - else if (flags & SWS_BILINEAR) - sizeFactor = 2; - else { - av_assert0(0); + int sizeFactor = -1; + + for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { + if (flags & scale_algorithms[i].flag) { + sizeFactor = scale_algorithms[i].size_factor; + break; + } } + if (flags & SWS_LANCZOS) + sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6; + av_assert0(sizeFactor > 0); if (xInc <= 1 << 16) filterSize = 1 + sizeFactor; // upscale @@ -340,7 +375,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, FF_ALLOC_OR_GOTO(NULL, filter, dstW * sizeof(*filter) * filterSize, fail); - xDstInSrc = xInc - 0x10000; + xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7); for (i = 0; i < dstW; i++) { int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17); int j; @@ -508,7 +543,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, minFilterSize = min; } - if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) { + if (PPC_ALTIVEC(cpu_flags)) { // we can handle the special case 4, so we don't want to go the full 8 if (minFilterSize < 5) filterAlign = 4; @@ -520,7 +555,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, filterAlign = 1; } - if (INLINE_MMX(cpu_flags)) { + if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { // special case for unscaled vertical filtering if (minFilterSize == 1 && filterAlign == 2) filterAlign = 1; @@ -532,7 +567,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, filter = av_malloc(filterSize * dstW * sizeof(*filter)); if (filterSize >= MAX_FILTER_SIZE * 16 / ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) { - av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreem scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize); + av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreme scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize); goto fail; } *outFilterSize = filterSize; @@ -597,6 +632,10 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, sum += filter[i * filterSize + j]; } sum = (sum + one / 2) / one; + if (!sum) { + av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n"); + sum = 1; + } for (j = 0; j < *outFilterSize; j++) { int64_t v = filter[i * filterSize + j] + error; int intV = ROUNDED_DIV(v, sum); @@ -627,9 +666,9 @@ fail: } #if HAVE_MMXEXT_INLINE -static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, - int16_t *filter, int32_t *filterPos, - int numSplits) +static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, + int16_t *filter, int32_t *filterPos, + int numSplits) { uint8_t *fragmentA; x86_reg imm8OfPShufW1A; @@ -742,10 +781,10 @@ static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, int c = ((xpos + xInc * 2) >> 16) - xx; int d = ((xpos + xInc * 3) >> 16) - xx; int inc = (d + 1 < 4); - uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA; - x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A; - x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A; - x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA; + uint8_t *fragment = inc ? fragmentB : fragmentA; + x86_reg imm8OfPShufW1 = inc ? imm8OfPShufW1B : imm8OfPShufW1A; + x86_reg imm8OfPShufW2 = inc ? imm8OfPShufW2B : imm8OfPShufW2A; + x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA; int maxShift = 3 - (d + inc); int shift = 0; @@ -792,21 +831,148 @@ static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, } #endif /* HAVE_MMXEXT_INLINE */ -static void getSubSampleFactors(int *h, int *v, enum AVPixelFormat format) +static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); - *h = desc->log2_chroma_w; - *v = desc->log2_chroma_h; + int64_t W, V, Z, Cy, Cu, Cv; + int64_t vr = table[0]; + int64_t ub = table[1]; + int64_t ug = -table[2]; + int64_t vg = -table[3]; + int64_t ONE = 65536; + int64_t cy = ONE; + uint8_t *p = (uint8_t*)c->input_rgb2yuv_table; + int i; + static const int8_t map[] = { + BY_IDX, GY_IDX, -1 , BY_IDX, BY_IDX, GY_IDX, -1 , BY_IDX, + RY_IDX, -1 , GY_IDX, RY_IDX, RY_IDX, -1 , GY_IDX, RY_IDX, + RY_IDX, GY_IDX, -1 , RY_IDX, RY_IDX, GY_IDX, -1 , RY_IDX, + BY_IDX, -1 , GY_IDX, BY_IDX, BY_IDX, -1 , GY_IDX, BY_IDX, + BU_IDX, GU_IDX, -1 , BU_IDX, BU_IDX, GU_IDX, -1 , BU_IDX, + RU_IDX, -1 , GU_IDX, RU_IDX, RU_IDX, -1 , GU_IDX, RU_IDX, + RU_IDX, GU_IDX, -1 , RU_IDX, RU_IDX, GU_IDX, -1 , RU_IDX, + BU_IDX, -1 , GU_IDX, BU_IDX, BU_IDX, -1 , GU_IDX, BU_IDX, + BV_IDX, GV_IDX, -1 , BV_IDX, BV_IDX, GV_IDX, -1 , BV_IDX, + RV_IDX, -1 , GV_IDX, RV_IDX, RV_IDX, -1 , GV_IDX, RV_IDX, + RV_IDX, GV_IDX, -1 , RV_IDX, RV_IDX, GV_IDX, -1 , RV_IDX, + BV_IDX, -1 , GV_IDX, BV_IDX, BV_IDX, -1 , GV_IDX, BV_IDX, + RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, + BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, + GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , + -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, + RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, + BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, + GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , + -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, + RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, + BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, + GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , + -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, //23 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30 + -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31 + BY_IDX, GY_IDX, RY_IDX, -1 , -1 , -1 , -1 , -1 , //32 + BU_IDX, GU_IDX, RU_IDX, -1 , -1 , -1 , -1 , -1 , //33 + BV_IDX, GV_IDX, RV_IDX, -1 , -1 , -1 , -1 , -1 , //34 + }; + + dstRange = 0; //FIXME range = 1 is handled elsewhere + + if (!dstRange) { + cy = cy * 255 / 219; + } else { + vr = vr * 224 / 255; + ub = ub * 224 / 255; + ug = ug * 224 / 255; + vg = vg * 224 / 255; + } + W = ROUNDED_DIV(ONE*ONE*ug, ub); + V = ROUNDED_DIV(ONE*ONE*vg, vr); + Z = ONE*ONE-W-V; + + Cy = ROUNDED_DIV(cy*Z, ONE); + Cu = ROUNDED_DIV(ub*Z, ONE); + Cv = ROUNDED_DIV(vr*Z, ONE); + + c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cy); + c->input_rgb2yuv_table[GY_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cy); + c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cy); + + c->input_rgb2yuv_table[RU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cu); + c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cu); + c->input_rgb2yuv_table[BU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W) , Cu); + + c->input_rgb2yuv_table[RV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z) , Cv); + c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cv); + c->input_rgb2yuv_table[BV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cv); + + if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) { + c->input_rgb2yuv_table[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + } + for(i=0; i<FF_ARRAY_ELEMS(map); i++) + AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0); +} + +static void fill_xyztables(struct SwsContext *c) +{ + int i; + double xyzgamma = XYZ_GAMMA; + double rgbgamma = 1.0 / RGB_GAMMA; + double xyzgammainv = 1.0 / XYZ_GAMMA; + double rgbgammainv = RGB_GAMMA; + static const int16_t xyz2rgb_matrix[3][4] = { + {13270, -6295, -2041}, + {-3969, 7682, 170}, + { 228, -835, 4329} }; + static const int16_t rgb2xyz_matrix[3][4] = { + {1689, 1464, 739}, + { 871, 2929, 296}, + { 79, 488, 3891} }; + static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096]; + + memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix)); + memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix)); + c->xyzgamma = xyzgamma_tab; + c->rgbgamma = rgbgamma_tab; + c->xyzgammainv = xyzgammainv_tab; + c->rgbgammainv = rgbgammainv_tab; + + if (rgbgamma_tab[4095]) + return; + + /* set gamma vectors */ + for (i = 0; i < 4096; i++) { + xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0); + rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0); + xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0); + rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0); + } } int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation) { - const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); - const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat); - memcpy(c->srcColorspaceTable, inv_table, sizeof(int) * 4); - memcpy(c->dstColorspaceTable, table, sizeof(int) * 4); + const AVPixFmtDescriptor *desc_dst; + const AVPixFmtDescriptor *desc_src; + memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4); + memmove(c->dstColorspaceTable, table, sizeof(int) * 4); + + handle_formats(c); + desc_dst = av_pix_fmt_desc_get(c->dstFormat); + desc_src = av_pix_fmt_desc_get(c->srcFormat); if(!isYUV(c->dstFormat) && !isGray(c->dstFormat)) dstRange = 0; @@ -819,19 +985,24 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], c->srcRange = srcRange; c->dstRange = dstRange; - if (isYUV(c->dstFormat) || isGray(c->dstFormat)) + if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) return -1; c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); c->srcFormatBpp = av_get_bits_per_pixel(desc_src); - ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, - contrast, saturation); - // FIXME factorize + if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) { + ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, + contrast, saturation); + // FIXME factorize - if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) - ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, + if (ARCH_PPC) + ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness, contrast, saturation); + } + + fill_rgb2yuv_table(c, table, dstRange); + return 0; } @@ -839,7 +1010,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation) { - if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) + if (!c ) return -1; *inv_table = c->srcColorspaceTable; @@ -859,6 +1030,9 @@ static int handle_jpeg(enum AVPixelFormat *format) case AV_PIX_FMT_YUVJ420P: *format = AV_PIX_FMT_YUV420P; return 1; + case AV_PIX_FMT_YUVJ411P: + *format = AV_PIX_FMT_YUV411P; + return 1; case AV_PIX_FMT_YUVJ422P: *format = AV_PIX_FMT_YUV422P; return 1; @@ -868,6 +1042,8 @@ static int handle_jpeg(enum AVPixelFormat *format) case AV_PIX_FMT_YUVJ440P: *format = AV_PIX_FMT_YUV440P; return 1; + case AV_PIX_FMT_GRAY8: + return 1; default: return 0; } @@ -884,6 +1060,25 @@ static int handle_0alpha(enum AVPixelFormat *format) } } +static int handle_xyz(enum AVPixelFormat *format) +{ + switch (*format) { + case AV_PIX_FMT_XYZ12BE : *format = AV_PIX_FMT_RGB48BE; return 1; + case AV_PIX_FMT_XYZ12LE : *format = AV_PIX_FMT_RGB48LE; return 1; + default: return 0; + } +} + +static void handle_formats(SwsContext *c) +{ + c->src0Alpha |= handle_0alpha(&c->srcFormat); + c->dst0Alpha |= handle_0alpha(&c->dstFormat); + c->srcXYZ |= handle_xyz(&c->srcFormat); + c->dstXYZ |= handle_xyz(&c->dstFormat); + if (c->srcXYZ || c->dstXYZ) + fill_xyztables(c); +} + SwsContext *sws_alloc_context(void) { SwsContext *c = av_mallocz(sizeof(SwsContext)); @@ -911,8 +1106,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, int flags, cpu_flags; enum AVPixelFormat srcFormat = c->srcFormat; enum AVPixelFormat dstFormat = c->dstFormat; - const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat); - const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat); + const AVPixFmtDescriptor *desc_src; + const AVPixFmtDescriptor *desc_dst; cpu_flags = av_get_cpu_flags(); flags = c->flags; @@ -922,17 +1117,25 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, unscaled = (srcW == dstW && srcH == dstH); - handle_jpeg(&srcFormat); - handle_jpeg(&dstFormat); - handle_0alpha(&srcFormat); - handle_0alpha(&dstFormat); + c->srcRange |= handle_jpeg(&c->srcFormat); + c->dstRange |= handle_jpeg(&c->dstFormat); - if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat){ + if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat) av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); - c->srcFormat= srcFormat; - c->dstFormat= dstFormat; - } + if (!c->contrast && !c->saturation && !c->dstFormatBpp) + sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange, + ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], + c->dstRange, 0, 1 << 16, 1 << 16); + + handle_formats(c); + srcFormat = c->srcFormat; + dstFormat = c->dstFormat; + desc_src = av_pix_fmt_desc_get(srcFormat); + desc_dst = av_pix_fmt_desc_get(dstFormat); + + if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) && + av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) { if (!sws_isSupportedInput(srcFormat)) { av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat)); @@ -943,6 +1146,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, av_get_pix_fmt_name(dstFormat)); return AVERROR(EINVAL); } + } i = flags & (SWS_POINT | SWS_AREA | @@ -955,8 +1159,19 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SWS_SINC | SWS_SPLINE | SWS_BICUBLIN); - if (!i || (i & (i - 1))) { - av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen, got %X\n", i); + + /* provide a default scaler if not set by caller */ + if (!i) { + if (dstW < srcW && dstH < srcH) + flags |= SWS_BICUBIC; + else if (dstW > srcW && dstH > srcH) + flags |= SWS_BICUBIC; + else + flags |= SWS_BICUBIC; + c->flags = flags; + } else if (i & (i - 1)) { + av_log(c, AV_LOG_ERROR, + "Exactly one scaler algorithm must be chosen, got %X\n", i); return AVERROR(EINVAL); } /* sanity check */ @@ -988,8 +1203,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, (dstFilter->lumH && dstFilter->lumH->length > 1) || (dstFilter->chrH && dstFilter->chrH->length > 1); - getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat); - getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); + av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample); + av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample); if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) { if (dstW&1) { @@ -997,25 +1212,45 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, flags |= SWS_FULL_CHR_H_INT; c->flags = flags; } + + if ( c->chrSrcHSubSample == 0 + && c->chrSrcVSubSample == 0 + && c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER + && !(c->flags & SWS_FAST_BILINEAR) + ) { + av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n"); + flags |= SWS_FULL_CHR_H_INT; + c->flags = flags; + } + } + + if (c->dither == SWS_DITHER_AUTO) { + if (flags & SWS_ERROR_DIFFUSION) + c->dither = SWS_DITHER_ED; } if(dstFormat == AV_PIX_FMT_BGR4_BYTE || dstFormat == AV_PIX_FMT_RGB4_BYTE || dstFormat == AV_PIX_FMT_BGR8 || dstFormat == AV_PIX_FMT_RGB8) { - if (flags & SWS_ERROR_DIFFUSION && !(flags & SWS_FULL_CHR_H_INT)) { - av_log(c, AV_LOG_DEBUG, - "Error diffusion dither is only supported in full chroma interpolation for destination format '%s'\n", - av_get_pix_fmt_name(dstFormat)); - flags |= SWS_FULL_CHR_H_INT; - c->flags = flags; + if (c->dither == SWS_DITHER_AUTO) + c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER; + if (!(flags & SWS_FULL_CHR_H_INT)) { + if (c->dither == SWS_DITHER_ED) { + av_log(c, AV_LOG_DEBUG, + "Desired dithering only supported in full chroma interpolation for destination format '%s'\n", + av_get_pix_fmt_name(dstFormat)); + flags |= SWS_FULL_CHR_H_INT; + c->flags = flags; + } } - if (!(flags & SWS_ERROR_DIFFUSION) && (flags & SWS_FULL_CHR_H_INT)) { - av_log(c, AV_LOG_DEBUG, - "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n", - av_get_pix_fmt_name(dstFormat)); - flags |= SWS_ERROR_DIFFUSION; - c->flags = flags; + if (flags & SWS_FULL_CHR_H_INT) { + if (c->dither == SWS_DITHER_BAYER) { + av_log(c, AV_LOG_DEBUG, + "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n", + av_get_pix_fmt_name(dstFormat)); + c->dither = SWS_DITHER_ED; + } } } if (isPlanarRGB(dstFormat)) { @@ -1073,11 +1308,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, (flags & SWS_FAST_BILINEAR))) c->chrSrcHSubSample = 1; - // Note the -((-x)>>y) is so that we always round toward +inf. - c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample); - c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample); - c->chrDstW = -((-dstW) >> c->chrDstHSubSample); - c->chrDstH = -((-dstH) >> c->chrDstVSubSample); + // Note the FF_CEIL_RSHIFT is so that we always round toward +inf. + c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample); + c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample); + c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample); + c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample); FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); @@ -1086,7 +1321,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) { ff_get_unscaled_swscale(c); - if (c->swScale) { + if (c->swscale) { if (flags & SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", @@ -1107,9 +1342,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, dst_stride <<= 1; if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) { - c->canMMXEXTBeUsed = (dstW >= srcW && (dstW & 31) == 0 && - (srcW & 15) == 0) ? 1 : 0; - if (!c->canMMXEXTBeUsed && dstW >= srcW && (srcW & 15) == 0 + c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 && + c->chrDstW >= c->chrSrcW && + (srcW & 15) == 0; + if (!c->canMMXEXTBeUsed && dstW >= srcW && c->chrDstW >= c->chrSrcW && (srcW & 15) == 0 && (flags & SWS_FAST_BILINEAR)) { if (flags & SWS_PRINT_INFO) @@ -1199,53 +1435,61 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); #if USE_MMAP - mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ); - mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ); + if ( mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1 + || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) { + av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n"); + goto fail; + } #endif } else #endif /* HAVE_MMXEXT_INLINE */ { - const int filterAlign = - (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : - (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : - 1; + const int filterAlign = X86_MMX(cpu_flags) ? 4 : + PPC_ALTIVEC(cpu_flags) ? 8 : 1; if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, srcW, dstW, filterAlign, 1 << 14, (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumH, dstFilter->lumH, - c->param) < 0) + c->param, + get_local_pos(c, 0, 0, 0), + get_local_pos(c, 0, 0, 0)) < 0) goto fail; if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrH, dstFilter->chrH, - c->param) < 0) + c->param, + get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0), + get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0) goto fail; } } // initialize horizontal stuff /* precalculate vertical scaler filter coefficients */ { - const int filterAlign = - (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 2 : - (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : - 1; + const int filterAlign = X86_MMX(cpu_flags) ? 2 : + PPC_ALTIVEC(cpu_flags) ? 8 : 1; if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, srcH, dstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumV, dstFilter->lumV, - c->param) < 0) + c->param, + get_local_pos(c, 0, 0, 1), + get_local_pos(c, 0, 0, 1)) < 0) goto fail; if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrV, dstFilter->chrV, - c->param) < 0) + c->param, + get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1), + get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0) + goto fail; #if HAVE_ALTIVEC @@ -1335,32 +1579,18 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, av_assert0(c->chrDstH <= dstH); if (flags & SWS_PRINT_INFO) { - if (flags & SWS_FAST_BILINEAR) - av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, "); - else if (flags & SWS_BILINEAR) - av_log(c, AV_LOG_INFO, "BILINEAR scaler, "); - else if (flags & SWS_BICUBIC) - av_log(c, AV_LOG_INFO, "BICUBIC scaler, "); - else if (flags & SWS_X) - av_log(c, AV_LOG_INFO, "Experimental scaler, "); - else if (flags & SWS_POINT) - av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, "); - else if (flags & SWS_AREA) - av_log(c, AV_LOG_INFO, "Area Averaging scaler, "); - else if (flags & SWS_BICUBLIN) - av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, "); - else if (flags & SWS_GAUSS) - av_log(c, AV_LOG_INFO, "Gaussian scaler, "); - else if (flags & SWS_SINC) - av_log(c, AV_LOG_INFO, "Sinc scaler, "); - else if (flags & SWS_LANCZOS) - av_log(c, AV_LOG_INFO, "Lanczos scaler, "); - else if (flags & SWS_SPLINE) - av_log(c, AV_LOG_INFO, "Bicubic spline scaler, "); - else - av_log(c, AV_LOG_INFO, "ehh flags invalid?! "); + const char *scaler = NULL, *cpucaps; - av_log(c, AV_LOG_INFO, "from %s to %s%s ", + for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { + if (flags & scale_algorithms[i].flag) { + scaler = scale_algorithms[i].description; + break; + } + } + if (!scaler) + scaler = "ehh flags invalid?!"; + av_log(c, AV_LOG_INFO, "%s scaler, from %s to %s%s ", + scaler, av_get_pix_fmt_name(srcFormat), #ifdef DITHER1XBPP dstFormat == AV_PIX_FMT_BGR555 || dstFormat == AV_PIX_FMT_BGR565 || @@ -1373,15 +1603,17 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, av_get_pix_fmt_name(dstFormat)); if (INLINE_MMXEXT(cpu_flags)) - av_log(c, AV_LOG_INFO, "using MMXEXT\n"); + cpucaps = "MMXEXT"; else if (INLINE_AMD3DNOW(cpu_flags)) - av_log(c, AV_LOG_INFO, "using 3DNOW\n"); + cpucaps = "3DNOW"; else if (INLINE_MMX(cpu_flags)) - av_log(c, AV_LOG_INFO, "using MMX\n"); - else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) - av_log(c, AV_LOG_INFO, "using AltiVec\n"); + cpucaps = "MMX"; + else if (PPC_ALTIVEC(cpu_flags)) + cpucaps = "AltiVec"; else - av_log(c, AV_LOG_INFO, "using C\n"); + cpucaps = "C"; + + av_log(c, AV_LOG_INFO, "using %s\n", cpucaps); av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); av_log(c, AV_LOG_DEBUG, @@ -1393,7 +1625,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->chrXInc, c->chrYInc); } - c->swScale = ff_getSwsFunc(c); + c->swscale = ff_getSwsFunc(c); return 0; fail: // FIXME replace things by appropriate error codes return -1; @@ -1415,10 +1647,6 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, c->srcH = srcH; c->dstW = dstW; c->dstH = dstH; - c->srcRange = handle_jpeg(&srcFormat); - c->dstRange = handle_jpeg(&dstFormat); - c->src0Alpha = handle_0alpha(&srcFormat); - c->dst0Alpha = handle_0alpha(&dstFormat); c->srcFormat = srcFormat; c->dstFormat = dstFormat; @@ -1426,9 +1654,6 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, c->param[0] = param[0]; c->param[1] = param[1]; } - sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange, - ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, - c->dstRange, 0, 1 << 16, 1 << 16); if (sws_init_context(c, srcFilter, dstFilter) < 0) { sws_freeContext(c); @@ -1695,14 +1920,12 @@ void sws_convVec(SwsVector *a, SwsVector *b) SwsVector *sws_cloneVec(SwsVector *a) { - int i; SwsVector *vec = sws_allocVec(a->length); if (!vec) return NULL; - for (i = 0; i < a->length; i++) - vec->coeff[i] = a->coeff[i]; + memcpy(vec->coeff, a->coeff, a->length * sizeof(*a->coeff)); return vec; } @@ -1747,14 +1970,10 @@ void sws_freeFilter(SwsFilter *filter) if (!filter) return; - if (filter->lumH) - sws_freeVec(filter->lumH); - if (filter->lumV) - sws_freeVec(filter->lumV); - if (filter->chrH) - sws_freeVec(filter->chrH); - if (filter->chrV) - sws_freeVec(filter->chrV); + sws_freeVec(filter->lumH); + sws_freeVec(filter->lumV); + sws_freeVec(filter->chrH); + sws_freeVec(filter->chrV); av_free(filter); } @@ -1858,21 +2077,13 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW, return NULL; context->srcW = srcW; context->srcH = srcH; - context->srcRange = handle_jpeg(&srcFormat); - context->src0Alpha = handle_0alpha(&srcFormat); context->srcFormat = srcFormat; context->dstW = dstW; context->dstH = dstH; - context->dstRange = handle_jpeg(&dstFormat); - context->dst0Alpha = handle_0alpha(&dstFormat); context->dstFormat = dstFormat; context->flags = flags; context->param[0] = param[0]; context->param[1] = param[1]; - sws_setColorspaceDetails(context, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], - context->srcRange, - ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, - context->dstRange, 0, 1 << 16, 1 << 16); if (sws_init_context(context, srcFilter, dstFilter) < 0) { sws_freeContext(context); return NULL; diff --git a/ffmpeg/libswscale/version.h b/ffmpeg/libswscale/version.h index c430f2d..99f3295 100644 --- a/ffmpeg/libswscale/version.h +++ b/ffmpeg/libswscale/version.h @@ -24,11 +24,11 @@ * swscale version macros */ -#include "libavutil/avutil.h" +#include "libavutil/version.h" #define LIBSWSCALE_VERSION_MAJOR 2 -#define LIBSWSCALE_VERSION_MINOR 2 -#define LIBSWSCALE_VERSION_MICRO 100 +#define LIBSWSCALE_VERSION_MINOR 5 +#define LIBSWSCALE_VERSION_MICRO 101 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ diff --git a/ffmpeg/libswscale/x86/Makefile b/ffmpeg/libswscale/x86/Makefile index 7d219b4..e767a5c 100644 --- a/ffmpeg/libswscale/x86/Makefile +++ b/ffmpeg/libswscale/x86/Makefile @@ -1,11 +1,11 @@ $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) -OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o - -MMX-OBJS += x86/rgb2rgb.o \ +OBJS += x86/rgb2rgb.o \ x86/swscale.o \ x86/yuv2rgb.o \ +OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o + YASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ diff --git a/ffmpeg/libswscale/x86/input.asm b/ffmpeg/libswscale/x86/input.asm index 9d5a871..0c4f30e 100644 --- a/ffmpeg/libswscale/x86/input.asm +++ b/ffmpeg/libswscale/x86/input.asm @@ -4,20 +4,20 @@ ;* into YUV planes also. ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> ;* -;* This file is part of Libav. +;* This file is part of FFmpeg. ;* -;* Libav is free software; you can redistribute it and/or +;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* -;* Libav is distributed in the hope that it will be useful, +;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software +;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;****************************************************************************** @@ -37,31 +37,57 @@ SECTION_RODATA rgb_Yrnd: times 4 dd 0x80100 ; 16.5 << 15 rgb_UVrnd: times 4 dd 0x400100 ; 128.5 << 15 -bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY -bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY -rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY -rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY -bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU -bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU -rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU -rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU -bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV -bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV -rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV -rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV - -rgba_Ycoeff_rb: times 4 dw RY, BY -rgba_Ycoeff_br: times 4 dw BY, RY -rgba_Ycoeff_ga: times 4 dw GY, 0 -rgba_Ycoeff_ag: times 4 dw 0, GY -rgba_Ucoeff_rb: times 4 dw RU, BU -rgba_Ucoeff_br: times 4 dw BU, RU -rgba_Ucoeff_ga: times 4 dw GU, 0 -rgba_Ucoeff_ag: times 4 dw 0, GU -rgba_Vcoeff_rb: times 4 dw RV, BV -rgba_Vcoeff_br: times 4 dw BV, RV -rgba_Vcoeff_ga: times 4 dw GV, 0 -rgba_Vcoeff_ag: times 4 dw 0, GV +%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq +%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq +%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq +%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq +%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq +%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq +%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq +%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq +%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq +%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq +%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq +%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq + +%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq +%define rgba_Ycoeff_br 16*4 + 16*13 + tableq +%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq +%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq +%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq +%define rgba_Ucoeff_br 16*4 + 16*17 + tableq +%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq +%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq +%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq +%define rgba_Vcoeff_br 16*4 + 16*21 + tableq +%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq +%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq + +; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY +; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY +; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY +; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY +; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU +; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU +; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU +; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU +; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV +; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV +; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV +; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV + +; rgba_Ycoeff_rb: times 4 dw RY, BY +; rgba_Ycoeff_br: times 4 dw BY, RY +; rgba_Ycoeff_ga: times 4 dw GY, 0 +; rgba_Ycoeff_ag: times 4 dw 0, GY +; rgba_Ucoeff_rb: times 4 dw RU, BU +; rgba_Ucoeff_br: times 4 dw BU, RU +; rgba_Ucoeff_ga: times 4 dw GU, 0 +; rgba_Ucoeff_ag: times 4 dw 0, GU +; rgba_Vcoeff_rb: times 4 dw RV, BV +; rgba_Vcoeff_br: times 4 dw BV, RV +; rgba_Vcoeff_ga: times 4 dw GV, 0 +; rgba_Vcoeff_ag: times 4 dw 0, GV shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \ 6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80 @@ -82,7 +108,7 @@ SECTION .text ; %1 = nr. of XMM registers ; %2 = rgb or bgr %macro RGB24_TO_Y_FN 2-3 -cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3 +cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table %if mmsize == 8 mova m5, [%2_Ycoeff_12x4] mova m6, [%2_Ycoeff_3x56] @@ -171,7 +197,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3 ; %1 = nr. of XMM registers ; %2 = rgb or bgr %macro RGB24_TO_UV_FN 2-3 -cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 +cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table %if ARCH_X86_64 mova m8, [%2_Ucoeff_12x4] mova m9, [%2_Ucoeff_3x56] @@ -311,7 +337,7 @@ RGB24_FUNCS 11, 13 ; %1 = nr. of XMM registers ; %2-5 = rgba, bgra, argb or abgr (in individual characters) %macro RGB32_TO_Y_FN 5-6 -cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3 +cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table mova m5, [rgba_Ycoeff_%2%4] mova m6, [rgba_Ycoeff_%3%5] %if %0 == 6 @@ -354,7 +380,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3 ; %1 = nr. of XMM registers ; %2-5 = rgba, bgra, argb or abgr (in individual characters) %macro RGB32_TO_UV_FN 5-6 -cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 +cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table %if ARCH_X86_64 mova m8, [rgba_Ucoeff_%2%4] mova m9, [rgba_Ucoeff_%3%5] diff --git a/ffmpeg/libswscale/x86/output.asm b/ffmpeg/libswscale/x86/output.asm index f9add35..9ea4af9 100644 --- a/ffmpeg/libswscale/x86/output.asm +++ b/ffmpeg/libswscale/x86/output.asm @@ -3,20 +3,20 @@ ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> ;* Kieran Kunhya <kieran@kunhya.com> ;* -;* This file is part of Libav. +;* This file is part of FFmpeg. ;* -;* Libav is free software; you can redistribute it and/or +;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* -;* Libav is distributed in the hope that it will be useful, +;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software +;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;****************************************************************************** diff --git a/ffmpeg/libswscale/x86/rgb2rgb.c b/ffmpeg/libswscale/x86/rgb2rgb.c index 1e20176..8cc99c6 100644 --- a/ffmpeg/libswscale/x86/rgb2rgb.c +++ b/ffmpeg/libswscale/x86/rgb2rgb.c @@ -76,7 +76,6 @@ DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; -#define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) @@ -92,35 +91,45 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; #define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_AMD3DNOW 0 #define COMPILE_TEMPLATE_SSE2 0 +#define COMPILE_TEMPLATE_AVX 0 //MMX versions #undef RENAME -#define RENAME(a) a ## _MMX +#define RENAME(a) a ## _mmx #include "rgb2rgb_template.c" // MMXEXT versions #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMXEXT +#define RENAME(a) a ## _mmxext #include "rgb2rgb_template.c" //SSE2 versions #undef RENAME #undef COMPILE_TEMPLATE_SSE2 #define COMPILE_TEMPLATE_SSE2 1 -#define RENAME(a) a ## _SSE2 +#define RENAME(a) a ## _sse2 +#include "rgb2rgb_template.c" + +//AVX versions +#undef RENAME +#undef COMPILE_TEMPLATE_AVX +#define COMPILE_TEMPLATE_AVX 1 +#define RENAME(a) a ## _avx #include "rgb2rgb_template.c" //3DNOW versions #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #undef COMPILE_TEMPLATE_SSE2 +#undef COMPILE_TEMPLATE_AVX #undef COMPILE_TEMPLATE_AMD3DNOW #define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_SSE2 0 +#define COMPILE_TEMPLATE_AVX 0 #define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3DNOW +#define RENAME(a) a ## _3dnow #include "rgb2rgb_template.c" /* @@ -138,12 +147,14 @@ av_cold void rgb2rgb_init_x86(void) int cpu_flags = av_get_cpu_flags(); if (INLINE_MMX(cpu_flags)) - rgb2rgb_init_MMX(); + rgb2rgb_init_mmx(); if (INLINE_AMD3DNOW(cpu_flags)) - rgb2rgb_init_3DNOW(); + rgb2rgb_init_3dnow(); if (INLINE_MMXEXT(cpu_flags)) - rgb2rgb_init_MMXEXT(); + rgb2rgb_init_mmxext(); if (INLINE_SSE2(cpu_flags)) - rgb2rgb_init_SSE2(); + rgb2rgb_init_sse2(); + if (INLINE_AVX(cpu_flags)) + rgb2rgb_init_avx(); #endif /* HAVE_INLINE_ASM */ } diff --git a/ffmpeg/libswscale/x86/rgb2rgb_template.c b/ffmpeg/libswscale/x86/rgb2rgb_template.c index d802ab4..d58219b 100644 --- a/ffmpeg/libswscale/x86/rgb2rgb_template.c +++ b/ffmpeg/libswscale/x86/rgb2rgb_template.c @@ -26,6 +26,8 @@ #include <stddef.h> +#include "libavutil/attributes.h" + #undef PREFETCH #undef MOVNTQ #undef EMMS @@ -1610,10 +1612,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t * others are ignored in the C version. * FIXME: Write HQ version. */ +#if HAVE_7REGS static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, - int lumStride, int chromStride, int srcStride) + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv) { +#define BGR2Y_IDX "16*4+16*32" +#define BGR2U_IDX "16*4+16*33" +#define BGR2V_IDX "16*4+16*34" int y; const x86_reg chromWidth= width>>1; for (y=0; y<height-2; y+=2) { @@ -1621,7 +1628,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ for (i=0; i<2; i++) { __asm__ volatile( "mov %2, %%"REG_a" \n\t" - "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" + "movq "BGR2Y_IDX"(%3), %%mm6 \n\t" "movq "MANGLE(ff_w1111)", %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" @@ -1640,12 +1647,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "pmaddwd %%mm6, %%mm1 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" @@ -1665,12 +1670,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "pmaddwd %%mm6, %%mm1 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm1, %%mm4 \n\t" "packssdw %%mm3, %%mm2 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" @@ -1685,7 +1688,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" "add $8, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) + : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) : "%"REG_a, "%"REG_d ); ydst += lumStride; @@ -1695,7 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ __asm__ volatile( "mov %4, %%"REG_a" \n\t" "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" + "movq "BGR2U_IDX"(%5), %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" "add %%"REG_d", %%"REG_d" \n\t" @@ -1744,19 +1747,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" + "movq "BGR2V_IDX"(%5), %%mm1 \n\t" + "movq "BGR2V_IDX"(%5), %%mm3 \n\t" "pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm2, %%mm0 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" @@ -1806,19 +1807,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" + "movq "BGR2V_IDX"(%5), %%mm1 \n\t" + "movq "BGR2V_IDX"(%5), %%mm3 \n\t" "pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm2, %%mm4 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" @@ -1837,7 +1836,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "movd %%mm0, (%3, %%"REG_a") \n\t" "add $4, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) + : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) : "%"REG_a, "%"REG_d ); @@ -1850,11 +1849,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ SFENCE" \n\t" :::"memory"); - rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); + ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv); } +#endif /* HAVE_7REGS */ #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AMD3DNOW +#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride) @@ -1924,7 +1924,35 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ::: "memory" ); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/ + +#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL +#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM +void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *unused0, + const uint8_t *src1, + const uint8_t *src2, + int w, uint32_t *unused); +static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, + int width, int height, int srcStride, + int dst1Stride, int dst2Stride) +{ + int h; + + for (h=0; h < height; h++) { + RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL); + src += srcStride; + dst1 += dst1Stride; + dst2 += dst2Stride; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ #if !COMPILE_TEMPLATE_SSE2 #if !COMPILE_TEMPLATE_AMD3DNOW @@ -2354,7 +2382,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src, ydst, width); @@ -2380,7 +2408,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src, ydst, width); @@ -2404,7 +2432,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src+1, ydst, width); @@ -2430,7 +2458,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src+1, ydst, width); @@ -2450,7 +2478,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ #endif /* !COMPILE_TEMPLATE_SSE2 */ -static inline void RENAME(rgb2rgb_init)(void) +static av_cold void RENAME(rgb2rgb_init)(void) { #if !COMPILE_TEMPLATE_SSE2 #if !COMPILE_TEMPLATE_AMD3DNOW @@ -2486,13 +2514,20 @@ static inline void RENAME(rgb2rgb_init)(void) #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW planar2x = RENAME(planar2x); #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ - rgb24toyv12 = RENAME(rgb24toyv12); +#if HAVE_7REGS + ff_rgb24toyv12 = RENAME(rgb24toyv12); +#endif /* HAVE_7REGS */ yuyvtoyuv420 = RENAME(yuyvtoyuv420); uyvytoyuv420 = RENAME(uyvytoyuv420); #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AMD3DNOW +#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX interleaveBytes = RENAME(interleaveBytes); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/ +#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL +#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM + deinterleaveBytes = RENAME(deinterleaveBytes); +#endif +#endif } diff --git a/ffmpeg/libswscale/x86/scale.asm b/ffmpeg/libswscale/x86/scale.asm index c6dafde..940f357 100644 --- a/ffmpeg/libswscale/x86/scale.asm +++ b/ffmpeg/libswscale/x86/scale.asm @@ -2,20 +2,20 @@ ;* x86-optimized horizontal line scaling functions ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> ;* -;* This file is part of Libav. +;* This file is part of FFmpeg. ;* -;* Libav is free software; you can redistribute it and/or +;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* -;* Libav is distributed in the hope that it will be useful, +;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software +;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;****************************************************************************** diff --git a/ffmpeg/libswscale/x86/swscale.c b/ffmpeg/libswscale/x86/swscale.c index 2f67b1b..2f7e4f7 100644 --- a/ffmpeg/libswscale/x86/swscale.c +++ b/ffmpeg/libswscale/x86/swscale.c @@ -58,15 +58,10 @@ DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; -#ifdef FAST_BGR2YV12 -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; -#else DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; -#endif /* FAST_BGR2YV12 */ + DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; @@ -76,7 +71,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; #if HAVE_MMX_INLINE #undef RENAME #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _MMX +#define RENAME(a) a ## _mmx #include "swscale_template.c" #endif @@ -85,7 +80,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMXEXT +#define RENAME(a) a ## _mmxext #include "swscale_template.c" #endif @@ -211,7 +206,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, const uint8_t *dither, int offset) { if(((int)dest) & 15){ - return yuv2yuvX_MMXEXT(filter, filterSize, src, dest, dstW, dither, offset); + return yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); } if (offset) { __asm__ volatile("movq (%0), %%xmm3\n\t" @@ -279,7 +274,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, #endif /* HAVE_INLINE_ASM */ #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ -extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ +void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ SwsContext *c, int16_t *data, \ int dstW, const uint8_t *src, \ const int16_t *filter, \ @@ -318,9 +313,9 @@ SCALE_FUNCS_SSE(ssse3); SCALE_FUNCS_SSE(sse4); #define VSCALEX_FUNC(size, opt) \ -extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ - const int16_t **src, uint8_t *dest, int dstW, \ - const uint8_t *dither, int offset) +void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ + const int16_t **src, uint8_t *dest, int dstW, \ + const uint8_t *dither, int offset) #define VSCALEX_FUNCS(opt) \ VSCALEX_FUNC(8, opt); \ VSCALEX_FUNC(9, opt); \ @@ -335,8 +330,8 @@ VSCALEX_FUNC(16, sse4); VSCALEX_FUNCS(avx); #define VSCALE_FUNC(size, opt) \ -extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \ - const uint8_t *dither, int offset) +void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \ + const uint8_t *dither, int offset) #define VSCALE_FUNCS(opt1, opt2) \ VSCALE_FUNC(8, opt1); \ VSCALE_FUNC(9, opt2); \ @@ -351,15 +346,15 @@ VSCALE_FUNC(16, sse4); VSCALE_FUNCS(avx, avx); #define INPUT_Y_FUNC(fmt, opt) \ -extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ - const uint8_t *unused1, const uint8_t *unused2, \ - int w, uint32_t *unused) +void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ + const uint8_t *unused1, const uint8_t *unused2, \ + int w, uint32_t *unused) #define INPUT_UV_FUNC(fmt, opt) \ -extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *unused0, \ - const uint8_t *src1, \ - const uint8_t *src2, \ - int w, uint32_t *unused) +void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *unused0, \ + const uint8_t *src1, \ + const uint8_t *src2, \ + int w, uint32_t *unused) #define INPUT_FUNC(fmt, opt) \ INPUT_Y_FUNC(fmt, opt); \ INPUT_UV_FUNC(fmt, opt) @@ -382,22 +377,22 @@ INPUT_FUNCS(sse2); INPUT_FUNCS(ssse3); INPUT_FUNCS(avx); -av_cold void ff_sws_init_swScale_mmx(SwsContext *c) +av_cold void ff_sws_init_swscale_x86(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); -#if HAVE_INLINE_ASM +#if HAVE_MMX_INLINE if (cpu_flags & AV_CPU_FLAG_MMX) - sws_init_swScale_MMX(c); + sws_init_swscale_mmx(c); +#endif #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - sws_init_swScale_MMXEXT(c); + sws_init_swscale_mmxext(c); if (cpu_flags & AV_CPU_FLAG_SSE3){ if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) c->yuv2planeX = yuv2yuvX_sse3; } #endif -#endif /* HAVE_INLINE_ASM */ #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ if (c->srcBpc == 8) { \ diff --git a/ffmpeg/libswscale/x86/swscale_template.c b/ffmpeg/libswscale/x86/swscale_template.c index f2567c1..c7a1bb4 100644 --- a/ffmpeg/libswscale/x86/swscale_template.c +++ b/ffmpeg/libswscale/x86/swscale_template.c @@ -1640,7 +1640,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, } #endif /* COMPILE_TEMPLATE_MMXEXT */ -static av_cold void RENAME(sws_init_swScale)(SwsContext *c) +static av_cold void RENAME(sws_init_swscale)(SwsContext *c) { enum AVPixelFormat dstFormat = c->dstFormat; diff --git a/ffmpeg/libswscale/x86/w64xmmtest.c b/ffmpeg/libswscale/x86/w64xmmtest.c index dd9a2a4..88143d9 100644 --- a/ffmpeg/libswscale/x86/w64xmmtest.c +++ b/ffmpeg/libswscale/x86/w64xmmtest.c @@ -2,20 +2,20 @@ * check XMM registers for clobbers on Win64 * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libswscale/x86/yuv2rgb.c b/ffmpeg/libswscale/x86/yuv2rgb.c index 3938e6b..e4315ef 100644 --- a/ffmpeg/libswscale/x86/yuv2rgb.c +++ b/ffmpeg/libswscale/x86/yuv2rgb.c @@ -27,7 +27,6 @@ #include <stdio.h> #include <stdlib.h> #include <inttypes.h> -#include <assert.h> #include "config.h" #include "libswscale/rgb2rgb.h" @@ -54,7 +53,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _MMX +#define RENAME(a) a ## _mmx #include "yuv2rgb_template.c" #endif /* HAVE_MMX_INLINE */ @@ -63,24 +62,24 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMXEXT +#define RENAME(a) a ## _mmxext #include "yuv2rgb_template.c" #endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_INLINE_ASM */ -av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) +av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_INLINE_ASM +#if HAVE_MMX_INLINE int cpu_flags = av_get_cpu_flags(); #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) { switch (c->dstFormat) { case AV_PIX_FMT_RGB24: - return yuv420_rgb24_MMXEXT; + return yuv420_rgb24_mmxext; case AV_PIX_FMT_BGR24: - return yuv420_bgr24_MMXEXT; + return yuv420_bgr24_mmxext; } } #endif @@ -90,24 +89,30 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA - return yuva420_rgb32_MMX; + return yuva420_rgb32_mmx; #endif break; - } else return yuv420_rgb32_MMX; + } else + return yuv420_rgb32_mmx; case AV_PIX_FMT_BGR32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA - return yuva420_bgr32_MMX; + return yuva420_bgr32_mmx; #endif break; - } else return yuv420_bgr32_MMX; - case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX; - case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX; - case AV_PIX_FMT_RGB565: return yuv420_rgb16_MMX; - case AV_PIX_FMT_RGB555: return yuv420_rgb15_MMX; + } else + return yuv420_bgr32_mmx; + case AV_PIX_FMT_RGB24: + return yuv420_rgb24_mmx; + case AV_PIX_FMT_BGR24: + return yuv420_bgr24_mmx; + case AV_PIX_FMT_RGB565: + return yuv420_rgb16_mmx; + case AV_PIX_FMT_RGB555: + return yuv420_rgb15_mmx; } } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_MMX_INLINE */ return NULL; } diff --git a/ffmpeg/libswscale/yuv2rgb.c b/ffmpeg/libswscale/yuv2rgb.c index d12abda..77c56a9 100644 --- a/ffmpeg/libswscale/yuv2rgb.c +++ b/ffmpeg/libswscale/yuv2rgb.c @@ -35,13 +35,6 @@ #include "swscale_internal.h" #include "libavutil/pixdesc.h" -extern const uint8_t dither_2x2_4[3][8]; -extern const uint8_t dither_2x2_8[3][8]; -extern const uint8_t dither_4x4_16[5][8]; -extern const uint8_t dither_8x8_32[9][8]; -extern const uint8_t dither_8x8_73[9][8]; -extern const uint8_t dither_8x8_220[9][8]; - const int32_t ff_yuv2rgb_coeffs[8][4] = { { 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */ { 117504, 138453, 13954, 34903 }, /* ITU-R Rec. 709 (1990) */ @@ -266,28 +259,28 @@ YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1) PUTRGBA(dst_2, py_2, pa_2, 0, 24); LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_1, 1, 24); - PUTRGBA(dst_1, py_1, pa_2, 1, 24); + PUTRGBA(dst_2, py_2, pa_2, 1, 24); + PUTRGBA(dst_1, py_1, pa_1, 1, 24); LOADCHROMA(2); PUTRGBA(dst_1, py_1, pa_1, 2, 24); PUTRGBA(dst_2, py_2, pa_2, 2, 24); LOADCHROMA(3); - PUTRGBA(dst_2, py_2, pa_1, 3, 24); - PUTRGBA(dst_1, py_1, pa_2, 3, 24); - pa_1 += 8; \ - pa_2 += 8; \ + PUTRGBA(dst_2, py_2, pa_2, 3, 24); + PUTRGBA(dst_1, py_1, pa_1, 3, 24); + pa_1 += 8; + pa_2 += 8; ENDYUV2RGBLINE(8, 0) LOADCHROMA(0); PUTRGBA(dst_1, py_1, pa_1, 0, 24); PUTRGBA(dst_2, py_2, pa_2, 0, 24); LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_1, 1, 24); - PUTRGBA(dst_1, py_1, pa_2, 1, 24); - pa_1 += 4; \ - pa_2 += 4; \ + PUTRGBA(dst_2, py_2, pa_2, 1, 24); + PUTRGBA(dst_1, py_1, pa_1, 1, 24); + pa_1 += 4; + pa_2 += 4; ENDYUV2RGBLINE(8, 1) LOADCHROMA(0); PUTRGBA(dst_1, py_1, pa_1, 0, 24); @@ -310,8 +303,8 @@ YUV2RGBFUNC(yuva2argb_c, uint32_t, 1) LOADCHROMA(3); PUTRGBA(dst_2, py_2, pa_2, 3, 0); PUTRGBA(dst_1, py_1, pa_1, 3, 0); - pa_1 += 8; \ - pa_2 += 8; \ + pa_1 += 8; + pa_2 += 8; ENDYUV2RGBLINE(8, 0) LOADCHROMA(0); PUTRGBA(dst_1, py_1, pa_1, 0, 0); @@ -320,8 +313,8 @@ ENDYUV2RGBLINE(8, 0) LOADCHROMA(1); PUTRGBA(dst_2, py_2, pa_2, 1, 0); PUTRGBA(dst_1, py_1, pa_1, 1, 0); - pa_1 += 4; \ - pa_2 += 4; \ + pa_1 += 4; + pa_2 += 4; ENDYUV2RGBLINE(8, 1) LOADCHROMA(0); PUTRGBA(dst_1, py_1, pa_1, 0, 0); @@ -390,9 +383,9 @@ ENDYUV2RGBLINE(24, 1) ENDYUV2RGBFUNC() YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) - const uint8_t *d16 = dither_2x2_8[y & 1]; - const uint8_t *e16 = dither_2x2_4[y & 1]; - const uint8_t *f16 = dither_2x2_8[(y & 1)^1]; + const uint8_t *d16 = ff_dither_2x2_8[y & 1]; + const uint8_t *e16 = ff_dither_2x2_4[y & 1]; + const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1]; #define PUTRGB16(dst, src, i, o) \ Y = src[2 * i]; \ @@ -421,8 +414,8 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) CLOSEYUV2RGBFUNC(8) YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) - const uint8_t *d16 = dither_2x2_8[y & 1]; - const uint8_t *e16 = dither_2x2_8[(y & 1)^1]; + const uint8_t *d16 = ff_dither_2x2_8[y & 1]; + const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1]; #define PUTRGB15(dst, src, i, o) \ Y = src[2 * i]; \ @@ -452,7 +445,7 @@ CLOSEYUV2RGBFUNC(8) // r, g, b, dst_1, dst_2 YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0) - const uint8_t *d16 = dither_4x4_16[y & 3]; + const uint8_t *d16 = ff_dither_4x4_16[y & 3]; #define PUTRGB12(dst, src, i, o) \ Y = src[2 * i]; \ @@ -483,8 +476,8 @@ CLOSEYUV2RGBFUNC(8) // r, g, b, dst_1, dst_2 YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) - const uint8_t *d32 = dither_8x8_32[y & 7]; - const uint8_t *d64 = dither_8x8_73[y & 7]; + const uint8_t *d32 = ff_dither_8x8_32[y & 7]; + const uint8_t *d64 = ff_dither_8x8_73[y & 7]; #define PUTRGB8(dst, src, i, o) \ Y = src[2 * i]; \ @@ -514,8 +507,8 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) CLOSEYUV2RGBFUNC(8) YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) - const uint8_t * d64 = dither_8x8_73[y & 7]; - const uint8_t *d128 = dither_8x8_220[y & 7]; + const uint8_t * d64 = ff_dither_8x8_73[y & 7]; + const uint8_t *d128 = ff_dither_8x8_220[y & 7]; int acc; #define PUTRGB4D(dst, src, i, o) \ @@ -547,8 +540,8 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) CLOSEYUV2RGBFUNC(4) YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) - const uint8_t *d64 = dither_8x8_73[y & 7]; - const uint8_t *d128 = dither_8x8_220[y & 7]; + const uint8_t *d64 = ff_dither_8x8_73[y & 7]; + const uint8_t *d128 = ff_dither_8x8_220[y & 7]; #define PUTRGB4DB(dst, src, i, o) \ Y = src[2 * i]; \ @@ -578,7 +571,7 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) CLOSEYUV2RGBFUNC(8) YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) - const uint8_t *d128 = dither_8x8_220[y & 7]; + const uint8_t *d128 = ff_dither_8x8_220[y & 7]; char out_1 = 0, out_2 = 0; g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; @@ -608,14 +601,14 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) { SwsFunc t = NULL; - if (HAVE_MMX) - t = ff_yuv2rgb_init_mmx(c); - else if (HAVE_VIS) + if (ARCH_BFIN) + t = ff_yuv2rgb_init_bfin(c); + if (ARCH_PPC) + t = ff_yuv2rgb_init_ppc(c); + if (HAVE_VIS) t = ff_yuv2rgb_init_vis(c); - else if (HAVE_ALTIVEC) - t = ff_yuv2rgb_init_altivec(c); - else if (ARCH_BFIN) - t = ff_yuv2rgb_get_func_ptr_bfin(c); + if (ARCH_X86) + t = ff_yuv2rgb_init_x86(c); if (t) return t; @@ -775,12 +768,12 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu << 13); //scale coefficients by cy - crv = ((crv << 16) + 0x8000) / cy; - cbu = ((cbu << 16) + 0x8000) / cy; - cgu = ((cgu << 16) + 0x8000) / cy; - cgv = ((cgv << 16) + 0x8000) / cy; + crv = ((crv << 16) + 0x8000) / FFMAX(cy, 1); + cbu = ((cbu << 16) + 0x8000) / FFMAX(cy, 1); + cgu = ((cgu << 16) + 0x8000) / FFMAX(cy, 1); + cgv = ((cgv << 16) + 0x8000) / FFMAX(cy, 1); - av_free(c->yuvTable); + av_freep(&c->yuvTable); switch (bpp) { case 1: @@ -893,6 +886,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], fill_gv_table(c->table_gV, 1, cgv); break; case 32: + case 64: base = (c->dstFormat == AV_PIX_FMT_RGB32_1 || c->dstFormat == AV_PIX_FMT_BGR32_1) ? 8 : 0; rbase = base + (isRgb ? 16 : 0); @@ -918,7 +912,6 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], fill_gv_table(c->table_gV, 4, cgv); break; default: - c->yuvTable = NULL; if(!isPlanar(c->dstFormat) || bpp <= 24) av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp); return -1; |
