diff options
| author | Tim Redfern <tim@eclectronics.org> | 2014-02-17 13:36:38 +0000 |
|---|---|---|
| committer | Tim Redfern <tim@eclectronics.org> | 2014-02-17 13:36:38 +0000 |
| commit | 22e28216336da876e1fd17f380ce42eaf1446769 (patch) | |
| tree | 444dad3dc7e2656992d29f34f7bce31970c122a5 /ffmpeg/libswscale | |
| parent | ae5e8541f6e06e64c28719467cdf366ac57aff31 (diff) | |
chasing indexing error
Diffstat (limited to 'ffmpeg/libswscale')
40 files changed, 0 insertions, 22230 deletions
diff --git a/ffmpeg/libswscale/Makefile b/ffmpeg/libswscale/Makefile deleted file mode 100644 index ca6e27d..0000000 --- a/ffmpeg/libswscale/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -include $(SUBDIR)../config.mak - -NAME = swscale -FFLIBS = avutil - -HEADERS = swscale.h \ - version.h \ - -OBJS = input.o \ - options.o \ - output.o \ - rgb2rgb.o \ - swscale.o \ - swscale_unscaled.o \ - utils.o \ - yuv2rgb.o \ - -# Windows resource file -SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o - -TESTPROGS = colorspace \ - swscale \ diff --git a/ffmpeg/libswscale/bfin/Makefile b/ffmpeg/libswscale/bfin/Makefile deleted file mode 100644 index 5f34550..0000000 --- a/ffmpeg/libswscale/bfin/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -OBJS += bfin/internal_bfin.o \ - bfin/swscale_bfin.o \ - bfin/yuv2rgb_bfin.o \ diff --git a/ffmpeg/libswscale/bfin/internal_bfin.S b/ffmpeg/libswscale/bfin/internal_bfin.S deleted file mode 100644 index eab30aa..0000000 --- a/ffmpeg/libswscale/bfin/internal_bfin.S +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> - * April 20, 2007 - * - * Blackfin video color space converter operations - * convert I420 YV12 to RGB in various formats - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - -/* -YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock -and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts. - - -The following calculation is used for the conversion: - - r = clipz((y - oy) * cy + crv * (v - 128)) - g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) - b = clipz((y - oy) * cy + cbu * (u - 128)) - -y, u, v are prescaled by a factor of 4 i.e. left-shifted to gain precision. - - -New factorization to eliminate the truncation error which was -occurring due to the byteop3p. - - -1) Use the bytop16m to subtract quad bytes we use this in U8 this - then so the offsets need to be renormalized to 8bits. - -2) Scale operands up by a factor of 4 not 8 because Blackfin - multiplies include a shift. - -3) Compute into the accumulators cy * yx0, cy * yx1. - -4) Compute each of the linear equations: - r = clipz((y - oy) * cy + crv * (v - 128)) - - g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) - - b = clipz((y - oy) * cy + cbu * (u - 128)) - - Reuse of the accumulators requires that we actually multiply - twice once with addition and the second time with a subtraction. - - Because of this we need to compute the equations in the order R B - then G saving the writes for B in the case of 24/32 bit color - formats. - - API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, - int dW, uint32_t *coeffs); - - A B - --- --- - i2 = cb i3 = cr - i1 = coeff i0 = y - -Where coeffs have the following layout in memory. - -uint32_t oy, oc, zero, cy, crv, rmask, cbu, bmask, cgu, cgv; - -coeffs is a pointer to oy. - -The {rgb} masks are only utilized by the 565 packing algorithm. Note the data -replication is used to simplify the internal algorithms for the dual Mac -architecture of BlackFin. - -All routines are exported with _ff_bfin_ as a symbol prefix. - -Rough performance gain compared against -O3: - -2779809/1484290 187.28% - -which translates to ~33c/pel to ~57c/pel for the reference vs 17.5 -c/pel for the optimized implementations. Not sure why there is such a -huge variation on the reference codes on Blackfin I guess it must have -to do with the memory system. -*/ - -#define mL3 .text -#if defined(__FDPIC__) && CONFIG_SRAM -#define mL1 .l1.text -#else -#define mL1 mL3 -#endif -#define MEM mL1 - -#define DEFUN(fname,where,interface) \ - .section where; \ - .global _ff_bfin_ ## fname; \ - .type _ff_bfin_ ## fname, STT_FUNC; \ - .align 8; \ - _ff_bfin_ ## fname - -#define DEFUN_END(fname) \ - .size _ff_bfin_ ## fname, . - _ff_bfin_ ## fname - - -.text - -#define COEFF_LEN 11*4 -#define COEFF_REL_CY_OFF 4*4 - -#define ARG_OUT 20 -#define ARG_W 24 -#define ARG_COEFF 28 - -DEFUN(yuv2rgb565_line,MEM, - (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)): - link 0; - [--sp] = (r7:4); - p1 = [fp+ARG_OUT]; - r3 = [fp+ARG_W]; - - i0 = r0; - i2 = r1; - i3 = r2; - - r0 = [fp+ARG_COEFF]; - i1 = r0; - b1 = i1; - l1 = COEFF_LEN; - m0 = COEFF_REL_CY_OFF; - p0 = r3; - - r0 = [i0++]; // 2Y - r1.l = w[i2++]; // 2u - r1.h = w[i3++]; // 2v - p0 = p0>>2; - - lsetup (.L0565, .L1565) lc0 = p0; - - /* - uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv - r0 -- used to load 4ys - r1 -- used to load 2us,2vs - r4 -- y3,y2 - r5 -- y1,y0 - r6 -- u1,u0 - r7 -- v1,v0 - */ - r2=[i1++]; // oy -.L0565: - /* - rrrrrrrr gggggggg bbbbbbbb - 5432109876543210 - bbbbb >>3 - gggggggg <<3 - rrrrrrrr <<8 - rrrrrggggggbbbbb - */ - (r4,r5) = byteop16m (r1:0, r3:2) || r3=[i1++]; // oc - (r7,r6) = byteop16m (r1:0, r3:2) (r); - r5 = r5 << 2 (v); // y1,y0 - r4 = r4 << 2 (v); // y3,y2 - r6 = r6 << 2 (v) || r0=[i1++]; // u1,u0, r0=zero - r7 = r7 << 2 (v) || r1=[i1++]; // v1,v0 r1=cy - /* Y' = y*cy */ - a1 = r1.h*r5.h, a0 = r1.l*r5.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - a1 -= r1.h*r7.l, a0 -= r1.l*r7.l || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l); - a1 -= r1.h*r6.l, a0 -= r1.l*r6.l || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 8 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.l, a0 += r1.l*r6.l || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++m0]; // gmask - r2 = r2 << 3 (v); - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1=[i1++]; // cy - - /* Y' = y*cy */ - - a1 = r1.h*r4.h, a0 = r1.l*r4.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - a1 -= r1.h*r7.h, a0 -= r1.l*r7.h || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h); - a1 -= r1.h*r6.h, a0 -= r1.l*r6.h || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 8 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.h, a0 += r1.l*r6.h || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask - r2 = byteop3p(r3:2, r1:0)(LO) || r0 = [i0++]; // 2Y - r2 = r2 << 3 (v) || r1.l = w[i2++]; // 2u - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1.h = w[i3++]; // 2v -.L1565: r2=[i1++]; // oy - - l1 = 0; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuv2rgb565_line) - -DEFUN(yuv2rgb555_line,MEM, - (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)): - link 0; - [--sp] = (r7:4); - p1 = [fp+ARG_OUT]; - r3 = [fp+ARG_W]; - - i0 = r0; - i2 = r1; - i3 = r2; - - r0 = [fp+ARG_COEFF]; - i1 = r0; - b1 = i1; - l1 = COEFF_LEN; - m0 = COEFF_REL_CY_OFF; - p0 = r3; - - r0 = [i0++]; // 2Y - r1.l = w[i2++]; // 2u - r1.h = w[i3++]; // 2v - p0 = p0>>2; - - lsetup (.L0555, .L1555) lc0 = p0; - - /* - uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv - r0 -- used to load 4ys - r1 -- used to load 2us,2vs - r4 -- y3,y2 - r5 -- y1,y0 - r6 -- u1,u0 - r7 -- v1,v0 - */ - r2=[i1++]; // oy -.L0555: - /* - rrrrrrrr gggggggg bbbbbbbb - 5432109876543210 - bbbbb >>3 - gggggggg <<2 - rrrrrrrr <<7 - xrrrrrgggggbbbbb - */ - - (r4,r5) = byteop16m (r1:0, r3:2) || r3=[i1++]; // oc - (r7,r6) = byteop16m (r1:0, r3:2) (r); - r5 = r5 << 2 (v); // y1,y0 - r4 = r4 << 2 (v); // y3,y2 - r6 = r6 << 2 (v) || r0=[i1++]; // u1,u0, r0=zero - r7 = r7 << 2 (v) || r1=[i1++]; // v1,v0 r1=cy - /* Y' = y*cy */ - a1 = r1.h*r5.h, a0 = r1.l*r5.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - a1 -= r1.h*r7.l, a0 -= r1.l*r7.l || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l); - a1 -= r1.h*r6.l, a0 -= r1.l*r6.l || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 7 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.l, a0 += r1.l*r6.l || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++m0]; // gmask - r2 = r2 << 2 (v); - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1=[i1++]; // cy - - /* Y' = y*cy */ - - a1 = r1.h*r4.h, a0 = r1.l*r4.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - a1 -= r1.h*r7.h, a0 -= r1.l*r7.h || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h); - a1 -= r1.h*r6.h, a0 -= r1.l*r6.h || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 7 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.h, a0 += r1.l*r6.h || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask - r2 = byteop3p(r3:2, r1:0)(LO) || r0=[i0++]; // 4Y - r2 = r2 << 2 (v) || r1.l=w[i2++]; // 2u - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1.h=w[i3++]; // 2v - -.L1555: r2=[i1++]; // oy - - l1 = 0; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuv2rgb555_line) - -DEFUN(yuv2rgb24_line,MEM, - (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)): - link 0; - [--sp] = (r7:4); - p1 = [fp+ARG_OUT]; - r3 = [fp+ARG_W]; - p2 = p1; - p2 += 3; - - i0 = r0; - i2 = r1; - i3 = r2; - - r0 = [fp+ARG_COEFF]; // coeff buffer - i1 = r0; - b1 = i1; - l1 = COEFF_LEN; - m0 = COEFF_REL_CY_OFF; - p0 = r3; - - r0 = [i0++]; // 2Y - r1.l = w[i2++]; // 2u - r1.h = w[i3++]; // 2v - p0 = p0>>2; - - lsetup (.L0888, .L1888) lc0 = p0; - - /* - uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv - r0 -- used to load 4ys - r1 -- used to load 2us,2vs - r4 -- y3,y2 - r5 -- y1,y0 - r6 -- u1,u0 - r7 -- v1,v0 - */ - r2=[i1++]; // oy -.L0888: - (r4,r5) = byteop16m (r1:0, r3:2) || r3=[i1++]; // oc - (r7,r6) = byteop16m (r1:0, r3:2) (r); - r5 = r5 << 2 (v); // y1,y0 - r4 = r4 << 2 (v); // y3,y2 - r6 = r6 << 2 (v) || r0=[i1++]; // u1,u0, r0=zero - r7 = r7 << 2 (v) || r1=[i1++]; // v1,v0 r1=cy - - /* Y' = y*cy */ - a1 = r1.h*r5.h, a0 = r1.l*r5.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - a1 -= r1.h*r7.l, a0 -= r1.l*r7.l || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2=r2>>16 || B[p1++]=r2; - B[p2++]=r2; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l); - a1 -= r1.h*r6.l, a0 -= r1.l*r6.l || r5=[i1++]; // bmask - r3 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.l, a0 += r1.l*r6.l || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++m0]; // gmask, oy,cy,zero - - r2=r2>>16 || B[p1++]=r2; - B[p2++]=r2; - - r3=r3>>16 || B[p1++]=r3; - B[p2++]=r3 || r1=[i1++]; // cy - - p1+=3; - p2+=3; - /* Y' = y*cy */ - a1 = r1.h*r4.h, a0 = r1.l*r4.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - a1 -= r1.h*r7.h, a0 -= r1.l*r7.h || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2=r2>>16 || B[p1++]=r2; - B[p2++]=r2; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h); - a1 -= r1.h*r6.h, a0 -= r1.l*r6.h || r5=[i1++]; // bmask - r3 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.h, a0 += r1.l*r6.h || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++]; // gmask - r2=r2>>16 || B[p1++]=r2 || r0 = [i0++]; // 4y - B[p2++]=r2 || r1.l = w[i2++]; // 2u - r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v - B[p2++]=r3 || r2=[i1++]; // oy - - p1+=3; -.L1888: p2+=3; - - l1 = 0; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuv2rgb24_line) - - - -#define ARG_vdst 20 -#define ARG_width 24 -#define ARG_height 28 -#define ARG_lumStride 32 -#define ARG_chromStride 36 -#define ARG_srcStride 40 - -DEFUN(uyvytoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride)): - link 0; - [--sp] = (r7:4,p5:4); - - p0 = r1; // Y top even - - i2 = r2; // *u - r2 = [fp + ARG_vdst]; - i3 = r2; // *v - - r1 = [fp + ARG_srcStride]; - r2 = r0 + r1; - i0 = r0; // uyvy_T even - i1 = r2; // uyvy_B odd - - p2 = [fp + ARG_lumStride]; - p1 = p0 + p2; // Y bot odd - - p5 = [fp + ARG_width]; - p4 = [fp + ARG_height]; - r0 = p5; - p4 = p4 >> 1; - p5 = p5 >> 2; - - r2 = r0 << 1; - r1 = r1 << 1; - r1 = r1 - r2; // srcStride + (srcStride - 2*width) - r1 += -8; // i0,i1 is pre read need to correct - m0 = r1; - - r2 = [fp + ARG_chromStride]; - r0 = r0 >> 1; - r2 = r2 - r0; - m1 = r2; - - /* I0,I1 - src input line pointers - * p0,p1 - luma output line pointers - * I2 - dstU - * I3 - dstV - */ - - lsetup (0f, 1f) lc1 = p4; // H/2 -0: r0 = [i0++] || r2 = [i1++]; - r1 = [i0++] || r3 = [i1++]; - r4 = byteop1p(r1:0, r3:2); - r5 = byteop1p(r1:0, r3:2) (r); - lsetup (2f, 3f) lc0 = p5; // W/4 -2: r0 = r0 >> 8(v); - r1 = r1 >> 8(v); - r2 = r2 >> 8(v); - r3 = r3 >> 8(v); - r0 = bytepack(r0, r1); - r2 = bytepack(r2, r3) || [p0++] = r0; // yyyy - r6 = pack(r5.l, r4.l) || [p1++] = r2; // yyyy - r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; - r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; - r4 = byteop1p(r1:0, r3:2) || w[i2++] = r6.l; // uu -3: r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; // vv - - i0 += m0; - i1 += m0; - i2 += m1; - i3 += m1; - p0 = p0 + p2; -1: p1 = p1 + p2; - - (r7:4,p5:4) = [sp++]; - unlink; - rts; -DEFUN_END(uyvytoyv12) - -DEFUN(yuyvtoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride)): - link 0; - [--sp] = (r7:4,p5:4); - - p0 = r1; // Y top even - - i2 = r2; // *u - r2 = [fp + ARG_vdst]; - i3 = r2; // *v - - r1 = [fp + ARG_srcStride]; - r2 = r0 + r1; - - i0 = r0; // uyvy_T even - i1 = r2; // uyvy_B odd - - p2 = [fp + ARG_lumStride]; - p1 = p0 + p2; // Y bot odd - - p5 = [fp + ARG_width]; - p4 = [fp + ARG_height]; - r0 = p5; - p4 = p4 >> 1; - p5 = p5 >> 2; - - r2 = r0 << 1; - r1 = r1 << 1; - r1 = r1 - r2; // srcStride + (srcStride - 2*width) - r1 += -8; // i0,i1 is pre read need to correct - m0 = r1; - - r2 = [fp + ARG_chromStride]; - r0 = r0 >> 1; - r2 = r2 - r0; - m1 = r2; - - /* I0,I1 - src input line pointers - * p0,p1 - luma output line pointers - * I2 - dstU - * I3 - dstV - */ - - lsetup (0f, 1f) lc1 = p4; // H/2 -0: r0 = [i0++] || r2 = [i1++]; - r1 = [i0++] || r3 = [i1++]; - r4 = bytepack(r0, r1); - r5 = bytepack(r2, r3); - lsetup (2f, 3f) lc0 = p5; // W/4 -2: r0 = r0 >> 8(v) || [p0++] = r4; // yyyy-even - r1 = r1 >> 8(v) || [p1++] = r5; // yyyy-odd - r2 = r2 >> 8(v); - r3 = r3 >> 8(v); - r4 = byteop1p(r1:0, r3:2); - r5 = byteop1p(r1:0, r3:2) (r); - r6 = pack(r5.l, r4.l); - r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; - r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; - r4 = bytepack(r0, r1) || w[i2++] = r6.l; // uu -3: r5 = bytepack(r2, r3) || w[i3++] = r6.h; // vv - - i0 += m0; - i1 += m0; - i2 += m1; - i3 += m1; - p0 = p0 + p2; -1: p1 = p1 + p2; - - (r7:4,p5:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuyvtoyv12) diff --git a/ffmpeg/libswscale/bfin/swscale_bfin.c b/ffmpeg/libswscale/bfin/swscale_bfin.c deleted file mode 100644 index 33c3ec5..0000000 --- a/ffmpeg/libswscale/bfin/swscale_bfin.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> - * - * Blackfin software video scaler operations - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "config.h" -#include "libavutil/attributes.h" -#include "libswscale/swscale_internal.h" - -#if defined (__FDPIC__) && CONFIG_SRAM -#define L1CODE __attribute__((l1_text)) -#else -#define L1CODE -#endif - -int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, - int lumStride, int chromStride, int srcStride) L1CODE; - -int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, - int lumStride, int chromStride, int srcStride) L1CODE; - -static int uyvytoyv12_unscaled(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; - uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; - uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; - const uint8_t *ip = src[0] + srcStride[0] * srcSliceY; - int w = dstStride[0]; - - ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH, - dstStride[0], dstStride[1], srcStride[0]); - - return srcSliceH; -} - -static int yuyvtoyv12_unscaled(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; - uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; - uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; - const uint8_t *ip = src[0] + srcStride[0] * srcSliceY; - int w = dstStride[0]; - - ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH, - dstStride[0], dstStride[1], srcStride[0]); - - return srcSliceH; -} - -av_cold void ff_get_unscaled_swscale_bfin(SwsContext *c) -{ - if (c->dstFormat == AV_PIX_FMT_YUV420P && c->srcFormat == AV_PIX_FMT_UYVY422) { - av_log(NULL, AV_LOG_VERBOSE, - "selecting Blackfin optimized uyvytoyv12_unscaled\n"); - c->swscale = uyvytoyv12_unscaled; - } - if (c->dstFormat == AV_PIX_FMT_YUV420P && c->srcFormat == AV_PIX_FMT_YUYV422) { - av_log(NULL, AV_LOG_VERBOSE, - "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); - c->swscale = yuyvtoyv12_unscaled; - } -} diff --git a/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c b/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c deleted file mode 100644 index 2a36ad5..0000000 --- a/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> - * - * Blackfin video color space converter operations - * convert I420 YV12 to RGB in various formats - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/pixdesc.h" -#include <stdint.h> - -#include "config.h" -#include "libavutil/attributes.h" -#include "libswscale/swscale_internal.h" - -#if defined(__FDPIC__) && CONFIG_SRAM -#define L1CODE __attribute__((l1_text)) -#else -#define L1CODE -#endif - -void ff_bfin_yuv2rgb555_line(const uint8_t *Y, const uint8_t *U, - const uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) L1CODE; - -void ff_bfin_yuv2rgb565_line(const uint8_t *Y, const uint8_t *U, - const uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) L1CODE; - -void ff_bfin_yuv2rgb24_line(const uint8_t *Y, const uint8_t *U, - const uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) L1CODE; - -typedef void (*ltransform)(const uint8_t *Y, const uint8_t *U, const uint8_t *V, - uint8_t *out, int w, uint32_t *coeffs); - -static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) -{ - int oy; - oy = c->yOffset & 0xffff; - oy = oy >> 3; // keep everything U8.0 for offset calculation - - c->oc = 128 * 0x01010101U; - c->oy = oy * 0x01010101U; - - /* copy 64bit vector coeffs down to 32bit vector coeffs */ - c->cy = c->yCoeff; - c->zero = 0; - - if (rgb) { - c->crv = c->vrCoeff; - c->cbu = c->ubCoeff; - c->cgu = c->ugCoeff; - c->cgv = c->vgCoeff; - } else { - c->crv = c->ubCoeff; - c->cbu = c->vrCoeff; - c->cgu = c->vgCoeff; - c->cgv = c->ugCoeff; - } - - if (masks == 555) { - c->rmask = 0x001f * 0x00010001U; - c->gmask = 0x03e0 * 0x00010001U; - c->bmask = 0x7c00 * 0x00010001U; - } else if (masks == 565) { - c->rmask = 0x001f * 0x00010001U; - c->gmask = 0x07e0 * 0x00010001U; - c->bmask = 0xf800 * 0x00010001U; - } -} - -static int core_yuv420_rgb(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, uint8_t **oplanes, - int *outstrides, ltransform lcscf, - int rgb, int masks) -{ - const uint8_t *py, *pu, *pv; - uint8_t *op; - int w = instrides[0]; - int h2 = srcSliceH >> 1; - int i; - - bfin_prepare_coefficients(c, rgb, masks); - - py = in[0]; - pu = in[1 + (1 ^ rgb)]; - pv = in[1 + (0 ^ rgb)]; - - op = oplanes[0] + srcSliceY * outstrides[0]; - - for (i = 0; i < h2; i++) { - lcscf(py, pu, pv, op, w, &c->oy); - - py += instrides[0]; - op += outstrides[0]; - - lcscf(py, pu, pv, op, w, &c->oy); - - py += instrides[0]; - pu += instrides[1]; - pv += instrides[2]; - op += outstrides[0]; - } - - return srcSliceH; -} - -static int bfin_yuv420_rgb555(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb555_line, 1, 555); -} - -static int bfin_yuv420_bgr555(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb555_line, 0, 555); -} - -static int bfin_yuv420_rgb24(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb24_line, 1, 888); -} - -static int bfin_yuv420_bgr24(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb24_line, 0, 888); -} - -static int bfin_yuv420_rgb565(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb565_line, 1, 565); -} - -static int bfin_yuv420_bgr565(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb565_line, 0, 565); -} - -av_cold SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c) -{ - SwsFunc f; - - switch (c->dstFormat) { - case AV_PIX_FMT_RGB555: - f = bfin_yuv420_rgb555; - break; - case AV_PIX_FMT_BGR555: - f = bfin_yuv420_bgr555; - break; - case AV_PIX_FMT_RGB565: - f = bfin_yuv420_rgb565; - break; - case AV_PIX_FMT_BGR565: - f = bfin_yuv420_bgr565; - break; - case AV_PIX_FMT_RGB24: - f = bfin_yuv420_rgb24; - break; - case AV_PIX_FMT_BGR24: - f = bfin_yuv420_bgr24; - break; - default: - return 0; - } - - av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - av_get_pix_fmt_name(c->dstFormat)); - - return f; -} diff --git a/ffmpeg/libswscale/colorspace-test.c b/ffmpeg/libswscale/colorspace-test.c deleted file mode 100644 index 42a915b..0000000 --- a/ffmpeg/libswscale/colorspace-test.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdio.h> -#include <string.h> /* for memset() */ -#include <stdlib.h> -#include <inttypes.h> - -#include "swscale.h" -#include "rgb2rgb.h" -#include "libavutil/mem.h" - -#define SIZE 1000 -#define srcByte 0x55 -#define dstByte 0xBB - -#define FUNC(s, d, n) { s, d, #n, n } - -int main(int argc, char **argv) -{ - int i, funcNum; - uint8_t *srcBuffer = av_malloc(SIZE); - uint8_t *dstBuffer = av_malloc(SIZE); - int failedNum = 0; - int passedNum = 0; - - if (!srcBuffer || !dstBuffer) - return -1; - - av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); - sws_rgb2rgb_init(); - - for (funcNum = 0; ; funcNum++) { - struct func_info_s { - int src_bpp; - int dst_bpp; - const char *name; - void (*func)(const uint8_t *src, uint8_t *dst, int src_size); - } func_info[] = { - FUNC(2, 2, rgb12to15), - FUNC(2, 2, rgb15to16), - FUNC(2, 3, rgb15to24), - FUNC(2, 4, rgb15to32), - FUNC(2, 3, rgb16to24), - FUNC(2, 4, rgb16to32), - FUNC(3, 2, rgb24to15), - FUNC(3, 2, rgb24to16), - FUNC(3, 4, rgb24to32), - FUNC(4, 2, rgb32to15), - FUNC(4, 2, rgb32to16), - FUNC(4, 3, rgb32to24), - FUNC(2, 2, rgb16to15), - FUNC(2, 2, rgb12tobgr12), - FUNC(2, 2, rgb15tobgr15), - FUNC(2, 2, rgb15tobgr16), - FUNC(2, 3, rgb15tobgr24), - FUNC(2, 4, rgb15tobgr32), - FUNC(2, 2, rgb16tobgr15), - FUNC(2, 2, rgb16tobgr16), - FUNC(2, 3, rgb16tobgr24), - FUNC(2, 4, rgb16tobgr32), - FUNC(3, 2, rgb24tobgr15), - FUNC(3, 2, rgb24tobgr16), - FUNC(3, 3, rgb24tobgr24), - FUNC(3, 4, rgb24tobgr32), - FUNC(4, 2, rgb32tobgr15), - FUNC(4, 2, rgb32tobgr16), - FUNC(4, 3, rgb32tobgr24), - FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */ - FUNC(6, 6, rgb48tobgr48_nobswap), - FUNC(6, 6, rgb48tobgr48_bswap), - FUNC(8, 6, rgb64to48_nobswap), - FUNC(8, 6, rgb64to48_bswap), - FUNC(8, 6, rgb64tobgr48_nobswap), - FUNC(8, 6, rgb64tobgr48_bswap), - FUNC(0, 0, NULL) - }; - int width; - int failed = 0; - int srcBpp = 0; - int dstBpp = 0; - - if (!func_info[funcNum].func) - break; - - av_log(NULL, AV_LOG_INFO, "."); - memset(srcBuffer, srcByte, SIZE); - - for (width = 63; width > 0; width--) { - int dstOffset; - for (dstOffset = 128; dstOffset < 196; dstOffset += 4) { - int srcOffset; - memset(dstBuffer, dstByte, SIZE); - - for (srcOffset = 128; srcOffset < 196; srcOffset += 4) { - uint8_t *src = srcBuffer + srcOffset; - uint8_t *dst = dstBuffer + dstOffset; - const char *name = NULL; - - // don't fill the screen with shit ... - if (failed) - break; - - srcBpp = func_info[funcNum].src_bpp; - dstBpp = func_info[funcNum].dst_bpp; - name = func_info[funcNum].name; - - func_info[funcNum].func(src, dst, width * srcBpp); - - if (!srcBpp) - break; - - for (i = 0; i < SIZE; i++) { - if (srcBuffer[i] != srcByte) { - av_log(NULL, AV_LOG_INFO, - "src damaged at %d w:%d src:%d dst:%d %s\n", - i, width, srcOffset, dstOffset, name); - failed = 1; - break; - } - } - for (i = 0; i < dstOffset; i++) { - if (dstBuffer[i] != dstByte) { - av_log(NULL, AV_LOG_INFO, - "dst damaged at %d w:%d src:%d dst:%d %s\n", - i, width, srcOffset, dstOffset, name); - failed = 1; - break; - } - } - for (i = dstOffset + width * dstBpp; i < SIZE; i++) { - if (dstBuffer[i] != dstByte) { - av_log(NULL, AV_LOG_INFO, - "dst damaged at %d w:%d src:%d dst:%d %s\n", - i, width, srcOffset, dstOffset, name); - failed = 1; - break; - } - } - } - } - } - if (failed) - failedNum++; - else if (srcBpp) - passedNum++; - } - - av_log(NULL, AV_LOG_INFO, - "\n%d converters passed, %d converters randomly overwrote memory\n", - passedNum, failedNum); - return failedNum; -} diff --git a/ffmpeg/libswscale/input.c b/ffmpeg/libswscale/input.c deleted file mode 100644 index 919b232..0000000 --- a/ffmpeg/libswscale/input.c +++ /dev/null @@ -1,1307 +0,0 @@ -/* - * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <math.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> - -#include "libavutil/avutil.h" -#include "libavutil/bswap.h" -#include "libavutil/cpu.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/mathematics.h" -#include "libavutil/pixdesc.h" -#include "libavutil/avassert.h" -#include "config.h" -#include "rgb2rgb.h" -#include "swscale.h" -#include "swscale_internal.h" - -#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) - -#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? b_r : r_b) -#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? r_b : b_r) - -static av_always_inline void -rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, - enum AVPixelFormat origin, int32_t *rgb2yuv) -{ - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int i; - for (i = 0; i < width; i++) { - unsigned int r_b = input_pixel(&src[i*4+0]); - unsigned int g = input_pixel(&src[i*4+1]); - unsigned int b_r = input_pixel(&src[i*4+2]); - - dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - } -} - -static av_always_inline void -rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, - const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin, int32_t *rgb2yuv) -{ - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - av_assert1(src1==src2); - for (i = 0; i < width; i++) { - int r_b = input_pixel(&src1[i*4+0]); - int g = input_pixel(&src1[i*4+1]); - int b_r = input_pixel(&src1[i*4+2]); - - dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - } -} - -static av_always_inline void -rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, - const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin, int32_t *rgb2yuv) -{ - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - av_assert1(src1==src2); - for (i = 0; i < width; i++) { - int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1; - int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1; - int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1; - - dstU[i]= (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i]= (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - } -} - -#define rgb64funcs(pattern, BE_LE, origin) \ -static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ - int width, uint32_t *rgb2yuv) \ -{ \ - const uint16_t *src = (const uint16_t *) _src; \ - uint16_t *dst = (uint16_t *) _dst; \ - rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \ -} \ - \ -static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *rgb2yuv) \ -{ \ - const uint16_t *src1 = (const uint16_t *) _src1, \ - *src2 = (const uint16_t *) _src2; \ - uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ -} \ - \ -static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *rgb2yuv) \ -{ \ - const uint16_t *src1 = (const uint16_t *) _src1, \ - *src2 = (const uint16_t *) _src2; \ - uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ -} - -rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE) -rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE) - -static av_always_inline void rgb48ToY_c_template(uint16_t *dst, - const uint16_t *src, int width, - enum AVPixelFormat origin, - int32_t *rgb2yuv) -{ - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int i; - for (i = 0; i < width; i++) { - unsigned int r_b = input_pixel(&src[i * 3 + 0]); - unsigned int g = input_pixel(&src[i * 3 + 1]); - unsigned int b_r = input_pixel(&src[i * 3 + 2]); - - dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - } -} - -static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU, - uint16_t *dstV, - const uint16_t *src1, - const uint16_t *src2, - int width, - enum AVPixelFormat origin, - int32_t *rgb2yuv) -{ - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - av_assert1(src1 == src2); - for (i = 0; i < width; i++) { - int r_b = input_pixel(&src1[i * 3 + 0]); - int g = input_pixel(&src1[i * 3 + 1]); - int b_r = input_pixel(&src1[i * 3 + 2]); - - dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - } -} - -static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, - uint16_t *dstV, - const uint16_t *src1, - const uint16_t *src2, - int width, - enum AVPixelFormat origin, - int32_t *rgb2yuv) -{ - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - av_assert1(src1 == src2); - for (i = 0; i < width; i++) { - int r_b = (input_pixel(&src1[6 * i + 0]) + - input_pixel(&src1[6 * i + 3]) + 1) >> 1; - int g = (input_pixel(&src1[6 * i + 1]) + - input_pixel(&src1[6 * i + 4]) + 1) >> 1; - int b_r = (input_pixel(&src1[6 * i + 2]) + - input_pixel(&src1[6 * i + 5]) + 1) >> 1; - - dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - } -} - -#undef r -#undef b -#undef input_pixel - -#define rgb48funcs(pattern, BE_LE, origin) \ -static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ - const uint8_t *_src, \ - const uint8_t *unused0, const uint8_t *unused1,\ - int width, \ - uint32_t *rgb2yuv) \ -{ \ - const uint16_t *src = (const uint16_t *)_src; \ - uint16_t *dst = (uint16_t *)_dst; \ - rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \ -} \ - \ -static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ - uint8_t *_dstV, \ - const uint8_t *unused0, \ - const uint8_t *_src1, \ - const uint8_t *_src2, \ - int width, \ - uint32_t *rgb2yuv) \ -{ \ - const uint16_t *src1 = (const uint16_t *)_src1, \ - *src2 = (const uint16_t *)_src2; \ - uint16_t *dstU = (uint16_t *)_dstU, \ - *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ -} \ - \ -static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ - uint8_t *_dstV, \ - const uint8_t *unused0, \ - const uint8_t *_src1, \ - const uint8_t *_src2, \ - int width, \ - uint32_t *rgb2yuv) \ -{ \ - const uint16_t *src1 = (const uint16_t *)_src1, \ - *src2 = (const uint16_t *)_src2; \ - uint16_t *dstU = (uint16_t *)_dstU, \ - *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ -} - -rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE) -rgb48funcs(rgb, BE, AV_PIX_FMT_RGB48BE) -rgb48funcs(bgr, LE, AV_PIX_FMT_BGR48LE) -rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE) - -#define input_pixel(i) ((origin == AV_PIX_FMT_RGBA || \ - origin == AV_PIX_FMT_BGRA || \ - origin == AV_PIX_FMT_ARGB || \ - origin == AV_PIX_FMT_ABGR) \ - ? AV_RN32A(&src[(i) * 4]) \ - : (isBE(origin) ? AV_RB16(&src[(i) * 2]) \ - : AV_RL16(&src[(i) * 2]))) - -static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, - const uint8_t *src, - int width, - enum AVPixelFormat origin, - int shr, int shg, - int shb, int shp, - int maskr, int maskg, - int maskb, int rsh, - int gsh, int bsh, int S, - int32_t *rgb2yuv) -{ - const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh; - const unsigned rnd = (32<<((S)-1)) + (1<<(S-7)); - int i; - - for (i = 0; i < width; i++) { - int px = input_pixel(i) >> shp; - int b = (px & maskb) >> shb; - int g = (px & maskg) >> shg; - int r = (px & maskr) >> shr; - - dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); - } -} - -static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU, - int16_t *dstV, - const uint8_t *src, - int width, - enum AVPixelFormat origin, - int shr, int shg, - int shb, int shp, - int maskr, int maskg, - int maskb, int rsh, - int gsh, int bsh, int S, - int32_t *rgb2yuv) -{ - const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh, - rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh; - const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7)); - int i; - - for (i = 0; i < width; i++) { - int px = input_pixel(i) >> shp; - int b = (px & maskb) >> shb; - int g = (px & maskg) >> shg; - int r = (px & maskr) >> shr; - - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); - } -} - -static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, - int16_t *dstV, - const uint8_t *src, - int width, - enum AVPixelFormat origin, - int shr, int shg, - int shb, int shp, - int maskr, int maskg, - int maskb, int rsh, - int gsh, int bsh, int S, - int32_t *rgb2yuv) -{ - const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh, - rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh, - maskgx = ~(maskr | maskb); - const unsigned rnd = (256U<<(S)) + (1<<(S-6)); - int i; - - maskr |= maskr << 1; - maskb |= maskb << 1; - maskg |= maskg << 1; - for (i = 0; i < width; i++) { - int px0 = input_pixel(2 * i + 0) >> shp; - int px1 = input_pixel(2 * i + 1) >> shp; - int b, r, g = (px0 & maskgx) + (px1 & maskgx); - int rb = px0 + px1 - g; - - b = (rb & maskb) >> shb; - if (shp || - origin == AV_PIX_FMT_BGR565LE || origin == AV_PIX_FMT_BGR565BE || - origin == AV_PIX_FMT_RGB565LE || origin == AV_PIX_FMT_RGB565BE) { - g >>= shg; - } else { - g = (g & maskg) >> shg; - } - r = (rb & maskr) >> shr; - - dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); - dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); - } -} - -#undef input_pixel - -#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ - maskg, maskb, rsh, gsh, bsh, S) \ -static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ - int width, uint32_t *tab) \ -{ \ - rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \ - maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \ -} \ - \ -static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ - int width, uint32_t *tab) \ -{ \ - rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ - shr, shg, shb, shp, \ - maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\ -} \ - \ -static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *unused0, const uint8_t *src, \ - const uint8_t *dummy, \ - int width, uint32_t *tab) \ -{ \ - rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ - shr, shg, shb, shp, \ - maskr, maskg, maskb, \ - rsh, gsh, bsh, S, tab); \ -} - -rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) - -static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, - const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, - int width, uint32_t *rgb2yuv) -{ - uint16_t *dstU = (uint16_t *)_dstU; - uint16_t *dstV = (uint16_t *)_dstV; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - - int i; - for (i = 0; i < width; i++) { - unsigned int g = gsrc[2*i] + gsrc[2*i+1]; - unsigned int b = bsrc[2*i] + bsrc[2*i+1]; - unsigned int r = rsrc[2*i] + rsrc[2*i+1]; - - dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); - dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); - } -} - -static void rgba64ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) -{ - int16_t *dst = (int16_t *)_dst; - const uint16_t *src = (const uint16_t *)_src; - int i; - for (i = 0; i < width; i++) - dst[i] = src[4 * i + 3]; -} - -static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) -{ - int16_t *dst = (int16_t *)_dst; - int i; - for (i=0; i<width; i++) { - dst[i]= src[4*i]<<6; - } -} - -static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) -{ - int16_t *dst = (int16_t *)_dst; - int i; - for (i=0; i<width; i++) { - dst[i]= src[4*i+3]<<6; - } -} - -static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) -{ - int16_t *dst = (int16_t *)_dst; - int i; - for (i=0; i<width; i++) { - int d= src[i]; - - dst[i]= (pal[d] >> 24)<<6; - } -} - -static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) -{ - int16_t *dst = (int16_t *)_dst; - int i; - for (i = 0; i < width; i++) { - int d = src[i]; - - dst[i] = (pal[d] & 0xFF)<<6; - } -} - -static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *pal) -{ - uint16_t *dstU = (uint16_t *)_dstU; - int16_t *dstV = (int16_t *)_dstV; - int i; - av_assert1(src1 == src2); - for (i = 0; i < width; i++) { - int p = pal[src1[i]]; - - dstU[i] = (uint8_t)(p>> 8)<<6; - dstV[i] = (uint8_t)(p>>16)<<6; - } -} - -static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) -{ - int16_t *dst = (int16_t *)_dst; - int i, j; - width = (width + 7) >> 3; - for (i = 0; i < width; i++) { - int d = ~src[i]; - for (j = 0; j < 8; j++) - dst[8*i+j]= ((d>>(7-j))&1) * 16383; - } - if(width&7){ - int d= ~src[i]; - for (j = 0; j < (width&7); j++) - dst[8*i+j]= ((d>>(7-j))&1) * 16383; - } -} - -static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) -{ - int16_t *dst = (int16_t *)_dst; - int i, j; - width = (width + 7) >> 3; - for (i = 0; i < width; i++) { - int d = src[i]; - for (j = 0; j < 8; j++) - dst[8*i+j]= ((d>>(7-j))&1) * 16383; - } - if(width&7){ - int d = src[i]; - for (j = 0; j < (width&7); j++) - dst[8*i+j] = ((d>>(7-j))&1) * 16383; - } -} - -static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) -{ - int i; - for (i = 0; i < width; i++) - dst[i] = src[2 * i]; -} - -static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) -{ - int i; - for (i = 0; i < width; i++) { - dstU[i] = src1[4 * i + 1]; - dstV[i] = src1[4 * i + 3]; - } - av_assert1(src1 == src2); -} - -static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) -{ - int i; - const uint16_t *src = (const uint16_t *)_src; - uint16_t *dst = (uint16_t *)_dst; - for (i = 0; i < width; i++) - dst[i] = av_bswap16(src[i]); -} - -static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1, - const uint8_t *_src2, int width, uint32_t *unused) -{ - int i; - const uint16_t *src1 = (const uint16_t *)_src1, - *src2 = (const uint16_t *)_src2; - uint16_t *dstU = (uint16_t *)_dstU, *dstV = (uint16_t *)_dstV; - for (i = 0; i < width; i++) { - dstU[i] = av_bswap16(src1[i]); - dstV[i] = av_bswap16(src2[i]); - } -} - -/* This is almost identical to the previous, end exists only because - * yuy2ToY/UV)(dst, src + 1, ...) would have 100% unaligned accesses. */ -static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) -{ - int i; - for (i = 0; i < width; i++) - dst[i] = src[2 * i + 1]; -} - -static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) -{ - int i; - for (i = 0; i < width; i++) { - dstU[i] = src1[4 * i + 0]; - dstV[i] = src1[4 * i + 2]; - } - av_assert1(src1 == src2); -} - -static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, - const uint8_t *src, int width) -{ - int i; - for (i = 0; i < width; i++) { - dst1[i] = src[2 * i + 0]; - dst2[i] = src[2 * i + 1]; - } -} - -static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - nvXXtoUV_c(dstU, dstV, src1, width); -} - -static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - nvXXtoUV_c(dstV, dstU, src1, width); -} - -#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) - -static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, - int width, uint32_t *rgb2yuv) -{ - int16_t *dst = (int16_t *)_dst; - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int i; - for (i = 0; i < width; i++) { - int b = src[i * 3 + 0]; - int g = src[i * 3 + 1]; - int r = src[i * 3 + 2]; - - dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); - } -} - -static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) -{ - int16_t *dstU = (int16_t *)_dstU; - int16_t *dstV = (int16_t *)_dstV; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - int i; - for (i = 0; i < width; i++) { - int b = src1[3 * i + 0]; - int g = src1[3 * i + 1]; - int r = src1[3 * i + 2]; - - dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - } - av_assert1(src1 == src2); -} - -static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) -{ - int16_t *dstU = (int16_t *)_dstU; - int16_t *dstV = (int16_t *)_dstV; - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - for (i = 0; i < width; i++) { - int b = src1[6 * i + 0] + src1[6 * i + 3]; - int g = src1[6 * i + 1] + src1[6 * i + 4]; - int r = src1[6 * i + 2] + src1[6 * i + 5]; - - dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - } - av_assert1(src1 == src2); -} - -static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *rgb2yuv) -{ - int16_t *dst = (int16_t *)_dst; - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int i; - for (i = 0; i < width; i++) { - int r = src[i * 3 + 0]; - int g = src[i * 3 + 1]; - int b = src[i * 3 + 2]; - - dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); - } -} - -static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) -{ - int16_t *dstU = (int16_t *)_dstU; - int16_t *dstV = (int16_t *)_dstV; - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - av_assert1(src1 == src2); - for (i = 0; i < width; i++) { - int r = src1[3 * i + 0]; - int g = src1[3 * i + 1]; - int b = src1[3 * i + 2]; - - dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - } -} - -static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) -{ - int16_t *dstU = (int16_t *)_dstU; - int16_t *dstV = (int16_t *)_dstV; - int i; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - av_assert1(src1 == src2); - for (i = 0; i < width; i++) { - int r = src1[6 * i + 0] + src1[6 * i + 3]; - int g = src1[6 * i + 1] + src1[6 * i + 4]; - int b = src1[6 * i + 2] + src1[6 * i + 5]; - - dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - } -} - -static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv) -{ - uint16_t *dst = (uint16_t *)_dst; - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int i; - for (i = 0; i < width; i++) { - int g = src[0][i]; - int b = src[1][i]; - int r = src[2][i]; - - dst[i] = (ry*r + gy*g + by*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); - } -} - -static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused) -{ - uint16_t *dst = (uint16_t *)_dst; - int i; - for (i = 0; i < width; i++) - dst[i] = src[3][i] << 6; -} - -static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv) -{ - uint16_t *dstU = (uint16_t *)_dstU; - uint16_t *dstV = (uint16_t *)_dstV; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - int i; - for (i = 0; i < width; i++) { - int g = src[0][i]; - int b = src[1][i]; - int r = src[2][i]; - - dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); - dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); - } -} - -#define rdpx(src) \ - is_be ? AV_RB16(src) : AV_RL16(src) -static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4], - int width, int bpc, int is_be, int32_t *rgb2yuv) -{ - int i; - const uint16_t **src = (const uint16_t **)_src; - uint16_t *dst = (uint16_t *)_dst; - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int shift = bpc < 16 ? bpc : 14; - for (i = 0; i < width; i++) { - int g = rdpx(src[0] + i); - int b = rdpx(src[1] + i); - int r = rdpx(src[2] + i); - - dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); - } -} - -static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, - const uint8_t *_src[4], int width, - int bpc, int is_be, int32_t *rgb2yuv) -{ - int i; - const uint16_t **src = (const uint16_t **)_src; - uint16_t *dstU = (uint16_t *)_dstU; - uint16_t *dstV = (uint16_t *)_dstV; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - int shift = bpc < 16 ? bpc : 14; - for (i = 0; i < width; i++) { - int g = rdpx(src[0] + i); - int b = rdpx(src[1] + i); - int r = rdpx(src[2] + i); - - dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); - dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); - } -} -#undef rdpx - -#define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ -static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ - int w, int32_t *rgb2yuv) \ -{ \ - planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv); \ -} \ -static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src[4], int w, int32_t *rgb2yuv) \ -{ \ - planar_rgb16_to_uv(dstU, dstV, src, w, nbits, endian, rgb2yuv); \ -} \ - -#define rgb9plus_planar_funcs(nbits) \ - rgb9plus_planar_funcs_endian(nbits, le, 0) \ - rgb9plus_planar_funcs_endian(nbits, be, 1) - -rgb9plus_planar_funcs(9) -rgb9plus_planar_funcs(10) -rgb9plus_planar_funcs(12) -rgb9plus_planar_funcs(14) -rgb9plus_planar_funcs(16) - -av_cold void ff_sws_init_input_funcs(SwsContext *c) -{ - enum AVPixelFormat srcFormat = c->srcFormat; - - c->chrToYV12 = NULL; - switch (srcFormat) { - case AV_PIX_FMT_YUYV422: - c->chrToYV12 = yuy2ToUV_c; - break; - case AV_PIX_FMT_UYVY422: - c->chrToYV12 = uyvyToUV_c; - break; - case AV_PIX_FMT_NV12: - c->chrToYV12 = nv12ToUV_c; - break; - case AV_PIX_FMT_NV21: - c->chrToYV12 = nv21ToUV_c; - break; - case AV_PIX_FMT_RGB8: - case AV_PIX_FMT_BGR8: - case AV_PIX_FMT_PAL8: - case AV_PIX_FMT_BGR4_BYTE: - case AV_PIX_FMT_RGB4_BYTE: - c->chrToYV12 = palToUV_c; - break; - case AV_PIX_FMT_GBRP9LE: - c->readChrPlanar = planar_rgb9le_to_uv; - break; - case AV_PIX_FMT_GBRP10LE: - c->readChrPlanar = planar_rgb10le_to_uv; - break; - case AV_PIX_FMT_GBRP12LE: - c->readChrPlanar = planar_rgb12le_to_uv; - break; - case AV_PIX_FMT_GBRP14LE: - c->readChrPlanar = planar_rgb14le_to_uv; - break; - case AV_PIX_FMT_GBRAP16LE: - case AV_PIX_FMT_GBRP16LE: - c->readChrPlanar = planar_rgb16le_to_uv; - break; - case AV_PIX_FMT_GBRP9BE: - c->readChrPlanar = planar_rgb9be_to_uv; - break; - case AV_PIX_FMT_GBRP10BE: - c->readChrPlanar = planar_rgb10be_to_uv; - break; - case AV_PIX_FMT_GBRP12BE: - c->readChrPlanar = planar_rgb12be_to_uv; - break; - case AV_PIX_FMT_GBRP14BE: - c->readChrPlanar = planar_rgb14be_to_uv; - break; - case AV_PIX_FMT_GBRAP16BE: - case AV_PIX_FMT_GBRP16BE: - c->readChrPlanar = planar_rgb16be_to_uv; - break; - case AV_PIX_FMT_GBRAP: - case AV_PIX_FMT_GBRP: - c->readChrPlanar = planar_rgb_to_uv; - break; -#if HAVE_BIGENDIAN - case AV_PIX_FMT_YUV444P9LE: - case AV_PIX_FMT_YUV422P9LE: - case AV_PIX_FMT_YUV420P9LE: - case AV_PIX_FMT_YUV422P10LE: - case AV_PIX_FMT_YUV444P10LE: - case AV_PIX_FMT_YUV420P10LE: - case AV_PIX_FMT_YUV422P12LE: - case AV_PIX_FMT_YUV444P12LE: - case AV_PIX_FMT_YUV420P12LE: - case AV_PIX_FMT_YUV422P14LE: - case AV_PIX_FMT_YUV444P14LE: - case AV_PIX_FMT_YUV420P14LE: - case AV_PIX_FMT_YUV420P16LE: - case AV_PIX_FMT_YUV422P16LE: - case AV_PIX_FMT_YUV444P16LE: - - case AV_PIX_FMT_YUVA444P9LE: - case AV_PIX_FMT_YUVA422P9LE: - case AV_PIX_FMT_YUVA420P9LE: - case AV_PIX_FMT_YUVA444P10LE: - case AV_PIX_FMT_YUVA422P10LE: - case AV_PIX_FMT_YUVA420P10LE: - case AV_PIX_FMT_YUVA420P16LE: - case AV_PIX_FMT_YUVA422P16LE: - case AV_PIX_FMT_YUVA444P16LE: - c->chrToYV12 = bswap16UV_c; - break; -#else - case AV_PIX_FMT_YUV444P9BE: - case AV_PIX_FMT_YUV422P9BE: - case AV_PIX_FMT_YUV420P9BE: - case AV_PIX_FMT_YUV444P10BE: - case AV_PIX_FMT_YUV422P10BE: - case AV_PIX_FMT_YUV420P10BE: - case AV_PIX_FMT_YUV444P12BE: - case AV_PIX_FMT_YUV422P12BE: - case AV_PIX_FMT_YUV420P12BE: - case AV_PIX_FMT_YUV444P14BE: - case AV_PIX_FMT_YUV422P14BE: - case AV_PIX_FMT_YUV420P14BE: - case AV_PIX_FMT_YUV420P16BE: - case AV_PIX_FMT_YUV422P16BE: - case AV_PIX_FMT_YUV444P16BE: - - case AV_PIX_FMT_YUVA444P9BE: - case AV_PIX_FMT_YUVA422P9BE: - case AV_PIX_FMT_YUVA420P9BE: - case AV_PIX_FMT_YUVA444P10BE: - case AV_PIX_FMT_YUVA422P10BE: - case AV_PIX_FMT_YUVA420P10BE: - case AV_PIX_FMT_YUVA420P16BE: - case AV_PIX_FMT_YUVA422P16BE: - case AV_PIX_FMT_YUVA444P16BE: - c->chrToYV12 = bswap16UV_c; - break; -#endif - } - if (c->chrSrcHSubSample) { - switch (srcFormat) { - case AV_PIX_FMT_RGBA64BE: - c->chrToYV12 = rgb64BEToUV_half_c; - break; - case AV_PIX_FMT_RGBA64LE: - c->chrToYV12 = rgb64LEToUV_half_c; - break; - case AV_PIX_FMT_RGB48BE: - c->chrToYV12 = rgb48BEToUV_half_c; - break; - case AV_PIX_FMT_RGB48LE: - c->chrToYV12 = rgb48LEToUV_half_c; - break; - case AV_PIX_FMT_BGR48BE: - c->chrToYV12 = bgr48BEToUV_half_c; - break; - case AV_PIX_FMT_BGR48LE: - c->chrToYV12 = bgr48LEToUV_half_c; - break; - case AV_PIX_FMT_RGB32: - c->chrToYV12 = bgr32ToUV_half_c; - break; - case AV_PIX_FMT_RGB32_1: - c->chrToYV12 = bgr321ToUV_half_c; - break; - case AV_PIX_FMT_BGR24: - c->chrToYV12 = bgr24ToUV_half_c; - break; - case AV_PIX_FMT_BGR565LE: - c->chrToYV12 = bgr16leToUV_half_c; - break; - case AV_PIX_FMT_BGR565BE: - c->chrToYV12 = bgr16beToUV_half_c; - break; - case AV_PIX_FMT_BGR555LE: - c->chrToYV12 = bgr15leToUV_half_c; - break; - case AV_PIX_FMT_BGR555BE: - c->chrToYV12 = bgr15beToUV_half_c; - break; - case AV_PIX_FMT_GBRAP: - case AV_PIX_FMT_GBRP: - c->chrToYV12 = gbr24pToUV_half_c; - break; - case AV_PIX_FMT_BGR444LE: - c->chrToYV12 = bgr12leToUV_half_c; - break; - case AV_PIX_FMT_BGR444BE: - c->chrToYV12 = bgr12beToUV_half_c; - break; - case AV_PIX_FMT_BGR32: - c->chrToYV12 = rgb32ToUV_half_c; - break; - case AV_PIX_FMT_BGR32_1: - c->chrToYV12 = rgb321ToUV_half_c; - break; - case AV_PIX_FMT_RGB24: - c->chrToYV12 = rgb24ToUV_half_c; - break; - case AV_PIX_FMT_RGB565LE: - c->chrToYV12 = rgb16leToUV_half_c; - break; - case AV_PIX_FMT_RGB565BE: - c->chrToYV12 = rgb16beToUV_half_c; - break; - case AV_PIX_FMT_RGB555LE: - c->chrToYV12 = rgb15leToUV_half_c; - break; - case AV_PIX_FMT_RGB555BE: - c->chrToYV12 = rgb15beToUV_half_c; - break; - case AV_PIX_FMT_RGB444LE: - c->chrToYV12 = rgb12leToUV_half_c; - break; - case AV_PIX_FMT_RGB444BE: - c->chrToYV12 = rgb12beToUV_half_c; - break; - } - } else { - switch (srcFormat) { - case AV_PIX_FMT_RGBA64BE: - c->chrToYV12 = rgb64BEToUV_c; - break; - case AV_PIX_FMT_RGBA64LE: - c->chrToYV12 = rgb64LEToUV_c; - break; - case AV_PIX_FMT_RGB48BE: - c->chrToYV12 = rgb48BEToUV_c; - break; - case AV_PIX_FMT_RGB48LE: - c->chrToYV12 = rgb48LEToUV_c; - break; - case AV_PIX_FMT_BGR48BE: - c->chrToYV12 = bgr48BEToUV_c; - break; - case AV_PIX_FMT_BGR48LE: - c->chrToYV12 = bgr48LEToUV_c; - break; - case AV_PIX_FMT_RGB32: - c->chrToYV12 = bgr32ToUV_c; - break; - case AV_PIX_FMT_RGB32_1: - c->chrToYV12 = bgr321ToUV_c; - break; - case AV_PIX_FMT_BGR24: - c->chrToYV12 = bgr24ToUV_c; - break; - case AV_PIX_FMT_BGR565LE: - c->chrToYV12 = bgr16leToUV_c; - break; - case AV_PIX_FMT_BGR565BE: - c->chrToYV12 = bgr16beToUV_c; - break; - case AV_PIX_FMT_BGR555LE: - c->chrToYV12 = bgr15leToUV_c; - break; - case AV_PIX_FMT_BGR555BE: - c->chrToYV12 = bgr15beToUV_c; - break; - case AV_PIX_FMT_BGR444LE: - c->chrToYV12 = bgr12leToUV_c; - break; - case AV_PIX_FMT_BGR444BE: - c->chrToYV12 = bgr12beToUV_c; - break; - case AV_PIX_FMT_BGR32: - c->chrToYV12 = rgb32ToUV_c; - break; - case AV_PIX_FMT_BGR32_1: - c->chrToYV12 = rgb321ToUV_c; - break; - case AV_PIX_FMT_RGB24: - c->chrToYV12 = rgb24ToUV_c; - break; - case AV_PIX_FMT_RGB565LE: - c->chrToYV12 = rgb16leToUV_c; - break; - case AV_PIX_FMT_RGB565BE: - c->chrToYV12 = rgb16beToUV_c; - break; - case AV_PIX_FMT_RGB555LE: - c->chrToYV12 = rgb15leToUV_c; - break; - case AV_PIX_FMT_RGB555BE: - c->chrToYV12 = rgb15beToUV_c; - break; - case AV_PIX_FMT_RGB444LE: - c->chrToYV12 = rgb12leToUV_c; - break; - case AV_PIX_FMT_RGB444BE: - c->chrToYV12 = rgb12beToUV_c; - break; - } - } - - c->lumToYV12 = NULL; - c->alpToYV12 = NULL; - switch (srcFormat) { - case AV_PIX_FMT_GBRP9LE: - c->readLumPlanar = planar_rgb9le_to_y; - break; - case AV_PIX_FMT_GBRP10LE: - c->readLumPlanar = planar_rgb10le_to_y; - break; - case AV_PIX_FMT_GBRP12LE: - c->readLumPlanar = planar_rgb12le_to_y; - break; - case AV_PIX_FMT_GBRP14LE: - c->readLumPlanar = planar_rgb14le_to_y; - break; - case AV_PIX_FMT_GBRAP16LE: - case AV_PIX_FMT_GBRP16LE: - c->readLumPlanar = planar_rgb16le_to_y; - break; - case AV_PIX_FMT_GBRP9BE: - c->readLumPlanar = planar_rgb9be_to_y; - break; - case AV_PIX_FMT_GBRP10BE: - c->readLumPlanar = planar_rgb10be_to_y; - break; - case AV_PIX_FMT_GBRP12BE: - c->readLumPlanar = planar_rgb12be_to_y; - break; - case AV_PIX_FMT_GBRP14BE: - c->readLumPlanar = planar_rgb14be_to_y; - break; - case AV_PIX_FMT_GBRAP16BE: - case AV_PIX_FMT_GBRP16BE: - c->readLumPlanar = planar_rgb16be_to_y; - break; - case AV_PIX_FMT_GBRAP: - c->readAlpPlanar = planar_rgb_to_a; - case AV_PIX_FMT_GBRP: - c->readLumPlanar = planar_rgb_to_y; - break; -#if HAVE_BIGENDIAN - case AV_PIX_FMT_YUV444P9LE: - case AV_PIX_FMT_YUV422P9LE: - case AV_PIX_FMT_YUV420P9LE: - case AV_PIX_FMT_YUV444P10LE: - case AV_PIX_FMT_YUV422P10LE: - case AV_PIX_FMT_YUV420P10LE: - case AV_PIX_FMT_YUV444P12LE: - case AV_PIX_FMT_YUV422P12LE: - case AV_PIX_FMT_YUV420P12LE: - case AV_PIX_FMT_YUV444P14LE: - case AV_PIX_FMT_YUV422P14LE: - case AV_PIX_FMT_YUV420P14LE: - case AV_PIX_FMT_YUV420P16LE: - case AV_PIX_FMT_YUV422P16LE: - case AV_PIX_FMT_YUV444P16LE: - - case AV_PIX_FMT_GRAY16LE: - c->lumToYV12 = bswap16Y_c; - break; - case AV_PIX_FMT_YUVA444P9LE: - case AV_PIX_FMT_YUVA422P9LE: - case AV_PIX_FMT_YUVA420P9LE: - case AV_PIX_FMT_YUVA444P10LE: - case AV_PIX_FMT_YUVA422P10LE: - case AV_PIX_FMT_YUVA420P10LE: - case AV_PIX_FMT_YUVA420P16LE: - case AV_PIX_FMT_YUVA422P16LE: - case AV_PIX_FMT_YUVA444P16LE: - c->lumToYV12 = bswap16Y_c; - c->alpToYV12 = bswap16Y_c; - break; -#else - case AV_PIX_FMT_YUV444P9BE: - case AV_PIX_FMT_YUV422P9BE: - case AV_PIX_FMT_YUV420P9BE: - case AV_PIX_FMT_YUV444P10BE: - case AV_PIX_FMT_YUV422P10BE: - case AV_PIX_FMT_YUV420P10BE: - case AV_PIX_FMT_YUV444P12BE: - case AV_PIX_FMT_YUV422P12BE: - case AV_PIX_FMT_YUV420P12BE: - case AV_PIX_FMT_YUV444P14BE: - case AV_PIX_FMT_YUV422P14BE: - case AV_PIX_FMT_YUV420P14BE: - case AV_PIX_FMT_YUV420P16BE: - case AV_PIX_FMT_YUV422P16BE: - case AV_PIX_FMT_YUV444P16BE: - - case AV_PIX_FMT_GRAY16BE: - c->lumToYV12 = bswap16Y_c; - break; - case AV_PIX_FMT_YUVA444P9BE: - case AV_PIX_FMT_YUVA422P9BE: - case AV_PIX_FMT_YUVA420P9BE: - case AV_PIX_FMT_YUVA444P10BE: - case AV_PIX_FMT_YUVA422P10BE: - case AV_PIX_FMT_YUVA420P10BE: - case AV_PIX_FMT_YUVA420P16BE: - case AV_PIX_FMT_YUVA422P16BE: - case AV_PIX_FMT_YUVA444P16BE: - c->lumToYV12 = bswap16Y_c; - c->alpToYV12 = bswap16Y_c; - break; -#endif - case AV_PIX_FMT_YUYV422: - case AV_PIX_FMT_Y400A: - c->lumToYV12 = yuy2ToY_c; - break; - case AV_PIX_FMT_UYVY422: - c->lumToYV12 = uyvyToY_c; - break; - case AV_PIX_FMT_BGR24: - c->lumToYV12 = bgr24ToY_c; - break; - case AV_PIX_FMT_BGR565LE: - c->lumToYV12 = bgr16leToY_c; - break; - case AV_PIX_FMT_BGR565BE: - c->lumToYV12 = bgr16beToY_c; - break; - case AV_PIX_FMT_BGR555LE: - c->lumToYV12 = bgr15leToY_c; - break; - case AV_PIX_FMT_BGR555BE: - c->lumToYV12 = bgr15beToY_c; - break; - case AV_PIX_FMT_BGR444LE: - c->lumToYV12 = bgr12leToY_c; - break; - case AV_PIX_FMT_BGR444BE: - c->lumToYV12 = bgr12beToY_c; - break; - case AV_PIX_FMT_RGB24: - c->lumToYV12 = rgb24ToY_c; - break; - case AV_PIX_FMT_RGB565LE: - c->lumToYV12 = rgb16leToY_c; - break; - case AV_PIX_FMT_RGB565BE: - c->lumToYV12 = rgb16beToY_c; - break; - case AV_PIX_FMT_RGB555LE: - c->lumToYV12 = rgb15leToY_c; - break; - case AV_PIX_FMT_RGB555BE: - c->lumToYV12 = rgb15beToY_c; - break; - case AV_PIX_FMT_RGB444LE: - c->lumToYV12 = rgb12leToY_c; - break; - case AV_PIX_FMT_RGB444BE: - c->lumToYV12 = rgb12beToY_c; - break; - case AV_PIX_FMT_RGB8: - case AV_PIX_FMT_BGR8: - case AV_PIX_FMT_PAL8: - case AV_PIX_FMT_BGR4_BYTE: - case AV_PIX_FMT_RGB4_BYTE: - c->lumToYV12 = palToY_c; - break; - case AV_PIX_FMT_MONOBLACK: - c->lumToYV12 = monoblack2Y_c; - break; - case AV_PIX_FMT_MONOWHITE: - c->lumToYV12 = monowhite2Y_c; - break; - case AV_PIX_FMT_RGB32: - c->lumToYV12 = bgr32ToY_c; - break; - case AV_PIX_FMT_RGB32_1: - c->lumToYV12 = bgr321ToY_c; - break; - case AV_PIX_FMT_BGR32: - c->lumToYV12 = rgb32ToY_c; - break; - case AV_PIX_FMT_BGR32_1: - c->lumToYV12 = rgb321ToY_c; - break; - case AV_PIX_FMT_RGB48BE: - c->lumToYV12 = rgb48BEToY_c; - break; - case AV_PIX_FMT_RGB48LE: - c->lumToYV12 = rgb48LEToY_c; - break; - case AV_PIX_FMT_BGR48BE: - c->lumToYV12 = bgr48BEToY_c; - break; - case AV_PIX_FMT_BGR48LE: - c->lumToYV12 = bgr48LEToY_c; - break; - case AV_PIX_FMT_RGBA64BE: - c->lumToYV12 = rgb64BEToY_c; - break; - case AV_PIX_FMT_RGBA64LE: - c->lumToYV12 = rgb64LEToY_c; - break; - } - if (c->alpPixBuf) { - if (is16BPS(srcFormat) || isNBPS(srcFormat)) { - if (HAVE_BIGENDIAN == !isBE(srcFormat)) - c->alpToYV12 = bswap16Y_c; - } - switch (srcFormat) { - case AV_PIX_FMT_RGBA64LE: - case AV_PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break; - case AV_PIX_FMT_BGRA: - case AV_PIX_FMT_RGBA: - c->alpToYV12 = rgbaToA_c; - break; - case AV_PIX_FMT_ABGR: - case AV_PIX_FMT_ARGB: - c->alpToYV12 = abgrToA_c; - break; - case AV_PIX_FMT_Y400A: - c->alpToYV12 = uyvyToY_c; - break; - case AV_PIX_FMT_PAL8 : - c->alpToYV12 = palToA_c; - break; - } - } -} diff --git a/ffmpeg/libswscale/libswscale.pc b/ffmpeg/libswscale/libswscale.pc deleted file mode 100644 index 63e7375..0000000 --- a/ffmpeg/libswscale/libswscale.pc +++ /dev/null @@ -1,14 +0,0 @@ -prefix=/usr/local -exec_prefix=${prefix} -libdir=${prefix}/lib -includedir=${prefix}/include - -Name: libswscale -Description: FFmpeg image rescaling library -Version: 2.5.101 -Requires: -Requires.private: libavutil = 52.59.100 -Conflicts: -Libs: -L${libdir} -lswscale -Libs.private: -lm -Cflags: -I${includedir} diff --git a/ffmpeg/libswscale/libswscale.v b/ffmpeg/libswscale/libswscale.v deleted file mode 100644 index 8b9a96a..0000000 --- a/ffmpeg/libswscale/libswscale.v +++ /dev/null @@ -1,4 +0,0 @@ -LIBSWSCALE_$MAJOR { - global: swscale_*; sws_*; - local: *; -}; diff --git a/ffmpeg/libswscale/options.c b/ffmpeg/libswscale/options.c deleted file mode 100644 index 9e8703f..0000000 --- a/ffmpeg/libswscale/options.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/avutil.h" -#include "libavutil/opt.h" -#include "libavutil/pixfmt.h" -#include "swscale.h" -#include "swscale_internal.h" - -static const char *sws_context_to_name(void *ptr) -{ - return "swscaler"; -} - -#define OFFSET(x) offsetof(SwsContext, x) -#define DEFAULT 0 -#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM - -static const AVOption swscale_options[] = { - { "sws_flags", "scaler flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = SWS_BICUBIC }, 0, UINT_MAX, VE, "sws_flags" }, - { "fast_bilinear", "fast bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FAST_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BICUBIC }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "experimental", "experimental", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_X }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "neighbor", "nearest neighbor", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_POINT }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "area", "averaging area", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_AREA }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "bicublin", "luma bicubic, chroma bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BICUBLIN }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "gauss", "gaussian", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_GAUSS }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "sinc", "sinc", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_SINC }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_LANCZOS }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "spline", "natural bicubic spline", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "print_info", "print info", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "accurate_rnd", "accurate rounding", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "full_chroma_int", "full chroma interpolation", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "full_chroma_inp", "full chroma input", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "bitexact", "", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "error_diffusion", "error diffusion dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ERROR_DIFFUSION}, INT_MIN, INT_MAX, VE, "sws_flags" }, - - { "srcw", "source width", OFFSET(srcW), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, - { "srch", "source height", OFFSET(srcH), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, - { "dstw", "destination width", OFFSET(dstW), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, - { "dsth", "destination height", OFFSET(dstH), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, - { "src_format", "source format", OFFSET(srcFormat), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, AV_PIX_FMT_NB - 1, VE }, - { "dst_format", "destination format", OFFSET(dstFormat), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, AV_PIX_FMT_NB - 1, VE }, - { "src_range", "source range", OFFSET(srcRange), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, 1, VE }, - { "dst_range", "destination range", OFFSET(dstRange), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, 1, VE }, - { "param0", "scaler param 0", OFFSET(param[0]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE }, - { "param1", "scaler param 1", OFFSET(param[1]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE }, - - { "src_v_chr_pos", "source vertical chroma position in luma grid/256" , OFFSET(src_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, - { "src_h_chr_pos", "source horizontal chroma position in luma grid/256", OFFSET(src_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, - { "dst_v_chr_pos", "destination vertical chroma position in luma grid/256" , OFFSET(dst_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, - { "dst_h_chr_pos", "destination horizontal chroma position in luma grid/256", OFFSET(dst_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE }, - - { "sws_dither", "set dithering algorithm", OFFSET(dither), AV_OPT_TYPE_INT, { .i64 = SWS_DITHER_AUTO }, 0, NB_SWS_DITHER, VE, "sws_dither" }, - { "auto", "leave choice to sws", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_AUTO }, INT_MIN, INT_MAX, VE, "sws_dither" }, - { "bayer", "bayer dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_BAYER }, INT_MIN, INT_MAX, VE, "sws_dither" }, - { "ed", "error diffusion", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_ED }, INT_MIN, INT_MAX, VE, "sws_dither" }, - - { NULL } -}; - -const AVClass sws_context_class = { - .class_name = "SWScaler", - .item_name = sws_context_to_name, - .option = swscale_options, - .category = AV_CLASS_CATEGORY_SWSCALER, - .version = LIBAVUTIL_VERSION_INT, -}; - -const AVClass *sws_get_class(void) -{ - return &sws_context_class; -} diff --git a/ffmpeg/libswscale/output.c b/ffmpeg/libswscale/output.c deleted file mode 100644 index ddb0d0c..0000000 --- a/ffmpeg/libswscale/output.c +++ /dev/null @@ -1,2135 +0,0 @@ -/* - * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <math.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> - -#include "libavutil/attributes.h" -#include "libavutil/avutil.h" -#include "libavutil/avassert.h" -#include "libavutil/bswap.h" -#include "libavutil/cpu.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/mathematics.h" -#include "libavutil/pixdesc.h" -#include "config.h" -#include "rgb2rgb.h" -#include "swscale.h" -#include "swscale_internal.h" - -DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = { -{ 1, 3, 1, 3, 1, 3, 1, 3, }, -{ 2, 0, 2, 0, 2, 0, 2, 0, }, -{ 1, 3, 1, 3, 1, 3, 1, 3, }, -}; - -DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = { -{ 6, 2, 6, 2, 6, 2, 6, 2, }, -{ 0, 4, 0, 4, 0, 4, 0, 4, }, -{ 6, 2, 6, 2, 6, 2, 6, 2, }, -}; - -DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = { -{ 8, 4, 11, 7, 8, 4, 11, 7, }, -{ 2, 14, 1, 13, 2, 14, 1, 13, }, -{ 10, 6, 9, 5, 10, 6, 9, 5, }, -{ 0, 12, 3, 15, 0, 12, 3, 15, }, -{ 8, 4, 11, 7, 8, 4, 11, 7, }, -}; - -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = { -{ 17, 9, 23, 15, 16, 8, 22, 14, }, -{ 5, 29, 3, 27, 4, 28, 2, 26, }, -{ 21, 13, 19, 11, 20, 12, 18, 10, }, -{ 0, 24, 6, 30, 1, 25, 7, 31, }, -{ 16, 8, 22, 14, 17, 9, 23, 15, }, -{ 4, 28, 2, 26, 5, 29, 3, 27, }, -{ 20, 12, 18, 10, 21, 13, 19, 11, }, -{ 1, 25, 7, 31, 0, 24, 6, 30, }, -{ 17, 9, 23, 15, 16, 8, 22, 14, }, -}; - -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = { -{ 0, 55, 14, 68, 3, 58, 17, 72, }, -{ 37, 18, 50, 32, 40, 22, 54, 35, }, -{ 9, 64, 5, 59, 13, 67, 8, 63, }, -{ 46, 27, 41, 23, 49, 31, 44, 26, }, -{ 2, 57, 16, 71, 1, 56, 15, 70, }, -{ 39, 21, 52, 34, 38, 19, 51, 33, }, -{ 11, 66, 7, 62, 10, 65, 6, 60, }, -{ 48, 30, 43, 25, 47, 29, 42, 24, }, -{ 0, 55, 14, 68, 3, 58, 17, 72, }, -}; - -#if 1 -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { -{117, 62, 158, 103, 113, 58, 155, 100, }, -{ 34, 199, 21, 186, 31, 196, 17, 182, }, -{144, 89, 131, 76, 141, 86, 127, 72, }, -{ 0, 165, 41, 206, 10, 175, 52, 217, }, -{110, 55, 151, 96, 120, 65, 162, 107, }, -{ 28, 193, 14, 179, 38, 203, 24, 189, }, -{138, 83, 124, 69, 148, 93, 134, 79, }, -{ 7, 172, 48, 213, 3, 168, 45, 210, }, -{117, 62, 158, 103, 113, 58, 155, 100, }, -}; -#elif 1 -// tries to correct a gamma of 1.5 -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { -{ 0, 143, 18, 200, 2, 156, 25, 215, }, -{ 78, 28, 125, 64, 89, 36, 138, 74, }, -{ 10, 180, 3, 161, 16, 195, 8, 175, }, -{109, 51, 93, 38, 121, 60, 105, 47, }, -{ 1, 152, 23, 210, 0, 147, 20, 205, }, -{ 85, 33, 134, 71, 81, 30, 130, 67, }, -{ 14, 190, 6, 171, 12, 185, 5, 166, }, -{117, 57, 101, 44, 113, 54, 97, 41, }, -{ 0, 143, 18, 200, 2, 156, 25, 215, }, -}; -#elif 1 -// tries to correct a gamma of 2.0 -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { -{ 0, 124, 8, 193, 0, 140, 12, 213, }, -{ 55, 14, 104, 42, 66, 19, 119, 52, }, -{ 3, 168, 1, 145, 6, 187, 3, 162, }, -{ 86, 31, 70, 21, 99, 39, 82, 28, }, -{ 0, 134, 11, 206, 0, 129, 9, 200, }, -{ 62, 17, 114, 48, 58, 16, 109, 45, }, -{ 5, 181, 2, 157, 4, 175, 1, 151, }, -{ 95, 36, 78, 26, 90, 34, 74, 24, }, -{ 0, 124, 8, 193, 0, 140, 12, 213, }, -}; -#else -// tries to correct a gamma of 2.5 -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { -{ 0, 107, 3, 187, 0, 125, 6, 212, }, -{ 39, 7, 86, 28, 49, 11, 102, 36, }, -{ 1, 158, 0, 131, 3, 180, 1, 151, }, -{ 68, 19, 52, 12, 81, 25, 64, 17, }, -{ 0, 119, 5, 203, 0, 113, 4, 195, }, -{ 45, 9, 96, 33, 42, 8, 91, 30, }, -{ 2, 172, 1, 144, 2, 165, 0, 137, }, -{ 77, 23, 60, 15, 72, 21, 56, 14, }, -{ 0, 107, 3, 187, 0, 125, 6, 212, }, -}; -#endif - -#define output_pixel(pos, val, bias, signedness) \ - if (big_endian) { \ - AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ - } else { \ - AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ - } - -static av_always_inline void -yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW, - int big_endian, int output_bits) -{ - int i; - int shift = 3; - av_assert0(output_bits == 16); - - for (i = 0; i < dstW; i++) { - int val = src[i] + (1 << (shift - 1)); - output_pixel(&dest[i], val, 0, uint); - } -} - -static av_always_inline void -yuv2planeX_16_c_template(const int16_t *filter, int filterSize, - const int32_t **src, uint16_t *dest, int dstW, - int big_endian, int output_bits) -{ - int i; - int shift = 15; - av_assert0(output_bits == 16); - - for (i = 0; i < dstW; i++) { - int val = 1 << (shift - 1); - int j; - - /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline - * filters (or anything with negative coeffs, the range can be slightly - * wider in both directions. To account for this overflow, we subtract - * a constant so it always fits in the signed range (assuming a - * reasonable filterSize), and re-add that at the end. */ - val -= 0x40000000; - for (j = 0; j < filterSize; j++) - val += src[j][i] * (unsigned)filter[j]; - - output_pixel(&dest[i], val, 0x8000, int); - } -} - -#undef output_pixel - -#define output_pixel(pos, val) \ - if (big_endian) { \ - AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ - } else { \ - AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ - } - -static av_always_inline void -yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW, - int big_endian, int output_bits) -{ - int i; - int shift = 15 - output_bits; - - for (i = 0; i < dstW; i++) { - int val = src[i] + (1 << (shift - 1)); - output_pixel(&dest[i], val); - } -} - -static av_always_inline void -yuv2planeX_10_c_template(const int16_t *filter, int filterSize, - const int16_t **src, uint16_t *dest, int dstW, - int big_endian, int output_bits) -{ - int i; - int shift = 11 + 16 - output_bits; - - for (i = 0; i < dstW; i++) { - int val = 1 << (shift - 1); - int j; - - for (j = 0; j < filterSize; j++) - val += src[j][i] * filter[j]; - - output_pixel(&dest[i], val); - } -} - -#undef output_pixel - -#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \ -static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \ - uint8_t *dest, int dstW, \ - const uint8_t *dither, int offset)\ -{ \ - yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \ - (uint16_t *) dest, dstW, is_be, bits); \ -}\ -static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \ - const int16_t **src, uint8_t *dest, int dstW, \ - const uint8_t *dither, int offset)\ -{ \ - yuv2planeX_## template_size ## _c_template(filter, \ - filterSize, (const typeX_t **) src, \ - (uint16_t *) dest, dstW, is_be, bits); \ -} -yuv2NBPS( 9, BE, 1, 10, int16_t) -yuv2NBPS( 9, LE, 0, 10, int16_t) -yuv2NBPS(10, BE, 1, 10, int16_t) -yuv2NBPS(10, LE, 0, 10, int16_t) -yuv2NBPS(12, BE, 1, 10, int16_t) -yuv2NBPS(12, LE, 0, 10, int16_t) -yuv2NBPS(14, BE, 1, 10, int16_t) -yuv2NBPS(14, LE, 0, 10, int16_t) -yuv2NBPS(16, BE, 1, 16, int32_t) -yuv2NBPS(16, LE, 0, 16, int32_t) - -static void yuv2planeX_8_c(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset) -{ - int i; - for (i=0; i<dstW; i++) { - int val = dither[(i + offset) & 7] << 12; - int j; - for (j=0; j<filterSize; j++) - val += src[j][i] * filter[j]; - - dest[i]= av_clip_uint8(val>>19); - } -} - -static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset) -{ - int i; - for (i=0; i<dstW; i++) { - int val = (src[i] + dither[(i + offset) & 7]) >> 7; - dest[i]= av_clip_uint8(val); - } -} - -static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, - const int16_t **chrUSrc, const int16_t **chrVSrc, - uint8_t *dest, int chrDstW) -{ - enum AVPixelFormat dstFormat = c->dstFormat; - const uint8_t *chrDither = c->chrDither8; - int i; - - if (dstFormat == AV_PIX_FMT_NV12) - for (i=0; i<chrDstW; i++) { - int u = chrDither[i & 7] << 12; - int v = chrDither[(i + 3) & 7] << 12; - int j; - for (j=0; j<chrFilterSize; j++) { - u += chrUSrc[j][i] * chrFilter[j]; - v += chrVSrc[j][i] * chrFilter[j]; - } - - dest[2*i]= av_clip_uint8(u>>19); - dest[2*i+1]= av_clip_uint8(v>>19); - } - else - for (i=0; i<chrDstW; i++) { - int u = chrDither[i & 7] << 12; - int v = chrDither[(i + 3) & 7] << 12; - int j; - for (j=0; j<chrFilterSize; j++) { - u += chrUSrc[j][i] * chrFilter[j]; - v += chrVSrc[j][i] * chrFilter[j]; - } - - dest[2*i]= av_clip_uint8(v>>19); - dest[2*i+1]= av_clip_uint8(u>>19); - } -} - -#define accumulate_bit(acc, val) \ - acc <<= 1; \ - acc |= (val) >= (128 + 110) -#define output_pixel(pos, acc) \ - if (target == AV_PIX_FMT_MONOBLACK) { \ - pos = acc; \ - } else { \ - pos = ~acc; \ - } - -static av_always_inline void -yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, - int y, enum AVPixelFormat target) -{ - const uint8_t * const d128 = ff_dither_8x8_220[y&7]; - int i; - unsigned acc = 0; - int err = 0; - - for (i = 0; i < dstW; i += 2) { - int j; - int Y1 = 1 << 18; - int Y2 = 1 << 18; - - for (j = 0; j < lumFilterSize; j++) { - Y1 += lumSrc[j][i] * lumFilter[j]; - Y2 += lumSrc[j][i+1] * lumFilter[j]; - } - Y1 >>= 19; - Y2 >>= 19; - if ((Y1 | Y2) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - } - if (c->dither == SWS_DITHER_ED) { - Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; - c->dither_error[0][i] = err; - acc = 2*acc + (Y1 >= 128); - Y1 -= 220*(acc&1); - - err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4); - c->dither_error[0][i+1] = Y1; - acc = 2*acc + (err >= 128); - err -= 220*(acc&1); - } else { - accumulate_bit(acc, Y1 + d128[(i + 0) & 7]); - accumulate_bit(acc, Y2 + d128[(i + 1) & 7]); - } - if ((i & 7) == 6) { - output_pixel(*dest++, acc); - } - } - c->dither_error[0][i] = err; - - if (i & 6) { - output_pixel(*dest, acc); - } -} - -static av_always_inline void -yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1]; - const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; - int yalpha1 = 4096 - yalpha; - int i; - - if (c->dither == SWS_DITHER_ED) { - int err = 0; - int acc = 0; - for (i = 0; i < dstW; i +=2) { - int Y; - - Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; - Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; - c->dither_error[0][i] = err; - acc = 2*acc + (Y >= 128); - Y -= 220*(acc&1); - - err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; - err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4; - c->dither_error[0][i+1] = Y; - acc = 2*acc + (err >= 128); - err -= 220*(acc&1); - - if ((i & 7) == 6) - output_pixel(*dest++, acc); - } - c->dither_error[0][i] = err; - } else { - for (i = 0; i < dstW; i += 8) { - int Y, acc = 0; - - Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[0]); - Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[1]); - Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[2]); - Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[3]); - Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[4]); - Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[5]); - Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[6]); - Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[7]); - - output_pixel(*dest++, acc); - } - } -} - -static av_always_inline void -yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target) -{ - const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; - int i; - - if (c->dither == SWS_DITHER_ED) { - int err = 0; - int acc = 0; - for (i = 0; i < dstW; i +=2) { - int Y; - - Y = ((buf0[i + 0] + 64) >> 7); - Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; - c->dither_error[0][i] = err; - acc = 2*acc + (Y >= 128); - Y -= 220*(acc&1); - - err = ((buf0[i + 1] + 64) >> 7); - err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4; - c->dither_error[0][i+1] = Y; - acc = 2*acc + (err >= 128); - err -= 220*(acc&1); - - if ((i & 7) == 6) - output_pixel(*dest++, acc); - } - c->dither_error[0][i] = err; - } else { - for (i = 0; i < dstW; i += 8) { - int acc = 0; - accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); - accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); - accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); - accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); - accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); - accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); - accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); - accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); - - output_pixel(*dest++, acc); - } - } -} - -#undef output_pixel -#undef accumulate_bit - -#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \ -static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ - const int16_t **lumSrc, int lumFilterSize, \ - const int16_t *chrFilter, const int16_t **chrUSrc, \ - const int16_t **chrVSrc, int chrFilterSize, \ - const int16_t **alpSrc, uint8_t *dest, int dstW, \ - int y) \ -{ \ - name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt); \ -} \ - \ -static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ - const int16_t *ubuf[2], const int16_t *vbuf[2], \ - const int16_t *abuf[2], uint8_t *dest, int dstW, \ - int yalpha, int uvalpha, int y) \ -{ \ - name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt); \ -} \ - \ -static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ - const int16_t *ubuf[2], const int16_t *vbuf[2], \ - const int16_t *abuf0, uint8_t *dest, int dstW, \ - int uvalpha, int y) \ -{ \ - name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \ - abuf0, dest, dstW, uvalpha, \ - y, fmt); \ -} - -YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE) -YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK) - -#define output_pixels(pos, Y1, U, Y2, V) \ - if (target == AV_PIX_FMT_YUYV422) { \ - dest[pos + 0] = Y1; \ - dest[pos + 1] = U; \ - dest[pos + 2] = Y2; \ - dest[pos + 3] = V; \ - } else { \ - dest[pos + 0] = U; \ - dest[pos + 1] = Y1; \ - dest[pos + 2] = V; \ - dest[pos + 3] = Y2; \ - } - -static av_always_inline void -yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, - int y, enum AVPixelFormat target) -{ - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int j; - int Y1 = 1 << 18; - int Y2 = 1 << 18; - int U = 1 << 18; - int V = 1 << 18; - - for (j = 0; j < lumFilterSize; j++) { - Y1 += lumSrc[j][i * 2] * lumFilter[j]; - Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; - } - for (j = 0; j < chrFilterSize; j++) { - U += chrUSrc[j][i] * chrFilter[j]; - V += chrVSrc[j][i] * chrFilter[j]; - } - Y1 >>= 19; - Y2 >>= 19; - U >>= 19; - V >>= 19; - if ((Y1 | Y2 | U | V) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - } - output_pixels(4*i, Y1, U, Y2, V); - } -} - -static av_always_inline void -yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], - *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; - int yalpha1 = 4096 - yalpha; - int uvalpha1 = 4096 - uvalpha; - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; - int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; - int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; - int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; - - if ((Y1 | Y2 | U | V) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - } - - output_pixels(i * 4, Y1, U, Y2, V); - } -} - -static av_always_inline void -yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target) -{ - const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; - int i; - - if (uvalpha < 2048) { - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2 ]+64) >> 7; - int Y2 = (buf0[i * 2 + 1]+64) >> 7; - int U = (ubuf0[i] +64) >> 7; - int V = (vbuf0[i] +64) >> 7; - - if ((Y1 | Y2 | U | V) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - } - - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - - output_pixels(i * 4, Y1, U, Y2, V); - } - } else { - const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2 ] + 64) >> 7; - int Y2 = (buf0[i * 2 + 1] + 64) >> 7; - int U = (ubuf0[i] + ubuf1[i]+128) >> 8; - int V = (vbuf0[i] + vbuf1[i]+128) >> 8; - - if ((Y1 | Y2 | U | V) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - } - - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - - output_pixels(i * 4, Y1, U, Y2, V); - } - } -} - -#undef output_pixels - -YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422) -YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422) - -#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? R : B) -#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? B : R) -#define output_pixel(pos, val) \ - if (isBE(target)) { \ - AV_WB16(pos, val); \ - } else { \ - AV_WL16(pos, val); \ - } - -static av_always_inline void -yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int32_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int32_t **chrUSrc, - const int32_t **chrVSrc, int chrFilterSize, - const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha) -{ - int i; - int A1 = 0xffff<<14, A2 = 0xffff<<14; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int j; - int Y1 = -0x40000000; - int Y2 = -0x40000000; - int U = -128 << 23; // 19 - int V = -128 << 23; - int R, G, B; - - for (j = 0; j < lumFilterSize; j++) { - Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j]; - Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; - } - for (j = 0; j < chrFilterSize; j++) {; - U += chrUSrc[j][i] * (unsigned)chrFilter[j]; - V += chrVSrc[j][i] * (unsigned)chrFilter[j]; - } - - if (hasAlpha) { - A1 = -0x40000000; - A2 = -0x40000000; - for (j = 0; j < lumFilterSize; j++) { - A1 += alpSrc[j][i * 2] * (unsigned)lumFilter[j]; - A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; - } - A1 >>= 1; - A1 += 0x20002000; - A2 >>= 1; - A2 += 0x20002000; - } - - // 8bit: 12+15=27; 16-bit: 12+19=31 - Y1 >>= 14; // 10 - Y1 += 0x10000; - Y2 >>= 14; - Y2 += 0x10000; - U >>= 14; - V >>= 14; - - // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; // 21 - Y2 += 1 << 13; - // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit - output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; - } -} - -static av_always_inline void -yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], - const int32_t *ubuf[2], const int32_t *vbuf[2], - const int32_t *abuf[2], uint16_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha) -{ - const int32_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], - *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], - *abuf0 = hasAlpha ? abuf[0] : NULL, - *abuf1 = hasAlpha ? abuf[1] : NULL; - int yalpha1 = 4096 - yalpha; - int uvalpha1 = 4096 - uvalpha; - int i; - int A1 = 0xffff<<14, A2 = 0xffff<<14; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14; - int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14; - int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; - int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - if (hasAlpha) { - A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 1; - A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1; - - A1 += 1 << 13; - A2 += 1 << 13; - } - - output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; - } -} - -static av_always_inline void -yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, - const int32_t *ubuf[2], const int32_t *vbuf[2], - const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) -{ - const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; - int i; - int A1 = 0xffff<<14, A2= 0xffff<<14; - - if (uvalpha < 2048) { - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] ) >> 2; - int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + (-128 << 11)) >> 2; - int V = (vbuf0[i] + (-128 << 11)) >> 2; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - if (hasAlpha) { - A1 = abuf0[i * 2 ] << 11; - A2 = abuf0[i * 2 + 1] << 11; - - A1 += 1 << 13; - A2 += 1 << 13; - } - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; - } - } else { - const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; - int A1 = 0xffff<<14, A2 = 0xffff<<14; - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] ) >> 2; - int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; - int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - if (hasAlpha) { - A1 = abuf0[i * 2 ] << 11; - A2 = abuf0[i * 2 + 1] << 11; - - A1 += 1 << 13; - A2 += 1 << 13; - } - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; - } - } -} - -static av_always_inline void -yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int32_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int32_t **chrUSrc, - const int32_t **chrVSrc, int chrFilterSize, - const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha) -{ - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int j; - int Y1 = -0x40000000; - int Y2 = -0x40000000; - int U = -128 << 23; // 19 - int V = -128 << 23; - int R, G, B; - - for (j = 0; j < lumFilterSize; j++) { - Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j]; - Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; - } - for (j = 0; j < chrFilterSize; j++) {; - U += chrUSrc[j][i] * (unsigned)chrFilter[j]; - V += chrVSrc[j][i] * (unsigned)chrFilter[j]; - } - - // 8bit: 12+15=27; 16-bit: 12+19=31 - Y1 >>= 14; // 10 - Y1 += 0x10000; - Y2 >>= 14; - Y2 += 0x10000; - U >>= 14; - V >>= 14; - - // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; // 21 - Y2 += 1 << 13; - // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } -} - -static av_always_inline void -yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2], - const int32_t *ubuf[2], const int32_t *vbuf[2], - const int32_t *abuf[2], uint16_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha) -{ - const int32_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], - *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; - int yalpha1 = 4096 - yalpha; - int uvalpha1 = 4096 - uvalpha; - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14; - int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14; - int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; - int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } -} - -static av_always_inline void -yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, - const int32_t *ubuf[2], const int32_t *vbuf[2], - const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) -{ - const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; - int i; - - if (uvalpha < 2048) { - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] ) >> 2; - int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + (-128 << 11)) >> 2; - int V = (vbuf0[i] + (-128 << 11)) >> 2; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } - } else { - const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] ) >> 2; - int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; - int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } - } -} - -#undef output_pixel -#undef r_b -#undef b_r - -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha) \ -static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ - const int16_t **_lumSrc, int lumFilterSize, \ - const int16_t *chrFilter, const int16_t **_chrUSrc, \ - const int16_t **_chrVSrc, int chrFilterSize, \ - const int16_t **_alpSrc, uint8_t *_dest, int dstW, \ - int y) \ -{ \ - const int32_t **lumSrc = (const int32_t **) _lumSrc, \ - **chrUSrc = (const int32_t **) _chrUSrc, \ - **chrVSrc = (const int32_t **) _chrVSrc, \ - **alpSrc = (const int32_t **) _alpSrc; \ - uint16_t *dest = (uint16_t *) _dest; \ - name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt, hasAlpha); \ -} \ - \ -static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ - const int16_t *_ubuf[2], const int16_t *_vbuf[2], \ - const int16_t *_abuf[2], uint8_t *_dest, int dstW, \ - int yalpha, int uvalpha, int y) \ -{ \ - const int32_t **buf = (const int32_t **) _buf, \ - **ubuf = (const int32_t **) _ubuf, \ - **vbuf = (const int32_t **) _vbuf, \ - **abuf = (const int32_t **) _abuf; \ - uint16_t *dest = (uint16_t *) _dest; \ - name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ -} \ - \ -static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ - const int16_t *_ubuf[2], const int16_t *_vbuf[2], \ - const int16_t *_abuf0, uint8_t *_dest, int dstW, \ - int uvalpha, int y) \ -{ \ - const int32_t *buf0 = (const int32_t *) _buf0, \ - **ubuf = (const int32_t **) _ubuf, \ - **vbuf = (const int32_t **) _vbuf, \ - *abuf0 = (const int32_t *) _abuf0; \ - uint16_t *dest = (uint16_t *) _dest; \ - name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ - dstW, uvalpha, y, fmt, hasAlpha); \ -} - -YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE, 0) -YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0) - -/* - * Write out 2 RGB pixels in the target pixel format. This function takes a - * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of - * things like endianness conversion and shifting. The caller takes care of - * setting the correct offset in these tables from the chroma (U/V) values. - * This function then uses the luminance (Y1/Y2) values to write out the - * correct RGB values into the destination buffer. - */ -static av_always_inline void -yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, - unsigned A1, unsigned A2, - const void *_r, const void *_g, const void *_b, int y, - enum AVPixelFormat target, int hasAlpha) -{ - if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA || - target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) { - uint32_t *dest = (uint32_t *) _dest; - const uint32_t *r = (const uint32_t *) _r; - const uint32_t *g = (const uint32_t *) _g; - const uint32_t *b = (const uint32_t *) _b; - -#if CONFIG_SMALL - int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0; - - dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0); - dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0); -#else - if (hasAlpha) { - int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; - - av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0); - dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh); - dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh); - } else { -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1 - int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; - - av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF); -#endif - dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1]; - dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2]; - } -#endif - } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) { - uint8_t *dest = (uint8_t *) _dest; - const uint8_t *r = (const uint8_t *) _r; - const uint8_t *g = (const uint8_t *) _g; - const uint8_t *b = (const uint8_t *) _b; - -#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b) -#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r) - - dest[i * 6 + 0] = r_b[Y1]; - dest[i * 6 + 1] = g[Y1]; - dest[i * 6 + 2] = b_r[Y1]; - dest[i * 6 + 3] = r_b[Y2]; - dest[i * 6 + 4] = g[Y2]; - dest[i * 6 + 5] = b_r[Y2]; -#undef r_b -#undef b_r - } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 || - target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 || - target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) { - uint16_t *dest = (uint16_t *) _dest; - const uint16_t *r = (const uint16_t *) _r; - const uint16_t *g = (const uint16_t *) _g; - const uint16_t *b = (const uint16_t *) _b; - int dr1, dg1, db1, dr2, dg2, db2; - - if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) { - dr1 = ff_dither_2x2_8[ y & 1 ][0]; - dg1 = ff_dither_2x2_4[ y & 1 ][0]; - db1 = ff_dither_2x2_8[(y & 1) ^ 1][0]; - dr2 = ff_dither_2x2_8[ y & 1 ][1]; - dg2 = ff_dither_2x2_4[ y & 1 ][1]; - db2 = ff_dither_2x2_8[(y & 1) ^ 1][1]; - } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) { - dr1 = ff_dither_2x2_8[ y & 1 ][0]; - dg1 = ff_dither_2x2_8[ y & 1 ][1]; - db1 = ff_dither_2x2_8[(y & 1) ^ 1][0]; - dr2 = ff_dither_2x2_8[ y & 1 ][1]; - dg2 = ff_dither_2x2_8[ y & 1 ][0]; - db2 = ff_dither_2x2_8[(y & 1) ^ 1][1]; - } else { - dr1 = ff_dither_4x4_16[ y & 3 ][0]; - dg1 = ff_dither_4x4_16[ y & 3 ][1]; - db1 = ff_dither_4x4_16[(y & 3) ^ 3][0]; - dr2 = ff_dither_4x4_16[ y & 3 ][1]; - dg2 = ff_dither_4x4_16[ y & 3 ][0]; - db2 = ff_dither_4x4_16[(y & 3) ^ 3][1]; - } - - dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; - dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]; - } else /* 8/4-bit */ { - uint8_t *dest = (uint8_t *) _dest; - const uint8_t *r = (const uint8_t *) _r; - const uint8_t *g = (const uint8_t *) _g; - const uint8_t *b = (const uint8_t *) _b; - int dr1, dg1, db1, dr2, dg2, db2; - - if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) { - const uint8_t * const d64 = ff_dither_8x8_73[y & 7]; - const uint8_t * const d32 = ff_dither_8x8_32[y & 7]; - dr1 = dg1 = d32[(i * 2 + 0) & 7]; - db1 = d64[(i * 2 + 0) & 7]; - dr2 = dg2 = d32[(i * 2 + 1) & 7]; - db2 = d64[(i * 2 + 1) & 7]; - } else { - const uint8_t * const d64 = ff_dither_8x8_73 [y & 7]; - const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; - dr1 = db1 = d128[(i * 2 + 0) & 7]; - dg1 = d64[(i * 2 + 0) & 7]; - dr2 = db2 = d128[(i * 2 + 1) & 7]; - dg2 = d64[(i * 2 + 1) & 7]; - } - - if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) { - dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] + - ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4); - } else { - dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; - dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]; - } - } -} - -static av_always_inline void -yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha) -{ - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int j, A1, A2; - int Y1 = 1 << 18; - int Y2 = 1 << 18; - int U = 1 << 18; - int V = 1 << 18; - const void *r, *g, *b; - - for (j = 0; j < lumFilterSize; j++) { - Y1 += lumSrc[j][i * 2] * lumFilter[j]; - Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; - } - for (j = 0; j < chrFilterSize; j++) { - U += chrUSrc[j][i] * chrFilter[j]; - V += chrVSrc[j][i] * chrFilter[j]; - } - Y1 >>= 19; - Y2 >>= 19; - U >>= 19; - V >>= 19; - if (hasAlpha) { - A1 = 1 << 18; - A2 = 1 << 18; - for (j = 0; j < lumFilterSize; j++) { - A1 += alpSrc[j][i * 2 ] * lumFilter[j]; - A2 += alpSrc[j][i * 2 + 1] * lumFilter[j]; - } - A1 >>= 19; - A2 >>= 19; - if ((A1 | A2) & 0x100) { - A1 = av_clip_uint8(A1); - A2 = av_clip_uint8(A2); - } - } - - r = c->table_rV[V + YUVRGB_TABLE_HEADROOM]; - g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]); - b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; - - yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, - r, g, b, y, target, hasAlpha); - } -} - -static av_always_inline void -yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], - *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], - *abuf0 = hasAlpha ? abuf[0] : NULL, - *abuf1 = hasAlpha ? abuf[1] : NULL; - int yalpha1 = 4096 - yalpha; - int uvalpha1 = 4096 - uvalpha; - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; - int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; - int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; - int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; - int A1, A2; - const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], - *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), - *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; - - if (hasAlpha) { - A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19; - A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19; - A1 = av_clip_uint8(A1); - A2 = av_clip_uint8(A2); - } - - yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, - r, g, b, y, target, hasAlpha); - } -} - -static av_always_inline void -yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, - int hasAlpha) -{ - const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; - int i; - - if (uvalpha < 2048) { - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2 ] + 64) >> 7; - int Y2 = (buf0[i * 2 + 1] + 64) >> 7; - int U = (ubuf0[i] + 64) >> 7; - int V = (vbuf0[i] + 64) >> 7; - int A1, A2; - const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], - *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), - *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; - - if (hasAlpha) { - A1 = abuf0[i * 2 ] * 255 + 16384 >> 15; - A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15; - A1 = av_clip_uint8(A1); - A2 = av_clip_uint8(A2); - } - - yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, - r, g, b, y, target, hasAlpha); - } - } else { - const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2 ] + 64) >> 7; - int Y2 = (buf0[i * 2 + 1] + 64) >> 7; - int U = (ubuf0[i] + ubuf1[i] + 128) >> 8; - int V = (vbuf0[i] + vbuf1[i] + 128) >> 8; - int A1, A2; - const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], - *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), - *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; - - if (hasAlpha) { - A1 = (abuf0[i * 2 ] + 64) >> 7; - A2 = (abuf0[i * 2 + 1] + 64) >> 7; - A1 = av_clip_uint8(A1); - A2 = av_clip_uint8(A2); - } - - yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, - r, g, b, y, target, hasAlpha); - } - } -} - -#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ -static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ - const int16_t **lumSrc, int lumFilterSize, \ - const int16_t *chrFilter, const int16_t **chrUSrc, \ - const int16_t **chrVSrc, int chrFilterSize, \ - const int16_t **alpSrc, uint8_t *dest, int dstW, \ - int y) \ -{ \ - name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt, hasAlpha); \ -} - -#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \ -YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ -static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ - const int16_t *ubuf[2], const int16_t *vbuf[2], \ - const int16_t *abuf[2], uint8_t *dest, int dstW, \ - int yalpha, int uvalpha, int y) \ -{ \ - name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ -} - -#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \ -YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \ -static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ - const int16_t *ubuf[2], const int16_t *vbuf[2], \ - const int16_t *abuf0, uint8_t *dest, int dstW, \ - int uvalpha, int y) \ -{ \ - name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ - dstW, uvalpha, y, fmt, hasAlpha); \ -} - -#if CONFIG_SMALL -YUV2RGBWRAPPER(yuv2rgb,, 32_1, AV_PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPER(yuv2rgb,, 32, AV_PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -#else -#if CONFIG_SWSCALE_ALPHA -YUV2RGBWRAPPER(yuv2rgb,, a32_1, AV_PIX_FMT_RGB32_1, 1) -YUV2RGBWRAPPER(yuv2rgb,, a32, AV_PIX_FMT_RGB32, 1) -#endif -YUV2RGBWRAPPER(yuv2rgb,, x32_1, AV_PIX_FMT_RGB32_1, 0) -YUV2RGBWRAPPER(yuv2rgb,, x32, AV_PIX_FMT_RGB32, 0) -#endif -YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24, 0) -YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24, 0) -YUV2RGBWRAPPER(yuv2rgb,, 16, AV_PIX_FMT_RGB565, 0) -YUV2RGBWRAPPER(yuv2rgb,, 15, AV_PIX_FMT_RGB555, 0) -YUV2RGBWRAPPER(yuv2rgb,, 12, AV_PIX_FMT_RGB444, 0) -YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0) -YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0) -YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0) - -static av_always_inline void yuv2rgb_write_full(SwsContext *c, - uint8_t *dest, int i, int Y, int A, int U, int V, - int y, enum AVPixelFormat target, int hasAlpha, int err[4]) -{ - int R, G, B; - int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8; - - Y -= c->yuv2rgb_y_offset; - Y *= c->yuv2rgb_y_coeff; - Y += 1 << 21; - R = Y + V*c->yuv2rgb_v2r_coeff; - G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; - B = Y + U*c->yuv2rgb_u2b_coeff; - if ((R | G | B) & 0xC0000000) { - R = av_clip_uintp2(R, 30); - G = av_clip_uintp2(G, 30); - B = av_clip_uintp2(B, 30); - } - - switch(target) { - case AV_PIX_FMT_ARGB: - dest[0] = hasAlpha ? A : 255; - dest[1] = R >> 22; - dest[2] = G >> 22; - dest[3] = B >> 22; - break; - case AV_PIX_FMT_RGB24: - dest[0] = R >> 22; - dest[1] = G >> 22; - dest[2] = B >> 22; - break; - case AV_PIX_FMT_RGBA: - dest[0] = R >> 22; - dest[1] = G >> 22; - dest[2] = B >> 22; - dest[3] = hasAlpha ? A : 255; - break; - case AV_PIX_FMT_ABGR: - dest[0] = hasAlpha ? A : 255; - dest[1] = B >> 22; - dest[2] = G >> 22; - dest[3] = R >> 22; - break; - case AV_PIX_FMT_BGR24: - dest[0] = B >> 22; - dest[1] = G >> 22; - dest[2] = R >> 22; - break; - case AV_PIX_FMT_BGRA: - dest[0] = B >> 22; - dest[1] = G >> 22; - dest[2] = R >> 22; - dest[3] = hasAlpha ? A : 255; - break; - case AV_PIX_FMT_BGR4_BYTE: - case AV_PIX_FMT_RGB4_BYTE: - case AV_PIX_FMT_BGR8: - case AV_PIX_FMT_RGB8: - { - int r,g,b; - R >>= 22; - G >>= 22; - B >>= 22; - R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; - G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4; - B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4; - c->dither_error[0][i] = err[0]; - c->dither_error[1][i] = err[1]; - c->dither_error[2][i] = err[2]; - r = R >> (isrgb8 ? 5 : 7); - g = G >> (isrgb8 ? 5 : 6); - b = B >> (isrgb8 ? 6 : 7); - r = av_clip(r, 0, isrgb8 ? 7 : 1); - g = av_clip(g, 0, isrgb8 ? 7 : 3); - b = av_clip(b, 0, isrgb8 ? 3 : 1); - err[0] = R - r*(isrgb8 ? 36 : 255); - err[1] = G - g*(isrgb8 ? 36 : 85); - err[2] = B - b*(isrgb8 ? 85 : 255); - if(target == AV_PIX_FMT_BGR4_BYTE) { - dest[0] = r + 2*g + 8*b; - } else if(target == AV_PIX_FMT_RGB4_BYTE) { - dest[0] = b + 2*g + 8*r; - } else if(target == AV_PIX_FMT_BGR8) { - dest[0] = r + 8*g + 64*b; - } else if(target == AV_PIX_FMT_RGB8) { - dest[0] = b + 4*g + 32*r; - } else - av_assert2(0); - break;} - } -} - -static av_always_inline void -yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, - int dstW, int y, enum AVPixelFormat target, int hasAlpha) -{ - int i; - int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; - int err[4] = {0}; - int A = 0; //init to silence warning - - if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE - || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) - step = 1; - - for (i = 0; i < dstW; i++) { - int j; - int Y = 1<<9; - int U = (1<<9)-(128 << 19); - int V = (1<<9)-(128 << 19); - - for (j = 0; j < lumFilterSize; j++) { - Y += lumSrc[j][i] * lumFilter[j]; - } - for (j = 0; j < chrFilterSize; j++) { - U += chrUSrc[j][i] * chrFilter[j]; - V += chrVSrc[j][i] * chrFilter[j]; - } - Y >>= 10; - U >>= 10; - V >>= 10; - if (hasAlpha) { - A = 1 << 18; - for (j = 0; j < lumFilterSize; j++) { - A += alpSrc[j][i] * lumFilter[j]; - } - A >>= 19; - if (A & 0x100) - A = av_clip_uint8(A); - } - yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); - dest += step; - } - c->dither_error[0][i] = err[0]; - c->dither_error[1][i] = err[1]; - c->dither_error[2][i] = err[2]; -} - -static av_always_inline void -yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], - *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], - *abuf0 = hasAlpha ? abuf[0] : NULL, - *abuf1 = hasAlpha ? abuf[1] : NULL; - int yalpha1 = 4096 - yalpha; - int uvalpha1 = 4096 - uvalpha; - int i; - int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; - int err[4] = {0}; - int A = 0; // init to silcene warning - - if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE - || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) - step = 1; - - for (i = 0; i < dstW; i++) { - int Y = ( buf0[i] * yalpha1 + buf1[i] * yalpha ) >> 10; //FIXME rounding - int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10; - int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10; - - if (hasAlpha) { - A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19; - if (A & 0x100) - A = av_clip_uint8(A); - } - - yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); - dest += step; - } - c->dither_error[0][i] = err[0]; - c->dither_error[1][i] = err[1]; - c->dither_error[2][i] = err[2]; -} - -static av_always_inline void -yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, - int hasAlpha) -{ - const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; - int i; - int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; - int err[4] = {0}; - - if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE - || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) - step = 1; - - if (uvalpha < 2048) { - int A = 0; //init to silence warning - for (i = 0; i < dstW; i++) { - int Y = buf0[i] << 2; - int U = (ubuf0[i] - (128<<7)) << 2; - int V = (vbuf0[i] - (128<<7)) << 2; - - if (hasAlpha) { - A = (abuf0[i] + 64) >> 7; - if (A & 0x100) - A = av_clip_uint8(A); - } - - yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); - dest += step; - } - } else { - const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; - int A = 0; //init to silence warning - for (i = 0; i < dstW; i++) { - int Y = buf0[i] << 2; - int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1; - int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1; - - if (hasAlpha) { - A = (abuf0[i] + 64) >> 7; - if (A & 0x100) - A = av_clip_uint8(A); - } - - yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); - dest += step; - } - } - - c->dither_error[0][i] = err[0]; - c->dither_error[1][i] = err[1]; - c->dither_error[2][i] = err[2]; -} - -#if CONFIG_SMALL -YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) -#else -#if CONFIG_SWSCALE_ALPHA -YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1) -YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1) -YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1) -YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1) -#endif -YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0) -#endif -YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0) - -YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0) -YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0) - -static void -yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t **dest, - int dstW, int y) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); - int i; - int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc; - uint16_t **dest16 = (uint16_t**)dest; - int SH = 22 + 7 - desc->comp[0].depth_minus1; - int A = 0; // init to silence warning - - for (i = 0; i < dstW; i++) { - int j; - int Y = 1 << 9; - int U = (1 << 9) - (128 << 19); - int V = (1 << 9) - (128 << 19); - int R, G, B; - - for (j = 0; j < lumFilterSize; j++) - Y += lumSrc[j][i] * lumFilter[j]; - - for (j = 0; j < chrFilterSize; j++) { - U += chrUSrc[j][i] * chrFilter[j]; - V += chrVSrc[j][i] * chrFilter[j]; - } - - Y >>= 10; - U >>= 10; - V >>= 10; - - if (hasAlpha) { - A = 1 << 18; - - for (j = 0; j < lumFilterSize; j++) - A += alpSrc[j][i] * lumFilter[j]; - - A >>= 19; - - if (A & 0x100) - A = av_clip_uint8(A); - } - - Y -= c->yuv2rgb_y_offset; - Y *= c->yuv2rgb_y_coeff; - Y += 1 << 21; - R = Y + V * c->yuv2rgb_v2r_coeff; - G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = Y + U * c->yuv2rgb_u2b_coeff; - - if ((R | G | B) & 0xC0000000) { - R = av_clip_uintp2(R, 30); - G = av_clip_uintp2(G, 30); - B = av_clip_uintp2(B, 30); - } - - if (SH != 22) { - dest16[0][i] = G >> SH; - dest16[1][i] = B >> SH; - dest16[2][i] = R >> SH; - if (hasAlpha) - dest16[3][i] = A; - } else { - dest[0][i] = G >> 22; - dest[1][i] = B >> 22; - dest[2][i] = R >> 22; - if (hasAlpha) - dest[3][i] = A; - } - } - if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { - for (i = 0; i < dstW; i++) { - dest16[0][i] = av_bswap16(dest16[0][i]); - dest16[1][i] = av_bswap16(dest16[1][i]); - dest16[2][i] = av_bswap16(dest16[2][i]); - if (hasAlpha) - dest16[3][i] = av_bswap16(dest16[3][i]); - } - } -} - -av_cold void ff_sws_init_output_funcs(SwsContext *c, - yuv2planar1_fn *yuv2plane1, - yuv2planarX_fn *yuv2planeX, - yuv2interleavedX_fn *yuv2nv12cX, - yuv2packed1_fn *yuv2packed1, - yuv2packed2_fn *yuv2packed2, - yuv2packedX_fn *yuv2packedX, - yuv2anyX_fn *yuv2anyX) -{ - enum AVPixelFormat dstFormat = c->dstFormat; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat); - - if (is16BPS(dstFormat)) { - *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c; - *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c; - } else if (is9_OR_10BPS(dstFormat)) { - if (desc->comp[0].depth_minus1 == 8) { - *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c; - *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c; - } else if (desc->comp[0].depth_minus1 == 9) { - *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c; - *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c; - } else if (desc->comp[0].depth_minus1 == 11) { - *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c : yuv2planeX_12LE_c; - *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c : yuv2plane1_12LE_c; - } else if (desc->comp[0].depth_minus1 == 13) { - *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c : yuv2planeX_14LE_c; - *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c : yuv2plane1_14LE_c; - } else - av_assert0(0); - } else { - *yuv2plane1 = yuv2plane1_8_c; - *yuv2planeX = yuv2planeX_8_c; - if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21) - *yuv2nv12cX = yuv2nv12cX_c; - } - - if(c->flags & SWS_FULL_CHR_H_INT) { - switch (dstFormat) { - case AV_PIX_FMT_RGBA: -#if CONFIG_SMALL - *yuv2packedX = yuv2rgba32_full_X_c; - *yuv2packed2 = yuv2rgba32_full_2_c; - *yuv2packed1 = yuv2rgba32_full_1_c; -#else -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packedX = yuv2rgba32_full_X_c; - *yuv2packed2 = yuv2rgba32_full_2_c; - *yuv2packed1 = yuv2rgba32_full_1_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packedX = yuv2rgbx32_full_X_c; - *yuv2packed2 = yuv2rgbx32_full_2_c; - *yuv2packed1 = yuv2rgbx32_full_1_c; - } -#endif /* !CONFIG_SMALL */ - break; - case AV_PIX_FMT_ARGB: -#if CONFIG_SMALL - *yuv2packedX = yuv2argb32_full_X_c; - *yuv2packed2 = yuv2argb32_full_2_c; - *yuv2packed1 = yuv2argb32_full_1_c; -#else -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packedX = yuv2argb32_full_X_c; - *yuv2packed2 = yuv2argb32_full_2_c; - *yuv2packed1 = yuv2argb32_full_1_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packedX = yuv2xrgb32_full_X_c; - *yuv2packed2 = yuv2xrgb32_full_2_c; - *yuv2packed1 = yuv2xrgb32_full_1_c; - } -#endif /* !CONFIG_SMALL */ - break; - case AV_PIX_FMT_BGRA: -#if CONFIG_SMALL - *yuv2packedX = yuv2bgra32_full_X_c; - *yuv2packed2 = yuv2bgra32_full_2_c; - *yuv2packed1 = yuv2bgra32_full_1_c; -#else -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packedX = yuv2bgra32_full_X_c; - *yuv2packed2 = yuv2bgra32_full_2_c; - *yuv2packed1 = yuv2bgra32_full_1_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packedX = yuv2bgrx32_full_X_c; - *yuv2packed2 = yuv2bgrx32_full_2_c; - *yuv2packed1 = yuv2bgrx32_full_1_c; - } -#endif /* !CONFIG_SMALL */ - break; - case AV_PIX_FMT_ABGR: -#if CONFIG_SMALL - *yuv2packedX = yuv2abgr32_full_X_c; - *yuv2packed2 = yuv2abgr32_full_2_c; - *yuv2packed1 = yuv2abgr32_full_1_c; -#else -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packedX = yuv2abgr32_full_X_c; - *yuv2packed2 = yuv2abgr32_full_2_c; - *yuv2packed1 = yuv2abgr32_full_1_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packedX = yuv2xbgr32_full_X_c; - *yuv2packed2 = yuv2xbgr32_full_2_c; - *yuv2packed1 = yuv2xbgr32_full_1_c; - } -#endif /* !CONFIG_SMALL */ - break; - case AV_PIX_FMT_RGB24: - *yuv2packedX = yuv2rgb24_full_X_c; - *yuv2packed2 = yuv2rgb24_full_2_c; - *yuv2packed1 = yuv2rgb24_full_1_c; - break; - case AV_PIX_FMT_BGR24: - *yuv2packedX = yuv2bgr24_full_X_c; - *yuv2packed2 = yuv2bgr24_full_2_c; - *yuv2packed1 = yuv2bgr24_full_1_c; - break; - case AV_PIX_FMT_BGR4_BYTE: - *yuv2packedX = yuv2bgr4_byte_full_X_c; - *yuv2packed2 = yuv2bgr4_byte_full_2_c; - *yuv2packed1 = yuv2bgr4_byte_full_1_c; - break; - case AV_PIX_FMT_RGB4_BYTE: - *yuv2packedX = yuv2rgb4_byte_full_X_c; - *yuv2packed2 = yuv2rgb4_byte_full_2_c; - *yuv2packed1 = yuv2rgb4_byte_full_1_c; - break; - case AV_PIX_FMT_BGR8: - *yuv2packedX = yuv2bgr8_full_X_c; - *yuv2packed2 = yuv2bgr8_full_2_c; - *yuv2packed1 = yuv2bgr8_full_1_c; - break; - case AV_PIX_FMT_RGB8: - *yuv2packedX = yuv2rgb8_full_X_c; - *yuv2packed2 = yuv2rgb8_full_2_c; - *yuv2packed1 = yuv2rgb8_full_1_c; - break; - case AV_PIX_FMT_GBRP: - case AV_PIX_FMT_GBRP9BE: - case AV_PIX_FMT_GBRP9LE: - case AV_PIX_FMT_GBRP10BE: - case AV_PIX_FMT_GBRP10LE: - case AV_PIX_FMT_GBRP12BE: - case AV_PIX_FMT_GBRP12LE: - case AV_PIX_FMT_GBRP14BE: - case AV_PIX_FMT_GBRP14LE: - case AV_PIX_FMT_GBRP16BE: - case AV_PIX_FMT_GBRP16LE: - case AV_PIX_FMT_GBRAP: - *yuv2anyX = yuv2gbrp_full_X_c; - break; - } - if (!*yuv2packedX && !*yuv2anyX) - goto YUV_PACKED; - } else { - YUV_PACKED: - switch (dstFormat) { - case AV_PIX_FMT_RGBA64LE: -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packed1 = yuv2rgba64le_1_c; - *yuv2packed2 = yuv2rgba64le_2_c; - *yuv2packedX = yuv2rgba64le_X_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packed1 = yuv2rgbx64le_1_c; - *yuv2packed2 = yuv2rgbx64le_2_c; - *yuv2packedX = yuv2rgbx64le_X_c; - } - break; - case AV_PIX_FMT_RGBA64BE: -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packed1 = yuv2rgba64be_1_c; - *yuv2packed2 = yuv2rgba64be_2_c; - *yuv2packedX = yuv2rgba64be_X_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packed1 = yuv2rgbx64be_1_c; - *yuv2packed2 = yuv2rgbx64be_2_c; - *yuv2packedX = yuv2rgbx64be_X_c; - } - break; - case AV_PIX_FMT_RGB48LE: - *yuv2packed1 = yuv2rgb48le_1_c; - *yuv2packed2 = yuv2rgb48le_2_c; - *yuv2packedX = yuv2rgb48le_X_c; - break; - case AV_PIX_FMT_RGB48BE: - *yuv2packed1 = yuv2rgb48be_1_c; - *yuv2packed2 = yuv2rgb48be_2_c; - *yuv2packedX = yuv2rgb48be_X_c; - break; - case AV_PIX_FMT_BGR48LE: - *yuv2packed1 = yuv2bgr48le_1_c; - *yuv2packed2 = yuv2bgr48le_2_c; - *yuv2packedX = yuv2bgr48le_X_c; - break; - case AV_PIX_FMT_BGR48BE: - *yuv2packed1 = yuv2bgr48be_1_c; - *yuv2packed2 = yuv2bgr48be_2_c; - *yuv2packedX = yuv2bgr48be_X_c; - break; - case AV_PIX_FMT_RGB32: - case AV_PIX_FMT_BGR32: -#if CONFIG_SMALL - *yuv2packed1 = yuv2rgb32_1_c; - *yuv2packed2 = yuv2rgb32_2_c; - *yuv2packedX = yuv2rgb32_X_c; -#else -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packed1 = yuv2rgba32_1_c; - *yuv2packed2 = yuv2rgba32_2_c; - *yuv2packedX = yuv2rgba32_X_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packed1 = yuv2rgbx32_1_c; - *yuv2packed2 = yuv2rgbx32_2_c; - *yuv2packedX = yuv2rgbx32_X_c; - } -#endif /* !CONFIG_SMALL */ - break; - case AV_PIX_FMT_RGB32_1: - case AV_PIX_FMT_BGR32_1: -#if CONFIG_SMALL - *yuv2packed1 = yuv2rgb32_1_1_c; - *yuv2packed2 = yuv2rgb32_1_2_c; - *yuv2packedX = yuv2rgb32_1_X_c; -#else -#if CONFIG_SWSCALE_ALPHA - if (c->alpPixBuf) { - *yuv2packed1 = yuv2rgba32_1_1_c; - *yuv2packed2 = yuv2rgba32_1_2_c; - *yuv2packedX = yuv2rgba32_1_X_c; - } else -#endif /* CONFIG_SWSCALE_ALPHA */ - { - *yuv2packed1 = yuv2rgbx32_1_1_c; - *yuv2packed2 = yuv2rgbx32_1_2_c; - *yuv2packedX = yuv2rgbx32_1_X_c; - } -#endif /* !CONFIG_SMALL */ - break; - case AV_PIX_FMT_RGB24: - *yuv2packed1 = yuv2rgb24_1_c; - *yuv2packed2 = yuv2rgb24_2_c; - *yuv2packedX = yuv2rgb24_X_c; - break; - case AV_PIX_FMT_BGR24: - *yuv2packed1 = yuv2bgr24_1_c; - *yuv2packed2 = yuv2bgr24_2_c; - *yuv2packedX = yuv2bgr24_X_c; - break; - case AV_PIX_FMT_RGB565LE: - case AV_PIX_FMT_RGB565BE: - case AV_PIX_FMT_BGR565LE: - case AV_PIX_FMT_BGR565BE: - *yuv2packed1 = yuv2rgb16_1_c; - *yuv2packed2 = yuv2rgb16_2_c; - *yuv2packedX = yuv2rgb16_X_c; - break; - case AV_PIX_FMT_RGB555LE: - case AV_PIX_FMT_RGB555BE: - case AV_PIX_FMT_BGR555LE: - case AV_PIX_FMT_BGR555BE: - *yuv2packed1 = yuv2rgb15_1_c; - *yuv2packed2 = yuv2rgb15_2_c; - *yuv2packedX = yuv2rgb15_X_c; - break; - case AV_PIX_FMT_RGB444LE: - case AV_PIX_FMT_RGB444BE: - case AV_PIX_FMT_BGR444LE: - case AV_PIX_FMT_BGR444BE: - *yuv2packed1 = yuv2rgb12_1_c; - *yuv2packed2 = yuv2rgb12_2_c; - *yuv2packedX = yuv2rgb12_X_c; - break; - case AV_PIX_FMT_RGB8: - case AV_PIX_FMT_BGR8: - *yuv2packed1 = yuv2rgb8_1_c; - *yuv2packed2 = yuv2rgb8_2_c; - *yuv2packedX = yuv2rgb8_X_c; - break; - case AV_PIX_FMT_RGB4: - case AV_PIX_FMT_BGR4: - *yuv2packed1 = yuv2rgb4_1_c; - *yuv2packed2 = yuv2rgb4_2_c; - *yuv2packedX = yuv2rgb4_X_c; - break; - case AV_PIX_FMT_RGB4_BYTE: - case AV_PIX_FMT_BGR4_BYTE: - *yuv2packed1 = yuv2rgb4b_1_c; - *yuv2packed2 = yuv2rgb4b_2_c; - *yuv2packedX = yuv2rgb4b_X_c; - break; - } - } - switch (dstFormat) { - case AV_PIX_FMT_MONOWHITE: - *yuv2packed1 = yuv2monowhite_1_c; - *yuv2packed2 = yuv2monowhite_2_c; - *yuv2packedX = yuv2monowhite_X_c; - break; - case AV_PIX_FMT_MONOBLACK: - *yuv2packed1 = yuv2monoblack_1_c; - *yuv2packed2 = yuv2monoblack_2_c; - *yuv2packedX = yuv2monoblack_X_c; - break; - case AV_PIX_FMT_YUYV422: - *yuv2packed1 = yuv2yuyv422_1_c; - *yuv2packed2 = yuv2yuyv422_2_c; - *yuv2packedX = yuv2yuyv422_X_c; - break; - case AV_PIX_FMT_UYVY422: - *yuv2packed1 = yuv2uyvy422_1_c; - *yuv2packed2 = yuv2uyvy422_2_c; - *yuv2packedX = yuv2uyvy422_X_c; - break; - } -} diff --git a/ffmpeg/libswscale/ppc/Makefile b/ffmpeg/libswscale/ppc/Makefile deleted file mode 100644 index d1b596e..0000000 --- a/ffmpeg/libswscale/ppc/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -OBJS += ppc/swscale_altivec.o \ - ppc/yuv2rgb_altivec.o \ - ppc/yuv2yuv_altivec.o \ diff --git a/ffmpeg/libswscale/ppc/swscale_altivec.c b/ffmpeg/libswscale/ppc/swscale_altivec.c deleted file mode 100644 index 86f40ab..0000000 --- a/ffmpeg/libswscale/ppc/swscale_altivec.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * AltiVec-enhanced yuv2yuvX - * - * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> - * based on the equivalent C code in swscale.c - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> - -#include "config.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" -#include "libavutil/attributes.h" -#include "libavutil/cpu.h" -#include "yuv2rgb_altivec.h" - -#if HAVE_ALTIVEC -#define vzero vec_splat_s32(0) - -#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do { \ - vector signed short l2 = vec_ld(((x) << 1) + 16, src); \ - vector signed short ls = vec_perm(l1, l2, perm); \ - vector signed int i1 = vec_mule(filter, ls); \ - vector signed int i2 = vec_mulo(filter, ls); \ - vector signed int vf1 = vec_mergeh(i1, i2); \ - vector signed int vf2 = vec_mergel(i1, i2); \ - d1 = vec_add(d1, vf1); \ - d2 = vec_add(d2, vf2); \ - l1 = l2; \ - } while (0) - -static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, - const uint8_t *dither, int offset, int x) -{ - register int i, j; - DECLARE_ALIGNED(16, int, val)[16]; - vector signed int vo1, vo2, vo3, vo4; - vector unsigned short vs1, vs2; - vector unsigned char vf; - vector unsigned int altivec_vectorShiftInt19 = - vec_add(vec_splat_u32(10), vec_splat_u32(9)); - - for (i = 0; i < 16; i++) - val[i] = dither[(x + i + offset) & 7] << 12; - - vo1 = vec_ld(0, val); - vo2 = vec_ld(16, val); - vo3 = vec_ld(32, val); - vo4 = vec_ld(48, val); - - for (j = 0; j < filterSize; j++) { - vector signed short l1, vLumFilter = vec_ld(j << 1, filter); - vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter); - vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0); - vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter - - perm = vec_lvsl(x << 1, src[j]); - l1 = vec_ld(x << 1, src[j]); - - yuv2planeX_8(vo1, vo2, l1, src[j], x, perm, vLumFilter); - yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter); - } - - vo1 = vec_sra(vo1, altivec_vectorShiftInt19); - vo2 = vec_sra(vo2, altivec_vectorShiftInt19); - vo3 = vec_sra(vo3, altivec_vectorShiftInt19); - vo4 = vec_sra(vo4, altivec_vectorShiftInt19); - vs1 = vec_packsu(vo1, vo2); - vs2 = vec_packsu(vo3, vo4); - vf = vec_packsu(vs1, vs2); - vec_st(vf, 0, dest); -} - -static inline void yuv2planeX_u(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset, int x) -{ - int i, j; - - for (i = x; i < dstW; i++) { - int t = dither[(i + offset) & 7] << 12; - for (j = 0; j < filterSize; j++) - t += src[j][i] * filter[j]; - dest[i] = av_clip_uint8(t >> 19); - } -} - -static void yuv2planeX_altivec(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset) -{ - int dst_u = -(uintptr_t)dest & 15; - int i; - - yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0); - - for (i = dst_u; i < dstW - 15; i += 16) - yuv2planeX_16_altivec(filter, filterSize, src, dest + i, dither, - offset, i); - - yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i); -} - -static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, - const uint8_t *src, const int16_t *filter, - const int32_t *filterPos, int filterSize) -{ - register int i; - DECLARE_ALIGNED(16, int, tempo)[4]; - - if (filterSize % 4) { - for (i = 0; i < dstW; i++) { - register int j; - register int srcPos = filterPos[i]; - register int val = 0; - for (j = 0; j < filterSize; j++) - val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; - dst[i] = FFMIN(val >> 7, (1 << 15) - 1); - } - } else - switch (filterSize) { - case 4: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - - vector unsigned char src_v0 = vec_ld(srcPos, src); - vector unsigned char src_v1, src_vF; - vector signed short src_v, filter_v; - vector signed int val_vEven, val_s; - if ((((uintptr_t)src + srcPos) % 16) > 12) { - src_v1 = vec_ld(srcPos + 16, src); - } - src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src)); - - src_v = // vec_unpackh sign-extends... - (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); - // now put our elements in the even slots - src_v = vec_mergeh(src_v, (vector signed short)vzero); - - filter_v = vec_ld(i << 3, filter); - // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2). - - // The neat trick: We only care for half the elements, - // high or low depending on (i<<3)%16 (it's 0 or 8 here), - // and we're going to use vec_mule, so we choose - // carefully how to "unpack" the elements into the even slots. - if ((i << 3) % 16) - filter_v = vec_mergel(filter_v, (vector signed short)vzero); - else - filter_v = vec_mergeh(filter_v, (vector signed short)vzero); - - val_vEven = vec_mule(src_v, filter_v); - val_s = vec_sums(val_vEven, vzero); - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } - break; - - case 8: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - - vector unsigned char src_v0 = vec_ld(srcPos, src); - vector unsigned char src_v1, src_vF; - vector signed short src_v, filter_v; - vector signed int val_v, val_s; - if ((((uintptr_t)src + srcPos) % 16) > 8) { - src_v1 = vec_ld(srcPos + 16, src); - } - src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src)); - - src_v = // vec_unpackh sign-extends... - (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); - filter_v = vec_ld(i << 4, filter); - // the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2) - - val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); - val_s = vec_sums(val_v, vzero); - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } - break; - - case 16: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - - vector unsigned char src_v0 = vec_ld(srcPos, src); - vector unsigned char src_v1 = vec_ld(srcPos + 16, src); - vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src)); - - vector signed short src_vA = // vec_unpackh sign-extends... - (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); - vector signed short src_vB = // vec_unpackh sign-extends... - (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF)); - - vector signed short filter_v0 = vec_ld(i << 5, filter); - vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); - // the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2) - - vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); - vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); - - vector signed int val_s = vec_sums(val_v, vzero); - - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } - break; - - default: - for (i = 0; i < dstW; i++) { - register int j; - register int srcPos = filterPos[i]; - - vector signed int val_s, val_v = (vector signed int)vzero; - vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter); - vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter); - - vector unsigned char src_v0 = vec_ld(srcPos, src); - vector unsigned char permS = vec_lvsl(srcPos, src); - - for (j = 0; j < filterSize - 15; j += 16) { - vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src); - vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS); - - vector signed short src_vA = // vec_unpackh sign-extends... - (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); - vector signed short src_vB = // vec_unpackh sign-extends... - (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF)); - - vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter); - vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter); - vector signed short filter_v0 = vec_perm(filter_v0R, filter_v1R, permF); - vector signed short filter_v1 = vec_perm(filter_v1R, filter_v2R, permF); - - vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v); - val_v = vec_msums(src_vB, filter_v1, val_acc); - - filter_v0R = filter_v2R; - src_v0 = src_v1; - } - - if (j < filterSize - 7) { - // loading src_v0 is useless, it's already done above - // vector unsigned char src_v0 = vec_ld(srcPos + j, src); - vector unsigned char src_v1, src_vF; - vector signed short src_v, filter_v1R, filter_v; - if ((((uintptr_t)src + srcPos) % 16) > 8) { - src_v1 = vec_ld(srcPos + j + 16, src); - } - src_vF = vec_perm(src_v0, src_v1, permS); - - src_v = // vec_unpackh sign-extends... - (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); - // loading filter_v0R is useless, it's already done above - // vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter); - filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter); - filter_v = vec_perm(filter_v0R, filter_v1R, permF); - - val_v = vec_msums(src_v, filter_v, val_v); - } - - val_s = vec_sums(val_v, vzero); - - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } - } -} -#endif /* HAVE_ALTIVEC */ - -av_cold void ff_sws_init_swscale_ppc(SwsContext *c) -{ -#if HAVE_ALTIVEC - enum AVPixelFormat dstFormat = c->dstFormat; - - if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) - return; - - if (c->srcBpc == 8 && c->dstBpc <= 14) { - c->hyScale = c->hcScale = hScale_altivec_real; - } - if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && - dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && - !c->alpPixBuf) { - c->yuv2planeX = yuv2planeX_altivec; - } - - /* The following list of supported dstFormat values should - * match what's found in the body of ff_yuv2packedX_altivec() */ - if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) { - switch (c->dstFormat) { - case AV_PIX_FMT_ABGR: - c->yuv2packedX = ff_yuv2abgr_X_altivec; - break; - case AV_PIX_FMT_BGRA: - c->yuv2packedX = ff_yuv2bgra_X_altivec; - break; - case AV_PIX_FMT_ARGB: - c->yuv2packedX = ff_yuv2argb_X_altivec; - break; - case AV_PIX_FMT_RGBA: - c->yuv2packedX = ff_yuv2rgba_X_altivec; - break; - case AV_PIX_FMT_BGR24: - c->yuv2packedX = ff_yuv2bgr24_X_altivec; - break; - case AV_PIX_FMT_RGB24: - c->yuv2packedX = ff_yuv2rgb24_X_altivec; - break; - } - } -#endif /* HAVE_ALTIVEC */ -} diff --git a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c b/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c deleted file mode 100644 index 25282bf..0000000 --- a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c +++ /dev/null @@ -1,868 +0,0 @@ -/* - * AltiVec acceleration for colorspace conversion - * - * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * Convert I420 YV12 to RGB in various formats, - * it rejects images that are not in 420 formats, - * it rejects images that don't have widths of multiples of 16, - * it rejects images that don't have heights of multiples of 2. - * Reject defers to C simulation code. - * - * Lots of optimizations to be done here. - * - * 1. Need to fix saturation code. I just couldn't get it to fly with packs - * and adds, so we currently use max/min to clip. - * - * 2. The inefficient use of chroma loading needs a bit of brushing up. - * - * 3. Analysis of pipeline stalls needs to be done. Use shark to identify - * pipeline stalls. - * - * - * MODIFIED to calculate coeffs from currently selected color space. - * MODIFIED core to be a macro where you specify the output format. - * ADDED UYVY conversion which is never called due to some thing in swscale. - * CORRECTED algorithim selection to be strict on input formats. - * ADDED runtime detection of AltiVec. - * - * ADDED altivec_yuv2packedX vertical scl + RGB converter - * - * March 27,2004 - * PERFORMANCE ANALYSIS - * - * The C version uses 25% of the processor or ~250Mips for D1 video rawvideo - * used as test. - * The AltiVec version uses 10% of the processor or ~100Mips for D1 video - * same sequence. - * - * 720 * 480 * 30 ~10MPS - * - * so we have roughly 10 clocks per pixel. This is too high, something has - * to be wrong. - * - * OPTIMIZED clip codes to utilize vec_max and vec_packs removing the - * need for vec_min. - * - * OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to - * have the input video frame, it was just decompressed so it probably resides - * in L1 caches. However, we are creating the output video stream. This needs - * to use the DSTST instruction to optimize for the cache. We couple this with - * the fact that we are not going to be visiting the input buffer again so we - * mark it Least Recently Used. This shaves 25% of the processor cycles off. - * - * Now memcpy is the largest mips consumer in the system, probably due - * to the inefficient X11 stuff. - * - * GL libraries seem to be very slow on this machine 1.33Ghz PB running - * Jaguar, this is not the case for my 1Ghz PB. I thought it might be - * a versioning issue, however I have libGL.1.2.dylib for both - * machines. (We need to figure this out now.) - * - * GL2 libraries work now with patch for RGB32. - * - * NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor. - * - * Integrated luma prescaling adjustment for saturation/contrast/brightness - * adjustment. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <inttypes.h> -#include <assert.h> - -#include "config.h" -#include "libswscale/rgb2rgb.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" -#include "libavutil/attributes.h" -#include "libavutil/cpu.h" -#include "libavutil/pixdesc.h" -#include "yuv2rgb_altivec.h" - -#if HAVE_ALTIVEC - -#undef PROFILE_THE_BEAST -#undef INC_SCALING - -typedef unsigned char ubyte; -typedef signed char sbyte; - -/* RGB interleaver, 16 planar pels 8-bit samples per channel in - * homogeneous vector registers x0,x1,x2 are interleaved with the - * following technique: - * - * o0 = vec_mergeh(x0, x1); - * o1 = vec_perm(o0, x2, perm_rgb_0); - * o2 = vec_perm(o0, x2, perm_rgb_1); - * o3 = vec_mergel(x0, x1); - * o4 = vec_perm(o3, o2, perm_rgb_2); - * o5 = vec_perm(o3, o2, perm_rgb_3); - * - * perm_rgb_0: o0(RG).h v1(B) --> o1* - * 0 1 2 3 4 - * rgbr|gbrg|brgb|rgbr - * 0010 0100 1001 0010 - * 0102 3145 2673 894A - * - * perm_rgb_1: o0(RG).h v1(B) --> o2 - * 0 1 2 3 4 - * gbrg|brgb|bbbb|bbbb - * 0100 1001 1111 1111 - * B5CD 6EF7 89AB CDEF - * - * perm_rgb_2: o3(RG).l o2(rgbB.l) --> o4* - * 0 1 2 3 4 - * gbrg|brgb|rgbr|gbrg - * 1111 1111 0010 0100 - * 89AB CDEF 0182 3945 - * - * perm_rgb_2: o3(RG).l o2(rgbB.l) ---> o5* - * 0 1 2 3 4 - * brgb|rgbr|gbrg|brgb - * 1001 0010 0100 1001 - * a67b 89cA BdCD eEFf - * - */ -static const vector unsigned char - perm_rgb_0 = { 0x00, 0x01, 0x10, 0x02, 0x03, 0x11, 0x04, 0x05, - 0x12, 0x06, 0x07, 0x13, 0x08, 0x09, 0x14, 0x0a }, - perm_rgb_1 = { 0x0b, 0x15, 0x0c, 0x0d, 0x16, 0x0e, 0x0f, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, - perm_rgb_2 = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x00, 0x01, 0x18, 0x02, 0x03, 0x19, 0x04, 0x05 }, - perm_rgb_3 = { 0x1a, 0x06, 0x07, 0x1b, 0x08, 0x09, 0x1c, 0x0a, - 0x0b, 0x1d, 0x0c, 0x0d, 0x1e, 0x0e, 0x0f, 0x1f }; - -#define vec_merge3(x2, x1, x0, y0, y1, y2) \ - do { \ - __typeof__(x0) o0, o2, o3; \ - o0 = vec_mergeh(x0, x1); \ - y0 = vec_perm(o0, x2, perm_rgb_0); \ - o2 = vec_perm(o0, x2, perm_rgb_1); \ - o3 = vec_mergel(x0, x1); \ - y1 = vec_perm(o3, o2, perm_rgb_2); \ - y2 = vec_perm(o3, o2, perm_rgb_3); \ - } while (0) - -#define vec_mstbgr24(x0, x1, x2, ptr) \ - do { \ - __typeof__(x0) _0, _1, _2; \ - vec_merge3(x0, x1, x2, _0, _1, _2); \ - vec_st(_0, 0, ptr++); \ - vec_st(_1, 0, ptr++); \ - vec_st(_2, 0, ptr++); \ - } while (0) - -#define vec_mstrgb24(x0, x1, x2, ptr) \ - do { \ - __typeof__(x0) _0, _1, _2; \ - vec_merge3(x2, x1, x0, _0, _1, _2); \ - vec_st(_0, 0, ptr++); \ - vec_st(_1, 0, ptr++); \ - vec_st(_2, 0, ptr++); \ - } while (0) - -/* pack the pixels in rgb0 format - * msb R - * lsb 0 - */ -#define vec_mstrgb32(T, x0, x1, x2, x3, ptr) \ - do { \ - T _0, _1, _2, _3; \ - _0 = vec_mergeh(x0, x1); \ - _1 = vec_mergeh(x2, x3); \ - _2 = (T) vec_mergeh((vector unsigned short) _0, \ - (vector unsigned short) _1); \ - _3 = (T) vec_mergel((vector unsigned short) _0, \ - (vector unsigned short) _1); \ - vec_st(_2, 0 * 16, (T *) ptr); \ - vec_st(_3, 1 * 16, (T *) ptr); \ - _0 = vec_mergel(x0, x1); \ - _1 = vec_mergel(x2, x3); \ - _2 = (T) vec_mergeh((vector unsigned short) _0, \ - (vector unsigned short) _1); \ - _3 = (T) vec_mergel((vector unsigned short) _0, \ - (vector unsigned short) _1); \ - vec_st(_2, 2 * 16, (T *) ptr); \ - vec_st(_3, 3 * 16, (T *) ptr); \ - ptr += 4; \ - } while (0) - -/* - * 1 0 1.4021 | | Y | - * 1 -0.3441 -0.7142 |x| Cb| - * 1 1.7718 0 | | Cr| - * - * - * Y: [-128 127] - * Cb/Cr : [-128 127] - * - * typical YUV conversion works on Y: 0-255 this version has been - * optimized for JPEG decoding. - */ - -#define vec_unh(x) \ - (vector signed short) \ - vec_perm(x, (__typeof__(x)) { 0 }, \ - ((vector unsigned char) { \ - 0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03, \ - 0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07 })) - -#define vec_unl(x) \ - (vector signed short) \ - vec_perm(x, (__typeof__(x)) { 0 }, \ - ((vector unsigned char) { \ - 0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B, \ - 0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F })) - -#define vec_clip_s16(x) \ - vec_max(vec_min(x, ((vector signed short) { \ - 235, 235, 235, 235, 235, 235, 235, 235 })), \ - ((vector signed short) { 16, 16, 16, 16, 16, 16, 16, 16 })) - -#define vec_packclp(x, y) \ - (vector unsigned char) \ - vec_packs((vector unsigned short) \ - vec_max(x, ((vector signed short) { 0 })), \ - (vector unsigned short) \ - vec_max(y, ((vector signed short) { 0 }))) - -static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y, - vector signed short U, vector signed short V, - vector signed short *R, vector signed short *G, - vector signed short *B) -{ - vector signed short vx, ux, uvx; - - Y = vec_mradds(Y, c->CY, c->OY); - U = vec_sub(U, (vector signed short) - vec_splat((vector signed short) { 128 }, 0)); - V = vec_sub(V, (vector signed short) - vec_splat((vector signed short) { 128 }, 0)); - - // ux = (CBU * (u << c->CSHIFT) + 0x4000) >> 15; - ux = vec_sl(U, c->CSHIFT); - *B = vec_mradds(ux, c->CBU, Y); - - // vx = (CRV * (v << c->CSHIFT) + 0x4000) >> 15; - vx = vec_sl(V, c->CSHIFT); - *R = vec_mradds(vx, c->CRV, Y); - - // uvx = ((CGU * u) + (CGV * v)) >> 15; - uvx = vec_mradds(U, c->CGU, Y); - *G = vec_mradds(V, c->CGV, uvx); -} - -/* - * ------------------------------------------------------------------------------ - * CS converters - * ------------------------------------------------------------------------------ - */ - -#define DEFCSP420_CVT(name, out_pixels) \ -static int altivec_ ## name(SwsContext *c, const unsigned char **in, \ - int *instrides, int srcSliceY, int srcSliceH, \ - unsigned char **oplanes, int *outstrides) \ -{ \ - int w = c->srcW; \ - int h = srcSliceH; \ - int i, j; \ - int instrides_scl[3]; \ - vector unsigned char y0, y1; \ - \ - vector signed char u, v; \ - \ - vector signed short Y0, Y1, Y2, Y3; \ - vector signed short U, V; \ - vector signed short vx, ux, uvx; \ - vector signed short vx0, ux0, uvx0; \ - vector signed short vx1, ux1, uvx1; \ - vector signed short R0, G0, B0; \ - vector signed short R1, G1, B1; \ - vector unsigned char R, G, B; \ - \ - const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ - vector unsigned char align_perm; \ - \ - vector signed short lCY = c->CY; \ - vector signed short lOY = c->OY; \ - vector signed short lCRV = c->CRV; \ - vector signed short lCBU = c->CBU; \ - vector signed short lCGU = c->CGU; \ - vector signed short lCGV = c->CGV; \ - vector unsigned short lCSHIFT = c->CSHIFT; \ - \ - const ubyte *y1i = in[0]; \ - const ubyte *y2i = in[0] + instrides[0]; \ - const ubyte *ui = in[1]; \ - const ubyte *vi = in[2]; \ - \ - vector unsigned char *oute, *outo; \ - \ - /* loop moves y{1, 2}i by w */ \ - instrides_scl[0] = instrides[0] * 2 - w; \ - /* loop moves ui by w / 2 */ \ - instrides_scl[1] = instrides[1] - w / 2; \ - /* loop moves vi by w / 2 */ \ - instrides_scl[2] = instrides[2] - w / 2; \ - \ - for (i = 0; i < h / 2; i++) { \ - oute = (vector unsigned char *)(oplanes[0] + outstrides[0] * \ - (srcSliceY + i * 2)); \ - outo = oute + (outstrides[0] >> 4); \ - vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \ - vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); \ - \ - for (j = 0; j < w / 16; j++) { \ - y1ivP = (const vector unsigned char *) y1i; \ - y2ivP = (const vector unsigned char *) y2i; \ - uivP = (const vector unsigned char *) ui; \ - vivP = (const vector unsigned char *) vi; \ - \ - align_perm = vec_lvsl(0, y1i); \ - y0 = (vector unsigned char) \ - vec_perm(y1ivP[0], y1ivP[1], align_perm); \ - \ - align_perm = vec_lvsl(0, y2i); \ - y1 = (vector unsigned char) \ - vec_perm(y2ivP[0], y2ivP[1], align_perm); \ - \ - align_perm = vec_lvsl(0, ui); \ - u = (vector signed char) \ - vec_perm(uivP[0], uivP[1], align_perm); \ - \ - align_perm = vec_lvsl(0, vi); \ - v = (vector signed char) \ - vec_perm(vivP[0], vivP[1], align_perm); \ - \ - u = (vector signed char) \ - vec_sub(u, \ - (vector signed char) \ - vec_splat((vector signed char) { 128 }, 0)); \ - v = (vector signed char) \ - vec_sub(v, \ - (vector signed char) \ - vec_splat((vector signed char) { 128 }, 0)); \ - \ - U = vec_unpackh(u); \ - V = vec_unpackh(v); \ - \ - Y0 = vec_unh(y0); \ - Y1 = vec_unl(y0); \ - Y2 = vec_unh(y1); \ - Y3 = vec_unl(y1); \ - \ - Y0 = vec_mradds(Y0, lCY, lOY); \ - Y1 = vec_mradds(Y1, lCY, lOY); \ - Y2 = vec_mradds(Y2, lCY, lOY); \ - Y3 = vec_mradds(Y3, lCY, lOY); \ - \ - /* ux = (CBU * (u << CSHIFT) + 0x4000) >> 15 */ \ - ux = vec_sl(U, lCSHIFT); \ - ux = vec_mradds(ux, lCBU, (vector signed short) { 0 }); \ - ux0 = vec_mergeh(ux, ux); \ - ux1 = vec_mergel(ux, ux); \ - \ - /* vx = (CRV * (v << CSHIFT) + 0x4000) >> 15; */ \ - vx = vec_sl(V, lCSHIFT); \ - vx = vec_mradds(vx, lCRV, (vector signed short) { 0 }); \ - vx0 = vec_mergeh(vx, vx); \ - vx1 = vec_mergel(vx, vx); \ - \ - /* uvx = ((CGU * u) + (CGV * v)) >> 15 */ \ - uvx = vec_mradds(U, lCGU, (vector signed short) { 0 }); \ - uvx = vec_mradds(V, lCGV, uvx); \ - uvx0 = vec_mergeh(uvx, uvx); \ - uvx1 = vec_mergel(uvx, uvx); \ - \ - R0 = vec_add(Y0, vx0); \ - G0 = vec_add(Y0, uvx0); \ - B0 = vec_add(Y0, ux0); \ - R1 = vec_add(Y1, vx1); \ - G1 = vec_add(Y1, uvx1); \ - B1 = vec_add(Y1, ux1); \ - \ - R = vec_packclp(R0, R1); \ - G = vec_packclp(G0, G1); \ - B = vec_packclp(B0, B1); \ - \ - out_pixels(R, G, B, oute); \ - \ - R0 = vec_add(Y2, vx0); \ - G0 = vec_add(Y2, uvx0); \ - B0 = vec_add(Y2, ux0); \ - R1 = vec_add(Y3, vx1); \ - G1 = vec_add(Y3, uvx1); \ - B1 = vec_add(Y3, ux1); \ - R = vec_packclp(R0, R1); \ - G = vec_packclp(G0, G1); \ - B = vec_packclp(B0, B1); \ - \ - \ - out_pixels(R, G, B, outo); \ - \ - y1i += 16; \ - y2i += 16; \ - ui += 8; \ - vi += 8; \ - } \ - \ - ui += instrides_scl[1]; \ - vi += instrides_scl[2]; \ - y1i += instrides_scl[0]; \ - y2i += instrides_scl[0]; \ - } \ - return srcSliceH; \ -} - -#define out_abgr(a, b, c, ptr) \ - vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), c, b, a, ptr) -#define out_bgra(a, b, c, ptr) \ - vec_mstrgb32(__typeof__(a), c, b, a, ((__typeof__(a)) { 255 }), ptr) -#define out_rgba(a, b, c, ptr) \ - vec_mstrgb32(__typeof__(a), a, b, c, ((__typeof__(a)) { 255 }), ptr) -#define out_argb(a, b, c, ptr) \ - vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, b, c, ptr) -#define out_rgb24(a, b, c, ptr) vec_mstrgb24(a, b, c, ptr) -#define out_bgr24(a, b, c, ptr) vec_mstbgr24(a, b, c, ptr) - -DEFCSP420_CVT(yuv2_abgr, out_abgr) -DEFCSP420_CVT(yuv2_bgra, out_bgra) -DEFCSP420_CVT(yuv2_rgba, out_rgba) -DEFCSP420_CVT(yuv2_argb, out_argb) -DEFCSP420_CVT(yuv2_rgb24, out_rgb24) -DEFCSP420_CVT(yuv2_bgr24, out_bgr24) - -// uyvy|uyvy|uyvy|uyvy -// 0123 4567 89ab cdef -static const vector unsigned char - demux_u = { 0x10, 0x00, 0x10, 0x00, - 0x10, 0x04, 0x10, 0x04, - 0x10, 0x08, 0x10, 0x08, - 0x10, 0x0c, 0x10, 0x0c }, - demux_v = { 0x10, 0x02, 0x10, 0x02, - 0x10, 0x06, 0x10, 0x06, - 0x10, 0x0A, 0x10, 0x0A, - 0x10, 0x0E, 0x10, 0x0E }, - demux_y = { 0x10, 0x01, 0x10, 0x03, - 0x10, 0x05, 0x10, 0x07, - 0x10, 0x09, 0x10, 0x0B, - 0x10, 0x0D, 0x10, 0x0F }; - -/* - * this is so I can play live CCIR raw video - */ -static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in, - int *instrides, int srcSliceY, int srcSliceH, - unsigned char **oplanes, int *outstrides) -{ - int w = c->srcW; - int h = srcSliceH; - int i, j; - vector unsigned char uyvy; - vector signed short Y, U, V; - vector signed short R0, G0, B0, R1, G1, B1; - vector unsigned char R, G, B; - vector unsigned char *out; - const ubyte *img; - - img = in[0]; - out = (vector unsigned char *) (oplanes[0] + srcSliceY * outstrides[0]); - - for (i = 0; i < h; i++) - for (j = 0; j < w / 16; j++) { - uyvy = vec_ld(0, img); - - U = (vector signed short) - vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u); - V = (vector signed short) - vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v); - Y = (vector signed short) - vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y); - - cvtyuvtoRGB(c, Y, U, V, &R0, &G0, &B0); - - uyvy = vec_ld(16, img); - - U = (vector signed short) - vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u); - V = (vector signed short) - vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v); - Y = (vector signed short) - vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y); - - cvtyuvtoRGB(c, Y, U, V, &R1, &G1, &B1); - - R = vec_packclp(R0, R1); - G = vec_packclp(G0, G1); - B = vec_packclp(B0, B1); - - // vec_mstbgr24 (R,G,B, out); - out_rgba(R, G, B, out); - - img += 32; - } - return srcSliceH; -} - -#endif /* HAVE_ALTIVEC */ - -/* Ok currently the acceleration routine only supports - * inputs of widths a multiple of 16 - * and heights a multiple 2 - * - * So we just fall back to the C codes for this. - */ -av_cold SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c) -{ -#if HAVE_ALTIVEC - if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) - return NULL; - - /* - * and this seems not to matter too much I tried a bunch of - * videos with abnormal widths and MPlayer crashes elsewhere. - * mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv - * boom with X11 bad match. - * - */ - if ((c->srcW & 0xf) != 0) - return NULL; - - switch (c->srcFormat) { - case AV_PIX_FMT_YUV410P: - case AV_PIX_FMT_YUV420P: - /*case IMGFMT_CLPL: ??? */ - case AV_PIX_FMT_GRAY8: - case AV_PIX_FMT_NV12: - case AV_PIX_FMT_NV21: - if ((c->srcH & 0x1) != 0) - return NULL; - - switch (c->dstFormat) { - case AV_PIX_FMT_RGB24: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n"); - return altivec_yuv2_rgb24; - case AV_PIX_FMT_BGR24: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n"); - return altivec_yuv2_bgr24; - case AV_PIX_FMT_ARGB: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n"); - return altivec_yuv2_argb; - case AV_PIX_FMT_ABGR: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n"); - return altivec_yuv2_abgr; - case AV_PIX_FMT_RGBA: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n"); - return altivec_yuv2_rgba; - case AV_PIX_FMT_BGRA: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n"); - return altivec_yuv2_bgra; - default: return NULL; - } - break; - - case AV_PIX_FMT_UYVY422: - switch (c->dstFormat) { - case AV_PIX_FMT_BGR32: - av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n"); - return altivec_uyvy_rgb32; - default: return NULL; - } - break; - } -#endif /* HAVE_ALTIVEC */ - - return NULL; -} - -av_cold void ff_yuv2rgb_init_tables_ppc(SwsContext *c, - const int inv_table[4], - int brightness, - int contrast, - int saturation) -{ -#if HAVE_ALTIVEC - union { - DECLARE_ALIGNED(16, signed short, tmp)[8]; - vector signed short vec; - } buf; - - if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) - return; - - buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9; // cy - buf.tmp[1] = -256 * brightness; // oy - buf.tmp[2] = (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16); // crv - buf.tmp[3] = (inv_table[1] >> 3) * (contrast >> 16) * (saturation >> 16); // cbu - buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgu - buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgv - - c->CSHIFT = (vector unsigned short) vec_splat_u16(2); - c->CY = vec_splat((vector signed short) buf.vec, 0); - c->OY = vec_splat((vector signed short) buf.vec, 1); - c->CRV = vec_splat((vector signed short) buf.vec, 2); - c->CBU = vec_splat((vector signed short) buf.vec, 3); - c->CGU = vec_splat((vector signed short) buf.vec, 4); - c->CGV = vec_splat((vector signed short) buf.vec, 5); - return; -#endif /* HAVE_ALTIVEC */ -} - -#if HAVE_ALTIVEC - -static av_always_inline void yuv2packedX_altivec(SwsContext *c, - const int16_t *lumFilter, - const int16_t **lumSrc, - int lumFilterSize, - const int16_t *chrFilter, - const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, - const int16_t **alpSrc, - uint8_t *dest, - int dstW, int dstY, - enum AVPixelFormat target) -{ - int i, j; - vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V; - vector signed short R0, G0, B0, R1, G1, B1; - - vector unsigned char R, G, B; - vector unsigned char *out, *nout; - - vector signed short RND = vec_splat_s16(1 << 3); - vector unsigned short SCL = vec_splat_u16(4); - DECLARE_ALIGNED(16, unsigned int, scratch)[16]; - - vector signed short *YCoeffs, *CCoeffs; - - YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize; - CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize; - - out = (vector unsigned char *) dest; - - for (i = 0; i < dstW; i += 16) { - Y0 = RND; - Y1 = RND; - /* extract 16 coeffs from lumSrc */ - for (j = 0; j < lumFilterSize; j++) { - X0 = vec_ld(0, &lumSrc[j][i]); - X1 = vec_ld(16, &lumSrc[j][i]); - Y0 = vec_mradds(X0, YCoeffs[j], Y0); - Y1 = vec_mradds(X1, YCoeffs[j], Y1); - } - - U = RND; - V = RND; - /* extract 8 coeffs from U,V */ - for (j = 0; j < chrFilterSize; j++) { - X = vec_ld(0, &chrUSrc[j][i / 2]); - U = vec_mradds(X, CCoeffs[j], U); - X = vec_ld(0, &chrVSrc[j][i / 2]); - V = vec_mradds(X, CCoeffs[j], V); - } - - /* scale and clip signals */ - Y0 = vec_sra(Y0, SCL); - Y1 = vec_sra(Y1, SCL); - U = vec_sra(U, SCL); - V = vec_sra(V, SCL); - - Y0 = vec_clip_s16(Y0); - Y1 = vec_clip_s16(Y1); - U = vec_clip_s16(U); - V = vec_clip_s16(V); - - /* now we have - * Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 - * U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7 - * - * Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 - * U0 = u0 u0 u1 u1 u2 u2 u3 u3 U1 = u4 u4 u5 u5 u6 u6 u7 u7 - * V0 = v0 v0 v1 v1 v2 v2 v3 v3 V1 = v4 v4 v5 v5 v6 v6 v7 v7 - */ - - U0 = vec_mergeh(U, U); - V0 = vec_mergeh(V, V); - - U1 = vec_mergel(U, U); - V1 = vec_mergel(V, V); - - cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0); - cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1); - - R = vec_packclp(R0, R1); - G = vec_packclp(G0, G1); - B = vec_packclp(B0, B1); - - switch (target) { - case AV_PIX_FMT_ABGR: - out_abgr(R, G, B, out); - break; - case AV_PIX_FMT_BGRA: - out_bgra(R, G, B, out); - break; - case AV_PIX_FMT_RGBA: - out_rgba(R, G, B, out); - break; - case AV_PIX_FMT_ARGB: - out_argb(R, G, B, out); - break; - case AV_PIX_FMT_RGB24: - out_rgb24(R, G, B, out); - break; - case AV_PIX_FMT_BGR24: - out_bgr24(R, G, B, out); - break; - default: - { - /* If this is reached, the caller should have called yuv2packedXinC - * instead. */ - static int printed_error_message; - if (!printed_error_message) { - av_log(c, AV_LOG_ERROR, - "altivec_yuv2packedX doesn't support %s output\n", - av_get_pix_fmt_name(c->dstFormat)); - printed_error_message = 1; - } - return; - } - } - } - - if (i < dstW) { - i -= 16; - - Y0 = RND; - Y1 = RND; - /* extract 16 coeffs from lumSrc */ - for (j = 0; j < lumFilterSize; j++) { - X0 = vec_ld(0, &lumSrc[j][i]); - X1 = vec_ld(16, &lumSrc[j][i]); - Y0 = vec_mradds(X0, YCoeffs[j], Y0); - Y1 = vec_mradds(X1, YCoeffs[j], Y1); - } - - U = RND; - V = RND; - /* extract 8 coeffs from U,V */ - for (j = 0; j < chrFilterSize; j++) { - X = vec_ld(0, &chrUSrc[j][i / 2]); - U = vec_mradds(X, CCoeffs[j], U); - X = vec_ld(0, &chrVSrc[j][i / 2]); - V = vec_mradds(X, CCoeffs[j], V); - } - - /* scale and clip signals */ - Y0 = vec_sra(Y0, SCL); - Y1 = vec_sra(Y1, SCL); - U = vec_sra(U, SCL); - V = vec_sra(V, SCL); - - Y0 = vec_clip_s16(Y0); - Y1 = vec_clip_s16(Y1); - U = vec_clip_s16(U); - V = vec_clip_s16(V); - - /* now we have - * Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 - * U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7 - * - * Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 - * U0 = u0 u0 u1 u1 u2 u2 u3 u3 U1 = u4 u4 u5 u5 u6 u6 u7 u7 - * V0 = v0 v0 v1 v1 v2 v2 v3 v3 V1 = v4 v4 v5 v5 v6 v6 v7 v7 - */ - - U0 = vec_mergeh(U, U); - V0 = vec_mergeh(V, V); - - U1 = vec_mergel(U, U); - V1 = vec_mergel(V, V); - - cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0); - cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1); - - R = vec_packclp(R0, R1); - G = vec_packclp(G0, G1); - B = vec_packclp(B0, B1); - - nout = (vector unsigned char *) scratch; - switch (target) { - case AV_PIX_FMT_ABGR: - out_abgr(R, G, B, nout); - break; - case AV_PIX_FMT_BGRA: - out_bgra(R, G, B, nout); - break; - case AV_PIX_FMT_RGBA: - out_rgba(R, G, B, nout); - break; - case AV_PIX_FMT_ARGB: - out_argb(R, G, B, nout); - break; - case AV_PIX_FMT_RGB24: - out_rgb24(R, G, B, nout); - break; - case AV_PIX_FMT_BGR24: - out_bgr24(R, G, B, nout); - break; - default: - /* Unreachable, I think. */ - av_log(c, AV_LOG_ERROR, - "altivec_yuv2packedX doesn't support %s output\n", - av_get_pix_fmt_name(c->dstFormat)); - return; - } - - memcpy(&((uint32_t *) dest)[i], scratch, (dstW - i) / 4); - } -} - -#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \ -void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ - const int16_t *lumFilter, \ - const int16_t **lumSrc, \ - int lumFilterSize, \ - const int16_t *chrFilter, \ - const int16_t **chrUSrc, \ - const int16_t **chrVSrc, \ - int chrFilterSize, \ - const int16_t **alpSrc, \ - uint8_t *dest, int dstW, int dstY) \ -{ \ - yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, \ - chrFilterSize, alpSrc, \ - dest, dstW, dstY, pixfmt); \ -} - -YUV2PACKEDX_WRAPPER(abgr, AV_PIX_FMT_ABGR); -YUV2PACKEDX_WRAPPER(bgra, AV_PIX_FMT_BGRA); -YUV2PACKEDX_WRAPPER(argb, AV_PIX_FMT_ARGB); -YUV2PACKEDX_WRAPPER(rgba, AV_PIX_FMT_RGBA); -YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24); -YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24); - -#endif /* HAVE_ALTIVEC */ diff --git a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h b/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h deleted file mode 100644 index aa52a47..0000000 --- a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * AltiVec-enhanced yuv2yuvX - * - * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> - * based on the equivalent C code in swscale.c - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H -#define SWSCALE_PPC_YUV2RGB_ALTIVEC_H - -#include <stdint.h> - -#include "libswscale/swscale_internal.h" - -#define YUV2PACKEDX_HEADER(suffix) \ - void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ - const int16_t *lumFilter, \ - const int16_t **lumSrc, \ - int lumFilterSize, \ - const int16_t *chrFilter, \ - const int16_t **chrUSrc, \ - const int16_t **chrVSrc, \ - int chrFilterSize, \ - const int16_t **alpSrc, \ - uint8_t *dest, \ - int dstW, int dstY); - -YUV2PACKEDX_HEADER(abgr); -YUV2PACKEDX_HEADER(bgra); -YUV2PACKEDX_HEADER(argb); -YUV2PACKEDX_HEADER(rgba); -YUV2PACKEDX_HEADER(rgb24); -YUV2PACKEDX_HEADER(bgr24); - -#endif /* SWSCALE_PPC_YUV2RGB_ALTIVEC_H */ diff --git a/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c b/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c deleted file mode 100644 index 2b1c5dd..0000000 --- a/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * AltiVec-enhanced yuv-to-yuv conversion routines. - * - * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> - * based on the equivalent C code in swscale.c - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> - -#include "config.h" -#include "libavutil/attributes.h" -#include "libavutil/cpu.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" - -#if HAVE_ALTIVEC - -static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, - int srcSliceH, uint8_t *dstParam[], - int dstStride_a[]) -{ - uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; - // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, - // srcStride[0], srcStride[1], dstStride[0]); - const uint8_t *ysrc = src[0]; - const uint8_t *usrc = src[1]; - const uint8_t *vsrc = src[2]; - const int width = c->srcW; - const int height = srcSliceH; - const int lumStride = srcStride[0]; - const int chromStride = srcStride[1]; - const int dstStride = dstStride_a[0]; - const vector unsigned char yperm = vec_lvsl(0, ysrc); - const int vertLumPerChroma = 2; - register unsigned int y; - - /* This code assumes: - * - * 1) dst is 16 bytes-aligned - * 2) dstStride is a multiple of 16 - * 3) width is a multiple of 16 - * 4) lum & chrom stride are multiples of 8 - */ - - for (y = 0; y < height; y++) { - int i; - for (i = 0; i < width - 31; i += 32) { - const unsigned int j = i >> 1; - vector unsigned char v_yA = vec_ld(i, ysrc); - vector unsigned char v_yB = vec_ld(i + 16, ysrc); - vector unsigned char v_yC = vec_ld(i + 32, ysrc); - vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); - vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); - vector unsigned char v_uA = vec_ld(j, usrc); - vector unsigned char v_uB = vec_ld(j + 16, usrc); - vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); - vector unsigned char v_vA = vec_ld(j, vsrc); - vector unsigned char v_vB = vec_ld(j + 16, vsrc); - vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_uv_b = vec_mergel(v_u, v_v); - vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); - vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); - vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b); - vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b); - vec_st(v_yuy2_0, (i << 1), dst); - vec_st(v_yuy2_1, (i << 1) + 16, dst); - vec_st(v_yuy2_2, (i << 1) + 32, dst); - vec_st(v_yuy2_3, (i << 1) + 48, dst); - } - if (i < width) { - const unsigned int j = i >> 1; - vector unsigned char v_y1 = vec_ld(i, ysrc); - vector unsigned char v_u = vec_ld(j, usrc); - vector unsigned char v_v = vec_ld(j, vsrc); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); - vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); - vec_st(v_yuy2_0, (i << 1), dst); - vec_st(v_yuy2_1, (i << 1) + 16, dst); - } - if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } - - return srcSliceH; -} - -static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, - int srcSliceH, uint8_t *dstParam[], - int dstStride_a[]) -{ - uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; - // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, - // srcStride[0], srcStride[1], dstStride[0]); - const uint8_t *ysrc = src[0]; - const uint8_t *usrc = src[1]; - const uint8_t *vsrc = src[2]; - const int width = c->srcW; - const int height = srcSliceH; - const int lumStride = srcStride[0]; - const int chromStride = srcStride[1]; - const int dstStride = dstStride_a[0]; - const int vertLumPerChroma = 2; - const vector unsigned char yperm = vec_lvsl(0, ysrc); - register unsigned int y; - - /* This code assumes: - * - * 1) dst is 16 bytes-aligned - * 2) dstStride is a multiple of 16 - * 3) width is a multiple of 16 - * 4) lum & chrom stride are multiples of 8 - */ - - for (y = 0; y < height; y++) { - int i; - for (i = 0; i < width - 31; i += 32) { - const unsigned int j = i >> 1; - vector unsigned char v_yA = vec_ld(i, ysrc); - vector unsigned char v_yB = vec_ld(i + 16, ysrc); - vector unsigned char v_yC = vec_ld(i + 32, ysrc); - vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); - vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); - vector unsigned char v_uA = vec_ld(j, usrc); - vector unsigned char v_uB = vec_ld(j + 16, usrc); - vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); - vector unsigned char v_vA = vec_ld(j, vsrc); - vector unsigned char v_vB = vec_ld(j + 16, vsrc); - vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_uv_b = vec_mergel(v_u, v_v); - vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); - vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); - vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2); - vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2); - vec_st(v_uyvy_0, (i << 1), dst); - vec_st(v_uyvy_1, (i << 1) + 16, dst); - vec_st(v_uyvy_2, (i << 1) + 32, dst); - vec_st(v_uyvy_3, (i << 1) + 48, dst); - } - if (i < width) { - const unsigned int j = i >> 1; - vector unsigned char v_y1 = vec_ld(i, ysrc); - vector unsigned char v_u = vec_ld(j, usrc); - vector unsigned char v_v = vec_ld(j, vsrc); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); - vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); - vec_st(v_uyvy_0, (i << 1), dst); - vec_st(v_uyvy_1, (i << 1) + 16, dst); - } - if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } - return srcSliceH; -} - -#endif /* HAVE_ALTIVEC */ - -av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c) -{ -#if HAVE_ALTIVEC - if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) - return; - - if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) && - c->srcFormat == AV_PIX_FMT_YUV420P) { - enum AVPixelFormat dstFormat = c->dstFormat; - - // unscaled YV12 -> packed YUV, we want speed - if (dstFormat == AV_PIX_FMT_YUYV422) - c->swscale = yv12toyuy2_unscaled_altivec; - else if (dstFormat == AV_PIX_FMT_UYVY422) - c->swscale = yv12touyvy_unscaled_altivec; - } -#endif /* HAVE_ALTIVEC */ -} diff --git a/ffmpeg/libswscale/rgb2rgb.c b/ffmpeg/libswscale/rgb2rgb.c deleted file mode 100644 index cf877fb..0000000 --- a/ffmpeg/libswscale/rgb2rgb.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * software RGB to RGB converter - * pluralize by software PAL8 to RGB converter - * software YUV to YUV converter - * software YUV to RGB converter - * Written by Nick Kurshev. - * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> - -#include "libavutil/attributes.h" -#include "libavutil/bswap.h" -#include "config.h" -#include "rgb2rgb.h" -#include "swscale.h" -#include "swscale_internal.h" - -void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); - -void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); -void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); - -void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); - -void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); -void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); -void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); -void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); -void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, - uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride); -void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, - uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t *rgb2yuv); -void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, - int srcStride, int dstStride); -void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, - int width, int height, int src1Stride, - int src2Stride, int dstStride); -void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, - int width, int height, int srcStride, - int dst1Stride, int dst2Stride); -void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - int width, int height, - int srcStride1, int srcStride2, - int dstStride1, int dstStride2); -void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, - const uint8_t *src3, uint8_t *dst, - int width, int height, - int srcStride1, int srcStride2, - int srcStride3, int dstStride); -void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride); -void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride); -void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride); -void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride); - -#define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GY ((int)( 0.504 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GV ((int)(-0.368 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GU ((int)(-0.291 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RY ((int)( 0.257 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RV ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RU ((int)(-0.148 * (1 << RGB2YUV_SHIFT) + 0.5)) - -//plain C versions -#include "rgb2rgb_template.c" - -/* - * RGB15->RGB16 original by Strepto/Astral - * ported to gcc & bugfixed : A'rpi - * MMXEXT, 3DNOW optimization by Nick Kurshev - * 32-bit C version, and and&add trick by Michael Niedermayer - */ - -av_cold void sws_rgb2rgb_init(void) -{ - rgb2rgb_init_c(); - if (ARCH_X86) - rgb2rgb_init_x86(); -} - -void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i, num_pixels = src_size >> 2; - - for (i = 0; i < num_pixels; i++) { -#if HAVE_BIGENDIAN - /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */ - dst[3 * i + 0] = src[4 * i + 1]; - dst[3 * i + 1] = src[4 * i + 2]; - dst[3 * i + 2] = src[4 * i + 3]; -#else - dst[3 * i + 0] = src[4 * i + 2]; - dst[3 * i + 1] = src[4 * i + 1]; - dst[3 * i + 2] = src[4 * i + 0]; -#endif - } -} - -void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i; - - for (i = 0; 3 * i < src_size; i++) { -#if HAVE_BIGENDIAN - /* RGB24 (= R, G, B) -> BGR32 (= A, R, G, B) */ - dst[4 * i + 0] = 255; - dst[4 * i + 1] = src[3 * i + 0]; - dst[4 * i + 2] = src[3 * i + 1]; - dst[4 * i + 3] = src[3 * i + 2]; -#else - dst[4 * i + 0] = src[3 * i + 2]; - dst[4 * i + 1] = src[3 * i + 1]; - dst[4 * i + 2] = src[3 * i + 0]; - dst[4 * i + 3] = 255; -#endif - } -} - -void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; -#if HAVE_BIGENDIAN - *d++ = 255; - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); -#else - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = 255; -#endif - } -} - -void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint16_t rgb, r, g, b; - uint16_t *d = (uint16_t *)dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - rgb = *s++; - r = rgb & 0xF00; - g = rgb & 0x0F0; - b = rgb & 0x00F; - r = (r << 3) | ((r & 0x800) >> 1); - g = (g << 2) | ((g & 0x080) >> 2); - b = (b << 1) | ( b >> 3); - *d++ = r | g | b; - } -} - -void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - } -} - -void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i, num_pixels = src_size >> 1; - - for (i = 0; i < num_pixels; i++) { - unsigned rgb = ((const uint16_t *)src)[i]; - ((uint16_t *)dst)[i] = (rgb >> 11) | (rgb & 0x7E0) | (rgb << 11); - } -} - -void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i, num_pixels = src_size >> 1; - - for (i = 0; i < num_pixels; i++) { - unsigned rgb = ((const uint16_t *)src)[i]; - ((uint16_t *)dst)[i] = (rgb >> 11) | ((rgb & 0x7C0) >> 1) | ((rgb & 0x1F) << 10); - } -} - -void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; -#if HAVE_BIGENDIAN - *d++ = 255; - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); -#else - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = 255; -#endif - } -} - -void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - } -} - -void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i, num_pixels = src_size >> 1; - - for (i = 0; i < num_pixels; i++) { - unsigned rgb = ((const uint16_t *)src)[i]; - ((uint16_t *)dst)[i] = ((rgb & 0x7C00) >> 10) | ((rgb & 0x3E0) << 1) | (rgb << 11); - } -} - -void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i, num_pixels = src_size >> 1; - - for (i = 0; i < num_pixels; i++) { - unsigned rgb = ((const uint16_t *)src)[i]; - unsigned br = rgb & 0x7C1F; - ((uint16_t *)dst)[i] = (br >> 10) | (rgb & 0x3E0) | (br << 10); - } -} - -void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint16_t *d = (uint16_t *)dst; - uint16_t *s = (uint16_t *)src; - int i, num_pixels = src_size >> 1; - - for (i = 0; i < num_pixels; i++) { - unsigned rgb = s[i]; - d[i] = (rgb << 8 | rgb & 0xF0 | rgb >> 8) & 0xFFF; - } -} - - -#define DEFINE_SHUFFLE_BYTES(a, b, c, d) \ -void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src, \ - uint8_t *dst, int src_size) \ -{ \ - int i; \ - \ - for (i = 0; i < src_size; i += 4) { \ - dst[i + 0] = src[i + a]; \ - dst[i + 1] = src[i + b]; \ - dst[i + 2] = src[i + c]; \ - dst[i + 3] = src[i + d]; \ - } \ -} - -DEFINE_SHUFFLE_BYTES(0, 3, 2, 1) -DEFINE_SHUFFLE_BYTES(1, 2, 3, 0) -DEFINE_SHUFFLE_BYTES(3, 0, 1, 2) -DEFINE_SHUFFLE_BYTES(3, 2, 1, 0) - -#define DEFINE_RGB48TOBGR48(need_bswap, swap) \ -void rgb48tobgr48_ ## need_bswap(const uint8_t *src, \ - uint8_t *dst, int src_size) \ -{ \ - uint16_t *d = (uint16_t *)dst; \ - uint16_t *s = (uint16_t *)src; \ - int i, num_pixels = src_size >> 1; \ - \ - for (i = 0; i < num_pixels; i += 3) { \ - d[i ] = swap ? av_bswap16(s[i + 2]) : s[i + 2]; \ - d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1]; \ - d[i + 2] = swap ? av_bswap16(s[i ]) : s[i ]; \ - } \ -} - -DEFINE_RGB48TOBGR48(nobswap, 0) -DEFINE_RGB48TOBGR48(bswap, 1) - -#define DEFINE_RGB64TOBGR48(need_bswap, swap) \ -void rgb64tobgr48_ ## need_bswap(const uint8_t *src, \ - uint8_t *dst, int src_size) \ -{ \ - uint16_t *d = (uint16_t *)dst; \ - uint16_t *s = (uint16_t *)src; \ - int i, num_pixels = src_size >> 3; \ - \ - for (i = 0; i < num_pixels; i++) { \ - d[3 * i ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \ - d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \ - d[3 * i + 2] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \ - } \ -} - -DEFINE_RGB64TOBGR48(nobswap, 0) -DEFINE_RGB64TOBGR48(bswap, 1) - -#define DEFINE_RGB64TO48(need_bswap, swap) \ -void rgb64to48_ ## need_bswap(const uint8_t *src, \ - uint8_t *dst, int src_size) \ -{ \ - uint16_t *d = (uint16_t *)dst; \ - uint16_t *s = (uint16_t *)src; \ - int i, num_pixels = src_size >> 3; \ - \ - for (i = 0; i < num_pixels; i++) { \ - d[3 * i ] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \ - d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \ - d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \ - } \ -} - -DEFINE_RGB64TO48(nobswap, 0) -DEFINE_RGB64TO48(bswap, 1) diff --git a/ffmpeg/libswscale/rgb2rgb.h b/ffmpeg/libswscale/rgb2rgb.h deleted file mode 100644 index 5df5dea..0000000 --- a/ffmpeg/libswscale/rgb2rgb.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * software RGB to RGB converter - * pluralize by Software PAL8 to RGB converter - * Software YUV to YUV converter - * Software YUV to RGB converter - * Written by Nick Kurshev. - * YUV & runtime CPU stuff by Michael (michaelni@gmx.at) - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SWSCALE_RGB2RGB_H -#define SWSCALE_RGB2RGB_H - -#include <inttypes.h> - -#include "libavutil/avutil.h" -#include "swscale.h" - -/* A full collection of RGB to RGB(BGR) converters */ -extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); - -extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); - -void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); -void rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size); -void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); -void rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size); -void rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); -void rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size); -void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size); -void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size); -void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size); -void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size); -void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size); -void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); -void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); -void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size); -void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); - -void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size); -void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); -void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size); -void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size); - -void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv); - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); - -/** - * Width should be a multiple of 16. - */ -extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride); - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); - -/** - * Width should be a multiple of 16. - */ -extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride); - -/** - * Height should be a multiple of 2 and width should be a multiple of 2. - * (If this is a problem for anyone then tell me, and I will fix it.) - * Chrominance data is only taken from every second line, others are ignored. - * FIXME: Write high quality version. - */ -extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t *rgb2yuv); -extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, - int srcStride, int dstStride); - -extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, - int width, int height, int src1Stride, - int src2Stride, int dstStride); - -extern void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, - int width, int height, int srcStride, - int dst1Stride, int dst2Stride); - -extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - int width, int height, - int srcStride1, int srcStride2, - int dstStride1, int dstStride2); - -extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - uint8_t *dst, - int width, int height, - int srcStride1, int srcStride2, - int srcStride3, int dstStride); - -extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride); -extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride); -extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride); -extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride); - -void sws_rgb2rgb_init(void); - -void rgb2rgb_init_x86(void); - -#endif /* SWSCALE_RGB2RGB_H */ diff --git a/ffmpeg/libswscale/rgb2rgb_template.c b/ffmpeg/libswscale/rgb2rgb_template.c deleted file mode 100644 index 56e735f..0000000 --- a/ffmpeg/libswscale/rgb2rgb_template.c +++ /dev/null @@ -1,951 +0,0 @@ -/* - * software RGB to RGB converter - * pluralize by software PAL8 to RGB converter - * software YUV to YUV converter - * software YUV to RGB converter - * Written by Nick Kurshev. - * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) - * lot of big-endian byte order fixes by Alex Beregszaszi - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stddef.h> - -#include "libavutil/attributes.h" - -static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { -#if HAVE_BIGENDIAN - /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */ - *dest++ = 255; - *dest++ = s[2]; - *dest++ = s[1]; - *dest++ = s[0]; - s += 3; -#else - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - *dest++ = 255; -#endif - } -} - -static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { -#if HAVE_BIGENDIAN - /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */ - s++; - dest[2] = *s++; - dest[1] = *s++; - dest[0] = *s++; - dest += 3; -#else - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - s++; -#endif - } -} - -/* - * original by Strepto/Astral - * ported to gcc & bugfixed: A'rpi - * MMXEXT, 3DNOW optimization by Nick Kurshev - * 32-bit C version, and and&add trick by Michael Niedermayer - */ -static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - register uint8_t *d = dst; - register const uint8_t *s = src; - register const uint8_t *end = s + src_size; - const uint8_t *mm_end = end - 3; - - while (s < mm_end) { - register unsigned x = *((const uint32_t *)s); - *((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0); - d += 4; - s += 4; - } - if (s < end) { - register unsigned short x = *((const uint16_t *)s); - *((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0); - } -} - -static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - register uint8_t *d = dst; - register const uint8_t *s = src; - register const uint8_t *end = s + src_size; - const uint8_t *mm_end = end - 3; - - while (s < mm_end) { - register uint32_t x = *((const uint32_t *)s); - *((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F); - s += 4; - d += 4; - } - if (s < end) { - register uint16_t x = *((const uint16_t *)s); - *((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F); - } -} - -static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - register int rgb = *(const uint32_t *)s; - s += 4; - *d++ = ((rgb & 0xFF) >> 3) + - ((rgb & 0xFC00) >> 5) + - ((rgb & 0xF80000) >> 8); - } -} - -static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - register int rgb = *(const uint32_t *)s; - s += 4; - *d++ = ((rgb & 0xF8) << 8) + - ((rgb & 0xFC00) >> 5) + - ((rgb & 0xF80000) >> 19); - } -} - -static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - register int rgb = *(const uint32_t *)s; - s += 4; - *d++ = ((rgb & 0xFF) >> 3) + - ((rgb & 0xF800) >> 6) + - ((rgb & 0xF80000) >> 9); - } -} - -static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - register int rgb = *(const uint32_t *)s; - s += 4; - *d++ = ((rgb & 0xF8) << 7) + - ((rgb & 0xF800) >> 6) + - ((rgb & 0xF80000) >> 19); - } -} - -static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - const int b = *s++; - const int g = *s++; - const int r = *s++; - *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8); - } -} - -static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - const int r = *s++; - const int g = *s++; - const int b = *s++; - *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8); - } -} - -static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - const int b = *s++; - const int g = *s++; - const int r = *s++; - *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7); - } -} - -static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint16_t *d = (uint16_t *)dst; - const uint8_t *s = src; - const uint8_t *end = s + src_size; - - while (s < end) { - const int r = *s++; - const int g = *s++; - const int b = *s++; - *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7); - } -} - -static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - } -} - -static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - } -} - -static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; -#if HAVE_BIGENDIAN - *d++ = 255; - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); -#else - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - *d++ = 255; -#endif - } -} - -static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - const uint16_t *end = s + src_size / 2; - - while (s < end) { - register uint16_t bgr = *s++; -#if HAVE_BIGENDIAN - *d++ = 255; - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); -#else - *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); - *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - *d++ = 255; -#endif - } -} - -static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, - int src_size) -{ - int idx = 15 - src_size; - const uint8_t *s = src - idx; - uint8_t *d = dst - idx; - - for (; idx < 15; idx += 4) { - register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; - v &= 0xff00ff; - *(uint32_t *)&d[idx] = (v >> 16) + g + (v << 16); - } -} - -static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) -{ - unsigned i; - - for (i = 0; i < src_size; i += 3) { - register uint8_t x = src[i + 2]; - dst[i + 1] = src[i + 1]; - dst[i + 2] = src[i + 0]; - dst[i + 0] = x; - } -} - -static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, - int dstStride, int vertLumPerChroma) -{ - int y, i; - const int chromWidth = width >> 1; - - for (y = 0; y < height; y++) { -#if HAVE_FAST_64BIT - uint64_t *ldst = (uint64_t *)dst; - const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; - for (i = 0; i < chromWidth; i += 2) { - uint64_t k = yc[0] + (uc[0] << 8) + - (yc[1] << 16) + (unsigned)(vc[0] << 24); - uint64_t l = yc[2] + (uc[1] << 8) + - (yc[3] << 16) + (unsigned)(vc[1] << 24); - *ldst++ = k + (l << 32); - yc += 4; - uc += 2; - vc += 2; - } - -#else - int *idst = (int32_t *)dst; - const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; - - for (i = 0; i < chromWidth; i++) { -#if HAVE_BIGENDIAN - *idst++ = (yc[0] << 24) + (uc[0] << 16) + - (yc[1] << 8) + (vc[0] << 0); -#else - *idst++ = yc[0] + (uc[0] << 8) + - (yc[1] << 16) + (vc[0] << 24); -#endif - yc += 2; - uc++; - vc++; - } -#endif - if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, int lumStride, - int chromStride, int dstStride) -{ - //FIXME interpolate chroma - yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride, - chromStride, dstStride, 2); -} - -static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, - int dstStride, int vertLumPerChroma) -{ - int y, i; - const int chromWidth = width >> 1; - - for (y = 0; y < height; y++) { -#if HAVE_FAST_64BIT - uint64_t *ldst = (uint64_t *)dst; - const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; - for (i = 0; i < chromWidth; i += 2) { - uint64_t k = uc[0] + (yc[0] << 8) + - (vc[0] << 16) + (unsigned)(yc[1] << 24); - uint64_t l = uc[1] + (yc[2] << 8) + - (vc[1] << 16) + (unsigned)(yc[3] << 24); - *ldst++ = k + (l << 32); - yc += 4; - uc += 2; - vc += 2; - } - -#else - int *idst = (int32_t *)dst; - const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; - - for (i = 0; i < chromWidth; i++) { -#if HAVE_BIGENDIAN - *idst++ = (uc[0] << 24) + (yc[0] << 16) + - (vc[0] << 8) + (yc[1] << 0); -#else - *idst++ = uc[0] + (yc[0] << 8) + - (vc[0] << 16) + (yc[1] << 24); -#endif - yc += 2; - uc++; - vc++; - } -#endif - if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16 - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, int lumStride, - int chromStride, int dstStride) -{ - //FIXME interpolate chroma - yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride, - chromStride, dstStride, 2); -} - -/** - * Width should be a multiple of 16. - */ -static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, int lumStride, - int chromStride, int dstStride) -{ - yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride, - chromStride, dstStride, 1); -} - -/** - * Width should be a multiple of 16. - */ -static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - int width, int height, int lumStride, - int chromStride, int dstStride) -{ - yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride, - chromStride, dstStride, 1); -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst, - uint8_t *udst, uint8_t *vdst, - int width, int height, int lumStride, - int chromStride, int srcStride) -{ - int y; - const int chromWidth = width >> 1; - - for (y = 0; y < height; y += 2) { - int i; - for (i = 0; i < chromWidth; i++) { - ydst[2 * i + 0] = src[4 * i + 0]; - udst[i] = src[4 * i + 1]; - ydst[2 * i + 1] = src[4 * i + 2]; - vdst[i] = src[4 * i + 3]; - } - ydst += lumStride; - src += srcStride; - - for (i = 0; i < chromWidth; i++) { - ydst[2 * i + 0] = src[4 * i + 0]; - ydst[2 * i + 1] = src[4 * i + 2]; - } - udst += chromStride; - vdst += chromStride; - ydst += lumStride; - src += srcStride; - } -} - -static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth, - int srcHeight, int srcStride, int dstStride) -{ - int x, y; - - dst[0] = src[0]; - - // first line - for (x = 0; x < srcWidth - 1; x++) { - dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2; - dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2; - } - dst[2 * srcWidth - 1] = src[srcWidth - 1]; - - dst += dstStride; - - for (y = 1; y < srcHeight; y++) { - const int mmxSize = 1; - - dst[0] = (src[0] * 3 + src[srcStride]) >> 2; - dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2; - - for (x = mmxSize - 1; x < srcWidth - 1; x++) { - dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2; - dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2; - dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2; - dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2; - } - dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2; - dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2; - - dst += dstStride * 2; - src += srcStride; - } - - // last line - dst[0] = src[0]; - - for (x = 0; x < srcWidth - 1; x++) { - dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2; - dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2; - } - dst[2 * srcWidth - 1] = src[srcWidth - 1]; -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - * Chrominance data is only taken from every second line, others are ignored. - * FIXME: Write HQ version. - */ -static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, - uint8_t *udst, uint8_t *vdst, - int width, int height, int lumStride, - int chromStride, int srcStride) -{ - int y; - const int chromWidth = width >> 1; - - for (y = 0; y < height; y += 2) { - int i; - for (i = 0; i < chromWidth; i++) { - udst[i] = src[4 * i + 0]; - ydst[2 * i + 0] = src[4 * i + 1]; - vdst[i] = src[4 * i + 2]; - ydst[2 * i + 1] = src[4 * i + 3]; - } - ydst += lumStride; - src += srcStride; - - for (i = 0; i < chromWidth; i++) { - ydst[2 * i + 0] = src[4 * i + 1]; - ydst[2 * i + 1] = src[4 * i + 3]; - } - udst += chromStride; - vdst += chromStride; - ydst += lumStride; - src += srcStride; - } -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 2. - * (If this is a problem for anyone then tell me, and I will fix it.) - * Chrominance data is only taken from every second line, - * others are ignored in the C version. - * FIXME: Write HQ version. - */ -void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv) -{ - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; - int y; - const int chromWidth = width >> 1; - - for (y = 0; y < height; y += 2) { - int i; - for (i = 0; i < chromWidth; i++) { - unsigned int b = src[6 * i + 0]; - unsigned int g = src[6 * i + 1]; - unsigned int r = src[6 * i + 2]; - - unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; - unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; - - udst[i] = U; - vdst[i] = V; - ydst[2 * i] = Y; - - b = src[6 * i + 3]; - g = src[6 * i + 4]; - r = src[6 * i + 5]; - - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; - } - ydst += lumStride; - src += srcStride; - - if (y+1 == height) - break; - - for (i = 0; i < chromWidth; i++) { - unsigned int b = src[6 * i + 0]; - unsigned int g = src[6 * i + 1]; - unsigned int r = src[6 * i + 2]; - - unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - - ydst[2 * i] = Y; - - b = src[6 * i + 3]; - g = src[6 * i + 4]; - r = src[6 * i + 5]; - - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; - } - udst += chromStride; - vdst += chromStride; - ydst += lumStride; - src += srcStride; - } -} - -static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, - uint8_t *dest, int width, int height, - int src1Stride, int src2Stride, int dstStride) -{ - int h; - - for (h = 0; h < height; h++) { - int w; - for (w = 0; w < width; w++) { - dest[2 * w + 0] = src1[w]; - dest[2 * w + 1] = src2[w]; - } - dest += dstStride; - src1 += src1Stride; - src2 += src2Stride; - } -} - -static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, - int width, int height, int srcStride, - int dst1Stride, int dst2Stride) -{ - int h; - - for (h = 0; h < height; h++) { - int w; - for (w = 0; w < width; w++) { - dst1[w] = src[2 * w + 0]; - dst2[w] = src[2 * w + 1]; - } - src += srcStride; - dst1 += dst1Stride; - dst2 += dst2Stride; - } -} - -static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - int width, int height, - int srcStride1, int srcStride2, - int dstStride1, int dstStride2) -{ - int x, y; - int w = width / 2; - int h = height / 2; - - for (y = 0; y < h; y++) { - const uint8_t *s1 = src1 + srcStride1 * (y >> 1); - uint8_t *d = dst1 + dstStride1 * y; - for (x = 0; x < w; x++) - d[2 * x] = d[2 * x + 1] = s1[x]; - } - for (y = 0; y < h; y++) { - const uint8_t *s2 = src2 + srcStride2 * (y >> 1); - uint8_t *d = dst2 + dstStride2 * y; - for (x = 0; x < w; x++) - d[2 * x] = d[2 * x + 1] = s2[x]; - } -} - -static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2, - const uint8_t *src3, uint8_t *dst, - int width, int height, - int srcStride1, int srcStride2, - int srcStride3, int dstStride) -{ - int x, y; - int w = width / 2; - int h = height; - - for (y = 0; y < h; y++) { - const uint8_t *yp = src1 + srcStride1 * y; - const uint8_t *up = src2 + srcStride2 * (y >> 2); - const uint8_t *vp = src3 + srcStride3 * (y >> 2); - uint8_t *d = dst + dstStride * y; - for (x = 0; x < w; x++) { - const int x2 = x << 2; - d[8 * x + 0] = yp[x2]; - d[8 * x + 1] = up[x]; - d[8 * x + 2] = yp[x2 + 1]; - d[8 * x + 3] = vp[x]; - d[8 * x + 4] = yp[x2 + 2]; - d[8 * x + 5] = up[x]; - d[8 * x + 6] = yp[x2 + 3]; - d[8 * x + 7] = vp[x]; - } - } -} - -static void extract_even_c(const uint8_t *src, uint8_t *dst, int count) -{ - dst += count; - src += count * 2; - count = -count; - while (count < 0) { - dst[count] = src[2 * count]; - count++; - } -} - -static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, - int count) -{ - dst0 += count; - dst1 += count; - src += count * 4; - count = -count; - while (count < 0) { - dst0[count] = src[4 * count + 0]; - dst1[count] = src[4 * count + 2]; - count++; - } -} - -static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1, - uint8_t *dst0, uint8_t *dst1, int count) -{ - dst0 += count; - dst1 += count; - src0 += count * 4; - src1 += count * 4; - count = -count; - while (count < 0) { - dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1; - dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1; - count++; - } -} - -static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, - int count) -{ - dst0 += count; - dst1 += count; - src += count * 4; - count = -count; - src++; - while (count < 0) { - dst0[count] = src[4 * count + 0]; - dst1[count] = src[4 * count + 2]; - count++; - } -} - -static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1, - uint8_t *dst0, uint8_t *dst1, int count) -{ - dst0 += count; - dst1 += count; - src0 += count * 4; - src1 += count * 4; - count = -count; - src0++; - src1++; - while (count < 0) { - dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1; - dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1; - count++; - } -} - -static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y = 0; y < height; y++) { - extract_even_c(src, ydst, width); - if (y & 1) { - extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth); - udst += chromStride; - vdst += chromStride; - } - - src += srcStride; - ydst += lumStride; - } -} - -static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y = 0; y < height; y++) { - extract_even_c(src, ydst, width); - extract_odd2_c(src, udst, vdst, chromWidth); - - src += srcStride; - ydst += lumStride; - udst += chromStride; - vdst += chromStride; - } -} - -static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y = 0; y < height; y++) { - extract_even_c(src + 1, ydst, width); - if (y & 1) { - extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth); - udst += chromStride; - vdst += chromStride; - } - - src += srcStride; - ydst += lumStride; - } -} - -static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - const uint8_t *src, int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y = 0; y < height; y++) { - extract_even_c(src + 1, ydst, width); - extract_even2_c(src, udst, vdst, chromWidth); - - src += srcStride; - ydst += lumStride; - udst += chromStride; - vdst += chromStride; - } -} - -static av_cold void rgb2rgb_init_c(void) -{ - rgb15to16 = rgb15to16_c; - rgb15tobgr24 = rgb15tobgr24_c; - rgb15to32 = rgb15to32_c; - rgb16tobgr24 = rgb16tobgr24_c; - rgb16to32 = rgb16to32_c; - rgb16to15 = rgb16to15_c; - rgb24tobgr16 = rgb24tobgr16_c; - rgb24tobgr15 = rgb24tobgr15_c; - rgb24tobgr32 = rgb24tobgr32_c; - rgb32to16 = rgb32to16_c; - rgb32to15 = rgb32to15_c; - rgb32tobgr24 = rgb32tobgr24_c; - rgb24to15 = rgb24to15_c; - rgb24to16 = rgb24to16_c; - rgb24tobgr24 = rgb24tobgr24_c; - shuffle_bytes_2103 = shuffle_bytes_2103_c; - rgb32tobgr16 = rgb32tobgr16_c; - rgb32tobgr15 = rgb32tobgr15_c; - yv12toyuy2 = yv12toyuy2_c; - yv12touyvy = yv12touyvy_c; - yuv422ptoyuy2 = yuv422ptoyuy2_c; - yuv422ptouyvy = yuv422ptouyvy_c; - yuy2toyv12 = yuy2toyv12_c; - planar2x = planar2x_c; - ff_rgb24toyv12 = ff_rgb24toyv12_c; - interleaveBytes = interleaveBytes_c; - deinterleaveBytes = deinterleaveBytes_c; - vu9_to_vu12 = vu9_to_vu12_c; - yvu9_to_yuy2 = yvu9_to_yuy2_c; - - uyvytoyuv420 = uyvytoyuv420_c; - uyvytoyuv422 = uyvytoyuv422_c; - yuyvtoyuv420 = yuyvtoyuv420_c; - yuyvtoyuv422 = yuyvtoyuv422_c; -} diff --git a/ffmpeg/libswscale/sparc/Makefile b/ffmpeg/libswscale/sparc/Makefile deleted file mode 100644 index 2351ba4..0000000 --- a/ffmpeg/libswscale/sparc/Makefile +++ /dev/null @@ -1 +0,0 @@ -VIS-OBJS += sparc/yuv2rgb_vis.o \ diff --git a/ffmpeg/libswscale/sparc/yuv2rgb_vis.c b/ffmpeg/libswscale/sparc/yuv2rgb_vis.c deleted file mode 100644 index ed00837..0000000 --- a/ffmpeg/libswscale/sparc/yuv2rgb_vis.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * VIS optimized software YUV to RGB converter - * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> -#include <stdlib.h> - -#include "libavutil/attributes.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" - -#define YUV2RGB_INIT \ - "wr %%g0, 0x10, %%gsr \n\t" \ - "ldd [%5], %%f32 \n\t" \ - "ldd [%5 + 8], %%f34 \n\t" \ - "ldd [%5 + 16], %%f36 \n\t" \ - "ldd [%5 + 24], %%f38 \n\t" \ - "ldd [%5 + 32], %%f40 \n\t" \ - "ldd [%5 + 40], %%f42 \n\t" \ - "ldd [%5 + 48], %%f44 \n\t" \ - "ldd [%5 + 56], %%f46 \n\t" \ - "ldd [%5 + 64], %%f48 \n\t" \ - "ldd [%5 + 72], %%f50 \n\t" - -#define YUV2RGB_KERNEL \ - /* ^^^^ f0=Y f3=u f5=v */ \ - "fmul8x16 %%f3, %%f48, %%f6 \n\t" \ - "fmul8x16 %%f19, %%f48, %%f22 \n\t" \ - "fmul8x16 %%f5, %%f44, %%f8 \n\t" \ - "fmul8x16 %%f21, %%f44, %%f24 \n\t" \ - "fmul8x16 %%f0, %%f42, %%f0 \n\t" \ - "fmul8x16 %%f16, %%f42, %%f16 \n\t" \ - "fmul8x16 %%f3, %%f50, %%f2 \n\t" \ - "fmul8x16 %%f19, %%f50, %%f18 \n\t" \ - "fmul8x16 %%f5, %%f46, %%f4 \n\t" \ - "fmul8x16 %%f21, %%f46, %%f20 \n\t" \ - \ - "fpsub16 %%f6, %%f34, %%f6 \n\t" /* 1 */ \ - "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */ \ - "fpsub16 %%f8, %%f38, %%f8 \n\t" /* 3 */ \ - "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */ \ - "fpsub16 %%f0, %%f32, %%f0 \n\t" /* 0 */ \ - "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */ \ - "fpsub16 %%f2, %%f36, %%f2 \n\t" /* 2 */ \ - "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */ \ - "fpsub16 %%f4, %%f40, %%f4 \n\t" /* 4 */ \ - "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */ \ - \ - "fpadd16 %%f0, %%f8, %%f8 \n\t" /* Gt */ \ - "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */ \ - "fpadd16 %%f0, %%f4, %%f4 \n\t" /* R */ \ - "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */ \ - "fpadd16 %%f0, %%f6, %%f6 \n\t" /* B */ \ - "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */ \ - "fpadd16 %%f8, %%f2, %%f2 \n\t" /* G */ \ - "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */ \ - \ - "fpack16 %%f4, %%f4 \n\t" \ - "fpack16 %%f20, %%f20 \n\t" \ - "fpack16 %%f6, %%f6 \n\t" \ - "fpack16 %%f22, %%f22 \n\t" \ - "fpack16 %%f2, %%f2 \n\t" \ - "fpack16 %%f18, %%f18 \n\t" - -// FIXME: must be changed to set alpha to 255 instead of 0 -static int vis_420P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, out1, out2, out3, out4, out5, out6; - - for (y = 0; y < srcSliceH; ++y) - __asm__ volatile ( - YUV2RGB_INIT - "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ - "1: \n\t" - "ldda [%1] %%asi, %%f2 \n\t" - "ldda [%1 + 2] %%asi, %%f18 \n\t" - "ldda [%2] %%asi, %%f4 \n\t" - "ldda [%2 + 2] %%asi, %%f20 \n\t" - "ld [%0], %%f0 \n\t" - "ld [%0+4], %%f16 \n\t" - "fpmerge %%f3, %%f3, %%f2 \n\t" - "fpmerge %%f19, %%f19, %%f18 \n\t" - "fpmerge %%f5, %%f5, %%f4 \n\t" - "fpmerge %%f21, %%f21, %%f20 \n\t" - YUV2RGB_KERNEL - "fzero %%f0 \n\t" - "fpmerge %%f4, %%f6, %%f8 \n\t" // r, b, t1 - "fpmerge %%f20, %%f22, %%f24 \n\t" // r, b, t1 - "fpmerge %%f0, %%f2, %%f10 \n\t" // 0, g, t2 - "fpmerge %%f0, %%f18, %%f26 \n\t" // 0, g, t2 - "fpmerge %%f10, %%f8, %%f4 \n\t" // t2, t1, msb - "fpmerge %%f26, %%f24, %%f20 \n\t" // t2, t1, msb - "fpmerge %%f11, %%f9, %%f6 \n\t" // t2, t1, lsb - "fpmerge %%f27, %%f25, %%f22 \n\t" // t2, t1, lsb - "std %%f4, [%3] \n\t" - "std %%f20, [%3 + 16] \n\t" - "std %%f6, [%3 + 8] \n\t" - "std %%f22, [%3 + 24] \n\t" - - "add %0, 8, %0 \n\t" - "add %1, 4, %1 \n\t" - "add %2, 4, %2 \n\t" - "subcc %4, 8, %4 \n\t" - "bne 1b \n\t" - "add %3, 32, %3 \n\t" // delay slot - : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) - : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + ((y + srcSliceY) >> 1) * srcStride[1]), - "2" (src[2] + ((y + srcSliceY) >> 1) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]), - "4" (c->dstW), - "5" (c->sparc_coeffs) - ); - - return srcSliceH; -} - -// FIXME: must be changed to set alpha to 255 instead of 0 -static int vis_422P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, out1, out2, out3, out4, out5, out6; - - for (y = 0; y < srcSliceH; ++y) - __asm__ volatile ( - YUV2RGB_INIT - "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ - "1: \n\t" - "ldda [%1] %%asi, %%f2 \n\t" - "ldda [%1 + 2] %%asi, %%f18 \n\t" - "ldda [%2] %%asi, %%f4 \n\t" - "ldda [%2 + 2] %%asi, %%f20 \n\t" - "ld [%0], %%f0 \n\t" - "ld [%0 + 4], %%f16 \n\t" - "fpmerge %%f3, %%f3, %%f2 \n\t" - "fpmerge %%f19, %%f19, %%f18 \n\t" - "fpmerge %%f5, %%f5, %%f4 \n\t" - "fpmerge %%f21, %%f21, %%f20 \n\t" - YUV2RGB_KERNEL - "fzero %%f0 \n\t" - "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 - "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 - "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 - "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 - "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb - "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb - "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb - "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb - "std %%f4, [%3] \n\t" - "std %%f20, [%3 + 16] \n\t" - "std %%f6, [%3 + 8] \n\t" - "std %%f22, [%3 + 24] \n\t" - - "add %0, 8, %0 \n\t" - "add %1, 4, %1 \n\t" - "add %2, 4, %2 \n\t" - "subcc %4, 8, %4 \n\t" - "bne 1b \n\t" - "add %3, 32, %3 \n\t" //delay slot - : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) - : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + (y + srcSliceY) * srcStride[1]), - "2" (src[2] + (y + srcSliceY) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]), - "4" (c->dstW), - "5" (c->sparc_coeffs) - ); - - return srcSliceH; -} - -av_cold SwsFunc ff_yuv2rgb_init_vis(SwsContext *c) -{ - c->sparc_coeffs[5] = c->yCoeff; - c->sparc_coeffs[6] = c->vgCoeff; - c->sparc_coeffs[7] = c->vrCoeff; - c->sparc_coeffs[8] = c->ubCoeff; - c->sparc_coeffs[9] = c->ugCoeff; - - c->sparc_coeffs[0] = (((int16_t)c->yOffset * (int16_t)c->yCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; - c->sparc_coeffs[1] = (((int16_t)c->uOffset * (int16_t)c->ubCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; - c->sparc_coeffs[2] = (((int16_t)c->uOffset * (int16_t)c->ugCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; - c->sparc_coeffs[3] = (((int16_t)c->vOffset * (int16_t)c->vgCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; - c->sparc_coeffs[4] = (((int16_t)c->vOffset * (int16_t)c->vrCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; - - if (c->dstFormat == AV_PIX_FMT_RGB32 && c->srcFormat == AV_PIX_FMT_YUV422P && (c->dstW & 7) == 0) { - av_log(c, AV_LOG_INFO, - "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n"); - return vis_422P_ARGB32; - } else if (c->dstFormat == AV_PIX_FMT_RGB32 && c->srcFormat == AV_PIX_FMT_YUV420P && (c->dstW & 7) == 0) { - av_log(c, AV_LOG_INFO, - "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n"); - return vis_420P_ARGB32; - } - return NULL; -} diff --git a/ffmpeg/libswscale/swscale-test.c b/ffmpeg/libswscale/swscale-test.c deleted file mode 100644 index e0b887e..0000000 --- a/ffmpeg/libswscale/swscale-test.c +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <inttypes.h> -#include <stdarg.h> - -#undef HAVE_AV_CONFIG_H -#include "libavutil/imgutils.h" -#include "libavutil/mem.h" -#include "libavutil/avutil.h" -#include "libavutil/crc.h" -#include "libavutil/pixdesc.h" -#include "libavutil/lfg.h" -#include "swscale.h" - -/* HACK Duplicated from swscale_internal.h. - * Should be removed when a cleaner pixel format system exists. */ -#define isGray(x) \ - ((x) == AV_PIX_FMT_GRAY8 || \ - (x) == AV_PIX_FMT_Y400A || \ - (x) == AV_PIX_FMT_GRAY16BE || \ - (x) == AV_PIX_FMT_GRAY16LE) -#define hasChroma(x) \ - (!(isGray(x) || \ - (x) == AV_PIX_FMT_MONOBLACK || \ - (x) == AV_PIX_FMT_MONOWHITE)) -#define isALPHA(x) \ - ((x) == AV_PIX_FMT_BGR32 || \ - (x) == AV_PIX_FMT_BGR32_1 || \ - (x) == AV_PIX_FMT_RGB32 || \ - (x) == AV_PIX_FMT_RGB32_1 || \ - (x) == AV_PIX_FMT_YUVA420P) - -static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1, - int stride2, int w, int h) -{ - int x, y; - uint64_t ssd = 0; - - for (y = 0; y < h; y++) { - for (x = 0; x < w; x++) { - int d = src1[x + y * stride1] - src2[x + y * stride2]; - ssd += d * d; - } - } - return ssd; -} - -struct Results { - uint64_t ssdY; - uint64_t ssdU; - uint64_t ssdV; - uint64_t ssdA; - uint32_t crc; -}; - -// test by ref -> src -> dst -> out & compare out against ref -// ref & out are YV12 -static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, - enum AVPixelFormat srcFormat, enum AVPixelFormat dstFormat, - int srcW, int srcH, int dstW, int dstH, int flags, - struct Results *r) -{ - const AVPixFmtDescriptor *desc_yuva420p = av_pix_fmt_desc_get(AV_PIX_FMT_YUVA420P); - const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat); - const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat); - static enum AVPixelFormat cur_srcFormat; - static int cur_srcW, cur_srcH; - static uint8_t *src[4]; - static int srcStride[4]; - uint8_t *dst[4] = { 0 }; - uint8_t *out[4] = { 0 }; - int dstStride[4] = {0}; - int i; - uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0; - struct SwsContext *dstContext = NULL, *outContext = NULL; - uint32_t crc = 0; - int res = 0; - - if (cur_srcFormat != srcFormat || cur_srcW != srcW || cur_srcH != srcH) { - struct SwsContext *srcContext = NULL; - int p; - - for (p = 0; p < 4; p++) - av_freep(&src[p]); - - av_image_fill_linesizes(srcStride, srcFormat, srcW); - for (p = 0; p < 4; p++) { - srcStride[p] = FFALIGN(srcStride[p], 16); - if (srcStride[p]) - src[p] = av_mallocz(srcStride[p] * srcH + 16); - if (srcStride[p] && !src[p]) { - perror("Malloc"); - res = -1; - goto end; - } - } - srcContext = sws_getContext(w, h, AV_PIX_FMT_YUVA420P, srcW, srcH, - srcFormat, SWS_BILINEAR, NULL, NULL, NULL); - if (!srcContext) { - fprintf(stderr, "Failed to get %s ---> %s\n", - desc_yuva420p->name, - desc_src->name); - res = -1; - goto end; - } - sws_scale(srcContext, (const uint8_t * const*)ref, refStride, 0, h, src, srcStride); - sws_freeContext(srcContext); - - cur_srcFormat = srcFormat; - cur_srcW = srcW; - cur_srcH = srcH; - } - - av_image_fill_linesizes(dstStride, dstFormat, dstW); - for (i = 0; i < 4; i++) { - /* Image buffers passed into libswscale can be allocated any way you - * prefer, as long as they're aligned enough for the architecture, and - * they're freed appropriately (such as using av_free for buffers - * allocated with av_malloc). */ - /* An extra 16 bytes is being allocated because some scalers may write - * out of bounds. */ - dstStride[i] = FFALIGN(dstStride[i], 16); - if (dstStride[i]) - dst[i] = av_mallocz(dstStride[i] * dstH + 16); - if (dstStride[i] && !dst[i]) { - perror("Malloc"); - res = -1; - - goto end; - } - } - - dstContext = sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, - flags, NULL, NULL, NULL); - if (!dstContext) { - fprintf(stderr, "Failed to get %s ---> %s\n", - desc_src->name, desc_dst->name); - res = -1; - goto end; - } - - printf(" %s %dx%d -> %s %3dx%3d flags=%2d", - desc_src->name, srcW, srcH, - desc_dst->name, dstW, dstH, - flags); - fflush(stdout); - - sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); - - for (i = 0; i < 4 && dstStride[i]; i++) - crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], - dstStride[i] * dstH); - - if (r && crc == r->crc) { - ssdY = r->ssdY; - ssdU = r->ssdU; - ssdV = r->ssdV; - ssdA = r->ssdA; - } else { - for (i = 0; i < 4; i++) { - refStride[i] = FFALIGN(refStride[i], 16); - if (refStride[i]) - out[i] = av_mallocz(refStride[i] * h); - if (refStride[i] && !out[i]) { - perror("Malloc"); - res = -1; - goto end; - } - } - outContext = sws_getContext(dstW, dstH, dstFormat, w, h, - AV_PIX_FMT_YUVA420P, SWS_BILINEAR, - NULL, NULL, NULL); - if (!outContext) { - fprintf(stderr, "Failed to get %s ---> %s\n", - desc_dst->name, - desc_yuva420p->name); - res = -1; - goto end; - } - sws_scale(outContext, (const uint8_t * const*)dst, dstStride, 0, dstH, out, refStride); - - ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); - if (hasChroma(srcFormat) && hasChroma(dstFormat)) { - //FIXME check that output is really gray - ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1], - (w + 1) >> 1, (h + 1) >> 1); - ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2], - (w + 1) >> 1, (h + 1) >> 1); - } - if (isALPHA(srcFormat) && isALPHA(dstFormat)) - ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h); - - ssdY /= w * h; - ssdU /= w * h / 4; - ssdV /= w * h / 4; - ssdA /= w * h; - - sws_freeContext(outContext); - - for (i = 0; i < 4; i++) - if (refStride[i]) - av_free(out[i]); - } - - printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n", - crc, ssdY, ssdU, ssdV, ssdA); - -end: - sws_freeContext(dstContext); - - for (i = 0; i < 4; i++) - if (dstStride[i]) - av_free(dst[i]); - - return res; -} - -static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, - enum AVPixelFormat srcFormat_in, - enum AVPixelFormat dstFormat_in) -{ - const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC, - SWS_X, SWS_POINT, SWS_AREA, 0 }; - const int srcW = w; - const int srcH = h; - const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 }; - const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 }; - enum AVPixelFormat srcFormat, dstFormat; - const AVPixFmtDescriptor *desc_src, *desc_dst; - - for (srcFormat = srcFormat_in != AV_PIX_FMT_NONE ? srcFormat_in : 0; - srcFormat < AV_PIX_FMT_NB; srcFormat++) { - if (!sws_isSupportedInput(srcFormat) || - !sws_isSupportedOutput(srcFormat)) - continue; - - desc_src = av_pix_fmt_desc_get(srcFormat); - - for (dstFormat = dstFormat_in != AV_PIX_FMT_NONE ? dstFormat_in : 0; - dstFormat < AV_PIX_FMT_NB; dstFormat++) { - int i, j, k; - int res = 0; - - if (!sws_isSupportedInput(dstFormat) || - !sws_isSupportedOutput(dstFormat)) - continue; - - desc_dst = av_pix_fmt_desc_get(dstFormat); - - printf("%s -> %s\n", desc_src->name, desc_dst->name); - fflush(stdout); - - for (k = 0; flags[k] && !res; k++) - for (i = 0; dstW[i] && !res; i++) - for (j = 0; dstH[j] && !res; j++) - res = doTest(ref, refStride, w, h, - srcFormat, dstFormat, - srcW, srcH, dstW[i], dstH[j], flags[k], - NULL); - if (dstFormat_in != AV_PIX_FMT_NONE) - break; - } - if (srcFormat_in != AV_PIX_FMT_NONE) - break; - } -} - -static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp, - enum AVPixelFormat srcFormat_in, - enum AVPixelFormat dstFormat_in) -{ - char buf[256]; - - while (fgets(buf, sizeof(buf), fp)) { - struct Results r; - enum AVPixelFormat srcFormat; - char srcStr[12]; - int srcW, srcH; - enum AVPixelFormat dstFormat; - char dstStr[12]; - int dstW, dstH; - int flags; - int ret; - - ret = sscanf(buf, - " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" - " SSD=%"SCNd64 ", %"SCNd64 ", %"SCNd64 ", %"SCNd64 "\n", - srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, - &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); - if (ret != 12) { - srcStr[0] = dstStr[0] = 0; - ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr); - } - - srcFormat = av_get_pix_fmt(srcStr); - dstFormat = av_get_pix_fmt(dstStr); - - if (srcFormat == AV_PIX_FMT_NONE || dstFormat == AV_PIX_FMT_NONE || - srcW > 8192U || srcH > 8192U || dstW > 8192U || dstH > 8192U) { - fprintf(stderr, "malformed input file\n"); - return -1; - } - if ((srcFormat_in != AV_PIX_FMT_NONE && srcFormat_in != srcFormat) || - (dstFormat_in != AV_PIX_FMT_NONE && dstFormat_in != dstFormat)) - continue; - if (ret != 12) { - printf("%s", buf); - continue; - } - - doTest(ref, refStride, w, h, - srcFormat, dstFormat, - srcW, srcH, dstW, dstH, flags, - &r); - } - - return 0; -} - -#define W 96 -#define H 96 - -int main(int argc, char **argv) -{ - enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; - enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; - uint8_t *rgb_data = av_malloc(W * H * 4); - const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; - int rgb_stride[4] = { 4 * W, 0, 0, 0 }; - uint8_t *data = av_malloc(4 * W * H); - uint8_t *src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; - int stride[4] = { W, W, W, W }; - int x, y; - struct SwsContext *sws; - AVLFG rand; - int res = -1; - int i; - FILE *fp = NULL; - - if (!rgb_data || !data) - return -1; - - for (i = 1; i < argc; i += 2) { - if (argv[i][0] != '-' || i + 1 == argc) - goto bad_option; - if (!strcmp(argv[i], "-ref")) { - fp = fopen(argv[i + 1], "r"); - if (!fp) { - fprintf(stderr, "could not open '%s'\n", argv[i + 1]); - goto error; - } - } else if (!strcmp(argv[i], "-src")) { - srcFormat = av_get_pix_fmt(argv[i + 1]); - if (srcFormat == AV_PIX_FMT_NONE) { - fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); - return -1; - } - } else if (!strcmp(argv[i], "-dst")) { - dstFormat = av_get_pix_fmt(argv[i + 1]); - if (dstFormat == AV_PIX_FMT_NONE) { - fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); - return -1; - } - } else { -bad_option: - fprintf(stderr, "bad option or argument missing (%s)\n", argv[i]); - goto error; - } - } - - sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, - AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); - - av_lfg_init(&rand, 1); - - for (y = 0; y < H; y++) - for (x = 0; x < W * 4; x++) - rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); - sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride); - sws_freeContext(sws); - av_free(rgb_data); - - if(fp) { - res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); - fclose(fp); - } else { - selfTest(src, stride, W, H, srcFormat, dstFormat); - res = 0; - } -error: - av_free(data); - - return res; -} diff --git a/ffmpeg/libswscale/swscale.c b/ffmpeg/libswscale/swscale.c deleted file mode 100644 index 710dce3..0000000 --- a/ffmpeg/libswscale/swscale.c +++ /dev/null @@ -1,1114 +0,0 @@ -/* - * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> -#include <math.h> -#include <stdio.h> -#include <string.h> - -#include "libavutil/avassert.h" -#include "libavutil/avutil.h" -#include "libavutil/bswap.h" -#include "libavutil/cpu.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/mathematics.h" -#include "libavutil/pixdesc.h" -#include "config.h" -#include "rgb2rgb.h" -#include "swscale_internal.h" -#include "swscale.h" - -DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = { - { 36, 68, 60, 92, 34, 66, 58, 90, }, - { 100, 4, 124, 28, 98, 2, 122, 26, }, - { 52, 84, 44, 76, 50, 82, 42, 74, }, - { 116, 20, 108, 12, 114, 18, 106, 10, }, - { 32, 64, 56, 88, 38, 70, 62, 94, }, - { 96, 0, 120, 24, 102, 6, 126, 30, }, - { 48, 80, 40, 72, 54, 86, 46, 78, }, - { 112, 16, 104, 8, 118, 22, 110, 14, }, - { 36, 68, 60, 92, 34, 66, 58, 90, }, -}; - -DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = { - 64, 64, 64, 64, 64, 64, 64, 64 -}; - -static av_always_inline void fillPlane(uint8_t *plane, int stride, int width, - int height, int y, uint8_t val) -{ - int i; - uint8_t *ptr = plane + stride * y; - for (i = 0; i < height; i++) { - memset(ptr, val, width); - ptr += stride; - } -} - -static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, - const uint8_t *_src, const int16_t *filter, - const int32_t *filterPos, int filterSize) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); - int i; - int32_t *dst = (int32_t *) _dst; - const uint16_t *src = (const uint16_t *) _src; - int bits = desc->comp[0].depth_minus1; - int sh = bits - 4; - - if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth_minus1<15) - sh= 9; - - for (i = 0; i < dstW; i++) { - int j; - int srcPos = filterPos[i]; - int val = 0; - - for (j = 0; j < filterSize; j++) { - val += src[srcPos + j] * filter[filterSize * i + j]; - } - // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit - dst[i] = FFMIN(val >> sh, (1 << 19) - 1); - } -} - -static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, - const uint8_t *_src, const int16_t *filter, - const int32_t *filterPos, int filterSize) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); - int i; - const uint16_t *src = (const uint16_t *) _src; - int sh = desc->comp[0].depth_minus1; - - if(sh<15) - sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : desc->comp[0].depth_minus1; - - for (i = 0; i < dstW; i++) { - int j; - int srcPos = filterPos[i]; - int val = 0; - - for (j = 0; j < filterSize; j++) { - val += src[srcPos + j] * filter[filterSize * i + j]; - } - // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit - dst[i] = FFMIN(val >> sh, (1 << 15) - 1); - } -} - -// bilinear / bicubic scaling -static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, - const uint8_t *src, const int16_t *filter, - const int32_t *filterPos, int filterSize) -{ - int i; - for (i = 0; i < dstW; i++) { - int j; - int srcPos = filterPos[i]; - int val = 0; - for (j = 0; j < filterSize; j++) { - val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; - } - dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ... - } -} - -static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, - const uint8_t *src, const int16_t *filter, - const int32_t *filterPos, int filterSize) -{ - int i; - int32_t *dst = (int32_t *) _dst; - for (i = 0; i < dstW; i++) { - int j; - int srcPos = filterPos[i]; - int val = 0; - for (j = 0; j < filterSize; j++) { - val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; - } - dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ... - } -} - -// FIXME all pal and rgb srcFormats could do this conversion as well -// FIXME all scalers more complex than bilinear could do half of this transform -static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) -{ - int i; - for (i = 0; i < width; i++) { - dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264 - dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264 - } -} - -static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) -{ - int i; - for (i = 0; i < width; i++) { - dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469 - dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469 - } -} - -static void lumRangeToJpeg_c(int16_t *dst, int width) -{ - int i; - for (i = 0; i < width; i++) - dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14; -} - -static void lumRangeFromJpeg_c(int16_t *dst, int width) -{ - int i; - for (i = 0; i < width; i++) - dst[i] = (dst[i] * 14071 + 33561947) >> 14; -} - -static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) -{ - int i; - int32_t *dstU = (int32_t *) _dstU; - int32_t *dstV = (int32_t *) _dstV; - for (i = 0; i < width; i++) { - dstU[i] = (FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264 - dstV[i] = (FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264 - } -} - -static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) -{ - int i; - int32_t *dstU = (int32_t *) _dstU; - int32_t *dstV = (int32_t *) _dstV; - for (i = 0; i < width; i++) { - dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469 - dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469 - } -} - -static void lumRangeToJpeg16_c(int16_t *_dst, int width) -{ - int i; - int32_t *dst = (int32_t *) _dst; - for (i = 0; i < width; i++) - dst[i] = (FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12; -} - -static void lumRangeFromJpeg16_c(int16_t *_dst, int width) -{ - int i; - int32_t *dst = (int32_t *) _dst; - for (i = 0; i < width; i++) - dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; -} - -static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, - const uint8_t *src, int srcW, int xInc) -{ - int i; - unsigned int xpos = 0; - for (i = 0; i < dstWidth; i++) { - register unsigned int xx = xpos >> 16; - register unsigned int xalpha = (xpos & 0xFFFF) >> 9; - dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha; - xpos += xInc; - } - for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) - dst[i] = src[srcW-1]*128; -} - -// *** horizontal scale Y line to temp buffer -static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, - const uint8_t *src_in[4], - int srcW, int xInc, - const int16_t *hLumFilter, - const int32_t *hLumFilterPos, - int hLumFilterSize, - uint8_t *formatConvBuffer, - uint32_t *pal, int isAlpha) -{ - void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = - isAlpha ? c->alpToYV12 : c->lumToYV12; - void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; - const uint8_t *src = src_in[isAlpha ? 3 : 0]; - - if (toYV12) { - toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); - src = formatConvBuffer; - } else if (c->readLumPlanar && !isAlpha) { - c->readLumPlanar(formatConvBuffer, src_in, srcW, c->input_rgb2yuv_table); - src = formatConvBuffer; - } else if (c->readAlpPlanar && isAlpha) { - c->readAlpPlanar(formatConvBuffer, src_in, srcW, NULL); - src = formatConvBuffer; - } - - if (!c->hyscale_fast) { - c->hyScale(c, dst, dstWidth, src, hLumFilter, - hLumFilterPos, hLumFilterSize); - } else { // fast bilinear upscale / crap downscale - c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); - } - - if (convertRange) - convertRange(dst, dstWidth); -} - -static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, - int dstWidth, const uint8_t *src1, - const uint8_t *src2, int srcW, int xInc) -{ - int i; - unsigned int xpos = 0; - for (i = 0; i < dstWidth; i++) { - register unsigned int xx = xpos >> 16; - register unsigned int xalpha = (xpos & 0xFFFF) >> 9; - dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha); - dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha); - xpos += xInc; - } - for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { - dst1[i] = src1[srcW-1]*128; - dst2[i] = src2[srcW-1]*128; - } -} - -static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, - int16_t *dst2, int dstWidth, - const uint8_t *src_in[4], - int srcW, int xInc, - const int16_t *hChrFilter, - const int32_t *hChrFilterPos, - int hChrFilterSize, - uint8_t *formatConvBuffer, uint32_t *pal) -{ - const uint8_t *src1 = src_in[1], *src2 = src_in[2]; - if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + - FFALIGN(srcW*2+78, 16); - c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); - src1= formatConvBuffer; - src2= buf2; - } else if (c->readChrPlanar) { - uint8_t *buf2 = formatConvBuffer + - FFALIGN(srcW*2+78, 16); - c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW, c->input_rgb2yuv_table); - src1 = formatConvBuffer; - src2 = buf2; - } - - if (!c->hcscale_fast) { - c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); - c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize); - } else { // fast bilinear upscale / crap downscale - c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc); - } - - if (c->chrConvertRange) - c->chrConvertRange(dst1, dst2, dstWidth); -} - -#define DEBUG_SWSCALE_BUFFERS 0 -#define DEBUG_BUFFERS(...) \ - if (DEBUG_SWSCALE_BUFFERS) \ - av_log(c, AV_LOG_DEBUG, __VA_ARGS__) - -static int swscale(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, - int srcSliceH, uint8_t *dst[], int dstStride[]) -{ - /* load a few things into local vars to make the code more readable? - * and faster */ - const int srcW = c->srcW; - const int dstW = c->dstW; - const int dstH = c->dstH; - const int chrDstW = c->chrDstW; - const int chrSrcW = c->chrSrcW; - const int lumXInc = c->lumXInc; - const int chrXInc = c->chrXInc; - const enum AVPixelFormat dstFormat = c->dstFormat; - const int flags = c->flags; - int32_t *vLumFilterPos = c->vLumFilterPos; - int32_t *vChrFilterPos = c->vChrFilterPos; - int32_t *hLumFilterPos = c->hLumFilterPos; - int32_t *hChrFilterPos = c->hChrFilterPos; - int16_t *hLumFilter = c->hLumFilter; - int16_t *hChrFilter = c->hChrFilter; - int32_t *lumMmxFilter = c->lumMmxFilter; - int32_t *chrMmxFilter = c->chrMmxFilter; - const int vLumFilterSize = c->vLumFilterSize; - const int vChrFilterSize = c->vChrFilterSize; - const int hLumFilterSize = c->hLumFilterSize; - const int hChrFilterSize = c->hChrFilterSize; - int16_t **lumPixBuf = c->lumPixBuf; - int16_t **chrUPixBuf = c->chrUPixBuf; - int16_t **chrVPixBuf = c->chrVPixBuf; - int16_t **alpPixBuf = c->alpPixBuf; - const int vLumBufSize = c->vLumBufSize; - const int vChrBufSize = c->vChrBufSize; - uint8_t *formatConvBuffer = c->formatConvBuffer; - uint32_t *pal = c->pal_yuv; - yuv2planar1_fn yuv2plane1 = c->yuv2plane1; - yuv2planarX_fn yuv2planeX = c->yuv2planeX; - yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; - yuv2packed1_fn yuv2packed1 = c->yuv2packed1; - yuv2packed2_fn yuv2packed2 = c->yuv2packed2; - yuv2packedX_fn yuv2packedX = c->yuv2packedX; - yuv2anyX_fn yuv2anyX = c->yuv2anyX; - const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample; - const int chrSrcSliceH = FF_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample); - int should_dither = is9_OR_10BPS(c->srcFormat) || - is16BPS(c->srcFormat); - int lastDstY; - - /* vars which will change and which we need to store back in the context */ - int dstY = c->dstY; - int lumBufIndex = c->lumBufIndex; - int chrBufIndex = c->chrBufIndex; - int lastInLumBuf = c->lastInLumBuf; - int lastInChrBuf = c->lastInChrBuf; - - if (!usePal(c->srcFormat)) { - pal = c->input_rgb2yuv_table; - } - - if (isPacked(c->srcFormat)) { - src[0] = - src[1] = - src[2] = - src[3] = src[0]; - srcStride[0] = - srcStride[1] = - srcStride[2] = - srcStride[3] = srcStride[0]; - } - srcStride[1] <<= c->vChrDrop; - srcStride[2] <<= c->vChrDrop; - - DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", - src[0], srcStride[0], src[1], srcStride[1], - src[2], srcStride[2], src[3], srcStride[3], - dst[0], dstStride[0], dst[1], dstStride[1], - dst[2], dstStride[2], dst[3], dstStride[3]); - DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n", - srcSliceY, srcSliceH, dstY, dstH); - DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", - vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); - - if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || - dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { - static int warnedAlready = 0; // FIXME maybe move this into the context - if (flags & SWS_PRINT_INFO && !warnedAlready) { - av_log(c, AV_LOG_WARNING, - "Warning: dstStride is not aligned!\n" - " ->cannot do aligned memory accesses anymore\n"); - warnedAlready = 1; - } - } - - if ( (uintptr_t)dst[0]%16 || (uintptr_t)dst[1]%16 || (uintptr_t)dst[2]%16 - || (uintptr_t)src[0]%16 || (uintptr_t)src[1]%16 || (uintptr_t)src[2]%16 - || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16 - || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16 - ) { - static int warnedAlready=0; - int cpu_flags = av_get_cpu_flags(); - if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ - av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); - warnedAlready=1; - } - } - - /* Note the user might start scaling the picture in the middle so this - * will not get executed. This is not really intended but works - * currently, so people might do it. */ - if (srcSliceY == 0) { - lumBufIndex = -1; - chrBufIndex = -1; - dstY = 0; - lastInLumBuf = -1; - lastInChrBuf = -1; - } - - if (!should_dither) { - c->chrDither8 = c->lumDither8 = sws_pb_64; - } - lastDstY = dstY; - - for (; dstY < dstH; dstY++) { - const int chrDstY = dstY >> c->chrDstVSubSample; - uint8_t *dest[4] = { - dst[0] + dstStride[0] * dstY, - dst[1] + dstStride[1] * chrDstY, - dst[2] + dstStride[2] * chrDstY, - (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, - }; - int use_mmx_vfilter= c->use_mmx_vfilter; - - // First line needed as input - const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); - const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]); - // First line needed as input - const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]); - - // Last line needed as input - int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; - int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1; - int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; - int enough_lines; - - // handle holes (FAST_BILINEAR & weird filters) - if (firstLumSrcY > lastInLumBuf) - lastInLumBuf = firstLumSrcY - 1; - if (firstChrSrcY > lastInChrBuf) - lastInChrBuf = firstChrSrcY - 1; - av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); - av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); - - DEBUG_BUFFERS("dstY: %d\n", dstY); - DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n", - firstLumSrcY, lastLumSrcY, lastInLumBuf); - DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n", - firstChrSrcY, lastChrSrcY, lastInChrBuf); - - // Do we have enough lines in this slice to output the dstY line - enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && - lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample); - - if (!enough_lines) { - lastLumSrcY = srcSliceY + srcSliceH - 1; - lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; - DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n", - lastLumSrcY, lastChrSrcY); - } - - // Do horizontal scaling - while (lastInLumBuf < lastLumSrcY) { - const uint8_t *src1[4] = { - src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0], - src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1], - src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2], - src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3], - }; - lumBufIndex++; - av_assert0(lumBufIndex < 2 * vLumBufSize); - av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH); - av_assert0(lastInLumBuf + 1 - srcSliceY >= 0); - hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc, - hLumFilter, hLumFilterPos, hLumFilterSize, - formatConvBuffer, pal, 0); - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) - hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW, - lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, - formatConvBuffer, pal, 1); - lastInLumBuf++; - DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", - lumBufIndex, lastInLumBuf); - } - while (lastInChrBuf < lastChrSrcY) { - const uint8_t *src1[4] = { - src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0], - src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1], - src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2], - src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3], - }; - chrBufIndex++; - av_assert0(chrBufIndex < 2 * vChrBufSize); - av_assert0(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); - av_assert0(lastInChrBuf + 1 - chrSrcSliceY >= 0); - // FIXME replace parameters through context struct (some at least) - - if (c->needs_hcscale) - hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], - chrDstW, src1, chrSrcW, chrXInc, - hChrFilter, hChrFilterPos, hChrFilterSize, - formatConvBuffer, pal); - lastInChrBuf++; - DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", - chrBufIndex, lastInChrBuf); - } - // wrap buf index around to stay inside the ring buffer - if (lumBufIndex >= vLumBufSize) - lumBufIndex -= vLumBufSize; - if (chrBufIndex >= vChrBufSize) - chrBufIndex -= vChrBufSize; - if (!enough_lines) - break; // we can't output a dstY line so let's try with the next slice - -#if HAVE_MMX_INLINE - updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, - lastInLumBuf, lastInChrBuf); -#endif - if (should_dither) { - c->chrDither8 = ff_dither_8x8_128[chrDstY & 7]; - c->lumDither8 = ff_dither_8x8_128[dstY & 7]; - } - if (dstY >= dstH - 2) { - /* hmm looks like we can't use MMX here without overwriting - * this array's tail */ - ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, - &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX); - use_mmx_vfilter= 0; - } - - { - const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? - (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - int16_t *vLumFilter = c->vLumFilter; - int16_t *vChrFilter = c->vChrFilter; - - if (isPlanarYUV(dstFormat) || - (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like - const int chrSkipMask = (1 << c->chrDstVSubSample) - 1; - - vLumFilter += dstY * vLumFilterSize; - vChrFilter += chrDstY * vChrFilterSize; - -// av_assert0(use_mmx_vfilter != ( -// yuv2planeX == yuv2planeX_10BE_c -// || yuv2planeX == yuv2planeX_10LE_c -// || yuv2planeX == yuv2planeX_9BE_c -// || yuv2planeX == yuv2planeX_9LE_c -// || yuv2planeX == yuv2planeX_16BE_c -// || yuv2planeX == yuv2planeX_16LE_c -// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); - - if(use_mmx_vfilter){ - vLumFilter= (int16_t *)c->lumMmxFilter; - vChrFilter= (int16_t *)c->chrMmxFilter; - } - - if (vLumFilterSize == 1) { - yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); - } else { - yuv2planeX(vLumFilter, vLumFilterSize, - lumSrcPtr, dest[0], - dstW, c->lumDither8, 0); - } - - if (!((dstY & chrSkipMask) || isGray(dstFormat))) { - if (yuv2nv12cX) { - yuv2nv12cX(c, vChrFilter, - vChrFilterSize, chrUSrcPtr, chrVSrcPtr, - dest[1], chrDstW); - } else if (vChrFilterSize == 1) { - yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); - yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); - } else { - yuv2planeX(vChrFilter, - vChrFilterSize, chrUSrcPtr, dest[1], - chrDstW, c->chrDither8, 0); - yuv2planeX(vChrFilter, - vChrFilterSize, chrVSrcPtr, dest[2], - chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); - } - } - - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { - if(use_mmx_vfilter){ - vLumFilter= (int16_t *)c->alpMmxFilter; - } - if (vLumFilterSize == 1) { - yuv2plane1(alpSrcPtr[0], dest[3], dstW, - c->lumDither8, 0); - } else { - yuv2planeX(vLumFilter, - vLumFilterSize, alpSrcPtr, dest[3], - dstW, c->lumDither8, 0); - } - } - } else if (yuv2packedX) { - av_assert1(lumSrcPtr + vLumFilterSize - 1 < (const int16_t **)lumPixBuf + vLumBufSize * 2); - av_assert1(chrUSrcPtr + vChrFilterSize - 1 < (const int16_t **)chrUPixBuf + vChrBufSize * 2); - if (c->yuv2packed1 && vLumFilterSize == 1 && - vChrFilterSize <= 2) { // unscaled RGB - int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1]; - yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr, - alpPixBuf ? *alpSrcPtr : NULL, - dest[0], dstW, chrAlpha, dstY); - } else if (c->yuv2packed2 && vLumFilterSize == 2 && - vChrFilterSize == 2) { // bilinear upscale RGB - int lumAlpha = vLumFilter[2 * dstY + 1]; - int chrAlpha = vChrFilter[2 * dstY + 1]; - lumMmxFilter[2] = - lumMmxFilter[3] = vLumFilter[2 * dstY] * 0x10001; - chrMmxFilter[2] = - chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001; - yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr, - alpPixBuf ? alpSrcPtr : NULL, - dest[0], dstW, lumAlpha, chrAlpha, dstY); - } else { // general RGB - yuv2packedX(c, vLumFilter + dstY * vLumFilterSize, - lumSrcPtr, vLumFilterSize, - vChrFilter + dstY * vChrFilterSize, - chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest[0], dstW, dstY); - } - } else { - av_assert1(!yuv2packed1 && !yuv2packed2); - yuv2anyX(c, vLumFilter + dstY * vLumFilterSize, - lumSrcPtr, vLumFilterSize, - vChrFilter + dstY * vChrFilterSize, - chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest, dstW, dstY); - } - } - } - if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) { - int length = dstW; - int height = dstY - lastDstY; - - if (is16BPS(dstFormat) || isNBPS(dstFormat)) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat); - fillPlane16(dst[3], dstStride[3], length, height, lastDstY, - 1, desc->comp[3].depth_minus1, - isBE(dstFormat)); - } else - fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255); - } - -#if HAVE_MMXEXT_INLINE - if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT) - __asm__ volatile ("sfence" ::: "memory"); -#endif - emms_c(); - - /* store changed local vars back in the context */ - c->dstY = dstY; - c->lumBufIndex = lumBufIndex; - c->chrBufIndex = chrBufIndex; - c->lastInLumBuf = lastInLumBuf; - c->lastInChrBuf = lastInChrBuf; - - return dstY - lastDstY; -} - -static av_cold void sws_init_swscale(SwsContext *c) -{ - enum AVPixelFormat srcFormat = c->srcFormat; - - ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX, - &c->yuv2nv12cX, &c->yuv2packed1, - &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX); - - ff_sws_init_input_funcs(c); - - - if (c->srcBpc == 8) { - if (c->dstBpc <= 14) { - c->hyScale = c->hcScale = hScale8To15_c; - if (c->flags & SWS_FAST_BILINEAR) { - c->hyscale_fast = hyscale_fast_c; - c->hcscale_fast = hcscale_fast_c; - } - } else { - c->hyScale = c->hcScale = hScale8To19_c; - } - } else { - c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c - : hScale16To15_c; - } - - if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { - if (c->dstBpc <= 14) { - if (c->srcRange) { - c->lumConvertRange = lumRangeFromJpeg_c; - c->chrConvertRange = chrRangeFromJpeg_c; - } else { - c->lumConvertRange = lumRangeToJpeg_c; - c->chrConvertRange = chrRangeToJpeg_c; - } - } else { - if (c->srcRange) { - c->lumConvertRange = lumRangeFromJpeg16_c; - c->chrConvertRange = chrRangeFromJpeg16_c; - } else { - c->lumConvertRange = lumRangeToJpeg16_c; - c->chrConvertRange = chrRangeToJpeg16_c; - } - } - } - - if (!(isGray(srcFormat) || isGray(c->dstFormat) || - srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE)) - c->needs_hcscale = 1; -} - -SwsFunc ff_getSwsFunc(SwsContext *c) -{ - sws_init_swscale(c); - - if (ARCH_PPC) - ff_sws_init_swscale_ppc(c); - if (ARCH_X86) - ff_sws_init_swscale_x86(c); - - return swscale; -} - -static void reset_ptr(const uint8_t *src[], int format) -{ - if (!isALPHA(format)) - src[3] = NULL; - if (!isPlanar(format)) { - src[3] = src[2] = NULL; - - if (!usePal(format)) - src[1] = NULL; - } -} - -static int check_image_pointers(const uint8_t * const data[4], enum AVPixelFormat pix_fmt, - const int linesizes[4]) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - int i; - - for (i = 0; i < 4; i++) { - int plane = desc->comp[i].plane; - if (!data[plane] || !linesizes[plane]) - return 0; - } - - return 1; -} - -static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst, - const uint16_t *src, int stride, int h) -{ - int xp,yp; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); - - for (yp=0; yp<h; yp++) { - for (xp=0; xp+2<stride; xp+=3) { - int x, y, z, r, g, b; - - if (desc->flags & AV_PIX_FMT_FLAG_BE) { - x = AV_RB16(src + xp + 0); - y = AV_RB16(src + xp + 1); - z = AV_RB16(src + xp + 2); - } else { - x = AV_RL16(src + xp + 0); - y = AV_RL16(src + xp + 1); - z = AV_RL16(src + xp + 2); - } - - x = c->xyzgamma[x>>4]; - y = c->xyzgamma[y>>4]; - z = c->xyzgamma[z>>4]; - - // convert from XYZlinear to sRGBlinear - r = c->xyz2rgb_matrix[0][0] * x + - c->xyz2rgb_matrix[0][1] * y + - c->xyz2rgb_matrix[0][2] * z >> 12; - g = c->xyz2rgb_matrix[1][0] * x + - c->xyz2rgb_matrix[1][1] * y + - c->xyz2rgb_matrix[1][2] * z >> 12; - b = c->xyz2rgb_matrix[2][0] * x + - c->xyz2rgb_matrix[2][1] * y + - c->xyz2rgb_matrix[2][2] * z >> 12; - - // limit values to 12-bit depth - r = av_clip_c(r,0,4095); - g = av_clip_c(g,0,4095); - b = av_clip_c(b,0,4095); - - // convert from sRGBlinear to RGB and scale from 12bit to 16bit - if (desc->flags & AV_PIX_FMT_FLAG_BE) { - AV_WB16(dst + xp + 0, c->rgbgamma[r] << 4); - AV_WB16(dst + xp + 1, c->rgbgamma[g] << 4); - AV_WB16(dst + xp + 2, c->rgbgamma[b] << 4); - } else { - AV_WL16(dst + xp + 0, c->rgbgamma[r] << 4); - AV_WL16(dst + xp + 1, c->rgbgamma[g] << 4); - AV_WL16(dst + xp + 2, c->rgbgamma[b] << 4); - } - } - src += stride; - dst += stride; - } -} - -static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst, - const uint16_t *src, int stride, int h) -{ - int xp,yp; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); - - for (yp=0; yp<h; yp++) { - for (xp=0; xp+2<stride; xp+=3) { - int x, y, z, r, g, b; - - if (desc->flags & AV_PIX_FMT_FLAG_BE) { - r = AV_RB16(src + xp + 0); - g = AV_RB16(src + xp + 1); - b = AV_RB16(src + xp + 2); - } else { - r = AV_RL16(src + xp + 0); - g = AV_RL16(src + xp + 1); - b = AV_RL16(src + xp + 2); - } - - r = c->rgbgammainv[r>>4]; - g = c->rgbgammainv[g>>4]; - b = c->rgbgammainv[b>>4]; - - // convert from sRGBlinear to XYZlinear - x = c->rgb2xyz_matrix[0][0] * r + - c->rgb2xyz_matrix[0][1] * g + - c->rgb2xyz_matrix[0][2] * b >> 12; - y = c->rgb2xyz_matrix[1][0] * r + - c->rgb2xyz_matrix[1][1] * g + - c->rgb2xyz_matrix[1][2] * b >> 12; - z = c->rgb2xyz_matrix[2][0] * r + - c->rgb2xyz_matrix[2][1] * g + - c->rgb2xyz_matrix[2][2] * b >> 12; - - // limit values to 12-bit depth - x = av_clip_c(x,0,4095); - y = av_clip_c(y,0,4095); - z = av_clip_c(z,0,4095); - - // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit - if (desc->flags & AV_PIX_FMT_FLAG_BE) { - AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4); - AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4); - AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4); - } else { - AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4); - AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4); - AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4); - } - } - src += stride; - dst += stride; - } -} - -/** - * swscale wrapper, so we don't need to export the SwsContext. - * Assumes planar YUV to be in YUV order instead of YVU. - */ -int attribute_align_arg sws_scale(struct SwsContext *c, - const uint8_t * const srcSlice[], - const int srcStride[], int srcSliceY, - int srcSliceH, uint8_t *const dst[], - const int dstStride[]) -{ - int i, ret; - const uint8_t *src2[4]; - uint8_t *dst2[4]; - uint8_t *rgb0_tmp = NULL; - - if (!srcSlice || !dstStride || !dst || !srcSlice) { - av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n"); - return 0; - } - memcpy(src2, srcSlice, sizeof(src2)); - memcpy(dst2, dst, sizeof(dst2)); - - // do not mess up sliceDir if we have a "trailing" 0-size slice - if (srcSliceH == 0) - return 0; - - if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) { - av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); - return 0; - } - if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) { - av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); - return 0; - } - - if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) { - av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n"); - return 0; - } - if (c->sliceDir == 0) { - if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; - } - - if (usePal(c->srcFormat)) { - for (i = 0; i < 256; i++) { - int p, r, g, b, y, u, v, a = 0xff; - if (c->srcFormat == AV_PIX_FMT_PAL8) { - p = ((const uint32_t *)(srcSlice[1]))[i]; - a = (p >> 24) & 0xFF; - r = (p >> 16) & 0xFF; - g = (p >> 8) & 0xFF; - b = p & 0xFF; - } else if (c->srcFormat == AV_PIX_FMT_RGB8) { - r = ( i >> 5 ) * 36; - g = ((i >> 2) & 7) * 36; - b = ( i & 3) * 85; - } else if (c->srcFormat == AV_PIX_FMT_BGR8) { - b = ( i >> 6 ) * 85; - g = ((i >> 3) & 7) * 36; - r = ( i & 7) * 36; - } else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) { - r = ( i >> 3 ) * 255; - g = ((i >> 1) & 3) * 85; - b = ( i & 1) * 255; - } else if (c->srcFormat == AV_PIX_FMT_GRAY8 || c->srcFormat == AV_PIX_FMT_GRAY8A) { - r = g = b = i; - } else { - av_assert1(c->srcFormat == AV_PIX_FMT_BGR4_BYTE); - b = ( i >> 3 ) * 255; - g = ((i >> 1) & 3) * 85; - r = ( i & 1) * 255; - } -#define RGB2YUV_SHIFT 15 -#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) - - y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); - u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); - v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); - c->pal_yuv[i]= y + (u<<8) + (v<<16) + ((unsigned)a<<24); - - switch (c->dstFormat) { - case AV_PIX_FMT_BGR32: -#if !HAVE_BIGENDIAN - case AV_PIX_FMT_RGB24: -#endif - c->pal_rgb[i]= r + (g<<8) + (b<<16) + ((unsigned)a<<24); - break; - case AV_PIX_FMT_BGR32_1: -#if HAVE_BIGENDIAN - case AV_PIX_FMT_BGR24: -#endif - c->pal_rgb[i]= a + (r<<8) + (g<<16) + ((unsigned)b<<24); - break; - case AV_PIX_FMT_RGB32_1: -#if HAVE_BIGENDIAN - case AV_PIX_FMT_RGB24: -#endif - c->pal_rgb[i]= a + (b<<8) + (g<<16) + ((unsigned)r<<24); - break; - case AV_PIX_FMT_RGB32: -#if !HAVE_BIGENDIAN - case AV_PIX_FMT_BGR24: -#endif - default: - c->pal_rgb[i]= b + (g<<8) + (r<<16) + ((unsigned)a<<24); - } - } - } - - if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) { - uint8_t *base; - int x,y; - rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); - if (!rgb0_tmp) - return AVERROR(ENOMEM); - - base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; - for (y=0; y<srcSliceH; y++){ - memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW); - for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) { - base[ srcStride[0]*y + x] = 0xFF; - } - } - src2[0] = base; - } - - if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) { - uint8_t *base; - rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); - if (!rgb0_tmp) - return AVERROR(ENOMEM); - - base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; - - xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH); - src2[0] = base; - } - - if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0]) - for (i = 0; i < 4; i++) - memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2)); - - - // copy strides, so they can safely be modified - if (c->sliceDir == 1) { - // slices go from top to bottom - int srcStride2[4] = { srcStride[0], srcStride[1], srcStride[2], - srcStride[3] }; - int dstStride2[4] = { dstStride[0], dstStride[1], dstStride[2], - dstStride[3] }; - - reset_ptr(src2, c->srcFormat); - reset_ptr((void*)dst2, c->dstFormat); - - /* reset slice direction at end of frame */ - if (srcSliceY + srcSliceH == c->srcH) - c->sliceDir = 0; - - ret = c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, - dstStride2); - } else { - // slices go from bottom to top => we flip the image internally - int srcStride2[4] = { -srcStride[0], -srcStride[1], -srcStride[2], - -srcStride[3] }; - int dstStride2[4] = { -dstStride[0], -dstStride[1], -dstStride[2], - -dstStride[3] }; - - src2[0] += (srcSliceH - 1) * srcStride[0]; - if (!usePal(c->srcFormat)) - src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1]; - src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2]; - src2[3] += (srcSliceH - 1) * srcStride[3]; - dst2[0] += ( c->dstH - 1) * dstStride[0]; - dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1]; - dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2]; - dst2[3] += ( c->dstH - 1) * dstStride[3]; - - reset_ptr(src2, c->srcFormat); - reset_ptr((void*)dst2, c->dstFormat); - - /* reset slice direction at end of frame */ - if (!srcSliceY) - c->sliceDir = 0; - - ret = c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, - srcSliceH, dst2, dstStride2); - } - - - if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) { - /* replace on the same data */ - rgb48Toxyz12(c, (uint16_t*)dst2[0], (const uint16_t*)dst2[0], dstStride[0]/2, ret); - } - - av_free(rgb0_tmp); - return ret; -} - diff --git a/ffmpeg/libswscale/swscale.h b/ffmpeg/libswscale/swscale.h deleted file mode 100644 index 25c8b5e..0000000 --- a/ffmpeg/libswscale/swscale.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SWSCALE_SWSCALE_H -#define SWSCALE_SWSCALE_H - -/** - * @file - * @ingroup libsws - * external API header - */ - -#include <stdint.h> - -#include "libavutil/avutil.h" -#include "libavutil/log.h" -#include "libavutil/pixfmt.h" -#include "version.h" - -/** - * @defgroup libsws Color conversion and scaling - * @{ - * - * Return the LIBSWSCALE_VERSION_INT constant. - */ -unsigned swscale_version(void); - -/** - * Return the libswscale build-time configuration. - */ -const char *swscale_configuration(void); - -/** - * Return the libswscale license. - */ -const char *swscale_license(void); - -/* values for the flags, the stuff on the command line is different */ -#define SWS_FAST_BILINEAR 1 -#define SWS_BILINEAR 2 -#define SWS_BICUBIC 4 -#define SWS_X 8 -#define SWS_POINT 0x10 -#define SWS_AREA 0x20 -#define SWS_BICUBLIN 0x40 -#define SWS_GAUSS 0x80 -#define SWS_SINC 0x100 -#define SWS_LANCZOS 0x200 -#define SWS_SPLINE 0x400 - -#define SWS_SRC_V_CHR_DROP_MASK 0x30000 -#define SWS_SRC_V_CHR_DROP_SHIFT 16 - -#define SWS_PARAM_DEFAULT 123456 - -#define SWS_PRINT_INFO 0x1000 - -//the following 3 flags are not completely implemented -//internal chrominace subsampling info -#define SWS_FULL_CHR_H_INT 0x2000 -//input subsampling info -#define SWS_FULL_CHR_H_INP 0x4000 -#define SWS_DIRECT_BGR 0x8000 -#define SWS_ACCURATE_RND 0x40000 -#define SWS_BITEXACT 0x80000 -#define SWS_ERROR_DIFFUSION 0x800000 - -#if FF_API_SWS_CPU_CAPS -/** - * CPU caps are autodetected now, those flags - * are only provided for API compatibility. - */ -#define SWS_CPU_CAPS_MMX 0x80000000 -#define SWS_CPU_CAPS_MMXEXT 0x20000000 -#define SWS_CPU_CAPS_MMX2 0x20000000 -#define SWS_CPU_CAPS_3DNOW 0x40000000 -#define SWS_CPU_CAPS_ALTIVEC 0x10000000 -#define SWS_CPU_CAPS_BFIN 0x01000000 -#define SWS_CPU_CAPS_SSE2 0x02000000 -#endif - -#define SWS_MAX_REDUCE_CUTOFF 0.002 - -#define SWS_CS_ITU709 1 -#define SWS_CS_FCC 4 -#define SWS_CS_ITU601 5 -#define SWS_CS_ITU624 5 -#define SWS_CS_SMPTE170M 5 -#define SWS_CS_SMPTE240M 7 -#define SWS_CS_DEFAULT 5 - -/** - * Return a pointer to yuv<->rgb coefficients for the given colorspace - * suitable for sws_setColorspaceDetails(). - * - * @param colorspace One of the SWS_CS_* macros. If invalid, - * SWS_CS_DEFAULT is used. - */ -const int *sws_getCoefficients(int colorspace); - -// when used for filters they must have an odd number of elements -// coeffs cannot be shared between vectors -typedef struct SwsVector { - double *coeff; ///< pointer to the list of coefficients - int length; ///< number of coefficients in the vector -} SwsVector; - -// vectors can be shared -typedef struct SwsFilter { - SwsVector *lumH; - SwsVector *lumV; - SwsVector *chrH; - SwsVector *chrV; -} SwsFilter; - -struct SwsContext; - -/** - * Return a positive value if pix_fmt is a supported input format, 0 - * otherwise. - */ -int sws_isSupportedInput(enum AVPixelFormat pix_fmt); - -/** - * Return a positive value if pix_fmt is a supported output format, 0 - * otherwise. - */ -int sws_isSupportedOutput(enum AVPixelFormat pix_fmt); - -/** - * @param[in] pix_fmt the pixel format - * @return a positive value if an endianness conversion for pix_fmt is - * supported, 0 otherwise. - */ -int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt); - -/** - * Allocate an empty SwsContext. This must be filled and passed to - * sws_init_context(). For filling see AVOptions, options.c and - * sws_setColorspaceDetails(). - */ -struct SwsContext *sws_alloc_context(void); - -/** - * Initialize the swscaler context sws_context. - * - * @return zero or positive value on success, a negative value on - * error - */ -int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter); - -/** - * Free the swscaler context swsContext. - * If swsContext is NULL, then does nothing. - */ -void sws_freeContext(struct SwsContext *swsContext); - -#if FF_API_SWS_GETCONTEXT -/** - * Allocate and return an SwsContext. You need it to perform - * scaling/conversion operations using sws_scale(). - * - * @param srcW the width of the source image - * @param srcH the height of the source image - * @param srcFormat the source image format - * @param dstW the width of the destination image - * @param dstH the height of the destination image - * @param dstFormat the destination image format - * @param flags specify which algorithm and options to use for rescaling - * @return a pointer to an allocated context, or NULL in case of error - * @note this function is to be removed after a saner alternative is - * written - * @deprecated Use sws_getCachedContext() instead. - */ -struct SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, - int dstW, int dstH, enum AVPixelFormat dstFormat, - int flags, SwsFilter *srcFilter, - SwsFilter *dstFilter, const double *param); -#endif - -/** - * Scale the image slice in srcSlice and put the resulting scaled - * slice in the image in dst. A slice is a sequence of consecutive - * rows in an image. - * - * Slices have to be provided in sequential order, either in - * top-bottom or bottom-top order. If slices are provided in - * non-sequential order the behavior of the function is undefined. - * - * @param c the scaling context previously created with - * sws_getContext() - * @param srcSlice the array containing the pointers to the planes of - * the source slice - * @param srcStride the array containing the strides for each plane of - * the source image - * @param srcSliceY the position in the source image of the slice to - * process, that is the number (counted starting from - * zero) in the image of the first row of the slice - * @param srcSliceH the height of the source slice, that is the number - * of rows in the slice - * @param dst the array containing the pointers to the planes of - * the destination image - * @param dstStride the array containing the strides for each plane of - * the destination image - * @return the height of the output slice - */ -int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], - const int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *const dst[], const int dstStride[]); - -/** - * @param dstRange flag indicating the while-black range of the output (1=jpeg / 0=mpeg) - * @param srcRange flag indicating the while-black range of the input (1=jpeg / 0=mpeg) - * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x] - * @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x] - * @param brightness 16.16 fixed point brightness correction - * @param contrast 16.16 fixed point contrast correction - * @param saturation 16.16 fixed point saturation correction - * @return -1 if not supported - */ -int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], - int srcRange, const int table[4], int dstRange, - int brightness, int contrast, int saturation); - -/** - * @return -1 if not supported - */ -int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, - int *srcRange, int **table, int *dstRange, - int *brightness, int *contrast, int *saturation); - -/** - * Allocate and return an uninitialized vector with length coefficients. - */ -SwsVector *sws_allocVec(int length); - -/** - * Return a normalized Gaussian curve used to filter stuff - * quality = 3 is high quality, lower is lower quality. - */ -SwsVector *sws_getGaussianVec(double variance, double quality); - -/** - * Allocate and return a vector with length coefficients, all - * with the same value c. - */ -SwsVector *sws_getConstVec(double c, int length); - -/** - * Allocate and return a vector with just one coefficient, with - * value 1.0. - */ -SwsVector *sws_getIdentityVec(void); - -/** - * Scale all the coefficients of a by the scalar value. - */ -void sws_scaleVec(SwsVector *a, double scalar); - -/** - * Scale all the coefficients of a so that their sum equals height. - */ -void sws_normalizeVec(SwsVector *a, double height); -void sws_convVec(SwsVector *a, SwsVector *b); -void sws_addVec(SwsVector *a, SwsVector *b); -void sws_subVec(SwsVector *a, SwsVector *b); -void sws_shiftVec(SwsVector *a, int shift); - -/** - * Allocate and return a clone of the vector a, that is a vector - * with the same coefficients as a. - */ -SwsVector *sws_cloneVec(SwsVector *a); - -/** - * Print with av_log() a textual representation of the vector a - * if log_level <= av_log_level. - */ -void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level); - -void sws_freeVec(SwsVector *a); - -SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, - float lumaSharpen, float chromaSharpen, - float chromaHShift, float chromaVShift, - int verbose); -void sws_freeFilter(SwsFilter *filter); - -/** - * Check if context can be reused, otherwise reallocate a new one. - * - * If context is NULL, just calls sws_getContext() to get a new - * context. Otherwise, checks if the parameters are the ones already - * saved in context. If that is the case, returns the current - * context. Otherwise, frees context and gets a new context with - * the new parameters. - * - * Be warned that srcFilter and dstFilter are not checked, they - * are assumed to remain the same. - */ -struct SwsContext *sws_getCachedContext(struct SwsContext *context, - int srcW, int srcH, enum AVPixelFormat srcFormat, - int dstW, int dstH, enum AVPixelFormat dstFormat, - int flags, SwsFilter *srcFilter, - SwsFilter *dstFilter, const double *param); - -/** - * Convert an 8-bit paletted frame into a frame with a color depth of 32 bits. - * - * The output frame will have the same packed format as the palette. - * - * @param src source frame buffer - * @param dst destination frame buffer - * @param num_pixels number of pixels to convert - * @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src - */ -void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette); - -/** - * Convert an 8-bit paletted frame into a frame with a color depth of 24 bits. - * - * With the palette format "ABCD", the destination frame ends up with the format "ABC". - * - * @param src source frame buffer - * @param dst destination frame buffer - * @param num_pixels number of pixels to convert - * @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src - */ -void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette); - -/** - * Get the AVClass for swsContext. It can be used in combination with - * AV_OPT_SEARCH_FAKE_OBJ for examining options. - * - * @see av_opt_find(). - */ -const AVClass *sws_get_class(void); - -/** - * @} - */ - -#endif /* SWSCALE_SWSCALE_H */ diff --git a/ffmpeg/libswscale/swscale_internal.h b/ffmpeg/libswscale/swscale_internal.h deleted file mode 100644 index 443615d..0000000 --- a/ffmpeg/libswscale/swscale_internal.h +++ /dev/null @@ -1,878 +0,0 @@ -/* - * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SWSCALE_SWSCALE_INTERNAL_H -#define SWSCALE_SWSCALE_INTERNAL_H - -#include "config.h" - -#if HAVE_ALTIVEC_H -#include <altivec.h> -#endif - -#include "libavutil/avassert.h" -#include "libavutil/avutil.h" -#include "libavutil/common.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/log.h" -#include "libavutil/pixfmt.h" -#include "libavutil/pixdesc.h" - -#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long - -#define YUVRGB_TABLE_HEADROOM 128 - -#define MAX_FILTER_SIZE 256 - -#define DITHER1XBPP - -#if HAVE_BIGENDIAN -#define ALT32_CORR (-1) -#else -#define ALT32_CORR 1 -#endif - -#if ARCH_X86_64 -# define APCK_PTR2 8 -# define APCK_COEF 16 -# define APCK_SIZE 24 -#else -# define APCK_PTR2 4 -# define APCK_COEF 8 -# define APCK_SIZE 16 -#endif - -struct SwsContext; - -typedef enum SwsDither { - SWS_DITHER_NONE = 0, - SWS_DITHER_AUTO, - SWS_DITHER_BAYER, - SWS_DITHER_ED, - NB_SWS_DITHER, -} SwsDither; - -typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]); - -/** - * Write one line of horizontally scaled data to planar output - * without any additional vertical scaling (or point-scaling). - * - * @param src scaled source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For >8bit - * output, this is in uint16_t - * @param dstW width of destination in pixels - * @param dither ordered dither array of type int16_t and size 8 - * @param offset Dither offset - */ -typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset); - -/** - * Write one line of horizontally scaled data to planar output - * with multi-point vertical scaling between input pixels. - * - * @param filter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param src scaled luma (Y) or alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param filterSize number of vertical input lines to scale - * @param dest pointer to output plane. For >8bit - * output, this is in uint16_t - * @param dstW width of destination pixels - * @param offset Dither offset - */ -typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset); - -/** - * Write one line of horizontally scaled chroma to interleaved output - * with multi-point vertical scaling between input pixels. - * - * @param c SWS scaling context - * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrFilterSize number of vertical chroma input lines to scale - * @param dest pointer to the output plane. For >8bit - * output, this is in uint16_t - * @param dstW width of chroma planes - */ -typedef void (*yuv2interleavedX_fn)(struct SwsContext *c, - const int16_t *chrFilter, - int chrFilterSize, - const int16_t **chrUSrc, - const int16_t **chrVSrc, - uint8_t *dest, int dstW); - -/** - * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB - * output without any additional vertical scaling (or point-scaling). Note - * that this function may do chroma scaling, see the "uvalpha" argument. - * - * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For 16bit output, this is - * uint16_t - * @param dstW width of lumSrc and alpSrc in pixels, number of pixels - * to write into dest[] - * @param uvalpha chroma scaling coefficient for the second line of chroma - * pixels, either 2048 or 0. If 0, one chroma input is used - * for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag - * is set, it generates 1 output pixel). If 2048, two chroma - * input pixels should be averaged for 2 output pixels (this - * only happens if SWS_FLAG_FULL_CHR_INT is not set) - * @param y vertical line number for this output. This does not need - * to be used to calculate the offset in the destination, - * but can be used to generate comfort noise using dithering - * for some output formats. - */ -typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc[2], - const int16_t *chrVSrc[2], - const int16_t *alpSrc, uint8_t *dest, - int dstW, int uvalpha, int y); -/** - * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB - * output by doing bilinear scaling between two input lines. - * - * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For 16bit output, this is - * uint16_t - * @param dstW width of lumSrc and alpSrc in pixels, number of pixels - * to write into dest[] - * @param yalpha luma/alpha scaling coefficients for the second input line. - * The first line's coefficients can be calculated by using - * 4096 - yalpha - * @param uvalpha chroma scaling coefficient for the second input line. The - * first line's coefficients can be calculated by using - * 4096 - uvalpha - * @param y vertical line number for this output. This does not need - * to be used to calculate the offset in the destination, - * but can be used to generate comfort noise using dithering - * for some output formats. - */ -typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2], - const int16_t *chrUSrc[2], - const int16_t *chrVSrc[2], - const int16_t *alpSrc[2], - uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y); -/** - * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB - * output by doing multi-point vertical scaling between input pixels. - * - * @param c SWS scaling context - * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param lumFilterSize number of vertical luma/alpha input lines to scale - * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For 16bit output, this is - * uint16_t - * @param dstW width of lumSrc and alpSrc in pixels, number of pixels - * to write into dest[] - * @param y vertical line number for this output. This does not need - * to be used to calculate the offset in the destination, - * but can be used to generate comfort noise using dithering - * or some output formats. - */ -typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, - const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, - int dstW, int y); - -/** - * Write one line of horizontally scaled Y/U/V/A to YUV/RGB - * output by doing multi-point vertical scaling between input pixels. - * - * @param c SWS scaling context - * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param lumFilterSize number of vertical luma/alpha input lines to scale - * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output planes. For 16bit output, this is - * uint16_t - * @param dstW width of lumSrc and alpSrc in pixels, number of pixels - * to write into dest[] - * @param y vertical line number for this output. This does not need - * to be used to calculate the offset in the destination, - * but can be used to generate comfort noise using dithering - * or some output formats. - */ -typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, - const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t **dest, - int dstW, int y); - -/* This struct should be aligned on at least a 32-byte boundary. */ -typedef struct SwsContext { - /** - * info on struct for av_log - */ - const AVClass *av_class; - - /** - * Note that src, dst, srcStride, dstStride will be copied in the - * sws_scale() wrapper so they can be freely modified here. - */ - SwsFunc swscale; - int srcW; ///< Width of source luma/alpha planes. - int srcH; ///< Height of source luma/alpha planes. - int dstH; ///< Height of destination luma/alpha planes. - int chrSrcW; ///< Width of source chroma planes. - int chrSrcH; ///< Height of source chroma planes. - int chrDstW; ///< Width of destination chroma planes. - int chrDstH; ///< Height of destination chroma planes. - int lumXInc, chrXInc; - int lumYInc, chrYInc; - enum AVPixelFormat dstFormat; ///< Destination pixel format. - enum AVPixelFormat srcFormat; ///< Source pixel format. - int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format. - int srcFormatBpp; ///< Number of bits per pixel of the source pixel format. - int dstBpc, srcBpc; - int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image. - int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image. - int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image. - int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image. - int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user. - int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top). - double param[2]; ///< Input parameters for scaling algorithms that need them. - - uint32_t pal_yuv[256]; - uint32_t pal_rgb[256]; - - /** - * @name Scaled horizontal lines ring buffer. - * The horizontal scaler keeps just enough scaled lines in a ring buffer - * so they may be passed to the vertical scaler. The pointers to the - * allocated buffers for each line are duplicated in sequence in the ring - * buffer to simplify indexing and avoid wrapping around between lines - * inside the vertical scaler code. The wrapping is done before the - * vertical scaler is called. - */ - //@{ - int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler. - int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. - int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. - int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. - int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. - int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. - int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. - int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. - int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. - int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. - //@} - - uint8_t *formatConvBuffer; - - /** - * @name Horizontal and vertical filters. - * To better understand the following fields, here is a pseudo-code of - * their usage in filtering a horizontal line: - * @code - * for (i = 0; i < width; i++) { - * dst[i] = 0; - * for (j = 0; j < filterSize; j++) - * dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ]; - * dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point. - * } - * @endcode - */ - //@{ - int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes. - int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. - int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. - int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. - int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. - int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. - int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. - int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. - int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. - int hChrFilterSize; ///< Horizontal filter size for chroma pixels. - int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. - int vChrFilterSize; ///< Vertical filter size for chroma pixels. - //@} - - int lumMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes. - int chrMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes. - uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes. - uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes. - - int canMMXEXTBeUsed; - - int dstY; ///< Last destination vertical line output from last slice. - int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... - void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() - // alignment ensures the offset can be added in a single - // instruction on e.g. ARM - DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM]; - uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM]; - uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; - uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; - DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, teh C vales are always at the XY_IDX points -#define RY_IDX 0 -#define GY_IDX 1 -#define BY_IDX 2 -#define RU_IDX 3 -#define GU_IDX 4 -#define BU_IDX 5 -#define RV_IDX 6 -#define GV_IDX 7 -#define BV_IDX 8 -#define RGB2YUV_SHIFT 15 - - int *dither_error[4]; - - //Colorspace stuff - int contrast, brightness, saturation; // for sws_getColorspaceDetails - int srcColorspaceTable[4]; - int dstColorspaceTable[4]; - int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image). - int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image). - int src0Alpha; - int dst0Alpha; - int srcXYZ; - int dstXYZ; - int src_h_chr_pos; - int dst_h_chr_pos; - int src_v_chr_pos; - int dst_v_chr_pos; - int yuv2rgb_y_offset; - int yuv2rgb_y_coeff; - int yuv2rgb_v2r_coeff; - int yuv2rgb_v2g_coeff; - int yuv2rgb_u2g_coeff; - int yuv2rgb_u2b_coeff; - -#define RED_DITHER "0*8" -#define GREEN_DITHER "1*8" -#define BLUE_DITHER "2*8" -#define Y_COEFF "3*8" -#define VR_COEFF "4*8" -#define UB_COEFF "5*8" -#define VG_COEFF "6*8" -#define UG_COEFF "7*8" -#define Y_OFFSET "8*8" -#define U_OFFSET "9*8" -#define V_OFFSET "10*8" -#define LUM_MMX_FILTER_OFFSET "11*8" -#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256" -#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM -#define ESP_OFFSET "11*8+4*4*256*2+8" -#define VROUNDER_OFFSET "11*8+4*4*256*2+16" -#define U_TEMP "11*8+4*4*256*2+24" -#define V_TEMP "11*8+4*4*256*2+32" -#define Y_TEMP "11*8+4*4*256*2+40" -#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" -#define UV_OFF_PX "11*8+4*4*256*3+48" -#define UV_OFF_BYTE "11*8+4*4*256*3+56" -#define DITHER16 "11*8+4*4*256*3+64" -#define DITHER32 "11*8+4*4*256*3+80" - - DECLARE_ALIGNED(8, uint64_t, redDither); - DECLARE_ALIGNED(8, uint64_t, greenDither); - DECLARE_ALIGNED(8, uint64_t, blueDither); - - DECLARE_ALIGNED(8, uint64_t, yCoeff); - DECLARE_ALIGNED(8, uint64_t, vrCoeff); - DECLARE_ALIGNED(8, uint64_t, ubCoeff); - DECLARE_ALIGNED(8, uint64_t, vgCoeff); - DECLARE_ALIGNED(8, uint64_t, ugCoeff); - DECLARE_ALIGNED(8, uint64_t, yOffset); - DECLARE_ALIGNED(8, uint64_t, uOffset); - DECLARE_ALIGNED(8, uint64_t, vOffset); - int32_t lumMmxFilter[4 * MAX_FILTER_SIZE]; - int32_t chrMmxFilter[4 * MAX_FILTER_SIZE]; - int dstW; ///< Width of destination luma/alpha planes. - DECLARE_ALIGNED(8, uint64_t, esp); - DECLARE_ALIGNED(8, uint64_t, vRounder); - DECLARE_ALIGNED(8, uint64_t, u_temp); - DECLARE_ALIGNED(8, uint64_t, v_temp); - DECLARE_ALIGNED(8, uint64_t, y_temp); - int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; - // alignment of these values is not necessary, but merely here - // to maintain the same offset across x8632 and x86-64. Once we - // use proper offset macros in the asm, they can be removed. - DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes - DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes - DECLARE_ALIGNED(8, uint16_t, dither16)[8]; - DECLARE_ALIGNED(8, uint32_t, dither32)[8]; - - const uint8_t *chrDither8, *lumDither8; - -#if HAVE_ALTIVEC - vector signed short CY; - vector signed short CRV; - vector signed short CBU; - vector signed short CGU; - vector signed short CGV; - vector signed short OY; - vector unsigned short CSHIFT; - vector signed short *vYCoeffsBank, *vCCoeffsBank; -#endif - -#if ARCH_BFIN - DECLARE_ALIGNED(4, uint32_t, oy); - DECLARE_ALIGNED(4, uint32_t, oc); - DECLARE_ALIGNED(4, uint32_t, zero); - DECLARE_ALIGNED(4, uint32_t, cy); - DECLARE_ALIGNED(4, uint32_t, crv); - DECLARE_ALIGNED(4, uint32_t, rmask); - DECLARE_ALIGNED(4, uint32_t, cbu); - DECLARE_ALIGNED(4, uint32_t, bmask); - DECLARE_ALIGNED(4, uint32_t, cgu); - DECLARE_ALIGNED(4, uint32_t, cgv); - DECLARE_ALIGNED(4, uint32_t, gmask); -#endif - -#if HAVE_VIS - DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10]; -#endif - int use_mmx_vfilter; - -/* pre defined color-spaces gamma */ -#define XYZ_GAMMA (2.6f) -#define RGB_GAMMA (2.2f) - int16_t *xyzgamma; - int16_t *rgbgamma; - int16_t *xyzgammainv; - int16_t *rgbgammainv; - int16_t xyz2rgb_matrix[3][4]; - int16_t rgb2xyz_matrix[3][4]; - - /* function pointers for swscale() */ - yuv2planar1_fn yuv2plane1; - yuv2planarX_fn yuv2planeX; - yuv2interleavedX_fn yuv2nv12cX; - yuv2packed1_fn yuv2packed1; - yuv2packed2_fn yuv2packed2; - yuv2packedX_fn yuv2packedX; - yuv2anyX_fn yuv2anyX; - - /// Unscaled conversion of luma plane to YV12 for horizontal scaler. - void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); - /// Unscaled conversion of alpha plane to YV12 for horizontal scaler. - void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); - /// Unscaled conversion of chroma planes to YV12 for horizontal scaler. - void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); - - /** - * Functions to read planar input, such as planar RGB, and convert - * internally to Y/UV/A. - */ - /** @{ */ - void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); - void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], - int width, int32_t *rgb2yuv); - void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); - /** @} */ - - /** - * Scale one horizontal line of input data using a bilinear filter - * to produce one line of output data. Compared to SwsContext->hScale(), - * please take note of the following caveats when using these: - * - Scaling is done using only 7bit instead of 14bit coefficients. - * - You can use no more than 5 input pixels to produce 4 output - * pixels. Therefore, this filter should not be used for downscaling - * by more than ~20% in width (because that equals more than 5/4th - * downscaling and thus more than 5 pixels input per 4 pixels output). - * - In general, bilinear filters create artifacts during downscaling - * (even when <20%), because one output pixel will span more than one - * input pixel, and thus some pixels will need edges of both neighbor - * pixels to interpolate the output pixel. Since you can use at most - * two input pixels per output pixel in bilinear scaling, this is - * impossible and thus downscaling by any size will create artifacts. - * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR - * in SwsContext->flags. - */ - /** @{ */ - void (*hyscale_fast)(struct SwsContext *c, - int16_t *dst, int dstWidth, - const uint8_t *src, int srcW, int xInc); - void (*hcscale_fast)(struct SwsContext *c, - int16_t *dst1, int16_t *dst2, int dstWidth, - const uint8_t *src1, const uint8_t *src2, - int srcW, int xInc); - /** @} */ - - /** - * Scale one horizontal line of input data using a filter over the input - * lines, to produce one (differently sized) line of output data. - * - * @param dst pointer to destination buffer for horizontally scaled - * data. If the number of bits per component of one - * destination pixel (SwsContext->dstBpc) is <= 10, data - * will be 15bpc in 16bits (int16_t) width. Else (i.e. - * SwsContext->dstBpc == 16), data will be 19bpc in - * 32bits (int32_t) width. - * @param dstW width of destination image - * @param src pointer to source data to be scaled. If the number of - * bits per component of a source pixel (SwsContext->srcBpc) - * is 8, this is 8bpc in 8bits (uint8_t) width. Else - * (i.e. SwsContext->dstBpc > 8), this is native depth - * in 16bits (uint16_t) width. In other words, for 9-bit - * YUV input, this is 9bpc, for 10-bit YUV input, this is - * 10bpc, and for 16-bit RGB or YUV, this is 16bpc. - * @param filter filter coefficients to be used per output pixel for - * scaling. This contains 14bpp filtering coefficients. - * Guaranteed to contain dstW * filterSize entries. - * @param filterPos position of the first input pixel to be used for - * each output pixel during scaling. Guaranteed to - * contain dstW entries. - * @param filterSize the number of input coefficients to be used (and - * thus the number of input pixels to be used) for - * creating a single output pixel. Is aligned to 4 - * (and input coefficients thus padded with zeroes) - * to simplify creating SIMD code. - */ - /** @{ */ - void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, - const uint8_t *src, const int16_t *filter, - const int32_t *filterPos, int filterSize); - void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, - const uint8_t *src, const int16_t *filter, - const int32_t *filterPos, int filterSize); - /** @} */ - - /// Color range conversion function for luma plane if needed. - void (*lumConvertRange)(int16_t *dst, int width); - /// Color range conversion function for chroma planes if needed. - void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); - - int needs_hcscale; ///< Set if there are chroma planes to be converted. - - SwsDither dither; -} SwsContext; -//FIXME check init (where 0) - -SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c); -int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], - int fullRange, int brightness, - int contrast, int saturation); -void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4], - int brightness, int contrast, int saturation); - -void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, - int lastInLumBuf, int lastInChrBuf); - -SwsFunc ff_yuv2rgb_init_x86(SwsContext *c); -SwsFunc ff_yuv2rgb_init_vis(SwsContext *c); -SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c); -SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c); - -#if FF_API_SWS_FORMAT_NAME -/** - * @deprecated Use av_get_pix_fmt_name() instead. - */ -attribute_deprecated -const char *sws_format_name(enum AVPixelFormat format); -#endif - -static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return desc->comp[0].depth_minus1 == 15; -} - -static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return desc->comp[0].depth_minus1 >= 8 && desc->comp[0].depth_minus1 <= 13; -} - -#define isNBPS(x) is9_OR_10BPS(x) - -static av_always_inline int isBE(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return desc->flags & AV_PIX_FMT_FLAG_BE; -} - -static av_always_inline int isYUV(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2; -} - -static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt)); -} - -static av_always_inline int isRGB(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return (desc->flags & AV_PIX_FMT_FLAG_RGB); -} - -#if 0 // FIXME -#define isGray(x) \ - (!(av_pix_fmt_desc_get(x)->flags & AV_PIX_FMT_FLAG_PAL) && \ - av_pix_fmt_desc_get(x)->nb_components <= 2) -#else -#define isGray(x) \ - ((x) == AV_PIX_FMT_GRAY8 || \ - (x) == AV_PIX_FMT_Y400A || \ - (x) == AV_PIX_FMT_GRAY16BE || \ - (x) == AV_PIX_FMT_GRAY16LE) -#endif - -#define isRGBinInt(x) \ - ( \ - (x) == AV_PIX_FMT_RGB48BE || \ - (x) == AV_PIX_FMT_RGB48LE || \ - (x) == AV_PIX_FMT_RGBA64BE || \ - (x) == AV_PIX_FMT_RGBA64LE || \ - (x) == AV_PIX_FMT_RGB32 || \ - (x) == AV_PIX_FMT_RGB32_1 || \ - (x) == AV_PIX_FMT_RGB24 || \ - (x) == AV_PIX_FMT_RGB565BE || \ - (x) == AV_PIX_FMT_RGB565LE || \ - (x) == AV_PIX_FMT_RGB555BE || \ - (x) == AV_PIX_FMT_RGB555LE || \ - (x) == AV_PIX_FMT_RGB444BE || \ - (x) == AV_PIX_FMT_RGB444LE || \ - (x) == AV_PIX_FMT_RGB8 || \ - (x) == AV_PIX_FMT_RGB4 || \ - (x) == AV_PIX_FMT_RGB4_BYTE || \ - (x) == AV_PIX_FMT_MONOBLACK || \ - (x) == AV_PIX_FMT_MONOWHITE \ - ) -#define isBGRinInt(x) \ - ( \ - (x) == AV_PIX_FMT_BGR48BE || \ - (x) == AV_PIX_FMT_BGR48LE || \ - (x) == AV_PIX_FMT_BGRA64BE || \ - (x) == AV_PIX_FMT_BGRA64LE || \ - (x) == AV_PIX_FMT_BGR32 || \ - (x) == AV_PIX_FMT_BGR32_1 || \ - (x) == AV_PIX_FMT_BGR24 || \ - (x) == AV_PIX_FMT_BGR565BE || \ - (x) == AV_PIX_FMT_BGR565LE || \ - (x) == AV_PIX_FMT_BGR555BE || \ - (x) == AV_PIX_FMT_BGR555LE || \ - (x) == AV_PIX_FMT_BGR444BE || \ - (x) == AV_PIX_FMT_BGR444LE || \ - (x) == AV_PIX_FMT_BGR8 || \ - (x) == AV_PIX_FMT_BGR4 || \ - (x) == AV_PIX_FMT_BGR4_BYTE || \ - (x) == AV_PIX_FMT_MONOBLACK || \ - (x) == AV_PIX_FMT_MONOWHITE \ - ) - -#define isRGBinBytes(x) ( \ - (x) == AV_PIX_FMT_RGB48BE \ - || (x) == AV_PIX_FMT_RGB48LE \ - || (x) == AV_PIX_FMT_RGBA64BE \ - || (x) == AV_PIX_FMT_RGBA64LE \ - || (x) == AV_PIX_FMT_RGBA \ - || (x) == AV_PIX_FMT_ARGB \ - || (x) == AV_PIX_FMT_RGB24 \ - ) -#define isBGRinBytes(x) ( \ - (x) == AV_PIX_FMT_BGR48BE \ - || (x) == AV_PIX_FMT_BGR48LE \ - || (x) == AV_PIX_FMT_BGRA64BE \ - || (x) == AV_PIX_FMT_BGRA64LE \ - || (x) == AV_PIX_FMT_BGRA \ - || (x) == AV_PIX_FMT_ABGR \ - || (x) == AV_PIX_FMT_BGR24 \ - ) - -#define isAnyRGB(x) \ - ( \ - isRGBinInt(x) || \ - isBGRinInt(x) || \ - isRGB(x) \ - ) - -static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - if (pix_fmt == AV_PIX_FMT_PAL8) - return 1; - return desc->flags & AV_PIX_FMT_FLAG_ALPHA; -} - -#if 1 -#define isPacked(x) ( \ - (x)==AV_PIX_FMT_PAL8 \ - || (x)==AV_PIX_FMT_YUYV422 \ - || (x)==AV_PIX_FMT_UYVY422 \ - || (x)==AV_PIX_FMT_Y400A \ - || isRGBinInt(x) \ - || isBGRinInt(x) \ - ) -#else -static av_always_inline int isPacked(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return ((desc->nb_components >= 2 && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) || - pix_fmt == AV_PIX_FMT_PAL8); -} - -#endif -static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return (desc->nb_components >= 2 && (desc->flags & AV_PIX_FMT_FLAG_PLANAR)); -} - -static av_always_inline int isPackedRGB(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == AV_PIX_FMT_FLAG_RGB); -} - -static av_always_inline int isPlanarRGB(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == - (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)); -} - -static av_always_inline int usePal(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL); -} - -extern const uint64_t ff_dither4[2]; -extern const uint64_t ff_dither8[2]; - -extern const uint8_t ff_dither_2x2_4[3][8]; -extern const uint8_t ff_dither_2x2_8[3][8]; -extern const uint8_t ff_dither_4x4_16[5][8]; -extern const uint8_t ff_dither_8x8_32[9][8]; -extern const uint8_t ff_dither_8x8_73[9][8]; -extern const uint8_t ff_dither_8x8_128[9][8]; -extern const uint8_t ff_dither_8x8_220[9][8]; - -extern const int32_t ff_yuv2rgb_coeffs[8][4]; - -extern const AVClass sws_context_class; - -/** - * Set c->swscale to an unscaled converter if one exists for the specific - * source and destination formats, bit depths, flags, etc. - */ -void ff_get_unscaled_swscale(SwsContext *c); -void ff_get_unscaled_swscale_bfin(SwsContext *c); -void ff_get_unscaled_swscale_ppc(SwsContext *c); -void ff_get_unscaled_swscale_arm(SwsContext *c); - -/** - * Return function pointer to fastest main scaler path function depending - * on architecture and available optimizations. - */ -SwsFunc ff_getSwsFunc(SwsContext *c); - -void ff_sws_init_input_funcs(SwsContext *c); -void ff_sws_init_output_funcs(SwsContext *c, - yuv2planar1_fn *yuv2plane1, - yuv2planarX_fn *yuv2planeX, - yuv2interleavedX_fn *yuv2nv12cX, - yuv2packed1_fn *yuv2packed1, - yuv2packed2_fn *yuv2packed2, - yuv2packedX_fn *yuv2packedX, - yuv2anyX_fn *yuv2anyX); -void ff_sws_init_swscale_ppc(SwsContext *c); -void ff_sws_init_swscale_x86(SwsContext *c); - -static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y, - int alpha, int bits, const int big_endian) -{ - int i, j; - uint8_t *ptr = plane + stride * y; - int v = alpha ? 0xFFFF>>(15-bits) : (1<<bits); - for (i = 0; i < height; i++) { -#define FILL(wfunc) \ - for (j = 0; j < width; j++) {\ - wfunc(ptr+2*j, v);\ - } - if (big_endian) { - FILL(AV_WB16); - } else { - FILL(AV_WL16); - } - ptr += stride; - } -} - -#endif /* SWSCALE_SWSCALE_INTERNAL_H */ diff --git a/ffmpeg/libswscale/swscale_unscaled.c b/ffmpeg/libswscale/swscale_unscaled.c deleted file mode 100644 index ccf9980..0000000 --- a/ffmpeg/libswscale/swscale_unscaled.c +++ /dev/null @@ -1,1415 +0,0 @@ -/* - * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> -#include <string.h> -#include <math.h> -#include <stdio.h> -#include "config.h" -#include "swscale.h" -#include "swscale_internal.h" -#include "rgb2rgb.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/cpu.h" -#include "libavutil/avutil.h" -#include "libavutil/mathematics.h" -#include "libavutil/bswap.h" -#include "libavutil/pixdesc.h" -#include "libavutil/avassert.h" - -DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={ -{ - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, -},{ - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, -},{ - { 2, 4, 3, 5, 2, 4, 3, 5,}, - { 6, 0, 7, 1, 6, 0, 7, 1,}, - { 3, 5, 2, 4, 3, 5, 2, 4,}, - { 7, 1, 6, 0, 7, 1, 6, 0,}, - { 2, 4, 3, 5, 2, 4, 3, 5,}, - { 6, 0, 7, 1, 6, 0, 7, 1,}, - { 3, 5, 2, 4, 3, 5, 2, 4,}, - { 7, 1, 6, 0, 7, 1, 6, 0,}, -},{ - { 4, 8, 7, 11, 4, 8, 7, 11,}, - { 12, 0, 15, 3, 12, 0, 15, 3,}, - { 6, 10, 5, 9, 6, 10, 5, 9,}, - { 14, 2, 13, 1, 14, 2, 13, 1,}, - { 4, 8, 7, 11, 4, 8, 7, 11,}, - { 12, 0, 15, 3, 12, 0, 15, 3,}, - { 6, 10, 5, 9, 6, 10, 5, 9,}, - { 14, 2, 13, 1, 14, 2, 13, 1,}, -},{ - { 9, 17, 15, 23, 8, 16, 14, 22,}, - { 25, 1, 31, 7, 24, 0, 30, 6,}, - { 13, 21, 11, 19, 12, 20, 10, 18,}, - { 29, 5, 27, 3, 28, 4, 26, 2,}, - { 8, 16, 14, 22, 9, 17, 15, 23,}, - { 24, 0, 30, 6, 25, 1, 31, 7,}, - { 12, 20, 10, 18, 13, 21, 11, 19,}, - { 28, 4, 26, 2, 29, 5, 27, 3,}, -},{ - { 18, 34, 30, 46, 17, 33, 29, 45,}, - { 50, 2, 62, 14, 49, 1, 61, 13,}, - { 26, 42, 22, 38, 25, 41, 21, 37,}, - { 58, 10, 54, 6, 57, 9, 53, 5,}, - { 16, 32, 28, 44, 19, 35, 31, 47,}, - { 48, 0, 60, 12, 51, 3, 63, 15,}, - { 24, 40, 20, 36, 27, 43, 23, 39,}, - { 56, 8, 52, 4, 59, 11, 55, 7,}, -},{ - { 18, 34, 30, 46, 17, 33, 29, 45,}, - { 50, 2, 62, 14, 49, 1, 61, 13,}, - { 26, 42, 22, 38, 25, 41, 21, 37,}, - { 58, 10, 54, 6, 57, 9, 53, 5,}, - { 16, 32, 28, 44, 19, 35, 31, 47,}, - { 48, 0, 60, 12, 51, 3, 63, 15,}, - { 24, 40, 20, 36, 27, 43, 23, 39,}, - { 56, 8, 52, 4, 59, 11, 55, 7,}, -},{ - { 36, 68, 60, 92, 34, 66, 58, 90,}, - { 100, 4,124, 28, 98, 2,122, 26,}, - { 52, 84, 44, 76, 50, 82, 42, 74,}, - { 116, 20,108, 12,114, 18,106, 10,}, - { 32, 64, 56, 88, 38, 70, 62, 94,}, - { 96, 0,120, 24,102, 6,126, 30,}, - { 48, 80, 40, 72, 54, 86, 46, 78,}, - { 112, 16,104, 8,118, 22,110, 14,}, -}}; - -static const uint16_t dither_scale[15][16]={ -{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, -{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, -{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, -{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, -{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, -{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, -{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, -{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, -{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, -{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,}, -{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, -{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, -{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, -{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, -{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, -}; - - -static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, - uint8_t val) -{ - int i; - uint8_t *ptr = plane + stride * y; - for (i = 0; i < height; i++) { - memset(ptr, val, width); - ptr += stride; - } -} - -static void copyPlane(const uint8_t *src, int srcStride, - int srcSliceY, int srcSliceH, int width, - uint8_t *dst, int dstStride) -{ - dst += dstStride * srcSliceY; - if (dstStride == srcStride && srcStride > 0) { - memcpy(dst, src, srcSliceH * dstStride); - } else { - int i; - for (i = 0; i < srcSliceH; i++) { - memcpy(dst, src, width); - src += srcStride; - dst += dstStride; - } - } -} - -static int planarToNv12Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, - int srcSliceH, uint8_t *dstParam[], - int dstStride[]) -{ - uint8_t *dst = dstParam[1] + dstStride[1] * srcSliceY / 2; - - copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, - dstParam[0], dstStride[0]); - - if (c->dstFormat == AV_PIX_FMT_NV12) - interleaveBytes(src[1], src[2], dst, c->srcW / 2, srcSliceH / 2, - srcStride[1], srcStride[2], dstStride[1]); - else - interleaveBytes(src[2], src[1], dst, c->srcW / 2, srcSliceH / 2, - srcStride[2], srcStride[1], dstStride[1]); - - return srcSliceH; -} - -static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, - int srcSliceH, uint8_t *dstParam[], - int dstStride[]) -{ - uint8_t *dst1 = dstParam[1] + dstStride[1] * srcSliceY / 2; - uint8_t *dst2 = dstParam[2] + dstStride[2] * srcSliceY / 2; - - copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, - dstParam[0], dstStride[0]); - - if (c->srcFormat == AV_PIX_FMT_NV12) - deinterleaveBytes(src[1], dst1, dst2,c->srcW / 2, srcSliceH / 2, - srcStride[1], dstStride[1], dstStride[2]); - else - deinterleaveBytes(src[1], dst2, dst1, c->srcW / 2, srcSliceH / 2, - srcStride[1], dstStride[2], dstStride[1]); - - return srcSliceH; -} - -static int planarToYuy2Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; - - yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], - srcStride[1], dstStride[0]); - - return srcSliceH; -} - -static int planarToUyvyWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; - - yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], - srcStride[1], dstStride[0]); - - return srcSliceH; -} - -static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; - - yuv422ptoyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], - srcStride[1], dstStride[0]); - - return srcSliceH; -} - -static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; - - yuv422ptouyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], - srcStride[1], dstStride[0]); - - return srcSliceH; -} - -static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; - uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY / 2; - uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY / 2; - - yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], - dstStride[1], srcStride[0]); - - if (dstParam[3]) - fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - - return srcSliceH; -} - -static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; - uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY; - uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY; - - yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], - dstStride[1], srcStride[0]); - - return srcSliceH; -} - -static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; - uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY / 2; - uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY / 2; - - uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], - dstStride[1], srcStride[0]); - - if (dstParam[3]) - fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - - return srcSliceH; -} - -static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dstParam[], int dstStride[]) -{ - uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; - uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY; - uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY; - - uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], - dstStride[1], srcStride[0]); - - return srcSliceH; -} - -static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, - const uint8_t *palette) -{ - int i; - for (i = 0; i < num_pixels; i++) - ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i << 1]] | (src[(i << 1) + 1] << 24); -} - -static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, - const uint8_t *palette) -{ - int i; - - for (i = 0; i < num_pixels; i++) - ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i << 1]] | src[(i << 1) + 1]; -} - -static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, - const uint8_t *palette) -{ - int i; - - for (i = 0; i < num_pixels; i++) { - //FIXME slow? - dst[0] = palette[src[i << 1] * 4 + 0]; - dst[1] = palette[src[i << 1] * 4 + 1]; - dst[2] = palette[src[i << 1] * 4 + 2]; - dst += 3; - } -} - -static int packed_16bpc_bswap(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int i, j, p; - - for (p = 0; p < 4; p++) { - int srcstr = srcStride[p] / 2; - int dststr = dstStride[p] / 2; - uint16_t *dstPtr = (uint16_t *) dst[p]; - const uint16_t *srcPtr = (const uint16_t *) src[p]; - int min_stride = FFMIN(FFABS(srcstr), FFABS(dststr)); - if(!dstPtr || !srcPtr) - continue; - for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) { - for (j = 0; j < min_stride; j++) { - dstPtr[j] = av_bswap16(srcPtr[j]); - } - srcPtr += srcstr; - dstPtr += dststr; - } - } - - return srcSliceH; -} - -static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], - int srcSliceY, int srcSliceH, uint8_t *dst[], - int dstStride[]) -{ - const enum AVPixelFormat srcFormat = c->srcFormat; - const enum AVPixelFormat dstFormat = c->dstFormat; - void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels, - const uint8_t *palette) = NULL; - int i; - uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY; - const uint8_t *srcPtr = src[0]; - - if (srcFormat == AV_PIX_FMT_GRAY8A) { - switch (dstFormat) { - case AV_PIX_FMT_RGB32 : conv = gray8aToPacked32; break; - case AV_PIX_FMT_BGR32 : conv = gray8aToPacked32; break; - case AV_PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break; - case AV_PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break; - case AV_PIX_FMT_RGB24 : conv = gray8aToPacked24; break; - case AV_PIX_FMT_BGR24 : conv = gray8aToPacked24; break; - } - } else if (usePal(srcFormat)) { - switch (dstFormat) { - case AV_PIX_FMT_RGB32 : conv = sws_convertPalette8ToPacked32; break; - case AV_PIX_FMT_BGR32 : conv = sws_convertPalette8ToPacked32; break; - case AV_PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break; - case AV_PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break; - case AV_PIX_FMT_RGB24 : conv = sws_convertPalette8ToPacked24; break; - case AV_PIX_FMT_BGR24 : conv = sws_convertPalette8ToPacked24; break; - } - } - - if (!conv) - av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); - else { - for (i = 0; i < srcSliceH; i++) { - conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); - srcPtr += srcStride[0]; - dstPtr += dstStride[0]; - } - } - - return srcSliceH; -} - -static void gbr16ptopacked16(const uint16_t *src[], int srcStride[], - uint8_t *dst, int dstStride, int srcSliceH, - int alpha, int swap, int bpp, int width) -{ - int x, h, i; - int src_alpha = src[3] != NULL; - int scale_high = 16 - bpp, scale_low = (bpp - 8) * 2; - for (h = 0; h < srcSliceH; h++) { - uint16_t *dest = (uint16_t *)(dst + dstStride * h); - uint16_t component; - - switch(swap) { - case 3: - if (alpha && !src_alpha) { - for (x = 0; x < width; x++) { - component = av_bswap16(src[0][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[1][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[2][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - *dest++ = 0xffff; - } - } else if (alpha && src_alpha) { - for (x = 0; x < width; x++) { - component = av_bswap16(src[0][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[1][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[2][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[3][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - } - } else { - for (x = 0; x < width; x++) { - component = av_bswap16(src[0][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[1][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - component = av_bswap16(src[2][x]); - *dest++ = av_bswap16(component << scale_high | component >> scale_low); - } - } - break; - case 2: - if (alpha && !src_alpha) { - for (x = 0; x < width; x++) { - *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); - *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); - *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); - *dest++ = 0xffff; - } - } else if (alpha && src_alpha) { - for (x = 0; x < width; x++) { - *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); - *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); - *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); - *dest++ = av_bswap16(src[3][x] << scale_high | src[3][x] >> scale_low); - } - } else { - for (x = 0; x < width; x++) { - *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); - *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); - *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); - } - } - break; - case 1: - if (alpha && !src_alpha) { - for (x = 0; x < width; x++) { - *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; - *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; - *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; - *dest++ = 0xffff; - } - } else if (alpha && src_alpha) { - for (x = 0; x < width; x++) { - *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; - *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; - *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; - *dest++ = av_bswap16(src[3][x]) << scale_high | av_bswap16(src[3][x]) >> scale_low; - } - } else { - for (x = 0; x < width; x++) { - *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; - *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; - *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; - } - } - break; - default: - if (alpha && !src_alpha) { - for (x = 0; x < width; x++) { - *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; - *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; - *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; - *dest++ = 0xffff; - } - } else if (alpha && src_alpha) { - for (x = 0; x < width; x++) { - *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; - *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; - *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; - *dest++ = src[3][x] << scale_high | src[3][x] >> scale_low; - } - } else { - for (x = 0; x < width; x++) { - *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; - *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; - *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; - } - } - } - for (i = 0; i < 3 + src_alpha; i++) - src[i] += srcStride[i] >> 1; - } -} - -static int planarRgb16ToRgb16Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - const uint16_t *src102[] = { (uint16_t *)src[1], (uint16_t *)src[0], (uint16_t *)src[2], (uint16_t *)src[3] }; - const uint16_t *src201[] = { (uint16_t *)src[2], (uint16_t *)src[0], (uint16_t *)src[1], (uint16_t *)src[3] }; - int stride102[] = { srcStride[1], srcStride[0], srcStride[2], srcStride[3] }; - int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] }; - const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat); - const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat); - int bits_per_sample = src_format->comp[0].depth_minus1 + 1; - int swap = 0; - if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) || - !HAVE_BIGENDIAN && src_format->flags & AV_PIX_FMT_FLAG_BE) - swap++; - if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) || - !HAVE_BIGENDIAN && dst_format->flags & AV_PIX_FMT_FLAG_BE) - swap += 2; - - if ((src_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) != - (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) || - bits_per_sample <= 8) { - av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", - src_format->name, dst_format->name); - return srcSliceH; - } - switch (c->dstFormat) { - case AV_PIX_FMT_BGR48LE: - case AV_PIX_FMT_BGR48BE: - gbr16ptopacked16(src102, stride102, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, 0, swap, bits_per_sample, c->srcW); - break; - case AV_PIX_FMT_RGB48LE: - case AV_PIX_FMT_RGB48BE: - gbr16ptopacked16(src201, stride201, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, 0, swap, bits_per_sample, c->srcW); - break; - case AV_PIX_FMT_RGBA64LE: - case AV_PIX_FMT_RGBA64BE: - gbr16ptopacked16(src201, stride201, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, 1, swap, bits_per_sample, c->srcW); - break; - case AV_PIX_FMT_BGRA64LE: - case AV_PIX_FMT_BGRA64BE: - gbr16ptopacked16(src102, stride102, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, 1, swap, bits_per_sample, c->srcW); - break; - default: - av_log(c, AV_LOG_ERROR, - "unsupported planar RGB conversion %s -> %s\n", - src_format->name, dst_format->name); - } - - return srcSliceH; -} - -static void gbr24ptopacked24(const uint8_t *src[], int srcStride[], - uint8_t *dst, int dstStride, int srcSliceH, - int width) -{ - int x, h, i; - for (h = 0; h < srcSliceH; h++) { - uint8_t *dest = dst + dstStride * h; - for (x = 0; x < width; x++) { - *dest++ = src[0][x]; - *dest++ = src[1][x]; - *dest++ = src[2][x]; - } - - for (i = 0; i < 3; i++) - src[i] += srcStride[i]; - } -} - -static void gbr24ptopacked32(const uint8_t *src[], int srcStride[], - uint8_t *dst, int dstStride, int srcSliceH, - int alpha_first, int width) -{ - int x, h, i; - for (h = 0; h < srcSliceH; h++) { - uint8_t *dest = dst + dstStride * h; - - if (alpha_first) { - for (x = 0; x < width; x++) { - *dest++ = 0xff; - *dest++ = src[0][x]; - *dest++ = src[1][x]; - *dest++ = src[2][x]; - } - } else { - for (x = 0; x < width; x++) { - *dest++ = src[0][x]; - *dest++ = src[1][x]; - *dest++ = src[2][x]; - *dest++ = 0xff; - } - } - - for (i = 0; i < 3; i++) - src[i] += srcStride[i]; - } -} - -static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int alpha_first = 0; - const uint8_t *src102[] = { src[1], src[0], src[2] }; - const uint8_t *src201[] = { src[2], src[0], src[1] }; - int stride102[] = { srcStride[1], srcStride[0], srcStride[2] }; - int stride201[] = { srcStride[2], srcStride[0], srcStride[1] }; - - if (c->srcFormat != AV_PIX_FMT_GBRP) { - av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", - av_get_pix_fmt_name(c->srcFormat), - av_get_pix_fmt_name(c->dstFormat)); - return srcSliceH; - } - - switch (c->dstFormat) { - case AV_PIX_FMT_BGR24: - gbr24ptopacked24(src102, stride102, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, c->srcW); - break; - - case AV_PIX_FMT_RGB24: - gbr24ptopacked24(src201, stride201, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, c->srcW); - break; - - case AV_PIX_FMT_ARGB: - alpha_first = 1; - case AV_PIX_FMT_RGBA: - gbr24ptopacked32(src201, stride201, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, alpha_first, c->srcW); - break; - - case AV_PIX_FMT_ABGR: - alpha_first = 1; - case AV_PIX_FMT_BGRA: - gbr24ptopacked32(src102, stride102, - dst[0] + srcSliceY * dstStride[0], dstStride[0], - srcSliceH, alpha_first, c->srcW); - break; - - default: - av_log(c, AV_LOG_ERROR, - "unsupported planar RGB conversion %s -> %s\n", - av_get_pix_fmt_name(c->srcFormat), - av_get_pix_fmt_name(c->dstFormat)); - } - - return srcSliceH; -} - -static int planarRgbToplanarRgbWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, - dst[0], dstStride[0]); - copyPlane(src[1], srcStride[1], srcSliceY, srcSliceH, c->srcW, - dst[1], dstStride[1]); - copyPlane(src[2], srcStride[2], srcSliceY, srcSliceH, c->srcW, - dst[2], dstStride[2]); - if (dst[3]) - fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - - return srcSliceH; -} - -static void packedtogbr24p(const uint8_t *src, int srcStride, - uint8_t *dst[], int dstStride[], int srcSliceH, - int alpha_first, int inc_size, int width) -{ - uint8_t *dest[3]; - int x, h; - - dest[0] = dst[0]; - dest[1] = dst[1]; - dest[2] = dst[2]; - - if (alpha_first) - src++; - - for (h = 0; h < srcSliceH; h++) { - for (x = 0; x < width; x++) { - dest[0][x] = src[0]; - dest[1][x] = src[1]; - dest[2][x] = src[2]; - - src += inc_size; - } - src += srcStride - width * inc_size; - dest[0] += dstStride[0]; - dest[1] += dstStride[1]; - dest[2] += dstStride[2]; - } -} - -static int rgbToPlanarRgbWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int alpha_first = 0; - int stride102[] = { dstStride[1], dstStride[0], dstStride[2] }; - int stride201[] = { dstStride[2], dstStride[0], dstStride[1] }; - uint8_t *dst102[] = { dst[1] + srcSliceY * dstStride[1], - dst[0] + srcSliceY * dstStride[0], - dst[2] + srcSliceY * dstStride[2] }; - uint8_t *dst201[] = { dst[2] + srcSliceY * dstStride[2], - dst[0] + srcSliceY * dstStride[0], - dst[1] + srcSliceY * dstStride[1] }; - - switch (c->srcFormat) { - case AV_PIX_FMT_RGB24: - packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst201, - stride201, srcSliceH, alpha_first, 3, c->srcW); - break; - case AV_PIX_FMT_BGR24: - packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst102, - stride102, srcSliceH, alpha_first, 3, c->srcW); - break; - case AV_PIX_FMT_ARGB: - alpha_first = 1; - case AV_PIX_FMT_RGBA: - packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst201, - stride201, srcSliceH, alpha_first, 4, c->srcW); - break; - case AV_PIX_FMT_ABGR: - alpha_first = 1; - case AV_PIX_FMT_BGRA: - packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst102, - stride102, srcSliceH, alpha_first, 4, c->srcW); - break; - default: - av_log(c, AV_LOG_ERROR, - "unsupported planar RGB conversion %s -> %s\n", - av_get_pix_fmt_name(c->srcFormat), - av_get_pix_fmt_name(c->dstFormat)); - } - - return srcSliceH; -} - -#define isRGBA32(x) ( \ - (x) == AV_PIX_FMT_ARGB \ - || (x) == AV_PIX_FMT_RGBA \ - || (x) == AV_PIX_FMT_BGRA \ - || (x) == AV_PIX_FMT_ABGR \ - ) - -#define isRGBA64(x) ( \ - (x) == AV_PIX_FMT_RGBA64LE \ - || (x) == AV_PIX_FMT_RGBA64BE \ - || (x) == AV_PIX_FMT_BGRA64LE \ - || (x) == AV_PIX_FMT_BGRA64BE \ - ) - -#define isRGB48(x) ( \ - (x) == AV_PIX_FMT_RGB48LE \ - || (x) == AV_PIX_FMT_RGB48BE \ - || (x) == AV_PIX_FMT_BGR48LE \ - || (x) == AV_PIX_FMT_BGR48BE \ - ) - -/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ -typedef void (* rgbConvFn) (const uint8_t *, uint8_t *, int); -static rgbConvFn findRgbConvFn(SwsContext *c) -{ - const enum AVPixelFormat srcFormat = c->srcFormat; - const enum AVPixelFormat dstFormat = c->dstFormat; - const int srcId = c->srcFormatBpp; - const int dstId = c->dstFormatBpp; - rgbConvFn conv = NULL; - -#define IS_NOT_NE(bpp, desc) \ - (((bpp + 7) >> 3) == 2 && \ - (!(desc->flags & AV_PIX_FMT_FLAG_BE) != !HAVE_BIGENDIAN)) - -#define CONV_IS(src, dst) (srcFormat == AV_PIX_FMT_##src && dstFormat == AV_PIX_FMT_##dst) - - if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) { - if ( CONV_IS(ABGR, RGBA) - || CONV_IS(ARGB, BGRA) - || CONV_IS(BGRA, ARGB) - || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210; - else if (CONV_IS(ABGR, ARGB) - || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321; - else if (CONV_IS(ABGR, BGRA) - || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230; - else if (CONV_IS(BGRA, RGBA) - || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103; - else if (CONV_IS(BGRA, ABGR) - || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012; - } else if (isRGB48(srcFormat) && isRGB48(dstFormat)) { - if (CONV_IS(RGB48LE, BGR48LE) - || CONV_IS(BGR48LE, RGB48LE) - || CONV_IS(RGB48BE, BGR48BE) - || CONV_IS(BGR48BE, RGB48BE)) conv = rgb48tobgr48_nobswap; - else if (CONV_IS(RGB48LE, BGR48BE) - || CONV_IS(BGR48LE, RGB48BE) - || CONV_IS(RGB48BE, BGR48LE) - || CONV_IS(BGR48BE, RGB48LE)) conv = rgb48tobgr48_bswap; - } else if (isRGBA64(srcFormat) && isRGB48(dstFormat)) { - if (CONV_IS(RGBA64LE, BGR48LE) - || CONV_IS(BGRA64LE, RGB48LE) - || CONV_IS(RGBA64BE, BGR48BE) - || CONV_IS(BGRA64BE, RGB48BE)) conv = rgb64tobgr48_nobswap; - else if (CONV_IS(RGBA64LE, BGR48BE) - || CONV_IS(BGRA64LE, RGB48BE) - || CONV_IS(RGBA64BE, BGR48LE) - || CONV_IS(BGRA64BE, RGB48LE)) conv = rgb64tobgr48_bswap; - else if (CONV_IS(RGBA64LE, RGB48LE) - || CONV_IS(BGRA64LE, BGR48LE) - || CONV_IS(RGBA64BE, RGB48BE) - || CONV_IS(BGRA64BE, BGR48BE)) conv = rgb64to48_nobswap; - else if (CONV_IS(RGBA64LE, RGB48BE) - || CONV_IS(BGRA64LE, BGR48BE) - || CONV_IS(RGBA64BE, RGB48LE) - || CONV_IS(BGRA64BE, BGR48LE)) conv = rgb64to48_bswap; - } else - /* BGR -> BGR */ - if ((isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) || - (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) { - switch (srcId | (dstId << 16)) { - case 0x000F000C: conv = rgb12to15; break; - case 0x000F0010: conv = rgb16to15; break; - case 0x000F0018: conv = rgb24to15; break; - case 0x000F0020: conv = rgb32to15; break; - case 0x0010000F: conv = rgb15to16; break; - case 0x00100018: conv = rgb24to16; break; - case 0x00100020: conv = rgb32to16; break; - case 0x0018000F: conv = rgb15to24; break; - case 0x00180010: conv = rgb16to24; break; - case 0x00180020: conv = rgb32to24; break; - case 0x0020000F: conv = rgb15to32; break; - case 0x00200010: conv = rgb16to32; break; - case 0x00200018: conv = rgb24to32; break; - } - } else if ((isBGRinInt(srcFormat) && isRGBinInt(dstFormat)) || - (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) { - switch (srcId | (dstId << 16)) { - case 0x000C000C: conv = rgb12tobgr12; break; - case 0x000F000F: conv = rgb15tobgr15; break; - case 0x000F0010: conv = rgb16tobgr15; break; - case 0x000F0018: conv = rgb24tobgr15; break; - case 0x000F0020: conv = rgb32tobgr15; break; - case 0x0010000F: conv = rgb15tobgr16; break; - case 0x00100010: conv = rgb16tobgr16; break; - case 0x00100018: conv = rgb24tobgr16; break; - case 0x00100020: conv = rgb32tobgr16; break; - case 0x0018000F: conv = rgb15tobgr24; break; - case 0x00180010: conv = rgb16tobgr24; break; - case 0x00180018: conv = rgb24tobgr24; break; - case 0x00180020: conv = rgb32tobgr24; break; - case 0x0020000F: conv = rgb15tobgr32; break; - case 0x00200010: conv = rgb16tobgr32; break; - case 0x00200018: conv = rgb24tobgr32; break; - } - } - - if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && !isRGBA32(srcFormat) && ALT32_CORR<0) - return NULL; - - return conv; -} - -/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ -static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], - int srcSliceY, int srcSliceH, uint8_t *dst[], - int dstStride[]) - -{ - const enum AVPixelFormat srcFormat = c->srcFormat; - const enum AVPixelFormat dstFormat = c->dstFormat; - const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat); - const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); - const int srcBpp = (c->srcFormatBpp + 7) >> 3; - const int dstBpp = (c->dstFormatBpp + 7) >> 3; - rgbConvFn conv = findRgbConvFn(c); - - if (!conv) { - av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); - } else { - const uint8_t *srcPtr = src[0]; - uint8_t *dstPtr = dst[0]; - int src_bswap = IS_NOT_NE(c->srcFormatBpp, desc_src); - int dst_bswap = IS_NOT_NE(c->dstFormatBpp, desc_dst); - - if ((srcFormat == AV_PIX_FMT_RGB32_1 || srcFormat == AV_PIX_FMT_BGR32_1) && - !isRGBA32(dstFormat)) - srcPtr += ALT32_CORR; - - if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && - !isRGBA32(srcFormat)) { - int i; - av_assert0(ALT32_CORR == 1); - for (i = 0; i < srcSliceH; i++) - dstPtr[dstStride[0] * (srcSliceY + i)] = 255; - dstPtr += ALT32_CORR; - } - - if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 && - !(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap) - conv(srcPtr, dstPtr + dstStride[0] * srcSliceY, - srcSliceH * srcStride[0]); - else { - int i, j; - dstPtr += dstStride[0] * srcSliceY; - - for (i = 0; i < srcSliceH; i++) { - if(src_bswap) { - for(j=0; j<c->srcW; j++) - ((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]); - conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp); - }else - conv(srcPtr, dstPtr, c->srcW * srcBpp); - if(dst_bswap) - for(j=0; j<c->srcW; j++) - ((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]); - srcPtr += srcStride[0]; - dstPtr += dstStride[0]; - } - } - } - return srcSliceH; -} - -static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - ff_rgb24toyv12( - src[0], - dst[0] + srcSliceY * dstStride[0], - dst[1] + (srcSliceY >> 1) * dstStride[1], - dst[2] + (srcSliceY >> 1) * dstStride[2], - c->srcW, srcSliceH, - dstStride[0], dstStride[1], srcStride[0], - c->input_rgb2yuv_table); - if (dst[3]) - fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - return srcSliceH; -} - -static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, - dst[0], dstStride[0]); - - planar2x(src[1], dst[1] + dstStride[1] * (srcSliceY >> 1), c->chrSrcW, - srcSliceH >> 2, srcStride[1], dstStride[1]); - planar2x(src[2], dst[2] + dstStride[2] * (srcSliceY >> 1), c->chrSrcW, - srcSliceH >> 2, srcStride[2], dstStride[2]); - if (dst[3]) - fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - return srcSliceH; -} - -/* unscaled copy like stuff (assumes nearly identical formats) */ -static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - if (dstStride[0] == srcStride[0] && srcStride[0] > 0) - memcpy(dst[0] + dstStride[0] * srcSliceY, src[0], srcSliceH * dstStride[0]); - else { - int i; - const uint8_t *srcPtr = src[0]; - uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY; - int length = 0; - - /* universal length finder */ - while (length + c->srcW <= FFABS(dstStride[0]) && - length + c->srcW <= FFABS(srcStride[0])) - length += c->srcW; - av_assert1(length != 0); - - for (i = 0; i < srcSliceH; i++) { - memcpy(dstPtr, srcPtr, length); - srcPtr += srcStride[0]; - dstPtr += dstStride[0]; - } - } - return srcSliceH; -} - -#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ - uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ - int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ - for (i = 0; i < height; i++) {\ - const uint8_t *dither= dithers[src_depth-9][i&7];\ - for (j = 0; j < length-7; j+=8){\ - dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ - dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ - dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ - dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ - dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ - dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ - dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ - dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ - }\ - for (; j < length; j++)\ - dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ - dst += dstStride;\ - src += srcStride;\ - } - -static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat); - const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); - int plane, i, j; - for (plane = 0; plane < 4; plane++) { - int length = (plane == 0 || plane == 3) ? c->srcW : FF_CEIL_RSHIFT(c->srcW, c->chrDstHSubSample); - int y = (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample); - int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample); - const uint8_t *srcPtr = src[plane]; - uint8_t *dstPtr = dst[plane] + dstStride[plane] * y; - int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0); - - if (!dst[plane]) - continue; - // ignore palette for GRAY8 - if (plane == 1 && !dst[2]) continue; - if (!src[plane] || (plane == 1 && !src[2])) { - if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) { - fillPlane16(dst[plane], dstStride[plane], length, height, y, - plane == 3, desc_dst->comp[plane].depth_minus1, - isBE(c->dstFormat)); - } else { - fillPlane(dst[plane], dstStride[plane], length, height, y, - (plane == 3) ? 255 : 128); - } - } else { - if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) - || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) - ) { - const int src_depth = desc_src->comp[plane].depth_minus1 + 1; - const int dst_depth = desc_dst->comp[plane].depth_minus1 + 1; - const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; - uint16_t *dstPtr2 = (uint16_t*)dstPtr; - - if (dst_depth == 8) { - if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ - DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) - } else { - DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) - } - } else if (src_depth == 8) { - for (i = 0; i < height; i++) { - #define COPY816(w)\ - if(shiftonly){\ - for (j = 0; j < length; j++)\ - w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\ - }else{\ - for (j = 0; j < length; j++)\ - w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\ - (srcPtr[j]>>(2*8-dst_depth)));\ - } - if(isBE(c->dstFormat)){ - COPY816(AV_WB16) - } else { - COPY816(AV_WL16) - } - dstPtr2 += dstStride[plane]/2; - srcPtr += srcStride[plane]; - } - } else if (src_depth <= dst_depth) { - for (i = 0; i < height; i++) { - j = 0; - if(isBE(c->srcFormat) == HAVE_BIGENDIAN && - isBE(c->dstFormat) == HAVE_BIGENDIAN && - shiftonly) { - unsigned shift = dst_depth - src_depth; -#if HAVE_FAST_64BIT -#define FAST_COPY_UP(shift) \ - for (; j < length - 3; j += 4) { \ - uint64_t v = AV_RN64A(srcPtr2 + j); \ - AV_WN64A(dstPtr2 + j, v << shift); \ - } -#else -#define FAST_COPY_UP(shift) \ - for (; j < length - 1; j += 2) { \ - uint32_t v = AV_RN32A(srcPtr2 + j); \ - AV_WN32A(dstPtr2 + j, v << shift); \ - } -#endif - switch (shift) - { - case 6: FAST_COPY_UP(6); break; - case 7: FAST_COPY_UP(7); break; - } - } -#define COPY_UP(r,w) \ - if(shiftonly){\ - for (; j < length; j++){ \ - unsigned int v= r(&srcPtr2[j]);\ - w(&dstPtr2[j], v<<(dst_depth-src_depth));\ - }\ - }else{\ - for (; j < length; j++){ \ - unsigned int v= r(&srcPtr2[j]);\ - w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ - (v>>(2*src_depth-dst_depth)));\ - }\ - } - if(isBE(c->srcFormat)){ - if(isBE(c->dstFormat)){ - COPY_UP(AV_RB16, AV_WB16) - } else { - COPY_UP(AV_RB16, AV_WL16) - } - } else { - if(isBE(c->dstFormat)){ - COPY_UP(AV_RL16, AV_WB16) - } else { - COPY_UP(AV_RL16, AV_WL16) - } - } - dstPtr2 += dstStride[plane]/2; - srcPtr2 += srcStride[plane]/2; - } - } else { - if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ - if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) - } else { - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) - } - }else{ - if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) - } else { - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) - } - } - } - } else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && - isBE(c->srcFormat) != isBE(c->dstFormat)) { - - for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) - ((uint16_t *) dstPtr)[j] = av_bswap16(((const uint16_t *) srcPtr)[j]); - srcPtr += srcStride[plane]; - dstPtr += dstStride[plane]; - } - } else if (dstStride[plane] == srcStride[plane] && - srcStride[plane] > 0 && srcStride[plane] == length) { - memcpy(dst[plane] + dstStride[plane] * y, src[plane], - height * dstStride[plane]); - } else { - if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) - length *= 2; - else if (!desc_src->comp[0].depth_minus1) - length >>= 3; // monowhite/black - for (i = 0; i < height; i++) { - memcpy(dstPtr, srcPtr, length); - srcPtr += srcStride[plane]; - dstPtr += dstStride[plane]; - } - } - } - } - return srcSliceH; -} - - -#define IS_DIFFERENT_ENDIANESS(src_fmt, dst_fmt, pix_fmt) \ - ((src_fmt == pix_fmt ## BE && dst_fmt == pix_fmt ## LE) || \ - (src_fmt == pix_fmt ## LE && dst_fmt == pix_fmt ## BE)) - - -void ff_get_unscaled_swscale(SwsContext *c) -{ - const enum AVPixelFormat srcFormat = c->srcFormat; - const enum AVPixelFormat dstFormat = c->dstFormat; - const int flags = c->flags; - const int dstH = c->dstH; - int needsDither; - - needsDither = isAnyRGB(dstFormat) && - c->dstFormatBpp < 24 && - (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat))); - - /* yv12_to_nv12 */ - if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) && - (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)) { - c->swscale = planarToNv12Wrapper; - } - /* nv12_to_yv12 */ - if (dstFormat == AV_PIX_FMT_YUV420P && - (srcFormat == AV_PIX_FMT_NV12 || srcFormat == AV_PIX_FMT_NV21)) { - c->swscale = nv12ToPlanarWrapper; - } - /* yuv2bgr */ - if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P || - srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) && - !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) { - c->swscale = ff_yuv2rgb_get_func_ptr(c); - } - - if (srcFormat == AV_PIX_FMT_YUV410P && - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & SWS_BITEXACT)) { - c->swscale = yvu9ToYv12Wrapper; - } - - /* bgr24toYV12 */ - if (srcFormat == AV_PIX_FMT_BGR24 && - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & SWS_ACCURATE_RND)) - c->swscale = bgr24ToYv12Wrapper; - - /* RGB/BGR -> RGB/BGR (no dither needed forms) */ - if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) - && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) - c->swscale = rgbToRgbWrapper; - - if ((srcFormat == AV_PIX_FMT_GBRP && dstFormat == AV_PIX_FMT_GBRAP) || - (srcFormat == AV_PIX_FMT_GBRAP && dstFormat == AV_PIX_FMT_GBRP)) - c->swscale = planarRgbToplanarRgbWrapper; - -#define isByteRGB(f) ( \ - f == AV_PIX_FMT_RGB32 || \ - f == AV_PIX_FMT_RGB32_1 || \ - f == AV_PIX_FMT_RGB24 || \ - f == AV_PIX_FMT_BGR32 || \ - f == AV_PIX_FMT_BGR32_1 || \ - f == AV_PIX_FMT_BGR24) - - if (srcFormat == AV_PIX_FMT_GBRP && isPlanar(srcFormat) && isByteRGB(dstFormat)) - c->swscale = planarRgbToRgbWrapper; - - if ((srcFormat == AV_PIX_FMT_GBRP9LE || srcFormat == AV_PIX_FMT_GBRP9BE || - srcFormat == AV_PIX_FMT_GBRP16LE || srcFormat == AV_PIX_FMT_GBRP16BE || - srcFormat == AV_PIX_FMT_GBRP10LE || srcFormat == AV_PIX_FMT_GBRP10BE || - srcFormat == AV_PIX_FMT_GBRP12LE || srcFormat == AV_PIX_FMT_GBRP12BE || - srcFormat == AV_PIX_FMT_GBRP14LE || srcFormat == AV_PIX_FMT_GBRP14BE || - srcFormat == AV_PIX_FMT_GBRAP16LE || srcFormat == AV_PIX_FMT_GBRAP16BE) && - (dstFormat == AV_PIX_FMT_RGB48LE || dstFormat == AV_PIX_FMT_RGB48BE || - dstFormat == AV_PIX_FMT_BGR48LE || dstFormat == AV_PIX_FMT_BGR48BE || - dstFormat == AV_PIX_FMT_RGBA64LE || dstFormat == AV_PIX_FMT_RGBA64BE || - dstFormat == AV_PIX_FMT_BGRA64LE || dstFormat == AV_PIX_FMT_BGRA64BE)) - c->swscale = planarRgb16ToRgb16Wrapper; - - if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth_minus1 == 7 && - isPackedRGB(srcFormat) && dstFormat == AV_PIX_FMT_GBRP) - c->swscale = rgbToPlanarRgbWrapper; - - /* bswap 16 bits per pixel/component packed formats */ - if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR48) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP9) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP10) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP12) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP14) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP16) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_XYZ12) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P9) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P10) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P12) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P14) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P16) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P9) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P10) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P14) || - IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16)) - c->swscale = packed_16bpc_bswap; - - if (usePal(srcFormat) && isByteRGB(dstFormat)) - c->swscale = palToRgbWrapper; - - if (srcFormat == AV_PIX_FMT_YUV422P) { - if (dstFormat == AV_PIX_FMT_YUYV422) - c->swscale = yuv422pToYuy2Wrapper; - else if (dstFormat == AV_PIX_FMT_UYVY422) - c->swscale = yuv422pToUyvyWrapper; - } - - /* LQ converters if -sws 0 or -sws 4*/ - if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) { - /* yv12_to_yuy2 */ - if (srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) { - if (dstFormat == AV_PIX_FMT_YUYV422) - c->swscale = planarToYuy2Wrapper; - else if (dstFormat == AV_PIX_FMT_UYVY422) - c->swscale = planarToUyvyWrapper; - } - } - if (srcFormat == AV_PIX_FMT_YUYV422 && - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P)) - c->swscale = yuyvToYuv420Wrapper; - if (srcFormat == AV_PIX_FMT_UYVY422 && - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P)) - c->swscale = uyvyToYuv420Wrapper; - if (srcFormat == AV_PIX_FMT_YUYV422 && dstFormat == AV_PIX_FMT_YUV422P) - c->swscale = yuyvToYuv422Wrapper; - if (srcFormat == AV_PIX_FMT_UYVY422 && dstFormat == AV_PIX_FMT_YUV422P) - c->swscale = uyvyToYuv422Wrapper; - -#define isPlanarGray(x) (isGray(x) && (x) != AV_PIX_FMT_GRAY8A) - /* simple copy */ - if ( srcFormat == dstFormat || - (srcFormat == AV_PIX_FMT_YUVA420P && dstFormat == AV_PIX_FMT_YUV420P) || - (srcFormat == AV_PIX_FMT_YUV420P && dstFormat == AV_PIX_FMT_YUVA420P) || - (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) || - (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) || - (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) || - (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) && - c->chrDstHSubSample == c->chrSrcHSubSample && - c->chrDstVSubSample == c->chrSrcVSubSample && - dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && - srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21)) - { - if (isPacked(c->srcFormat)) - c->swscale = packedCopyWrapper; - else /* Planar YUV or gray */ - c->swscale = planarCopyWrapper; - } - - if (ARCH_BFIN) - ff_get_unscaled_swscale_bfin(c); - if (ARCH_PPC) - ff_get_unscaled_swscale_ppc(c); - if (ARCH_ARM) - ff_get_unscaled_swscale_arm(c); - -} - -/* Convert the palette to the same packed 32-bit format as the palette */ -void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, - int num_pixels, const uint8_t *palette) -{ - int i; - - for (i = 0; i < num_pixels; i++) - ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]]; -} - -/* Palette format: ABCD -> dst format: ABC */ -void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, - int num_pixels, const uint8_t *palette) -{ - int i; - - for (i = 0; i < num_pixels; i++) { - //FIXME slow? - dst[0] = palette[src[i] * 4 + 0]; - dst[1] = palette[src[i] * 4 + 1]; - dst[2] = palette[src[i] * 4 + 2]; - dst += 3; - } -} diff --git a/ffmpeg/libswscale/utils.c b/ffmpeg/libswscale/utils.c deleted file mode 100644 index b8b95a9..0000000 --- a/ffmpeg/libswscale/utils.c +++ /dev/null @@ -1,2093 +0,0 @@ -/* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "config.h" - -#define _SVID_SOURCE // needed for MAP_ANONYMOUS -#define _DARWIN_C_SOURCE // needed for MAP_ANON -#include <inttypes.h> -#include <math.h> -#include <stdio.h> -#include <string.h> -#if HAVE_SYS_MMAN_H -#include <sys/mman.h> -#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#endif -#if HAVE_VIRTUALALLOC -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#endif - -#include "libavutil/attributes.h" -#include "libavutil/avassert.h" -#include "libavutil/avutil.h" -#include "libavutil/bswap.h" -#include "libavutil/cpu.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/mathematics.h" -#include "libavutil/opt.h" -#include "libavutil/pixdesc.h" -#include "libavutil/ppc/cpu.h" -#include "libavutil/x86/asm.h" -#include "libavutil/x86/cpu.h" -#include "rgb2rgb.h" -#include "swscale.h" -#include "swscale_internal.h" - -static void handle_formats(SwsContext *c); - -unsigned swscale_version(void) -{ - av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); - return LIBSWSCALE_VERSION_INT; -} - -const char *swscale_configuration(void) -{ - return FFMPEG_CONFIGURATION; -} - -const char *swscale_license(void) -{ -#define LICENSE_PREFIX "libswscale license: " - return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; -} - -#define RET 0xC3 // near return opcode for x86 - -typedef struct FormatEntry { - uint8_t is_supported_in :1; - uint8_t is_supported_out :1; - uint8_t is_supported_endianness :1; -} FormatEntry; - -static const FormatEntry format_entries[AV_PIX_FMT_NB] = { - [AV_PIX_FMT_YUV420P] = { 1, 1 }, - [AV_PIX_FMT_YUYV422] = { 1, 1 }, - [AV_PIX_FMT_RGB24] = { 1, 1 }, - [AV_PIX_FMT_BGR24] = { 1, 1 }, - [AV_PIX_FMT_YUV422P] = { 1, 1 }, - [AV_PIX_FMT_YUV444P] = { 1, 1 }, - [AV_PIX_FMT_YUV410P] = { 1, 1 }, - [AV_PIX_FMT_YUV411P] = { 1, 1 }, - [AV_PIX_FMT_GRAY8] = { 1, 1 }, - [AV_PIX_FMT_MONOWHITE] = { 1, 1 }, - [AV_PIX_FMT_MONOBLACK] = { 1, 1 }, - [AV_PIX_FMT_PAL8] = { 1, 0 }, - [AV_PIX_FMT_YUVJ420P] = { 1, 1 }, - [AV_PIX_FMT_YUVJ411P] = { 1, 1 }, - [AV_PIX_FMT_YUVJ422P] = { 1, 1 }, - [AV_PIX_FMT_YUVJ444P] = { 1, 1 }, - [AV_PIX_FMT_UYVY422] = { 1, 1 }, - [AV_PIX_FMT_UYYVYY411] = { 0, 0 }, - [AV_PIX_FMT_BGR8] = { 1, 1 }, - [AV_PIX_FMT_BGR4] = { 0, 1 }, - [AV_PIX_FMT_BGR4_BYTE] = { 1, 1 }, - [AV_PIX_FMT_RGB8] = { 1, 1 }, - [AV_PIX_FMT_RGB4] = { 0, 1 }, - [AV_PIX_FMT_RGB4_BYTE] = { 1, 1 }, - [AV_PIX_FMT_NV12] = { 1, 1 }, - [AV_PIX_FMT_NV21] = { 1, 1 }, - [AV_PIX_FMT_ARGB] = { 1, 1 }, - [AV_PIX_FMT_RGBA] = { 1, 1 }, - [AV_PIX_FMT_ABGR] = { 1, 1 }, - [AV_PIX_FMT_BGRA] = { 1, 1 }, - [AV_PIX_FMT_0RGB] = { 1, 1 }, - [AV_PIX_FMT_RGB0] = { 1, 1 }, - [AV_PIX_FMT_0BGR] = { 1, 1 }, - [AV_PIX_FMT_BGR0] = { 1, 1 }, - [AV_PIX_FMT_GRAY16BE] = { 1, 1 }, - [AV_PIX_FMT_GRAY16LE] = { 1, 1 }, - [AV_PIX_FMT_YUV440P] = { 1, 1 }, - [AV_PIX_FMT_YUVJ440P] = { 1, 1 }, - [AV_PIX_FMT_YUVA420P] = { 1, 1 }, - [AV_PIX_FMT_YUVA422P] = { 1, 1 }, - [AV_PIX_FMT_YUVA444P] = { 1, 1 }, - [AV_PIX_FMT_YUVA420P9BE] = { 1, 1 }, - [AV_PIX_FMT_YUVA420P9LE] = { 1, 1 }, - [AV_PIX_FMT_YUVA422P9BE] = { 1, 1 }, - [AV_PIX_FMT_YUVA422P9LE] = { 1, 1 }, - [AV_PIX_FMT_YUVA444P9BE] = { 1, 1 }, - [AV_PIX_FMT_YUVA444P9LE] = { 1, 1 }, - [AV_PIX_FMT_YUVA420P10BE]= { 1, 1 }, - [AV_PIX_FMT_YUVA420P10LE]= { 1, 1 }, - [AV_PIX_FMT_YUVA422P10BE]= { 1, 1 }, - [AV_PIX_FMT_YUVA422P10LE]= { 1, 1 }, - [AV_PIX_FMT_YUVA444P10BE]= { 1, 1 }, - [AV_PIX_FMT_YUVA444P10LE]= { 1, 1 }, - [AV_PIX_FMT_YUVA420P16BE]= { 1, 1 }, - [AV_PIX_FMT_YUVA420P16LE]= { 1, 1 }, - [AV_PIX_FMT_YUVA422P16BE]= { 1, 1 }, - [AV_PIX_FMT_YUVA422P16LE]= { 1, 1 }, - [AV_PIX_FMT_YUVA444P16BE]= { 1, 1 }, - [AV_PIX_FMT_YUVA444P16LE]= { 1, 1 }, - [AV_PIX_FMT_RGB48BE] = { 1, 1 }, - [AV_PIX_FMT_RGB48LE] = { 1, 1 }, - [AV_PIX_FMT_RGBA64BE] = { 1, 1 }, - [AV_PIX_FMT_RGBA64LE] = { 1, 1 }, - [AV_PIX_FMT_RGB565BE] = { 1, 1 }, - [AV_PIX_FMT_RGB565LE] = { 1, 1 }, - [AV_PIX_FMT_RGB555BE] = { 1, 1 }, - [AV_PIX_FMT_RGB555LE] = { 1, 1 }, - [AV_PIX_FMT_BGR565BE] = { 1, 1 }, - [AV_PIX_FMT_BGR565LE] = { 1, 1 }, - [AV_PIX_FMT_BGR555BE] = { 1, 1 }, - [AV_PIX_FMT_BGR555LE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P16LE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P16BE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P16LE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P16BE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P16LE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P16BE] = { 1, 1 }, - [AV_PIX_FMT_RGB444LE] = { 1, 1 }, - [AV_PIX_FMT_RGB444BE] = { 1, 1 }, - [AV_PIX_FMT_BGR444LE] = { 1, 1 }, - [AV_PIX_FMT_BGR444BE] = { 1, 1 }, - [AV_PIX_FMT_Y400A] = { 1, 0 }, - [AV_PIX_FMT_BGR48BE] = { 1, 1 }, - [AV_PIX_FMT_BGR48LE] = { 1, 1 }, - [AV_PIX_FMT_BGRA64BE] = { 0, 0 }, - [AV_PIX_FMT_BGRA64LE] = { 0, 0 }, - [AV_PIX_FMT_YUV420P9BE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P9LE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P10BE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P10LE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P12BE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P12LE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P14BE] = { 1, 1 }, - [AV_PIX_FMT_YUV420P14LE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P9BE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P9LE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P10BE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P10LE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P12BE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P12LE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P14BE] = { 1, 1 }, - [AV_PIX_FMT_YUV422P14LE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P9BE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P9LE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P10BE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P10LE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P12BE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P12LE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P14BE] = { 1, 1 }, - [AV_PIX_FMT_YUV444P14LE] = { 1, 1 }, - [AV_PIX_FMT_GBRP] = { 1, 1 }, - [AV_PIX_FMT_GBRP9LE] = { 1, 1 }, - [AV_PIX_FMT_GBRP9BE] = { 1, 1 }, - [AV_PIX_FMT_GBRP10LE] = { 1, 1 }, - [AV_PIX_FMT_GBRP10BE] = { 1, 1 }, - [AV_PIX_FMT_GBRP12LE] = { 1, 1 }, - [AV_PIX_FMT_GBRP12BE] = { 1, 1 }, - [AV_PIX_FMT_GBRP14LE] = { 1, 1 }, - [AV_PIX_FMT_GBRP14BE] = { 1, 1 }, - [AV_PIX_FMT_GBRP16LE] = { 1, 0 }, - [AV_PIX_FMT_GBRP16BE] = { 1, 0 }, - [AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 }, - [AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 }, - [AV_PIX_FMT_GBRAP] = { 1, 1 }, - [AV_PIX_FMT_GBRAP16LE] = { 1, 0 }, - [AV_PIX_FMT_GBRAP16BE] = { 1, 0 }, -}; - -int sws_isSupportedInput(enum AVPixelFormat pix_fmt) -{ - return (unsigned)pix_fmt < AV_PIX_FMT_NB ? - format_entries[pix_fmt].is_supported_in : 0; -} - -int sws_isSupportedOutput(enum AVPixelFormat pix_fmt) -{ - return (unsigned)pix_fmt < AV_PIX_FMT_NB ? - format_entries[pix_fmt].is_supported_out : 0; -} - -int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt) -{ - return (unsigned)pix_fmt < AV_PIX_FMT_NB ? - format_entries[pix_fmt].is_supported_endianness : 0; -} - -#if FF_API_SWS_FORMAT_NAME -const char *sws_format_name(enum AVPixelFormat format) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); - if (desc) - return desc->name; - else - return "Unknown format"; -} -#endif - -static double getSplineCoeff(double a, double b, double c, double d, - double dist) -{ - if (dist <= 1.0) - return ((d * dist + c) * dist + b) * dist + a; - else - return getSplineCoeff(0.0, - b + 2.0 * c + 3.0 * d, - c + 3.0 * d, - -b - 3.0 * c - 6.0 * d, - dist - 1.0); -} - -static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir) -{ - if (pos < 0) { - pos = (128 << chr_subsample) - 128; - } - pos += 128; // relative to ideal left edge - return pos >> chr_subsample; -} - -typedef struct { - int flag; ///< flag associated to the algorithm - const char *description; ///< human-readable description - int size_factor; ///< size factor used when initing the filters -} ScaleAlgorithm; - -static const ScaleAlgorithm scale_algorithms[] = { - { SWS_AREA, "area averaging", 1 /* downscale only, for upscale it is bilinear */ }, - { SWS_BICUBIC, "bicubic", 4 }, - { SWS_BICUBLIN, "luma bicubic / chroma bilinear", -1 }, - { SWS_BILINEAR, "bilinear", 2 }, - { SWS_FAST_BILINEAR, "fast bilinear", -1 }, - { SWS_GAUSS, "Gaussian", 8 /* infinite ;) */ }, - { SWS_LANCZOS, "Lanczos", -1 /* custom */ }, - { SWS_POINT, "nearest neighbor / point", -1 }, - { SWS_SINC, "sinc", 20 /* infinite ;) */ }, - { SWS_SPLINE, "bicubic spline", 20 /* infinite :)*/ }, - { SWS_X, "experimental", 8 }, -}; - -static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos, - int *outFilterSize, int xInc, int srcW, - int dstW, int filterAlign, int one, - int flags, int cpu_flags, - SwsVector *srcFilter, SwsVector *dstFilter, - double param[2], int srcPos, int dstPos) -{ - int i; - int filterSize; - int filter2Size; - int minFilterSize; - int64_t *filter = NULL; - int64_t *filter2 = NULL; - const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8)); - int ret = -1; - - emms_c(); // FIXME should not be required but IS (even for non-MMX versions) - - // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end - FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail); - - if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled - int i; - filterSize = 1; - FF_ALLOCZ_OR_GOTO(NULL, filter, - dstW * sizeof(*filter) * filterSize, fail); - - for (i = 0; i < dstW; i++) { - filter[i * filterSize] = fone; - (*filterPos)[i] = i; - } - } else if (flags & SWS_POINT) { // lame looking point sampling mode - int i; - int64_t xDstInSrc; - filterSize = 1; - FF_ALLOC_OR_GOTO(NULL, filter, - dstW * sizeof(*filter) * filterSize, fail); - - xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); - for (i = 0; i < dstW; i++) { - int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; - - (*filterPos)[i] = xx; - filter[i] = fone; - xDstInSrc += xInc; - } - } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) || - (flags & SWS_FAST_BILINEAR)) { // bilinear upscale - int i; - int64_t xDstInSrc; - filterSize = 2; - FF_ALLOC_OR_GOTO(NULL, filter, - dstW * sizeof(*filter) * filterSize, fail); - - xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); - for (i = 0; i < dstW; i++) { - int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; - int j; - - (*filterPos)[i] = xx; - // bilinear upscale / linear interpolate / area averaging - for (j = 0; j < filterSize; j++) { - int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16); - if (coeff < 0) - coeff = 0; - filter[i * filterSize + j] = coeff; - xx++; - } - xDstInSrc += xInc; - } - } else { - int64_t xDstInSrc; - int sizeFactor = -1; - - for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { - if (flags & scale_algorithms[i].flag) { - sizeFactor = scale_algorithms[i].size_factor; - break; - } - } - if (flags & SWS_LANCZOS) - sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6; - av_assert0(sizeFactor > 0); - - if (xInc <= 1 << 16) - filterSize = 1 + sizeFactor; // upscale - else - filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW; - - filterSize = FFMIN(filterSize, srcW - 2); - filterSize = FFMAX(filterSize, 1); - - FF_ALLOC_OR_GOTO(NULL, filter, - dstW * sizeof(*filter) * filterSize, fail); - - xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7); - for (i = 0; i < dstW; i++) { - int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17); - int j; - (*filterPos)[i] = xx; - for (j = 0; j < filterSize; j++) { - int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13; - double floatd; - int64_t coeff; - - if (xInc > 1 << 16) - d = d * dstW / srcW; - floatd = d * (1.0 / (1 << 30)); - - if (flags & SWS_BICUBIC) { - int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24); - int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24); - - if (d >= 1LL << 31) { - coeff = 0.0; - } else { - int64_t dd = (d * d) >> 30; - int64_t ddd = (dd * d) >> 30; - - if (d < 1LL << 30) - coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + - (-18 * (1 << 24) + 12 * B + 6 * C) * dd + - (6 * (1 << 24) - 2 * B) * (1 << 30); - else - coeff = (-B - 6 * C) * ddd + - (6 * B + 30 * C) * dd + - (-12 * B - 48 * C) * d + - (8 * B + 24 * C) * (1 << 30); - } - coeff /= (1LL<<54)/fone; - } -#if 0 - else if (flags & SWS_X) { - double p = param ? param * 0.01 : 0.3; - coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0; - coeff *= pow(2.0, -p * d * d); - } -#endif - else if (flags & SWS_X) { - double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; - double c; - - if (floatd < 1.0) - c = cos(floatd * M_PI); - else - c = -1.0; - if (c < 0.0) - c = -pow(-c, A); - else - c = pow(c, A); - coeff = (c * 0.5 + 0.5) * fone; - } else if (flags & SWS_AREA) { - int64_t d2 = d - (1 << 29); - if (d2 * xInc < -(1LL << (29 + 16))) - coeff = 1.0 * (1LL << (30 + 16)); - else if (d2 * xInc < (1LL << (29 + 16))) - coeff = -d2 * xInc + (1LL << (29 + 16)); - else - coeff = 0.0; - coeff *= fone >> (30 + 16); - } else if (flags & SWS_GAUSS) { - double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; - coeff = (pow(2.0, -p * floatd * floatd)) * fone; - } else if (flags & SWS_SINC) { - coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone; - } else if (flags & SWS_LANCZOS) { - double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; - coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) / - (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone; - if (floatd > p) - coeff = 0; - } else if (flags & SWS_BILINEAR) { - coeff = (1 << 30) - d; - if (coeff < 0) - coeff = 0; - coeff *= fone >> 30; - } else if (flags & SWS_SPLINE) { - double p = -2.196152422706632; - coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone; - } else { - av_assert0(0); - } - - filter[i * filterSize + j] = coeff; - xx++; - } - xDstInSrc += 2 * xInc; - } - } - - /* apply src & dst Filter to filter -> filter2 - * av_free(filter); - */ - av_assert0(filterSize > 0); - filter2Size = filterSize; - if (srcFilter) - filter2Size += srcFilter->length - 1; - if (dstFilter) - filter2Size += dstFilter->length - 1; - av_assert0(filter2Size > 0); - FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail); - - for (i = 0; i < dstW; i++) { - int j, k; - - if (srcFilter) { - for (k = 0; k < srcFilter->length; k++) { - for (j = 0; j < filterSize; j++) - filter2[i * filter2Size + k + j] += - srcFilter->coeff[k] * filter[i * filterSize + j]; - } - } else { - for (j = 0; j < filterSize; j++) - filter2[i * filter2Size + j] = filter[i * filterSize + j]; - } - // FIXME dstFilter - - (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2; - } - av_freep(&filter); - - /* try to reduce the filter-size (step1 find size and shift left) */ - // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). - minFilterSize = 0; - for (i = dstW - 1; i >= 0; i--) { - int min = filter2Size; - int j; - int64_t cutOff = 0.0; - - /* get rid of near zero elements on the left by shifting left */ - for (j = 0; j < filter2Size; j++) { - int k; - cutOff += FFABS(filter2[i * filter2Size]); - - if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone) - break; - - /* preserve monotonicity because the core can't handle the - * filter otherwise */ - if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1]) - break; - - // move filter coefficients left - for (k = 1; k < filter2Size; k++) - filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k]; - filter2[i * filter2Size + k - 1] = 0; - (*filterPos)[i]++; - } - - cutOff = 0; - /* count near zeros on the right */ - for (j = filter2Size - 1; j > 0; j--) { - cutOff += FFABS(filter2[i * filter2Size + j]); - - if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone) - break; - min--; - } - - if (min > minFilterSize) - minFilterSize = min; - } - - if (PPC_ALTIVEC(cpu_flags)) { - // we can handle the special case 4, so we don't want to go the full 8 - if (minFilterSize < 5) - filterAlign = 4; - - /* We really don't want to waste our time doing useless computation, so - * fall back on the scalar C code for very small filters. - * Vectorizing is worth it only if you have a decent-sized vector. */ - if (minFilterSize < 3) - filterAlign = 1; - } - - if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { - // special case for unscaled vertical filtering - if (minFilterSize == 1 && filterAlign == 2) - filterAlign = 1; - } - - av_assert0(minFilterSize > 0); - filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1)); - av_assert0(filterSize > 0); - filter = av_malloc(filterSize * dstW * sizeof(*filter)); - if (filterSize >= MAX_FILTER_SIZE * 16 / - ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) { - av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreme scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize); - goto fail; - } - *outFilterSize = filterSize; - - if (flags & SWS_PRINT_INFO) - av_log(NULL, AV_LOG_VERBOSE, - "SwScaler: reducing / aligning filtersize %d -> %d\n", - filter2Size, filterSize); - /* try to reduce the filter-size (step2 reduce it) */ - for (i = 0; i < dstW; i++) { - int j; - - for (j = 0; j < filterSize; j++) { - if (j >= filter2Size) - filter[i * filterSize + j] = 0; - else - filter[i * filterSize + j] = filter2[i * filter2Size + j]; - if ((flags & SWS_BITEXACT) && j >= minFilterSize) - filter[i * filterSize + j] = 0; - } - } - - // FIXME try to align filterPos if possible - - // fix borders - for (i = 0; i < dstW; i++) { - int j; - if ((*filterPos)[i] < 0) { - // move filter coefficients left to compensate for filterPos - for (j = 1; j < filterSize; j++) { - int left = FFMAX(j + (*filterPos)[i], 0); - filter[i * filterSize + left] += filter[i * filterSize + j]; - filter[i * filterSize + j] = 0; - } - (*filterPos)[i]= 0; - } - - if ((*filterPos)[i] + filterSize > srcW) { - int shift = (*filterPos)[i] + filterSize - srcW; - // move filter coefficients right to compensate for filterPos - for (j = filterSize - 2; j >= 0; j--) { - int right = FFMIN(j + shift, filterSize - 1); - filter[i * filterSize + right] += filter[i * filterSize + j]; - filter[i * filterSize + j] = 0; - } - (*filterPos)[i]= srcW - filterSize; - } - } - - // Note the +1 is for the MMX scaler which reads over the end - /* align at 16 for AltiVec (needed by hScale_altivec_real) */ - FF_ALLOCZ_OR_GOTO(NULL, *outFilter, - *outFilterSize * (dstW + 3) * sizeof(int16_t), fail); - - /* normalize & store in outFilter */ - for (i = 0; i < dstW; i++) { - int j; - int64_t error = 0; - int64_t sum = 0; - - for (j = 0; j < filterSize; j++) { - sum += filter[i * filterSize + j]; - } - sum = (sum + one / 2) / one; - if (!sum) { - av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n"); - sum = 1; - } - for (j = 0; j < *outFilterSize; j++) { - int64_t v = filter[i * filterSize + j] + error; - int intV = ROUNDED_DIV(v, sum); - (*outFilter)[i * (*outFilterSize) + j] = intV; - error = v - intV * sum; - } - } - - (*filterPos)[dstW + 0] = - (*filterPos)[dstW + 1] = - (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will - * read over the end */ - for (i = 0; i < *outFilterSize; i++) { - int k = (dstW - 1) * (*outFilterSize) + i; - (*outFilter)[k + 1 * (*outFilterSize)] = - (*outFilter)[k + 2 * (*outFilterSize)] = - (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k]; - } - - ret = 0; - -fail: - if(ret < 0) - av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n"); - av_free(filter); - av_free(filter2); - return ret; -} - -#if HAVE_MMXEXT_INLINE -static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, - int16_t *filter, int32_t *filterPos, - int numSplits) -{ - uint8_t *fragmentA; - x86_reg imm8OfPShufW1A; - x86_reg imm8OfPShufW2A; - x86_reg fragmentLengthA; - uint8_t *fragmentB; - x86_reg imm8OfPShufW1B; - x86_reg imm8OfPShufW2B; - x86_reg fragmentLengthB; - int fragmentPos; - - int xpos, i; - - // create an optimized horizontal scaling routine - /* This scaler is made of runtime-generated MMXEXT code using specially tuned - * pshufw instructions. For every four output pixels, if four input pixels - * are enough for the fast bilinear scaling, then a chunk of fragmentB is - * used. If five input pixels are needed, then a chunk of fragmentA is used. - */ - - // code fragment - - __asm__ volatile ( - "jmp 9f \n\t" - // Begin - "0: \n\t" - "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" - "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" - "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "pshufw $0xFF, %%mm1, %%mm1 \n\t" - "1: \n\t" - "pshufw $0xFF, %%mm0, %%mm0 \n\t" - "2: \n\t" - "psubw %%mm1, %%mm0 \n\t" - "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" - "pmullw %%mm3, %%mm0 \n\t" - "psllw $7, %%mm1 \n\t" - "paddw %%mm1, %%mm0 \n\t" - - "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" - - "add $8, %%"REG_a" \n\t" - // End - "9: \n\t" - // "int $3 \n\t" - "lea " LOCAL_MANGLE(0b) ", %0 \n\t" - "lea " LOCAL_MANGLE(1b) ", %1 \n\t" - "lea " LOCAL_MANGLE(2b) ", %2 \n\t" - "dec %1 \n\t" - "dec %2 \n\t" - "sub %0, %1 \n\t" - "sub %0, %2 \n\t" - "lea " LOCAL_MANGLE(9b) ", %3 \n\t" - "sub %0, %3 \n\t" - - - : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A), - "=r" (fragmentLengthA) - ); - - __asm__ volatile ( - "jmp 9f \n\t" - // Begin - "0: \n\t" - "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" - "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "pshufw $0xFF, %%mm0, %%mm1 \n\t" - "1: \n\t" - "pshufw $0xFF, %%mm0, %%mm0 \n\t" - "2: \n\t" - "psubw %%mm1, %%mm0 \n\t" - "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" - "pmullw %%mm3, %%mm0 \n\t" - "psllw $7, %%mm1 \n\t" - "paddw %%mm1, %%mm0 \n\t" - - "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" - - "add $8, %%"REG_a" \n\t" - // End - "9: \n\t" - // "int $3 \n\t" - "lea " LOCAL_MANGLE(0b) ", %0 \n\t" - "lea " LOCAL_MANGLE(1b) ", %1 \n\t" - "lea " LOCAL_MANGLE(2b) ", %2 \n\t" - "dec %1 \n\t" - "dec %2 \n\t" - "sub %0, %1 \n\t" - "sub %0, %2 \n\t" - "lea " LOCAL_MANGLE(9b) ", %3 \n\t" - "sub %0, %3 \n\t" - - - : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B), - "=r" (fragmentLengthB) - ); - - xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers - fragmentPos = 0; - - for (i = 0; i < dstW / numSplits; i++) { - int xx = xpos >> 16; - - if ((i & 3) == 0) { - int a = 0; - int b = ((xpos + xInc) >> 16) - xx; - int c = ((xpos + xInc * 2) >> 16) - xx; - int d = ((xpos + xInc * 3) >> 16) - xx; - int inc = (d + 1 < 4); - uint8_t *fragment = inc ? fragmentB : fragmentA; - x86_reg imm8OfPShufW1 = inc ? imm8OfPShufW1B : imm8OfPShufW1A; - x86_reg imm8OfPShufW2 = inc ? imm8OfPShufW2B : imm8OfPShufW2A; - x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA; - int maxShift = 3 - (d + inc); - int shift = 0; - - if (filterCode) { - filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9; - filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9; - filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9; - filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9; - filterPos[i / 2] = xx; - - memcpy(filterCode + fragmentPos, fragment, fragmentLength); - - filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) | - ((b + inc) << 2) | - ((c + inc) << 4) | - ((d + inc) << 6); - filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) | - (c << 4) | - (d << 6); - - if (i + 4 - inc >= dstW) - shift = maxShift; // avoid overread - else if ((filterPos[i / 2] & 3) <= maxShift) - shift = filterPos[i / 2] & 3; // align - - if (shift && i >= shift) { - filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift; - filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift; - filterPos[i / 2] -= shift; - } - } - - fragmentPos += fragmentLength; - - if (filterCode) - filterCode[fragmentPos] = RET; - } - xpos += xInc; - } - if (filterCode) - filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part - - return fragmentPos + 1; -} -#endif /* HAVE_MMXEXT_INLINE */ - -static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange) -{ - int64_t W, V, Z, Cy, Cu, Cv; - int64_t vr = table[0]; - int64_t ub = table[1]; - int64_t ug = -table[2]; - int64_t vg = -table[3]; - int64_t ONE = 65536; - int64_t cy = ONE; - uint8_t *p = (uint8_t*)c->input_rgb2yuv_table; - int i; - static const int8_t map[] = { - BY_IDX, GY_IDX, -1 , BY_IDX, BY_IDX, GY_IDX, -1 , BY_IDX, - RY_IDX, -1 , GY_IDX, RY_IDX, RY_IDX, -1 , GY_IDX, RY_IDX, - RY_IDX, GY_IDX, -1 , RY_IDX, RY_IDX, GY_IDX, -1 , RY_IDX, - BY_IDX, -1 , GY_IDX, BY_IDX, BY_IDX, -1 , GY_IDX, BY_IDX, - BU_IDX, GU_IDX, -1 , BU_IDX, BU_IDX, GU_IDX, -1 , BU_IDX, - RU_IDX, -1 , GU_IDX, RU_IDX, RU_IDX, -1 , GU_IDX, RU_IDX, - RU_IDX, GU_IDX, -1 , RU_IDX, RU_IDX, GU_IDX, -1 , RU_IDX, - BU_IDX, -1 , GU_IDX, BU_IDX, BU_IDX, -1 , GU_IDX, BU_IDX, - BV_IDX, GV_IDX, -1 , BV_IDX, BV_IDX, GV_IDX, -1 , BV_IDX, - RV_IDX, -1 , GV_IDX, RV_IDX, RV_IDX, -1 , GV_IDX, RV_IDX, - RV_IDX, GV_IDX, -1 , RV_IDX, RV_IDX, GV_IDX, -1 , RV_IDX, - BV_IDX, -1 , GV_IDX, BV_IDX, BV_IDX, -1 , GV_IDX, BV_IDX, - RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, - BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, - GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , - -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, - RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, - BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, - GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , - -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, - RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, - BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, - GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , - -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, //23 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30 - -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31 - BY_IDX, GY_IDX, RY_IDX, -1 , -1 , -1 , -1 , -1 , //32 - BU_IDX, GU_IDX, RU_IDX, -1 , -1 , -1 , -1 , -1 , //33 - BV_IDX, GV_IDX, RV_IDX, -1 , -1 , -1 , -1 , -1 , //34 - }; - - dstRange = 0; //FIXME range = 1 is handled elsewhere - - if (!dstRange) { - cy = cy * 255 / 219; - } else { - vr = vr * 224 / 255; - ub = ub * 224 / 255; - ug = ug * 224 / 255; - vg = vg * 224 / 255; - } - W = ROUNDED_DIV(ONE*ONE*ug, ub); - V = ROUNDED_DIV(ONE*ONE*vg, vr); - Z = ONE*ONE-W-V; - - Cy = ROUNDED_DIV(cy*Z, ONE); - Cu = ROUNDED_DIV(ub*Z, ONE); - Cv = ROUNDED_DIV(vr*Z, ONE); - - c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cy); - c->input_rgb2yuv_table[GY_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cy); - c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cy); - - c->input_rgb2yuv_table[RU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cu); - c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cu); - c->input_rgb2yuv_table[BU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W) , Cu); - - c->input_rgb2yuv_table[RV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z) , Cv); - c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cv); - c->input_rgb2yuv_table[BV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cv); - - if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) { - c->input_rgb2yuv_table[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); - } - for(i=0; i<FF_ARRAY_ELEMS(map); i++) - AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0); -} - -static void fill_xyztables(struct SwsContext *c) -{ - int i; - double xyzgamma = XYZ_GAMMA; - double rgbgamma = 1.0 / RGB_GAMMA; - double xyzgammainv = 1.0 / XYZ_GAMMA; - double rgbgammainv = RGB_GAMMA; - static const int16_t xyz2rgb_matrix[3][4] = { - {13270, -6295, -2041}, - {-3969, 7682, 170}, - { 228, -835, 4329} }; - static const int16_t rgb2xyz_matrix[3][4] = { - {1689, 1464, 739}, - { 871, 2929, 296}, - { 79, 488, 3891} }; - static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096]; - - memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix)); - memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix)); - c->xyzgamma = xyzgamma_tab; - c->rgbgamma = rgbgamma_tab; - c->xyzgammainv = xyzgammainv_tab; - c->rgbgammainv = rgbgammainv_tab; - - if (rgbgamma_tab[4095]) - return; - - /* set gamma vectors */ - for (i = 0; i < 4096; i++) { - xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0); - rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0); - xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0); - rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0); - } -} - -int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], - int srcRange, const int table[4], int dstRange, - int brightness, int contrast, int saturation) -{ - const AVPixFmtDescriptor *desc_dst; - const AVPixFmtDescriptor *desc_src; - memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4); - memmove(c->dstColorspaceTable, table, sizeof(int) * 4); - - handle_formats(c); - desc_dst = av_pix_fmt_desc_get(c->dstFormat); - desc_src = av_pix_fmt_desc_get(c->srcFormat); - - if(!isYUV(c->dstFormat) && !isGray(c->dstFormat)) - dstRange = 0; - if(!isYUV(c->srcFormat) && !isGray(c->srcFormat)) - srcRange = 0; - - c->brightness = brightness; - c->contrast = contrast; - c->saturation = saturation; - c->srcRange = srcRange; - c->dstRange = dstRange; - - if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) - return -1; - - c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); - c->srcFormatBpp = av_get_bits_per_pixel(desc_src); - - if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) { - ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, - contrast, saturation); - // FIXME factorize - - if (ARCH_PPC) - ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness, - contrast, saturation); - } - - fill_rgb2yuv_table(c, table, dstRange); - - return 0; -} - -int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, - int *srcRange, int **table, int *dstRange, - int *brightness, int *contrast, int *saturation) -{ - if (!c ) - return -1; - - *inv_table = c->srcColorspaceTable; - *table = c->dstColorspaceTable; - *srcRange = c->srcRange; - *dstRange = c->dstRange; - *brightness = c->brightness; - *contrast = c->contrast; - *saturation = c->saturation; - - return 0; -} - -static int handle_jpeg(enum AVPixelFormat *format) -{ - switch (*format) { - case AV_PIX_FMT_YUVJ420P: - *format = AV_PIX_FMT_YUV420P; - return 1; - case AV_PIX_FMT_YUVJ411P: - *format = AV_PIX_FMT_YUV411P; - return 1; - case AV_PIX_FMT_YUVJ422P: - *format = AV_PIX_FMT_YUV422P; - return 1; - case AV_PIX_FMT_YUVJ444P: - *format = AV_PIX_FMT_YUV444P; - return 1; - case AV_PIX_FMT_YUVJ440P: - *format = AV_PIX_FMT_YUV440P; - return 1; - case AV_PIX_FMT_GRAY8: - return 1; - default: - return 0; - } -} - -static int handle_0alpha(enum AVPixelFormat *format) -{ - switch (*format) { - case AV_PIX_FMT_0BGR : *format = AV_PIX_FMT_ABGR ; return 1; - case AV_PIX_FMT_BGR0 : *format = AV_PIX_FMT_BGRA ; return 4; - case AV_PIX_FMT_0RGB : *format = AV_PIX_FMT_ARGB ; return 1; - case AV_PIX_FMT_RGB0 : *format = AV_PIX_FMT_RGBA ; return 4; - default: return 0; - } -} - -static int handle_xyz(enum AVPixelFormat *format) -{ - switch (*format) { - case AV_PIX_FMT_XYZ12BE : *format = AV_PIX_FMT_RGB48BE; return 1; - case AV_PIX_FMT_XYZ12LE : *format = AV_PIX_FMT_RGB48LE; return 1; - default: return 0; - } -} - -static void handle_formats(SwsContext *c) -{ - c->src0Alpha |= handle_0alpha(&c->srcFormat); - c->dst0Alpha |= handle_0alpha(&c->dstFormat); - c->srcXYZ |= handle_xyz(&c->srcFormat); - c->dstXYZ |= handle_xyz(&c->dstFormat); - if (c->srcXYZ || c->dstXYZ) - fill_xyztables(c); -} - -SwsContext *sws_alloc_context(void) -{ - SwsContext *c = av_mallocz(sizeof(SwsContext)); - - if (c) { - c->av_class = &sws_context_class; - av_opt_set_defaults(c); - } - - return c; -} - -av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, - SwsFilter *dstFilter) -{ - int i, j; - int usesVFilter, usesHFilter; - int unscaled; - SwsFilter dummyFilter = { NULL, NULL, NULL, NULL }; - int srcW = c->srcW; - int srcH = c->srcH; - int dstW = c->dstW; - int dstH = c->dstH; - int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16); - int flags, cpu_flags; - enum AVPixelFormat srcFormat = c->srcFormat; - enum AVPixelFormat dstFormat = c->dstFormat; - const AVPixFmtDescriptor *desc_src; - const AVPixFmtDescriptor *desc_dst; - - cpu_flags = av_get_cpu_flags(); - flags = c->flags; - emms_c(); - if (!rgb15to16) - sws_rgb2rgb_init(); - - unscaled = (srcW == dstW && srcH == dstH); - - c->srcRange |= handle_jpeg(&c->srcFormat); - c->dstRange |= handle_jpeg(&c->dstFormat); - - if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat) - av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); - - if (!c->contrast && !c->saturation && !c->dstFormatBpp) - sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange, - ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], - c->dstRange, 0, 1 << 16, 1 << 16); - - handle_formats(c); - srcFormat = c->srcFormat; - dstFormat = c->dstFormat; - desc_src = av_pix_fmt_desc_get(srcFormat); - desc_dst = av_pix_fmt_desc_get(dstFormat); - - if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) && - av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) { - if (!sws_isSupportedInput(srcFormat)) { - av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", - av_get_pix_fmt_name(srcFormat)); - return AVERROR(EINVAL); - } - if (!sws_isSupportedOutput(dstFormat)) { - av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", - av_get_pix_fmt_name(dstFormat)); - return AVERROR(EINVAL); - } - } - - i = flags & (SWS_POINT | - SWS_AREA | - SWS_BILINEAR | - SWS_FAST_BILINEAR | - SWS_BICUBIC | - SWS_X | - SWS_GAUSS | - SWS_LANCZOS | - SWS_SINC | - SWS_SPLINE | - SWS_BICUBLIN); - - /* provide a default scaler if not set by caller */ - if (!i) { - if (dstW < srcW && dstH < srcH) - flags |= SWS_BICUBIC; - else if (dstW > srcW && dstH > srcH) - flags |= SWS_BICUBIC; - else - flags |= SWS_BICUBIC; - c->flags = flags; - } else if (i & (i - 1)) { - av_log(c, AV_LOG_ERROR, - "Exactly one scaler algorithm must be chosen, got %X\n", i); - return AVERROR(EINVAL); - } - /* sanity check */ - if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) { - /* FIXME check if these are enough and try to lower them after - * fixing the relevant parts of the code */ - av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n", - srcW, srcH, dstW, dstH); - return AVERROR(EINVAL); - } - - if (!dstFilter) - dstFilter = &dummyFilter; - if (!srcFilter) - srcFilter = &dummyFilter; - - c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; - c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; - c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); - c->srcFormatBpp = av_get_bits_per_pixel(desc_src); - c->vRounder = 4 * 0x0001000100010001ULL; - - usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) || - (srcFilter->chrV && srcFilter->chrV->length > 1) || - (dstFilter->lumV && dstFilter->lumV->length > 1) || - (dstFilter->chrV && dstFilter->chrV->length > 1); - usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) || - (srcFilter->chrH && srcFilter->chrH->length > 1) || - (dstFilter->lumH && dstFilter->lumH->length > 1) || - (dstFilter->chrH && dstFilter->chrH->length > 1); - - av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample); - av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample); - - if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) { - if (dstW&1) { - av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n"); - flags |= SWS_FULL_CHR_H_INT; - c->flags = flags; - } - - if ( c->chrSrcHSubSample == 0 - && c->chrSrcVSubSample == 0 - && c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER - && !(c->flags & SWS_FAST_BILINEAR) - ) { - av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n"); - flags |= SWS_FULL_CHR_H_INT; - c->flags = flags; - } - } - - if (c->dither == SWS_DITHER_AUTO) { - if (flags & SWS_ERROR_DIFFUSION) - c->dither = SWS_DITHER_ED; - } - - if(dstFormat == AV_PIX_FMT_BGR4_BYTE || - dstFormat == AV_PIX_FMT_RGB4_BYTE || - dstFormat == AV_PIX_FMT_BGR8 || - dstFormat == AV_PIX_FMT_RGB8) { - if (c->dither == SWS_DITHER_AUTO) - c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER; - if (!(flags & SWS_FULL_CHR_H_INT)) { - if (c->dither == SWS_DITHER_ED) { - av_log(c, AV_LOG_DEBUG, - "Desired dithering only supported in full chroma interpolation for destination format '%s'\n", - av_get_pix_fmt_name(dstFormat)); - flags |= SWS_FULL_CHR_H_INT; - c->flags = flags; - } - } - if (flags & SWS_FULL_CHR_H_INT) { - if (c->dither == SWS_DITHER_BAYER) { - av_log(c, AV_LOG_DEBUG, - "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n", - av_get_pix_fmt_name(dstFormat)); - c->dither = SWS_DITHER_ED; - } - } - } - if (isPlanarRGB(dstFormat)) { - if (!(flags & SWS_FULL_CHR_H_INT)) { - av_log(c, AV_LOG_DEBUG, - "%s output is not supported with half chroma resolution, switching to full\n", - av_get_pix_fmt_name(dstFormat)); - flags |= SWS_FULL_CHR_H_INT; - c->flags = flags; - } - } - - /* reuse chroma for 2 pixels RGB/BGR unless user wants full - * chroma interpolation */ - if (flags & SWS_FULL_CHR_H_INT && - isAnyRGB(dstFormat) && - !isPlanarRGB(dstFormat) && - dstFormat != AV_PIX_FMT_RGBA && - dstFormat != AV_PIX_FMT_ARGB && - dstFormat != AV_PIX_FMT_BGRA && - dstFormat != AV_PIX_FMT_ABGR && - dstFormat != AV_PIX_FMT_RGB24 && - dstFormat != AV_PIX_FMT_BGR24 && - dstFormat != AV_PIX_FMT_BGR4_BYTE && - dstFormat != AV_PIX_FMT_RGB4_BYTE && - dstFormat != AV_PIX_FMT_BGR8 && - dstFormat != AV_PIX_FMT_RGB8 - ) { - av_log(c, AV_LOG_WARNING, - "full chroma interpolation for destination format '%s' not yet implemented\n", - av_get_pix_fmt_name(dstFormat)); - flags &= ~SWS_FULL_CHR_H_INT; - c->flags = flags; - } - if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT)) - c->chrDstHSubSample = 1; - - // drop some chroma lines if the user wants it - c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> - SWS_SRC_V_CHR_DROP_SHIFT; - c->chrSrcVSubSample += c->vChrDrop; - - /* drop every other pixel for chroma calculation unless user - * wants full chroma */ - if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) && - srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 && - srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 && - srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE && - srcFormat != AV_PIX_FMT_GBRP9BE && srcFormat != AV_PIX_FMT_GBRP9LE && - srcFormat != AV_PIX_FMT_GBRP10BE && srcFormat != AV_PIX_FMT_GBRP10LE && - srcFormat != AV_PIX_FMT_GBRP12BE && srcFormat != AV_PIX_FMT_GBRP12LE && - srcFormat != AV_PIX_FMT_GBRP14BE && srcFormat != AV_PIX_FMT_GBRP14LE && - srcFormat != AV_PIX_FMT_GBRP16BE && srcFormat != AV_PIX_FMT_GBRP16LE && - ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || - (flags & SWS_FAST_BILINEAR))) - c->chrSrcHSubSample = 1; - - // Note the FF_CEIL_RSHIFT is so that we always round toward +inf. - c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample); - c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample); - c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample); - c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample); - - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); - - /* unscaled special cases */ - if (unscaled && !usesHFilter && !usesVFilter && - (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) { - ff_get_unscaled_swscale(c); - - if (c->swscale) { - if (flags & SWS_PRINT_INFO) - av_log(c, AV_LOG_INFO, - "using unscaled %s -> %s special converter\n", - av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); - return 0; - } - } - - c->srcBpc = 1 + desc_src->comp[0].depth_minus1; - if (c->srcBpc < 8) - c->srcBpc = 8; - c->dstBpc = 1 + desc_dst->comp[0].depth_minus1; - if (c->dstBpc < 8) - c->dstBpc = 8; - if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8) - c->srcBpc = 16; - if (c->dstBpc == 16) - dst_stride <<= 1; - - if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) { - c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 && - c->chrDstW >= c->chrSrcW && - (srcW & 15) == 0; - if (!c->canMMXEXTBeUsed && dstW >= srcW && c->chrDstW >= c->chrSrcW && (srcW & 15) == 0 - - && (flags & SWS_FAST_BILINEAR)) { - if (flags & SWS_PRINT_INFO) - av_log(c, AV_LOG_INFO, - "output width is not a multiple of 32 -> no MMXEXT scaler\n"); - } - if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) - c->canMMXEXTBeUsed = 0; - } else - c->canMMXEXTBeUsed = 0; - - c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; - c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH; - - /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src - * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do - * correct scaling. - * n-2 is the last chrominance sample available. - * This is not perfect, but no one should notice the difference, the more - * correct variant would be like the vertical one, but that would require - * some special code for the first and last pixel */ - if (flags & SWS_FAST_BILINEAR) { - if (c->canMMXEXTBeUsed) { - c->lumXInc += 20; - c->chrXInc += 20; - } - // we don't use the x86 asm scaler if MMX is available - else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) { - c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; - c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; - } - } - -#define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS) - - /* precalculate horizontal scaler filter coefficients */ - { -#if HAVE_MMXEXT_INLINE -// can't downscale !!! - if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) { - c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL, - NULL, NULL, 8); - c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc, - NULL, NULL, NULL, 4); - -#if USE_MMAP - c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); -#elif HAVE_VIRTUALALLOC - c->lumMmxextFilterCode = VirtualAlloc(NULL, - c->lumMmxextFilterCodeSize, - MEM_COMMIT, - PAGE_EXECUTE_READWRITE); - c->chrMmxextFilterCode = VirtualAlloc(NULL, - c->chrMmxextFilterCodeSize, - MEM_COMMIT, - PAGE_EXECUTE_READWRITE); -#else - c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize); - c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize); -#endif - -#ifdef MAP_ANONYMOUS - if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED) -#else - if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode) -#endif - { - av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n"); - return AVERROR(ENOMEM); - } - - FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail); - FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail); - FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); - FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); - - init_hscaler_mmxext( dstW, c->lumXInc, c->lumMmxextFilterCode, - c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); - init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, - c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); - -#if USE_MMAP - if ( mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1 - || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) { - av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n"); - goto fail; - } -#endif - } else -#endif /* HAVE_MMXEXT_INLINE */ - { - const int filterAlign = X86_MMX(cpu_flags) ? 4 : - PPC_ALTIVEC(cpu_flags) ? 8 : 1; - - if (initFilter(&c->hLumFilter, &c->hLumFilterPos, - &c->hLumFilterSize, c->lumXInc, - srcW, dstW, filterAlign, 1 << 14, - (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, - cpu_flags, srcFilter->lumH, dstFilter->lumH, - c->param, - get_local_pos(c, 0, 0, 0), - get_local_pos(c, 0, 0, 0)) < 0) - goto fail; - if (initFilter(&c->hChrFilter, &c->hChrFilterPos, - &c->hChrFilterSize, c->chrXInc, - c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, - (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, - cpu_flags, srcFilter->chrH, dstFilter->chrH, - c->param, - get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0), - get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0) - goto fail; - } - } // initialize horizontal stuff - - /* precalculate vertical scaler filter coefficients */ - { - const int filterAlign = X86_MMX(cpu_flags) ? 2 : - PPC_ALTIVEC(cpu_flags) ? 8 : 1; - - if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, - c->lumYInc, srcH, dstH, filterAlign, (1 << 12), - (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, - cpu_flags, srcFilter->lumV, dstFilter->lumV, - c->param, - get_local_pos(c, 0, 0, 1), - get_local_pos(c, 0, 0, 1)) < 0) - goto fail; - if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, - c->chrYInc, c->chrSrcH, c->chrDstH, - filterAlign, (1 << 12), - (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, - cpu_flags, srcFilter->chrV, dstFilter->chrV, - c->param, - get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1), - get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0) - - goto fail; - -#if HAVE_ALTIVEC - FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail); - FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail); - - for (i = 0; i < c->vLumFilterSize * c->dstH; i++) { - int j; - short *p = (short *)&c->vYCoeffsBank[i]; - for (j = 0; j < 8; j++) - p[j] = c->vLumFilter[i]; - } - - for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) { - int j; - short *p = (short *)&c->vCCoeffsBank[i]; - for (j = 0; j < 8; j++) - p[j] = c->vChrFilter[i]; - } -#endif - } - - // calculate buffer sizes so that they won't run out while handling these damn slices - c->vLumBufSize = c->vLumFilterSize; - c->vChrBufSize = c->vChrFilterSize; - for (i = 0; i < dstH; i++) { - int chrI = (int64_t)i * c->chrDstH / dstH; - int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1, - ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1) - << c->chrSrcVSubSample)); - - nextSlice >>= c->chrSrcVSubSample; - nextSlice <<= c->chrSrcVSubSample; - if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice) - c->vLumBufSize = nextSlice - c->vLumFilterPos[i]; - if (c->vChrFilterPos[chrI] + c->vChrBufSize < - (nextSlice >> c->chrSrcVSubSample)) - c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) - - c->vChrFilterPos[chrI]; - } - - for (i = 0; i < 4; i++) - FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail); - - /* Allocate pixbufs (we use dynamic allocation because otherwise we would - * need to allocate several megabytes to handle all possible cases) */ - FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); - FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail); - FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail); - if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) - FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); - /* Note we need at least one pixel more at the end because of the MMX code - * (just in case someone wants to replace the 4000/8000). */ - /* align at 16 bytes for AltiVec */ - for (i = 0; i < c->vLumBufSize; i++) { - FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize], - dst_stride + 16, fail); - c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize]; - } - // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) - c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); - c->uv_offx2 = dst_stride + 16; - for (i = 0; i < c->vChrBufSize; i++) { - FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize], - dst_stride * 2 + 32, fail); - c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize]; - c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize] - = c->chrUPixBuf[i] + (dst_stride >> 1) + 8; - } - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) - for (i = 0; i < c->vLumBufSize; i++) { - FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize], - dst_stride + 16, fail); - c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize]; - } - - // try to avoid drawing green stuff between the right end and the stride end - for (i = 0; i < c->vChrBufSize; i++) - if(desc_dst->comp[0].depth_minus1 == 15){ - av_assert0(c->dstBpc > 14); - for(j=0; j<dst_stride/2+1; j++) - ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18; - } else - for(j=0; j<dst_stride+1; j++) - ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14; - - av_assert0(c->chrDstH <= dstH); - - if (flags & SWS_PRINT_INFO) { - const char *scaler = NULL, *cpucaps; - - for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { - if (flags & scale_algorithms[i].flag) { - scaler = scale_algorithms[i].description; - break; - } - } - if (!scaler) - scaler = "ehh flags invalid?!"; - av_log(c, AV_LOG_INFO, "%s scaler, from %s to %s%s ", - scaler, - av_get_pix_fmt_name(srcFormat), -#ifdef DITHER1XBPP - dstFormat == AV_PIX_FMT_BGR555 || dstFormat == AV_PIX_FMT_BGR565 || - dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE || - dstFormat == AV_PIX_FMT_BGR444BE || dstFormat == AV_PIX_FMT_BGR444LE ? - "dithered " : "", -#else - "", -#endif - av_get_pix_fmt_name(dstFormat)); - - if (INLINE_MMXEXT(cpu_flags)) - cpucaps = "MMXEXT"; - else if (INLINE_AMD3DNOW(cpu_flags)) - cpucaps = "3DNOW"; - else if (INLINE_MMX(cpu_flags)) - cpucaps = "MMX"; - else if (PPC_ALTIVEC(cpu_flags)) - cpucaps = "AltiVec"; - else - cpucaps = "C"; - - av_log(c, AV_LOG_INFO, "using %s\n", cpucaps); - - av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); - av_log(c, AV_LOG_DEBUG, - "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", - c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); - av_log(c, AV_LOG_DEBUG, - "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", - c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, - c->chrXInc, c->chrYInc); - } - - c->swscale = ff_getSwsFunc(c); - return 0; -fail: // FIXME replace things by appropriate error codes - return -1; -} - -#if FF_API_SWS_GETCONTEXT -SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, - int dstW, int dstH, enum AVPixelFormat dstFormat, - int flags, SwsFilter *srcFilter, - SwsFilter *dstFilter, const double *param) -{ - SwsContext *c; - - if (!(c = sws_alloc_context())) - return NULL; - - c->flags = flags; - c->srcW = srcW; - c->srcH = srcH; - c->dstW = dstW; - c->dstH = dstH; - c->srcFormat = srcFormat; - c->dstFormat = dstFormat; - - if (param) { - c->param[0] = param[0]; - c->param[1] = param[1]; - } - - if (sws_init_context(c, srcFilter, dstFilter) < 0) { - sws_freeContext(c); - return NULL; - } - - return c; -} -#endif - -SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, - float lumaSharpen, float chromaSharpen, - float chromaHShift, float chromaVShift, - int verbose) -{ - SwsFilter *filter = av_malloc(sizeof(SwsFilter)); - if (!filter) - return NULL; - - if (lumaGBlur != 0.0) { - filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0); - filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0); - } else { - filter->lumH = sws_getIdentityVec(); - filter->lumV = sws_getIdentityVec(); - } - - if (chromaGBlur != 0.0) { - filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0); - filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0); - } else { - filter->chrH = sws_getIdentityVec(); - filter->chrV = sws_getIdentityVec(); - } - - if (chromaSharpen != 0.0) { - SwsVector *id = sws_getIdentityVec(); - sws_scaleVec(filter->chrH, -chromaSharpen); - sws_scaleVec(filter->chrV, -chromaSharpen); - sws_addVec(filter->chrH, id); - sws_addVec(filter->chrV, id); - sws_freeVec(id); - } - - if (lumaSharpen != 0.0) { - SwsVector *id = sws_getIdentityVec(); - sws_scaleVec(filter->lumH, -lumaSharpen); - sws_scaleVec(filter->lumV, -lumaSharpen); - sws_addVec(filter->lumH, id); - sws_addVec(filter->lumV, id); - sws_freeVec(id); - } - - if (chromaHShift != 0.0) - sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5)); - - if (chromaVShift != 0.0) - sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5)); - - sws_normalizeVec(filter->chrH, 1.0); - sws_normalizeVec(filter->chrV, 1.0); - sws_normalizeVec(filter->lumH, 1.0); - sws_normalizeVec(filter->lumV, 1.0); - - if (verbose) - sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG); - if (verbose) - sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG); - - return filter; -} - -SwsVector *sws_allocVec(int length) -{ - SwsVector *vec; - - if(length <= 0 || length > INT_MAX/ sizeof(double)) - return NULL; - - vec = av_malloc(sizeof(SwsVector)); - if (!vec) - return NULL; - vec->length = length; - vec->coeff = av_malloc(sizeof(double) * length); - if (!vec->coeff) - av_freep(&vec); - return vec; -} - -SwsVector *sws_getGaussianVec(double variance, double quality) -{ - const int length = (int)(variance * quality + 0.5) | 1; - int i; - double middle = (length - 1) * 0.5; - SwsVector *vec; - - if(variance < 0 || quality < 0) - return NULL; - - vec = sws_allocVec(length); - - if (!vec) - return NULL; - - for (i = 0; i < length; i++) { - double dist = i - middle; - vec->coeff[i] = exp(-dist * dist / (2 * variance * variance)) / - sqrt(2 * variance * M_PI); - } - - sws_normalizeVec(vec, 1.0); - - return vec; -} - -SwsVector *sws_getConstVec(double c, int length) -{ - int i; - SwsVector *vec = sws_allocVec(length); - - if (!vec) - return NULL; - - for (i = 0; i < length; i++) - vec->coeff[i] = c; - - return vec; -} - -SwsVector *sws_getIdentityVec(void) -{ - return sws_getConstVec(1.0, 1); -} - -static double sws_dcVec(SwsVector *a) -{ - int i; - double sum = 0; - - for (i = 0; i < a->length; i++) - sum += a->coeff[i]; - - return sum; -} - -void sws_scaleVec(SwsVector *a, double scalar) -{ - int i; - - for (i = 0; i < a->length; i++) - a->coeff[i] *= scalar; -} - -void sws_normalizeVec(SwsVector *a, double height) -{ - sws_scaleVec(a, height / sws_dcVec(a)); -} - -static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b) -{ - int length = a->length + b->length - 1; - int i, j; - SwsVector *vec = sws_getConstVec(0.0, length); - - if (!vec) - return NULL; - - for (i = 0; i < a->length; i++) { - for (j = 0; j < b->length; j++) { - vec->coeff[i + j] += a->coeff[i] * b->coeff[j]; - } - } - - return vec; -} - -static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b) -{ - int length = FFMAX(a->length, b->length); - int i; - SwsVector *vec = sws_getConstVec(0.0, length); - - if (!vec) - return NULL; - - for (i = 0; i < a->length; i++) - vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i]; - for (i = 0; i < b->length; i++) - vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] += b->coeff[i]; - - return vec; -} - -static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b) -{ - int length = FFMAX(a->length, b->length); - int i; - SwsVector *vec = sws_getConstVec(0.0, length); - - if (!vec) - return NULL; - - for (i = 0; i < a->length; i++) - vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i]; - for (i = 0; i < b->length; i++) - vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] -= b->coeff[i]; - - return vec; -} - -/* shift left / or right if "shift" is negative */ -static SwsVector *sws_getShiftedVec(SwsVector *a, int shift) -{ - int length = a->length + FFABS(shift) * 2; - int i; - SwsVector *vec = sws_getConstVec(0.0, length); - - if (!vec) - return NULL; - - for (i = 0; i < a->length; i++) { - vec->coeff[i + (length - 1) / 2 - - (a->length - 1) / 2 - shift] = a->coeff[i]; - } - - return vec; -} - -void sws_shiftVec(SwsVector *a, int shift) -{ - SwsVector *shifted = sws_getShiftedVec(a, shift); - av_free(a->coeff); - a->coeff = shifted->coeff; - a->length = shifted->length; - av_free(shifted); -} - -void sws_addVec(SwsVector *a, SwsVector *b) -{ - SwsVector *sum = sws_sumVec(a, b); - av_free(a->coeff); - a->coeff = sum->coeff; - a->length = sum->length; - av_free(sum); -} - -void sws_subVec(SwsVector *a, SwsVector *b) -{ - SwsVector *diff = sws_diffVec(a, b); - av_free(a->coeff); - a->coeff = diff->coeff; - a->length = diff->length; - av_free(diff); -} - -void sws_convVec(SwsVector *a, SwsVector *b) -{ - SwsVector *conv = sws_getConvVec(a, b); - av_free(a->coeff); - a->coeff = conv->coeff; - a->length = conv->length; - av_free(conv); -} - -SwsVector *sws_cloneVec(SwsVector *a) -{ - SwsVector *vec = sws_allocVec(a->length); - - if (!vec) - return NULL; - - memcpy(vec->coeff, a->coeff, a->length * sizeof(*a->coeff)); - - return vec; -} - -void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level) -{ - int i; - double max = 0; - double min = 0; - double range; - - for (i = 0; i < a->length; i++) - if (a->coeff[i] > max) - max = a->coeff[i]; - - for (i = 0; i < a->length; i++) - if (a->coeff[i] < min) - min = a->coeff[i]; - - range = max - min; - - for (i = 0; i < a->length; i++) { - int x = (int)((a->coeff[i] - min) * 60.0 / range + 0.5); - av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]); - for (; x > 0; x--) - av_log(log_ctx, log_level, " "); - av_log(log_ctx, log_level, "|\n"); - } -} - -void sws_freeVec(SwsVector *a) -{ - if (!a) - return; - av_freep(&a->coeff); - a->length = 0; - av_free(a); -} - -void sws_freeFilter(SwsFilter *filter) -{ - if (!filter) - return; - - sws_freeVec(filter->lumH); - sws_freeVec(filter->lumV); - sws_freeVec(filter->chrH); - sws_freeVec(filter->chrV); - av_free(filter); -} - -void sws_freeContext(SwsContext *c) -{ - int i; - if (!c) - return; - - if (c->lumPixBuf) { - for (i = 0; i < c->vLumBufSize; i++) - av_freep(&c->lumPixBuf[i]); - av_freep(&c->lumPixBuf); - } - - if (c->chrUPixBuf) { - for (i = 0; i < c->vChrBufSize; i++) - av_freep(&c->chrUPixBuf[i]); - av_freep(&c->chrUPixBuf); - av_freep(&c->chrVPixBuf); - } - - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - for (i = 0; i < c->vLumBufSize; i++) - av_freep(&c->alpPixBuf[i]); - av_freep(&c->alpPixBuf); - } - - for (i = 0; i < 4; i++) - av_freep(&c->dither_error[i]); - - av_freep(&c->vLumFilter); - av_freep(&c->vChrFilter); - av_freep(&c->hLumFilter); - av_freep(&c->hChrFilter); -#if HAVE_ALTIVEC - av_freep(&c->vYCoeffsBank); - av_freep(&c->vCCoeffsBank); -#endif - - av_freep(&c->vLumFilterPos); - av_freep(&c->vChrFilterPos); - av_freep(&c->hLumFilterPos); - av_freep(&c->hChrFilterPos); - -#if HAVE_MMX_INLINE -#if USE_MMAP - if (c->lumMmxextFilterCode) - munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize); - if (c->chrMmxextFilterCode) - munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize); -#elif HAVE_VIRTUALALLOC - if (c->lumMmxextFilterCode) - VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE); - if (c->chrMmxextFilterCode) - VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE); -#else - av_free(c->lumMmxextFilterCode); - av_free(c->chrMmxextFilterCode); -#endif - c->lumMmxextFilterCode = NULL; - c->chrMmxextFilterCode = NULL; -#endif /* HAVE_MMX_INLINE */ - - av_freep(&c->yuvTable); - av_freep(&c->formatConvBuffer); - - av_free(c); -} - -struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW, - int srcH, enum AVPixelFormat srcFormat, - int dstW, int dstH, - enum AVPixelFormat dstFormat, int flags, - SwsFilter *srcFilter, - SwsFilter *dstFilter, - const double *param) -{ - static const double default_param[2] = { SWS_PARAM_DEFAULT, - SWS_PARAM_DEFAULT }; - - if (!param) - param = default_param; - - if (context && - (context->srcW != srcW || - context->srcH != srcH || - context->srcFormat != srcFormat || - context->dstW != dstW || - context->dstH != dstH || - context->dstFormat != dstFormat || - context->flags != flags || - context->param[0] != param[0] || - context->param[1] != param[1])) { - sws_freeContext(context); - context = NULL; - } - - if (!context) { - if (!(context = sws_alloc_context())) - return NULL; - context->srcW = srcW; - context->srcH = srcH; - context->srcFormat = srcFormat; - context->dstW = dstW; - context->dstH = dstH; - context->dstFormat = dstFormat; - context->flags = flags; - context->param[0] = param[0]; - context->param[1] = param[1]; - if (sws_init_context(context, srcFilter, dstFilter) < 0) { - sws_freeContext(context); - return NULL; - } - } - return context; -} diff --git a/ffmpeg/libswscale/version.h b/ffmpeg/libswscale/version.h deleted file mode 100644 index 99f3295..0000000 --- a/ffmpeg/libswscale/version.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SWSCALE_VERSION_H -#define SWSCALE_VERSION_H - -/** - * @file - * swscale version macros - */ - -#include "libavutil/version.h" - -#define LIBSWSCALE_VERSION_MAJOR 2 -#define LIBSWSCALE_VERSION_MINOR 5 -#define LIBSWSCALE_VERSION_MICRO 101 - -#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ - LIBSWSCALE_VERSION_MINOR, \ - LIBSWSCALE_VERSION_MICRO) -#define LIBSWSCALE_VERSION AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \ - LIBSWSCALE_VERSION_MINOR, \ - LIBSWSCALE_VERSION_MICRO) -#define LIBSWSCALE_BUILD LIBSWSCALE_VERSION_INT - -#define LIBSWSCALE_IDENT "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION) - -/** - * FF_API_* defines may be placed below to indicate public API that will be - * dropped at a future version bump. The defines themselves are not part of - * the public API and may change, break or disappear at any time. - */ - -#ifndef FF_API_SWS_GETCONTEXT -#define FF_API_SWS_GETCONTEXT (LIBSWSCALE_VERSION_MAJOR < 3) -#endif -#ifndef FF_API_SWS_CPU_CAPS -#define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3) -#endif -#ifndef FF_API_SWS_FORMAT_NAME -#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 3) -#endif - -#endif /* SWSCALE_VERSION_H */ diff --git a/ffmpeg/libswscale/x86/Makefile b/ffmpeg/libswscale/x86/Makefile deleted file mode 100644 index e767a5c..0000000 --- a/ffmpeg/libswscale/x86/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) - -OBJS += x86/rgb2rgb.o \ - x86/swscale.o \ - x86/yuv2rgb.o \ - -OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o - -YASM-OBJS += x86/input.o \ - x86/output.o \ - x86/scale.o \ diff --git a/ffmpeg/libswscale/x86/input.asm b/ffmpeg/libswscale/x86/input.asm deleted file mode 100644 index 0c4f30e..0000000 --- a/ffmpeg/libswscale/x86/input.asm +++ /dev/null @@ -1,696 +0,0 @@ -;****************************************************************************** -;* x86-optimized input routines; does shuffling of packed -;* YUV formats into individual planes, and converts RGB -;* into YUV planes also. -;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> -;* -;* This file is part of FFmpeg. -;* -;* FFmpeg is free software; you can redistribute it and/or -;* modify it under the terms of the GNU Lesser General Public -;* License as published by the Free Software Foundation; either -;* version 2.1 of the License, or (at your option) any later version. -;* -;* FFmpeg is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;* Lesser General Public License for more details. -;* -;* You should have received a copy of the GNU Lesser General Public -;* License along with FFmpeg; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;****************************************************************************** - -%include "libavutil/x86/x86util.asm" - -SECTION_RODATA - -%define RY 0x20DE -%define GY 0x4087 -%define BY 0x0C88 -%define RU 0xECFF -%define GU 0xDAC8 -%define BU 0x3838 -%define RV 0x3838 -%define GV 0xD0E3 -%define BV 0xF6E4 - -rgb_Yrnd: times 4 dd 0x80100 ; 16.5 << 15 -rgb_UVrnd: times 4 dd 0x400100 ; 128.5 << 15 -%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq -%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq -%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq -%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq -%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq -%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq -%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq -%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq -%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq -%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq -%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq -%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq - -%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq -%define rgba_Ycoeff_br 16*4 + 16*13 + tableq -%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq -%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq -%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq -%define rgba_Ucoeff_br 16*4 + 16*17 + tableq -%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq -%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq -%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq -%define rgba_Vcoeff_br 16*4 + 16*21 + tableq -%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq -%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq - -; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY -; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY -; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY -; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY -; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU -; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU -; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU -; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU -; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV -; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV -; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV -; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV - -; rgba_Ycoeff_rb: times 4 dw RY, BY -; rgba_Ycoeff_br: times 4 dw BY, RY -; rgba_Ycoeff_ga: times 4 dw GY, 0 -; rgba_Ycoeff_ag: times 4 dw 0, GY -; rgba_Ucoeff_rb: times 4 dw RU, BU -; rgba_Ucoeff_br: times 4 dw BU, RU -; rgba_Ucoeff_ga: times 4 dw GU, 0 -; rgba_Ucoeff_ag: times 4 dw 0, GU -; rgba_Vcoeff_rb: times 4 dw RV, BV -; rgba_Vcoeff_br: times 4 dw BV, RV -; rgba_Vcoeff_ga: times 4 dw GV, 0 -; rgba_Vcoeff_ag: times 4 dw 0, GV - -shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \ - 6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80 -shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \ - 8, 0x80, 9, 0x80, 10, 0x80, 11, 0x80 - -SECTION .text - -;----------------------------------------------------------------------------- -; RGB to Y/UV. -; -; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w); -; and -; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, -; const uint8_t *unused, int w); -;----------------------------------------------------------------------------- - -; %1 = nr. of XMM registers -; %2 = rgb or bgr -%macro RGB24_TO_Y_FN 2-3 -cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table -%if mmsize == 8 - mova m5, [%2_Ycoeff_12x4] - mova m6, [%2_Ycoeff_3x56] -%define coeff1 m5 -%define coeff2 m6 -%elif ARCH_X86_64 - mova m8, [%2_Ycoeff_12x4] - mova m9, [%2_Ycoeff_3x56] -%define coeff1 m8 -%define coeff2 m9 -%else ; x86-32 && mmsize == 16 -%define coeff1 [%2_Ycoeff_12x4] -%define coeff2 [%2_Ycoeff_3x56] -%endif ; x86-32/64 && mmsize == 8/16 -%if (ARCH_X86_64 || mmsize == 8) && %0 == 3 - jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body -%else ; (ARCH_X86_64 && %0 == 3) || mmsize == 8 -.body: -%if cpuflag(ssse3) - mova m7, [shuf_rgb_12x4] -%define shuf_rgb1 m7 -%if ARCH_X86_64 - mova m10, [shuf_rgb_3x56] -%define shuf_rgb2 m10 -%else ; x86-32 -%define shuf_rgb2 [shuf_rgb_3x56] -%endif ; x86-32/64 -%endif ; cpuflag(ssse3) -%if ARCH_X86_64 - movsxd wq, wd -%endif - add wq, wq - add dstq, wq - neg wq -%if notcpuflag(ssse3) - pxor m7, m7 -%endif ; !cpuflag(ssse3) - mova m4, [rgb_Yrnd] -.loop: -%if cpuflag(ssse3) - movu m0, [srcq+0] ; (byte) { Bx, Gx, Rx }[0-3] - movu m2, [srcq+12] ; (byte) { Bx, Gx, Rx }[4-7] - pshufb m1, m0, shuf_rgb2 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } - pshufb m0, shuf_rgb1 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } - pshufb m3, m2, shuf_rgb2 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } - pshufb m2, shuf_rgb1 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } -%else ; !cpuflag(ssse3) - movd m0, [srcq+0] ; (byte) { B0, G0, R0, B1 } - movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 } - movd m2, [srcq+6] ; (byte) { B2, G2, R2, B3 } - movd m3, [srcq+8] ; (byte) { R2, B3, G3, R3 } -%if mmsize == 16 ; i.e. sse2 - punpckldq m0, m2 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 } - punpckldq m1, m3 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 } - movd m2, [srcq+12] ; (byte) { B4, G4, R4, B5 } - movd m3, [srcq+14] ; (byte) { R4, B5, G5, R5 } - movd m5, [srcq+18] ; (byte) { B6, G6, R6, B7 } - movd m6, [srcq+20] ; (byte) { R6, B7, G7, R7 } - punpckldq m2, m5 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 } - punpckldq m3, m6 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 } -%endif ; mmsize == 16 - punpcklbw m0, m7 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } - punpcklbw m1, m7 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } - punpcklbw m2, m7 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } - punpcklbw m3, m7 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } -%endif ; cpuflag(ssse3) - add srcq, 3 * mmsize / 2 - pmaddwd m0, coeff1 ; (dword) { B0*BY + G0*GY, B1*BY, B2*BY + G2*GY, B3*BY } - pmaddwd m1, coeff2 ; (dword) { R0*RY, G1+GY + R1*RY, R2*RY, G3+GY + R3*RY } - pmaddwd m2, coeff1 ; (dword) { B4*BY + G4*GY, B5*BY, B6*BY + G6*GY, B7*BY } - pmaddwd m3, coeff2 ; (dword) { R4*RY, G5+GY + R5*RY, R6*RY, G7+GY + R7*RY } - paddd m0, m1 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[0-3] - paddd m2, m3 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7] - paddd m0, m4 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] } - paddd m2, m4 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] } - psrad m0, 9 - psrad m2, 9 - packssdw m0, m2 ; (word) { Y[0-7] } - mova [dstq+wq], m0 - add wq, mmsize - jl .loop - REP_RET -%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8 -%endmacro - -; %1 = nr. of XMM registers -; %2 = rgb or bgr -%macro RGB24_TO_UV_FN 2-3 -cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table -%if ARCH_X86_64 - mova m8, [%2_Ucoeff_12x4] - mova m9, [%2_Ucoeff_3x56] - mova m10, [%2_Vcoeff_12x4] - mova m11, [%2_Vcoeff_3x56] -%define coeffU1 m8 -%define coeffU2 m9 -%define coeffV1 m10 -%define coeffV2 m11 -%else ; x86-32 -%define coeffU1 [%2_Ucoeff_12x4] -%define coeffU2 [%2_Ucoeff_3x56] -%define coeffV1 [%2_Vcoeff_12x4] -%define coeffV2 [%2_Vcoeff_3x56] -%endif ; x86-32/64 -%if ARCH_X86_64 && %0 == 3 - jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToUV %+ SUFFIX).body -%else ; ARCH_X86_64 && %0 == 3 -.body: -%if cpuflag(ssse3) - mova m7, [shuf_rgb_12x4] -%define shuf_rgb1 m7 -%if ARCH_X86_64 - mova m12, [shuf_rgb_3x56] -%define shuf_rgb2 m12 -%else ; x86-32 -%define shuf_rgb2 [shuf_rgb_3x56] -%endif ; x86-32/64 -%endif ; cpuflag(ssse3) -%if ARCH_X86_64 - movsxd wq, dword r5m -%else ; x86-32 - mov wq, r5m -%endif - add wq, wq - add dstUq, wq - add dstVq, wq - neg wq - mova m6, [rgb_UVrnd] -%if notcpuflag(ssse3) - pxor m7, m7 -%endif -.loop: -%if cpuflag(ssse3) - movu m0, [srcq+0] ; (byte) { Bx, Gx, Rx }[0-3] - movu m4, [srcq+12] ; (byte) { Bx, Gx, Rx }[4-7] - pshufb m1, m0, shuf_rgb2 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } - pshufb m0, shuf_rgb1 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } -%else ; !cpuflag(ssse3) - movd m0, [srcq+0] ; (byte) { B0, G0, R0, B1 } - movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 } - movd m4, [srcq+6] ; (byte) { B2, G2, R2, B3 } - movd m5, [srcq+8] ; (byte) { R2, B3, G3, R3 } -%if mmsize == 16 - punpckldq m0, m4 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 } - punpckldq m1, m5 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 } - movd m4, [srcq+12] ; (byte) { B4, G4, R4, B5 } - movd m5, [srcq+14] ; (byte) { R4, B5, G5, R5 } -%endif ; mmsize == 16 - punpcklbw m0, m7 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } - punpcklbw m1, m7 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } -%endif ; cpuflag(ssse3) - pmaddwd m2, m0, coeffV1 ; (dword) { B0*BV + G0*GV, B1*BV, B2*BV + G2*GV, B3*BV } - pmaddwd m3, m1, coeffV2 ; (dword) { R0*BV, G1*GV + R1*BV, R2*BV, G3*GV + R3*BV } - pmaddwd m0, coeffU1 ; (dword) { B0*BU + G0*GU, B1*BU, B2*BU + G2*GU, B3*BU } - pmaddwd m1, coeffU2 ; (dword) { R0*BU, G1*GU + R1*BU, R2*BU, G3*GU + R3*BU } - paddd m0, m1 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[0-3] - paddd m2, m3 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[0-3] -%if cpuflag(ssse3) - pshufb m5, m4, shuf_rgb2 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } - pshufb m4, shuf_rgb1 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } -%else ; !cpuflag(ssse3) -%if mmsize == 16 - movd m1, [srcq+18] ; (byte) { B6, G6, R6, B7 } - movd m3, [srcq+20] ; (byte) { R6, B7, G7, R7 } - punpckldq m4, m1 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 } - punpckldq m5, m3 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 } -%endif ; mmsize == 16 && !cpuflag(ssse3) - punpcklbw m4, m7 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } - punpcklbw m5, m7 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } -%endif ; cpuflag(ssse3) - add srcq, 3 * mmsize / 2 - pmaddwd m1, m4, coeffU1 ; (dword) { B4*BU + G4*GU, B5*BU, B6*BU + G6*GU, B7*BU } - pmaddwd m3, m5, coeffU2 ; (dword) { R4*BU, G5*GU + R5*BU, R6*BU, G7*GU + R7*BU } - pmaddwd m4, coeffV1 ; (dword) { B4*BV + G4*GV, B5*BV, B6*BV + G6*GV, B7*BV } - pmaddwd m5, coeffV2 ; (dword) { R4*BV, G5*GV + R5*BV, R6*BV, G7*GV + R7*BV } - paddd m1, m3 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[4-7] - paddd m4, m5 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[4-7] - paddd m0, m6 ; += rgb_UVrnd, i.e. (dword) { U[0-3] } - paddd m2, m6 ; += rgb_UVrnd, i.e. (dword) { V[0-3] } - paddd m1, m6 ; += rgb_UVrnd, i.e. (dword) { U[4-7] } - paddd m4, m6 ; += rgb_UVrnd, i.e. (dword) { V[4-7] } - psrad m0, 9 - psrad m2, 9 - psrad m1, 9 - psrad m4, 9 - packssdw m0, m1 ; (word) { U[0-7] } - packssdw m2, m4 ; (word) { V[0-7] } -%if mmsize == 8 - mova [dstUq+wq], m0 - mova [dstVq+wq], m2 -%else ; mmsize == 16 - mova [dstUq+wq], m0 - mova [dstVq+wq], m2 -%endif ; mmsize == 8/16 - add wq, mmsize - jl .loop - REP_RET -%endif ; ARCH_X86_64 && %0 == 3 -%endmacro - -; %1 = nr. of XMM registers for rgb-to-Y func -; %2 = nr. of XMM registers for rgb-to-UV func -%macro RGB24_FUNCS 2 -RGB24_TO_Y_FN %1, rgb -RGB24_TO_Y_FN %1, bgr, rgb -RGB24_TO_UV_FN %2, rgb -RGB24_TO_UV_FN %2, bgr, rgb -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -RGB24_FUNCS 0, 0 -%endif - -INIT_XMM sse2 -RGB24_FUNCS 10, 12 - -INIT_XMM ssse3 -RGB24_FUNCS 11, 13 - -%if HAVE_AVX_EXTERNAL -INIT_XMM avx -RGB24_FUNCS 11, 13 -%endif - -; %1 = nr. of XMM registers -; %2-5 = rgba, bgra, argb or abgr (in individual characters) -%macro RGB32_TO_Y_FN 5-6 -cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table - mova m5, [rgba_Ycoeff_%2%4] - mova m6, [rgba_Ycoeff_%3%5] -%if %0 == 6 - jmp mangle(private_prefix %+ _ %+ %6 %+ ToY %+ SUFFIX).body -%else ; %0 == 6 -.body: -%if ARCH_X86_64 - movsxd wq, wd -%endif - lea srcq, [srcq+wq*4] - add wq, wq - add dstq, wq - neg wq - mova m4, [rgb_Yrnd] - pcmpeqb m7, m7 - psrlw m7, 8 ; (word) { 0x00ff } x4 -.loop: - ; FIXME check alignment and use mova - movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] - movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] - DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] - pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] - pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] - pmaddwd m3, m5 ; (dword) { Bx*BY + Rx*RY }[4-7] - pmaddwd m2, m6 ; (dword) { Gx*GY }[4-7] - paddd m0, m4 ; += rgb_Yrnd - paddd m2, m4 ; += rgb_Yrnd - paddd m0, m1 ; (dword) { Y[0-3] } - paddd m2, m3 ; (dword) { Y[4-7] } - psrad m0, 9 - psrad m2, 9 - packssdw m0, m2 ; (word) { Y[0-7] } - mova [dstq+wq], m0 - add wq, mmsize - jl .loop - REP_RET -%endif ; %0 == 3 -%endmacro - -; %1 = nr. of XMM registers -; %2-5 = rgba, bgra, argb or abgr (in individual characters) -%macro RGB32_TO_UV_FN 5-6 -cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table -%if ARCH_X86_64 - mova m8, [rgba_Ucoeff_%2%4] - mova m9, [rgba_Ucoeff_%3%5] - mova m10, [rgba_Vcoeff_%2%4] - mova m11, [rgba_Vcoeff_%3%5] -%define coeffU1 m8 -%define coeffU2 m9 -%define coeffV1 m10 -%define coeffV2 m11 -%else ; x86-32 -%define coeffU1 [rgba_Ucoeff_%2%4] -%define coeffU2 [rgba_Ucoeff_%3%5] -%define coeffV1 [rgba_Vcoeff_%2%4] -%define coeffV2 [rgba_Vcoeff_%3%5] -%endif ; x86-64/32 -%if ARCH_X86_64 && %0 == 6 - jmp mangle(private_prefix %+ _ %+ %6 %+ ToUV %+ SUFFIX).body -%else ; ARCH_X86_64 && %0 == 6 -.body: -%if ARCH_X86_64 - movsxd wq, dword r5m -%else ; x86-32 - mov wq, r5m -%endif - add wq, wq - add dstUq, wq - add dstVq, wq - lea srcq, [srcq+wq*2] - neg wq - pcmpeqb m7, m7 - psrlw m7, 8 ; (word) { 0x00ff } x4 - mova m6, [rgb_UVrnd] -.loop: - ; FIXME check alignment and use mova - movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] - movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] - DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] - pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] - pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] - pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3] - pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3] - paddd m3, m6 ; += rgb_UVrnd - paddd m1, m6 ; += rgb_UVrnd - paddd m2, m3 ; (dword) { V[0-3] } - paddd m0, m1 ; (dword) { U[0-3] } - pmaddwd m3, m5, coeffV1 ; (dword) { Bx*BV + Rx*RV }[4-7] - pmaddwd m1, m4, coeffV2 ; (dword) { Gx*GV }[4-7] - pmaddwd m5, coeffU1 ; (dword) { Bx*BU + Rx*RU }[4-7] - pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7] - paddd m3, m6 ; += rgb_UVrnd - paddd m5, m6 ; += rgb_UVrnd - psrad m0, 9 - paddd m1, m3 ; (dword) { V[4-7] } - paddd m4, m5 ; (dword) { U[4-7] } - psrad m2, 9 - psrad m4, 9 - psrad m1, 9 - packssdw m0, m4 ; (word) { U[0-7] } - packssdw m2, m1 ; (word) { V[0-7] } -%if mmsize == 8 - mova [dstUq+wq], m0 - mova [dstVq+wq], m2 -%else ; mmsize == 16 - mova [dstUq+wq], m0 - mova [dstVq+wq], m2 -%endif ; mmsize == 8/16 - add wq, mmsize - jl .loop - REP_RET -%endif ; ARCH_X86_64 && %0 == 3 -%endmacro - -; %1 = nr. of XMM registers for rgb-to-Y func -; %2 = nr. of XMM registers for rgb-to-UV func -%macro RGB32_FUNCS 2 -RGB32_TO_Y_FN %1, r, g, b, a -RGB32_TO_Y_FN %1, b, g, r, a, rgba -RGB32_TO_Y_FN %1, a, r, g, b, rgba -RGB32_TO_Y_FN %1, a, b, g, r, rgba - -RGB32_TO_UV_FN %2, r, g, b, a -RGB32_TO_UV_FN %2, b, g, r, a, rgba -RGB32_TO_UV_FN %2, a, r, g, b, rgba -RGB32_TO_UV_FN %2, a, b, g, r, rgba -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -RGB32_FUNCS 0, 0 -%endif - -INIT_XMM sse2 -RGB32_FUNCS 8, 12 - -%if HAVE_AVX_EXTERNAL -INIT_XMM avx -RGB32_FUNCS 8, 12 -%endif - -;----------------------------------------------------------------------------- -; YUYV/UYVY/NV12/NV21 packed pixel shuffling. -; -; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w); -; and -; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, -; const uint8_t *unused, int w); -;----------------------------------------------------------------------------- - -; %1 = a (aligned) or u (unaligned) -; %2 = yuyv or uyvy -%macro LOOP_YUYV_TO_Y 2 -.loop_%1: - mov%1 m0, [srcq+wq*2] ; (byte) { Y0, U0, Y1, V0, ... } - mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } -%ifidn %2, yuyv - pand m0, m2 ; (word) { Y0, Y1, ..., Y7 } - pand m1, m2 ; (word) { Y8, Y9, ..., Y15 } -%else ; uyvy - psrlw m0, 8 ; (word) { Y0, Y1, ..., Y7 } - psrlw m1, 8 ; (word) { Y8, Y9, ..., Y15 } -%endif ; yuyv/uyvy - packuswb m0, m1 ; (byte) { Y0, ..., Y15 } - mova [dstq+wq], m0 - add wq, mmsize - jl .loop_%1 - REP_RET -%endmacro - -; %1 = nr. of XMM registers -; %2 = yuyv or uyvy -; %3 = if specified, it means that unaligned and aligned code in loop -; will be the same (i.e. YUYV+AVX), and thus we don't need to -; split the loop in an aligned and unaligned case -%macro YUYV_TO_Y_FN 2-3 -cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w -%if ARCH_X86_64 - movsxd wq, wd -%endif - add dstq, wq -%if mmsize == 16 - test srcq, 15 -%endif - lea srcq, [srcq+wq*2] -%ifidn %2, yuyv - pcmpeqb m2, m2 ; (byte) { 0xff } x 16 - psrlw m2, 8 ; (word) { 0x00ff } x 8 -%endif ; yuyv -%if mmsize == 16 - jnz .loop_u_start - neg wq - LOOP_YUYV_TO_Y a, %2 -.loop_u_start: - neg wq - LOOP_YUYV_TO_Y u, %2 -%else ; mmsize == 8 - neg wq - LOOP_YUYV_TO_Y a, %2 -%endif ; mmsize == 8/16 -%endmacro - -; %1 = a (aligned) or u (unaligned) -; %2 = yuyv or uyvy -%macro LOOP_YUYV_TO_UV 2 -.loop_%1: -%ifidn %2, yuyv - mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } - mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } - psrlw m0, 8 ; (word) { U0, V0, ..., U3, V3 } - psrlw m1, 8 ; (word) { U4, V4, ..., U7, V7 } -%else ; uyvy -%if cpuflag(avx) - vpand m0, m2, [srcq+wq*4] ; (word) { U0, V0, ..., U3, V3 } - vpand m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 } -%else - mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } - mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } - pand m0, m2 ; (word) { U0, V0, ..., U3, V3 } - pand m1, m2 ; (word) { U4, V4, ..., U7, V7 } -%endif -%endif ; yuyv/uyvy - packuswb m0, m1 ; (byte) { U0, V0, ..., U7, V7 } - pand m1, m0, m2 ; (word) { U0, U1, ..., U7 } - psrlw m0, 8 ; (word) { V0, V1, ..., V7 } -%if mmsize == 16 - packuswb m1, m0 ; (byte) { U0, ... U7, V1, ... V7 } - movh [dstUq+wq], m1 - movhps [dstVq+wq], m1 -%else ; mmsize == 8 - packuswb m1, m1 ; (byte) { U0, ... U3 } - packuswb m0, m0 ; (byte) { V0, ... V3 } - movh [dstUq+wq], m1 - movh [dstVq+wq], m0 -%endif ; mmsize == 8/16 - add wq, mmsize / 2 - jl .loop_%1 - REP_RET -%endmacro - -; %1 = nr. of XMM registers -; %2 = yuyv or uyvy -; %3 = if specified, it means that unaligned and aligned code in loop -; will be the same (i.e. UYVY+AVX), and thus we don't need to -; split the loop in an aligned and unaligned case -%macro YUYV_TO_UV_FN 2-3 -cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w -%if ARCH_X86_64 - movsxd wq, dword r5m -%else ; x86-32 - mov wq, r5m -%endif - add dstUq, wq - add dstVq, wq -%if mmsize == 16 && %0 == 2 - test srcq, 15 -%endif - lea srcq, [srcq+wq*4] - pcmpeqb m2, m2 ; (byte) { 0xff } x 16 - psrlw m2, 8 ; (word) { 0x00ff } x 8 - ; NOTE: if uyvy+avx, u/a are identical -%if mmsize == 16 && %0 == 2 - jnz .loop_u_start - neg wq - LOOP_YUYV_TO_UV a, %2 -.loop_u_start: - neg wq - LOOP_YUYV_TO_UV u, %2 -%else ; mmsize == 8 - neg wq - LOOP_YUYV_TO_UV a, %2 -%endif ; mmsize == 8/16 -%endmacro - -; %1 = a (aligned) or u (unaligned) -; %2 = nv12 or nv21 -%macro LOOP_NVXX_TO_UV 2 -.loop_%1: - mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... } - mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... } - pand m2, m0, m5 ; (word) { U0, U1, ..., U7 } - pand m3, m1, m5 ; (word) { U8, U9, ..., U15 } - psrlw m0, 8 ; (word) { V0, V1, ..., V7 } - psrlw m1, 8 ; (word) { V8, V9, ..., V15 } - packuswb m2, m3 ; (byte) { U0, ..., U15 } - packuswb m0, m1 ; (byte) { V0, ..., V15 } -%ifidn %2, nv12 - mova [dstUq+wq], m2 - mova [dstVq+wq], m0 -%else ; nv21 - mova [dstVq+wq], m2 - mova [dstUq+wq], m0 -%endif ; nv12/21 - add wq, mmsize - jl .loop_%1 - REP_RET -%endmacro - -; %1 = nr. of XMM registers -; %2 = nv12 or nv21 -%macro NVXX_TO_UV_FN 2 -cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w -%if ARCH_X86_64 - movsxd wq, dword r5m -%else ; x86-32 - mov wq, r5m -%endif - add dstUq, wq - add dstVq, wq -%if mmsize == 16 - test srcq, 15 -%endif - lea srcq, [srcq+wq*2] - pcmpeqb m5, m5 ; (byte) { 0xff } x 16 - psrlw m5, 8 ; (word) { 0x00ff } x 8 -%if mmsize == 16 - jnz .loop_u_start - neg wq - LOOP_NVXX_TO_UV a, %2 -.loop_u_start: - neg wq - LOOP_NVXX_TO_UV u, %2 -%else ; mmsize == 8 - neg wq - LOOP_NVXX_TO_UV a, %2 -%endif ; mmsize == 8/16 -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -YUYV_TO_Y_FN 0, yuyv -YUYV_TO_Y_FN 0, uyvy -YUYV_TO_UV_FN 0, yuyv -YUYV_TO_UV_FN 0, uyvy -NVXX_TO_UV_FN 0, nv12 -NVXX_TO_UV_FN 0, nv21 -%endif - -INIT_XMM sse2 -YUYV_TO_Y_FN 3, yuyv -YUYV_TO_Y_FN 2, uyvy -YUYV_TO_UV_FN 3, yuyv -YUYV_TO_UV_FN 3, uyvy -NVXX_TO_UV_FN 5, nv12 -NVXX_TO_UV_FN 5, nv21 - -%if HAVE_AVX_EXTERNAL -INIT_XMM avx -; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but -; that's not faster in practice -YUYV_TO_UV_FN 3, yuyv -YUYV_TO_UV_FN 3, uyvy, 1 -NVXX_TO_UV_FN 5, nv12 -NVXX_TO_UV_FN 5, nv21 -%endif diff --git a/ffmpeg/libswscale/x86/output.asm b/ffmpeg/libswscale/x86/output.asm deleted file mode 100644 index 9ea4af9..0000000 --- a/ffmpeg/libswscale/x86/output.asm +++ /dev/null @@ -1,413 +0,0 @@ -;****************************************************************************** -;* x86-optimized vertical line scaling functions -;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> -;* Kieran Kunhya <kieran@kunhya.com> -;* -;* This file is part of FFmpeg. -;* -;* FFmpeg is free software; you can redistribute it and/or -;* modify it under the terms of the GNU Lesser General Public -;* License as published by the Free Software Foundation; either -;* version 2.1 of the License, or (at your option) any later version. -;* -;* FFmpeg is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;* Lesser General Public License for more details. -;* -;* You should have received a copy of the GNU Lesser General Public -;* License along with FFmpeg; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;****************************************************************************** - -%include "libavutil/x86/x86util.asm" - -SECTION_RODATA - -minshort: times 8 dw 0x8000 -yuv2yuvX_16_start: times 4 dd 0x4000 - 0x40000000 -yuv2yuvX_10_start: times 4 dd 0x10000 -yuv2yuvX_9_start: times 4 dd 0x20000 -yuv2yuvX_10_upper: times 8 dw 0x3ff -yuv2yuvX_9_upper: times 8 dw 0x1ff -pd_4: times 4 dd 4 -pd_4min0x40000:times 4 dd 4 - (0x40000) -pw_16: times 8 dw 16 -pw_32: times 8 dw 32 -pw_512: times 8 dw 512 -pw_1024: times 8 dw 1024 - -SECTION .text - -;----------------------------------------------------------------------------- -; vertical line scaling -; -; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW, -; const uint8_t *dither, int offset) -; and -; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize, -; const int16_t **src, uint8_t *dst, int dstW, -; const uint8_t *dither, int offset) -; -; Scale one or $filterSize lines of source data to generate one line of output -; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in -; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple -; of 2. $offset is either 0 or 3. $dither holds 8 values. -;----------------------------------------------------------------------------- - -%macro yuv2planeX_fn 3 - -%if ARCH_X86_32 -%define cntr_reg fltsizeq -%define movsx mov -%else -%define cntr_reg r7 -%define movsx movsxd -%endif - -cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset -%if %1 == 8 || %1 == 9 || %1 == 10 - pxor m6, m6 -%endif ; %1 == 8/9/10 - -%if %1 == 8 -%if ARCH_X86_32 -%assign pad 0x2c - (stack_offset & 15) - SUB rsp, pad -%define m_dith m7 -%else ; x86-64 -%define m_dith m9 -%endif ; x86-32 - - ; create registers holding dither - movq m_dith, [ditherq] ; dither - test offsetd, offsetd - jz .no_rot -%if mmsize == 16 - punpcklqdq m_dith, m_dith -%endif ; mmsize == 16 - PALIGNR m_dith, m_dith, 3, m0 -.no_rot: -%if mmsize == 16 - punpcklbw m_dith, m6 -%if ARCH_X86_64 - punpcklwd m8, m_dith, m6 - pslld m8, 12 -%else ; x86-32 - punpcklwd m5, m_dith, m6 - pslld m5, 12 -%endif ; x86-32/64 - punpckhwd m_dith, m6 - pslld m_dith, 12 -%if ARCH_X86_32 - mova [rsp+ 0], m5 - mova [rsp+16], m_dith -%endif -%else ; mmsize == 8 - punpcklbw m5, m_dith, m6 - punpckhbw m_dith, m6 - punpcklwd m4, m5, m6 - punpckhwd m5, m6 - punpcklwd m3, m_dith, m6 - punpckhwd m_dith, m6 - pslld m4, 12 - pslld m5, 12 - pslld m3, 12 - pslld m_dith, 12 - mova [rsp+ 0], m4 - mova [rsp+ 8], m5 - mova [rsp+16], m3 - mova [rsp+24], m_dith -%endif ; mmsize == 8/16 -%endif ; %1 == 8 - - xor r5, r5 - -.pixelloop: -%assign %%i 0 - ; the rep here is for the 8bit output mmx case, where dither covers - ; 8 pixels but we can only handle 2 pixels per register, and thus 4 - ; pixels per iteration. In order to not have to keep track of where - ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2. -%if %1 == 8 -%assign %%repcnt 16/mmsize -%else -%assign %%repcnt 1 -%endif - -%rep %%repcnt - -%if %1 == 8 -%if ARCH_X86_32 - mova m2, [rsp+mmsize*(0+%%i)] - mova m1, [rsp+mmsize*(1+%%i)] -%else ; x86-64 - mova m2, m8 - mova m1, m_dith -%endif ; x86-32/64 -%else ; %1 == 9/10/16 - mova m1, [yuv2yuvX_%1_start] - mova m2, m1 -%endif ; %1 == 8/9/10/16 - movsx cntr_reg, fltsizem -.filterloop_ %+ %%i: - ; input pixels - mov r6, [srcq+gprsize*cntr_reg-2*gprsize] -%if %1 == 16 - mova m3, [r6+r5*4] - mova m5, [r6+r5*4+mmsize] -%else ; %1 == 8/9/10 - mova m3, [r6+r5*2] -%endif ; %1 == 8/9/10/16 - mov r6, [srcq+gprsize*cntr_reg-gprsize] -%if %1 == 16 - mova m4, [r6+r5*4] - mova m6, [r6+r5*4+mmsize] -%else ; %1 == 8/9/10 - mova m4, [r6+r5*2] -%endif ; %1 == 8/9/10/16 - - ; coefficients - movd m0, [filterq+2*cntr_reg-4] ; coeff[0], coeff[1] -%if %1 == 16 - pshuflw m7, m0, 0 ; coeff[0] - pshuflw m0, m0, 0x55 ; coeff[1] - pmovsxwd m7, m7 ; word -> dword - pmovsxwd m0, m0 ; word -> dword - - pmulld m3, m7 - pmulld m5, m7 - pmulld m4, m0 - pmulld m6, m0 - - paddd m2, m3 - paddd m1, m5 - paddd m2, m4 - paddd m1, m6 -%else ; %1 == 10/9/8 - punpcklwd m5, m3, m4 - punpckhwd m3, m4 - SPLATD m0 - - pmaddwd m5, m0 - pmaddwd m3, m0 - - paddd m2, m5 - paddd m1, m3 -%endif ; %1 == 8/9/10/16 - - sub cntr_reg, 2 - jg .filterloop_ %+ %%i - -%if %1 == 16 - psrad m2, 31 - %1 - psrad m1, 31 - %1 -%else ; %1 == 10/9/8 - psrad m2, 27 - %1 - psrad m1, 27 - %1 -%endif ; %1 == 8/9/10/16 - -%if %1 == 8 - packssdw m2, m1 - packuswb m2, m2 - movh [dstq+r5*1], m2 -%else ; %1 == 9/10/16 -%if %1 == 16 - packssdw m2, m1 - paddw m2, [minshort] -%else ; %1 == 9/10 -%if cpuflag(sse4) - packusdw m2, m1 -%else ; mmxext/sse2 - packssdw m2, m1 - pmaxsw m2, m6 -%endif ; mmxext/sse2/sse4/avx - pminsw m2, [yuv2yuvX_%1_upper] -%endif ; %1 == 9/10/16 - mova [dstq+r5*2], m2 -%endif ; %1 == 8/9/10/16 - - add r5, mmsize/2 - sub wd, mmsize/2 - -%assign %%i %%i+2 -%endrep - jg .pixelloop - -%if %1 == 8 -%if ARCH_X86_32 - ADD rsp, pad - RET -%else ; x86-64 - REP_RET -%endif ; x86-32/64 -%else ; %1 == 9/10/16 - REP_RET -%endif ; %1 == 8/9/10/16 -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmxext -yuv2planeX_fn 8, 0, 7 -yuv2planeX_fn 9, 0, 5 -yuv2planeX_fn 10, 0, 5 -%endif - -INIT_XMM sse2 -yuv2planeX_fn 8, 10, 7 -yuv2planeX_fn 9, 7, 5 -yuv2planeX_fn 10, 7, 5 - -INIT_XMM sse4 -yuv2planeX_fn 8, 10, 7 -yuv2planeX_fn 9, 7, 5 -yuv2planeX_fn 10, 7, 5 -yuv2planeX_fn 16, 8, 5 - -%if HAVE_AVX_EXTERNAL -INIT_XMM avx -yuv2planeX_fn 8, 10, 7 -yuv2planeX_fn 9, 7, 5 -yuv2planeX_fn 10, 7, 5 -%endif - -; %1=outout-bpc, %2=alignment (u/a) -%macro yuv2plane1_mainloop 2 -.loop_%2: -%if %1 == 8 - paddsw m0, m2, [srcq+wq*2+mmsize*0] - paddsw m1, m3, [srcq+wq*2+mmsize*1] - psraw m0, 7 - psraw m1, 7 - packuswb m0, m1 - mov%2 [dstq+wq], m0 -%elif %1 == 16 - paddd m0, m4, [srcq+wq*4+mmsize*0] - paddd m1, m4, [srcq+wq*4+mmsize*1] - paddd m2, m4, [srcq+wq*4+mmsize*2] - paddd m3, m4, [srcq+wq*4+mmsize*3] - psrad m0, 3 - psrad m1, 3 - psrad m2, 3 - psrad m3, 3 -%if cpuflag(sse4) ; avx/sse4 - packusdw m0, m1 - packusdw m2, m3 -%else ; mmx/sse2 - packssdw m0, m1 - packssdw m2, m3 - paddw m0, m5 - paddw m2, m5 -%endif ; mmx/sse2/sse4/avx - mov%2 [dstq+wq*2+mmsize*0], m0 - mov%2 [dstq+wq*2+mmsize*1], m2 -%else ; %1 == 9/10 - paddsw m0, m2, [srcq+wq*2+mmsize*0] - paddsw m1, m2, [srcq+wq*2+mmsize*1] - psraw m0, 15 - %1 - psraw m1, 15 - %1 - pmaxsw m0, m4 - pmaxsw m1, m4 - pminsw m0, m3 - pminsw m1, m3 - mov%2 [dstq+wq*2+mmsize*0], m0 - mov%2 [dstq+wq*2+mmsize*1], m1 -%endif - add wq, mmsize - jl .loop_%2 -%endmacro - -%macro yuv2plane1_fn 3 -cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset - movsxdifnidn wq, wd - add wq, mmsize - 1 - and wq, ~(mmsize - 1) -%if %1 == 8 - add dstq, wq -%else ; %1 != 8 - lea dstq, [dstq+wq*2] -%endif ; %1 == 8 -%if %1 == 16 - lea srcq, [srcq+wq*4] -%else ; %1 != 16 - lea srcq, [srcq+wq*2] -%endif ; %1 == 16 - neg wq - -%if %1 == 8 - pxor m4, m4 ; zero - - ; create registers holding dither - movq m3, [ditherq] ; dither - test offsetd, offsetd - jz .no_rot -%if mmsize == 16 - punpcklqdq m3, m3 -%endif ; mmsize == 16 - PALIGNR m3, m3, 3, m2 -.no_rot: -%if mmsize == 8 - mova m2, m3 - punpckhbw m3, m4 ; byte->word - punpcklbw m2, m4 ; byte->word -%else - punpcklbw m3, m4 - mova m2, m3 -%endif -%elif %1 == 9 - pxor m4, m4 - mova m3, [pw_512] - mova m2, [pw_32] -%elif %1 == 10 - pxor m4, m4 - mova m3, [pw_1024] - mova m2, [pw_16] -%else ; %1 == 16 -%if cpuflag(sse4) ; sse4/avx - mova m4, [pd_4] -%else ; mmx/sse2 - mova m4, [pd_4min0x40000] - mova m5, [minshort] -%endif ; mmx/sse2/sse4/avx -%endif ; %1 == .. - - ; actual pixel scaling -%if mmsize == 8 - yuv2plane1_mainloop %1, a -%else ; mmsize == 16 - test dstq, 15 - jnz .unaligned - yuv2plane1_mainloop %1, a - REP_RET -.unaligned: - yuv2plane1_mainloop %1, u -%endif ; mmsize == 8/16 - REP_RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -yuv2plane1_fn 8, 0, 5 -yuv2plane1_fn 16, 0, 3 - -INIT_MMX mmxext -yuv2plane1_fn 9, 0, 3 -yuv2plane1_fn 10, 0, 3 -%endif - -INIT_XMM sse2 -yuv2plane1_fn 8, 5, 5 -yuv2plane1_fn 9, 5, 3 -yuv2plane1_fn 10, 5, 3 -yuv2plane1_fn 16, 6, 3 - -INIT_XMM sse4 -yuv2plane1_fn 16, 5, 3 - -%if HAVE_AVX_EXTERNAL -INIT_XMM avx -yuv2plane1_fn 8, 5, 5 -yuv2plane1_fn 9, 5, 3 -yuv2plane1_fn 10, 5, 3 -yuv2plane1_fn 16, 5, 3 -%endif diff --git a/ffmpeg/libswscale/x86/rgb2rgb.c b/ffmpeg/libswscale/x86/rgb2rgb.c deleted file mode 100644 index 8cc99c6..0000000 --- a/ffmpeg/libswscale/x86/rgb2rgb.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * software RGB to RGB converter - * pluralize by software PAL8 to RGB converter - * software YUV to YUV converter - * software YUV to RGB converter - * Written by Nick Kurshev. - * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "config.h" -#include "libavutil/attributes.h" -#include "libavutil/x86/asm.h" -#include "libavutil/x86/cpu.h" -#include "libavutil/cpu.h" -#include "libavutil/bswap.h" -#include "libswscale/rgb2rgb.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" - -#if HAVE_INLINE_ASM - -DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL; -DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL; -DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL; -DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL; -DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL; -DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL; -DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL; -DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL; -DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL; -DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL; -DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ -DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ -DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL; -DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL; -DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL; -#define mask16b mask15b -DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL; -DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL; -DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL; -DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL; -DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; -DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; -DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; -DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; -DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; -DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; -DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; - -#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) -#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) -#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) -#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5)) -#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5)) -#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5)) -#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5)) -#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) -#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) - -// Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one. - -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_AVX 0 - -//MMX versions -#undef RENAME -#define RENAME(a) a ## _mmx -#include "rgb2rgb_template.c" - -// MMXEXT versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "rgb2rgb_template.c" - -//SSE2 versions -#undef RENAME -#undef COMPILE_TEMPLATE_SSE2 -#define COMPILE_TEMPLATE_SSE2 1 -#define RENAME(a) a ## _sse2 -#include "rgb2rgb_template.c" - -//AVX versions -#undef RENAME -#undef COMPILE_TEMPLATE_AVX -#define COMPILE_TEMPLATE_AVX 1 -#define RENAME(a) a ## _avx -#include "rgb2rgb_template.c" - -//3DNOW versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_AVX -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_AVX 0 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3dnow -#include "rgb2rgb_template.c" - -/* - RGB15->RGB16 original by Strepto/Astral - ported to gcc & bugfixed : A'rpi - MMXEXT, 3DNOW optimization by Nick Kurshev - 32-bit C version, and and&add trick by Michael Niedermayer -*/ - -#endif /* HAVE_INLINE_ASM */ - -av_cold void rgb2rgb_init_x86(void) -{ -#if HAVE_INLINE_ASM - int cpu_flags = av_get_cpu_flags(); - - if (INLINE_MMX(cpu_flags)) - rgb2rgb_init_mmx(); - if (INLINE_AMD3DNOW(cpu_flags)) - rgb2rgb_init_3dnow(); - if (INLINE_MMXEXT(cpu_flags)) - rgb2rgb_init_mmxext(); - if (INLINE_SSE2(cpu_flags)) - rgb2rgb_init_sse2(); - if (INLINE_AVX(cpu_flags)) - rgb2rgb_init_avx(); -#endif /* HAVE_INLINE_ASM */ -} diff --git a/ffmpeg/libswscale/x86/rgb2rgb_template.c b/ffmpeg/libswscale/x86/rgb2rgb_template.c deleted file mode 100644 index d58219b..0000000 --- a/ffmpeg/libswscale/x86/rgb2rgb_template.c +++ /dev/null @@ -1,2533 +0,0 @@ -/* - * software RGB to RGB converter - * pluralize by software PAL8 to RGB converter - * software YUV to YUV converter - * software YUV to RGB converter - * Written by Nick Kurshev. - * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) - * lot of big-endian byte order fixes by Alex Beregszaszi - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stddef.h> - -#include "libavutil/attributes.h" - -#undef PREFETCH -#undef MOVNTQ -#undef EMMS -#undef SFENCE -#undef PAVGB - -#if COMPILE_TEMPLATE_AMD3DNOW -#define PREFETCH "prefetch" -#define PAVGB "pavgusb" -#elif COMPILE_TEMPLATE_MMXEXT -#define PREFETCH "prefetchnta" -#define PAVGB "pavgb" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#if COMPILE_TEMPLATE_MMXEXT -#define MOVNTQ "movntq" -#define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE " # nop" -#endif - -#if !COMPILE_TEMPLATE_SSE2 - -#if !COMPILE_TEMPLATE_AMD3DNOW - -static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 23; - __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "punpckldq 3(%1), %%mm0 \n\t" - "movd 6(%1), %%mm1 \n\t" - "punpckldq 9(%1), %%mm1 \n\t" - "movd 12(%1), %%mm2 \n\t" - "punpckldq 15(%1), %%mm2 \n\t" - "movd 18(%1), %%mm3 \n\t" - "punpckldq 21(%1), %%mm3 \n\t" - "por %%mm7, %%mm0 \n\t" - "por %%mm7, %%mm1 \n\t" - "por %%mm7, %%mm2 \n\t" - "por %%mm7, %%mm3 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - MOVNTQ" %%mm1, 8(%0) \n\t" - MOVNTQ" %%mm2, 16(%0) \n\t" - MOVNTQ" %%mm3, 24(%0)" - :: "r"(dest), "r"(s) - :"memory"); - dest += 32; - s += 24; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - *dest++ = 255; - } -} - -#define STORE_BGR24_MMX \ - "psrlq $8, %%mm2 \n\t" \ - "psrlq $8, %%mm3 \n\t" \ - "psrlq $8, %%mm6 \n\t" \ - "psrlq $8, %%mm7 \n\t" \ - "pand "MANGLE(mask24l)", %%mm0\n\t" \ - "pand "MANGLE(mask24l)", %%mm1\n\t" \ - "pand "MANGLE(mask24l)", %%mm4\n\t" \ - "pand "MANGLE(mask24l)", %%mm5\n\t" \ - "pand "MANGLE(mask24h)", %%mm2\n\t" \ - "pand "MANGLE(mask24h)", %%mm3\n\t" \ - "pand "MANGLE(mask24h)", %%mm6\n\t" \ - "pand "MANGLE(mask24h)", %%mm7\n\t" \ - "por %%mm2, %%mm0 \n\t" \ - "por %%mm3, %%mm1 \n\t" \ - "por %%mm6, %%mm4 \n\t" \ - "por %%mm7, %%mm5 \n\t" \ - \ - "movq %%mm1, %%mm2 \n\t" \ - "movq %%mm4, %%mm3 \n\t" \ - "psllq $48, %%mm2 \n\t" \ - "psllq $32, %%mm3 \n\t" \ - "por %%mm2, %%mm0 \n\t" \ - "psrlq $16, %%mm1 \n\t" \ - "psrlq $32, %%mm4 \n\t" \ - "psllq $16, %%mm5 \n\t" \ - "por %%mm3, %%mm1 \n\t" \ - "por %%mm5, %%mm4 \n\t" \ - \ - MOVNTQ" %%mm0, (%0) \n\t" \ - MOVNTQ" %%mm1, 8(%0) \n\t" \ - MOVNTQ" %%mm4, 16(%0)" - - -static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 31; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq 8(%1), %%mm1 \n\t" - "movq 16(%1), %%mm4 \n\t" - "movq 24(%1), %%mm5 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "movq %%mm4, %%mm6 \n\t" - "movq %%mm5, %%mm7 \n\t" - STORE_BGR24_MMX - :: "r"(dest), "r"(s) - :"memory"); - dest += 24; - s += 32; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - s++; - } -} - -/* - original by Strepto/Astral - ported to gcc & bugfixed: A'rpi - MMXEXT, 3DNOW optimization by Nick Kurshev - 32-bit C version, and and&add trick by Michael Niedermayer -*/ -static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size) -{ - register const uint8_t* s=src; - register uint8_t* d=dst; - register const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*s)); - __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); - mm_end = end - 15; - while (s<mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq 8(%1), %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm2 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - MOVNTQ" %%mm2, 8(%0)" - :: "r"(d), "r"(s) - ); - d+=16; - s+=16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - mm_end = end - 3; - while (s < mm_end) { - register unsigned x= *((const uint32_t *)s); - *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); - d+=4; - s+=4; - } - if (s < end) { - register unsigned short x= *((const uint16_t *)s); - *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); - } -} - -static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size) -{ - register const uint8_t* s=src; - register uint8_t* d=dst; - register const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*s)); - __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); - __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); - mm_end = end - 15; - while (s<mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq 8(%1), %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $1, %%mm0 \n\t" - "psrlq $1, %%mm2 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm3 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - MOVNTQ" %%mm2, 8(%0)" - :: "r"(d), "r"(s) - ); - d+=16; - s+=16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - mm_end = end - 3; - while (s < mm_end) { - register uint32_t x= *((const uint32_t*)s); - *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); - s+=4; - d+=4; - } - if (s < end) { - register uint16_t x= *((const uint16_t*)s); - *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); - } -} - -static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - mm_end = end - 15; - __asm__ volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - "jmp 2f \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $5, %%mm0 \n\t" - "pslld $11, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "2: \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) - ); - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register int rgb = *(const uint32_t*)s; s += 4; - *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); - } -} - -static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $8, %%mm0 \n\t" - "psllq $8, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - :: "r"(d),"r"(s),"m"(blue_16mask):"memory"); - d += 4; - s += 16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register int rgb = *(const uint32_t*)s; s += 4; - *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); - } -} - -static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - mm_end = end - 15; - __asm__ volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - "jmp 2f \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $6, %%mm0 \n\t" - "pslld $10, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "2: \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) - ); - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register int rgb = *(const uint32_t*)s; s += 4; - *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); - } -} - -static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $7, %%mm0 \n\t" - "psllq $7, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); - d += 4; - s += 16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register int rgb = *(const uint32_t*)s; s += 4; - *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); - } -} - -static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 11; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 3(%1), %%mm3 \n\t" - "punpckldq 6(%1), %%mm0 \n\t" - "punpckldq 9(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $8, %%mm2 \n\t" - "psrlq $8, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - const int b = *s++; - const int g = *s++; - const int r = *s++; - *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -} - -static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 3(%1), %%mm3 \n\t" - "punpckldq 6(%1), %%mm0 \n\t" - "punpckldq 9(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $8, %%mm0 \n\t" - "psllq $8, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - const int r = *s++; - const int g = *s++; - const int b = *s++; - *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -} - -static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 11; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 3(%1), %%mm3 \n\t" - "punpckldq 6(%1), %%mm0 \n\t" - "punpckldq 9(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $9, %%mm2 \n\t" - "psrlq $9, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - const int b = *s++; - const int g = *s++; - const int r = *s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -} - -static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; - const uint8_t *mm_end; - uint16_t *d = (uint16_t *)dst; - end = s + src_size; - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 3(%1), %%mm3 \n\t" - "punpckldq 6(%1), %%mm0 \n\t" - "punpckldq 9(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $7, %%mm0 \n\t" - "psllq $7, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - const int r = *s++; - const int g = *s++; - const int b = *s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -} - -static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint16_t *end; - const uint16_t *mm_end; - uint8_t *d = dst; - const uint16_t *s = (const uint16_t*)src; - end = s + src_size/2; - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 7; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%1), %%mm1 \n\t" - "movq (%1), %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $5, %%mm0 \n\t" - "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" - "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" - "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - - "movq %%mm0, %%mm6 \n\t" - "movq %%mm3, %%mm7 \n\t" - - "movq 8(%1), %%mm0 \n\t" - "movq 8(%1), %%mm1 \n\t" - "movq 8(%1), %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $5, %%mm0 \n\t" - "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" - "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" - "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - - :"=m"(*d) - :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) - :"memory"); - /* borrowed 32 to 24 */ - __asm__ volatile( - "movq %%mm0, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "movq %%mm6, %%mm0 \n\t" - "movq %%mm7, %%mm1 \n\t" - - "movq %%mm4, %%mm6 \n\t" - "movq %%mm5, %%mm7 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - - STORE_BGR24_MMX - - :: "r"(d), "m"(*s) - :"memory"); - d += 24; - s += 8; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register uint16_t bgr; - bgr = *s++; - *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); - *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - } -} - -static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint16_t *end; - const uint16_t *mm_end; - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (const uint16_t *)src; - end = s + src_size/2; - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 7; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%1), %%mm1 \n\t" - "movq (%1), %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $5, %%mm0 \n\t" - "psrlq $1, %%mm2 \n\t" - "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" - "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" - "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - - "movq %%mm0, %%mm6 \n\t" - "movq %%mm3, %%mm7 \n\t" - - "movq 8(%1), %%mm0 \n\t" - "movq 8(%1), %%mm1 \n\t" - "movq 8(%1), %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $5, %%mm0 \n\t" - "psrlq $1, %%mm2 \n\t" - "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" - "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" - "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - :"=m"(*d) - :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) - :"memory"); - /* borrowed 32 to 24 */ - __asm__ volatile( - "movq %%mm0, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "movq %%mm6, %%mm0 \n\t" - "movq %%mm7, %%mm1 \n\t" - - "movq %%mm4, %%mm6 \n\t" - "movq %%mm5, %%mm7 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - - STORE_BGR24_MMX - - :: "r"(d), "m"(*s) - :"memory"); - d += 24; - s += 8; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register uint16_t bgr; - bgr = *s++; - *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); - *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - } -} - -/* - * mm0 = 00 B3 00 B2 00 B1 00 B0 - * mm1 = 00 G3 00 G2 00 G1 00 G0 - * mm2 = 00 R3 00 R2 00 R1 00 R0 - * mm6 = FF FF FF FF FF FF FF FF - * mm7 = 00 00 00 00 00 00 00 00 - */ -#define PACK_RGB32 \ - "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \ - "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \ - "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \ - "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ - "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \ - "movq %%mm0, %%mm3 \n\t" \ - "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ - "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ - MOVNTQ" %%mm0, (%0) \n\t" \ - MOVNTQ" %%mm3, 8(%0) \n\t" \ - -static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint16_t *end; - const uint16_t *mm_end; - uint8_t *d = dst; - const uint16_t *s = (const uint16_t *)src; - end = s + src_size/2; - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); - __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); - mm_end = end - 3; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%1), %%mm1 \n\t" - "movq (%1), %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $5, %%mm0 \n\t" - "pmulhw %5, %%mm0 \n\t" - "pmulhw %5, %%mm1 \n\t" - "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" - PACK_RGB32 - ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) - :"memory"); - d += 16; - s += 4; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register uint16_t bgr; - bgr = *s++; - *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); - *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); - *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); - *d++ = 255; - } -} - -static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size) -{ - const uint16_t *end; - const uint16_t *mm_end; - uint8_t *d = dst; - const uint16_t *s = (const uint16_t*)src; - end = s + src_size/2; - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); - __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); - mm_end = end - 3; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32(%1) \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%1), %%mm1 \n\t" - "movq (%1), %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $5, %%mm0 \n\t" - "psrlq $1, %%mm2 \n\t" - "pmulhw %5, %%mm0 \n\t" - "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" - "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" - PACK_RGB32 - ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) - :"memory"); - d += 16; - s += 4; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - while (s < end) { - register uint16_t bgr; - bgr = *s++; - *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); - *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); - *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); - *d++ = 255; - } -} - -static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size) -{ - x86_reg idx = 15 - src_size; - const uint8_t *s = src-idx; - uint8_t *d = dst-idx; - __asm__ volatile( - "test %0, %0 \n\t" - "jns 2f \n\t" - PREFETCH" (%1, %0) \n\t" - "movq %3, %%mm7 \n\t" - "pxor %4, %%mm7 \n\t" - "movq %%mm7, %%mm6 \n\t" - "pxor %5, %%mm7 \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %0) \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq 8(%1, %0), %%mm1 \n\t" -# if COMPILE_TEMPLATE_MMXEXT - "pshufw $177, %%mm0, %%mm3 \n\t" - "pshufw $177, %%mm1, %%mm5 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm6, %%mm5 \n\t" - "por %%mm3, %%mm0 \n\t" - "por %%mm5, %%mm1 \n\t" -# else - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm4 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm6, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "pslld $16, %%mm2 \n\t" - "psrld $16, %%mm3 \n\t" - "pslld $16, %%mm4 \n\t" - "psrld $16, %%mm5 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm4, %%mm1 \n\t" - "por %%mm3, %%mm0 \n\t" - "por %%mm5, %%mm1 \n\t" -# endif - MOVNTQ" %%mm0, (%2, %0) \n\t" - MOVNTQ" %%mm1, 8(%2, %0) \n\t" - "add $16, %0 \n\t" - "js 1b \n\t" - SFENCE" \n\t" - EMMS" \n\t" - "2: \n\t" - : "+&r"(idx) - : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) - : "memory"); - for (; idx<15; idx+=4) { - register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; - v &= 0xff00ff; - *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); - } -} - -static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) -{ - unsigned i; - x86_reg mmx_size= 23 - src_size; - __asm__ volatile ( - "test %%"REG_a", %%"REG_a" \n\t" - "jns 2f \n\t" - "movq "MANGLE(mask24r)", %%mm5 \n\t" - "movq "MANGLE(mask24g)", %%mm6 \n\t" - "movq "MANGLE(mask24b)", %%mm7 \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %%"REG_a") \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG - "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG - "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B - "psllq $16, %%mm0 \n\t" // 00 BGR BGR - "pand %%mm5, %%mm0 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "por %%mm0, %%mm1 \n\t" - "por %%mm2, %%mm1 \n\t" - "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG - MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG - "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B - "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR - "pand %%mm7, %%mm0 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm6, %%mm2 \n\t" - "por %%mm0, %%mm1 \n\t" - "por %%mm2, %%mm1 \n\t" - "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B - MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R - "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR - "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG - "pand %%mm6, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm5, %%mm2 \n\t" - "por %%mm0, %%mm1 \n\t" - "por %%mm2, %%mm1 \n\t" - MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" - "add $24, %%"REG_a" \n\t" - " js 1b \n\t" - "2: \n\t" - : "+a" (mmx_size) - : "r" (src-mmx_size), "r"(dst-mmx_size) - ); - - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - - if (mmx_size==23) return; //finished, was multiple of 8 - - src+= src_size; - dst+= src_size; - src_size= 23-mmx_size; - src-= src_size; - dst-= src_size; - for (i=0; i<src_size; i+=3) { - register uint8_t x; - x = src[i + 2]; - dst[i + 1] = src[i + 1]; - dst[i + 2] = src[i + 0]; - dst[i + 0] = x; - } -} - -static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride, int vertLumPerChroma) -{ - int y; - const x86_reg chromWidth= width>>1; - for (y=0; y<height; y++) { - //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %%"REG_a", 2) \n\t" - PREFETCH" 32(%2, %%"REG_a") \n\t" - PREFETCH" 32(%3, %%"REG_a") \n\t" - "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) - "movq %%mm0, %%mm2 \n\t" // U(0) - "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) - "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) - - "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) - "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) - "movq %%mm3, %%mm4 \n\t" // Y(0) - "movq %%mm5, %%mm6 \n\t" // Y(8) - "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) - "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) - "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) - "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) - - MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) - : "%"REG_a - ); - if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } - __asm__(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride) -{ - //FIXME interpolate chroma - RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); -} - -static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride, int vertLumPerChroma) -{ - int y; - const x86_reg chromWidth= width>>1; - for (y=0; y<height; y++) { - //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %%"REG_a", 2) \n\t" - PREFETCH" 32(%2, %%"REG_a") \n\t" - PREFETCH" 32(%3, %%"REG_a") \n\t" - "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) - "movq %%mm0, %%mm2 \n\t" // U(0) - "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) - "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) - - "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) - "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) - "movq %%mm0, %%mm4 \n\t" // Y(0) - "movq %%mm2, %%mm6 \n\t" // Y(8) - "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) - "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) - "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) - "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) - - MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) - : "%"REG_a - ); - if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } - __asm__(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16 - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride) -{ - //FIXME interpolate chroma - RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); -} - -/** - * Width should be a multiple of 16. - */ -static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride) -{ - RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); -} - -/** - * Width should be a multiple of 16. - */ -static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - int width, int height, - int lumStride, int chromStride, int dstStride) -{ - RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); -} - -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - */ -static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const x86_reg chromWidth= width>>1; - for (y=0; y<height; y+=2) { - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) - "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) - "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) - "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) - "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) - "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) - "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) - - MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" - - "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) - "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) - "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) - "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) - "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) - "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) - "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) - "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" - - "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) - "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) - "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) - "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) - "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) - "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) - "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) - "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) - - MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" - MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); - - ydst += lumStride; - src += srcStride; - - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) - "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) - "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) - "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) - "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); - udst += chromStride; - vdst += chromStride; - ydst += lumStride; - src += srcStride; - } - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW -static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) -{ - int x,y; - - dst[0]= src[0]; - - // first line - for (x=0; x<srcWidth-1; x++) { - dst[2*x+1]= (3*src[x] + src[x+1])>>2; - dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; - } - dst[2*srcWidth-1]= src[srcWidth-1]; - - dst+= dstStride; - - for (y=1; y<srcHeight; y++) { - const x86_reg mmxSize= srcWidth&~15; - __asm__ volatile( - "mov %4, %%"REG_a" \n\t" - "movq "MANGLE(mmx_ff)", %%mm0 \n\t" - "movq (%0, %%"REG_a"), %%mm4 \n\t" - "movq %%mm4, %%mm2 \n\t" - "psllq $8, %%mm4 \n\t" - "pand %%mm0, %%mm2 \n\t" - "por %%mm2, %%mm4 \n\t" - "movq (%1, %%"REG_a"), %%mm5 \n\t" - "movq %%mm5, %%mm3 \n\t" - "psllq $8, %%mm5 \n\t" - "pand %%mm0, %%mm3 \n\t" - "por %%mm3, %%mm5 \n\t" - "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "movq 1(%0, %%"REG_a"), %%mm2 \n\t" - "movq 1(%1, %%"REG_a"), %%mm3 \n\t" - PAVGB" %%mm0, %%mm5 \n\t" - PAVGB" %%mm0, %%mm3 \n\t" - PAVGB" %%mm0, %%mm5 \n\t" - PAVGB" %%mm0, %%mm3 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm1, %%mm2 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm1, %%mm2 \n\t" - "movq %%mm5, %%mm7 \n\t" - "movq %%mm4, %%mm6 \n\t" - "punpcklbw %%mm3, %%mm5 \n\t" - "punpckhbw %%mm3, %%mm7 \n\t" - "punpcklbw %%mm2, %%mm4 \n\t" - "punpckhbw %%mm2, %%mm6 \n\t" - MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" - "add $8, %%"REG_a" \n\t" - "movq -1(%0, %%"REG_a"), %%mm4 \n\t" - "movq -1(%1, %%"REG_a"), %%mm5 \n\t" - " js 1b \n\t" - :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), - "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), - "g" (-mmxSize) - : "%"REG_a - ); - - for (x=mmxSize-1; x<srcWidth-1; x++) { - dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; - dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; - dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; - dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; - } - dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; - dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; - - dst+=dstStride*2; - src+=srcStride; - } - - // last line - dst[0]= src[0]; - - for (x=0; x<srcWidth-1; x++) { - dst[2*x+1]= (3*src[x] + src[x+1])>>2; - dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; - } - dst[2*srcWidth-1]= src[srcWidth-1]; - - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -} -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ - -#if !COMPILE_TEMPLATE_AMD3DNOW -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - * Chrominance data is only taken from every second line, others are ignored. - * FIXME: Write HQ version. - */ -static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const x86_reg chromWidth= width>>1; - for (y=0; y<height; y+=2) { - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) - "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) - "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) - "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) - "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) - "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) - "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) - "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) - - MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" - - "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) - "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) - "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) - "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) - "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) - "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) - "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) - "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) - "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" - - "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) - "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) - "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) - "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) - "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) - "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) - "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) - "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) - - MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" - MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); - - ydst += lumStride; - src += srcStride; - - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) - "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) - "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) - "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) - "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); - udst += chromStride; - vdst += chromStride; - ydst += lumStride; - src += srcStride; - } - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -/** - * Height should be a multiple of 2 and width should be a multiple of 2. - * (If this is a problem for anyone then tell me, and I will fix it.) - * Chrominance data is only taken from every second line, - * others are ignored in the C version. - * FIXME: Write HQ version. - */ -#if HAVE_7REGS -static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t *rgb2yuv) -{ -#define BGR2Y_IDX "16*4+16*32" -#define BGR2U_IDX "16*4+16*33" -#define BGR2V_IDX "16*4+16*34" - int y; - const x86_reg chromWidth= width>>1; - for (y=0; y<height-2; y+=2) { - int i; - for (i=0; i<2; i++) { - __asm__ volatile( - "mov %2, %%"REG_a" \n\t" - "movq "BGR2Y_IDX"(%3), %%mm6 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_d") \n\t" - "movd (%0, %%"REG_d"), %%mm0 \n\t" - "movd 3(%0, %%"REG_d"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movd 6(%0, %%"REG_d"), %%mm2 \n\t" - "movd 9(%0, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" - "packssdw %%mm1, %%mm0 \n\t" - "packssdw %%mm3, %%mm2 \n\t" - "pmaddwd %%mm5, %%mm0 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "packssdw %%mm2, %%mm0 \n\t" - "psraw $7, %%mm0 \n\t" - - "movd 12(%0, %%"REG_d"), %%mm4 \n\t" - "movd 15(%0, %%"REG_d"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movd 18(%0, %%"REG_d"), %%mm2 \n\t" - "movd 21(%0, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" - "packssdw %%mm1, %%mm4 \n\t" - "packssdw %%mm3, %%mm2 \n\t" - "pmaddwd %%mm5, %%mm4 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "add $24, %%"REG_d" \n\t" - "packssdw %%mm2, %%mm4 \n\t" - "psraw $7, %%mm4 \n\t" - - "packuswb %%mm4, %%mm0 \n\t" - "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" - - MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) - : "%"REG_a, "%"REG_d - ); - ydst += lumStride; - src += srcStride; - } - src -= srcStride*2; - __asm__ volatile( - "mov %4, %%"REG_a" \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "movq "BGR2U_IDX"(%5), %%mm6 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" - "add %%"REG_d", %%"REG_d" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_d") \n\t" - PREFETCH" 64(%1, %%"REG_d") \n\t" -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW - "movq (%0, %%"REG_d"), %%mm0 \n\t" - "movq (%1, %%"REG_d"), %%mm1 \n\t" - "movq 6(%0, %%"REG_d"), %%mm2 \n\t" - "movq 6(%1, %%"REG_d"), %%mm3 \n\t" - PAVGB" %%mm1, %%mm0 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $24, %%mm0 \n\t" - "psrlq $24, %%mm2 \n\t" - PAVGB" %%mm1, %%mm0 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd (%0, %%"REG_d"), %%mm0 \n\t" - "movd (%1, %%"REG_d"), %%mm1 \n\t" - "movd 3(%0, %%"REG_d"), %%mm2 \n\t" - "movd 3(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm0 \n\t" - "movd 6(%0, %%"REG_d"), %%mm4 \n\t" - "movd 6(%1, %%"REG_d"), %%mm1 \n\t" - "movd 9(%0, %%"REG_d"), %%mm2 \n\t" - "movd 9(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm4, %%mm2 \n\t" - "psrlw $2, %%mm0 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif - "movq "BGR2V_IDX"(%5), %%mm1 \n\t" - "movq "BGR2V_IDX"(%5), %%mm3 \n\t" - - "pmaddwd %%mm0, %%mm1 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" - "packssdw %%mm2, %%mm0 \n\t" - "packssdw %%mm3, %%mm1 \n\t" - "pmaddwd %%mm5, %%mm0 \n\t" - "pmaddwd %%mm5, %%mm1 \n\t" - "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 - "psraw $7, %%mm0 \n\t" - -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW - "movq 12(%0, %%"REG_d"), %%mm4 \n\t" - "movq 12(%1, %%"REG_d"), %%mm1 \n\t" - "movq 18(%0, %%"REG_d"), %%mm2 \n\t" - "movq 18(%1, %%"REG_d"), %%mm3 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "movq %%mm4, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $24, %%mm4 \n\t" - "psrlq $24, %%mm2 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd 12(%0, %%"REG_d"), %%mm4 \n\t" - "movd 12(%1, %%"REG_d"), %%mm1 \n\t" - "movd 15(%0, %%"REG_d"), %%mm2 \n\t" - "movd 15(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "movd 18(%0, %%"REG_d"), %%mm5 \n\t" - "movd 18(%1, %%"REG_d"), %%mm1 \n\t" - "movd 21(%0, %%"REG_d"), %%mm2 \n\t" - "movd 21(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm5 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "psrlw $2, %%mm4 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif - "movq "BGR2V_IDX"(%5), %%mm1 \n\t" - "movq "BGR2V_IDX"(%5), %%mm3 \n\t" - - "pmaddwd %%mm4, %%mm1 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" - "packssdw %%mm2, %%mm4 \n\t" - "packssdw %%mm3, %%mm1 \n\t" - "pmaddwd %%mm5, %%mm4 \n\t" - "pmaddwd %%mm5, %%mm1 \n\t" - "add $24, %%"REG_d" \n\t" - "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 - "psraw $7, %%mm4 \n\t" - - "movq %%mm0, %%mm1 \n\t" - "punpckldq %%mm4, %%mm0 \n\t" - "punpckhdq %%mm4, %%mm1 \n\t" - "packsswb %%mm1, %%mm0 \n\t" - "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" - "movd %%mm0, (%2, %%"REG_a") \n\t" - "punpckhdq %%mm0, %%mm0 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) - : "%"REG_a, "%"REG_d - ); - - udst += chromStride; - vdst += chromStride; - src += srcStride*2; - } - - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); - - ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv); -} -#endif /* HAVE_7REGS */ -#endif /* !COMPILE_TEMPLATE_SSE2 */ - -#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX -static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, - int width, int height, int src1Stride, - int src2Stride, int dstStride) -{ - int h; - - for (h=0; h < height; h++) { - int w; - -#if COMPILE_TEMPLATE_SSE2 - __asm__( - "xor %%"REG_a", %%"REG_a" \n\t" - "1: \n\t" - PREFETCH" 64(%1, %%"REG_a") \n\t" - PREFETCH" 64(%2, %%"REG_a") \n\t" - "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" - "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" - "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" - "punpcklbw %%xmm2, %%xmm0 \n\t" - "punpckhbw %%xmm2, %%xmm1 \n\t" - "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" - "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" - "add $16, %%"REG_a" \n\t" - "cmp %3, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) - : "memory", "%"REG_a"" - ); -#else - __asm__( - "xor %%"REG_a", %%"REG_a" \n\t" - "1: \n\t" - PREFETCH" 64(%1, %%"REG_a") \n\t" - PREFETCH" 64(%2, %%"REG_a") \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq 8(%1, %%"REG_a"), %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq (%2, %%"REG_a"), %%mm4 \n\t" - "movq 8(%2, %%"REG_a"), %%mm5 \n\t" - "punpcklbw %%mm4, %%mm0 \n\t" - "punpckhbw %%mm4, %%mm1 \n\t" - "punpcklbw %%mm5, %%mm2 \n\t" - "punpckhbw %%mm5, %%mm3 \n\t" - MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" - "add $16, %%"REG_a" \n\t" - "cmp %3, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) - : "memory", "%"REG_a - ); -#endif - for (w= (width&(~15)); w < width; w++) { - dest[2*w+0] = src1[w]; - dest[2*w+1] = src2[w]; - } - dest += dstStride; - src1 += src1Stride; - src2 += src2Stride; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/ - -#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM -void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, - const uint8_t *src1, - const uint8_t *src2, - int w, uint32_t *unused); -static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, - int width, int height, int srcStride, - int dst1Stride, int dst2Stride) -{ - int h; - - for (h=0; h < height; h++) { - RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL); - src += srcStride; - dst1 += dst1Stride; - dst2 += dst2Stride; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ - -#if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW -static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - int width, int height, - int srcStride1, int srcStride2, - int dstStride1, int dstStride2) -{ - x86_reg x, y; - int w,h; - w=width/2; h=height/2; - __asm__ volatile( - PREFETCH" %0 \n\t" - PREFETCH" %1 \n\t" - ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); - for (y=0;y<h;y++) { - const uint8_t* s1=src1+srcStride1*(y>>1); - uint8_t* d=dst1+dstStride1*y; - x=0; - for (;x<w-31;x+=32) { - __asm__ volatile( - PREFETCH" 32(%1,%2) \n\t" - "movq (%1,%2), %%mm0 \n\t" - "movq 8(%1,%2), %%mm2 \n\t" - "movq 16(%1,%2), %%mm4 \n\t" - "movq 24(%1,%2), %%mm6 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "movq %%mm6, %%mm7 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpcklbw %%mm2, %%mm2 \n\t" - "punpckhbw %%mm3, %%mm3 \n\t" - "punpcklbw %%mm4, %%mm4 \n\t" - "punpckhbw %%mm5, %%mm5 \n\t" - "punpcklbw %%mm6, %%mm6 \n\t" - "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, (%0,%2,2) \n\t" - MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" - MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" - MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" - MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" - MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" - MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" - MOVNTQ" %%mm7, 56(%0,%2,2)" - :: "r"(d), "r"(s1), "r"(x) - :"memory"); - } - for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; - } - for (y=0;y<h;y++) { - const uint8_t* s2=src2+srcStride2*(y>>1); - uint8_t* d=dst2+dstStride2*y; - x=0; - for (;x<w-31;x+=32) { - __asm__ volatile( - PREFETCH" 32(%1,%2) \n\t" - "movq (%1,%2), %%mm0 \n\t" - "movq 8(%1,%2), %%mm2 \n\t" - "movq 16(%1,%2), %%mm4 \n\t" - "movq 24(%1,%2), %%mm6 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "movq %%mm6, %%mm7 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpcklbw %%mm2, %%mm2 \n\t" - "punpckhbw %%mm3, %%mm3 \n\t" - "punpcklbw %%mm4, %%mm4 \n\t" - "punpckhbw %%mm5, %%mm5 \n\t" - "punpcklbw %%mm6, %%mm6 \n\t" - "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, (%0,%2,2) \n\t" - MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" - MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" - MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" - MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" - MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" - MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" - MOVNTQ" %%mm7, 56(%0,%2,2)" - :: "r"(d), "r"(s2), "r"(x) - :"memory"); - } - for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} - -static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - uint8_t *dst, - int width, int height, - int srcStride1, int srcStride2, - int srcStride3, int dstStride) -{ - x86_reg x; - int y,w,h; - w=width/2; h=height; - for (y=0;y<h;y++) { - const uint8_t* yp=src1+srcStride1*y; - const uint8_t* up=src2+srcStride2*(y>>2); - const uint8_t* vp=src3+srcStride3*(y>>2); - uint8_t* d=dst+dstStride*y; - x=0; - for (;x<w-7;x+=8) { - __asm__ volatile( - PREFETCH" 32(%1, %0) \n\t" - PREFETCH" 32(%2, %0) \n\t" - PREFETCH" 32(%3, %0) \n\t" - "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ - "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */ - "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */ - "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ - "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */ - "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */ - "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */ - "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */ - "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */ - "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */ - - "movq %%mm1, %%mm6 \n\t" - "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/ - "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ - "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ - MOVNTQ" %%mm0, (%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" - - "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/ - "movq 8(%1, %0, 4), %%mm0 \n\t" - "movq %%mm0, %%mm3 \n\t" - "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/ - "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/ - MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" - - "movq %%mm4, %%mm6 \n\t" - "movq 16(%1, %0, 4), %%mm0 \n\t" - "movq %%mm0, %%mm3 \n\t" - "punpcklbw %%mm5, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/ - "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/ - MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" - - "punpckhbw %%mm5, %%mm6 \n\t" - "movq 24(%1, %0, 4), %%mm0 \n\t" - "movq %%mm0, %%mm3 \n\t" - "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/ - "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/ - MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" - - : "+r" (x) - : "r"(yp), "r" (up), "r"(vp), "r"(d) - :"memory"); - } - for (; x<w; x++) { - const int x2 = x<<2; - d[8*x+0] = yp[x2]; - d[8*x+1] = up[x]; - d[8*x+2] = yp[x2+1]; - d[8*x+3] = vp[x]; - d[8*x+4] = yp[x2+2]; - d[8*x+5] = up[x]; - d[8*x+6] = yp[x2+3]; - d[8*x+7] = vp[x]; - } - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) -{ - dst += count; - src += 2*count; - count= - count; - - if(count <= -16) { - count += 15; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -30(%1, %0, 2), %%mm0 \n\t" - "movq -22(%1, %0, 2), %%mm1 \n\t" - "movq -14(%1, %0, 2), %%mm2 \n\t" - "movq -6(%1, %0, 2), %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0,-15(%2, %0) \n\t" - MOVNTQ" %%mm2,- 7(%2, %0) \n\t" - "add $16, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src), "r"(dst) - ); - count -= 15; - } - while(count<0) { - dst[count]= src[2*count]; - count++; - } -} - -#if !COMPILE_TEMPLATE_AMD3DNOW -static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) -{ - dst0+= count; - dst1+= count; - src += 4*count; - count= - count; - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%3, %0) \n\t" - MOVNTQ" %%mm1,- 7(%2, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src), "r"(dst0), "r"(dst1) - ); - count -= 7; - } - while(count<0) { - dst0[count]= src[4*count+0]; - dst1[count]= src[4*count+2]; - count++; - } -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) -{ - dst0 += count; - dst1 += count; - src0 += 4*count; - src1 += 4*count; - count= - count; -#ifdef PAVGB - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - PAVGB" -28(%2, %0, 4), %%mm0 \n\t" - PAVGB" -20(%2, %0, 4), %%mm1 \n\t" - PAVGB" -12(%2, %0, 4), %%mm2 \n\t" - PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%4, %0) \n\t" - MOVNTQ" %%mm1,- 7(%3, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) - ); - count -= 7; - } -#endif - while(count<0) { - dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; - dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; - count++; - } -} - -#if !COMPILE_TEMPLATE_AMD3DNOW -static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) -{ - dst0+= count; - dst1+= count; - src += 4*count; - count= - count; - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%3, %0) \n\t" - MOVNTQ" %%mm1,- 7(%2, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src), "r"(dst0), "r"(dst1) - ); - count -= 7; - } - src++; - while(count<0) { - dst0[count]= src[4*count+0]; - dst1[count]= src[4*count+2]; - count++; - } -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) -{ - dst0 += count; - dst1 += count; - src0 += 4*count; - src1 += 4*count; - count= - count; -#ifdef PAVGB - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - PAVGB" -28(%2, %0, 4), %%mm0 \n\t" - PAVGB" -20(%2, %0, 4), %%mm1 \n\t" - PAVGB" -12(%2, %0, 4), %%mm2 \n\t" - PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%4, %0) \n\t" - MOVNTQ" %%mm1,- 7(%3, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) - ); - count -= 7; - } -#endif - src0++; - src1++; - while(count<0) { - dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; - dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; - count++; - } -} - -static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y=0; y<height; y++) { - RENAME(extract_even)(src, ydst, width); - if(y&1) { - RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); - udst+= chromStride; - vdst+= chromStride; - } - - src += srcStride; - ydst+= lumStride; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} - -#if !COMPILE_TEMPLATE_AMD3DNOW -static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y=0; y<height; y++) { - RENAME(extract_even)(src, ydst, width); - RENAME(extract_odd2)(src, udst, vdst, chromWidth); - - src += srcStride; - ydst+= lumStride; - udst+= chromStride; - vdst+= chromStride; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y=0; y<height; y++) { - RENAME(extract_even)(src+1, ydst, width); - if(y&1) { - RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); - udst+= chromStride; - vdst+= chromStride; - } - - src += srcStride; - ydst+= lumStride; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} - -#if !COMPILE_TEMPLATE_AMD3DNOW -static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - int width, int height, - int lumStride, int chromStride, int srcStride) -{ - int y; - const int chromWidth = FF_CEIL_RSHIFT(width, 1); - - for (y=0; y<height; y++) { - RENAME(extract_even)(src+1, ydst, width); - RENAME(extract_even2)(src, udst, vdst, chromWidth); - - src += srcStride; - ydst+= lumStride; - udst+= chromStride; - vdst+= chromStride; - } - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -} -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#endif /* !COMPILE_TEMPLATE_SSE2 */ - -static av_cold void RENAME(rgb2rgb_init)(void) -{ -#if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW - rgb15to16 = RENAME(rgb15to16); - rgb15tobgr24 = RENAME(rgb15tobgr24); - rgb15to32 = RENAME(rgb15to32); - rgb16tobgr24 = RENAME(rgb16tobgr24); - rgb16to32 = RENAME(rgb16to32); - rgb16to15 = RENAME(rgb16to15); - rgb24tobgr16 = RENAME(rgb24tobgr16); - rgb24tobgr15 = RENAME(rgb24tobgr15); - rgb24tobgr32 = RENAME(rgb24tobgr32); - rgb32to16 = RENAME(rgb32to16); - rgb32to15 = RENAME(rgb32to15); - rgb32tobgr24 = RENAME(rgb32tobgr24); - rgb24to15 = RENAME(rgb24to15); - rgb24to16 = RENAME(rgb24to16); - rgb24tobgr24 = RENAME(rgb24tobgr24); - shuffle_bytes_2103 = RENAME(shuffle_bytes_2103); - rgb32tobgr16 = RENAME(rgb32tobgr16); - rgb32tobgr15 = RENAME(rgb32tobgr15); - yv12toyuy2 = RENAME(yv12toyuy2); - yv12touyvy = RENAME(yv12touyvy); - yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); - yuv422ptouyvy = RENAME(yuv422ptouyvy); - yuy2toyv12 = RENAME(yuy2toyv12); - vu9_to_vu12 = RENAME(vu9_to_vu12); - yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); - uyvytoyuv422 = RENAME(uyvytoyuv422); - yuyvtoyuv422 = RENAME(yuyvtoyuv422); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ - -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW - planar2x = RENAME(planar2x); -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ -#if HAVE_7REGS - ff_rgb24toyv12 = RENAME(rgb24toyv12); -#endif /* HAVE_7REGS */ - - yuyvtoyuv420 = RENAME(yuyvtoyuv420); - uyvytoyuv420 = RENAME(uyvytoyuv420); -#endif /* !COMPILE_TEMPLATE_SSE2 */ - -#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX - interleaveBytes = RENAME(interleaveBytes); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/ -#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM - deinterleaveBytes = RENAME(deinterleaveBytes); -#endif -#endif -} diff --git a/ffmpeg/libswscale/x86/scale.asm b/ffmpeg/libswscale/x86/scale.asm deleted file mode 100644 index 940f357..0000000 --- a/ffmpeg/libswscale/x86/scale.asm +++ /dev/null @@ -1,431 +0,0 @@ -;****************************************************************************** -;* x86-optimized horizontal line scaling functions -;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> -;* -;* This file is part of FFmpeg. -;* -;* FFmpeg is free software; you can redistribute it and/or -;* modify it under the terms of the GNU Lesser General Public -;* License as published by the Free Software Foundation; either -;* version 2.1 of the License, or (at your option) any later version. -;* -;* FFmpeg is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;* Lesser General Public License for more details. -;* -;* You should have received a copy of the GNU Lesser General Public -;* License along with FFmpeg; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;****************************************************************************** - -%include "libavutil/x86/x86util.asm" - -SECTION_RODATA - -max_19bit_int: times 4 dd 0x7ffff -max_19bit_flt: times 4 dd 524287.0 -minshort: times 8 dw 0x8000 -unicoeff: times 4 dd 0x20000000 - -SECTION .text - -;----------------------------------------------------------------------------- -; horizontal line scaling -; -; void hscale<source_width>to<intermediate_nbits>_<filterSize>_<opt> -; (SwsContext *c, int{16,32}_t *dst, -; int dstW, const uint{8,16}_t *src, -; const int16_t *filter, -; const int32_t *filterPos, int filterSize); -; -; Scale one horizontal line. Input is either 8-bits width or 16-bits width -; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to -; downscale before multiplying). Filter is 14-bits. Output is either 15bits -; (in int16_t) or 19bits (in int32_t), as given in $intermediate_nbits. Each -; output pixel is generated from $filterSize input pixels, the position of -; the first pixel is given in filterPos[nOutputPixel]. -;----------------------------------------------------------------------------- - -; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, n_args, n_xmm -%macro SCALE_FUNC 6 -%ifnidn %3, X -cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1 -%else -cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize -%endif -%if ARCH_X86_64 - movsxd wq, wd -%define mov32 movsxd -%else ; x86-32 -%define mov32 mov -%endif ; x86-64 -%if %2 == 19 -%if mmsize == 8 ; mmx - mova m2, [max_19bit_int] -%elif cpuflag(sse4) - mova m2, [max_19bit_int] -%else ; ssse3/sse2 - mova m2, [max_19bit_flt] -%endif ; mmx/sse2/ssse3/sse4 -%endif ; %2 == 19 -%if %1 == 16 - mova m6, [minshort] - mova m7, [unicoeff] -%elif %1 == 8 - pxor m3, m3 -%endif ; %1 == 8/16 - -%if %1 == 8 -%define movlh movd -%define movbh movh -%define srcmul 1 -%else ; %1 == 9-16 -%define movlh movq -%define movbh movu -%define srcmul 2 -%endif ; %1 == 8/9-16 - -%ifnidn %3, X - - ; setup loop -%if %3 == 8 - shl wq, 1 ; this allows *16 (i.e. now *8) in lea instructions for the 8-tap filter -%define wshr 1 -%else ; %3 == 4 -%define wshr 0 -%endif ; %3 == 8 - lea filterq, [filterq+wq*8] -%if %2 == 15 - lea dstq, [dstq+wq*(2>>wshr)] -%else ; %2 == 19 - lea dstq, [dstq+wq*(4>>wshr)] -%endif ; %2 == 15/19 - lea fltposq, [fltposq+wq*(4>>wshr)] - neg wq - -.loop: -%if %3 == 4 ; filterSize == 4 scaling - ; load 2x4 or 4x4 source pixels into m0/m1 - mov32 pos0q, dword [fltposq+wq*4+ 0] ; filterPos[0] - mov32 pos1q, dword [fltposq+wq*4+ 4] ; filterPos[1] - movlh m0, [srcq+pos0q*srcmul] ; src[filterPos[0] + {0,1,2,3}] -%if mmsize == 8 - movlh m1, [srcq+pos1q*srcmul] ; src[filterPos[1] + {0,1,2,3}] -%else ; mmsize == 16 -%if %1 > 8 - movhps m0, [srcq+pos1q*srcmul] ; src[filterPos[1] + {0,1,2,3}] -%else ; %1 == 8 - movd m4, [srcq+pos1q*srcmul] ; src[filterPos[1] + {0,1,2,3}] -%endif - mov32 pos0q, dword [fltposq+wq*4+ 8] ; filterPos[2] - mov32 pos1q, dword [fltposq+wq*4+12] ; filterPos[3] - movlh m1, [srcq+pos0q*srcmul] ; src[filterPos[2] + {0,1,2,3}] -%if %1 > 8 - movhps m1, [srcq+pos1q*srcmul] ; src[filterPos[3] + {0,1,2,3}] -%else ; %1 == 8 - movd m5, [srcq+pos1q*srcmul] ; src[filterPos[3] + {0,1,2,3}] - punpckldq m0, m4 - punpckldq m1, m5 -%endif ; %1 == 8 -%endif ; mmsize == 8/16 -%if %1 == 8 - punpcklbw m0, m3 ; byte -> word - punpcklbw m1, m3 ; byte -> word -%endif ; %1 == 8 - - ; multiply with filter coefficients -%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll - ; add back 0x8000 * sum(coeffs) after the horizontal add - psubw m0, m6 - psubw m1, m6 -%endif ; %1 == 16 - pmaddwd m0, [filterq+wq*8+mmsize*0] ; *= filter[{0,1,..,6,7}] - pmaddwd m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}] - - ; add up horizontally (4 srcpix * 4 coefficients -> 1 dstpix) -%if mmsize == 8 ; mmx - movq m4, m0 - punpckldq m0, m1 - punpckhdq m4, m1 - paddd m0, m4 -%elif notcpuflag(ssse3) ; sse2 - mova m4, m0 - shufps m0, m1, 10001000b - shufps m4, m1, 11011101b - paddd m0, m4 -%else ; ssse3/sse4 - phaddd m0, m1 ; filter[{ 0, 1, 2, 3}]*src[filterPos[0]+{0,1,2,3}], - ; filter[{ 4, 5, 6, 7}]*src[filterPos[1]+{0,1,2,3}], - ; filter[{ 8, 9,10,11}]*src[filterPos[2]+{0,1,2,3}], - ; filter[{12,13,14,15}]*src[filterPos[3]+{0,1,2,3}] -%endif ; mmx/sse2/ssse3/sse4 -%else ; %3 == 8, i.e. filterSize == 8 scaling - ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 - mov32 pos0q, dword [fltposq+wq*2+0] ; filterPos[0] - mov32 pos1q, dword [fltposq+wq*2+4] ; filterPos[1] - movbh m0, [srcq+ pos0q *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] -%if mmsize == 8 - movbh m1, [srcq+(pos0q+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] - movbh m4, [srcq+ pos1q *srcmul] ; src[filterPos[1] + {0,1,2,3}] - movbh m5, [srcq+(pos1q+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] -%else ; mmsize == 16 - movbh m1, [srcq+ pos1q *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] - mov32 pos0q, dword [fltposq+wq*2+8] ; filterPos[2] - mov32 pos1q, dword [fltposq+wq*2+12] ; filterPos[3] - movbh m4, [srcq+ pos0q *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] - movbh m5, [srcq+ pos1q *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] -%endif ; mmsize == 8/16 -%if %1 == 8 - punpcklbw m0, m3 ; byte -> word - punpcklbw m1, m3 ; byte -> word - punpcklbw m4, m3 ; byte -> word - punpcklbw m5, m3 ; byte -> word -%endif ; %1 == 8 - - ; multiply -%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll - ; add back 0x8000 * sum(coeffs) after the horizontal add - psubw m0, m6 - psubw m1, m6 - psubw m4, m6 - psubw m5, m6 -%endif ; %1 == 16 - pmaddwd m0, [filterq+wq*8+mmsize*0] ; *= filter[{0,1,..,6,7}] - pmaddwd m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}] - pmaddwd m4, [filterq+wq*8+mmsize*2] ; *= filter[{16,17,..,22,23}] - pmaddwd m5, [filterq+wq*8+mmsize*3] ; *= filter[{24,25,..,30,31}] - - ; add up horizontally (8 srcpix * 8 coefficients -> 1 dstpix) -%if mmsize == 8 - paddd m0, m1 - paddd m4, m5 - movq m1, m0 - punpckldq m0, m4 - punpckhdq m1, m4 - paddd m0, m1 -%elif notcpuflag(ssse3) ; sse2 -%if %1 == 8 -%define mex m6 -%else -%define mex m3 -%endif - ; emulate horizontal add as transpose + vertical add - mova mex, m0 - punpckldq m0, m1 - punpckhdq mex, m1 - paddd m0, mex - mova m1, m4 - punpckldq m4, m5 - punpckhdq m1, m5 - paddd m4, m1 - mova m1, m0 - punpcklqdq m0, m4 - punpckhqdq m1, m4 - paddd m0, m1 -%else ; ssse3/sse4 - ; FIXME if we rearrange the filter in pairs of 4, we can - ; load pixels likewise and use 2 x paddd + phaddd instead - ; of 3 x phaddd here, faster on older cpus - phaddd m0, m1 - phaddd m4, m5 - phaddd m0, m4 ; filter[{ 0, 1,..., 6, 7}]*src[filterPos[0]+{0,1,...,6,7}], - ; filter[{ 8, 9,...,14,15}]*src[filterPos[1]+{0,1,...,6,7}], - ; filter[{16,17,...,22,23}]*src[filterPos[2]+{0,1,...,6,7}], - ; filter[{24,25,...,30,31}]*src[filterPos[3]+{0,1,...,6,7}] -%endif ; mmx/sse2/ssse3/sse4 -%endif ; %3 == 4/8 - -%else ; %3 == X, i.e. any filterSize scaling - -%ifidn %4, X4 -%define dlt 4 -%else ; %4 == X || %4 == X8 -%define dlt 0 -%endif ; %4 ==/!= X4 -%if ARCH_X86_64 -%define srcq r8 -%define pos1q r7 -%define srcendq r9 - movsxd fltsizeq, fltsized ; filterSize - lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4] -%else ; x86-32 -%define srcq srcmemq -%define pos1q dstq -%define srcendq r6m - lea pos0q, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4] - mov srcendq, pos0q -%endif ; x86-32/64 - lea fltposq, [fltposq+wq*4] -%if %2 == 15 - lea dstq, [dstq+wq*2] -%else ; %2 == 19 - lea dstq, [dstq+wq*4] -%endif ; %2 == 15/19 - movifnidn dstmp, dstq - neg wq - -.loop: - mov32 pos0q, dword [fltposq+wq*4+0] ; filterPos[0] - mov32 pos1q, dword [fltposq+wq*4+4] ; filterPos[1] - ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? - pxor m4, m4 - pxor m5, m5 - mov srcq, srcmemmp - -.innerloop: - ; load 2x4 (mmx) or 2x8 (sse) source pixels into m0/m1 -> m4/m5 - movbh m0, [srcq+ pos0q *srcmul] ; src[filterPos[0] + {0,1,2,3(,4,5,6,7)}] - movbh m1, [srcq+(pos1q+dlt)*srcmul] ; src[filterPos[1] + {0,1,2,3(,4,5,6,7)}] -%if %1 == 8 - punpcklbw m0, m3 - punpcklbw m1, m3 -%endif ; %1 == 8 - - ; multiply -%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll - ; add back 0x8000 * sum(coeffs) after the horizontal add - psubw m0, m6 - psubw m1, m6 -%endif ; %1 == 16 - pmaddwd m0, [filterq] ; filter[{0,1,2,3(,4,5,6,7)}] - pmaddwd m1, [filterq+(fltsizeq+dlt)*2]; filter[filtersize+{0,1,2,3(,4,5,6,7)}] - paddd m4, m0 - paddd m5, m1 - add filterq, mmsize - add srcq, srcmul*mmsize/2 - cmp srcq, srcendq ; while (src += 4) < &src[filterSize] - jl .innerloop - -%ifidn %4, X4 - mov32 pos1q, dword [fltposq+wq*4+4] ; filterPos[1] - movlh m0, [srcq+ pos0q *srcmul] ; split last 4 srcpx of dstpx[0] - sub pos1q, fltsizeq ; and first 4 srcpx of dstpx[1] -%if %1 > 8 - movhps m0, [srcq+(pos1q+dlt)*srcmul] -%else ; %1 == 8 - movd m1, [srcq+(pos1q+dlt)*srcmul] - punpckldq m0, m1 -%endif ; %1 == 8 -%if %1 == 8 - punpcklbw m0, m3 -%endif ; %1 == 8 -%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll - ; add back 0x8000 * sum(coeffs) after the horizontal add - psubw m0, m6 -%endif ; %1 == 16 - pmaddwd m0, [filterq] -%endif ; %4 == X4 - - lea filterq, [filterq+(fltsizeq+dlt)*2] - -%if mmsize == 8 ; mmx - movq m0, m4 - punpckldq m4, m5 - punpckhdq m0, m5 - paddd m0, m4 -%else ; mmsize == 16 -%if notcpuflag(ssse3) ; sse2 - mova m1, m4 - punpcklqdq m4, m5 - punpckhqdq m1, m5 - paddd m4, m1 -%else ; ssse3/sse4 - phaddd m4, m5 -%endif ; sse2/ssse3/sse4 -%ifidn %4, X4 - paddd m4, m0 -%endif ; %3 == X4 -%if notcpuflag(ssse3) ; sse2 - pshufd m4, m4, 11011000b - movhlps m0, m4 - paddd m0, m4 -%else ; ssse3/sse4 - phaddd m4, m4 - SWAP 0, 4 -%endif ; sse2/ssse3/sse4 -%endif ; mmsize == 8/16 -%endif ; %3 ==/!= X - -%if %1 == 16 ; add 0x8000 * sum(coeffs), i.e. back from signed -> unsigned - paddd m0, m7 -%endif ; %1 == 16 - - ; clip, store - psrad m0, 14 + %1 - %2 -%ifidn %3, X - movifnidn dstq, dstmp -%endif ; %3 == X -%if %2 == 15 - packssdw m0, m0 -%ifnidn %3, X - movh [dstq+wq*(2>>wshr)], m0 -%else ; %3 == X - movd [dstq+wq*2], m0 -%endif ; %3 ==/!= X -%else ; %2 == 19 -%if mmsize == 8 - PMINSD_MMX m0, m2, m4 -%elif cpuflag(sse4) - pminsd m0, m2 -%else ; sse2/ssse3 - cvtdq2ps m0, m0 - minps m0, m2 - cvtps2dq m0, m0 -%endif ; mmx/sse2/ssse3/sse4 -%ifnidn %3, X - mova [dstq+wq*(4>>wshr)], m0 -%else ; %3 == X - movq [dstq+wq*4], m0 -%endif ; %3 ==/!= X -%endif ; %2 == 15/19 -%ifnidn %3, X - add wq, (mmsize<<wshr)/4 ; both 8tap and 4tap really only do 4 pixels (or for mmx: 2 pixels) - ; per iteration. see "shl wq,1" above as for why we do this -%else ; %3 == X - add wq, 2 -%endif ; %3 ==/!= X - jl .loop - REP_RET -%endmacro - -; SCALE_FUNCS source_width, intermediate_nbits, n_xmm -%macro SCALE_FUNCS 3 -SCALE_FUNC %1, %2, 4, 4, 6, %3 -SCALE_FUNC %1, %2, 8, 8, 6, %3 -%if mmsize == 8 -SCALE_FUNC %1, %2, X, X, 7, %3 -%else -SCALE_FUNC %1, %2, X, X4, 7, %3 -SCALE_FUNC %1, %2, X, X8, 7, %3 -%endif -%endmacro - -; SCALE_FUNCS2 8_xmm_args, 9to10_xmm_args, 16_xmm_args -%macro SCALE_FUNCS2 3 -%if notcpuflag(sse4) -SCALE_FUNCS 8, 15, %1 -SCALE_FUNCS 9, 15, %2 -SCALE_FUNCS 10, 15, %2 -SCALE_FUNCS 12, 15, %2 -SCALE_FUNCS 14, 15, %2 -SCALE_FUNCS 16, 15, %3 -%endif ; !sse4 -SCALE_FUNCS 8, 19, %1 -SCALE_FUNCS 9, 19, %2 -SCALE_FUNCS 10, 19, %2 -SCALE_FUNCS 12, 19, %2 -SCALE_FUNCS 14, 19, %2 -SCALE_FUNCS 16, 19, %3 -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -SCALE_FUNCS2 0, 0, 0 -%endif -INIT_XMM sse2 -SCALE_FUNCS2 6, 7, 8 -INIT_XMM ssse3 -SCALE_FUNCS2 6, 6, 8 -INIT_XMM sse4 -SCALE_FUNCS2 6, 6, 8 diff --git a/ffmpeg/libswscale/x86/swscale.c b/ffmpeg/libswscale/x86/swscale.c deleted file mode 100644 index 2f7e4f7..0000000 --- a/ffmpeg/libswscale/x86/swscale.c +++ /dev/null @@ -1,580 +0,0 @@ -/* - * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <inttypes.h> -#include "config.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" -#include "libavutil/attributes.h" -#include "libavutil/avassert.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/x86/asm.h" -#include "libavutil/x86/cpu.h" -#include "libavutil/cpu.h" -#include "libavutil/pixdesc.h" - -#if HAVE_INLINE_ASM - -#define DITHER1XBPP - -DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; -DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; -DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; -DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { - 0x0103010301030103LL, - 0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { - 0x0602060206020602LL, - 0x0004000400040004LL,}; - -DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; -DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; -DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; -DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; -DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; -DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; - -DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; -DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; -DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; - -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; - -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; - - -//MMX versions -#if HAVE_MMX_INLINE -#undef RENAME -#define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "swscale_template.c" -#endif - -// MMXEXT versions -#if HAVE_MMXEXT_INLINE -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "swscale_template.c" -#endif - -void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, - int lastInLumBuf, int lastInChrBuf) -{ - const int dstH= c->dstH; - const int flags= c->flags; - int16_t **lumPixBuf= c->lumPixBuf; - int16_t **chrUPixBuf= c->chrUPixBuf; - int16_t **alpPixBuf= c->alpPixBuf; - const int vLumBufSize= c->vLumBufSize; - const int vChrBufSize= c->vChrBufSize; - int32_t *vLumFilterPos= c->vLumFilterPos; - int32_t *vChrFilterPos= c->vChrFilterPos; - int16_t *vLumFilter= c->vLumFilter; - int16_t *vChrFilter= c->vChrFilter; - int32_t *lumMmxFilter= c->lumMmxFilter; - int32_t *chrMmxFilter= c->chrMmxFilter; - int32_t av_unused *alpMmxFilter= c->alpMmxFilter; - const int vLumFilterSize= c->vLumFilterSize; - const int vChrFilterSize= c->vChrFilterSize; - const int chrDstY= dstY>>c->chrDstVSubSample; - const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input - const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input - - c->blueDither= ff_dither8[dstY&1]; - if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555) - c->greenDither= ff_dither8[dstY&1]; - else - c->greenDither= ff_dither4[dstY&1]; - c->redDither= ff_dither8[(dstY+1)&1]; - if (dstY < dstH - 2) { - const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - int i; - - if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { - const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; - int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); - for (i = 0; i < neg; i++) - tmpY[i] = lumSrcPtr[neg]; - for ( ; i < end; i++) - tmpY[i] = lumSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpY[i] = tmpY[i-1]; - lumSrcPtr = tmpY; - - if (alpSrcPtr) { - const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; - for (i = 0; i < neg; i++) - tmpA[i] = alpSrcPtr[neg]; - for ( ; i < end; i++) - tmpA[i] = alpSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpA[i] = tmpA[i - 1]; - alpSrcPtr = tmpA; - } - } - if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { - const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize; - int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); - for (i = 0; i < neg; i++) { - tmpU[i] = chrUSrcPtr[neg]; - } - for ( ; i < end; i++) { - tmpU[i] = chrUSrcPtr[i]; - } - for ( ; i < vChrFilterSize; i++) { - tmpU[i] = tmpU[i - 1]; - } - chrUSrcPtr = tmpU; - } - - if (flags & SWS_ACCURATE_RND) { - int s= APCK_SIZE / 8; - for (i=0; i<vLumFilterSize; i+=2) { - *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; - *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; - lumMmxFilter[s*i+APCK_COEF/4 ]= - lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] - + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { - *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; - *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; - alpMmxFilter[s*i+APCK_COEF/4 ]= - alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; - } - } - for (i=0; i<vChrFilterSize; i+=2) { - *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ]; - *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)]; - chrMmxFilter[s*i+APCK_COEF/4 ]= - chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] - + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); - } - } else { - for (i=0; i<vLumFilterSize; i++) { - *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i]; - lumMmxFilter[4*i+2]= - lumMmxFilter[4*i+3]= - ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U; - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { - *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i]; - alpMmxFilter[4*i+2]= - alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; - } - } - for (i=0; i<vChrFilterSize; i++) { - *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i]; - chrMmxFilter[4*i+2]= - chrMmxFilter[4*i+3]= - ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U; - } - } - } -} - -#if HAVE_MMXEXT -static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset) -{ - if(((int)dest) & 15){ - return yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); - } - if (offset) { - __asm__ volatile("movq (%0), %%xmm3\n\t" - "movdqa %%xmm3, %%xmm4\n\t" - "psrlq $24, %%xmm3\n\t" - "psllq $40, %%xmm4\n\t" - "por %%xmm4, %%xmm3\n\t" - :: "r"(dither) - ); - } else { - __asm__ volatile("movq (%0), %%xmm3\n\t" - :: "r"(dither) - ); - } - filterSize--; - __asm__ volatile( - "pxor %%xmm0, %%xmm0\n\t" - "punpcklbw %%xmm0, %%xmm3\n\t" - "movd %0, %%xmm1\n\t" - "punpcklwd %%xmm1, %%xmm1\n\t" - "punpckldq %%xmm1, %%xmm1\n\t" - "punpcklqdq %%xmm1, %%xmm1\n\t" - "psllw $3, %%xmm1\n\t" - "paddw %%xmm1, %%xmm3\n\t" - "psraw $4, %%xmm3\n\t" - ::"m"(filterSize) - ); - __asm__ volatile( - "movdqa %%xmm3, %%xmm4\n\t" - "movdqa %%xmm3, %%xmm7\n\t" - "movl %3, %%ecx\n\t" - "mov %0, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? */\ - "1: \n\t"\ - "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\ - "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\ - "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "pmulhw %%xmm0, %%xmm2 \n\t"\ - "pmulhw %%xmm0, %%xmm5 \n\t"\ - "paddw %%xmm2, %%xmm3 \n\t"\ - "paddw %%xmm5, %%xmm4 \n\t"\ - " jnz 1b \n\t"\ - "psraw $3, %%xmm3 \n\t"\ - "psraw $3, %%xmm4 \n\t"\ - "packuswb %%xmm4, %%xmm3 \n\t" - "movntdq %%xmm3, (%1, %%"REG_c")\n\t" - "add $16, %%"REG_c" \n\t"\ - "cmp %2, %%"REG_c" \n\t"\ - "movdqa %%xmm7, %%xmm3\n\t" - "movdqa %%xmm7, %%xmm4\n\t" - "mov %0, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "g" (filter), - "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) - : "%"REG_d, "%"REG_S, "%"REG_c - ); -} -#endif - -#endif /* HAVE_INLINE_ASM */ - -#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ -void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ - SwsContext *c, int16_t *data, \ - int dstW, const uint8_t *src, \ - const int16_t *filter, \ - const int32_t *filterPos, int filterSize) - -#define SCALE_FUNCS(filter_n, opt) \ - SCALE_FUNC(filter_n, 8, 15, opt); \ - SCALE_FUNC(filter_n, 9, 15, opt); \ - SCALE_FUNC(filter_n, 10, 15, opt); \ - SCALE_FUNC(filter_n, 12, 15, opt); \ - SCALE_FUNC(filter_n, 14, 15, opt); \ - SCALE_FUNC(filter_n, 16, 15, opt); \ - SCALE_FUNC(filter_n, 8, 19, opt); \ - SCALE_FUNC(filter_n, 9, 19, opt); \ - SCALE_FUNC(filter_n, 10, 19, opt); \ - SCALE_FUNC(filter_n, 12, 19, opt); \ - SCALE_FUNC(filter_n, 14, 19, opt); \ - SCALE_FUNC(filter_n, 16, 19, opt) - -#define SCALE_FUNCS_MMX(opt) \ - SCALE_FUNCS(4, opt); \ - SCALE_FUNCS(8, opt); \ - SCALE_FUNCS(X, opt) - -#define SCALE_FUNCS_SSE(opt) \ - SCALE_FUNCS(4, opt); \ - SCALE_FUNCS(8, opt); \ - SCALE_FUNCS(X4, opt); \ - SCALE_FUNCS(X8, opt) - -#if ARCH_X86_32 -SCALE_FUNCS_MMX(mmx); -#endif -SCALE_FUNCS_SSE(sse2); -SCALE_FUNCS_SSE(ssse3); -SCALE_FUNCS_SSE(sse4); - -#define VSCALEX_FUNC(size, opt) \ -void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ - const int16_t **src, uint8_t *dest, int dstW, \ - const uint8_t *dither, int offset) -#define VSCALEX_FUNCS(opt) \ - VSCALEX_FUNC(8, opt); \ - VSCALEX_FUNC(9, opt); \ - VSCALEX_FUNC(10, opt) - -#if ARCH_X86_32 -VSCALEX_FUNCS(mmxext); -#endif -VSCALEX_FUNCS(sse2); -VSCALEX_FUNCS(sse4); -VSCALEX_FUNC(16, sse4); -VSCALEX_FUNCS(avx); - -#define VSCALE_FUNC(size, opt) \ -void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \ - const uint8_t *dither, int offset) -#define VSCALE_FUNCS(opt1, opt2) \ - VSCALE_FUNC(8, opt1); \ - VSCALE_FUNC(9, opt2); \ - VSCALE_FUNC(10, opt2); \ - VSCALE_FUNC(16, opt1) - -#if ARCH_X86_32 -VSCALE_FUNCS(mmx, mmxext); -#endif -VSCALE_FUNCS(sse2, sse2); -VSCALE_FUNC(16, sse4); -VSCALE_FUNCS(avx, avx); - -#define INPUT_Y_FUNC(fmt, opt) \ -void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ - const uint8_t *unused1, const uint8_t *unused2, \ - int w, uint32_t *unused) -#define INPUT_UV_FUNC(fmt, opt) \ -void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *unused0, \ - const uint8_t *src1, \ - const uint8_t *src2, \ - int w, uint32_t *unused) -#define INPUT_FUNC(fmt, opt) \ - INPUT_Y_FUNC(fmt, opt); \ - INPUT_UV_FUNC(fmt, opt) -#define INPUT_FUNCS(opt) \ - INPUT_FUNC(uyvy, opt); \ - INPUT_FUNC(yuyv, opt); \ - INPUT_UV_FUNC(nv12, opt); \ - INPUT_UV_FUNC(nv21, opt); \ - INPUT_FUNC(rgba, opt); \ - INPUT_FUNC(bgra, opt); \ - INPUT_FUNC(argb, opt); \ - INPUT_FUNC(abgr, opt); \ - INPUT_FUNC(rgb24, opt); \ - INPUT_FUNC(bgr24, opt) - -#if ARCH_X86_32 -INPUT_FUNCS(mmx); -#endif -INPUT_FUNCS(sse2); -INPUT_FUNCS(ssse3); -INPUT_FUNCS(avx); - -av_cold void ff_sws_init_swscale_x86(SwsContext *c) -{ - int cpu_flags = av_get_cpu_flags(); - -#if HAVE_MMX_INLINE - if (cpu_flags & AV_CPU_FLAG_MMX) - sws_init_swscale_mmx(c); -#endif -#if HAVE_MMXEXT_INLINE - if (cpu_flags & AV_CPU_FLAG_MMXEXT) - sws_init_swscale_mmxext(c); - if (cpu_flags & AV_CPU_FLAG_SSE3){ - if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) - c->yuv2planeX = yuv2yuvX_sse3; - } -#endif - -#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ - if (c->srcBpc == 8) { \ - hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale8to19_ ## filtersize ## _ ## opt1; \ - } else if (c->srcBpc == 9) { \ - hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale9to19_ ## filtersize ## _ ## opt1; \ - } else if (c->srcBpc == 10) { \ - hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale10to19_ ## filtersize ## _ ## opt1; \ - } else if (c->srcBpc == 12) { \ - hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale12to19_ ## filtersize ## _ ## opt1; \ - } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \ - hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale14to19_ ## filtersize ## _ ## opt1; \ - } else { /* c->srcBpc == 16 */ \ - av_assert0(c->srcBpc == 16);\ - hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale16to19_ ## filtersize ## _ ## opt1; \ - } \ -} while (0) -#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ - switch (filtersize) { \ - case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ - case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ - default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ - } -#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \ -switch(c->dstBpc){ \ - case 16: do_16_case; break; \ - case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ - case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ - default: if (condition_8bit) /*vscalefn = ff_yuv2planeX_8_ ## opt;*/ break; \ - } -#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ - switch(c->dstBpc){ \ - case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ - case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ - case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ - case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ - default: av_assert0(c->dstBpc>8); \ - } -#define case_rgb(x, X, opt) \ - case AV_PIX_FMT_ ## X: \ - c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \ - if (!c->chrSrcHSubSample) \ - c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ - break -#if ARCH_X86_32 - if (EXTERNAL_MMX(cpu_flags)) { - ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); - ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT); - - switch (c->srcFormat) { - case AV_PIX_FMT_Y400A: - c->lumToYV12 = ff_yuyvToY_mmx; - if (c->alpPixBuf) - c->alpToYV12 = ff_uyvyToY_mmx; - break; - case AV_PIX_FMT_YUYV422: - c->lumToYV12 = ff_yuyvToY_mmx; - c->chrToYV12 = ff_yuyvToUV_mmx; - break; - case AV_PIX_FMT_UYVY422: - c->lumToYV12 = ff_uyvyToY_mmx; - c->chrToYV12 = ff_uyvyToUV_mmx; - break; - case AV_PIX_FMT_NV12: - c->chrToYV12 = ff_nv12ToUV_mmx; - break; - case AV_PIX_FMT_NV21: - c->chrToYV12 = ff_nv21ToUV_mmx; - break; - case_rgb(rgb24, RGB24, mmx); - case_rgb(bgr24, BGR24, mmx); - case_rgb(bgra, BGRA, mmx); - case_rgb(rgba, RGBA, mmx); - case_rgb(abgr, ABGR, mmx); - case_rgb(argb, ARGB, mmx); - default: - break; - } - } - if (EXTERNAL_MMXEXT(cpu_flags)) { - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1); - } -#endif /* ARCH_X86_32 */ -#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ - switch (filtersize) { \ - case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ - case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ - default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \ - else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ - break; \ - } - if (EXTERNAL_SSE2(cpu_flags)) { - ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); - ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , - HAVE_ALIGNED_STACK || ARCH_X86_64); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); - - switch (c->srcFormat) { - case AV_PIX_FMT_Y400A: - c->lumToYV12 = ff_yuyvToY_sse2; - if (c->alpPixBuf) - c->alpToYV12 = ff_uyvyToY_sse2; - break; - case AV_PIX_FMT_YUYV422: - c->lumToYV12 = ff_yuyvToY_sse2; - c->chrToYV12 = ff_yuyvToUV_sse2; - break; - case AV_PIX_FMT_UYVY422: - c->lumToYV12 = ff_uyvyToY_sse2; - c->chrToYV12 = ff_uyvyToUV_sse2; - break; - case AV_PIX_FMT_NV12: - c->chrToYV12 = ff_nv12ToUV_sse2; - break; - case AV_PIX_FMT_NV21: - c->chrToYV12 = ff_nv21ToUV_sse2; - break; - case_rgb(rgb24, RGB24, sse2); - case_rgb(bgr24, BGR24, sse2); - case_rgb(bgra, BGRA, sse2); - case_rgb(rgba, RGBA, sse2); - case_rgb(abgr, ABGR, sse2); - case_rgb(argb, ARGB, sse2); - default: - break; - } - } - if (EXTERNAL_SSSE3(cpu_flags)) { - ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); - ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); - switch (c->srcFormat) { - case_rgb(rgb24, RGB24, ssse3); - case_rgb(bgr24, BGR24, ssse3); - default: - break; - } - } - if (EXTERNAL_SSE4(cpu_flags)) { - /* Xto15 don't need special sse4 functions */ - ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); - ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, - if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4, - HAVE_ALIGNED_STACK || ARCH_X86_64); - if (c->dstBpc == 16 && !isBE(c->dstFormat)) - c->yuv2plane1 = ff_yuv2plane1_16_sse4; - } - - if (EXTERNAL_AVX(cpu_flags)) { - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , - HAVE_ALIGNED_STACK || ARCH_X86_64); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); - - switch (c->srcFormat) { - case AV_PIX_FMT_YUYV422: - c->chrToYV12 = ff_yuyvToUV_avx; - break; - case AV_PIX_FMT_UYVY422: - c->chrToYV12 = ff_uyvyToUV_avx; - break; - case AV_PIX_FMT_NV12: - c->chrToYV12 = ff_nv12ToUV_avx; - break; - case AV_PIX_FMT_NV21: - c->chrToYV12 = ff_nv21ToUV_avx; - break; - case_rgb(rgb24, RGB24, avx); - case_rgb(bgr24, BGR24, avx); - case_rgb(bgra, BGRA, avx); - case_rgb(rgba, RGBA, avx); - case_rgb(abgr, ABGR, avx); - case_rgb(argb, ARGB, avx); - default: - break; - } - } -} diff --git a/ffmpeg/libswscale/x86/swscale_template.c b/ffmpeg/libswscale/x86/swscale_template.c deleted file mode 100644 index c7a1bb4..0000000 --- a/ffmpeg/libswscale/x86/swscale_template.c +++ /dev/null @@ -1,1717 +0,0 @@ -/* - * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#undef REAL_MOVNTQ -#undef MOVNTQ -#undef MOVNTQ2 -#undef PREFETCH - -#if COMPILE_TEMPLATE_MMXEXT -#define PREFETCH "prefetchnta" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_MMXEXT -#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" -#define MOVNTQ2 "movntq " -#else -#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" -#define MOVNTQ2 "movq " -#endif -#define MOVNTQ(a,b) REAL_MOVNTQ(a,b) - -#if !COMPILE_TEMPLATE_MMXEXT -static av_always_inline void -dither_8to16(const uint8_t *srcDither, int rot) -{ - if (rot) { - __asm__ volatile("pxor %%mm0, %%mm0\n\t" - "movq (%0), %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "psrlq $24, %%mm3\n\t" - "psllq $40, %%mm4\n\t" - "por %%mm4, %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "punpcklbw %%mm0, %%mm3\n\t" - "punpckhbw %%mm0, %%mm4\n\t" - :: "r"(srcDither) - ); - } else { - __asm__ volatile("pxor %%mm0, %%mm0\n\t" - "movq (%0), %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "punpcklbw %%mm0, %%mm3\n\t" - "punpckhbw %%mm0, %%mm4\n\t" - :: "r"(srcDither) - ); - } -} -#endif - -static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset) -{ - dither_8to16(dither, offset); - filterSize--; - __asm__ volatile( - "movd %0, %%mm1\n\t" - "punpcklwd %%mm1, %%mm1\n\t" - "punpckldq %%mm1, %%mm1\n\t" - "psllw $3, %%mm1\n\t" - "paddw %%mm1, %%mm3\n\t" - "paddw %%mm1, %%mm4\n\t" - "psraw $4, %%mm3\n\t" - "psraw $4, %%mm4\n\t" - ::"m"(filterSize) - ); - - __asm__ volatile(\ - "movq %%mm3, %%mm6\n\t" - "movq %%mm4, %%mm7\n\t" - "movl %3, %%ecx\n\t" - "mov %0, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? */\ - "1: \n\t"\ - "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ - "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\ - "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t"\ - "pmulhw %%mm0, %%mm5 \n\t"\ - "paddw %%mm2, %%mm3 \n\t"\ - "paddw %%mm5, %%mm4 \n\t"\ - " jnz 1b \n\t"\ - "psraw $3, %%mm3 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "packuswb %%mm4, %%mm3 \n\t" - MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t" - "add $8, %%"REG_c" \n\t"\ - "cmp %2, %%"REG_c" \n\t"\ - "movq %%mm6, %%mm3\n\t" - "movq %%mm7, %%mm4\n\t" - "mov %0, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "g" (filter), - "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) - : "%"REG_d, "%"REG_S, "%"REG_c - ); -} - -#define YSCALEYUV2PACKEDX_UV \ - __asm__ volatile(\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - ".p2align 4 \n\t"\ - "nop \n\t"\ - "1: \n\t"\ - "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ - "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ - "add %6, %%"REG_S" \n\t" \ - "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t"\ - "pmulhw %%mm0, %%mm5 \n\t"\ - "paddw %%mm2, %%mm3 \n\t"\ - "paddw %%mm5, %%mm4 \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - " jnz 2b \n\t"\ - -#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \ - "lea "offset"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\ - "movq "#dst1", "#dst2" \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\ - "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\ - "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pmulhw "#coeff", "#src1" \n\t"\ - "pmulhw "#coeff", "#src2" \n\t"\ - "paddw "#src1", "#dst1" \n\t"\ - "paddw "#src2", "#dst2" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - " jnz 2b \n\t"\ - -#define YSCALEYUV2PACKEDX \ - YSCALEYUV2PACKEDX_UV \ - YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \ - -#define YSCALEYUV2PACKEDX_END \ - :: "r" (&c->redDither), \ - "m" (dummy), "m" (dummy), "m" (dummy),\ - "r" (dest), "m" (dstW_reg), "m"(uv_off) \ - : "%"REG_a, "%"REG_d, "%"REG_S \ - ); - -#define YSCALEYUV2PACKEDX_ACCURATE_UV \ - __asm__ volatile(\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - ".p2align 4 \n\t"\ - "nop \n\t"\ - "1: \n\t"\ - "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pxor %%mm4, %%mm4 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ - "add %6, %%"REG_S" \n\t" \ - "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ - "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ - "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ - "movq %%mm0, %%mm3 \n\t"\ - "punpcklwd %%mm1, %%mm0 \n\t"\ - "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm3 \n\t"\ - "paddd %%mm0, %%mm4 \n\t"\ - "paddd %%mm3, %%mm5 \n\t"\ - "add %6, %%"REG_S" \n\t" \ - "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ - "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ - "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "punpcklwd %%mm3, %%mm2 \n\t"\ - "punpckhwd %%mm3, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm2 \n\t"\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "paddd %%mm2, %%mm6 \n\t"\ - "paddd %%mm0, %%mm7 \n\t"\ - " jnz 2b \n\t"\ - "psrad $16, %%mm4 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ - "packssdw %%mm5, %%mm4 \n\t"\ - "packssdw %%mm7, %%mm6 \n\t"\ - "paddw %%mm0, %%mm4 \n\t"\ - "paddw %%mm0, %%mm6 \n\t"\ - "movq %%mm4, "U_TEMP"(%0) \n\t"\ - "movq %%mm6, "V_TEMP"(%0) \n\t"\ - -#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ - "lea "offset"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pxor %%mm1, %%mm1 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ - "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ - "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ - "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ - "movq %%mm0, %%mm3 \n\t"\ - "punpcklwd %%mm4, %%mm0 \n\t"\ - "punpckhwd %%mm4, %%mm3 \n\t"\ - "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ - "pmaddwd %%mm4, %%mm0 \n\t"\ - "pmaddwd %%mm4, %%mm3 \n\t"\ - "paddd %%mm0, %%mm1 \n\t"\ - "paddd %%mm3, %%mm5 \n\t"\ - "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ - "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ - "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "punpcklwd %%mm3, %%mm2 \n\t"\ - "punpckhwd %%mm3, %%mm0 \n\t"\ - "pmaddwd %%mm4, %%mm2 \n\t"\ - "pmaddwd %%mm4, %%mm0 \n\t"\ - "paddd %%mm2, %%mm7 \n\t"\ - "paddd %%mm0, %%mm6 \n\t"\ - " jnz 2b \n\t"\ - "psrad $16, %%mm1 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ - "packssdw %%mm5, %%mm1 \n\t"\ - "packssdw %%mm6, %%mm7 \n\t"\ - "paddw %%mm0, %%mm1 \n\t"\ - "paddw %%mm0, %%mm7 \n\t"\ - "movq "U_TEMP"(%0), %%mm3 \n\t"\ - "movq "V_TEMP"(%0), %%mm4 \n\t"\ - -#define YSCALEYUV2PACKEDX_ACCURATE \ - YSCALEYUV2PACKEDX_ACCURATE_UV \ - YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) - -#define YSCALEYUV2RGBX \ - "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ - "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \ - "movq "#b", "#q2" \n\t" /* B */\ - "movq "#r", "#t" \n\t" /* R */\ - "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\ - "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\ - "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\ - "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\ - "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\ - "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\ - "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\ - "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\ - "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\ - "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\ -\ - MOVNTQ( q0, (dst, index, 4))\ - MOVNTQ( b, 8(dst, index, 4))\ - MOVNTQ( q2, 16(dst, index, 4))\ - MOVNTQ( q3, 24(dst, index, 4))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) - -static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "movq %%mm2, "U_TEMP"(%0) \n\t" - "movq %%mm4, "V_TEMP"(%0) \n\t" - "movq %%mm5, "Y_TEMP"(%0) \n\t" - YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET) - "movq "Y_TEMP"(%0), %%mm5 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) - YSCALEYUV2PACKEDX_END - } else { - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - YSCALEYUV2PACKEDX_END - } -} - -static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - YSCALEYUV2PACKEDX_END - } else { - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - YSCALEYUV2PACKEDX_END - } -} - -#define REAL_WRITERGB16(dst, dstw, index) \ - "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ - "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ - "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ - "psrlq $3, %%mm2 \n\t"\ -\ - "movq %%mm2, %%mm1 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ -\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - "punpcklbw %%mm5, %%mm2 \n\t"\ - "punpckhbw %%mm7, %%mm4 \n\t"\ - "punpckhbw %%mm5, %%mm1 \n\t"\ -\ - "psllq $3, %%mm3 \n\t"\ - "psllq $3, %%mm4 \n\t"\ -\ - "por %%mm3, %%mm2 \n\t"\ - "por %%mm4, %%mm1 \n\t"\ -\ - MOVNTQ(%%mm2, (dst, index, 2))\ - MOVNTQ(%%mm1, 8(dst, index, 2))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index) - -static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" - "paddusb "RED_DITHER"(%0), %%mm5\n\t" -#endif - WRITERGB16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END -} - -static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" - "paddusb "RED_DITHER"(%0), %%mm5 \n\t" -#endif - WRITERGB16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END -} - -#define REAL_WRITERGB15(dst, dstw, index) \ - "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ - "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ - "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ - "psrlq $3, %%mm2 \n\t"\ - "psrlq $1, %%mm5 \n\t"\ -\ - "movq %%mm2, %%mm1 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ -\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - "punpcklbw %%mm5, %%mm2 \n\t"\ - "punpckhbw %%mm7, %%mm4 \n\t"\ - "punpckhbw %%mm5, %%mm1 \n\t"\ -\ - "psllq $2, %%mm3 \n\t"\ - "psllq $2, %%mm4 \n\t"\ -\ - "por %%mm3, %%mm2 \n\t"\ - "por %%mm4, %%mm1 \n\t"\ -\ - MOVNTQ(%%mm2, (dst, index, 2))\ - MOVNTQ(%%mm1, 8(dst, index, 2))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) - -static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" - "paddusb "RED_DITHER"(%0), %%mm5\n\t" -#endif - WRITERGB15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END -} - -static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" - "paddusb "RED_DITHER"(%0), %%mm5 \n\t" -#endif - WRITERGB15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END -} - -#define WRITEBGR24MMX(dst, dstw, index) \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ -\ - "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ - "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ - "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ - "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ -\ - "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ - "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ - "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ - "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ -\ - "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ - "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ - "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ - "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ -\ - "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ - "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ - "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ - "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ - MOVNTQ(%%mm0, (dst))\ -\ - "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ - "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ - "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ - "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ - MOVNTQ(%%mm6, 8(dst))\ -\ - "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ - "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ - "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ - MOVNTQ(%%mm5, 16(dst))\ -\ - "add $24, "#dst" \n\t"\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR24MMXEXT(dst, dstw, index) \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ - "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ - "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ - "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ - "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ -\ - "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ - "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ - "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ -\ - "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ - "por %%mm1, %%mm6 \n\t"\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, (dst))\ -\ - "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ - "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ - "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ - "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ -\ - "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ - "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ - "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ -\ - "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, 8(dst))\ -\ - "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ - "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ - "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ -\ - "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ - "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ - "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ -\ - "por %%mm1, %%mm3 \n\t"\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, 16(dst))\ -\ - "add $24, "#dst" \n\t"\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" - -#if COMPILE_TEMPLATE_MMXEXT -#undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index) -#else -#undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) -#endif - -static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW_reg), "m"(uv_off) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); -} - -static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW_reg), "m"(uv_off) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); -} - -#define REAL_WRITEYUY2(dst, dstw, index) \ - "packuswb %%mm3, %%mm3 \n\t"\ - "packuswb %%mm4, %%mm4 \n\t"\ - "packuswb %%mm7, %%mm1 \n\t"\ - "punpcklbw %%mm4, %%mm3 \n\t"\ - "movq %%mm1, %%mm7 \n\t"\ - "punpcklbw %%mm3, %%mm1 \n\t"\ - "punpckhbw %%mm3, %%mm7 \n\t"\ -\ - MOVNTQ(%%mm1, (dst, index, 2))\ - MOVNTQ(%%mm7, 8(dst, index, 2))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) - -static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX_ACCURATE - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END -} - -static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) -{ - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_offx2; - - YSCALEYUV2PACKEDX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END -} - -#define REAL_YSCALEYUV2RGB_UV(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - -#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \ - "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ - "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - -#define REAL_YSCALEYUV2RGB_COEFF(c) \ - "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ - "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) - -#define YSCALEYUV2RGB(index, c) \ - REAL_YSCALEYUV2RGB_UV(index, c) \ - REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \ - REAL_YSCALEYUV2RGB_COEFF(c) - -/** - * vertical bilinear scale YV12 to RGB - */ -static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; - - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1]; -#if ARCH_X86_64 - __asm__ volatile( - YSCALEYUV2RGB(%%r8, %5) - YSCALEYUV2RGB_YA(%%r8, %5, %6, %7) - "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest), - "a" (&c->redDither), - "r" (abuf0), "r" (abuf1) - : "%r8" - ); -#else - c->u_temp=(intptr_t)abuf0; - c->v_temp=(intptr_t)abuf1; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "push %0 \n\t" - "push %1 \n\t" - "mov "U_TEMP"(%5), %0 \n\t" - "mov "V_TEMP"(%5), %1 \n\t" - YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) - "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "packuswb %%mm7, %%mm1 \n\t" - "pop %1 \n\t" - "pop %0 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); -#endif - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } -} - -static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; - - //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); -} - -static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; - - //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); -} - -static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; - - //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); -} - -#define REAL_YSCALEYUV2PACKED(index, c) \ - "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ - "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\ - "psraw $3, %%mm0 \n\t"\ - "psraw $3, %%mm1 \n\t"\ - "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\ - "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ - "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - -#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) - -static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) -{ - const int16_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; - - //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); -} - -#define REAL_YSCALEYUV2RGB1(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ - "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) - -// do vertical chrominance interpolation -#define REAL_YSCALEYUV2RGB1b(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ - "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ - "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ - "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ - "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) - -#define REAL_YSCALEYUV2RGB1_ALPHA(index) \ - "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\ - "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\ - "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\ - "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\ - "packuswb %%mm1, %%mm7 \n\t" -#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index) - -/** - * YV12 to RGB without scaling or interpolating - */ -static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, int y) -{ - const int16_t *ubuf0 = ubuf[0]; - const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - - if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - const int16_t *ubuf1 = ubuf[0]; - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } - } else { - const int16_t *ubuf1 = ubuf[1]; - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } - } -} - -static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, int y) -{ - const int16_t *ubuf0 = ubuf[0]; - const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - - if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - const int16_t *ubuf1 = ubuf[0]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - const int16_t *ubuf1 = ubuf[1]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } -} - -static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, int y) -{ - const int16_t *ubuf0 = ubuf[0]; - const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - - if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - const int16_t *ubuf1 = ubuf[0]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - const int16_t *ubuf1 = ubuf[1]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } -} - -static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, int y) -{ - const int16_t *ubuf0 = ubuf[0]; - const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - - if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - const int16_t *ubuf1 = ubuf[0]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - const int16_t *ubuf1 = ubuf[1]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } -} - -#define REAL_YSCALEYUV2PACKED1(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "psraw $7, %%mm3 \n\t" \ - "psraw $7, %%mm4 \n\t" \ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $7, %%mm1 \n\t" \ - "psraw $7, %%mm7 \n\t" \ - -#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) - -#define REAL_YSCALEYUV2PACKED1b(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ - "psrlw $8, %%mm3 \n\t" \ - "psrlw $8, %%mm4 \n\t" \ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $7, %%mm1 \n\t" \ - "psraw $7, %%mm7 \n\t" -#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) - -static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *vbuf[2], - const int16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, int y) -{ - const int16_t *ubuf0 = ubuf[0]; - const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - - if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - const int16_t *ubuf1 = ubuf[0]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - const int16_t *ubuf1 = ubuf[1]; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1b(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), - "a" (&c->redDither) - ); - } -} - -#if COMPILE_TEMPLATE_MMXEXT -static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, - int dstWidth, const uint8_t *src, - int srcW, int xInc) -{ - int32_t *filterPos = c->hLumFilterPos; - int16_t *filter = c->hLumFilter; - void *mmxextFilterCode = c->lumMmxextFilterCode; - int i; -#if defined(PIC) - uint64_t ebxsave; -#endif -#if ARCH_X86_64 - uint64_t retsave; -#endif - - __asm__ volatile( -#if defined(PIC) - "mov %%"REG_b", %5 \n\t" -#if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %6 \n\t" -#endif -#else -#if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %5 \n\t" -#endif -#endif - "pxor %%mm7, %%mm7 \n\t" - "mov %0, %%"REG_c" \n\t" - "mov %1, %%"REG_D" \n\t" - "mov %2, %%"REG_d" \n\t" - "mov %3, %%"REG_b" \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" // i - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" - -#if ARCH_X86_64 -#define CALL_MMXEXT_FILTER_CODE \ - "movl (%%"REG_b"), %%esi \n\t"\ - "call *%4 \n\t"\ - "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ - "add %%"REG_S", %%"REG_c" \n\t"\ - "add %%"REG_a", %%"REG_D" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - -#else -#define CALL_MMXEXT_FILTER_CODE \ - "movl (%%"REG_b"), %%esi \n\t"\ - "call *%4 \n\t"\ - "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ - "add %%"REG_a", %%"REG_D" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - -#endif /* ARCH_X86_64 */ - - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - -#if defined(PIC) - "mov %5, %%"REG_b" \n\t" -#if ARCH_X86_64 - "mov %6, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" -#endif -#else -#if ARCH_X86_64 - "mov %5, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" -#endif -#endif - :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), - "m" (mmxextFilterCode) -#if defined(PIC) - ,"m" (ebxsave) -#endif -#if ARCH_X86_64 - ,"m"(retsave) -#endif - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D -#if !defined(PIC) - ,"%"REG_b -#endif - ); - - for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) - dst[i] = src[srcW-1]*128; -} - -static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, - int dstWidth, const uint8_t *src1, - const uint8_t *src2, int srcW, int xInc) -{ - int32_t *filterPos = c->hChrFilterPos; - int16_t *filter = c->hChrFilter; - void *mmxextFilterCode = c->chrMmxextFilterCode; - int i; -#if defined(PIC) - DECLARE_ALIGNED(8, uint64_t, ebxsave); -#endif -#if ARCH_X86_64 - DECLARE_ALIGNED(8, uint64_t, retsave); -#endif - - __asm__ volatile( -#if defined(PIC) - "mov %%"REG_b", %7 \n\t" -#if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %8 \n\t" -#endif -#else -#if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %7 \n\t" -#endif -#endif - "pxor %%mm7, %%mm7 \n\t" - "mov %0, %%"REG_c" \n\t" - "mov %1, %%"REG_D" \n\t" - "mov %2, %%"REG_d" \n\t" - "mov %3, %%"REG_b" \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" // i - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" - - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - "xor %%"REG_a", %%"REG_a" \n\t" // i - "mov %5, %%"REG_c" \n\t" // src - "mov %6, %%"REG_D" \n\t" // buf2 - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" - - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - CALL_MMXEXT_FILTER_CODE - -#if defined(PIC) - "mov %7, %%"REG_b" \n\t" -#if ARCH_X86_64 - "mov %8, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" -#endif -#else -#if ARCH_X86_64 - "mov %7, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" -#endif -#endif - :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), - "m" (mmxextFilterCode), "m" (src2), "m"(dst2) -#if defined(PIC) - ,"m" (ebxsave) -#endif -#if ARCH_X86_64 - ,"m"(retsave) -#endif - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D -#if !defined(PIC) - ,"%"REG_b -#endif - ); - - for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { - dst1[i] = src1[srcW-1]*128; - dst2[i] = src2[srcW-1]*128; - } -} -#endif /* COMPILE_TEMPLATE_MMXEXT */ - -static av_cold void RENAME(sws_init_swscale)(SwsContext *c) -{ - enum AVPixelFormat dstFormat = c->dstFormat; - - c->use_mmx_vfilter= 0; - if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12 - && dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { - if (c->flags & SWS_ACCURATE_RND) { - if (!(c->flags & SWS_FULL_CHR_H_INT)) { - switch (c->dstFormat) { - case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break; - case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break; - case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break; - case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break; - case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break; - default: break; - } - } - } else { - c->use_mmx_vfilter= 1; - c->yuv2planeX = RENAME(yuv2yuvX ); - if (!(c->flags & SWS_FULL_CHR_H_INT)) { - switch (c->dstFormat) { - case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; - case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break; - case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break; - case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break; - case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break; - default: break; - } - } - } - if (!(c->flags & SWS_FULL_CHR_H_INT)) { - switch (c->dstFormat) { - case AV_PIX_FMT_RGB32: - c->yuv2packed1 = RENAME(yuv2rgb32_1); - c->yuv2packed2 = RENAME(yuv2rgb32_2); - break; - case AV_PIX_FMT_BGR24: - c->yuv2packed1 = RENAME(yuv2bgr24_1); - c->yuv2packed2 = RENAME(yuv2bgr24_2); - break; - case AV_PIX_FMT_RGB555: - c->yuv2packed1 = RENAME(yuv2rgb555_1); - c->yuv2packed2 = RENAME(yuv2rgb555_2); - break; - case AV_PIX_FMT_RGB565: - c->yuv2packed1 = RENAME(yuv2rgb565_1); - c->yuv2packed2 = RENAME(yuv2rgb565_2); - break; - case AV_PIX_FMT_YUYV422: - c->yuv2packed1 = RENAME(yuv2yuyv422_1); - c->yuv2packed2 = RENAME(yuv2yuyv422_2); - break; - default: - break; - } - } - } - - if (c->srcBpc == 8 && c->dstBpc <= 14) { - // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one). -#if COMPILE_TEMPLATE_MMXEXT - if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) { - c->hyscale_fast = RENAME(hyscale_fast); - c->hcscale_fast = RENAME(hcscale_fast); - } else { -#endif /* COMPILE_TEMPLATE_MMXEXT */ - c->hyscale_fast = NULL; - c->hcscale_fast = NULL; -#if COMPILE_TEMPLATE_MMXEXT - } -#endif /* COMPILE_TEMPLATE_MMXEXT */ - } -} diff --git a/ffmpeg/libswscale/x86/w64xmmtest.c b/ffmpeg/libswscale/x86/w64xmmtest.c deleted file mode 100644 index 88143d9..0000000 --- a/ffmpeg/libswscale/x86/w64xmmtest.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * check XMM registers for clobbers on Win64 - * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/x86/w64xmmtest.h" -#include "libswscale/swscale.h" - -wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], - const int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *const dst[], const int dstStride[])) -{ - testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY, - srcSliceH, dst, dstStride); -} diff --git a/ffmpeg/libswscale/x86/yuv2rgb.c b/ffmpeg/libswscale/x86/yuv2rgb.c deleted file mode 100644 index e4315ef..0000000 --- a/ffmpeg/libswscale/x86/yuv2rgb.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * software YUV to RGB converter - * - * Copyright (C) 2009 Konstantin Shishkov - * - * MMX/MMXEXT template stuff (needed for fast movntq support), - * 1,4,8bpp support and context / deglobalize stuff - * by Michael Niedermayer (michaelni@gmx.at) - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdio.h> -#include <stdlib.h> -#include <inttypes.h> - -#include "config.h" -#include "libswscale/rgb2rgb.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" -#include "libavutil/attributes.h" -#include "libavutil/x86/asm.h" -#include "libavutil/cpu.h" - -#if HAVE_INLINE_ASM - -#define DITHER1XBPP // only for MMX - -/* hope these constant values are cache line aligned */ -DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; -DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL; -DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; -DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; - -//MMX versions -#if HAVE_MMX_INLINE -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE */ - -// MMXEXT versions -#if HAVE_MMXEXT_INLINE -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE */ - -#endif /* HAVE_INLINE_ASM */ - -av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) -{ -#if HAVE_MMX_INLINE - int cpu_flags = av_get_cpu_flags(); - -#if HAVE_MMXEXT_INLINE - if (cpu_flags & AV_CPU_FLAG_MMXEXT) { - switch (c->dstFormat) { - case AV_PIX_FMT_RGB24: - return yuv420_rgb24_mmxext; - case AV_PIX_FMT_BGR24: - return yuv420_bgr24_mmxext; - } - } -#endif - - if (cpu_flags & AV_CPU_FLAG_MMX) { - switch (c->dstFormat) { - case AV_PIX_FMT_RGB32: - if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA - return yuva420_rgb32_mmx; -#endif - break; - } else - return yuv420_rgb32_mmx; - case AV_PIX_FMT_BGR32: - if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA - return yuva420_bgr32_mmx; -#endif - break; - } else - return yuv420_bgr32_mmx; - case AV_PIX_FMT_RGB24: - return yuv420_rgb24_mmx; - case AV_PIX_FMT_BGR24: - return yuv420_bgr24_mmx; - case AV_PIX_FMT_RGB565: - return yuv420_rgb16_mmx; - case AV_PIX_FMT_RGB555: - return yuv420_rgb15_mmx; - } - } -#endif /* HAVE_MMX_INLINE */ - - return NULL; -} diff --git a/ffmpeg/libswscale/x86/yuv2rgb_template.c b/ffmpeg/libswscale/x86/yuv2rgb_template.c deleted file mode 100644 index c879102..0000000 --- a/ffmpeg/libswscale/x86/yuv2rgb_template.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * software YUV to RGB converter - * - * Copyright (C) 2001-2007 Michael Niedermayer - * (c) 2010 Konstantin Shishkov - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#undef MOVNTQ -#undef EMMS -#undef SFENCE - -#if COMPILE_TEMPLATE_MMXEXT -#define MOVNTQ "movntq" -#define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE " # nop" -#endif - -#define REG_BLUE "0" -#define REG_RED "1" -#define REG_GREEN "2" -#define REG_ALPHA "3" - -#define YUV2RGB_LOOP(depth) \ - h_size = (c->dstW + 7) & ~7; \ - if (h_size * depth > FFABS(dstStride[0])) \ - h_size -= 8; \ - \ - vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ - \ - __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ - for (y = 0; y < srcSliceH; y++) { \ - uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ - const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ - x86_reg index = -h_size / 2; \ - -#define YUV2RGB_INITIAL_LOAD \ - __asm__ volatile ( \ - "movq (%5, %0, 2), %%mm6\n\t" \ - "movd (%2, %0), %%mm0\n\t" \ - "movd (%3, %0), %%mm1\n\t" \ - "1: \n\t" \ - -/* YUV2RGB core - * Conversion is performed in usual way: - * R = Y' * Ycoef + Vred * V' - * G = Y' * Ycoef + Vgreen * V' + Ugreen * U' - * B = Y' * Ycoef + Ublue * U' - * - * where X' = X * 8 - Xoffset (multiplication is performed to increase - * precision a bit). - * Since it operates in YUV420 colorspace, Y component is additionally - * split into Y1 and Y2 for even and odd pixels. - * - * Input: - * mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register - * Output: - * mm1 - R, mm2 - G, mm0 - B - */ -#define YUV2RGB \ - /* convert Y, U, V into Y1', Y2', U', V' */ \ - "movq %%mm6, %%mm7\n\t" \ - "punpcklbw %%mm4, %%mm0\n\t" \ - "punpcklbw %%mm4, %%mm1\n\t" \ - "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \ - "psrlw $8, %%mm7\n\t" \ - "psllw $3, %%mm0\n\t" \ - "psllw $3, %%mm1\n\t" \ - "psllw $3, %%mm6\n\t" \ - "psllw $3, %%mm7\n\t" \ - "psubsw "U_OFFSET"(%4), %%mm0\n\t" \ - "psubsw "V_OFFSET"(%4), %%mm1\n\t" \ - "psubw "Y_OFFSET"(%4), %%mm6\n\t" \ - "psubw "Y_OFFSET"(%4), %%mm7\n\t" \ -\ - /* multiply by coefficients */ \ - "movq %%mm0, %%mm2\n\t" \ - "movq %%mm1, %%mm3\n\t" \ - "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \ - "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \ - "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \ - "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \ - "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \ - "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \ - "paddsw %%mm3, %%mm2\n\t" \ - /* now: mm0 = UB, mm1 = VR, mm2 = CG */ \ - /* mm6 = Y1, mm7 = Y2 */ \ -\ - /* produce RGB */ \ - "movq %%mm7, %%mm3\n\t" \ - "movq %%mm7, %%mm5\n\t" \ - "paddsw %%mm0, %%mm3\n\t" \ - "paddsw %%mm1, %%mm5\n\t" \ - "paddsw %%mm2, %%mm7\n\t" \ - "paddsw %%mm6, %%mm0\n\t" \ - "paddsw %%mm6, %%mm1\n\t" \ - "paddsw %%mm6, %%mm2\n\t" \ - -#define RGB_PACK_INTERLEAVE \ - /* pack and interleave even/odd pixels */ \ - "packuswb %%mm1, %%mm0\n\t" \ - "packuswb %%mm5, %%mm3\n\t" \ - "packuswb %%mm2, %%mm2\n\t" \ - "movq %%mm0, %%mm1\n\n" \ - "packuswb %%mm7, %%mm7\n\t" \ - "punpcklbw %%mm3, %%mm0\n\t" \ - "punpckhbw %%mm3, %%mm1\n\t" \ - "punpcklbw %%mm7, %%mm2\n\t" \ - -#define YUV2RGB_ENDLOOP(depth) \ - "movq 8 (%5, %0, 2), %%mm6\n\t" \ - "movd 4 (%3, %0), %%mm1\n\t" \ - "movd 4 (%2, %0), %%mm0\n\t" \ - "add $"AV_STRINGIFY(depth * 8)", %1\n\t" \ - "add $4, %0\n\t" \ - "js 1b\n\t" \ - -#define YUV2RGB_OPERANDS \ - : "+r" (index), "+r" (image) \ - : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ - "r" (py - 2*index) \ - : "memory" \ - ); \ - } \ - -#define YUV2RGB_OPERANDS_ALPHA \ - : "+r" (index), "+r" (image) \ - : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ - "r" (py - 2*index), "r" (pa - 2*index) \ - : "memory" \ - ); \ - } \ - -#define YUV2RGB_ENDFUNC \ - __asm__ volatile (SFENCE"\n\t" \ - "emms \n\t"); \ - return srcSliceH; \ - -#define IF0(x) -#define IF1(x) x - -#define RGB_PACK16(gmask, is15) \ - "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ - "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ - "movq %%mm2, %%mm3\n\t" \ - "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \ - "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \ - "psrlw $3, %%mm0\n\t" \ - IF##is15("psrlw $1, %%mm1\n\t") \ - "pand "MANGLE(pb_e0)", %%mm2\n\t" \ - "pand "MANGLE(gmask)", %%mm3\n\t" \ - "por %%mm2, %%mm0\n\t" \ - "por %%mm3, %%mm1\n\t" \ - "movq %%mm0, %%mm2\n\t" \ - "punpcklbw %%mm1, %%mm0\n\t" \ - "punpckhbw %%mm1, %%mm2\n\t" \ - MOVNTQ " %%mm0, (%1)\n\t" \ - MOVNTQ " %%mm2, 8(%1)\n\t" \ - -#define DITHER_RGB \ - "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ - "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ - "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ - -#if !COMPILE_TEMPLATE_MMXEXT -static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(2) - -#ifdef DITHER1XBPP - c->blueDither = ff_dither8[y & 1]; - c->greenDither = ff_dither8[y & 1]; - c->redDither = ff_dither8[(y + 1) & 1]; -#endif - - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK_INTERLEAVE -#ifdef DITHER1XBPP - DITHER_RGB -#endif - RGB_PACK16(pb_03, 1) - - YUV2RGB_ENDLOOP(2) - YUV2RGB_OPERANDS - YUV2RGB_ENDFUNC -} - -static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(2) - -#ifdef DITHER1XBPP - c->blueDither = ff_dither8[y & 1]; - c->greenDither = ff_dither4[y & 1]; - c->redDither = ff_dither8[(y + 1) & 1]; -#endif - - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK_INTERLEAVE -#ifdef DITHER1XBPP - DITHER_RGB -#endif - RGB_PACK16(pb_07, 0) - - YUV2RGB_ENDLOOP(2) - YUV2RGB_OPERANDS - YUV2RGB_ENDFUNC -} -#endif /* !COMPILE_TEMPLATE_MMXEXT */ - -#define RGB_PACK24(blue, red)\ - "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ - "packuswb %%mm5, %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\ - "packuswb %%mm7, %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\ - "movq %%mm"red", %%mm3 \n"\ - "movq %%mm"blue", %%mm6 \n"\ - "psrlq $32, %%mm"red" \n" /* R1 R3 R5 R7 */\ - "punpcklbw %%mm2, %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\ - "punpcklbw %%mm"red", %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\ - "movq %%mm3, %%mm5 \n"\ - "punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7Â */\ - "punpcklwd %%mm6, %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\ - "punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ - RGB_PACK24_B - -#if COMPILE_TEMPLATE_MMXEXT -DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1}; -DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0}; -DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0}; -DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1}; -DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0}; -#undef RGB_PACK24_B -#define RGB_PACK24_B\ - "pshufw $0xc6, %%mm2, %%mm1 \n"\ - "pshufw $0x84, %%mm3, %%mm6 \n"\ - "pshufw $0x38, %%mm5, %%mm7 \n"\ - "pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\ - "movq %%mm1, %%mm0 \n"\ - "pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\ - "movq %%mm1, %%mm2 \n"\ - "pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\ - "psrlq $48, %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\ - "pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\ - "psllq $32, %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\ - "pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\ - "por %%mm3, %%mm1 \n"\ - "por %%mm6, %%mm0 \n"\ - "por %%mm5, %%mm1 \n"\ - "por %%mm7, %%mm2 \n"\ - MOVNTQ" %%mm0, (%1) \n"\ - MOVNTQ" %%mm1, 8(%1) \n"\ - MOVNTQ" %%mm2, 16(%1) \n"\ - -#else -#undef RGB_PACK24_B -#define RGB_PACK24_B\ - "movd %%mm3, (%1) \n" /* R0 G0 B0 R1 */\ - "movd %%mm2, 4(%1) \n" /* G1 B1 */\ - "psrlq $32, %%mm3 \n"\ - "psrlq $16, %%mm2 \n"\ - "movd %%mm3, 6(%1) \n" /* R2 G2 B2 R3 */\ - "movd %%mm2, 10(%1) \n" /* G3 B3 */\ - "psrlq $16, %%mm2 \n"\ - "movd %%mm5, 12(%1) \n" /* R4 G4 B4 R5 */\ - "movd %%mm2, 16(%1) \n" /* G5 B5 */\ - "psrlq $32, %%mm5 \n"\ - "movd %%mm2, 20(%1) \n" /* -- -- G7 B7 */\ - "movd %%mm5, 18(%1) \n" /* R6 G6 B6 R7 */\ - -#endif - -static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(3) - - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK24(REG_BLUE, REG_RED) - - YUV2RGB_ENDLOOP(3) - YUV2RGB_OPERANDS - YUV2RGB_ENDFUNC -} - -static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(3) - - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK24(REG_RED, REG_BLUE) - - YUV2RGB_ENDLOOP(3) - YUV2RGB_OPERANDS - YUV2RGB_ENDFUNC -} - - -#define SET_EMPTY_ALPHA \ - "pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */ \ - -#define LOAD_ALPHA \ - "movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t" \ - -#define RGB_PACK32(red, green, blue, alpha) \ - "movq %%mm"blue", %%mm5\n\t" \ - "movq %%mm"red", %%mm6\n\t" \ - "punpckhbw %%mm"green", %%mm5\n\t" \ - "punpcklbw %%mm"green", %%mm"blue"\n\t" \ - "punpckhbw %%mm"alpha", %%mm6\n\t" \ - "punpcklbw %%mm"alpha", %%mm"red"\n\t" \ - "movq %%mm"blue", %%mm"green"\n\t" \ - "movq %%mm5, %%mm"alpha"\n\t" \ - "punpcklwd %%mm"red", %%mm"blue"\n\t" \ - "punpckhwd %%mm"red", %%mm"green"\n\t" \ - "punpcklwd %%mm6, %%mm5\n\t" \ - "punpckhwd %%mm6, %%mm"alpha"\n\t" \ - MOVNTQ " %%mm"blue", 0(%1)\n\t" \ - MOVNTQ " %%mm"green", 8(%1)\n\t" \ - MOVNTQ " %%mm5, 16(%1)\n\t" \ - MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ - -#if !COMPILE_TEMPLATE_MMXEXT -static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK_INTERLEAVE - SET_EMPTY_ALPHA - RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) - - YUV2RGB_ENDLOOP(4) - YUV2RGB_OPERANDS - YUV2RGB_ENDFUNC -} - -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - const uint8_t *pa = src[3] + y * srcStride[3]; - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK_INTERLEAVE - LOAD_ALPHA - RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) - - YUV2RGB_ENDLOOP(4) - YUV2RGB_OPERANDS_ALPHA - YUV2RGB_ENDFUNC -} -#endif - -static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK_INTERLEAVE - SET_EMPTY_ALPHA - RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) - - YUV2RGB_ENDLOOP(4) - YUV2RGB_OPERANDS - YUV2RGB_ENDFUNC -} - -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - const uint8_t *pa = src[3] + y * srcStride[3]; - YUV2RGB_INITIAL_LOAD - YUV2RGB - RGB_PACK_INTERLEAVE - LOAD_ALPHA - RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) - - YUV2RGB_ENDLOOP(4) - YUV2RGB_OPERANDS_ALPHA - YUV2RGB_ENDFUNC -} -#endif - -#endif /* !COMPILE_TEMPLATE_MMXEXT */ diff --git a/ffmpeg/libswscale/yuv2rgb.c b/ffmpeg/libswscale/yuv2rgb.c deleted file mode 100644 index 77c56a9..0000000 --- a/ffmpeg/libswscale/yuv2rgb.c +++ /dev/null @@ -1,920 +0,0 @@ -/* - * software YUV to RGB converter - * - * Copyright (C) 2009 Konstantin Shishkov - * - * 1,4,8bpp support and context / deglobalize stuff - * by Michael Niedermayer (michaelni@gmx.at) - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdio.h> -#include <stdlib.h> -#include <inttypes.h> - -#include "libavutil/cpu.h" -#include "libavutil/bswap.h" -#include "config.h" -#include "rgb2rgb.h" -#include "swscale.h" -#include "swscale_internal.h" -#include "libavutil/pixdesc.h" - -const int32_t ff_yuv2rgb_coeffs[8][4] = { - { 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */ - { 117504, 138453, 13954, 34903 }, /* ITU-R Rec. 709 (1990) */ - { 104597, 132201, 25675, 53279 }, /* unspecified */ - { 104597, 132201, 25675, 53279 }, /* reserved */ - { 104448, 132798, 24759, 53109 }, /* FCC */ - { 104597, 132201, 25675, 53279 }, /* ITU-R Rec. 624-4 System B, G */ - { 104597, 132201, 25675, 53279 }, /* SMPTE 170M */ - { 117579, 136230, 16907, 35559 } /* SMPTE 240M (1987) */ -}; - -const int *sws_getCoefficients(int colorspace) -{ - if (colorspace > 7 || colorspace < 0) - colorspace = SWS_CS_DEFAULT; - return ff_yuv2rgb_coeffs[colorspace]; -} - -#define LOADCHROMA(i) \ - U = pu[i]; \ - V = pv[i]; \ - r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \ - g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \ - b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM]; - -#define PUTRGB(dst, src, i) \ - Y = src[2 * i]; \ - dst[2 * i] = r[Y] + g[Y] + b[Y]; \ - Y = src[2 * i + 1]; \ - dst[2 * i + 1] = r[Y] + g[Y] + b[Y]; - -#define PUTRGB24(dst, src, i) \ - Y = src[2 * i]; \ - dst[6 * i + 0] = r[Y]; \ - dst[6 * i + 1] = g[Y]; \ - dst[6 * i + 2] = b[Y]; \ - Y = src[2 * i + 1]; \ - dst[6 * i + 3] = r[Y]; \ - dst[6 * i + 4] = g[Y]; \ - dst[6 * i + 5] = b[Y]; - -#define PUTBGR24(dst, src, i) \ - Y = src[2 * i]; \ - dst[6 * i + 0] = b[Y]; \ - dst[6 * i + 1] = g[Y]; \ - dst[6 * i + 2] = r[Y]; \ - Y = src[2 * i + 1]; \ - dst[6 * i + 3] = b[Y]; \ - dst[6 * i + 4] = g[Y]; \ - dst[6 * i + 5] = r[Y]; - -#define PUTRGBA(dst, ysrc, asrc, i, s) \ - Y = ysrc[2 * i]; \ - dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << s); \ - Y = ysrc[2 * i + 1]; \ - dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s); - -#define PUTRGB48(dst, src, i) \ - Y = src[ 2 * i]; \ - dst[12 * i + 0] = dst[12 * i + 1] = r[Y]; \ - dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \ - dst[12 * i + 4] = dst[12 * i + 5] = b[Y]; \ - Y = src[ 2 * i + 1]; \ - dst[12 * i + 6] = dst[12 * i + 7] = r[Y]; \ - dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \ - dst[12 * i + 10] = dst[12 * i + 11] = b[Y]; - -#define PUTBGR48(dst, src, i) \ - Y = src[2 * i]; \ - dst[12 * i + 0] = dst[12 * i + 1] = b[Y]; \ - dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \ - dst[12 * i + 4] = dst[12 * i + 5] = r[Y]; \ - Y = src[2 * i + 1]; \ - dst[12 * i + 6] = dst[12 * i + 7] = b[Y]; \ - dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \ - dst[12 * i + 10] = dst[12 * i + 11] = r[Y]; - -#define YUV2RGBFUNC(func_name, dst_type, alpha) \ - static int func_name(SwsContext *c, const uint8_t *src[], \ - int srcStride[], int srcSliceY, int srcSliceH, \ - uint8_t *dst[], int dstStride[]) \ - { \ - int y; \ - \ - if (!alpha && c->srcFormat == AV_PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ - for (y = 0; y < srcSliceH; y += 2) { \ - dst_type *dst_1 = \ - (dst_type *)(dst[0] + (y + srcSliceY) * dstStride[0]); \ - dst_type *dst_2 = \ - (dst_type *)(dst[0] + (y + srcSliceY + 1) * dstStride[0]); \ - dst_type av_unused *r, *g, *b; \ - const uint8_t *py_1 = src[0] + y * srcStride[0]; \ - const uint8_t *py_2 = py_1 + srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ - const uint8_t av_unused *pa_1, *pa_2; \ - unsigned int h_size = c->dstW >> 3; \ - if (alpha) { \ - pa_1 = src[3] + y * srcStride[3]; \ - pa_2 = pa_1 + srcStride[3]; \ - } \ - while (h_size--) { \ - int av_unused U, V, Y; \ - -#define ENDYUV2RGBLINE(dst_delta, ss) \ - pu += 4 >> ss; \ - pv += 4 >> ss; \ - py_1 += 8 >> ss; \ - py_2 += 8 >> ss; \ - dst_1 += dst_delta >> ss; \ - dst_2 += dst_delta >> ss; \ - } \ - if (c->dstW & (4 >> ss)) { \ - int av_unused Y, U, V; \ - -#define ENDYUV2RGBFUNC() \ - } \ - } \ - return srcSliceH; \ - } - -#define CLOSEYUV2RGBFUNC(dst_delta) \ - ENDYUV2RGBLINE(dst_delta, 0) \ - ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0) - LOADCHROMA(0); - PUTRGB48(dst_1, py_1, 0); - PUTRGB48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB48(dst_2, py_2, 1); - PUTRGB48(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTRGB48(dst_1, py_1, 2); - PUTRGB48(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTRGB48(dst_2, py_2, 3); - PUTRGB48(dst_1, py_1, 3); -ENDYUV2RGBLINE(48, 0) - LOADCHROMA(0); - PUTRGB48(dst_1, py_1, 0); - PUTRGB48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB48(dst_2, py_2, 1); - PUTRGB48(dst_1, py_1, 1); -ENDYUV2RGBLINE(48, 1) - LOADCHROMA(0); - PUTRGB48(dst_1, py_1, 0); - PUTRGB48(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0) - LOADCHROMA(0); - PUTBGR48(dst_1, py_1, 0); - PUTBGR48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR48(dst_2, py_2, 1); - PUTBGR48(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTBGR48(dst_1, py_1, 2); - PUTBGR48(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTBGR48(dst_2, py_2, 3); - PUTBGR48(dst_1, py_1, 3); -ENDYUV2RGBLINE(48, 0) - LOADCHROMA(0); - PUTBGR48(dst_1, py_1, 0); - PUTBGR48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR48(dst_2, py_2, 1); - PUTBGR48(dst_1, py_1, 1); -ENDYUV2RGBLINE(48, 1) - LOADCHROMA(0); - PUTBGR48(dst_1, py_1, 0); - PUTBGR48(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0) - LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB(dst_2, py_2, 1); - PUTRGB(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTRGB(dst_1, py_1, 2); - PUTRGB(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTRGB(dst_2, py_2, 3); - PUTRGB(dst_1, py_1, 3); -ENDYUV2RGBLINE(8, 0) - LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB(dst_2, py_2, 1); - PUTRGB(dst_1, py_1, 1); -ENDYUV2RGBLINE(8, 1) - LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 24); - PUTRGBA(dst_2, py_2, pa_2, 0, 24); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 24); - PUTRGBA(dst_1, py_1, pa_1, 1, 24); - - LOADCHROMA(2); - PUTRGBA(dst_1, py_1, pa_1, 2, 24); - PUTRGBA(dst_2, py_2, pa_2, 2, 24); - - LOADCHROMA(3); - PUTRGBA(dst_2, py_2, pa_2, 3, 24); - PUTRGBA(dst_1, py_1, pa_1, 3, 24); - pa_1 += 8; - pa_2 += 8; -ENDYUV2RGBLINE(8, 0) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 24); - PUTRGBA(dst_2, py_2, pa_2, 0, 24); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 24); - PUTRGBA(dst_1, py_1, pa_1, 1, 24); - pa_1 += 4; - pa_2 += 4; -ENDYUV2RGBLINE(8, 1) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 24); - PUTRGBA(dst_2, py_2, pa_2, 0, 24); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuva2argb_c, uint32_t, 1) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 0); - PUTRGBA(dst_2, py_2, pa_2, 0, 0); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 0); - PUTRGBA(dst_1, py_1, pa_1, 1, 0); - - LOADCHROMA(2); - PUTRGBA(dst_1, py_1, pa_1, 2, 0); - PUTRGBA(dst_2, py_2, pa_2, 2, 0); - - LOADCHROMA(3); - PUTRGBA(dst_2, py_2, pa_2, 3, 0); - PUTRGBA(dst_1, py_1, pa_1, 3, 0); - pa_1 += 8; - pa_2 += 8; -ENDYUV2RGBLINE(8, 0) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 0); - PUTRGBA(dst_2, py_2, pa_2, 0, 0); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 0); - PUTRGBA(dst_1, py_1, pa_1, 1, 0); - pa_1 += 4; - pa_2 += 4; -ENDYUV2RGBLINE(8, 1) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 0); - PUTRGBA(dst_2, py_2, pa_2, 0, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0) - LOADCHROMA(0); - PUTRGB24(dst_1, py_1, 0); - PUTRGB24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB24(dst_2, py_2, 1); - PUTRGB24(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTRGB24(dst_1, py_1, 2); - PUTRGB24(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTRGB24(dst_2, py_2, 3); - PUTRGB24(dst_1, py_1, 3); -ENDYUV2RGBLINE(24, 0) - LOADCHROMA(0); - PUTRGB24(dst_1, py_1, 0); - PUTRGB24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB24(dst_2, py_2, 1); - PUTRGB24(dst_1, py_1, 1); -ENDYUV2RGBLINE(24, 1) - LOADCHROMA(0); - PUTRGB24(dst_1, py_1, 0); - PUTRGB24(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -// only trivial mods from yuv2rgb_c_24_rgb -YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0) - LOADCHROMA(0); - PUTBGR24(dst_1, py_1, 0); - PUTBGR24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR24(dst_2, py_2, 1); - PUTBGR24(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTBGR24(dst_1, py_1, 2); - PUTBGR24(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTBGR24(dst_2, py_2, 3); - PUTBGR24(dst_1, py_1, 3); -ENDYUV2RGBLINE(24, 0) - LOADCHROMA(0); - PUTBGR24(dst_1, py_1, 0); - PUTBGR24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR24(dst_2, py_2, 1); - PUTBGR24(dst_1, py_1, 1); -ENDYUV2RGBLINE(24, 1) - LOADCHROMA(0); - PUTBGR24(dst_1, py_1, 0); - PUTBGR24(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) - const uint8_t *d16 = ff_dither_2x2_8[y & 1]; - const uint8_t *e16 = ff_dither_2x2_4[y & 1]; - const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1]; - -#define PUTRGB16(dst, src, i, o) \ - Y = src[2 * i]; \ - dst[2 * i] = r[Y + d16[0 + o]] + \ - g[Y + e16[0 + o]] + \ - b[Y + f16[0 + o]]; \ - Y = src[2 * i + 1]; \ - dst[2 * i + 1] = r[Y + d16[1 + o]] + \ - g[Y + e16[1 + o]] + \ - b[Y + f16[1 + o]]; - LOADCHROMA(0); - PUTRGB16(dst_1, py_1, 0, 0); - PUTRGB16(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB16(dst_2, py_2, 1, 2 + 8); - PUTRGB16(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB16(dst_1, py_1, 2, 4); - PUTRGB16(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB16(dst_2, py_2, 3, 6 + 8); - PUTRGB16(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) - const uint8_t *d16 = ff_dither_2x2_8[y & 1]; - const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1]; - -#define PUTRGB15(dst, src, i, o) \ - Y = src[2 * i]; \ - dst[2 * i] = r[Y + d16[0 + o]] + \ - g[Y + d16[1 + o]] + \ - b[Y + e16[0 + o]]; \ - Y = src[2 * i + 1]; \ - dst[2 * i + 1] = r[Y + d16[1 + o]] + \ - g[Y + d16[0 + o]] + \ - b[Y + e16[1 + o]]; - LOADCHROMA(0); - PUTRGB15(dst_1, py_1, 0, 0); - PUTRGB15(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB15(dst_2, py_2, 1, 2 + 8); - PUTRGB15(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB15(dst_1, py_1, 2, 4); - PUTRGB15(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB15(dst_2, py_2, 3, 6 + 8); - PUTRGB15(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0) - const uint8_t *d16 = ff_dither_4x4_16[y & 3]; - -#define PUTRGB12(dst, src, i, o) \ - Y = src[2 * i]; \ - dst[2 * i] = r[Y + d16[0 + o]] + \ - g[Y + d16[0 + o]] + \ - b[Y + d16[0 + o]]; \ - Y = src[2 * i + 1]; \ - dst[2 * i + 1] = r[Y + d16[1 + o]] + \ - g[Y + d16[1 + o]] + \ - b[Y + d16[1 + o]]; - - LOADCHROMA(0); - PUTRGB12(dst_1, py_1, 0, 0); - PUTRGB12(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB12(dst_2, py_2, 1, 2 + 8); - PUTRGB12(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB12(dst_1, py_1, 2, 4); - PUTRGB12(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB12(dst_2, py_2, 3, 6 + 8); - PUTRGB12(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) - const uint8_t *d32 = ff_dither_8x8_32[y & 7]; - const uint8_t *d64 = ff_dither_8x8_73[y & 7]; - -#define PUTRGB8(dst, src, i, o) \ - Y = src[2 * i]; \ - dst[2 * i] = r[Y + d32[0 + o]] + \ - g[Y + d32[0 + o]] + \ - b[Y + d64[0 + o]]; \ - Y = src[2 * i + 1]; \ - dst[2 * i + 1] = r[Y + d32[1 + o]] + \ - g[Y + d32[1 + o]] + \ - b[Y + d64[1 + o]]; - - LOADCHROMA(0); - PUTRGB8(dst_1, py_1, 0, 0); - PUTRGB8(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB8(dst_2, py_2, 1, 2 + 8); - PUTRGB8(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB8(dst_1, py_1, 2, 4); - PUTRGB8(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB8(dst_2, py_2, 3, 6 + 8); - PUTRGB8(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) - const uint8_t * d64 = ff_dither_8x8_73[y & 7]; - const uint8_t *d128 = ff_dither_8x8_220[y & 7]; - int acc; - -#define PUTRGB4D(dst, src, i, o) \ - Y = src[2 * i]; \ - acc = r[Y + d128[0 + o]] + \ - g[Y + d64[0 + o]] + \ - b[Y + d128[0 + o]]; \ - Y = src[2 * i + 1]; \ - acc |= (r[Y + d128[1 + o]] + \ - g[Y + d64[1 + o]] + \ - b[Y + d128[1 + o]]) << 4; \ - dst[i] = acc; - - LOADCHROMA(0); - PUTRGB4D(dst_1, py_1, 0, 0); - PUTRGB4D(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB4D(dst_2, py_2, 1, 2 + 8); - PUTRGB4D(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB4D(dst_1, py_1, 2, 4); - PUTRGB4D(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB4D(dst_2, py_2, 3, 6 + 8); - PUTRGB4D(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(4) - -YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) - const uint8_t *d64 = ff_dither_8x8_73[y & 7]; - const uint8_t *d128 = ff_dither_8x8_220[y & 7]; - -#define PUTRGB4DB(dst, src, i, o) \ - Y = src[2 * i]; \ - dst[2 * i] = r[Y + d128[0 + o]] + \ - g[Y + d64[0 + o]] + \ - b[Y + d128[0 + o]]; \ - Y = src[2 * i + 1]; \ - dst[2 * i + 1] = r[Y + d128[1 + o]] + \ - g[Y + d64[1 + o]] + \ - b[Y + d128[1 + o]]; - - LOADCHROMA(0); - PUTRGB4DB(dst_1, py_1, 0, 0); - PUTRGB4DB(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB4DB(dst_2, py_2, 1, 2 + 8); - PUTRGB4DB(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB4DB(dst_1, py_1, 2, 4); - PUTRGB4DB(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB4DB(dst_2, py_2, 3, 6 + 8); - PUTRGB4DB(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) - const uint8_t *d128 = ff_dither_8x8_220[y & 7]; - char out_1 = 0, out_2 = 0; - g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; - -#define PUTRGB1(out, src, i, o) \ - Y = src[2 * i]; \ - out += out + g[Y + d128[0 + o]]; \ - Y = src[2 * i + 1]; \ - out += out + g[Y + d128[1 + o]]; - - PUTRGB1(out_1, py_1, 0, 0); - PUTRGB1(out_2, py_2, 0, 0 + 8); - - PUTRGB1(out_2, py_2, 1, 2 + 8); - PUTRGB1(out_1, py_1, 1, 2); - - PUTRGB1(out_1, py_1, 2, 4); - PUTRGB1(out_2, py_2, 2, 4 + 8); - - PUTRGB1(out_2, py_2, 3, 6 + 8); - PUTRGB1(out_1, py_1, 3, 6); - - dst_1[0] = out_1; - dst_2[0] = out_2; -CLOSEYUV2RGBFUNC(1) - -SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) -{ - SwsFunc t = NULL; - - if (ARCH_BFIN) - t = ff_yuv2rgb_init_bfin(c); - if (ARCH_PPC) - t = ff_yuv2rgb_init_ppc(c); - if (HAVE_VIS) - t = ff_yuv2rgb_init_vis(c); - if (ARCH_X86) - t = ff_yuv2rgb_init_x86(c); - - if (t) - return t; - - av_log(c, AV_LOG_WARNING, - "No accelerated colorspace conversion found from %s to %s.\n", - av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); - - switch (c->dstFormat) { - case AV_PIX_FMT_BGR48BE: - case AV_PIX_FMT_BGR48LE: - return yuv2rgb_c_bgr48; - case AV_PIX_FMT_RGB48BE: - case AV_PIX_FMT_RGB48LE: - return yuv2rgb_c_48; - case AV_PIX_FMT_ARGB: - case AV_PIX_FMT_ABGR: - if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) - return yuva2argb_c; - case AV_PIX_FMT_RGBA: - case AV_PIX_FMT_BGRA: - return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32; - case AV_PIX_FMT_RGB24: - return yuv2rgb_c_24_rgb; - case AV_PIX_FMT_BGR24: - return yuv2rgb_c_24_bgr; - case AV_PIX_FMT_RGB565: - case AV_PIX_FMT_BGR565: - return yuv2rgb_c_16_ordered_dither; - case AV_PIX_FMT_RGB555: - case AV_PIX_FMT_BGR555: - return yuv2rgb_c_15_ordered_dither; - case AV_PIX_FMT_RGB444: - case AV_PIX_FMT_BGR444: - return yuv2rgb_c_12_ordered_dither; - case AV_PIX_FMT_RGB8: - case AV_PIX_FMT_BGR8: - return yuv2rgb_c_8_ordered_dither; - case AV_PIX_FMT_RGB4: - case AV_PIX_FMT_BGR4: - return yuv2rgb_c_4_ordered_dither; - case AV_PIX_FMT_RGB4_BYTE: - case AV_PIX_FMT_BGR4_BYTE: - return yuv2rgb_c_4b_ordered_dither; - case AV_PIX_FMT_MONOBLACK: - return yuv2rgb_c_1_ordered_dither; - } - return NULL; -} - -static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, - const int64_t inc, void *y_tab) -{ - int i; - uint8_t *y_table = y_tab; - - y_table -= elemsize * (inc >> 9); - - for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { - int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc; - table[i] = y_table + elemsize * (cb >> 16); - } -} - -static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int64_t inc) -{ - int i; - int off = -(inc >> 9); - - for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { - int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc; - table[i] = elemsize * (off + (cb >> 16)); - } -} - -static uint16_t roundToInt16(int64_t f) -{ - int r = (f + (1 << 15)) >> 16; - - if (r < -0x7FFF) - return 0x8000; - else if (r > 0x7FFF) - return 0x7FFF; - else - return r; -} - -av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], - int fullRange, int brightness, - int contrast, int saturation) -{ - const int isRgb = c->dstFormat == AV_PIX_FMT_RGB32 || - c->dstFormat == AV_PIX_FMT_RGB32_1 || - c->dstFormat == AV_PIX_FMT_BGR24 || - c->dstFormat == AV_PIX_FMT_RGB565BE || - c->dstFormat == AV_PIX_FMT_RGB565LE || - c->dstFormat == AV_PIX_FMT_RGB555BE || - c->dstFormat == AV_PIX_FMT_RGB555LE || - c->dstFormat == AV_PIX_FMT_RGB444BE || - c->dstFormat == AV_PIX_FMT_RGB444LE || - c->dstFormat == AV_PIX_FMT_RGB8 || - c->dstFormat == AV_PIX_FMT_RGB4 || - c->dstFormat == AV_PIX_FMT_RGB4_BYTE || - c->dstFormat == AV_PIX_FMT_MONOBLACK; - const int isNotNe = c->dstFormat == AV_PIX_FMT_NE(RGB565LE, RGB565BE) || - c->dstFormat == AV_PIX_FMT_NE(RGB555LE, RGB555BE) || - c->dstFormat == AV_PIX_FMT_NE(RGB444LE, RGB444BE) || - c->dstFormat == AV_PIX_FMT_NE(BGR565LE, BGR565BE) || - c->dstFormat == AV_PIX_FMT_NE(BGR555LE, BGR555BE) || - c->dstFormat == AV_PIX_FMT_NE(BGR444LE, BGR444BE); - const int bpp = c->dstFormatBpp; - uint8_t *y_table; - uint16_t *y_table16; - uint32_t *y_table32; - int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha; - const int yoffs = fullRange ? 384 : 326; - - int64_t crv = inv_table[0]; - int64_t cbu = inv_table[1]; - int64_t cgu = -inv_table[2]; - int64_t cgv = -inv_table[3]; - int64_t cy = 1 << 16; - int64_t oy = 0; - int64_t yb = 0; - - if (!fullRange) { - cy = (cy * 255) / 219; - oy = 16 << 16; - } else { - crv = (crv * 224) / 255; - cbu = (cbu * 224) / 255; - cgu = (cgu * 224) / 255; - cgv = (cgv * 224) / 255; - } - - cy = (cy * contrast) >> 16; - crv = (crv * contrast * saturation) >> 32; - cbu = (cbu * contrast * saturation) >> 32; - cgu = (cgu * contrast * saturation) >> 32; - cgv = (cgv * contrast * saturation) >> 32; - oy -= 256 * brightness; - - c->uOffset = 0x0400040004000400LL; - c->vOffset = 0x0400040004000400LL; - c->yCoeff = roundToInt16(cy * 8192) * 0x0001000100010001ULL; - c->vrCoeff = roundToInt16(crv * 8192) * 0x0001000100010001ULL; - c->ubCoeff = roundToInt16(cbu * 8192) * 0x0001000100010001ULL; - c->vgCoeff = roundToInt16(cgv * 8192) * 0x0001000100010001ULL; - c->ugCoeff = roundToInt16(cgu * 8192) * 0x0001000100010001ULL; - c->yOffset = roundToInt16(oy * 8) * 0x0001000100010001ULL; - - c->yuv2rgb_y_coeff = (int16_t)roundToInt16(cy << 13); - c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9); - c->yuv2rgb_v2r_coeff = (int16_t)roundToInt16(crv << 13); - c->yuv2rgb_v2g_coeff = (int16_t)roundToInt16(cgv << 13); - c->yuv2rgb_u2g_coeff = (int16_t)roundToInt16(cgu << 13); - c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu << 13); - - //scale coefficients by cy - crv = ((crv << 16) + 0x8000) / FFMAX(cy, 1); - cbu = ((cbu << 16) + 0x8000) / FFMAX(cy, 1); - cgu = ((cgu << 16) + 0x8000) / FFMAX(cy, 1); - cgv = ((cgv << 16) + 0x8000) / FFMAX(cy, 1); - - av_freep(&c->yuvTable); - - switch (bpp) { - case 1: - c->yuvTable = av_malloc(1024); - y_table = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024 - 110; i++) { - y_table[i + 110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7; - yb += cy; - } - fill_table(c->table_gU, 1, cgu, y_table + yoffs); - fill_gv_table(c->table_gV, 1, cgv); - break; - case 4: - case 4 | 128: - rbase = isRgb ? 3 : 0; - gbase = 1; - bbase = isRgb ? 0 : 3; - c->yuvTable = av_malloc(1024 * 3); - y_table = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024 - 110; i++) { - int yval = av_clip_uint8((yb + 0x8000) >> 16); - y_table[i + 110] = (yval >> 7) << rbase; - y_table[i + 37 + 1024] = ((yval + 43) / 85) << gbase; - y_table[i + 110 + 2048] = (yval >> 7) << bbase; - yb += cy; - } - fill_table(c->table_rV, 1, crv, y_table + yoffs); - fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024); - fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048); - fill_gv_table(c->table_gV, 1, cgv); - break; - case 8: - rbase = isRgb ? 5 : 0; - gbase = isRgb ? 2 : 3; - bbase = isRgb ? 0 : 6; - c->yuvTable = av_malloc(1024 * 3); - y_table = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024 - 38; i++) { - int yval = av_clip_uint8((yb + 0x8000) >> 16); - y_table[i + 16] = ((yval + 18) / 36) << rbase; - y_table[i + 16 + 1024] = ((yval + 18) / 36) << gbase; - y_table[i + 37 + 2048] = ((yval + 43) / 85) << bbase; - yb += cy; - } - fill_table(c->table_rV, 1, crv, y_table + yoffs); - fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024); - fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048); - fill_gv_table(c->table_gV, 1, cgv); - break; - case 12: - rbase = isRgb ? 8 : 0; - gbase = 4; - bbase = isRgb ? 0 : 8; - c->yuvTable = av_malloc(1024 * 3 * 2); - y_table16 = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024; i++) { - uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); - y_table16[i] = (yval >> 4) << rbase; - y_table16[i + 1024] = (yval >> 4) << gbase; - y_table16[i + 2048] = (yval >> 4) << bbase; - yb += cy; - } - if (isNotNe) - for (i = 0; i < 1024 * 3; i++) - y_table16[i] = av_bswap16(y_table16[i]); - fill_table(c->table_rV, 2, crv, y_table16 + yoffs); - fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024); - fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048); - fill_gv_table(c->table_gV, 2, cgv); - break; - case 15: - case 16: - rbase = isRgb ? bpp - 5 : 0; - gbase = 5; - bbase = isRgb ? 0 : (bpp - 5); - c->yuvTable = av_malloc(1024 * 3 * 2); - y_table16 = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024; i++) { - uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); - y_table16[i] = (yval >> 3) << rbase; - y_table16[i + 1024] = (yval >> (18 - bpp)) << gbase; - y_table16[i + 2048] = (yval >> 3) << bbase; - yb += cy; - } - if (isNotNe) - for (i = 0; i < 1024 * 3; i++) - y_table16[i] = av_bswap16(y_table16[i]); - fill_table(c->table_rV, 2, crv, y_table16 + yoffs); - fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024); - fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048); - fill_gv_table(c->table_gV, 2, cgv); - break; - case 24: - case 48: - c->yuvTable = av_malloc(1024); - y_table = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024; i++) { - y_table[i] = av_clip_uint8((yb + 0x8000) >> 16); - yb += cy; - } - fill_table(c->table_rV, 1, crv, y_table + yoffs); - fill_table(c->table_gU, 1, cgu, y_table + yoffs); - fill_table(c->table_bU, 1, cbu, y_table + yoffs); - fill_gv_table(c->table_gV, 1, cgv); - break; - case 32: - case 64: - base = (c->dstFormat == AV_PIX_FMT_RGB32_1 || - c->dstFormat == AV_PIX_FMT_BGR32_1) ? 8 : 0; - rbase = base + (isRgb ? 16 : 0); - gbase = base + 8; - bbase = base + (isRgb ? 0 : 16); - needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat); - if (!needAlpha) - abase = (base + 24) & 31; - c->yuvTable = av_malloc(1024 * 3 * 4); - y_table32 = c->yuvTable; - yb = -(384 << 16) - oy; - for (i = 0; i < 1024; i++) { - unsigned yval = av_clip_uint8((yb + 0x8000) >> 16); - y_table32[i] = (yval << rbase) + - (needAlpha ? 0 : (255u << abase)); - y_table32[i + 1024] = yval << gbase; - y_table32[i + 2048] = yval << bbase; - yb += cy; - } - fill_table(c->table_rV, 4, crv, y_table32 + yoffs); - fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024); - fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048); - fill_gv_table(c->table_gV, 4, cgv); - break; - default: - if(!isPlanar(c->dstFormat) || bpp <= 24) - av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp); - return -1; - } - return 0; -} |
