chasing indexing error

author: Tim Redfern <tim@eclectronics.org> 2014-02-17 13:36:38 +0000
committer: Tim Redfern <tim@eclectronics.org> 2014-02-17 13:36:38 +0000
commit: 22e28216336da876e1fd17f380ce42eaf1446769 (patch)
tree: 444dad3dc7e2656992d29f34f7bce31970c122a5 /ffmpeg/libswscale
parent: ae5e8541f6e06e64c28719467cdf366ac57aff31 (diff)
40 files changed, 0 insertions, 22230 deletions
diff --git a/ffmpeg/libswscale/Makefile b/ffmpeg/libswscale/Makefile
deleted file mode 100644
index ca6e27d..0000000
--- a/ffmpeg/libswscale/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-# Build configuration, located one directory above $(SUBDIR).
-include $(SUBDIR)../config.mak
-
-# Library name and the sibling library it is built against
-# (presumably consumed by the shared library build rules -- confirm).
-NAME = swscale
-FFLIBS = avutil
-
-# Headers that belong to this library.
-HEADERS = swscale.h                                                     \
-          version.h                                                     \
-
-# Core object files; bfin/Makefile appends its objects to this same list.
-OBJS = input.o                                          \
-       options.o                                        \
-       output.o                                         \
-       rgb2rgb.o                                        \
-       swscale.o                                        \
-       swscale_unscaled.o                               \
-       utils.o                                          \
-       yuv2rgb.o                                        \
-
-# Windows resource file
-SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o
-
-# Test programs built from this directory.
-TESTPROGS = colorspace                                                  \
-            swscale                                                     \
diff --git a/ffmpeg/libswscale/bfin/Makefile b/ffmpeg/libswscale/bfin/Makefile
deleted file mode 100644
index 5f34550..0000000
--- a/ffmpeg/libswscale/bfin/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Blackfin-specific objects, appended to the OBJS list of libswscale.
-OBJS +=  bfin/internal_bfin.o                                           \
-         bfin/swscale_bfin.o                                            \
-         bfin/yuv2rgb_bfin.o                                            \
diff --git a/ffmpeg/libswscale/bfin/internal_bfin.S b/ffmpeg/libswscale/bfin/internal_bfin.S
deleted file mode 100644
index eab30aa..0000000
--- a/ffmpeg/libswscale/bfin/internal_bfin.S
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
- *                    April 20, 2007
- *
- * Blackfin video color space converter operations
- * convert I420 YV12 to RGB in various formats
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-/*
-YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
-and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts.
-
-
-The following calculation is used for the conversion:
-
-  r = clipz((y - oy) * cy  + crv * (v - 128))
-  g = clipz((y - oy) * cy  + cgv * (v - 128) + cgu * (u - 128))
-  b = clipz((y - oy) * cy  + cbu * (u - 128))
-
-y, u, v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
-
-
-New factorization to eliminate the truncation error which was
-occurring due to the byteop3p.
-
-
-1) Use byteop16m to subtract quad bytes. We use it on U8 data, so
-   the offsets need to be renormalized to 8 bits.
-
-2) Scale operands up by a factor of 4 not 8 because Blackfin
-   multiplies include a shift.
-
-3) Compute into the accumulators cy * yx0, cy * yx1.
-
-4) Compute each of the linear equations:
-     r = clipz((y - oy) * cy  + crv * (v - 128))
-
-     g = clipz((y - oy) * cy  + cgv * (v - 128) + cgu * (u - 128))
-
-     b = clipz((y - oy) * cy  + cbu * (u - 128))
-
-   Reuse of the accumulators requires that we actually multiply
-   twice once with addition and the second time with a subtraction.
-
-   Because of this we need to compute the equations in the order R B
-   then G saving the writes for B in the case of 24/32 bit color
-   formats.
-
-   API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
-                      int dW, uint32_t *coeffs);
-
-       A          B
-       ---        ---
-       i2 = cb    i3 = cr
-       i1 = coeff i0 = y
-
-Where coeffs have the following layout in memory.
-
-uint32_t oy, oc, zero, cy, crv, rmask, cbu, bmask, cgu, cgv, gmask;
-
-coeffs is a pointer to oy.
-
-The {rgb} masks are only utilized by the 555 and 565 packing algorithms. Note
-that data replication is used to simplify the internal algorithms for the
-dual-MAC architecture of Blackfin.
-
-All routines are exported with _ff_bfin_ as a symbol prefix.
-
-Rough performance gain compared against -O3:
-
-2779809/1484290 187.28%
-
-which translates to ~33c/pel to ~57c/pel for the reference vs 17.5
-c/pel for the optimized implementations. Not sure why there is such a
-huge variation on the reference codes on Blackfin I guess it must have
-to do with the memory system.
-*/
-
-/*
- * Section selection: mL3 is the ordinary .text section; mL1 is .l1.text
- * when building for FDPIC with CONFIG_SRAM and otherwise falls back to mL3.
- * MEM is the section used for the yuv2rgb line converters below.
- */
-#define mL3 .text
-#if defined(__FDPIC__) && CONFIG_SRAM
-#define mL1 .l1.text
-#else
-#define mL1 mL3
-#endif
-#define MEM mL1
-
-/*
- * Open an exported function: switch to section `where`, declare the global
- * symbol _ff_bfin_<fname> with function type, align it and emit its label
- * (the caller supplies the trailing ':'). The `interface` argument is not
- * expanded; it only documents the C prototype at the use site.
- */
-#define DEFUN(fname,where,interface) \
-        .section where;              \
-        .global _ff_bfin_ ## fname;  \
-        .type _ff_bfin_ ## fname, STT_FUNC; \
-        .align 8;                    \
-        _ff_bfin_ ## fname
-
-/* Close a function opened with DEFUN by recording the symbol size. */
-#define DEFUN_END(fname) \
-        .size _ff_bfin_ ## fname, . - _ff_bfin_ ## fname
-
-
-.text
-
-/*
- * COEFF_LEN is the byte size of the coefficient table (11 32-bit words) and
- * is loaded into l1 as the circular-buffer length for i1.
- * COEFF_REL_CY_OFF is loaded into m0 and applied as the post-modify on the
- * first gmask load of each loop iteration, so that the next load through i1
- * is cy again.
- */
-#define COEFF_LEN        11*4
-#define COEFF_REL_CY_OFF 4*4
-
-/*
- * fp-relative offsets of the stack-passed arguments out, dW and coeffs of
- * the yuv2rgb line converters (Y, U and V are taken from r0-r2).
- */
-#define ARG_OUT   20
-#define ARG_W     24
-#define ARG_COEFF 28
-
-/*
- * yuv2rgb565_line: convert one line of planar YUV to packed 16-bit RGB565.
- *
- * Y, U and V are taken from r0-r2; out, dW and coeffs are read from the
- * stack. The loop runs dW >> 2 times. Each iteration consumes four luma
- * bytes plus two bytes from each chroma pointer and stores two 32-bit words
- * (four 16-bit pixels) through p1.
- *
- * i1 walks the coefficient table as a circular buffer (b1 = table base,
- * l1 = COEFF_LEN), so after the last entry it wraps back to oy. l1 is
- * cleared again before returning.
- */
-DEFUN(yuv2rgb565_line,MEM,
-   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
-        link 0;
-        [--sp] = (r7:4);
-        p1 = [fp+ARG_OUT];
-        r3 = [fp+ARG_W];
-
-        i0 = r0;
-        i2 = r1;
-        i3 = r2;
-
-        r0 = [fp+ARG_COEFF];
-        i1 = r0;
-        b1 = i1;
-        l1 = COEFF_LEN;
-        m0 = COEFF_REL_CY_OFF;
-        p0 = r3;
-
-        r0   = [i0++];         // 4Y (one 32-bit load)
-        r1.l = w[i2++];        // 2u
-        r1.h = w[i3++];        // 2v
-        p0 = p0>>2;
-
-        lsetup (.L0565, .L1565) lc0 = p0;
-
-        /*
-           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
-           r0 -- used to load 4ys
-           r1 -- used to load 2us,2vs
-           r4 -- y3,y2
-           r5 -- y1,y0
-           r6 -- u1,u0
-           r7 -- v1,v0
-        */
-                                                              r2=[i1++]; // oy
-.L0565:
-        /*
-        rrrrrrrr gggggggg bbbbbbbb
-         5432109876543210
-                    bbbbb >>3
-              gggggggg    <<3
-         rrrrrrrr         <<8
-         rrrrrggggggbbbbb
-        */
-        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
-        (r7,r6) = byteop16m (r1:0, r3:2) (r);
-        r5 = r5 << 2 (v);                                                // y1,y0
-        r4 = r4 << 2 (v);                                                // y3,y2
-        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
-        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
-        /* Y' = y*cy */
-        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
-                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 8 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask; the m0 post-modify wraps i1 round to cy
-        r2 = r2 << 3 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1=[i1++]; // cy
-
-        /* Y' = y*cy */
-
-        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
-                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 8 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0   =  [i0++];        // 4Y for the next iteration
-        r2 = r2 << 3 (v)                                   || r1.l = w[i2++];        // 2u
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1.h = w[i3++];        // 2v
-.L1565:                                                       r2=[i1++]; // oy
-
-        l1 = 0;
-
-        (r7:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuv2rgb565_line)
-
-/*
- * yuv2rgb555_line: convert one line of planar YUV to packed 16-bit RGB555.
- *
- * Same structure as yuv2rgb565_line; only the shifts applied before masking
- * differ (<< 7 and << 2 here instead of << 8 and << 3).
- *
- * Y, U and V are taken from r0-r2; out, dW and coeffs are read from the
- * stack. The loop runs dW >> 2 times; each iteration consumes four luma
- * bytes plus two bytes from each chroma pointer and stores two 32-bit words
- * (four 16-bit pixels) through p1. i1 walks the coefficient table as a
- * circular buffer of COEFF_LEN bytes; l1 is cleared again before returning.
- */
-DEFUN(yuv2rgb555_line,MEM,
-   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
-        link 0;
-        [--sp] = (r7:4);
-        p1 = [fp+ARG_OUT];
-        r3 = [fp+ARG_W];
-
-        i0 = r0;
-        i2 = r1;
-        i3 = r2;
-
-        r0 = [fp+ARG_COEFF];
-        i1 = r0;
-        b1 = i1;
-        l1 = COEFF_LEN;
-        m0 = COEFF_REL_CY_OFF;
-        p0 = r3;
-
-        r0   = [i0++];         // 4Y (one 32-bit load)
-        r1.l = w[i2++];        // 2u
-        r1.h = w[i3++];        // 2v
-        p0 = p0>>2;
-
-        lsetup (.L0555, .L1555) lc0 = p0;
-
-        /*
-           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
-           r0 -- used to load 4ys
-           r1 -- used to load 2us,2vs
-           r4 -- y3,y2
-           r5 -- y1,y0
-           r6 -- u1,u0
-           r7 -- v1,v0
-        */
-                                                              r2=[i1++]; // oy
-.L0555:
-        /*
-        rrrrrrrr gggggggg bbbbbbbb
-         5432109876543210
-                    bbbbb >>3
-               gggggggg   <<2
-          rrrrrrrr        <<7
-         xrrrrrgggggbbbbb
-        */
-
-        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
-        (r7,r6) = byteop16m (r1:0, r3:2) (r);
-        r5 = r5 << 2 (v);                                                // y1,y0
-        r4 = r4 << 2 (v);                                                // y3,y2
-        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
-        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
-        /* Y' = y*cy */
-        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
-                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 7 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask; the m0 post-modify wraps i1 round to cy
-        r2 = r2 << 2 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1=[i1++]; // cy
-
-        /* Y' = y*cy */
-
-        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
-                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 7 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0=[i0++];     // 4Y for the next iteration
-        r2 = r2 << 2 (v)                                   || r1.l=w[i2++];  // 2u
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1.h=w[i3++]; // 2v
-
-.L1555:                                                       r2=[i1++]; // oy
-
-        l1 = 0;
-
-        (r7:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuv2rgb555_line)
-
-/*
- * yuv2rgb24_line: convert one line of planar YUV to packed 24-bit pixels.
- *
- * Y, U and V are taken from r0-r2; out, dW and coeffs are read from the
- * stack. The loop runs dW >> 2 times and each iteration emits four pixels
- * (12 bytes). Two pixels are computed side by side in the 16-bit halves of
- * r2/r3: the byte for the first one is stored through p1 and the byte for
- * the second one through p2, which starts at p1 + 3. After three bytes each,
- * both pointers skip the three bytes the other pointer just wrote.
- *
- * The coefficient table is walked exactly as in the 16-bit converters, so
- * the rmask/bmask/gmask entries are still loaded into r5 to keep i1 in step,
- * but the loaded masks are not used here.
- *
- * NOTE(review): in "r2=r2>>16 || B[p1++]=r2" the store presumably sees the
- * value of r2 from before the shift because both are issued in parallel --
- * confirm against the Blackfin instruction reference.
- */
-DEFUN(yuv2rgb24_line,MEM,
-   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
-        link 0;
-        [--sp] = (r7:4);
-        p1 = [fp+ARG_OUT];
-        r3 = [fp+ARG_W];
-        p2 = p1;
-        p2 += 3;
-
-        i0 = r0;
-        i2 = r1;
-        i3 = r2;
-
-        r0 = [fp+ARG_COEFF]; // coeff buffer
-        i1 = r0;
-        b1 = i1;
-        l1 = COEFF_LEN;
-        m0 = COEFF_REL_CY_OFF;
-        p0 = r3;
-
-        r0   = [i0++];         // 4Y (one 32-bit load)
-        r1.l = w[i2++];        // 2u
-        r1.h = w[i3++];        // 2v
-        p0 = p0>>2;
-
-        lsetup (.L0888, .L1888) lc0 = p0;
-
-        /*
-           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
-           r0 -- used to load 4ys
-           r1 -- used to load 2us,2vs
-           r4 -- y3,y2
-           r5 -- y1,y0
-           r6 -- u1,u0
-           r7 -- v1,v0
-        */
-                                                              r2=[i1++]; // oy
-.L0888:
-        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
-        (r7,r6) = byteop16m (r1:0, r3:2) (r);
-        r5 = r5 << 2 (v);               // y1,y0
-        r4 = r4 << 2 (v);               // y3,y2
-        r6 = r6 << 2 (v) || r0=[i1++];  // u1,u0, r0=zero
-        r7 = r7 << 2 (v) || r1=[i1++];  // v1,v0  r1=cy
-
-        /* Y' = y*cy */
-        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask (loaded only to advance i1)
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2=r2>>16 || B[p1++]=r2;
-                     B[p2++]=r2;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
-                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask (loaded only to advance i1)
-        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask (unused); the m0 post-modify wraps i1 round to cy
-
-        r2=r2>>16 || B[p1++]=r2;
-                     B[p2++]=r2;
-
-        r3=r3>>16 || B[p1++]=r3;
-                     B[p2++]=r3                            || r1=[i1++]; // cy
-
-        p1+=3;
-        p2+=3;
-        /* Y' = y*cy */
-        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask (loaded only to advance i1)
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2=r2>>16 || B[p1++]=r2;
-        B[p2++]=r2;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
-                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask (loaded only to advance i1)
-        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++]; // gmask (unused)
-        r2=r2>>16 || B[p1++]=r2 || r0 = [i0++];    // 4y
-                     B[p2++]=r2 || r1.l = w[i2++]; // 2u
-        r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v
-                     B[p2++]=r3 || r2=[i1++];      // oy
-
-        p1+=3;
-.L1888: p2+=3;
-
-        l1 = 0;
-
-        (r7:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuv2rgb24_line)
-
-
-
-/*
- * fp-relative offsets of the stack-passed arguments of uyvytoyv12 and
- * yuyvtoyv12 (src, ydst and udst are taken from r0-r2).
- */
-#define ARG_vdst        20
-#define ARG_width       24
-#define ARG_height      28
-#define ARG_lumStride   32
-#define ARG_chromStride 36
-#define ARG_srcStride   40
-
-/*
- * uyvytoyv12: split packed UYVY 4:2:2 into separate Y, U and V planes,
- * emitting one chroma line per two source lines.
- *
- * src, ydst and udst are taken from r0-r2; the remaining arguments are read
- * from the stack via the ARG_* offsets. Two source lines are processed per
- * outer iteration (height >> 1 iterations). Each inner iteration (width >> 2
- * of them) consumes 8 bytes from each line, stores 4 luma bytes to each of
- * the two luma lines and 2 bytes each to the U and V planes.
- *
- * The chroma of the two lines is combined with byteop1p -- presumably a
- * byte-wise average; confirm against the Blackfin instruction reference.
- *
- * NOTE(review): after the inner loop p0/p1 have already advanced by width
- * bytes and are then bumped by lumStride only, so they land on the next
- * line pair only when width == lumStride. The C wrapper in
- * bfin/swscale_bfin.c passes dstStride[0] for both -- confirm no other
- * caller relies on width < lumStride.
- */
-DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         int width, int height,
-                         int lumStride, int chromStride, int srcStride)):
-        link 0;
-        [--sp] = (r7:4,p5:4);
-
-        p0 = r1;       // Y top even
-
-        i2 = r2; // *u
-        r2 = [fp + ARG_vdst];
-        i3 = r2; // *v
-
-        r1 = [fp + ARG_srcStride];
-        r2 = r0 + r1;
-        i0 = r0;  // uyvy_T even
-        i1 = r2;  // uyvy_B odd
-
-        p2 = [fp + ARG_lumStride];
-        p1 = p0 + p2;  // Y bot odd
-
-        p5 = [fp + ARG_width];
-        p4 = [fp + ARG_height];
-        r0 = p5;
-        p4 = p4 >> 1;
-        p5 = p5 >> 2;
-
-        r2 = r0 << 1;
-        r1 = r1 << 1;
-        r1 = r1 - r2;  // srcStride + (srcStride - 2*width)
-        r1 += -8;  // i0,i1 have pre-read 8 bytes past the line; compensate
-        m0 = r1;   // source step from the end of one line pair to the next
-
-        r2 = [fp + ARG_chromStride];
-        r0 = r0 >> 1;
-        r2 = r2 - r0;
-        m1 = r2;   // chromStride - width/2: chroma step to the next line
-
-        /*   I0,I1 - src input line pointers
-         *   p0,p1 - luma output line pointers
-         *   I2    - dstU
-         *   I3    - dstV
-         */
-
-        lsetup (0f, 1f) lc1 = p4;   // H/2
-0:        r0 = [i0++] || r2 = [i1++];
-          r1 = [i0++] || r3 = [i1++];
-          r4 = byteop1p(r1:0, r3:2);
-          r5 = byteop1p(r1:0, r3:2) (r);
-          lsetup (2f, 3f) lc0 = p5; // W/4
-2:          r0 = r0 >> 8(v);
-            r1 = r1 >> 8(v);
-            r2 = r2 >> 8(v);
-            r3 = r3 >> 8(v);
-            r0 = bytepack(r0, r1);
-            r2 = bytepack(r2, r3)         ||  [p0++] = r0;    // yyyy
-            r6 = pack(r5.l, r4.l)         ||  [p1++] = r2;    // yyyy
-            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
-            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
-            r4 = byteop1p(r1:0, r3:2)     ||  w[i2++] = r6.l; // uu
-3:          r5 = byteop1p(r1:0, r3:2) (r) ||  w[i3++] = r6.h; // vv
-
-          i0 += m0;
-          i1 += m0;
-          i2 += m1;
-          i3 += m1;
-          p0 = p0 + p2;
-1:        p1 = p1 + p2;
-
-        (r7:4,p5:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(uyvytoyv12)
-
-/*
- * yuyvtoyv12: split packed YUYV 4:2:2 into separate Y, U and V planes,
- * emitting one chroma line per two source lines.
- *
- * src, ydst and udst are taken from r0-r2; the remaining arguments are read
- * from the stack via the ARG_* offsets. Two source lines are processed per
- * outer iteration (height >> 1 iterations). Each inner iteration (width >> 2
- * of them) consumes 8 bytes from each line, stores 4 luma bytes to each of
- * the two luma lines and 2 bytes each to the U and V planes.
- *
- * Compared with uyvytoyv12 the roles of the byte lanes are swapped: luma is
- * packed from the unshifted words and the chroma is taken after the >> 8.
- *
- * NOTE(review): after the inner loop p0/p1 have already advanced by width
- * bytes and are then bumped by lumStride only, so they land on the next
- * line pair only when width == lumStride. The C wrapper in
- * bfin/swscale_bfin.c passes dstStride[0] for both -- confirm no other
- * caller relies on width < lumStride.
- */
-DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         int width, int height,
-                         int lumStride, int chromStride, int srcStride)):
-        link 0;
-        [--sp] = (r7:4,p5:4);
-
-        p0 = r1;       // Y top even
-
-        i2 = r2; // *u
-        r2 = [fp + ARG_vdst];
-        i3 = r2; // *v
-
-        r1 = [fp + ARG_srcStride];
-        r2 = r0 + r1;
-
-        i0 = r0;  // yuyv_T even
-        i1 = r2;  // yuyv_B odd
-
-        p2 = [fp + ARG_lumStride];
-        p1 = p0 + p2;  // Y bot odd
-
-        p5 = [fp + ARG_width];
-        p4 = [fp + ARG_height];
-        r0 = p5;
-        p4 = p4 >> 1;
-        p5 = p5 >> 2;
-
-        r2 = r0 << 1;
-        r1 = r1 << 1;
-        r1 = r1 - r2;  // srcStride + (srcStride - 2*width)
-        r1 += -8;  // i0,i1 have pre-read 8 bytes past the line; compensate
-        m0 = r1;   // source step from the end of one line pair to the next
-
-        r2 = [fp + ARG_chromStride];
-        r0 = r0 >> 1;
-        r2 = r2 - r0;
-        m1 = r2;   // chromStride - width/2: chroma step to the next line
-
-        /*   I0,I1 - src input line pointers
-         *   p0,p1 - luma output line pointers
-         *   I2    - dstU
-         *   I3    - dstV
-         */
-
-        lsetup (0f, 1f) lc1 = p4;   // H/2
-0:        r0 = [i0++] || r2 = [i1++];
-          r1 = [i0++] || r3 = [i1++];
-          r4 = bytepack(r0, r1);
-          r5 = bytepack(r2, r3);
-          lsetup (2f, 3f) lc0 = p5; // W/4
-2:          r0 = r0 >> 8(v) || [p0++] = r4;  // yyyy-even
-            r1 = r1 >> 8(v) || [p1++] = r5;  // yyyy-odd
-            r2 = r2 >> 8(v);
-            r3 = r3 >> 8(v);
-            r4 = byteop1p(r1:0, r3:2);
-            r5 = byteop1p(r1:0, r3:2) (r);
-            r6 = pack(r5.l, r4.l);
-            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
-            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
-            r4 = bytepack(r0, r1)         ||  w[i2++] = r6.l; // uu
-3:          r5 = bytepack(r2, r3)         ||  w[i3++] = r6.h; // vv
-
-          i0 += m0;
-          i1 += m0;
-          i2 += m1;
-          i3 += m1;
-          p0 = p0 + p2;
-1:        p1 = p1 + p2;
-
-        (r7:4,p5:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuyvtoyv12)
diff --git a/ffmpeg/libswscale/bfin/swscale_bfin.c b/ffmpeg/libswscale/bfin/swscale_bfin.c
deleted file mode 100644
index 33c3ec5..0000000
--- a/ffmpeg/libswscale/bfin/swscale_bfin.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
- *
- * Blackfin software video scaler operations
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libswscale/swscale_internal.h"
-
-/*
- * L1CODE tags a declaration with the l1_text attribute when building for
- * FDPIC with CONFIG_SRAM; otherwise it expands to nothing.
- */
-#if defined (__FDPIC__) && CONFIG_SRAM
-#define L1CODE __attribute__((l1_text))
-#else
-#define L1CODE
-#endif
-
-/*
- * Packed UYVY 4:2:2 -> planar Y/U/V conversion, implemented in assembly in
- * bfin/internal_bfin.S.
- */
-int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                       uint8_t *vdst, int width, int height,
-                       int lumStride, int chromStride, int srcStride) L1CODE;
-
-/*
- * Packed YUYV 4:2:2 -> planar Y/U/V conversion, implemented in assembly in
- * bfin/internal_bfin.S.
- */
-int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                       uint8_t *vdst, int width, int height,
-                       int lumStride, int chromStride, int srcStride) L1CODE;
-
-/**
- * Convert one slice of packed UYVY 4:2:2 into the three destination planes
- * by handing it to ff_bfin_uyvytoyv12().
- *
- * The slice start is located from srcSliceY in both the source and the
- * destination planes; the chroma planes use half the luma row offset.
- *
- * @return srcSliceH, unconditionally
- */
-static int uyvytoyv12_unscaled(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dst[], int dstStride[])
-{
-    const int luma_stride   = dstStride[0];
-    const int chroma_stride = dstStride[1];
-    const int packed_stride = srcStride[0];
-
-    const uint8_t *packed = src[0] + packed_stride * srcSliceY;
-    uint8_t *luma = dst[0] + luma_stride * srcSliceY;
-    uint8_t *cb   = dst[1] + chroma_stride * srcSliceY / 2;
-    uint8_t *cr   = dst[2] + dstStride[2] * srcSliceY / 2;
-
-    /* The luma stride doubles as the width handed to the assembly routine. */
-    ff_bfin_uyvytoyv12(packed, luma, cb, cr, luma_stride, srcSliceH,
-                       luma_stride, chroma_stride, packed_stride);
-
-    return srcSliceH;
-}
-
-/**
- * Convert one slice of packed YUYV 4:2:2 into the three destination planes
- * by handing it to ff_bfin_yuyvtoyv12().
- *
- * The slice start is located from srcSliceY in both the source and the
- * destination planes; the chroma planes use half the luma row offset.
- *
- * @return srcSliceH, unconditionally
- */
-static int yuyvtoyv12_unscaled(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dst[], int dstStride[])
-{
-    const int luma_stride   = dstStride[0];
-    const int chroma_stride = dstStride[1];
-    const int packed_stride = srcStride[0];
-
-    const uint8_t *packed = src[0] + packed_stride * srcSliceY;
-    uint8_t *luma = dst[0] + luma_stride * srcSliceY;
-    uint8_t *cb   = dst[1] + chroma_stride * srcSliceY / 2;
-    uint8_t *cr   = dst[2] + dstStride[2] * srcSliceY / 2;
-
-    /* The luma stride doubles as the width handed to the assembly routine. */
-    ff_bfin_yuyvtoyv12(packed, luma, cb, cr, luma_stride, srcSliceH,
-                       luma_stride, chroma_stride, packed_stride);
-
-    return srcSliceH;
-}
-
-/**
- * Install a Blackfin unscaled converter in c->swscale when the destination
- * is YUV420P and the source is UYVY422 or YUYV422. Any other format pair
- * leaves the context untouched.
- */
-av_cold void ff_get_unscaled_swscale_bfin(SwsContext *c)
-{
-    /* Both accelerated paths produce planar 4:2:0 output only. */
-    if (c->dstFormat != AV_PIX_FMT_YUV420P)
-        return;
-
-    switch (c->srcFormat) {
-    case AV_PIX_FMT_UYVY422:
-        av_log(NULL, AV_LOG_VERBOSE,
-               "selecting Blackfin optimized uyvytoyv12_unscaled\n");
-        c->swscale = uyvytoyv12_unscaled;
-        break;
-    case AV_PIX_FMT_YUYV422:
-        av_log(NULL, AV_LOG_VERBOSE,
-               "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
-        c->swscale = yuyvtoyv12_unscaled;
-        break;
-    default:
-        break;
-    }
-}
diff --git a/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c b/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c
deleted file mode 100644
index 2a36ad5..0000000
--- a/ffmpeg/libswscale/bfin/yuv2rgb_bfin.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
- *
- * Blackfin video color space converter operations
- * convert I420 YV12 to RGB in various formats
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/pixdesc.h"
-#include <stdint.h>
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libswscale/swscale_internal.h"
-
-/*
- * L1CODE tags a declaration with the l1_text attribute when building for
- * FDPIC with CONFIG_SRAM; otherwise it expands to nothing.
- */
-#if defined(__FDPIC__) && CONFIG_SRAM
-#define L1CODE __attribute__((l1_text))
-#else
-#define L1CODE
-#endif
-
-/*
- * Per-line YUV -> RGB converters implemented in assembly in
- * bfin/internal_bfin.S. coeffs points at the oy entry of the coefficient
- * table (core_yuv420_rgb() passes &c->oy).
- */
-void ff_bfin_yuv2rgb555_line(const uint8_t *Y, const uint8_t *U,
-                             const uint8_t *V, uint8_t *out,
-                             int w, uint32_t *coeffs) L1CODE;
-
-void ff_bfin_yuv2rgb565_line(const uint8_t *Y, const uint8_t *U,
-                             const uint8_t *V, uint8_t *out,
-                             int w, uint32_t *coeffs) L1CODE;
-
-void ff_bfin_yuv2rgb24_line(const uint8_t *Y, const uint8_t *U,
-                            const uint8_t *V, uint8_t *out,
-                            int w, uint32_t *coeffs) L1CODE;
-
-/* Common signature of the line converters above. */
-typedef void (*ltransform)(const uint8_t *Y, const uint8_t *U, const uint8_t *V,
-                           uint8_t *out, int w, uint32_t *coeffs);
-
-/**
- * Fill the coefficient fields of the context that the assembly line
- * converters read through &c->oy.
- *
- * @param rgb   non-zero keeps the vr/ub/ug/vg coefficients in their natural
- *              slots; zero swaps the red/blue and the two green terms
- * @param masks 555 or 565 selects the matching channel masks; any other
- *              value leaves rmask/gmask/bmask untouched
- */
-static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks)
-{
-    /* keep everything U8.0 for offset calculation */
-    const int luma_offset = (c->yOffset & 0xffff) >> 3;
-
-    /* replicate the 8-bit offsets into all four byte lanes */
-    c->oc = 128 * 0x01010101U;
-    c->oy = luma_offset * 0x01010101U;
-
-    /* copy 64bit vector coeffs down to 32bit vector coeffs */
-    c->cy   = c->yCoeff;
-    c->zero = 0;
-
-    c->crv = rgb ? c->vrCoeff : c->ubCoeff;
-    c->cbu = rgb ? c->ubCoeff : c->vrCoeff;
-    c->cgu = rgb ? c->ugCoeff : c->vgCoeff;
-    c->cgv = rgb ? c->vgCoeff : c->ugCoeff;
-
-    /* each 16-bit mask is duplicated into both halves of the word */
-    switch (masks) {
-    case 555:
-        c->rmask = 0x001f * 0x00010001U;
-        c->gmask = 0x03e0 * 0x00010001U;
-        c->bmask = 0x7c00 * 0x00010001U;
-        break;
-    case 565:
-        c->rmask = 0x001f * 0x00010001U;
-        c->gmask = 0x07e0 * 0x00010001U;
-        c->bmask = 0xf800 * 0x00010001U;
-        break;
-    default:
-        break;
-    }
-}
-
-/**
- * Shared slice driver for the Blackfin YUV420 -> RGB converters.
- *
- * Prepares the coefficient table, then calls the line converter lcscf twice
- * per chroma row: two consecutive luma rows share one row of each chroma
- * plane. srcSliceH >> 1 row pairs are processed.
- *
- * @param rgb   forwarded to bfin_prepare_coefficients(); also selects which
- *              of in[1] / in[2] is passed as the U and which as the V pointer
- * @param masks forwarded to bfin_prepare_coefficients()
- * @return srcSliceH, unconditionally
- */
-static int core_yuv420_rgb(SwsContext *c, const uint8_t **in, int *instrides,
-                           int srcSliceY, int srcSliceH, uint8_t **oplanes,
-                           int *outstrides, ltransform lcscf,
-                           int rgb, int masks)
-{
-    /* The luma stride is what gets handed to the line converter as width. */
-    const int line_width = instrides[0];
-    const uint8_t *luma;
-    const uint8_t *chroma_u;
-    const uint8_t *chroma_v;
-    uint8_t *out_row;
-    int pairs_left;
-
-    bfin_prepare_coefficients(c, rgb, masks);
-
-    luma     = in[0];
-    chroma_u = in[1 + (1 ^ rgb)];
-    chroma_v = in[1 + (0 ^ rgb)];
-    out_row  = oplanes[0] + srcSliceY * outstrides[0];
-
-    for (pairs_left = srcSliceH >> 1; pairs_left > 0; pairs_left--) {
-        /* first row of the pair */
-        lcscf(luma, chroma_u, chroma_v, out_row, line_width, &c->oy);
-        luma    += instrides[0];
-        out_row += outstrides[0];
-
-        /* second row of the pair reuses the same chroma row */
-        lcscf(luma, chroma_u, chroma_v, out_row, line_width, &c->oy);
-        luma     += instrides[0];
-        chroma_u += instrides[1];
-        chroma_v += instrides[2];
-        out_row  += outstrides[0];
-    }
-
-    return srcSliceH;
-}
-
-/* Slice converter: core_yuv420_rgb() with the 555 line routine, rgb = 1. */
-static int bfin_yuv420_rgb555(SwsContext *c, const uint8_t **in, int *instrides,
-                              int srcSliceY, int srcSliceH,
-                              uint8_t **oplanes, int *outstrides)
-{
-    const ltransform line_fn = ff_bfin_yuv2rgb555_line;
-
-    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
-                           oplanes, outstrides, line_fn, 1, 555);
-}
-
-/* Slice converter: core_yuv420_rgb() with the 555 line routine, rgb = 0. */
-static int bfin_yuv420_bgr555(SwsContext *c, const uint8_t **in, int *instrides,
-                              int srcSliceY, int srcSliceH,
-                              uint8_t **oplanes, int *outstrides)
-{
-    const ltransform line_fn = ff_bfin_yuv2rgb555_line;
-
-    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
-                           oplanes, outstrides, line_fn, 0, 555);
-}
-
-/*
- * Slice converter: core_yuv420_rgb() with the 24-bit line routine, rgb = 1.
- * The masks value 888 matches neither mask set in
- * bfin_prepare_coefficients(), so no channel masks are written.
- */
-static int bfin_yuv420_rgb24(SwsContext *c, const uint8_t **in, int *instrides,
-                             int srcSliceY, int srcSliceH,
-                             uint8_t **oplanes, int *outstrides)
-{
-    const ltransform line_fn = ff_bfin_yuv2rgb24_line;
-
-    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
-                           oplanes, outstrides, line_fn, 1, 888);
-}
-
-/*
- * Slice converter: core_yuv420_rgb() with the 24-bit line routine, rgb = 0.
- * The masks value 888 matches neither mask set in
- * bfin_prepare_coefficients(), so no channel masks are written.
- */
-static int bfin_yuv420_bgr24(SwsContext *c, const uint8_t **in, int *instrides,
-                             int srcSliceY, int srcSliceH,
-                             uint8_t **oplanes, int *outstrides)
-{
-    const ltransform line_fn = ff_bfin_yuv2rgb24_line;
-
-    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
-                           oplanes, outstrides, line_fn, 0, 888);
-}
-
-/* Slice converter: core_yuv420_rgb() with the 565 line routine, rgb = 1. */
-static int bfin_yuv420_rgb565(SwsContext *c, const uint8_t **in, int *instrides,
-                              int srcSliceY, int srcSliceH,
-                              uint8_t **oplanes, int *outstrides)
-{
-    const ltransform line_fn = ff_bfin_yuv2rgb565_line;
-
-    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
-                           oplanes, outstrides, line_fn, 1, 565);
-}
-
-/* Slice converter: core_yuv420_rgb() with the 565 line routine, rgb = 0. */
-static int bfin_yuv420_bgr565(SwsContext *c, const uint8_t **in, int *instrides,
-                              int srcSliceY, int srcSliceH,
-                              uint8_t **oplanes, int *outstrides)
-{
-    const ltransform line_fn = ff_bfin_yuv2rgb565_line;
-
-    return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH,
-                           oplanes, outstrides, line_fn, 0, 565);
-}
-
-/**
- * Pick the Blackfin slice converter matching c->dstFormat.
- *
- * @return the converter for RGB555, BGR555, RGB565, BGR565, RGB24 or BGR24
- *         (after logging the selection), or 0 for any other destination
- *         format
- */
-av_cold SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c)
-{
-    SwsFunc converter = 0;
-
-    switch (c->dstFormat) {
-    case AV_PIX_FMT_RGB555: converter = bfin_yuv420_rgb555; break;
-    case AV_PIX_FMT_BGR555: converter = bfin_yuv420_bgr555; break;
-    case AV_PIX_FMT_RGB565: converter = bfin_yuv420_rgb565; break;
-    case AV_PIX_FMT_BGR565: converter = bfin_yuv420_bgr565; break;
-    case AV_PIX_FMT_RGB24:  converter = bfin_yuv420_rgb24;  break;
-    case AV_PIX_FMT_BGR24:  converter = bfin_yuv420_bgr24;  break;
-    default:
-        break;
-    }
-
-    /* Unsupported destination format: report nothing and hand back 0. */
-    if (!converter)
-        return 0;
-
-    av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
-           av_get_pix_fmt_name(c->dstFormat));
-
-    return converter;
-}
diff --git a/ffmpeg/libswscale/colorspace-test.c b/ffmpeg/libswscale/colorspace-test.c
deleted file mode 100644
index 42a915b..0000000
--- a/ffmpeg/libswscale/colorspace-test.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <string.h>              /* for memset() */
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "swscale.h"
-#include "rgb2rgb.h"
-#include "libavutil/mem.h"
-
-#define SIZE    1000
-#define srcByte 0x55
-#define dstByte 0xBB
-
-#define FUNC(s, d, n) { s, d, #n, n }
-
-/*
- * Memory-corruption test for the rgb2rgb converters.
- *
- * Every converter listed in func_info is run for each width from 63 down to
- * 1 and for source/destination offsets 128..192 (step 4) inside two
- * SIZE-byte buffers.  The source buffer is pre-filled with srcByte and the
- * destination buffer with dstByte.  After each call the test verifies that
- * the source buffer is untouched and that no destination byte before
- * dstOffset, or at/after dstOffset + width * dst_bpp, was modified.
- *
- * Returns the number of converters that failed the check, or -1 if the
- * work buffers could not be allocated.
- */
-int main(int argc, char **argv)
-{
-    int i, funcNum;
-    uint8_t *srcBuffer = av_malloc(SIZE);
-    uint8_t *dstBuffer = av_malloc(SIZE);
-    int failedNum      = 0;
-    int passedNum      = 0;
-
-    if (!srcBuffer || !dstBuffer) {
-        /* av_free() accepts NULL, so this releases whichever one succeeded. */
-        av_free(srcBuffer);
-        av_free(dstBuffer);
-        return -1;
-    }
-
-    av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
-    sws_rgb2rgb_init();
-
-    for (funcNum = 0; ; funcNum++) {
-        /* Table of converters under test; the entry with a NULL func ends it. */
-        struct func_info_s {
-            int src_bpp;
-            int dst_bpp;
-            const char *name;
-            void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
-        } func_info[] = {
-            FUNC(2, 2, rgb12to15),
-            FUNC(2, 2, rgb15to16),
-            FUNC(2, 3, rgb15to24),
-            FUNC(2, 4, rgb15to32),
-            FUNC(2, 3, rgb16to24),
-            FUNC(2, 4, rgb16to32),
-            FUNC(3, 2, rgb24to15),
-            FUNC(3, 2, rgb24to16),
-            FUNC(3, 4, rgb24to32),
-            FUNC(4, 2, rgb32to15),
-            FUNC(4, 2, rgb32to16),
-            FUNC(4, 3, rgb32to24),
-            FUNC(2, 2, rgb16to15),
-            FUNC(2, 2, rgb12tobgr12),
-            FUNC(2, 2, rgb15tobgr15),
-            FUNC(2, 2, rgb15tobgr16),
-            FUNC(2, 3, rgb15tobgr24),
-            FUNC(2, 4, rgb15tobgr32),
-            FUNC(2, 2, rgb16tobgr15),
-            FUNC(2, 2, rgb16tobgr16),
-            FUNC(2, 3, rgb16tobgr24),
-            FUNC(2, 4, rgb16tobgr32),
-            FUNC(3, 2, rgb24tobgr15),
-            FUNC(3, 2, rgb24tobgr16),
-            FUNC(3, 3, rgb24tobgr24),
-            FUNC(3, 4, rgb24tobgr32),
-            FUNC(4, 2, rgb32tobgr15),
-            FUNC(4, 2, rgb32tobgr16),
-            FUNC(4, 3, rgb32tobgr24),
-            FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */
-            FUNC(6, 6, rgb48tobgr48_nobswap),
-            FUNC(6, 6, rgb48tobgr48_bswap),
-            FUNC(8, 6, rgb64to48_nobswap),
-            FUNC(8, 6, rgb64to48_bswap),
-            FUNC(8, 6, rgb64tobgr48_nobswap),
-            FUNC(8, 6, rgb64tobgr48_bswap),
-            FUNC(0, 0, NULL)
-        };
-        int width;
-        int failed = 0;
-        int srcBpp = 0;
-        int dstBpp = 0;
-
-        if (!func_info[funcNum].func)
-            break;
-
-        av_log(NULL, AV_LOG_INFO, ".");
-        memset(srcBuffer, srcByte, SIZE);
-
-        for (width = 63; width > 0; width--) {
-            int dstOffset;
-            for (dstOffset = 128; dstOffset < 196; dstOffset += 4) {
-                int srcOffset;
-                /* The destination is refilled once per destination offset. */
-                memset(dstBuffer, dstByte, SIZE);
-
-                for (srcOffset = 128; srcOffset < 196; srcOffset += 4) {
-                    uint8_t *src     = srcBuffer + srcOffset;
-                    uint8_t *dst     = dstBuffer + dstOffset;
-                    const char *name = NULL;
-
-                    /* one report per converter is enough; don't flood the log */
-                    if (failed)
-                        break;
-
-                    srcBpp = func_info[funcNum].src_bpp;
-                    dstBpp = func_info[funcNum].dst_bpp;
-                    name   = func_info[funcNum].name;
-
-                    func_info[funcNum].func(src, dst, width * srcBpp);
-
-                    if (!srcBpp)
-                        break;
-
-                    /* the converter must never write to its input */
-                    for (i = 0; i < SIZE; i++) {
-                        if (srcBuffer[i] != srcByte) {
-                            av_log(NULL, AV_LOG_INFO,
-                                   "src damaged at %d w:%d src:%d dst:%d %s\n",
-                                   i, width, srcOffset, dstOffset, name);
-                            failed = 1;
-                            break;
-                        }
-                    }
-                    /* nothing may be written before the output start ... */
-                    for (i = 0; i < dstOffset; i++) {
-                        if (dstBuffer[i] != dstByte) {
-                            av_log(NULL, AV_LOG_INFO,
-                                   "dst damaged at %d w:%d src:%d dst:%d %s\n",
-                                   i, width, srcOffset, dstOffset, name);
-                            failed = 1;
-                            break;
-                        }
-                    }
-                    /* ... nor past the last expected output byte */
-                    for (i = dstOffset + width * dstBpp; i < SIZE; i++) {
-                        if (dstBuffer[i] != dstByte) {
-                            av_log(NULL, AV_LOG_INFO,
-                                   "dst damaged at %d w:%d src:%d dst:%d %s\n",
-                                   i, width, srcOffset, dstOffset, name);
-                            failed = 1;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-        if (failed)
-            failedNum++;
-        else if (srcBpp)
-            passedNum++;
-    }
-
-    av_log(NULL, AV_LOG_INFO,
-           "\n%d converters passed, %d converters randomly overwrote memory\n",
-           passedNum, failedNum);
-
-    av_free(srcBuffer);
-    av_free(dstBuffer);
-    return failedNum;
-}
diff --git a/ffmpeg/libswscale/input.c b/ffmpeg/libswscale/input.c
deleted file mode 100644
index 919b232..0000000
--- a/ffmpeg/libswscale/input.c
+++ /dev/null
@@ -1,1307 +0,0 @@
-/*
- * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <math.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "libavutil/avutil.h"
-#include "libavutil/bswap.h"
-#include "libavutil/cpu.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/mathematics.h"
-#include "libavutil/pixdesc.h"
-#include "libavutil/avassert.h"
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-/* Read the 16-bit value at pos as big- or little-endian, as isBE(origin)
- * dictates.  Expects a variable named origin to be in scope. */
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-
-/* r and b select the red/blue value from the locals r_b and b_r: for the
- * BGR48 origins r is b_r and b is r_b, for every other origin r is r_b and
- * b is b_r.  Both expect origin, r_b and b_r to be in scope where used. */
-#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? b_r : r_b)
-#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? r_b : b_r)
-
-/*
- * Luma from packed pixels made of four 16-bit values each.
- *
- * For every pixel the first three values are read with input_pixel()
- * (endianness taken from origin), weighted with the RY/GY/BY entries of
- * rgb2yuv, rounded and shifted down by RGB2YUV_SHIFT.  The fourth value of
- * each pixel is not read.  width is the number of pixels to convert.
- */
-static av_always_inline void
-rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
-                    enum AVPixelFormat origin, int32_t *rgb2yuv)
-{
-    const int32_t ry = rgb2yuv[RY_IDX];
-    const int32_t gy = rgb2yuv[GY_IDX];
-    const int32_t by = rgb2yuv[BY_IDX];
-    const int bias   = 0x2001<<(RGB2YUV_SHIFT-1);
-    int x;
-
-    for (x = 0; x < width; x++) {
-        const uint16_t *px = src + x*4;
-        /* r_b / b_r must keep these names: the r and b macros refer to them */
-        unsigned int r_b = input_pixel(&px[0]);
-        unsigned int   g = input_pixel(&px[1]);
-        unsigned int b_r = input_pixel(&px[2]);
-
-        dst[x] = (ry*r + gy*g + by*b + bias) >> RGB2YUV_SHIFT;
-    }
-}
-
-/*
- * Chroma (U and V) from packed pixels made of four 16-bit values each, one
- * output sample per input pixel.
- *
- * Only src1 is read; src2 is merely asserted to be equal to it.  The first
- * three values of each pixel are weighted with the RU/GU/BU and RV/GV/BV
- * entries of rgb2yuv, rounded and shifted down by RGB2YUV_SHIFT.
- */
-static av_always_inline void
-rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
-                    const uint16_t *src1, const uint16_t *src2,
-                    int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
-{
-    const int32_t ru = rgb2yuv[RU_IDX], rv = rgb2yuv[RV_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX], gv = rgb2yuv[GV_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX], bv = rgb2yuv[BV_IDX];
-    const int bias   = 0x10001<<(RGB2YUV_SHIFT-1);
-    int x;
-
-    av_assert1(src1==src2);
-    for (x = 0; x < width; x++) {
-        const uint16_t *px = src1 + x*4;
-        /* r_b / b_r must keep these names: the r and b macros refer to them */
-        int r_b = input_pixel(&px[0]);
-        int   g = input_pixel(&px[1]);
-        int b_r = input_pixel(&px[2]);
-
-        dstU[x] = (ru*r + gu*g + bu*b + bias) >> RGB2YUV_SHIFT;
-        dstV[x] = (rv*r + gv*g + bv*b + bias) >> RGB2YUV_SHIFT;
-    }
-}
-
-/*
- * Chroma (U and V) from packed pixels made of four 16-bit values each, one
- * output sample per PAIR of input pixels.
- *
- * Each component is the rounded average of the two horizontally adjacent
- * pixels; the averages are then weighted exactly as in the full-resolution
- * variant.  Only src1 is read; src2 is merely asserted to be equal to it.
- * width counts output samples, so 2 * width input pixels are consumed.
- */
-static av_always_inline void
-rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
-                          const uint16_t *src1, const uint16_t *src2,
-                          int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
-{
-    const int32_t ru = rgb2yuv[RU_IDX], rv = rgb2yuv[RV_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX], gv = rgb2yuv[GV_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX], bv = rgb2yuv[BV_IDX];
-    const int bias   = 0x10001<<(RGB2YUV_SHIFT-1);
-    int x;
-
-    av_assert1(src1==src2);
-    for (x = 0; x < width; x++) {
-        const uint16_t *pair = src1 + 8 * x;
-        /* r_b / b_r must keep these names: the r and b macros refer to them */
-        int r_b = (input_pixel(&pair[0]) + input_pixel(&pair[4]) + 1) >> 1;
-        int   g = (input_pixel(&pair[1]) + input_pixel(&pair[5]) + 1) >> 1;
-        int b_r = (input_pixel(&pair[2]) + input_pixel(&pair[6]) + 1) >> 1;
-
-        dstU[x] = (ru*r + gu*g + bu*b + bias) >> RGB2YUV_SHIFT;
-        dstV[x] = (rv*r + gv*g + bv*b + bias) >> RGB2YUV_SHIFT;
-    }
-}
-
-/*
- * rgb64funcs(pattern, BE_LE, origin) defines three byte-pointer wrappers,
- * <pattern>64<BE_LE>ToY_c, ...ToUV_c and ...ToUV_half_c.  Each casts its
- * uint8_t buffers to uint16_t and forwards to the matching rgb64To*_c_template
- * with the given origin pixel format.  The unusedN parameters are ignored.
- * Note that the wrappers declare rgb2yuv as uint32_t * while the templates
- * take int32_t *.
- */
-#define rgb64funcs(pattern, BE_LE, origin) \
-static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
-                                    int width, uint32_t *rgb2yuv) \
-{ \
-    const uint16_t *src = (const uint16_t *) _src; \
-    uint16_t *dst = (uint16_t *) _dst; \
-    rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
-} \
- \
-static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
-                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
-                                    int width, uint32_t *rgb2yuv) \
-{ \
-    const uint16_t *src1 = (const uint16_t *) _src1, \
-                   *src2 = (const uint16_t *) _src2; \
-    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
-} \
- \
-static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
-                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
-                                    int width, uint32_t *rgb2yuv) \
-{ \
-    const uint16_t *src1 = (const uint16_t *) _src1, \
-                   *src2 = (const uint16_t *) _src2; \
-    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
-}
-
-/* Instantiations: rgb64LETo{Y,UV,UV_half}_c and rgb64BETo{Y,UV,UV_half}_c. */
-rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
-rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE)
-
-/*
- * Luma from packed pixels made of three 16-bit values each.
- *
- * The three values of every pixel are read with input_pixel() (endianness
- * taken from origin), mapped to red/blue through the r and b macros,
- * weighted with the RY/GY/BY entries of rgb2yuv, rounded and shifted down
- * by RGB2YUV_SHIFT.  width is the number of pixels to convert.
- */
-static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
-                                                 const uint16_t *src, int width,
-                                                 enum AVPixelFormat origin,
-                                                 int32_t *rgb2yuv)
-{
-    const int32_t ry = rgb2yuv[RY_IDX];
-    const int32_t gy = rgb2yuv[GY_IDX];
-    const int32_t by = rgb2yuv[BY_IDX];
-    const int bias   = 0x2001 << (RGB2YUV_SHIFT - 1);
-    int x;
-
-    for (x = 0; x < width; x++) {
-        const uint16_t *px = src + x * 3;
-        /* r_b / b_r must keep these names: the r and b macros refer to them */
-        unsigned int r_b = input_pixel(&px[0]);
-        unsigned int g   = input_pixel(&px[1]);
-        unsigned int b_r = input_pixel(&px[2]);
-
-        dst[x] = (ry*r + gy*g + by*b + bias) >> RGB2YUV_SHIFT;
-    }
-}
-
-/*
- * Chroma (U and V) from packed pixels made of three 16-bit values each, one
- * output sample per input pixel.
- *
- * Only src1 is read; src2 is merely asserted to be equal to it.  The
- * components are weighted with the RU/GU/BU and RV/GV/BV entries of
- * rgb2yuv, rounded and shifted down by RGB2YUV_SHIFT.
- */
-static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
-                                                  uint16_t *dstV,
-                                                  const uint16_t *src1,
-                                                  const uint16_t *src2,
-                                                  int width,
-                                                  enum AVPixelFormat origin,
-                                                  int32_t *rgb2yuv)
-{
-    const int32_t ru = rgb2yuv[RU_IDX], rv = rgb2yuv[RV_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX], gv = rgb2yuv[GV_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX], bv = rgb2yuv[BV_IDX];
-    const int bias   = 0x10001 << (RGB2YUV_SHIFT - 1);
-    int x;
-
-    av_assert1(src1 == src2);
-    for (x = 0; x < width; x++) {
-        const uint16_t *px = src1 + x * 3;
-        /* r_b / b_r must keep these names: the r and b macros refer to them */
-        int r_b = input_pixel(&px[0]);
-        int g   = input_pixel(&px[1]);
-        int b_r = input_pixel(&px[2]);
-
-        dstU[x] = (ru*r + gu*g + bu*b + bias) >> RGB2YUV_SHIFT;
-        dstV[x] = (rv*r + gv*g + bv*b + bias) >> RGB2YUV_SHIFT;
-    }
-}
-
-/*
- * Chroma (U and V) from packed pixels made of three 16-bit values each, one
- * output sample per PAIR of input pixels.
- *
- * Each component is the rounded average of the two horizontally adjacent
- * pixels; the averages are then weighted exactly as in the full-resolution
- * variant.  Only src1 is read; src2 is merely asserted to be equal to it.
- * width counts output samples, so 2 * width input pixels are consumed.
- */
-static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
-                                                       uint16_t *dstV,
-                                                       const uint16_t *src1,
-                                                       const uint16_t *src2,
-                                                       int width,
-                                                       enum AVPixelFormat origin,
-                                                       int32_t *rgb2yuv)
-{
-    const int32_t ru = rgb2yuv[RU_IDX], rv = rgb2yuv[RV_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX], gv = rgb2yuv[GV_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX], bv = rgb2yuv[BV_IDX];
-    const int bias   = 0x10001 << (RGB2YUV_SHIFT - 1);
-    int x;
-
-    av_assert1(src1 == src2);
-    for (x = 0; x < width; x++) {
-        const uint16_t *pair = src1 + 6 * x;
-        /* r_b / b_r must keep these names: the r and b macros refer to them */
-        int r_b = (input_pixel(&pair[0]) + input_pixel(&pair[3]) + 1) >> 1;
-        int g   = (input_pixel(&pair[1]) + input_pixel(&pair[4]) + 1) >> 1;
-        int b_r = (input_pixel(&pair[2]) + input_pixel(&pair[5]) + 1) >> 1;
-
-        dstU[x] = (ru*r + gu*g + bu*b + bias) >> RGB2YUV_SHIFT;
-        dstV[x] = (rv*r + gv*g + bv*b + bias) >> RGB2YUV_SHIFT;
-    }
-}
-
-#undef r
-#undef b
-#undef input_pixel
-
-/*
- * rgb48funcs(pattern, BE_LE, origin) defines three byte-pointer wrappers,
- * <pattern>48<BE_LE>ToY_c, ...ToUV_c and ...ToUV_half_c.  Each casts its
- * uint8_t buffers to uint16_t and forwards to the matching rgb48To*_c_template
- * with the given origin pixel format.  The unusedN parameters are ignored.
- * Note that the wrappers declare rgb2yuv as uint32_t * while the templates
- * take int32_t *.
- */
-#define rgb48funcs(pattern, BE_LE, origin)                              \
-static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst,              \
-                                            const uint8_t *_src,        \
-                                            const uint8_t *unused0, const uint8_t *unused1,\
-                                            int width,                  \
-                                            uint32_t *rgb2yuv)          \
-{                                                                       \
-    const uint16_t *src = (const uint16_t *)_src;                       \
-    uint16_t *dst       = (uint16_t *)_dst;                             \
-    rgb48ToY_c_template(dst, src, width, origin, rgb2yuv);              \
-}                                                                       \
-                                                                        \
-static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU,            \
-                                             uint8_t *_dstV,            \
-                                             const uint8_t *unused0,    \
-                                             const uint8_t *_src1,      \
-                                             const uint8_t *_src2,      \
-                                             int width,                 \
-                                             uint32_t *rgb2yuv)         \
-{                                                                       \
-    const uint16_t *src1 = (const uint16_t *)_src1,                     \
-                   *src2 = (const uint16_t *)_src2;                     \
-    uint16_t *dstU = (uint16_t *)_dstU,                                 \
-             *dstV = (uint16_t *)_dstV;                                 \
-    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv);        \
-}                                                                       \
-                                                                        \
-static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU,       \
-                                                  uint8_t *_dstV,       \
-                                                  const uint8_t *unused0,    \
-                                                  const uint8_t *_src1, \
-                                                  const uint8_t *_src2, \
-                                                  int width,            \
-                                                  uint32_t *rgb2yuv)    \
-{                                                                       \
-    const uint16_t *src1 = (const uint16_t *)_src1,                     \
-                   *src2 = (const uint16_t *)_src2;                     \
-    uint16_t *dstU = (uint16_t *)_dstU,                                 \
-             *dstV = (uint16_t *)_dstV;                                 \
-    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv);   \
-}
-
-/* Instantiations for RGB48 and BGR48, little- and big-endian. */
-rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE)
-rgb48funcs(rgb, BE, AV_PIX_FMT_RGB48BE)
-rgb48funcs(bgr, LE, AV_PIX_FMT_BGR48LE)
-rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
-
-/*
- * Fetch packed pixel number i from src.  For the RGBA, BGRA, ARGB and ABGR
- * origins the pixel is read with AV_RN32A from byte offset 4 * i; for every
- * other origin it is read as a 16-bit value from byte offset 2 * i, big- or
- * little-endian as isBE(origin) dictates.  Expects src and origin to be in
- * scope where the macro is used.
- */
-#define input_pixel(i) ((origin == AV_PIX_FMT_RGBA ||                      \
-                         origin == AV_PIX_FMT_BGRA ||                      \
-                         origin == AV_PIX_FMT_ARGB ||                      \
-                         origin == AV_PIX_FMT_ABGR)                        \
-                        ? AV_RN32A(&src[(i) * 4])                       \
-                        : (isBE(origin) ? AV_RB16(&src[(i) * 2])        \
-                                        : AV_RL16(&src[(i) * 2])))
-
-/*
- * Luma from packed 16- or 32-bit RGB pixels.
- *
- * Each pixel is fetched with input_pixel(), shifted right by shp, and split
- * into components with maskr/maskg/maskb and shr/shg/shb.  The RY/GY/BY
- * entries of rgb2yuv are pre-shifted left by rsh/gsh/bsh; the weighted sum
- * is rounded with rnd and shifted down by S - 6.
- */
-static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
-                                                    const uint8_t *src,
-                                                    int width,
-                                                    enum AVPixelFormat origin,
-                                                    int shr, int shg,
-                                                    int shb, int shp,
-                                                    int maskr, int maskg,
-                                                    int maskb, int rsh,
-                                                    int gsh, int bsh, int S,
-                                                    int32_t *rgb2yuv)
-{
-    const int ry       = rgb2yuv[RY_IDX]<<rsh;
-    const int gy       = rgb2yuv[GY_IDX]<<gsh;
-    const int by       = rgb2yuv[BY_IDX]<<bsh;
-    const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
-    int x;
-
-    for (x = 0; x < width; x++) {
-        int pixel = input_pixel(x) >> shp;
-        int blue  = (pixel & maskb) >> shb;
-        int green = (pixel & maskg) >> shg;
-        int red   = (pixel & maskr) >> shr;
-
-        dst[x] = (ry * red + gy * green + by * blue + rnd) >> ((S)-6);
-    }
-}
-
-/*
- * Chroma (U and V) from packed 16- or 32-bit RGB pixels, one output sample
- * per input pixel.
- *
- * Each pixel is fetched with input_pixel(), shifted right by shp, and split
- * into components with maskr/maskg/maskb and shr/shg/shb.  The RU/GU/BU and
- * RV/GV/BV entries of rgb2yuv are pre-shifted left by rsh/gsh/bsh; each
- * weighted sum is rounded with rnd and shifted down by S - 6.
- */
-static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
-                                                     int16_t *dstV,
-                                                     const uint8_t *src,
-                                                     int width,
-                                                     enum AVPixelFormat origin,
-                                                     int shr, int shg,
-                                                     int shb, int shp,
-                                                     int maskr, int maskg,
-                                                     int maskb, int rsh,
-                                                     int gsh, int bsh, int S,
-                                                     int32_t *rgb2yuv)
-{
-    const int ru       = rgb2yuv[RU_IDX] << rsh;
-    const int gu       = rgb2yuv[GU_IDX] << gsh;
-    const int bu       = rgb2yuv[BU_IDX] << bsh;
-    const int rv       = rgb2yuv[RV_IDX] << rsh;
-    const int gv       = rgb2yuv[GV_IDX] << gsh;
-    const int bv       = rgb2yuv[BV_IDX] << bsh;
-    const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
-    int x;
-
-    for (x = 0; x < width; x++) {
-        int pixel = input_pixel(x) >> shp;
-        int blue  = (pixel & maskb) >> shb;
-        int green = (pixel & maskg) >> shg;
-        int red   = (pixel & maskr) >> shr;
-
-        dstU[x] = (ru * red + gu * green + bu * blue + rnd) >> ((S)-6);
-        dstV[x] = (rv * red + gv * green + bv * blue + rnd) >> ((S)-6);
-    }
-}
-
-/*
- * Chroma (U and V) from packed 16- or 32-bit RGB pixels, one output sample
- * per PAIR of input pixels.
- *
- * The two pixels of a pair are added as whole words and the component sums
- * are extracted from that total, so the masks are widened by one bit.  The
- * final shift is one larger than in rgb16_32ToUV_c_template, which halves
- * the two-pixel sum.  width counts output samples.
- */
-static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
-                                                          int16_t *dstV,
-                                                          const uint8_t *src,
-                                                          int width,
-                                                          enum AVPixelFormat origin,
-                                                          int shr, int shg,
-                                                          int shb, int shp,
-                                                          int maskr, int maskg,
-                                                          int maskb, int rsh,
-                                                          int gsh, int bsh, int S,
-                                                          int32_t *rgb2yuv)
-{
-    /* maskgx (every bit that is neither red nor blue) is derived from the
-     * masks as passed in, before they are widened below. */
-    const int ru       = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh,
-              rv       = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh,
-              maskgx   = ~(maskr | maskb);
-    const unsigned rnd = (256U<<(S)) + (1<<(S-6));
-    int i;
-
-    /* A sum of two components needs one more bit than a single component. */
-    maskr |= maskr << 1;
-    maskb |= maskb << 1;
-    maskg |= maskg << 1;
-    for (i = 0; i < width; i++) {
-        int px0 = input_pixel(2 * i + 0) >> shp;
-        int px1 = input_pixel(2 * i + 1) >> shp;
-        /* g: sum of the non-red/blue bits; rb: sum of the red/blue bits */
-        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
-        int rb = px0 + px1 - g;
-
-        b = (rb & maskb) >> shb;
-        /* With shp != 0 or a 565 origin the green sum is shifted without
-         * masking; otherwise it is masked with the widened maskg first. */
-        if (shp ||
-            origin == AV_PIX_FMT_BGR565LE || origin == AV_PIX_FMT_BGR565BE ||
-            origin == AV_PIX_FMT_RGB565LE || origin == AV_PIX_FMT_RGB565BE) {
-            g >>= shg;
-        } else {
-            g = (g & maskg) >> shg;
-        }
-        r = (rb & maskr) >> shr;
-
-        dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
-        dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
-    }
-}
-
-#undef input_pixel
-
-/*
- * rgb16_32_wrapper() defines name##ToY_c, name##ToUV_c and
- * name##ToUV_half_c.  Each forwards to the matching rgb16_32To*_c_template
- * with the pixel format, component shifts (shr/shg/shb), pixel shift (shp),
- * component masks, coefficient shifts (rsh/gsh/bsh) and scale S fixed to
- * the macro arguments.  The unusedN and dummy parameters are ignored.  tab
- * is declared uint32_t * here while the templates take int32_t *.
- */
-#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr,          \
-                         maskg, maskb, rsh, gsh, bsh, S)                \
-static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,            \
-                          int width, uint32_t *tab)                     \
-{                                                                       \
-    rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp,    \
-                           maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \
-}                                                                       \
-                                                                        \
-static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV,                \
-                           const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy,    \
-                           int width, uint32_t *tab)                    \
-{                                                                       \
-    rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,                \
-                            shr, shg, shb, shp,                         \
-                            maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\
-}                                                                       \
-                                                                        \
-static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV,           \
-                                const uint8_t *unused0, const uint8_t *src,                     \
-                                const uint8_t *dummy,                   \
-                                int width, uint32_t *tab)               \
-{                                                                       \
-    rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,           \
-                                 shr, shg, shb, shp,                    \
-                                 maskr, maskg, maskb,                   \
-                                 rsh, gsh, bsh, S, tab);                \
-}
-
-/* Columns: fmt, name, shr, shg, shb, shp, maskr, maskg, maskb, rsh, gsh, bsh, S */
-rgb16_32_wrapper(AV_PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
-
-/*
- * Chroma (U and V) from three separate 8-bit planes (green, blue, red), one
- * output sample per PAIR of input samples.
- *
- * The two neighbouring samples of each plane are added (not averaged); the
- * extra factor of two is removed by the final shift of
- * RGB2YUV_SHIFT - 6 + 1.  Results are stored as 16-bit values.  width counts
- * output samples.
- */
-static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
-                         const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
-                         int width, uint32_t *rgb2yuv)
-{
-    uint16_t *outU = (uint16_t *)_dstU;
-    uint16_t *outV = (uint16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX], rv = rgb2yuv[RV_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX], gv = rgb2yuv[GV_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX], bv = rgb2yuv[BV_IDX];
-    const int bias   = 0x4001<<(RGB2YUV_SHIFT-6);
-    int x;
-
-    for (x = 0; x < width; x++) {
-        const int first       = 2*x;
-        unsigned int green    = gsrc[first] + gsrc[first+1];
-        unsigned int blue     = bsrc[first] + bsrc[first+1];
-        unsigned int red      = rsrc[first] + rsrc[first+1];
-
-        outU[x] = (ru*red + gu*green + bu*blue + bias) >> (RGB2YUV_SHIFT-6+1);
-        outV[x] = (rv*red + gv*green + bv*blue + bias) >> (RGB2YUV_SHIFT-6+1);
-    }
-}
-
-/*
- * Copy the fourth 16-bit value of every four-value pixel in _src to the
- * 16-bit output _dst.  The values are read in native byte order and stored
- * unchanged.  width is the number of pixels; the unused parameters are
- * ignored.
- */
-static void rgba64ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
-                        const uint8_t *unused2, int width, uint32_t *unused)
-{
-    const uint16_t *in = (const uint16_t *)_src;
-    int16_t *out       = (int16_t *)_dst;
-    int x;
-
-    for (x = 0; x < width; x++) {
-        out[x] = in[4 * x + 3];
-    }
-}
-
-/*
- * Take the first byte of every 4-byte pixel in src, shift it left by 6 and
- * store it as a 16-bit value in _dst.  width is the number of pixels; the
- * unused parameters are ignored.
- */
-static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
-{
-    int16_t *out = (int16_t *)_dst;
-    int x;
-
-    for (x = 0; x < width; x++)
-        out[x] = src[4 * x] << 6;
-}
-
-/*
- * Take the fourth byte of every 4-byte pixel in src, shift it left by 6 and
- * store it as a 16-bit value in _dst.  width is the number of pixels; the
- * unused parameters are ignored.
- */
-static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
-{
-    int16_t *out = (int16_t *)_dst;
-    int x;
-
-    for (x = 0; x < width; x++)
-        out[x] = src[4 * x + 3] << 6;
-}
-
-/*
- * For every index byte in src, look the entry up in pal, take its top
- * 8 bits (bits 24..31), shift them left by 6 and store the result as a
- * 16-bit value in _dst.  width is the number of pixels; the unused
- * parameters are ignored.
- */
-static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
-{
-    int16_t *out = (int16_t *)_dst;
-    int x;
-
-    for (x = 0; x < width; x++) {
-        const int index = src[x];
-
-        out[x] = (pal[index] >> 24) << 6;
-    }
-}
-
-/*
- * For every index byte in src, look the entry up in pal, take its low
- * 8 bits, shift them left by 6 and store the result as a 16-bit value in
- * _dst.  width is the number of pixels; the unused parameters are ignored.
- */
-static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
-{
-    int16_t *out = (int16_t *)_dst;
-    int x;
-
-    for (x = 0; x < width; x++) {
-        const int index = src[x];
-
-        out[x] = (pal[index] & 0xFF) << 6;
-    }
-}
-
-/*
- * For every index byte in src1, look the entry up in pal and store bits
- * 8..15 of it (shifted left by 6) in _dstU and bits 16..23 of it (shifted
- * left by 6) in _dstV, both as 16-bit values.  src2 is only asserted to be
- * equal to src1; unused0 is ignored.
- */
-static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV,
-                           const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
-                      int width, uint32_t *pal)
-{
-    uint16_t *outU = (uint16_t *)_dstU;
-    int16_t *outV  = (int16_t *)_dstV;
-    int x;
-
-    av_assert1(src1 == src2);
-    for (x = 0; x < width; x++) {
-        int entry = pal[src1[x]];
-
-        outU[x] = (uint8_t)(entry>> 8)<<6;
-        outV[x] = (uint8_t)(entry>>16)<<6;
-    }
-}
-
-/*
- * Expand 1-bit-per-pixel data to 16-bit samples, with a source bit of 0
- * mapping to 16383 and a source bit of 1 mapping to 0.  Bits are consumed
- * most-significant first.
- *
- * Exactly width samples are written: whole source bytes first, then the
- * leading width % 8 bits of the following byte.  The previous code rounded
- * width up to bytes and then tested the BYTE count with "& 7", so it read
- * one source byte past the row and wrote samples beyond width.
- * The unused parameters are ignored.
- */
-static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
-{
-    int16_t *dst = (int16_t *)_dst;
-    int full_bytes, tail_bits;
-    int i, j;
-
-    if (width <= 0)
-        return;
-
-    full_bytes = width >> 3;
-    tail_bits  = width & 7;
-
-    for (i = 0; i < full_bytes; i++) {
-        int d = ~src[i];
-        for (j = 0; j < 8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1) * 16383;
-    }
-    /* i == full_bytes here: convert the partially used last byte, if any */
-    if (tail_bits) {
-        int d = ~src[i];
-        for (j = 0; j < tail_bits; j++)
-            dst[8*i+j]= ((d>>(7-j))&1) * 16383;
-    }
-}
-
-/*
- * Expand 1-bit-per-pixel data to 16-bit samples, with a source bit of 1
- * mapping to 16383 and a source bit of 0 mapping to 0.  Bits are consumed
- * most-significant first.
- *
- * Exactly width samples are written: whole source bytes first, then the
- * leading width % 8 bits of the following byte.  The previous code rounded
- * width up to bytes and then tested the BYTE count with "& 7", so it read
- * one source byte past the row and wrote samples beyond width.
- * The unused parameters are ignored.
- */
-static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
-{
-    int16_t *dst = (int16_t *)_dst;
-    int full_bytes, tail_bits;
-    int i, j;
-
-    if (width <= 0)
-        return;
-
-    full_bytes = width >> 3;
-    tail_bits  = width & 7;
-
-    for (i = 0; i < full_bytes; i++) {
-        int d = src[i];
-        for (j = 0; j < 8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1) * 16383;
-    }
-    /* i == full_bytes here: convert the partially used last byte, if any */
-    if (tail_bits) {
-        int d = src[i];
-        for (j = 0; j < tail_bits; j++)
-            dst[8*i+j] = ((d>>(7-j))&1) * 16383;
-    }
-}
-
-/*
- * Copy every even-indexed byte of src (src[0], src[2], ...) to dst.
- * width is the number of bytes written; the unused parameters are ignored.
- */
-static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
-                      uint32_t *unused)
-{
-    int x;
-
-    for (x = 0; x < width; x++) {
-        dst[x] = src[x * 2];
-    }
-}
-
-/*
- * From every 4-byte group of src1, copy byte 1 to dstU and byte 3 to dstV.
- * width is the number of groups.  src2 is only asserted to be equal to
- * src1; the unused parameters are ignored.
- */
-static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
-                       const uint8_t *src2, int width, uint32_t *unused)
-{
-    int x;
-
-    for (x = 0; x < width; x++) {
-        const uint8_t *group = src1 + 4 * x;
-
-        dstU[x] = group[1];
-        dstV[x] = group[3];
-    }
-    av_assert1(src1 == src2);
-}
-
-/*
- * Byte-swap width 16-bit values from _src into _dst using av_bswap16().
- * The unused parameters are ignored.
- */
-static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2,  int width,
-                       uint32_t *unused)
-{
-    const uint16_t *in = (const uint16_t *)_src;
-    uint16_t *out      = (uint16_t *)_dst;
-    int x;
-
-    for (x = 0; x < width; x++) {
-        out[x] = av_bswap16(in[x]);
-    }
-}
-
-/*
- * Byte-swap width 16-bit values from each of two inputs using av_bswap16():
- * _src1 is written to _dstU and _src2 to _dstV.  unused0 and unused are
- * ignored.
- */
-static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
-                        const uint8_t *_src2, int width, uint32_t *unused)
-{
-    const uint16_t *inU = (const uint16_t *)_src1;
-    const uint16_t *inV = (const uint16_t *)_src2;
-    uint16_t *outU      = (uint16_t *)_dstU;
-    uint16_t *outV      = (uint16_t *)_dstV;
-    int x;
-
-    for (x = 0; x < width; x++) {
-        outU[x] = av_bswap16(inU[x]);
-        outV[x] = av_bswap16(inV[x]);
-    }
-}
-
-/* This is almost identical to the yuy2 readers, and exists only because
- * yuy2To(Y/UV)(dst, src + 1, ...) would have 100% unaligned accesses. */
-/* Copy every second byte of src, starting with byte 1, into dst
- * (dst[n] = src[2 * n + 1] for n in [0, width)).  The unused arguments are
- * ignored. */
-static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
-                      uint32_t *unused)
-{
-    int n = 0;
-
-    while (n < width) {
-        const uint8_t *pair = src + 2 * n; /* 2-byte group feeding output n */
-
-        dst[n] = pair[1];
-        n++;
-    }
-}
-
-/* Split a packed row into two chroma rows: for each output n, dstU[n] is
- * byte 0 and dstV[n] is byte 2 of the n-th 4-byte group of src1.  src2 is
- * never read; it is only asserted (av_assert1) to equal src1. */
-static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
-                       const uint8_t *src2, int width, uint32_t *unused)
-{
-    int n = 0;
-
-    while (n < width) {
-        const uint8_t *quad = src1 + 4 * n; /* 4-byte group feeding output n */
-
-        dstU[n] = quad[0];
-        dstV[n] = quad[2];
-        n++;
-    }
-    av_assert1(src1 == src2);
-}
-
-/* De-interleave a row of byte pairs: the first byte of pair n is stored in
- * dst1[n], the second in dst2[n], for n in [0, width). */
-static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
-                                        const uint8_t *src, int width)
-{
-    int n = 0;
-
-    while (n < width) {
-        const uint8_t *pair = src + 2 * n;
-
-        dst1[n] = pair[0];
-        dst2[n] = pair[1];
-        n++;
-    }
-}
-
-/* Chroma reader that forwards to nvXXtoUV_c with dstU first: even source
- * bytes end up in dstU and odd source bytes in dstV.  Only src1 is read;
- * unused0, src2 and unused are ignored. */
-static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
-                       int width, uint32_t *unused)
-{
-    nvXXtoUV_c(dstU, dstV, src1, width);
-}
-
-/* Chroma reader that forwards to nvXXtoUV_c with the destinations swapped:
- * even source bytes end up in dstV and odd source bytes in dstU.  Only src1
- * is read; unused0, src2 and unused are ignored. */
-static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
-                       int width, uint32_t *unused)
-{
-    nvXXtoUV_c(dstV, dstU, src1, width);
-}
-
-/* Fetch a value from pos with AV_RB16 when isBE(origin) is true and with
- * AV_RL16 otherwise.  `origin` is not a macro parameter, so a variable of
- * that name must be in scope wherever the macro is expanded. */
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-
-/*
- * Luma reader for packed 3-byte pixels stored in B, G, R byte order.
- *
- * For each of the `width` pixels the R/G/B bytes are weighted with the
- * RY/GY/BY entries of rgb2yuv, a constant bias is added and the sum is
- * shifted right by RGB2YUV_SHIFT-6 before being stored as int16_t in _dst.
- * unused1 and unused2 are ignored.
- */
-static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
-                       int width, uint32_t *rgb2yuv)
-{
-    int16_t *luma = (int16_t *)_dst;
-    const int32_t ry = rgb2yuv[RY_IDX];
-    const int32_t gy = rgb2yuv[GY_IDX];
-    const int32_t by = rgb2yuv[BY_IDX];
-    int n;
-
-    for (n = 0; n < width; n++) {
-        const uint8_t *px = src + 3 * n; /* start of pixel n */
-        const int b = px[0];
-        const int g = px[1];
-        const int r = px[2];
-
-        luma[n] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
-    }
-}
-
-/*
- * Chroma reader for packed 3-byte pixels stored in B, G, R byte order.
- *
- * One U and one V sample (int16_t) is produced per source pixel from the
- * RU/GU/BU and RV/GV/BV entries of rgb2yuv.  Only src1 is read; src2 is
- * asserted (av_assert1) to be the same pointer after the loop.
- */
-static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
-                        const uint8_t *src2, int width, uint32_t *rgb2yuv)
-{
-    int16_t *outU = (int16_t *)_dstU;
-    int16_t *outV = (int16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX];
-    const int32_t rv = rgb2yuv[RV_IDX];
-    const int32_t gv = rgb2yuv[GV_IDX];
-    const int32_t bv = rgb2yuv[BV_IDX];
-    int n;
-
-    for (n = 0; n < width; n++) {
-        const uint8_t *px = src1 + 3 * n; /* start of pixel n */
-        const int b = px[0];
-        const int g = px[1];
-        const int r = px[2];
-
-        outU[n] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
-        outV[n] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
-    }
-    av_assert1(src1 == src2);
-}
-
-/*
- * Half-width chroma reader for packed 3-byte pixels in B, G, R byte order.
- *
- * Each output sample is computed from two horizontally adjacent source
- * pixels: their B, G and R bytes are summed pairwise and the sums are fed
- * to the rgb2yuv U/V coefficients, with one bit less of final right shift
- * than the full-width reader.  Only src1 is read; src2 is asserted
- * (av_assert1) to be the same pointer after the loop.
- */
-static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
-                             const uint8_t *src2, int width, uint32_t *rgb2yuv)
-{
-    int16_t *outU = (int16_t *)_dstU;
-    int16_t *outV = (int16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX];
-    const int32_t rv = rgb2yuv[RV_IDX];
-    const int32_t gv = rgb2yuv[GV_IDX];
-    const int32_t bv = rgb2yuv[BV_IDX];
-    int n;
-
-    for (n = 0; n < width; n++) {
-        const uint8_t *px = src1 + 6 * n; /* start of the pixel pair */
-        const int b = px[0] + px[3];
-        const int g = px[1] + px[4];
-        const int r = px[2] + px[5];
-
-        outU[n] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
-        outV[n] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
-    }
-    av_assert1(src1 == src2);
-}
-
-/*
- * Luma reader for packed 3-byte pixels stored in R, G, B byte order.
- *
- * For each of the `width` pixels the R/G/B bytes are weighted with the
- * RY/GY/BY entries of rgb2yuv, a constant bias is added and the sum is
- * shifted right by RGB2YUV_SHIFT-6 before being stored as int16_t in _dst.
- * unused1 and unused2 are ignored.
- */
-static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
-                       uint32_t *rgb2yuv)
-{
-    int16_t *luma = (int16_t *)_dst;
-    const int32_t ry = rgb2yuv[RY_IDX];
-    const int32_t gy = rgb2yuv[GY_IDX];
-    const int32_t by = rgb2yuv[BY_IDX];
-    int n;
-
-    for (n = 0; n < width; n++) {
-        const uint8_t *px = src + 3 * n; /* start of pixel n */
-        const int r = px[0];
-        const int g = px[1];
-        const int b = px[2];
-
-        luma[n] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
-    }
-}
-
-/*
- * Chroma reader for packed 3-byte pixels stored in R, G, B byte order.
- *
- * One U and one V sample (int16_t) is produced per source pixel from the
- * RU/GU/BU and RV/GV/BV entries of rgb2yuv.  Only src1 is read; src2 is
- * asserted (av_assert1) to be the same pointer before the loop.
- */
-static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
-                        const uint8_t *src2, int width, uint32_t *rgb2yuv)
-{
-    int16_t *outU = (int16_t *)_dstU;
-    int16_t *outV = (int16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX];
-    const int32_t rv = rgb2yuv[RV_IDX];
-    const int32_t gv = rgb2yuv[GV_IDX];
-    const int32_t bv = rgb2yuv[BV_IDX];
-    int n;
-
-    av_assert1(src1 == src2);
-    for (n = 0; n < width; n++) {
-        const uint8_t *px = src1 + 3 * n; /* start of pixel n */
-        const int r = px[0];
-        const int g = px[1];
-        const int b = px[2];
-
-        outU[n] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
-        outV[n] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
-    }
-}
-
-/*
- * Half-width chroma reader for packed 3-byte pixels in R, G, B byte order.
- *
- * Each output sample is computed from two horizontally adjacent source
- * pixels: their R, G and B bytes are summed pairwise and the sums are fed
- * to the rgb2yuv U/V coefficients, with one bit less of final right shift
- * than the full-width reader.  Only src1 is read; src2 is asserted
- * (av_assert1) to be the same pointer before the loop.
- */
-static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
-                             const uint8_t *src2, int width, uint32_t *rgb2yuv)
-{
-    int16_t *outU = (int16_t *)_dstU;
-    int16_t *outV = (int16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX];
-    const int32_t rv = rgb2yuv[RV_IDX];
-    const int32_t gv = rgb2yuv[GV_IDX];
-    const int32_t bv = rgb2yuv[BV_IDX];
-    int n;
-
-    av_assert1(src1 == src2);
-    for (n = 0; n < width; n++) {
-        const uint8_t *px = src1 + 6 * n; /* start of the pixel pair */
-        const int r = px[0] + px[3];
-        const int g = px[1] + px[4];
-        const int b = px[2] + px[5];
-
-        outU[n] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
-        outV[n] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
-    }
-}
-
-/*
- * Luma reader for 8-bit planar input whose planes are ordered G, B, R
- * (src[0], src[1], src[2]).
- *
- * Sample n of each plane is weighted with the RY/GY/BY entries of rgb2yuv,
- * biased and shifted right by RGB2YUV_SHIFT-6, then stored as uint16_t.
- */
-static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv)
-{
-    uint16_t *luma = (uint16_t *)_dst;
-    const int32_t ry = rgb2yuv[RY_IDX];
-    const int32_t gy = rgb2yuv[GY_IDX];
-    const int32_t by = rgb2yuv[BY_IDX];
-    int n = 0;
-
-    while (n < width) {
-        const int g = src[0][n];
-        const int b = src[1][n];
-        const int r = src[2][n];
-
-        luma[n] = (ry*r + gy*g + by*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-        n++;
-    }
-}
-
-/* Alpha reader for 8-bit planar input: sample n of the fourth plane
- * (src[3]) is shifted left by 6 and stored as uint16_t in _dst.  The last
- * argument is ignored. */
-static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused)
-{
-    uint16_t *out = (uint16_t *)_dst;
-    int n = 0;
-
-    while (n < width) {
-        const int a = src[3][n];
-
-        out[n] = a << 6;
-        n++;
-    }
-}
-
-/*
- * Chroma reader for 8-bit planar input whose planes are ordered G, B, R
- * (src[0], src[1], src[2]).
- *
- * Sample n of each plane is weighted with the U (resp. V) entries of
- * rgb2yuv, biased and shifted right by RGB2YUV_SHIFT-6, then stored as
- * uint16_t in _dstU (resp. _dstV).
- */
-static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv)
-{
-    uint16_t *outU = (uint16_t *)_dstU;
-    uint16_t *outV = (uint16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX];
-    const int32_t rv = rgb2yuv[RV_IDX];
-    const int32_t gv = rgb2yuv[GV_IDX];
-    const int32_t bv = rgb2yuv[BV_IDX];
-    int n = 0;
-
-    while (n < width) {
-        const int g = src[0][n];
-        const int b = src[1][n];
-        const int r = src[2][n];
-
-        outU[n] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-        outV[n] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-        n++;
-    }
-}
-
-/* Read one sample at src with AV_RB16 when is_be is non-zero, AV_RL16
- * otherwise.  `is_be` is taken from the expansion site.  The whole
- * replacement list is parenthesized so the conditional cannot bind to
- * neighbouring operators if the macro is used inside a larger expression. */
-#define rdpx(src) \
-    (is_be ? AV_RB16(src) : AV_RL16(src))
-/*
- * Luma reader shared by the planar RGB formats with more than 8 bits per
- * component.  Planes are ordered G, B, R (_src[0], _src[1], _src[2]) and are
- * read as 16-bit samples through rdpx(), which honours is_be.
- *
- * bpc      bits per component of the input
- * is_be    non-zero to read samples with the big-endian reader
- * rgb2yuv  coefficient table; the RY/GY/BY entries are used
- *
- * The final right shift is RGB2YUV_SHIFT + bpc - 14 for bpc < 16 and
- * RGB2YUV_SHIFT for bpc >= 16.
- */
-static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4],
-                                               int width, int bpc, int is_be, int32_t *rgb2yuv)
-{
-    const uint16_t **planes = (const uint16_t **)_src;
-    uint16_t *luma          = (uint16_t *)_dst;
-    const int32_t ry = rgb2yuv[RY_IDX];
-    const int32_t gy = rgb2yuv[GY_IDX];
-    const int32_t by = rgb2yuv[BY_IDX];
-    int shift = 14;
-    int n;
-
-    if (bpc < 16)
-        shift = bpc;
-
-    for (n = 0; n < width; n++) {
-        const int g = rdpx(planes[0] + n);
-        const int b = rdpx(planes[1] + n);
-        const int r = rdpx(planes[2] + n);
-
-        luma[n] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14));
-    }
-}
-
-/*
- * Chroma reader shared by the planar RGB formats with more than 8 bits per
- * component.  Planes are ordered G, B, R (_src[0], _src[1], _src[2]) and are
- * read as 16-bit samples through rdpx(), which honours is_be.
- *
- * bpc      bits per component of the input
- * is_be    non-zero to read samples with the big-endian reader
- * rgb2yuv  coefficient table; the U and V entries are used
- *
- * The final right shift is RGB2YUV_SHIFT + bpc - 14 for bpc < 16 and
- * RGB2YUV_SHIFT for bpc >= 16.
- */
-static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV,
-                                                const uint8_t *_src[4], int width,
-                                                int bpc, int is_be, int32_t *rgb2yuv)
-{
-    const uint16_t **planes = (const uint16_t **)_src;
-    uint16_t *outU          = (uint16_t *)_dstU;
-    uint16_t *outV          = (uint16_t *)_dstV;
-    const int32_t ru = rgb2yuv[RU_IDX];
-    const int32_t gu = rgb2yuv[GU_IDX];
-    const int32_t bu = rgb2yuv[BU_IDX];
-    const int32_t rv = rgb2yuv[RV_IDX];
-    const int32_t gv = rgb2yuv[GV_IDX];
-    const int32_t bv = rgb2yuv[BV_IDX];
-    int shift = 14;
-    int n;
-
-    if (bpc < 16)
-        shift = bpc;
-
-    for (n = 0; n < width; n++) {
-        const int g = rdpx(planes[0] + n);
-        const int b = rdpx(planes[1] + n);
-        const int r = rdpx(planes[2] + n);
-
-        outU[n] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
-        outV[n] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
-    }
-}
-#undef rdpx
-
-/*
- * Generate the two wrappers planar_rgb<nbits><endian_name>_to_y and
- * planar_rgb<nbits><endian_name>_to_uv.  Each one forwards to
- * planar_rgb16_to_y / planar_rgb16_to_uv, passing nbits as the bit depth
- * and `endian` as the is_be flag.
- */
-#define rgb9plus_planar_funcs_endian(nbits, endian_name, endian)                                    \
-static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4],              \
-                                                  int w, int32_t *rgb2yuv)                          \
-{                                                                                                   \
-    planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv);                                         \
-}                                                                                                   \
-static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV,                    \
-                                                   const uint8_t *src[4], int w, int32_t *rgb2yuv)  \
-{                                                                                                   \
-    planar_rgb16_to_uv(dstU, dstV, src, w, nbits, endian, rgb2yuv);                                 \
-}                                                                                                   \
-
-/* Emit both variants for one bit depth: "le" with is_be = 0 and "be" with
- * is_be = 1. */
-#define rgb9plus_planar_funcs(nbits)            \
-    rgb9plus_planar_funcs_endian(nbits, le, 0)  \
-    rgb9plus_planar_funcs_endian(nbits, be, 1)
-
-/* Instantiate the wrappers for 9, 10, 12, 14 and 16 bits per component. */
-rgb9plus_planar_funcs(9)
-rgb9plus_planar_funcs(10)
-rgb9plus_planar_funcs(12)
-rgb9plus_planar_funcs(14)
-rgb9plus_planar_funcs(16)
-
-/*
- * Select the input reader callbacks of c according to c->srcFormat.
- *
- * chrToYV12, lumToYV12 and alpToYV12 are reset to NULL first and stay NULL
- * when no case matches the source format.  readChrPlanar, readLumPlanar and
- * readAlpPlanar are only assigned for the planar GBR formats and are not
- * reset here.  The choice between the full-width and the *_half_c chroma
- * readers depends on c->chrSrcHSubSample, and alpha readers are only
- * installed when c->alpPixBuf is set.
- */
-av_cold void ff_sws_init_input_funcs(SwsContext *c)
-{
-    enum AVPixelFormat srcFormat = c->srcFormat;
-
-    /* Chroma readers that do not depend on chrSrcHSubSample. */
-    c->chrToYV12 = NULL;
-    switch (srcFormat) {
-    case AV_PIX_FMT_YUYV422:
-        c->chrToYV12 = yuy2ToUV_c;
-        break;
-    case AV_PIX_FMT_UYVY422:
-        c->chrToYV12 = uyvyToUV_c;
-        break;
-    case AV_PIX_FMT_NV12:
-        c->chrToYV12 = nv12ToUV_c;
-        break;
-    case AV_PIX_FMT_NV21:
-        c->chrToYV12 = nv21ToUV_c;
-        break;
-    case AV_PIX_FMT_RGB8:
-    case AV_PIX_FMT_BGR8:
-    case AV_PIX_FMT_PAL8:
-    case AV_PIX_FMT_BGR4_BYTE:
-    case AV_PIX_FMT_RGB4_BYTE:
-        c->chrToYV12 = palToUV_c;
-        break;
-    case AV_PIX_FMT_GBRP9LE:
-        c->readChrPlanar = planar_rgb9le_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP10LE:
-        c->readChrPlanar = planar_rgb10le_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP12LE:
-        c->readChrPlanar = planar_rgb12le_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP14LE:
-        c->readChrPlanar = planar_rgb14le_to_uv;
-        break;
-    case AV_PIX_FMT_GBRAP16LE:
-    case AV_PIX_FMT_GBRP16LE:
-        c->readChrPlanar = planar_rgb16le_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP9BE:
-        c->readChrPlanar = planar_rgb9be_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP10BE:
-        c->readChrPlanar = planar_rgb10be_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP12BE:
-        c->readChrPlanar = planar_rgb12be_to_uv;
-        break;
-    case AV_PIX_FMT_GBRP14BE:
-        c->readChrPlanar = planar_rgb14be_to_uv;
-        break;
-    case AV_PIX_FMT_GBRAP16BE:
-    case AV_PIX_FMT_GBRP16BE:
-        c->readChrPlanar = planar_rgb16be_to_uv;
-        break;
-    case AV_PIX_FMT_GBRAP:
-    case AV_PIX_FMT_GBRP:
-        c->readChrPlanar = planar_rgb_to_uv;
-        break;
-    /* Planar YUV above 8 bits: only the formats whose LE/BE suffix is the
-     * opposite of HAVE_BIGENDIAN get the byte-swapping reader. */
-#if HAVE_BIGENDIAN
-    case AV_PIX_FMT_YUV444P9LE:
-    case AV_PIX_FMT_YUV422P9LE:
-    case AV_PIX_FMT_YUV420P9LE:
-    case AV_PIX_FMT_YUV422P10LE:
-    case AV_PIX_FMT_YUV444P10LE:
-    case AV_PIX_FMT_YUV420P10LE:
-    case AV_PIX_FMT_YUV422P12LE:
-    case AV_PIX_FMT_YUV444P12LE:
-    case AV_PIX_FMT_YUV420P12LE:
-    case AV_PIX_FMT_YUV422P14LE:
-    case AV_PIX_FMT_YUV444P14LE:
-    case AV_PIX_FMT_YUV420P14LE:
-    case AV_PIX_FMT_YUV420P16LE:
-    case AV_PIX_FMT_YUV422P16LE:
-    case AV_PIX_FMT_YUV444P16LE:
-
-    case AV_PIX_FMT_YUVA444P9LE:
-    case AV_PIX_FMT_YUVA422P9LE:
-    case AV_PIX_FMT_YUVA420P9LE:
-    case AV_PIX_FMT_YUVA444P10LE:
-    case AV_PIX_FMT_YUVA422P10LE:
-    case AV_PIX_FMT_YUVA420P10LE:
-    case AV_PIX_FMT_YUVA420P16LE:
-    case AV_PIX_FMT_YUVA422P16LE:
-    case AV_PIX_FMT_YUVA444P16LE:
-        c->chrToYV12 = bswap16UV_c;
-        break;
-#else
-    case AV_PIX_FMT_YUV444P9BE:
-    case AV_PIX_FMT_YUV422P9BE:
-    case AV_PIX_FMT_YUV420P9BE:
-    case AV_PIX_FMT_YUV444P10BE:
-    case AV_PIX_FMT_YUV422P10BE:
-    case AV_PIX_FMT_YUV420P10BE:
-    case AV_PIX_FMT_YUV444P12BE:
-    case AV_PIX_FMT_YUV422P12BE:
-    case AV_PIX_FMT_YUV420P12BE:
-    case AV_PIX_FMT_YUV444P14BE:
-    case AV_PIX_FMT_YUV422P14BE:
-    case AV_PIX_FMT_YUV420P14BE:
-    case AV_PIX_FMT_YUV420P16BE:
-    case AV_PIX_FMT_YUV422P16BE:
-    case AV_PIX_FMT_YUV444P16BE:
-
-    case AV_PIX_FMT_YUVA444P9BE:
-    case AV_PIX_FMT_YUVA422P9BE:
-    case AV_PIX_FMT_YUVA420P9BE:
-    case AV_PIX_FMT_YUVA444P10BE:
-    case AV_PIX_FMT_YUVA422P10BE:
-    case AV_PIX_FMT_YUVA420P10BE:
-    case AV_PIX_FMT_YUVA420P16BE:
-    case AV_PIX_FMT_YUVA422P16BE:
-    case AV_PIX_FMT_YUVA444P16BE:
-        c->chrToYV12 = bswap16UV_c;
-        break;
-#endif
-    }
-    /* Packed RGB chroma readers: the *_half_c variants are used when
-     * chrSrcHSubSample is non-zero, the full-width variants otherwise. */
-    if (c->chrSrcHSubSample) {
-        switch (srcFormat) {
-        case AV_PIX_FMT_RGBA64BE:
-            c->chrToYV12 = rgb64BEToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGBA64LE:
-            c->chrToYV12 = rgb64LEToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB48BE:
-            c->chrToYV12 = rgb48BEToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB48LE:
-            c->chrToYV12 = rgb48LEToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR48BE:
-            c->chrToYV12 = bgr48BEToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR48LE:
-            c->chrToYV12 = bgr48LEToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB32:
-            c->chrToYV12 = bgr32ToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB32_1:
-            c->chrToYV12 = bgr321ToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR24:
-            c->chrToYV12 = bgr24ToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR565LE:
-            c->chrToYV12 = bgr16leToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR565BE:
-            c->chrToYV12 = bgr16beToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR555LE:
-            c->chrToYV12 = bgr15leToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR555BE:
-            c->chrToYV12 = bgr15beToUV_half_c;
-            break;
-        case AV_PIX_FMT_GBRAP:
-        case AV_PIX_FMT_GBRP:
-            c->chrToYV12 = gbr24pToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR444LE:
-            c->chrToYV12 = bgr12leToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR444BE:
-            c->chrToYV12 = bgr12beToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR32:
-            c->chrToYV12 = rgb32ToUV_half_c;
-            break;
-        case AV_PIX_FMT_BGR32_1:
-            c->chrToYV12 = rgb321ToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB24:
-            c->chrToYV12 = rgb24ToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB565LE:
-            c->chrToYV12 = rgb16leToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB565BE:
-            c->chrToYV12 = rgb16beToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB555LE:
-            c->chrToYV12 = rgb15leToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB555BE:
-            c->chrToYV12 = rgb15beToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB444LE:
-            c->chrToYV12 = rgb12leToUV_half_c;
-            break;
-        case AV_PIX_FMT_RGB444BE:
-            c->chrToYV12 = rgb12beToUV_half_c;
-            break;
-        }
-    } else {
-        switch (srcFormat) {
-        case AV_PIX_FMT_RGBA64BE:
-            c->chrToYV12 = rgb64BEToUV_c;
-            break;
-        case AV_PIX_FMT_RGBA64LE:
-            c->chrToYV12 = rgb64LEToUV_c;
-            break;
-        case AV_PIX_FMT_RGB48BE:
-            c->chrToYV12 = rgb48BEToUV_c;
-            break;
-        case AV_PIX_FMT_RGB48LE:
-            c->chrToYV12 = rgb48LEToUV_c;
-            break;
-        case AV_PIX_FMT_BGR48BE:
-            c->chrToYV12 = bgr48BEToUV_c;
-            break;
-        case AV_PIX_FMT_BGR48LE:
-            c->chrToYV12 = bgr48LEToUV_c;
-            break;
-        case AV_PIX_FMT_RGB32:
-            c->chrToYV12 = bgr32ToUV_c;
-            break;
-        case AV_PIX_FMT_RGB32_1:
-            c->chrToYV12 = bgr321ToUV_c;
-            break;
-        case AV_PIX_FMT_BGR24:
-            c->chrToYV12 = bgr24ToUV_c;
-            break;
-        case AV_PIX_FMT_BGR565LE:
-            c->chrToYV12 = bgr16leToUV_c;
-            break;
-        case AV_PIX_FMT_BGR565BE:
-            c->chrToYV12 = bgr16beToUV_c;
-            break;
-        case AV_PIX_FMT_BGR555LE:
-            c->chrToYV12 = bgr15leToUV_c;
-            break;
-        case AV_PIX_FMT_BGR555BE:
-            c->chrToYV12 = bgr15beToUV_c;
-            break;
-        case AV_PIX_FMT_BGR444LE:
-            c->chrToYV12 = bgr12leToUV_c;
-            break;
-        case AV_PIX_FMT_BGR444BE:
-            c->chrToYV12 = bgr12beToUV_c;
-            break;
-        case AV_PIX_FMT_BGR32:
-            c->chrToYV12 = rgb32ToUV_c;
-            break;
-        case AV_PIX_FMT_BGR32_1:
-            c->chrToYV12 = rgb321ToUV_c;
-            break;
-        case AV_PIX_FMT_RGB24:
-            c->chrToYV12 = rgb24ToUV_c;
-            break;
-        case AV_PIX_FMT_RGB565LE:
-            c->chrToYV12 = rgb16leToUV_c;
-            break;
-        case AV_PIX_FMT_RGB565BE:
-            c->chrToYV12 = rgb16beToUV_c;
-            break;
-        case AV_PIX_FMT_RGB555LE:
-            c->chrToYV12 = rgb15leToUV_c;
-            break;
-        case AV_PIX_FMT_RGB555BE:
-            c->chrToYV12 = rgb15beToUV_c;
-            break;
-        case AV_PIX_FMT_RGB444LE:
-            c->chrToYV12 = rgb12leToUV_c;
-            break;
-        case AV_PIX_FMT_RGB444BE:
-            c->chrToYV12 = rgb12beToUV_c;
-            break;
-        }
-    }
-
-    /* Luma readers, plus the alpha readers that are tied to the format
-     * rather than to alpPixBuf. */
-    c->lumToYV12 = NULL;
-    c->alpToYV12 = NULL;
-    switch (srcFormat) {
-    case AV_PIX_FMT_GBRP9LE:
-        c->readLumPlanar = planar_rgb9le_to_y;
-        break;
-    case AV_PIX_FMT_GBRP10LE:
-        c->readLumPlanar = planar_rgb10le_to_y;
-        break;
-    case AV_PIX_FMT_GBRP12LE:
-        c->readLumPlanar = planar_rgb12le_to_y;
-        break;
-    case AV_PIX_FMT_GBRP14LE:
-        c->readLumPlanar = planar_rgb14le_to_y;
-        break;
-    case AV_PIX_FMT_GBRAP16LE:
-    case AV_PIX_FMT_GBRP16LE:
-        c->readLumPlanar = planar_rgb16le_to_y;
-        break;
-    case AV_PIX_FMT_GBRP9BE:
-        c->readLumPlanar = planar_rgb9be_to_y;
-        break;
-    case AV_PIX_FMT_GBRP10BE:
-        c->readLumPlanar = planar_rgb10be_to_y;
-        break;
-    case AV_PIX_FMT_GBRP12BE:
-        c->readLumPlanar = planar_rgb12be_to_y;
-        break;
-    case AV_PIX_FMT_GBRP14BE:
-        c->readLumPlanar = planar_rgb14be_to_y;
-        break;
-    case AV_PIX_FMT_GBRAP16BE:
-    case AV_PIX_FMT_GBRP16BE:
-        c->readLumPlanar = planar_rgb16be_to_y;
-        break;
-    case AV_PIX_FMT_GBRAP:
-        c->readAlpPlanar = planar_rgb_to_a;
-        /* no break: GBRAP also takes the GBRP luma reader below */
-    case AV_PIX_FMT_GBRP:
-        c->readLumPlanar = planar_rgb_to_y;
-        break;
-    /* Same byte-order selection as for chroma: formats whose LE/BE suffix
-     * is the opposite of HAVE_BIGENDIAN are byte-swapped on input. */
-#if HAVE_BIGENDIAN
-    case AV_PIX_FMT_YUV444P9LE:
-    case AV_PIX_FMT_YUV422P9LE:
-    case AV_PIX_FMT_YUV420P9LE:
-    case AV_PIX_FMT_YUV444P10LE:
-    case AV_PIX_FMT_YUV422P10LE:
-    case AV_PIX_FMT_YUV420P10LE:
-    case AV_PIX_FMT_YUV444P12LE:
-    case AV_PIX_FMT_YUV422P12LE:
-    case AV_PIX_FMT_YUV420P12LE:
-    case AV_PIX_FMT_YUV444P14LE:
-    case AV_PIX_FMT_YUV422P14LE:
-    case AV_PIX_FMT_YUV420P14LE:
-    case AV_PIX_FMT_YUV420P16LE:
-    case AV_PIX_FMT_YUV422P16LE:
-    case AV_PIX_FMT_YUV444P16LE:
-
-    case AV_PIX_FMT_GRAY16LE:
-        c->lumToYV12 = bswap16Y_c;
-        break;
-    case AV_PIX_FMT_YUVA444P9LE:
-    case AV_PIX_FMT_YUVA422P9LE:
-    case AV_PIX_FMT_YUVA420P9LE:
-    case AV_PIX_FMT_YUVA444P10LE:
-    case AV_PIX_FMT_YUVA422P10LE:
-    case AV_PIX_FMT_YUVA420P10LE:
-    case AV_PIX_FMT_YUVA420P16LE:
-    case AV_PIX_FMT_YUVA422P16LE:
-    case AV_PIX_FMT_YUVA444P16LE:
-        c->lumToYV12 = bswap16Y_c;
-        c->alpToYV12 = bswap16Y_c;
-        break;
-#else
-    case AV_PIX_FMT_YUV444P9BE:
-    case AV_PIX_FMT_YUV422P9BE:
-    case AV_PIX_FMT_YUV420P9BE:
-    case AV_PIX_FMT_YUV444P10BE:
-    case AV_PIX_FMT_YUV422P10BE:
-    case AV_PIX_FMT_YUV420P10BE:
-    case AV_PIX_FMT_YUV444P12BE:
-    case AV_PIX_FMT_YUV422P12BE:
-    case AV_PIX_FMT_YUV420P12BE:
-    case AV_PIX_FMT_YUV444P14BE:
-    case AV_PIX_FMT_YUV422P14BE:
-    case AV_PIX_FMT_YUV420P14BE:
-    case AV_PIX_FMT_YUV420P16BE:
-    case AV_PIX_FMT_YUV422P16BE:
-    case AV_PIX_FMT_YUV444P16BE:
-
-    case AV_PIX_FMT_GRAY16BE:
-        c->lumToYV12 = bswap16Y_c;
-        break;
-    case AV_PIX_FMT_YUVA444P9BE:
-    case AV_PIX_FMT_YUVA422P9BE:
-    case AV_PIX_FMT_YUVA420P9BE:
-    case AV_PIX_FMT_YUVA444P10BE:
-    case AV_PIX_FMT_YUVA422P10BE:
-    case AV_PIX_FMT_YUVA420P10BE:
-    case AV_PIX_FMT_YUVA420P16BE:
-    case AV_PIX_FMT_YUVA422P16BE:
-    case AV_PIX_FMT_YUVA444P16BE:
-        c->lumToYV12 = bswap16Y_c;
-        c->alpToYV12 = bswap16Y_c;
-        break;
-#endif
-    case AV_PIX_FMT_YUYV422:
-    case AV_PIX_FMT_Y400A:
-        c->lumToYV12 = yuy2ToY_c;
-        break;
-    case AV_PIX_FMT_UYVY422:
-        c->lumToYV12 = uyvyToY_c;
-        break;
-    case AV_PIX_FMT_BGR24:
-        c->lumToYV12 = bgr24ToY_c;
-        break;
-    case AV_PIX_FMT_BGR565LE:
-        c->lumToYV12 = bgr16leToY_c;
-        break;
-    case AV_PIX_FMT_BGR565BE:
-        c->lumToYV12 = bgr16beToY_c;
-        break;
-    case AV_PIX_FMT_BGR555LE:
-        c->lumToYV12 = bgr15leToY_c;
-        break;
-    case AV_PIX_FMT_BGR555BE:
-        c->lumToYV12 = bgr15beToY_c;
-        break;
-    case AV_PIX_FMT_BGR444LE:
-        c->lumToYV12 = bgr12leToY_c;
-        break;
-    case AV_PIX_FMT_BGR444BE:
-        c->lumToYV12 = bgr12beToY_c;
-        break;
-    case AV_PIX_FMT_RGB24:
-        c->lumToYV12 = rgb24ToY_c;
-        break;
-    case AV_PIX_FMT_RGB565LE:
-        c->lumToYV12 = rgb16leToY_c;
-        break;
-    case AV_PIX_FMT_RGB565BE:
-        c->lumToYV12 = rgb16beToY_c;
-        break;
-    case AV_PIX_FMT_RGB555LE:
-        c->lumToYV12 = rgb15leToY_c;
-        break;
-    case AV_PIX_FMT_RGB555BE:
-        c->lumToYV12 = rgb15beToY_c;
-        break;
-    case AV_PIX_FMT_RGB444LE:
-        c->lumToYV12 = rgb12leToY_c;
-        break;
-    case AV_PIX_FMT_RGB444BE:
-        c->lumToYV12 = rgb12beToY_c;
-        break;
-    case AV_PIX_FMT_RGB8:
-    case AV_PIX_FMT_BGR8:
-    case AV_PIX_FMT_PAL8:
-    case AV_PIX_FMT_BGR4_BYTE:
-    case AV_PIX_FMT_RGB4_BYTE:
-        c->lumToYV12 = palToY_c;
-        break;
-    case AV_PIX_FMT_MONOBLACK:
-        c->lumToYV12 = monoblack2Y_c;
-        break;
-    case AV_PIX_FMT_MONOWHITE:
-        c->lumToYV12 = monowhite2Y_c;
-        break;
-    case AV_PIX_FMT_RGB32:
-        c->lumToYV12 = bgr32ToY_c;
-        break;
-    case AV_PIX_FMT_RGB32_1:
-        c->lumToYV12 = bgr321ToY_c;
-        break;
-    case AV_PIX_FMT_BGR32:
-        c->lumToYV12 = rgb32ToY_c;
-        break;
-    case AV_PIX_FMT_BGR32_1:
-        c->lumToYV12 = rgb321ToY_c;
-        break;
-    case AV_PIX_FMT_RGB48BE:
-        c->lumToYV12 = rgb48BEToY_c;
-        break;
-    case AV_PIX_FMT_RGB48LE:
-        c->lumToYV12 = rgb48LEToY_c;
-        break;
-    case AV_PIX_FMT_BGR48BE:
-        c->lumToYV12 = bgr48BEToY_c;
-        break;
-    case AV_PIX_FMT_BGR48LE:
-        c->lumToYV12 = bgr48LEToY_c;
-        break;
-    case AV_PIX_FMT_RGBA64BE:
-        c->lumToYV12 = rgb64BEToY_c;
-        break;
-    case AV_PIX_FMT_RGBA64LE:
-        c->lumToYV12 = rgb64LEToY_c;
-        break;
-    }
-    /* Alpha readers, only installed when alpPixBuf is set. */
-    if (c->alpPixBuf) {
-        /* 16-bit / N-bit formats whose isBE() result does not match
-         * HAVE_BIGENDIAN get the swapping reader; the switch below may
-         * still replace it with a format-specific one. */
-        if (is16BPS(srcFormat) || isNBPS(srcFormat)) {
-            if (HAVE_BIGENDIAN == !isBE(srcFormat))
-                c->alpToYV12 = bswap16Y_c;
-        }
-        switch (srcFormat) {
-        case AV_PIX_FMT_RGBA64LE:
-        case AV_PIX_FMT_RGBA64BE:  c->alpToYV12 = rgba64ToA_c; break;
-        case AV_PIX_FMT_BGRA:
-        case AV_PIX_FMT_RGBA:
-            c->alpToYV12 = rgbaToA_c;
-            break;
-        case AV_PIX_FMT_ABGR:
-        case AV_PIX_FMT_ARGB:
-            c->alpToYV12 = abgrToA_c;
-            break;
-        case AV_PIX_FMT_Y400A:
-            /* uyvyToY_c copies the odd bytes, i.e. the second byte of
-             * every 2-byte pair */
-            c->alpToYV12 = uyvyToY_c;
-            break;
-        case AV_PIX_FMT_PAL8 :
-            c->alpToYV12 = palToA_c;
-            break;
-        }
-    }
-}
diff --git a/ffmpeg/libswscale/libswscale.pc b/ffmpeg/libswscale/libswscale.pc
deleted file mode 100644
index 63e7375..0000000
--- a/ffmpeg/libswscale/libswscale.pc
+++ /dev/null
@@ -1,14 +0,0 @@
-prefix=/usr/local
-exec_prefix=${prefix}
-libdir=${prefix}/lib
-includedir=${prefix}/include
-
-Name: libswscale
-Description: FFmpeg image rescaling library
-Version: 2.5.101
-Requires: 
-Requires.private: libavutil = 52.59.100
-Conflicts:
-Libs: -L${libdir}  -lswscale 
-Libs.private: -lm
-Cflags: -I${includedir}
diff --git a/ffmpeg/libswscale/libswscale.v b/ffmpeg/libswscale/libswscale.v
deleted file mode 100644
index 8b9a96a..0000000
--- a/ffmpeg/libswscale/libswscale.v
+++ /dev/null
@@ -1,4 +0,0 @@
-LIBSWSCALE_$MAJOR {
-        global: swscale_*; sws_*;
-        local: *;
-};
diff --git a/ffmpeg/libswscale/options.c b/ffmpeg/libswscale/options.c
deleted file mode 100644
index 9e8703f..0000000
--- a/ffmpeg/libswscale/options.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/avutil.h"
-#include "libavutil/opt.h"
-#include "libavutil/pixfmt.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-/* item_name callback of sws_context_class: returns the fixed string
- * "swscaler" regardless of ptr. */
-static const char *sws_context_to_name(void *ptr)
-{
-    return "swscaler";
-}
-
-/* Byte offset of member x inside SwsContext. */
-#define OFFSET(x) offsetof(SwsContext, x)
-/* Default value used by the options that have no more specific default. */
-#define DEFAULT 0
-/* Flag set shared by every entry of swscale_options.  The expansion is not
- * parenthesized, so it is only safe where it forms a whole expression, as
- * in the initializers below. */
-#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
-
-/*
- * Option table of the scaler context, terminated by a { NULL } entry.
- * Entries of type AV_OPT_TYPE_CONST are named values belonging to the unit
- * given in their last field ("sws_flags" or "sws_dither"); all other
- * entries map an option name to a SwsContext field through OFFSET().
- */
-static const AVOption swscale_options[] = {
-    { "sws_flags",       "scaler flags",                  OFFSET(flags),     AV_OPT_TYPE_FLAGS,  { .i64  = SWS_BICUBIC        }, 0,      UINT_MAX,        VE, "sws_flags" },
-    { "fast_bilinear",   "fast bilinear",                 0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_FAST_BILINEAR  }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "bilinear",        "bilinear",                      0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_BILINEAR       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "bicubic",         "bicubic",                       0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_BICUBIC        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "experimental",    "experimental",                  0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_X              }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "neighbor",        "nearest neighbor",              0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_POINT          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "area",            "averaging area",                0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_AREA           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "bicublin",        "luma bicubic, chroma bilinear", 0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_BICUBLIN       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "gauss",           "gaussian",                      0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_GAUSS          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "sinc",            "sinc",                          0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_SINC           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "lanczos",         "lanczos",                       0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_LANCZOS        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "spline",          "natural bicubic spline",        0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_SPLINE         }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "print_info",      "print info",                    0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_PRINT_INFO     }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "accurate_rnd",    "accurate rounding",             0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_ACCURATE_RND   }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "full_chroma_int", "full chroma interpolation",     0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "full_chroma_inp", "full chroma input",             0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "bitexact",        "",                              0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_BITEXACT       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "error_diffusion", "error diffusion dither",        0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_ERROR_DIFFUSION}, INT_MIN, INT_MAX,        VE, "sws_flags" },
-
-    /* Geometry, pixel formats, ranges and the two scaler parameters. */
-    { "srcw",            "source width",                  OFFSET(srcW),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
-    { "srch",            "source height",                 OFFSET(srcH),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
-    { "dstw",            "destination width",             OFFSET(dstW),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
-    { "dsth",            "destination height",            OFFSET(dstH),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
-    { "src_format",      "source format",                 OFFSET(srcFormat), AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       AV_PIX_FMT_NB - 1, VE },
-    { "dst_format",      "destination format",            OFFSET(dstFormat), AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       AV_PIX_FMT_NB - 1, VE },
-    { "src_range",       "source range",                  OFFSET(srcRange),  AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       1,              VE },
-    { "dst_range",       "destination range",             OFFSET(dstRange),  AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       1,              VE },
-    { "param0",          "scaler param 0",                OFFSET(param[0]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
-    { "param1",          "scaler param 1",                OFFSET(param[1]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
-
-    /* Chroma sample positions; -1 is the default and the lowest accepted
-     * value, 512 the highest. */
-    { "src_v_chr_pos",   "source vertical chroma position in luma grid/256"  , OFFSET(src_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1            }, -1,      512,             VE },
-    { "src_h_chr_pos",   "source horizontal chroma position in luma grid/256", OFFSET(src_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1            }, -1,      512,             VE },
-    { "dst_v_chr_pos",   "destination vertical chroma position in luma grid/256"  , OFFSET(dst_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1            }, -1,      512,             VE },
-    { "dst_h_chr_pos",   "destination horizontal chroma position in luma grid/256", OFFSET(dst_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1            }, -1,      512,             VE },
-
-    /* NOTE(review): the upper bound below is NB_SWS_DITHER itself, while
-     * src_format/dst_format use AV_PIX_FMT_NB - 1.  If NB_SWS_DITHER is a
-     * count sentinel, this accepts one value past the last algorithm --
-     * confirm against the enum definition. */
-    { "sws_dither",      "set dithering algorithm",       OFFSET(dither),    AV_OPT_TYPE_INT,    { .i64  = SWS_DITHER_AUTO   }, 0,       NB_SWS_DITHER,  VE, "sws_dither" },
-    { "auto",            "leave choice to sws",           0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_DITHER_AUTO   }, INT_MIN, INT_MAX,        VE, "sws_dither" },
-    { "bayer",           "bayer dither",                  0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_DITHER_BAYER  }, INT_MIN, INT_MAX,        VE, "sws_dither" },
-    { "ed",              "error diffusion",               0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_DITHER_ED     }, INT_MIN, INT_MAX,        VE, "sws_dither" },
-
-    { NULL }
-};
-
-/* AVClass describing the scaler context: it ties the class name "SWScaler"
- * to the sws_context_to_name callback and the swscale_options table. */
-const AVClass sws_context_class = {
-    .class_name = "SWScaler",
-    .item_name  = sws_context_to_name,
-    .option     = swscale_options,
-    .category   = AV_CLASS_CATEGORY_SWSCALER,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-/* Return a pointer to the statically allocated sws_context_class. */
-const AVClass *sws_get_class(void)
-{
-    return &sws_context_class;
-}
diff --git a/ffmpeg/libswscale/output.c b/ffmpeg/libswscale/output.c
deleted file mode 100644
index ddb0d0c..0000000
--- a/ffmpeg/libswscale/output.c
+++ /dev/null
@@ -1,2135 +0,0 @@
-/*
- * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <math.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/avutil.h"
-#include "libavutil/avassert.h"
-#include "libavutil/bswap.h"
-#include "libavutil/cpu.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/mathematics.h"
-#include "libavutil/pixdesc.h"
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = {
-{  1,   3,   1,   3,   1,   3,   1,   3, },
-{  2,   0,   2,   0,   2,   0,   2,   0, },
-{  1,   3,   1,   3,   1,   3,   1,   3, },
-};
-
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = {
-{  6,   2,   6,   2,   6,   2,   6,   2, },
-{  0,   4,   0,   4,   0,   4,   0,   4, },
-{  6,   2,   6,   2,   6,   2,   6,   2, },
-};
-
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = {
-{  8,   4,  11,   7,   8,   4,  11,   7, },
-{  2,  14,   1,  13,   2,  14,   1,  13, },
-{ 10,   6,   9,   5,  10,   6,   9,   5, },
-{  0,  12,   3,  15,   0,  12,   3,  15, },
-{  8,   4,  11,   7,   8,   4,  11,   7, },
-};
-
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = {
-{ 17,   9,  23,  15,  16,   8,  22,  14, },
-{  5,  29,   3,  27,   4,  28,   2,  26, },
-{ 21,  13,  19,  11,  20,  12,  18,  10, },
-{  0,  24,   6,  30,   1,  25,   7,  31, },
-{ 16,   8,  22,  14,  17,   9,  23,  15, },
-{  4,  28,   2,  26,   5,  29,   3,  27, },
-{ 20,  12,  18,  10,  21,  13,  19,  11, },
-{  1,  25,   7,  31,   0,  24,   6,  30, },
-{ 17,   9,  23,  15,  16,   8,  22,  14, },
-};
-
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = {
-{  0,  55,  14,  68,   3,  58,  17,  72, },
-{ 37,  18,  50,  32,  40,  22,  54,  35, },
-{  9,  64,   5,  59,  13,  67,   8,  63, },
-{ 46,  27,  41,  23,  49,  31,  44,  26, },
-{  2,  57,  16,  71,   1,  56,  15,  70, },
-{ 39,  21,  52,  34,  38,  19,  51,  33, },
-{ 11,  66,   7,  62,  10,  65,   6,  60, },
-{ 48,  30,  43,  25,  47,  29,  42,  24, },
-{  0,  55,  14,  68,   3,  58,  17,  72, },
-};
-
-#if 1
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
-{117,  62, 158, 103, 113,  58, 155, 100, },
-{ 34, 199,  21, 186,  31, 196,  17, 182, },
-{144,  89, 131,  76, 141,  86, 127,  72, },
-{  0, 165,  41, 206,  10, 175,  52, 217, },
-{110,  55, 151,  96, 120,  65, 162, 107, },
-{ 28, 193,  14, 179,  38, 203,  24, 189, },
-{138,  83, 124,  69, 148,  93, 134,  79, },
-{  7, 172,  48, 213,   3, 168,  45, 210, },
-{117,  62, 158, 103, 113,  58, 155, 100, },
-};
-#elif 1
-// tries to correct a gamma of 1.5
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
-{  0, 143,  18, 200,   2, 156,  25, 215, },
-{ 78,  28, 125,  64,  89,  36, 138,  74, },
-{ 10, 180,   3, 161,  16, 195,   8, 175, },
-{109,  51,  93,  38, 121,  60, 105,  47, },
-{  1, 152,  23, 210,   0, 147,  20, 205, },
-{ 85,  33, 134,  71,  81,  30, 130,  67, },
-{ 14, 190,   6, 171,  12, 185,   5, 166, },
-{117,  57, 101,  44, 113,  54,  97,  41, },
-{  0, 143,  18, 200,   2, 156,  25, 215, },
-};
-#elif 1
-// tries to correct a gamma of 2.0
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
-{  0, 124,   8, 193,   0, 140,  12, 213, },
-{ 55,  14, 104,  42,  66,  19, 119,  52, },
-{  3, 168,   1, 145,   6, 187,   3, 162, },
-{ 86,  31,  70,  21,  99,  39,  82,  28, },
-{  0, 134,  11, 206,   0, 129,   9, 200, },
-{ 62,  17, 114,  48,  58,  16, 109,  45, },
-{  5, 181,   2, 157,   4, 175,   1, 151, },
-{ 95,  36,  78,  26,  90,  34,  74,  24, },
-{  0, 124,   8, 193,   0, 140,  12, 213, },
-};
-#else
-// tries to correct a gamma of 2.5
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
-{  0, 107,   3, 187,   0, 125,   6, 212, },
-{ 39,   7,  86,  28,  49,  11, 102,  36, },
-{  1, 158,   0, 131,   3, 180,   1, 151, },
-{ 68,  19,  52,  12,  81,  25,  64,  17, },
-{  0, 119,   5, 203,   0, 113,   4, 195, },
-{ 45,   9,  96,  33,  42,   8,  91,  30, },
-{  2, 172,   1, 144,   2, 165,   0, 137, },
-{ 77,  23,  60,  15,  72,  21,  56,  14, },
-{  0, 107,   3, 187,   0, 125,   6, 212, },
-};
-#endif
-
-#define output_pixel(pos, val, bias, signedness) \
-    if (big_endian) { \
-        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
-    } else { \
-        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
-    }
-
-static av_always_inline void
-yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
-                         int big_endian, int output_bits)
-{
-    int i;
-    int shift = 3;
-    av_assert0(output_bits == 16);
-
-    for (i = 0; i < dstW; i++) {
-        int val = src[i] + (1 << (shift - 1));
-        output_pixel(&dest[i], val, 0, uint);
-    }
-}
-
-static av_always_inline void
-yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
-                         const int32_t **src, uint16_t *dest, int dstW,
-                         int big_endian, int output_bits)
-{
-    int i;
-    int shift = 15;
-    av_assert0(output_bits == 16);
-
-    for (i = 0; i < dstW; i++) {
-        int val = 1 << (shift - 1);
-        int j;
-
-        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
-         * filters (or anything with negative coeffs, the range can be slightly
-         * wider in both directions. To account for this overflow, we subtract
-         * a constant so it always fits in the signed range (assuming a
-         * reasonable filterSize), and re-add that at the end. */
-        val -= 0x40000000;
-        for (j = 0; j < filterSize; j++)
-            val += src[j][i] * (unsigned)filter[j];
-
-        output_pixel(&dest[i], val, 0x8000, int);
-    }
-}
-
-#undef output_pixel
-
-#define output_pixel(pos, val) \
-    if (big_endian) { \
-        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
-    } else { \
-        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
-    }
-
-static av_always_inline void
-yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
-                         int big_endian, int output_bits)
-{
-    int i;
-    int shift = 15 - output_bits;
-
-    for (i = 0; i < dstW; i++) {
-        int val = src[i] + (1 << (shift - 1));
-        output_pixel(&dest[i], val);
-    }
-}
-
-static av_always_inline void
-yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
-                         const int16_t **src, uint16_t *dest, int dstW,
-                         int big_endian, int output_bits)
-{
-    int i;
-    int shift = 11 + 16 - output_bits;
-
-    for (i = 0; i < dstW; i++) {
-        int val = 1 << (shift - 1);
-        int j;
-
-        for (j = 0; j < filterSize; j++)
-            val += src[j][i] * filter[j];
-
-        output_pixel(&dest[i], val);
-    }
-}
-
-#undef output_pixel
-
-#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
-static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
-                              uint8_t *dest, int dstW, \
-                              const uint8_t *dither, int offset)\
-{ \
-    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
-                         (uint16_t *) dest, dstW, is_be, bits); \
-}\
-static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
-                              const int16_t **src, uint8_t *dest, int dstW, \
-                              const uint8_t *dither, int offset)\
-{ \
-    yuv2planeX_## template_size ## _c_template(filter, \
-                         filterSize, (const typeX_t **) src, \
-                         (uint16_t *) dest, dstW, is_be, bits); \
-}
-yuv2NBPS( 9, BE, 1, 10, int16_t)
-yuv2NBPS( 9, LE, 0, 10, int16_t)
-yuv2NBPS(10, BE, 1, 10, int16_t)
-yuv2NBPS(10, LE, 0, 10, int16_t)
-yuv2NBPS(12, BE, 1, 10, int16_t)
-yuv2NBPS(12, LE, 0, 10, int16_t)
-yuv2NBPS(14, BE, 1, 10, int16_t)
-yuv2NBPS(14, LE, 0, 10, int16_t)
-yuv2NBPS(16, BE, 1, 16, int32_t)
-yuv2NBPS(16, LE, 0, 16, int32_t)
-
-static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
-                           const int16_t **src, uint8_t *dest, int dstW,
-                           const uint8_t *dither, int offset)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val = dither[(i + offset) & 7] << 12;
-        int j;
-        for (j=0; j<filterSize; j++)
-            val += src[j][i] * filter[j];
-
-        dest[i]= av_clip_uint8(val>>19);
-    }
-}
-
-static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
-                           const uint8_t *dither, int offset)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
-        dest[i]= av_clip_uint8(val);
-    }
-}
-
-static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
-                        const int16_t **chrUSrc, const int16_t **chrVSrc,
-                        uint8_t *dest, int chrDstW)
-{
-    enum AVPixelFormat dstFormat = c->dstFormat;
-    const uint8_t *chrDither = c->chrDither8;
-    int i;
-
-    if (dstFormat == AV_PIX_FMT_NV12)
-        for (i=0; i<chrDstW; i++) {
-            int u = chrDither[i & 7] << 12;
-            int v = chrDither[(i + 3) & 7] << 12;
-            int j;
-            for (j=0; j<chrFilterSize; j++) {
-                u += chrUSrc[j][i] * chrFilter[j];
-                v += chrVSrc[j][i] * chrFilter[j];
-            }
-
-            dest[2*i]= av_clip_uint8(u>>19);
-            dest[2*i+1]= av_clip_uint8(v>>19);
-        }
-    else
-        for (i=0; i<chrDstW; i++) {
-            int u = chrDither[i & 7] << 12;
-            int v = chrDither[(i + 3) & 7] << 12;
-            int j;
-            for (j=0; j<chrFilterSize; j++) {
-                u += chrUSrc[j][i] * chrFilter[j];
-                v += chrVSrc[j][i] * chrFilter[j];
-            }
-
-            dest[2*i]= av_clip_uint8(v>>19);
-            dest[2*i+1]= av_clip_uint8(u>>19);
-        }
-}
-
-#define accumulate_bit(acc, val) \
-    acc <<= 1; \
-    acc |= (val) >= (128 + 110)
-#define output_pixel(pos, acc) \
-    if (target == AV_PIX_FMT_MONOBLACK) { \
-        pos = acc; \
-    } else { \
-        pos = ~acc; \
-    }
-
-static av_always_inline void
-yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                      const int16_t **lumSrc, int lumFilterSize,
-                      const int16_t *chrFilter, const int16_t **chrUSrc,
-                      const int16_t **chrVSrc, int chrFilterSize,
-                      const int16_t **alpSrc, uint8_t *dest, int dstW,
-                      int y, enum AVPixelFormat target)
-{
-    const uint8_t * const d128 = ff_dither_8x8_220[y&7];
-    int i;
-    unsigned acc = 0;
-    int err = 0;
-
-    for (i = 0; i < dstW; i += 2) {
-        int j;
-        int Y1 = 1 << 18;
-        int Y2 = 1 << 18;
-
-        for (j = 0; j < lumFilterSize; j++) {
-            Y1 += lumSrc[j][i]   * lumFilter[j];
-            Y2 += lumSrc[j][i+1] * lumFilter[j];
-        }
-        Y1 >>= 19;
-        Y2 >>= 19;
-        if ((Y1 | Y2) & 0x100) {
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-        }
-        if (c->dither == SWS_DITHER_ED) {
-            Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
-            c->dither_error[0][i] = err;
-            acc = 2*acc + (Y1 >= 128);
-            Y1 -= 220*(acc&1);
-
-            err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4);
-            c->dither_error[0][i+1] = Y1;
-            acc = 2*acc + (err >= 128);
-            err -= 220*(acc&1);
-        } else {
-            accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
-            accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
-        }
-        if ((i & 7) == 6) {
-            output_pixel(*dest++, acc);
-        }
-    }
-    c->dither_error[0][i] = err;
-
-    if (i & 6) {
-        output_pixel(*dest, acc);
-    }
-}
-
-static av_always_inline void
-yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
-                      const int16_t *ubuf[2], const int16_t *vbuf[2],
-                      const int16_t *abuf[2], uint8_t *dest, int dstW,
-                      int yalpha, int uvalpha, int y,
-                      enum AVPixelFormat target)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1];
-    const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
-    int  yalpha1 = 4096 - yalpha;
-    int i;
-
-    if (c->dither == SWS_DITHER_ED) {
-        int err = 0;
-        int acc = 0;
-        for (i = 0; i < dstW; i +=2) {
-            int Y;
-
-            Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
-            Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
-            c->dither_error[0][i] = err;
-            acc = 2*acc + (Y >= 128);
-            Y -= 220*(acc&1);
-
-            err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
-            err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
-            c->dither_error[0][i+1] = Y;
-            acc = 2*acc + (err >= 128);
-            err -= 220*(acc&1);
-
-            if ((i & 7) == 6)
-                output_pixel(*dest++, acc);
-        }
-        c->dither_error[0][i] = err;
-    } else {
-    for (i = 0; i < dstW; i += 8) {
-        int Y, acc = 0;
-
-        Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[0]);
-        Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[1]);
-        Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[2]);
-        Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[3]);
-        Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[4]);
-        Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[5]);
-        Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[6]);
-        Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
-        accumulate_bit(acc, Y + d128[7]);
-
-        output_pixel(*dest++, acc);
-    }
-    }
-}
-
-static av_always_inline void
-yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
-                      const int16_t *ubuf[2], const int16_t *vbuf[2],
-                      const int16_t *abuf0, uint8_t *dest, int dstW,
-                      int uvalpha, int y, enum AVPixelFormat target)
-{
-    const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
-    int i;
-
-    if (c->dither == SWS_DITHER_ED) {
-        int err = 0;
-        int acc = 0;
-        for (i = 0; i < dstW; i +=2) {
-            int Y;
-
-            Y = ((buf0[i + 0] + 64) >> 7);
-            Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
-            c->dither_error[0][i] = err;
-            acc = 2*acc + (Y >= 128);
-            Y -= 220*(acc&1);
-
-            err = ((buf0[i + 1] + 64) >> 7);
-            err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
-            c->dither_error[0][i+1] = Y;
-            acc = 2*acc + (err >= 128);
-            err -= 220*(acc&1);
-
-            if ((i & 7) == 6)
-                output_pixel(*dest++, acc);
-        }
-        c->dither_error[0][i] = err;
-    } else {
-    for (i = 0; i < dstW; i += 8) {
-        int acc = 0;
-        accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
-        accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
-        accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
-        accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
-        accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
-        accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
-        accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
-        accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
-
-        output_pixel(*dest++, acc);
-    }
-    }
-}
-
-#undef output_pixel
-#undef accumulate_bit
-
-#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
-static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
-                                const int16_t **lumSrc, int lumFilterSize, \
-                                const int16_t *chrFilter, const int16_t **chrUSrc, \
-                                const int16_t **chrVSrc, int chrFilterSize, \
-                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
-                                int y) \
-{ \
-    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
-                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                                  alpSrc, dest, dstW, y, fmt); \
-} \
- \
-static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
-                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
-                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
-                                int yalpha, int uvalpha, int y) \
-{ \
-    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
-                                  dest, dstW, yalpha, uvalpha, y, fmt); \
-} \
- \
-static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
-                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
-                                const int16_t *abuf0, uint8_t *dest, int dstW, \
-                                int uvalpha, int y) \
-{ \
-    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
-                                  abuf0, dest, dstW, uvalpha, \
-                                  y, fmt); \
-}
-
-YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE)
-YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK)
-
-#define output_pixels(pos, Y1, U, Y2, V) \
-    if (target == AV_PIX_FMT_YUYV422) { \
-        dest[pos + 0] = Y1; \
-        dest[pos + 1] = U;  \
-        dest[pos + 2] = Y2; \
-        dest[pos + 3] = V;  \
-    } else { \
-        dest[pos + 0] = U;  \
-        dest[pos + 1] = Y1; \
-        dest[pos + 2] = V;  \
-        dest[pos + 3] = Y2; \
-    }
-
-static av_always_inline void
-yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                     const int16_t **lumSrc, int lumFilterSize,
-                     const int16_t *chrFilter, const int16_t **chrUSrc,
-                     const int16_t **chrVSrc, int chrFilterSize,
-                     const int16_t **alpSrc, uint8_t *dest, int dstW,
-                     int y, enum AVPixelFormat target)
-{
-    int i;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int j;
-        int Y1 = 1 << 18;
-        int Y2 = 1 << 18;
-        int U  = 1 << 18;
-        int V  = 1 << 18;
-
-        for (j = 0; j < lumFilterSize; j++) {
-            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
-            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
-        }
-        for (j = 0; j < chrFilterSize; j++) {
-            U += chrUSrc[j][i] * chrFilter[j];
-            V += chrVSrc[j][i] * chrFilter[j];
-        }
-        Y1 >>= 19;
-        Y2 >>= 19;
-        U  >>= 19;
-        V  >>= 19;
-        if ((Y1 | Y2 | U | V) & 0x100) {
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-            U  = av_clip_uint8(U);
-            V  = av_clip_uint8(V);
-        }
-        output_pixels(4*i, Y1, U, Y2, V);
-    }
-}
-
-static av_always_inline void
-yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
-                     const int16_t *abuf[2], uint8_t *dest, int dstW,
-                     int yalpha, int uvalpha, int y,
-                     enum AVPixelFormat target)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
-    int  yalpha1 = 4096 - yalpha;
-    int uvalpha1 = 4096 - uvalpha;
-    int i;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
-        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
-
-        if ((Y1 | Y2 | U | V) & 0x100) {
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-            U  = av_clip_uint8(U);
-            V  = av_clip_uint8(V);
-        }
-
-        output_pixels(i * 4, Y1, U, Y2, V);
-    }
-}
-
-static av_always_inline void
-yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
-                     const int16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, int y, enum AVPixelFormat target)
-{
-    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
-    int i;
-
-    if (uvalpha < 2048) {
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2    ]+64) >> 7;
-            int Y2 = (buf0[i * 2 + 1]+64) >> 7;
-            int U  = (ubuf0[i]       +64) >> 7;
-            int V  = (vbuf0[i]       +64) >> 7;
-
-            if ((Y1 | Y2 | U | V) & 0x100) {
-                Y1 = av_clip_uint8(Y1);
-                Y2 = av_clip_uint8(Y2);
-                U  = av_clip_uint8(U);
-                V  = av_clip_uint8(V);
-            }
-
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-            U  = av_clip_uint8(U);
-            V  = av_clip_uint8(V);
-
-            output_pixels(i * 4, Y1, U, Y2, V);
-        }
-    } else {
-        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2    ]    + 64) >> 7;
-            int Y2 = (buf0[i * 2 + 1]    + 64) >> 7;
-            int U  = (ubuf0[i] + ubuf1[i]+128) >> 8;
-            int V  = (vbuf0[i] + vbuf1[i]+128) >> 8;
-
-            if ((Y1 | Y2 | U | V) & 0x100) {
-                Y1 = av_clip_uint8(Y1);
-                Y2 = av_clip_uint8(Y2);
-                U  = av_clip_uint8(U);
-                V  = av_clip_uint8(V);
-            }
-
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-            U  = av_clip_uint8(U);
-            V  = av_clip_uint8(V);
-
-            output_pixels(i * 4, Y1, U, Y2, V);
-        }
-    }
-}
-
-#undef output_pixels
-
-YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
-YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
-
-#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? R : B)
-#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? B : R)
-#define output_pixel(pos, val) \
-    if (isBE(target)) { \
-        AV_WB16(pos, val); \
-    } else { \
-        AV_WL16(pos, val); \
-    }
-
-static av_always_inline void
-yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                       const int32_t **lumSrc, int lumFilterSize,
-                       const int16_t *chrFilter, const int32_t **chrUSrc,
-                       const int32_t **chrVSrc, int chrFilterSize,
-                       const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha)
-{
-    int i;
-    int A1 = 0xffff<<14, A2 = 0xffff<<14;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int j;
-        int Y1 = -0x40000000;
-        int Y2 = -0x40000000;
-        int U  = -128 << 23; // 19
-        int V  = -128 << 23;
-        int R, G, B;
-
-        for (j = 0; j < lumFilterSize; j++) {
-            Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
-            Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
-        }
-        for (j = 0; j < chrFilterSize; j++) {;
-            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
-            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
-        }
-
-        if (hasAlpha) {
-            A1 = -0x40000000;
-            A2 = -0x40000000;
-            for (j = 0; j < lumFilterSize; j++) {
-                A1 += alpSrc[j][i * 2]     * (unsigned)lumFilter[j];
-                A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
-            }
-            A1 >>= 1;
-            A1 += 0x20002000;
-            A2 >>= 1;
-            A2 += 0x20002000;
-        }
-
-        // 8bit: 12+15=27; 16-bit: 12+19=31
-        Y1 >>= 14; // 10
-        Y1 += 0x10000;
-        Y2 >>= 14;
-        Y2 += 0x10000;
-        U  >>= 14;
-        V  >>= 14;
-
-        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
-        Y1 -= c->yuv2rgb_y_offset;
-        Y2 -= c->yuv2rgb_y_offset;
-        Y1 *= c->yuv2rgb_y_coeff;
-        Y2 *= c->yuv2rgb_y_coeff;
-        Y1 += 1 << 13; // 21
-        Y2 += 1 << 13;
-        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
-
-        R = V * c->yuv2rgb_v2r_coeff;
-        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-        B =                            U * c->yuv2rgb_u2b_coeff;
-
-        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
-        output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-        output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-        dest += 8;
-    }
-}
-
-static av_always_inline void
-yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
-                       const int32_t *ubuf[2], const int32_t *vbuf[2],
-                       const int32_t *abuf[2], uint16_t *dest, int dstW,
-                       int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha)
-{
-    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
-                  *abuf0 = hasAlpha ? abuf[0] : NULL,
-                  *abuf1 = hasAlpha ? abuf[1] : NULL;
-    int  yalpha1 = 4096 - yalpha;
-    int uvalpha1 = 4096 - uvalpha;
-    int i;
-    int A1 = 0xffff<<14, A2 = 0xffff<<14;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
-        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int R, G, B;
-
-        Y1 -= c->yuv2rgb_y_offset;
-        Y2 -= c->yuv2rgb_y_offset;
-        Y1 *= c->yuv2rgb_y_coeff;
-        Y2 *= c->yuv2rgb_y_coeff;
-        Y1 += 1 << 13;
-        Y2 += 1 << 13;
-
-        R = V * c->yuv2rgb_v2r_coeff;
-        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-        B =                            U * c->yuv2rgb_u2b_coeff;
-
-        if (hasAlpha) {
-            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 1;
-            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1;
-
-            A1 += 1 << 13;
-            A2 += 1 << 13;
-        }
-
-        output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-        output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-        dest += 8;
-    }
-}
-
-static av_always_inline void
-yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
-                       const int32_t *ubuf[2], const int32_t *vbuf[2],
-                       const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
-{
-    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
-    int i;
-    int A1 = 0xffff<<14, A2= 0xffff<<14;
-
-    if (uvalpha < 2048) {
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2]    ) >> 2;
-            int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
-            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
-            int R, G, B;
-
-            Y1 -= c->yuv2rgb_y_offset;
-            Y2 -= c->yuv2rgb_y_offset;
-            Y1 *= c->yuv2rgb_y_coeff;
-            Y2 *= c->yuv2rgb_y_coeff;
-            Y1 += 1 << 13;
-            Y2 += 1 << 13;
-
-            if (hasAlpha) {
-                A1 = abuf0[i * 2    ] << 11;
-                A2 = abuf0[i * 2 + 1] << 11;
-
-                A1 += 1 << 13;
-                A2 += 1 << 13;
-            }
-
-            R = V * c->yuv2rgb_v2r_coeff;
-            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-            B =                            U * c->yuv2rgb_u2b_coeff;
-
-            output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-            output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-            dest += 8;
-        }
-    } else {
-        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
-        int A1 = 0xffff<<14, A2 = 0xffff<<14;
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2]    ) >> 2;
-            int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
-            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
-            int R, G, B;
-
-            Y1 -= c->yuv2rgb_y_offset;
-            Y2 -= c->yuv2rgb_y_offset;
-            Y1 *= c->yuv2rgb_y_coeff;
-            Y2 *= c->yuv2rgb_y_coeff;
-            Y1 += 1 << 13;
-            Y2 += 1 << 13;
-
-            if (hasAlpha) {
-                A1 = abuf0[i * 2    ] << 11;
-                A2 = abuf0[i * 2 + 1] << 11;
-
-                A1 += 1 << 13;
-                A2 += 1 << 13;
-            }
-
-            R = V * c->yuv2rgb_v2r_coeff;
-            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-            B =                            U * c->yuv2rgb_u2b_coeff;
-
-            output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-            output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-            dest += 8;
-        }
-    }
-}
-
-static av_always_inline void
-yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                       const int32_t **lumSrc, int lumFilterSize,
-                       const int16_t *chrFilter, const int32_t **chrUSrc,
-                       const int32_t **chrVSrc, int chrFilterSize,
-                       const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha)
-{
-    int i;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int j;
-        int Y1 = -0x40000000;
-        int Y2 = -0x40000000;
-        int U  = -128 << 23; // 19
-        int V  = -128 << 23;
-        int R, G, B;
-
-        for (j = 0; j < lumFilterSize; j++) {
-            Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
-            Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
-        }
-        for (j = 0; j < chrFilterSize; j++) {;
-            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
-            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
-        }
-
-        // 8bit: 12+15=27; 16-bit: 12+19=31
-        Y1 >>= 14; // 10
-        Y1 += 0x10000;
-        Y2 >>= 14;
-        Y2 += 0x10000;
-        U  >>= 14;
-        V  >>= 14;
-
-        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
-        Y1 -= c->yuv2rgb_y_offset;
-        Y2 -= c->yuv2rgb_y_offset;
-        Y1 *= c->yuv2rgb_y_coeff;
-        Y2 *= c->yuv2rgb_y_coeff;
-        Y1 += 1 << 13; // 21
-        Y2 += 1 << 13;
-        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
-
-        R = V * c->yuv2rgb_v2r_coeff;
-        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-        B =                            U * c->yuv2rgb_u2b_coeff;
-
-        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
-        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        dest += 6;
-    }
-}
-
-static av_always_inline void
-yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
-                       const int32_t *ubuf[2], const int32_t *vbuf[2],
-                       const int32_t *abuf[2], uint16_t *dest, int dstW,
-                       int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha)
-{
-    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
-    int  yalpha1 = 4096 - yalpha;
-    int uvalpha1 = 4096 - uvalpha;
-    int i;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
-        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int R, G, B;
-
-        Y1 -= c->yuv2rgb_y_offset;
-        Y2 -= c->yuv2rgb_y_offset;
-        Y1 *= c->yuv2rgb_y_coeff;
-        Y2 *= c->yuv2rgb_y_coeff;
-        Y1 += 1 << 13;
-        Y2 += 1 << 13;
-
-        R = V * c->yuv2rgb_v2r_coeff;
-        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-        B =                            U * c->yuv2rgb_u2b_coeff;
-
-        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        dest += 6;
-    }
-}
-
-static av_always_inline void
-yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
-                       const int32_t *ubuf[2], const int32_t *vbuf[2],
-                       const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
-{
-    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
-    int i;
-
-    if (uvalpha < 2048) {
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2]    ) >> 2;
-            int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
-            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
-            int R, G, B;
-
-            Y1 -= c->yuv2rgb_y_offset;
-            Y2 -= c->yuv2rgb_y_offset;
-            Y1 *= c->yuv2rgb_y_coeff;
-            Y2 *= c->yuv2rgb_y_coeff;
-            Y1 += 1 << 13;
-            Y2 += 1 << 13;
-
-            R = V * c->yuv2rgb_v2r_coeff;
-            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-            B =                            U * c->yuv2rgb_u2b_coeff;
-
-            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            dest += 6;
-        }
-    } else {
-        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2]    ) >> 2;
-            int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
-            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
-            int R, G, B;
-
-            Y1 -= c->yuv2rgb_y_offset;
-            Y2 -= c->yuv2rgb_y_offset;
-            Y1 *= c->yuv2rgb_y_coeff;
-            Y2 *= c->yuv2rgb_y_coeff;
-            Y1 += 1 << 13;
-            Y2 += 1 << 13;
-
-            R = V * c->yuv2rgb_v2r_coeff;
-            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-            B =                            U * c->yuv2rgb_u2b_coeff;
-
-            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            dest += 6;
-        }
-    }
-}
-
-#undef output_pixel
-#undef r_b
-#undef b_r
-
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha) \
-static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
-                        const int16_t **_lumSrc, int lumFilterSize, \
-                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
-                        const int16_t **_chrVSrc, int chrFilterSize, \
-                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
-                        int y) \
-{ \
-    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
-                  **chrUSrc = (const int32_t **) _chrUSrc, \
-                  **chrVSrc = (const int32_t **) _chrVSrc, \
-                  **alpSrc  = (const int32_t **) _alpSrc; \
-    uint16_t *dest = (uint16_t *) _dest; \
-    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
-                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, dest, dstW, y, fmt, hasAlpha); \
-} \
- \
-static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
-                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
-                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
-                        int yalpha, int uvalpha, int y) \
-{ \
-    const int32_t **buf  = (const int32_t **) _buf, \
-                  **ubuf = (const int32_t **) _ubuf, \
-                  **vbuf = (const int32_t **) _vbuf, \
-                  **abuf = (const int32_t **) _abuf; \
-    uint16_t *dest = (uint16_t *) _dest; \
-    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
-                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
-} \
- \
-static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
-                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
-                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
-                        int uvalpha, int y) \
-{ \
-    const int32_t *buf0  = (const int32_t *)  _buf0, \
-                 **ubuf  = (const int32_t **) _ubuf, \
-                 **vbuf  = (const int32_t **) _vbuf, \
-                  *abuf0 = (const int32_t *)  _abuf0; \
-    uint16_t *dest = (uint16_t *) _dest; \
-    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt, hasAlpha); \
-}
-
-YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0)
-
-/*
- * Write out 2 RGB pixels in the target pixel format. This function takes a
- * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
- * things like endianness conversion and shifting. The caller takes care of
- * setting the correct offset in these tables from the chroma (U/V) values.
- * This function then uses the luminance (Y1/Y2) values to write out the
- * correct RGB values into the destination buffer.
- */
-static av_always_inline void
-yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
-              unsigned A1, unsigned A2,
-              const void *_r, const void *_g, const void *_b, int y,
-              enum AVPixelFormat target, int hasAlpha)
-{
-    if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
-        target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) {
-        uint32_t *dest = (uint32_t *) _dest;
-        const uint32_t *r = (const uint32_t *) _r;
-        const uint32_t *g = (const uint32_t *) _g;
-        const uint32_t *b = (const uint32_t *) _b;
-
-#if CONFIG_SMALL
-        int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0;
-
-        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
-        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
-#else
-        if (hasAlpha) {
-            int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
-
-            av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0);
-            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
-            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
-        } else {
-#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
-            int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
-
-            av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
-#endif
-            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
-            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
-        }
-#endif
-    } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) {
-        uint8_t *dest = (uint8_t *) _dest;
-        const uint8_t *r = (const uint8_t *) _r;
-        const uint8_t *g = (const uint8_t *) _g;
-        const uint8_t *b = (const uint8_t *) _b;
-
-#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
-#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)
-
-        dest[i * 6 + 0] = r_b[Y1];
-        dest[i * 6 + 1] =   g[Y1];
-        dest[i * 6 + 2] = b_r[Y1];
-        dest[i * 6 + 3] = r_b[Y2];
-        dest[i * 6 + 4] =   g[Y2];
-        dest[i * 6 + 5] = b_r[Y2];
-#undef r_b
-#undef b_r
-    } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
-               target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
-               target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
-        uint16_t *dest = (uint16_t *) _dest;
-        const uint16_t *r = (const uint16_t *) _r;
-        const uint16_t *g = (const uint16_t *) _g;
-        const uint16_t *b = (const uint16_t *) _b;
-        int dr1, dg1, db1, dr2, dg2, db2;
-
-        if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
-            dr1 = ff_dither_2x2_8[ y & 1     ][0];
-            dg1 = ff_dither_2x2_4[ y & 1     ][0];
-            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
-            dr2 = ff_dither_2x2_8[ y & 1     ][1];
-            dg2 = ff_dither_2x2_4[ y & 1     ][1];
-            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
-        } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
-            dr1 = ff_dither_2x2_8[ y & 1     ][0];
-            dg1 = ff_dither_2x2_8[ y & 1     ][1];
-            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
-            dr2 = ff_dither_2x2_8[ y & 1     ][1];
-            dg2 = ff_dither_2x2_8[ y & 1     ][0];
-            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
-        } else {
-            dr1 = ff_dither_4x4_16[ y & 3     ][0];
-            dg1 = ff_dither_4x4_16[ y & 3     ][1];
-            db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
-            dr2 = ff_dither_4x4_16[ y & 3     ][1];
-            dg2 = ff_dither_4x4_16[ y & 3     ][0];
-            db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
-        }
-
-        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
-        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
-    } else /* 8/4-bit */ {
-        uint8_t *dest = (uint8_t *) _dest;
-        const uint8_t *r = (const uint8_t *) _r;
-        const uint8_t *g = (const uint8_t *) _g;
-        const uint8_t *b = (const uint8_t *) _b;
-        int dr1, dg1, db1, dr2, dg2, db2;
-
-        if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
-            const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
-            const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
-            dr1 = dg1 = d32[(i * 2 + 0) & 7];
-            db1 =       d64[(i * 2 + 0) & 7];
-            dr2 = dg2 = d32[(i * 2 + 1) & 7];
-            db2 =       d64[(i * 2 + 1) & 7];
-        } else {
-            const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
-            const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
-            dr1 = db1 = d128[(i * 2 + 0) & 7];
-            dg1 =        d64[(i * 2 + 0) & 7];
-            dr2 = db2 = d128[(i * 2 + 1) & 7];
-            dg2 =        d64[(i * 2 + 1) & 7];
-        }
-
-        if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) {
-            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
-                    ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
-        } else {
-            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
-            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
-        }
-    }
-}
-
-static av_always_inline void
-yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                     const int16_t **lumSrc, int lumFilterSize,
-                     const int16_t *chrFilter, const int16_t **chrUSrc,
-                     const int16_t **chrVSrc, int chrFilterSize,
-                     const int16_t **alpSrc, uint8_t *dest, int dstW,
-                     int y, enum AVPixelFormat target, int hasAlpha)
-{
-    int i;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int j, A1, A2;
-        int Y1 = 1 << 18;
-        int Y2 = 1 << 18;
-        int U  = 1 << 18;
-        int V  = 1 << 18;
-        const void *r, *g, *b;
-
-        for (j = 0; j < lumFilterSize; j++) {
-            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
-            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
-        }
-        for (j = 0; j < chrFilterSize; j++) {
-            U += chrUSrc[j][i] * chrFilter[j];
-            V += chrVSrc[j][i] * chrFilter[j];
-        }
-        Y1 >>= 19;
-        Y2 >>= 19;
-        U  >>= 19;
-        V  >>= 19;
-        if (hasAlpha) {
-            A1 = 1 << 18;
-            A2 = 1 << 18;
-            for (j = 0; j < lumFilterSize; j++) {
-                A1 += alpSrc[j][i * 2    ] * lumFilter[j];
-                A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
-            }
-            A1 >>= 19;
-            A2 >>= 19;
-            if ((A1 | A2) & 0x100) {
-                A1 = av_clip_uint8(A1);
-                A2 = av_clip_uint8(A2);
-            }
-        }
-
-        r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM];
-        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
-        b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
-
-        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
-                      r, g, b, y, target, hasAlpha);
-    }
-}
-
-static av_always_inline void
-yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
-                     const int16_t *abuf[2], uint8_t *dest, int dstW,
-                     int yalpha, int uvalpha, int y,
-                     enum AVPixelFormat target, int hasAlpha)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
-                  *abuf0 = hasAlpha ? abuf[0] : NULL,
-                  *abuf1 = hasAlpha ? abuf[1] : NULL;
-    int  yalpha1 = 4096 - yalpha;
-    int uvalpha1 = 4096 - uvalpha;
-    int i;
-
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
-        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
-        int A1, A2;
-        const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
-                   *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
-                   *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
-
-        if (hasAlpha) {
-            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
-            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
-            A1 = av_clip_uint8(A1);
-            A2 = av_clip_uint8(A2);
-        }
-
-        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
-                      r, g, b, y, target, hasAlpha);
-    }
-}
-
-static av_always_inline void
-yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
-                     const int16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, int y, enum AVPixelFormat target,
-                     int hasAlpha)
-{
-    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
-    int i;
-
-    if (uvalpha < 2048) {
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2    ] + 64) >> 7;
-            int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
-            int U  = (ubuf0[i]        + 64) >> 7;
-            int V  = (vbuf0[i]        + 64) >> 7;
-            int A1, A2;
-            const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
-                       *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
-                       *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
-
-            if (hasAlpha) {
-                A1 = abuf0[i * 2    ] * 255 + 16384 >> 15;
-                A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
-                A1 = av_clip_uint8(A1);
-                A2 = av_clip_uint8(A2);
-            }
-
-            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
-                          r, g, b, y, target, hasAlpha);
-        }
-    } else {
-        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2    ]     +  64) >> 7;
-            int Y2 = (buf0[i * 2 + 1]     +  64) >> 7;
-            int U  = (ubuf0[i] + ubuf1[i] + 128) >> 8;
-            int V  = (vbuf0[i] + vbuf1[i] + 128) >> 8;
-            int A1, A2;
-            const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
-                       *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
-                       *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
-
-            if (hasAlpha) {
-                A1 = (abuf0[i * 2    ] + 64) >> 7;
-                A2 = (abuf0[i * 2 + 1] + 64) >> 7;
-                A1 = av_clip_uint8(A1);
-                A2 = av_clip_uint8(A2);
-            }
-
-            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
-                          r, g, b, y, target, hasAlpha);
-        }
-    }
-}
-
-#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
-static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
-                                const int16_t **lumSrc, int lumFilterSize, \
-                                const int16_t *chrFilter, const int16_t **chrUSrc, \
-                                const int16_t **chrVSrc, int chrFilterSize, \
-                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
-                                int y) \
-{ \
-    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
-                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
-}
-
-#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
-YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
-static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
-                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
-                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
-                                int yalpha, int uvalpha, int y) \
-{ \
-    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
-                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
-}
-
-#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
-YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
-static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
-                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
-                                const int16_t *abuf0, uint8_t *dest, int dstW, \
-                                int uvalpha, int y) \
-{ \
-    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt, hasAlpha); \
-}
-
-#if CONFIG_SMALL
-YUV2RGBWRAPPER(yuv2rgb,,  32_1,  AV_PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPER(yuv2rgb,,  32,    AV_PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-#else
-#if CONFIG_SWSCALE_ALPHA
-YUV2RGBWRAPPER(yuv2rgb,, a32_1,  AV_PIX_FMT_RGB32_1,   1)
-YUV2RGBWRAPPER(yuv2rgb,, a32,    AV_PIX_FMT_RGB32,     1)
-#endif
-YUV2RGBWRAPPER(yuv2rgb,, x32_1,  AV_PIX_FMT_RGB32_1,   0)
-YUV2RGBWRAPPER(yuv2rgb,, x32,    AV_PIX_FMT_RGB32,     0)
-#endif
-YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24,   0)
-YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24,   0)
-YUV2RGBWRAPPER(yuv2rgb,,  16,    AV_PIX_FMT_RGB565,    0)
-YUV2RGBWRAPPER(yuv2rgb,,  15,    AV_PIX_FMT_RGB555,    0)
-YUV2RGBWRAPPER(yuv2rgb,,  12,    AV_PIX_FMT_RGB444,    0)
-YUV2RGBWRAPPER(yuv2rgb,,   8,    AV_PIX_FMT_RGB8,      0)
-YUV2RGBWRAPPER(yuv2rgb,,   4,    AV_PIX_FMT_RGB4,      0)
-YUV2RGBWRAPPER(yuv2rgb,,   4b,   AV_PIX_FMT_RGB4_BYTE, 0)
-
-static av_always_inline void yuv2rgb_write_full(SwsContext *c,
-    uint8_t *dest, int i, int Y, int A, int U, int V,
-    int y, enum AVPixelFormat target, int hasAlpha, int err[4])
-{
-    int R, G, B;
-    int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;
-
-    Y -= c->yuv2rgb_y_offset;
-    Y *= c->yuv2rgb_y_coeff;
-    Y += 1 << 21;
-    R = Y + V*c->yuv2rgb_v2r_coeff;
-    G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
-    B = Y +                          U*c->yuv2rgb_u2b_coeff;
-    if ((R | G | B) & 0xC0000000) {
-        R = av_clip_uintp2(R, 30);
-        G = av_clip_uintp2(G, 30);
-        B = av_clip_uintp2(B, 30);
-    }
-
-    switch(target) {
-    case AV_PIX_FMT_ARGB:
-        dest[0] = hasAlpha ? A : 255;
-        dest[1] = R >> 22;
-        dest[2] = G >> 22;
-        dest[3] = B >> 22;
-        break;
-    case AV_PIX_FMT_RGB24:
-        dest[0] = R >> 22;
-        dest[1] = G >> 22;
-        dest[2] = B >> 22;
-        break;
-    case AV_PIX_FMT_RGBA:
-        dest[0] = R >> 22;
-        dest[1] = G >> 22;
-        dest[2] = B >> 22;
-        dest[3] = hasAlpha ? A : 255;
-        break;
-    case AV_PIX_FMT_ABGR:
-        dest[0] = hasAlpha ? A : 255;
-        dest[1] = B >> 22;
-        dest[2] = G >> 22;
-        dest[3] = R >> 22;
-        break;
-    case AV_PIX_FMT_BGR24:
-        dest[0] = B >> 22;
-        dest[1] = G >> 22;
-        dest[2] = R >> 22;
-        break;
-    case AV_PIX_FMT_BGRA:
-        dest[0] = B >> 22;
-        dest[1] = G >> 22;
-        dest[2] = R >> 22;
-        dest[3] = hasAlpha ? A : 255;
-        break;
-    case AV_PIX_FMT_BGR4_BYTE:
-    case AV_PIX_FMT_RGB4_BYTE:
-    case AV_PIX_FMT_BGR8:
-    case AV_PIX_FMT_RGB8:
-    {
-        int r,g,b;
-        R >>= 22;
-        G >>= 22;
-        B >>= 22;
-        R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
-        G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4;
-        B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4;
-        c->dither_error[0][i] = err[0];
-        c->dither_error[1][i] = err[1];
-        c->dither_error[2][i] = err[2];
-        r = R >> (isrgb8 ? 5 : 7);
-        g = G >> (isrgb8 ? 5 : 6);
-        b = B >> (isrgb8 ? 6 : 7);
-        r = av_clip(r, 0, isrgb8 ? 7 : 1);
-        g = av_clip(g, 0, isrgb8 ? 7 : 3);
-        b = av_clip(b, 0, isrgb8 ? 3 : 1);
-        err[0] = R - r*(isrgb8 ? 36 : 255);
-        err[1] = G - g*(isrgb8 ? 36 : 85);
-        err[2] = B - b*(isrgb8 ? 85 : 255);
-        if(target == AV_PIX_FMT_BGR4_BYTE) {
-            dest[0] = r + 2*g + 8*b;
-        } else if(target == AV_PIX_FMT_RGB4_BYTE) {
-            dest[0] = b + 2*g + 8*r;
-        } else if(target == AV_PIX_FMT_BGR8) {
-            dest[0] = r + 8*g + 64*b;
-        } else if(target == AV_PIX_FMT_RGB8) {
-            dest[0] = b + 4*g + 32*r;
-        } else
-            av_assert2(0);
-        break;}
-    }
-}
-
-static av_always_inline void
-yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                          const int16_t **lumSrc, int lumFilterSize,
-                          const int16_t *chrFilter, const int16_t **chrUSrc,
-                          const int16_t **chrVSrc, int chrFilterSize,
-                          const int16_t **alpSrc, uint8_t *dest,
-                          int dstW, int y, enum AVPixelFormat target, int hasAlpha)
-{
-    int i;
-    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
-    int err[4] = {0};
-    int A = 0; //init to silence warning
-
-    if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
-       || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
-        step = 1;
-
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int Y = 1<<9;
-        int U = (1<<9)-(128 << 19);
-        int V = (1<<9)-(128 << 19);
-
-        for (j = 0; j < lumFilterSize; j++) {
-            Y += lumSrc[j][i] * lumFilter[j];
-        }
-        for (j = 0; j < chrFilterSize; j++) {
-            U += chrUSrc[j][i] * chrFilter[j];
-            V += chrVSrc[j][i] * chrFilter[j];
-        }
-        Y >>= 10;
-        U >>= 10;
-        V >>= 10;
-        if (hasAlpha) {
-            A = 1 << 18;
-            for (j = 0; j < lumFilterSize; j++) {
-                A += alpSrc[j][i] * lumFilter[j];
-            }
-            A >>= 19;
-            if (A & 0x100)
-                A = av_clip_uint8(A);
-        }
-        yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
-        dest += step;
-    }
-    c->dither_error[0][i] = err[0];
-    c->dither_error[1][i] = err[1];
-    c->dither_error[2][i] = err[2];
-}
-/**
- * Write one line of full-chroma packed RGB, blending two source lines.
- *
- * Every output pixel gets its own U/V sample (ubuf/vbuf are indexed with
- * the same i as buf). Luma, chroma and alpha are each a weighted sum of
- * line 0 and line 1 with weights that add up to 4096; the pixel is then
- * emitted through yuv2rgb_write_full(), which is defined outside this chunk.
- *
- * @param c        scaler context; c->dither_error[0..2][dstW] is written on exit
- * @param buf      the two luma lines
- * @param ubuf     the two U lines
- * @param vbuf     the two V lines
- * @param abuf     the two alpha lines; only read when hasAlpha is non-zero
- * @param dest     output line; advanced by `step` bytes after each pixel
- * @param dstW     number of pixels to produce
- * @param yalpha   weight of buf[1]/abuf[1]; line 0 gets 4096 - yalpha
- * @param uvalpha  weight of ubuf[1]/vbuf[1]; line 0 gets 4096 - uvalpha
- * @param y        forwarded unchanged to yuv2rgb_write_full()
- * @param target   destination pixel format; selects the per-pixel step
- * @param hasAlpha non-zero to compute A from abuf
- */
-static av_always_inline void
-yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
-                     const int16_t *abuf[2], uint8_t *dest, int dstW,
-                     int yalpha, int uvalpha, int y,
-                     enum AVPixelFormat target, int hasAlpha)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
-                  *abuf0 = hasAlpha ? abuf[0] : NULL,
-                  *abuf1 = hasAlpha ? abuf[1] : NULL;
-    int  yalpha1 = 4096 - yalpha;
-    int uvalpha1 = 4096 - uvalpha;
-    int i;
-    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
-    int err[4] = {0}; /* scratch handed to yuv2rgb_write_full() for every pixel;
-                       * elements 0..2 are saved into c->dither_error below */
-    int A = 0; // init to silence warning
-
-    if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
-       || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
-        step = 1; /* these four formats advance dest by one byte per pixel */
-
-    for (i = 0; i < dstW; i++) {
-        int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha             ) >> 10; //FIXME rounding
-        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10; /* 128 << 19 is (128 << 7) times the 4096 total weight */
-        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10;
-
-        if (hasAlpha) {
-            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19; /* 1 << 18 rounds the >> 19 */
-            if (A & 0x100) /* cheap range test: bit 8 is set for 256 and for small negative values */
-                A = av_clip_uint8(A);
-        }
-
-        yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
-        dest += step;
-    }
-    c->dither_error[0][i] = err[0]; /* i == dstW here: the loop above has finished.
-                                     * NOTE(review): needs dither_error rows with more than dstW
-                                     * entries - confirm against the allocation */
-    c->dither_error[1][i] = err[1];
-    c->dither_error[2][i] = err[2];
-}
-/**
- * Write one line of full-chroma packed RGB from a single luma line.
- *
- * Luma and alpha come from one line only (buf0 / abuf0). Chroma comes from
- * ubuf[0]/vbuf[0] alone when uvalpha < 2048, otherwise from the sum of both
- * chroma lines. Each pixel is emitted through yuv2rgb_write_full(), which is
- * defined outside this chunk.
- *
- * @param c        scaler context; c->dither_error[0..2][dstW] is written on exit
- * @param buf0     luma line
- * @param ubuf     the two U lines (ubuf[1] is only read when uvalpha >= 2048)
- * @param vbuf     the two V lines (vbuf[1] is only read when uvalpha >= 2048)
- * @param abuf0    alpha line; only read when hasAlpha is non-zero
- * @param dest     output line; advanced by `step` bytes after each pixel
- * @param dstW     number of pixels to produce
- * @param uvalpha  only compared against 2048 to pick the chroma path
- * @param y        forwarded unchanged to yuv2rgb_write_full()
- * @param target   destination pixel format; selects the per-pixel step
- * @param hasAlpha non-zero to compute A from abuf0
- */
-static av_always_inline void
-yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
-                     const int16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, int y, enum AVPixelFormat target,
-                     int hasAlpha)
-{
-    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
-    int i;
-    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
-    int err[4] = {0}; /* scratch handed to yuv2rgb_write_full() for every pixel;
-                       * elements 0..2 are saved into c->dither_error below */
-
-    if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
-       || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
-        step = 1; /* these four formats advance dest by one byte per pixel */
-
-    if (uvalpha < 2048) { /* chroma from line 0 only */
-        int A = 0; //init to silence warning
-        for (i = 0; i < dstW; i++) {
-            int Y = buf0[i] << 2;
-            int U = (ubuf0[i] - (128<<7)) << 2;
-            int V = (vbuf0[i] - (128<<7)) << 2;
-
-            if (hasAlpha) {
-                A = (abuf0[i] + 64) >> 7; /* + 64 rounds the >> 7 */
-                if (A & 0x100) /* cheap range test: bit 8 is set for 256 and for small negative values */
-                    A = av_clip_uint8(A);
-            }
-
-            yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
-            dest += step;
-        }
-    } else { /* chroma from the sum of both lines */
-        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
-        int A = 0; //init to silence warning
-        for (i = 0; i < dstW; i++) {
-            int Y = buf0[i] << 2;
-            int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1; /* two samples: offset doubles to 128 << 8, shift drops to 1 */
-            int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1;
-
-            if (hasAlpha) {
-                A = (abuf0[i] + 64) >> 7;
-                if (A & 0x100)
-                    A = av_clip_uint8(A);
-            }
-
-            yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
-            dest += step;
-        }
-    }
-
-    c->dither_error[0][i] = err[0]; /* i == dstW here: whichever loop ran has finished.
-                                     * NOTE(review): needs dither_error rows with more than dstW
-                                     * entries - confirm against the allocation */
-    c->dither_error[1][i] = err[1];
-    c->dither_error[2][i] = err[2];
-}
-/*
- * Instantiate the packed full-chroma writers, one line per destination
- * format. YUV2RGBWRAPPER is defined outside this chunk; judging by the names
- * used in ff_sws_init_output_funcs(), each line presumably yields
- * yuv2<ext>_X_c, yuv2<ext>_2_c and yuv2<ext>_1_c built on the
- * yuv2rgb_full_*_c_template functions, with the last argument passed on as
- * hasAlpha (TODO confirm against the macro definition).
- * With CONFIG_SMALL only the alpha-named 32-bit variants are instantiated and
- * their alpha argument is the expression CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
- * otherwise separate constant variants are built: alpha (1, only with
- * CONFIG_SWSCALE_ALPHA) and no alpha (0, the *x32/x*32 names).
- */
-#if CONFIG_SMALL
-YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-#else
-#if CONFIG_SWSCALE_ALPHA
-YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  1)
-YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  1)
-YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  1)
-YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  1)
-#endif
-YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA,  0)
-YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR,  0)
-YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA,  0)
-YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB,  0)
-#endif
-YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full,  AV_PIX_FMT_BGR24, 0)
-YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full,  AV_PIX_FMT_RGB24, 0)
-
-YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full,  AV_PIX_FMT_BGR4_BYTE, 0)
-YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full,  AV_PIX_FMT_RGB4_BYTE, 0)
-YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full,   AV_PIX_FMT_BGR8,  0)
-YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full,   AV_PIX_FMT_RGB8,  0)
-/**
- * Vertically filter YUV(A) lines and write one line of planar G/B/R(/A).
- *
- * For each pixel the luma, chroma and (optionally) alpha taps are summed,
- * the result is converted to R, G, B with the coefficients stored in c, and
- * the components are written to dest[0] (G), dest[1] (B), dest[2] (R) and,
- * when alpha is produced, dest[3] (A). Output is 8-bit when SH == 22 and
- * 16-bit otherwise; 16-bit output is byte-swapped afterwards if the
- * destination endianness differs from the host's.
- *
- * @param c             scaler context (dstFormat and the yuv2rgb_* coefficients are read)
- * @param lumFilter     lumFilterSize vertical coefficients for luma and alpha
- * @param lumSrc        lumFilterSize luma lines
- * @param lumFilterSize number of luma/alpha taps
- * @param chrFilter     chrFilterSize vertical coefficients for chroma
- * @param chrUSrc       chrFilterSize U lines
- * @param chrVSrc       chrFilterSize V lines
- * @param chrFilterSize number of chroma taps
- * @param alpSrc        alpha lines, or NULL; alpha is only produced when this is
- *                      non-NULL and the format descriptor has AV_PIX_FMT_FLAG_ALPHA
- * @param dest          array of destination plane pointers
- * @param dstW          number of pixels to produce
- * @param y             not referenced in this function
- */
-static void
-yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
-                  const int16_t **lumSrc, int lumFilterSize,
-                  const int16_t *chrFilter, const int16_t **chrUSrc,
-                  const int16_t **chrVSrc, int chrFilterSize,
-                  const int16_t **alpSrc, uint8_t **dest,
-                  int dstW, int y)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
-    int i;
-    int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
-    uint16_t **dest16 = (uint16_t**)dest;
-    int SH = 22 + 7 - desc->comp[0].depth_minus1; /* 22 when depth_minus1 == 7; smaller for deeper components */
-    int A = 0; // init to silence warning
-
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int Y = 1 << 9; /* 1 << 9 rounds the >> 10 below */
-        int U = (1 << 9) - (128 << 19);
-        int V = (1 << 9) - (128 << 19);
-        int R, G, B;
-
-        for (j = 0; j < lumFilterSize; j++)
-            Y += lumSrc[j][i] * lumFilter[j];
-
-        for (j = 0; j < chrFilterSize; j++) {
-            U += chrUSrc[j][i] * chrFilter[j];
-            V += chrVSrc[j][i] * chrFilter[j];
-        }
-
-        Y >>= 10;
-        U >>= 10;
-        V >>= 10;
-
-        if (hasAlpha) {
-            A = 1 << 18; /* rounds the >> 19 below */
-
-            for (j = 0; j < lumFilterSize; j++)
-                A += alpSrc[j][i] * lumFilter[j];
-
-            A >>= 19;
-
-            if (A & 0x100) /* only clip when bit 8 signals a value outside 0..255 */
-                A = av_clip_uint8(A);
-        }
-
-        Y -= c->yuv2rgb_y_offset;
-        Y *= c->yuv2rgb_y_coeff;
-        Y += 1 << 21; /* NOTE(review): looks like the rounding term for the 8-bit >> 22 - confirm */
-        R = Y + V * c->yuv2rgb_v2r_coeff;
-        G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
-        B = Y +                            U * c->yuv2rgb_u2b_coeff;
-
-        if ((R | G | B) & 0xC0000000) { /* any component negative or >= 1 << 30: clamp all three to 30 bits */
-            R = av_clip_uintp2(R, 30);
-            G = av_clip_uintp2(G, 30);
-            B = av_clip_uintp2(B, 30);
-        }
-
-        if (SH != 22) { /* more than 8 bits per component: 16-bit stores in host byte order */
-            dest16[0][i] = G >> SH;
-            dest16[1][i] = B >> SH;
-            dest16[2][i] = R >> SH;
-            if (hasAlpha)
-                dest16[3][i] = A; /* NOTE(review): A is the 0..255 value, not rescaled like G/B/R - confirm intended */
-        } else {
-            dest[0][i] = G >> 22;
-            dest[1][i] = B >> 22;
-            dest[2][i] = R >> 22;
-            if (hasAlpha)
-                dest[3][i] = A;
-        }
-    }
-    if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { /* destination endianness differs from the host */
-        for (i = 0; i < dstW; i++) {
-            dest16[0][i] = av_bswap16(dest16[0][i]);
-            dest16[1][i] = av_bswap16(dest16[1][i]);
-            dest16[2][i] = av_bswap16(dest16[2][i]);
-            if (hasAlpha)
-                dest16[3][i] = av_bswap16(dest16[3][i]);
-        }
-    }
-}
-/**
- * Select the C output routines for c->dstFormat.
- *
- * Step 1: the planar writers (*yuv2plane1, *yuv2planeX and, for NV12/NV21,
- *         *yuv2nv12cX) are chosen from the destination bit depth and
- *         endianness.
- * Step 2: the packed writers (*yuv2packed1/2/X) or the any-layout writer
- *         (*yuv2anyX) are chosen from the pixel format. With
- *         SWS_FULL_CHR_H_INT set in c->flags the "_full" variants are used;
- *         if that table has no entry for the format, control jumps to the
- *         regular table.
- * Step 3: the monochrome and packed YUV 4:2:2 formats are handled by a final
- *         switch that runs regardless of the flag.
- *
- * Output pointers for which no case matches are left untouched.
- *
- * @param c            scaler context; dstFormat, flags and alpPixBuf are read
- * @param yuv2plane1   out: planar writer taking a single source line
- * @param yuv2planeX   out: planar writer applying a vertical filter
- * @param yuv2nv12cX   out: interleaved chroma writer, set for NV12/NV21 only
- * @param yuv2packed1  out: packed writer, one source line
- * @param yuv2packed2  out: packed writer, two source lines
- * @param yuv2packedX  out: packed writer, vertical filter
- * @param yuv2anyX     out: writer used for the planar GBR formats
- */
-av_cold void ff_sws_init_output_funcs(SwsContext *c,
-                                      yuv2planar1_fn *yuv2plane1,
-                                      yuv2planarX_fn *yuv2planeX,
-                                      yuv2interleavedX_fn *yuv2nv12cX,
-                                      yuv2packed1_fn *yuv2packed1,
-                                      yuv2packed2_fn *yuv2packed2,
-                                      yuv2packedX_fn *yuv2packedX,
-                                      yuv2anyX_fn *yuv2anyX)
-{
-    enum AVPixelFormat dstFormat = c->dstFormat;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
-
-    if (is16BPS(dstFormat)) {
-        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
-        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
-    } else if (is9_OR_10BPS(dstFormat)) { /* despite the name, depths 9, 10, 12 and 14 are dispatched below */
-        if (desc->comp[0].depth_minus1 == 8) {
-            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
-            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
-        } else if (desc->comp[0].depth_minus1 == 9) {
-            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
-            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
-        } else if (desc->comp[0].depth_minus1 == 11) {
-            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c  : yuv2planeX_12LE_c;
-            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c  : yuv2plane1_12LE_c;
-        } else if (desc->comp[0].depth_minus1 == 13) {
-            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c  : yuv2planeX_14LE_c;
-            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
-        } else
-            av_assert0(0); /* any other depth in this class is a programming error */
-    } else {
-        *yuv2plane1 = yuv2plane1_8_c;
-        *yuv2planeX = yuv2planeX_8_c;
-        if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
-            *yuv2nv12cX = yuv2nv12cX_c;
-    }
-
-    if(c->flags & SWS_FULL_CHR_H_INT) {
-        switch (dstFormat) {
-            case AV_PIX_FMT_RGBA:
-#if CONFIG_SMALL
-                *yuv2packedX = yuv2rgba32_full_X_c;
-                *yuv2packed2 = yuv2rgba32_full_2_c;
-                *yuv2packed1 = yuv2rgba32_full_1_c;
-#else
-#if CONFIG_SWSCALE_ALPHA
-                if (c->alpPixBuf) {
-                    *yuv2packedX = yuv2rgba32_full_X_c;
-                    *yuv2packed2 = yuv2rgba32_full_2_c;
-                    *yuv2packed1 = yuv2rgba32_full_1_c;
-                } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-                {
-                    *yuv2packedX = yuv2rgbx32_full_X_c;
-                    *yuv2packed2 = yuv2rgbx32_full_2_c;
-                    *yuv2packed1 = yuv2rgbx32_full_1_c;
-                }
-#endif /* !CONFIG_SMALL */
-                break;
-            case AV_PIX_FMT_ARGB:
-#if CONFIG_SMALL
-                *yuv2packedX = yuv2argb32_full_X_c;
-                *yuv2packed2 = yuv2argb32_full_2_c;
-                *yuv2packed1 = yuv2argb32_full_1_c;
-#else
-#if CONFIG_SWSCALE_ALPHA
-                if (c->alpPixBuf) {
-                    *yuv2packedX = yuv2argb32_full_X_c;
-                    *yuv2packed2 = yuv2argb32_full_2_c;
-                    *yuv2packed1 = yuv2argb32_full_1_c;
-                } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-                {
-                    *yuv2packedX = yuv2xrgb32_full_X_c;
-                    *yuv2packed2 = yuv2xrgb32_full_2_c;
-                    *yuv2packed1 = yuv2xrgb32_full_1_c;
-                }
-#endif /* !CONFIG_SMALL */
-                break;
-            case AV_PIX_FMT_BGRA:
-#if CONFIG_SMALL
-                *yuv2packedX = yuv2bgra32_full_X_c;
-                *yuv2packed2 = yuv2bgra32_full_2_c;
-                *yuv2packed1 = yuv2bgra32_full_1_c;
-#else
-#if CONFIG_SWSCALE_ALPHA
-                if (c->alpPixBuf) {
-                    *yuv2packedX = yuv2bgra32_full_X_c;
-                    *yuv2packed2 = yuv2bgra32_full_2_c;
-                    *yuv2packed1 = yuv2bgra32_full_1_c;
-                } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-                {
-                    *yuv2packedX = yuv2bgrx32_full_X_c;
-                    *yuv2packed2 = yuv2bgrx32_full_2_c;
-                    *yuv2packed1 = yuv2bgrx32_full_1_c;
-                }
-#endif /* !CONFIG_SMALL */
-                break;
-            case AV_PIX_FMT_ABGR:
-#if CONFIG_SMALL
-                *yuv2packedX = yuv2abgr32_full_X_c;
-                *yuv2packed2 = yuv2abgr32_full_2_c;
-                *yuv2packed1 = yuv2abgr32_full_1_c;
-#else
-#if CONFIG_SWSCALE_ALPHA
-                if (c->alpPixBuf) {
-                    *yuv2packedX = yuv2abgr32_full_X_c;
-                    *yuv2packed2 = yuv2abgr32_full_2_c;
-                    *yuv2packed1 = yuv2abgr32_full_1_c;
-                } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-                {
-                    *yuv2packedX = yuv2xbgr32_full_X_c;
-                    *yuv2packed2 = yuv2xbgr32_full_2_c;
-                    *yuv2packed1 = yuv2xbgr32_full_1_c;
-                }
-#endif /* !CONFIG_SMALL */
-                break;
-            case AV_PIX_FMT_RGB24:
-            *yuv2packedX = yuv2rgb24_full_X_c;
-            *yuv2packed2 = yuv2rgb24_full_2_c;
-            *yuv2packed1 = yuv2rgb24_full_1_c;
-            break;
-        case AV_PIX_FMT_BGR24:
-            *yuv2packedX = yuv2bgr24_full_X_c;
-            *yuv2packed2 = yuv2bgr24_full_2_c;
-            *yuv2packed1 = yuv2bgr24_full_1_c;
-            break;
-        case AV_PIX_FMT_BGR4_BYTE:
-            *yuv2packedX = yuv2bgr4_byte_full_X_c;
-            *yuv2packed2 = yuv2bgr4_byte_full_2_c;
-            *yuv2packed1 = yuv2bgr4_byte_full_1_c;
-            break;
-        case AV_PIX_FMT_RGB4_BYTE:
-            *yuv2packedX = yuv2rgb4_byte_full_X_c;
-            *yuv2packed2 = yuv2rgb4_byte_full_2_c;
-            *yuv2packed1 = yuv2rgb4_byte_full_1_c;
-            break;
-        case AV_PIX_FMT_BGR8:
-            *yuv2packedX = yuv2bgr8_full_X_c;
-            *yuv2packed2 = yuv2bgr8_full_2_c;
-            *yuv2packed1 = yuv2bgr8_full_1_c;
-            break;
-        case AV_PIX_FMT_RGB8:
-            *yuv2packedX = yuv2rgb8_full_X_c;
-            *yuv2packed2 = yuv2rgb8_full_2_c;
-            *yuv2packed1 = yuv2rgb8_full_1_c;
-            break;
-        case AV_PIX_FMT_GBRP:
-        case AV_PIX_FMT_GBRP9BE:
-        case AV_PIX_FMT_GBRP9LE:
-        case AV_PIX_FMT_GBRP10BE:
-        case AV_PIX_FMT_GBRP10LE:
-        case AV_PIX_FMT_GBRP12BE:
-        case AV_PIX_FMT_GBRP12LE:
-        case AV_PIX_FMT_GBRP14BE:
-        case AV_PIX_FMT_GBRP14LE:
-        case AV_PIX_FMT_GBRP16BE:
-        case AV_PIX_FMT_GBRP16LE:
-        case AV_PIX_FMT_GBRAP:
-            *yuv2anyX = yuv2gbrp_full_X_c; /* planar GBR: only the any-layout writer is set */
-            break;
-        }
-        if (!*yuv2packedX && !*yuv2anyX) /* NOTE(review): assumes the caller passes these in as NULL - confirm */
-            goto YUV_PACKED; /* no full-chroma writer for this format: use the regular table */
-    } else {
-        YUV_PACKED:
-        switch (dstFormat) {
-        case AV_PIX_FMT_RGBA64LE:
-#if CONFIG_SWSCALE_ALPHA
-            if (c->alpPixBuf) {
-                *yuv2packed1 = yuv2rgba64le_1_c;
-                *yuv2packed2 = yuv2rgba64le_2_c;
-                *yuv2packedX = yuv2rgba64le_X_c;
-            } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-            {
-                *yuv2packed1 = yuv2rgbx64le_1_c;
-                *yuv2packed2 = yuv2rgbx64le_2_c;
-                *yuv2packedX = yuv2rgbx64le_X_c;
-            }
-            break;
-        case AV_PIX_FMT_RGBA64BE:
-#if CONFIG_SWSCALE_ALPHA
-            if (c->alpPixBuf) {
-                *yuv2packed1 = yuv2rgba64be_1_c;
-                *yuv2packed2 = yuv2rgba64be_2_c;
-                *yuv2packedX = yuv2rgba64be_X_c;
-            } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-            {
-                *yuv2packed1 = yuv2rgbx64be_1_c;
-                *yuv2packed2 = yuv2rgbx64be_2_c;
-                *yuv2packedX = yuv2rgbx64be_X_c;
-            }
-            break;
-        case AV_PIX_FMT_RGB48LE:
-            *yuv2packed1 = yuv2rgb48le_1_c;
-            *yuv2packed2 = yuv2rgb48le_2_c;
-            *yuv2packedX = yuv2rgb48le_X_c;
-            break;
-        case AV_PIX_FMT_RGB48BE:
-            *yuv2packed1 = yuv2rgb48be_1_c;
-            *yuv2packed2 = yuv2rgb48be_2_c;
-            *yuv2packedX = yuv2rgb48be_X_c;
-            break;
-        case AV_PIX_FMT_BGR48LE:
-            *yuv2packed1 = yuv2bgr48le_1_c;
-            *yuv2packed2 = yuv2bgr48le_2_c;
-            *yuv2packedX = yuv2bgr48le_X_c;
-            break;
-        case AV_PIX_FMT_BGR48BE:
-            *yuv2packed1 = yuv2bgr48be_1_c;
-            *yuv2packed2 = yuv2bgr48be_2_c;
-            *yuv2packedX = yuv2bgr48be_X_c;
-            break;
-        case AV_PIX_FMT_RGB32:
-        case AV_PIX_FMT_BGR32:
-#if CONFIG_SMALL
-            *yuv2packed1 = yuv2rgb32_1_c;
-            *yuv2packed2 = yuv2rgb32_2_c;
-            *yuv2packedX = yuv2rgb32_X_c;
-#else
-#if CONFIG_SWSCALE_ALPHA
-                if (c->alpPixBuf) {
-                    *yuv2packed1 = yuv2rgba32_1_c;
-                    *yuv2packed2 = yuv2rgba32_2_c;
-                    *yuv2packedX = yuv2rgba32_X_c;
-                } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-                {
-                    *yuv2packed1 = yuv2rgbx32_1_c;
-                    *yuv2packed2 = yuv2rgbx32_2_c;
-                    *yuv2packedX = yuv2rgbx32_X_c;
-                }
-#endif /* !CONFIG_SMALL */
-            break;
-        case AV_PIX_FMT_RGB32_1:
-        case AV_PIX_FMT_BGR32_1:
-#if CONFIG_SMALL
-                *yuv2packed1 = yuv2rgb32_1_1_c;
-                *yuv2packed2 = yuv2rgb32_1_2_c;
-                *yuv2packedX = yuv2rgb32_1_X_c;
-#else
-#if CONFIG_SWSCALE_ALPHA
-                if (c->alpPixBuf) {
-                    *yuv2packed1 = yuv2rgba32_1_1_c;
-                    *yuv2packed2 = yuv2rgba32_1_2_c;
-                    *yuv2packedX = yuv2rgba32_1_X_c;
-                } else
-#endif /* CONFIG_SWSCALE_ALPHA */
-                {
-                    *yuv2packed1 = yuv2rgbx32_1_1_c;
-                    *yuv2packed2 = yuv2rgbx32_1_2_c;
-                    *yuv2packedX = yuv2rgbx32_1_X_c;
-                }
-#endif /* !CONFIG_SMALL */
-                break;
-        case AV_PIX_FMT_RGB24:
-            *yuv2packed1 = yuv2rgb24_1_c;
-            *yuv2packed2 = yuv2rgb24_2_c;
-            *yuv2packedX = yuv2rgb24_X_c;
-            break;
-        case AV_PIX_FMT_BGR24:
-            *yuv2packed1 = yuv2bgr24_1_c;
-            *yuv2packed2 = yuv2bgr24_2_c;
-            *yuv2packedX = yuv2bgr24_X_c;
-            break;
-        case AV_PIX_FMT_RGB565LE:
-        case AV_PIX_FMT_RGB565BE:
-        case AV_PIX_FMT_BGR565LE:
-        case AV_PIX_FMT_BGR565BE:
-            *yuv2packed1 = yuv2rgb16_1_c;
-            *yuv2packed2 = yuv2rgb16_2_c;
-            *yuv2packedX = yuv2rgb16_X_c;
-            break;
-        case AV_PIX_FMT_RGB555LE:
-        case AV_PIX_FMT_RGB555BE:
-        case AV_PIX_FMT_BGR555LE:
-        case AV_PIX_FMT_BGR555BE:
-            *yuv2packed1 = yuv2rgb15_1_c;
-            *yuv2packed2 = yuv2rgb15_2_c;
-            *yuv2packedX = yuv2rgb15_X_c;
-            break;
-        case AV_PIX_FMT_RGB444LE:
-        case AV_PIX_FMT_RGB444BE:
-        case AV_PIX_FMT_BGR444LE:
-        case AV_PIX_FMT_BGR444BE:
-            *yuv2packed1 = yuv2rgb12_1_c;
-            *yuv2packed2 = yuv2rgb12_2_c;
-            *yuv2packedX = yuv2rgb12_X_c;
-            break;
-        case AV_PIX_FMT_RGB8:
-        case AV_PIX_FMT_BGR8:
-            *yuv2packed1 = yuv2rgb8_1_c;
-            *yuv2packed2 = yuv2rgb8_2_c;
-            *yuv2packedX = yuv2rgb8_X_c;
-            break;
-        case AV_PIX_FMT_RGB4:
-        case AV_PIX_FMT_BGR4:
-            *yuv2packed1 = yuv2rgb4_1_c;
-            *yuv2packed2 = yuv2rgb4_2_c;
-            *yuv2packedX = yuv2rgb4_X_c;
-            break;
-        case AV_PIX_FMT_RGB4_BYTE:
-        case AV_PIX_FMT_BGR4_BYTE:
-            *yuv2packed1 = yuv2rgb4b_1_c;
-            *yuv2packed2 = yuv2rgb4b_2_c;
-            *yuv2packedX = yuv2rgb4b_X_c;
-            break;
-        }
-    }
-    switch (dstFormat) { /* runs whether or not SWS_FULL_CHR_H_INT is set */
-    case AV_PIX_FMT_MONOWHITE:
-        *yuv2packed1 = yuv2monowhite_1_c;
-        *yuv2packed2 = yuv2monowhite_2_c;
-        *yuv2packedX = yuv2monowhite_X_c;
-        break;
-    case AV_PIX_FMT_MONOBLACK:
-        *yuv2packed1 = yuv2monoblack_1_c;
-        *yuv2packed2 = yuv2monoblack_2_c;
-        *yuv2packedX = yuv2monoblack_X_c;
-        break;
-    case AV_PIX_FMT_YUYV422:
-        *yuv2packed1 = yuv2yuyv422_1_c;
-        *yuv2packed2 = yuv2yuyv422_2_c;
-        *yuv2packedX = yuv2yuyv422_X_c;
-        break;
-    case AV_PIX_FMT_UYVY422:
-        *yuv2packed1 = yuv2uyvy422_1_c;
-        *yuv2packed2 = yuv2uyvy422_2_c;
-        *yuv2packedX = yuv2uyvy422_X_c;
-        break;
-    }
-}
diff --git a/ffmpeg/libswscale/ppc/Makefile b/ffmpeg/libswscale/ppc/Makefile
deleted file mode 100644
index d1b596e..0000000
--- a/ffmpeg/libswscale/ppc/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-OBJS += ppc/swscale_altivec.o                                           \
-        ppc/yuv2rgb_altivec.o                                           \
-        ppc/yuv2yuv_altivec.o                                           \
diff --git a/ffmpeg/libswscale/ppc/swscale_altivec.c b/ffmpeg/libswscale/ppc/swscale_altivec.c
deleted file mode 100644
index 86f40ab..0000000
--- a/ffmpeg/libswscale/ppc/swscale_altivec.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- * AltiVec-enhanced yuv2yuvX
- *
- * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
- * based on the equivalent C code in swscale.c
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-
-#include "config.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "yuv2rgb_altivec.h"
-
-#if HAVE_ALTIVEC
-#define vzero vec_splat_s32(0) /* vector of four 32-bit zeros; cast at the use sites as needed */
-/*
- * yuv2planeX_8(d1, d2, l1, src, x, perm, filter): accumulate one filter tap
- * for eight consecutive int16 samples.
- *
- * Loads the 16 bytes that follow l1 (byte offset (x << 1) + 16 from src),
- * combines them with l1 through perm to obtain the eight samples, multiplies
- * them by filter (vec_mule and vec_mulo handle the even and odd lanes), puts
- * the 32-bit products back into sample order with vec_mergeh / vec_mergel and
- * adds the first four to d1 and the last four to d2.
- * On exit l1 holds the newly loaded vector, so a second invocation with
- * x + 8 continues where this one stopped. d1, d2 and l1 are modified.
- */
-#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {     \
-        vector signed short l2  = vec_ld(((x) << 1) + 16, src); \
-        vector signed short ls  = vec_perm(l1, l2, perm);       \
-        vector signed int   i1  = vec_mule(filter, ls);         \
-        vector signed int   i2  = vec_mulo(filter, ls);         \
-        vector signed int   vf1 = vec_mergeh(i1, i2);           \
-        vector signed int   vf2 = vec_mergel(i1, i2);           \
-        d1 = vec_add(d1, vf1);                                  \
-        d2 = vec_add(d2, vf2);                                  \
-        l1 = l2;                                                \
-    } while (0)
-/**
- * Vertically filter 16 pixels and store them as 16 bytes at dest.
- *
- * The four 32-bit accumulators start from the dither values (<< 12) for
- * pixels x .. x + 15; every filter tap is splatted across a vector and
- * accumulated with two yuv2planeX_8() steps. The sums are shifted right by
- * 19 and narrowed with unsigned saturation to bytes.
- *
- * @param filter     filterSize vertical coefficients
- * @param filterSize number of taps / source lines
- * @param src        filterSize source lines
- * @param dest       where the 16 result bytes go; the only caller in this
- *                   file passes a 16-byte aligned address
- * @param dither     8-entry dither table, indexed with (x + i + offset) & 7
- * @param offset     added to the pixel index before the dither lookup
- * @param x          index of the first pixel, used for src and dither
- */
-static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,
-                                  const int16_t **src, uint8_t *dest,
-                                  const uint8_t *dither, int offset, int x)
-{
-    register int i, j;
-    DECLARE_ALIGNED(16, int, val)[16];
-    vector signed int vo1, vo2, vo3, vo4;
-    vector unsigned short vs1, vs2;
-    vector unsigned char vf;
-    vector unsigned int altivec_vectorShiftInt19 =
-        vec_add(vec_splat_u32(10), vec_splat_u32(9)); /* 10 + 9 = 19, the shift count used below */
-
-    for (i = 0; i < 16; i++)
-        val[i] = dither[(x + i + offset) & 7] << 12;
-
-    vo1 = vec_ld(0,  val);
-    vo2 = vec_ld(16, val);
-    vo3 = vec_ld(32, val);
-    vo4 = vec_ld(48, val);
-
-    for (j = 0; j < filterSize; j++) {
-        vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
-        vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
-        vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
-        vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
-
-        perm = vec_lvsl(x << 1, src[j]);
-        l1   = vec_ld(x << 1, src[j]);
-
-        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter); /* pixels x .. x + 7 */
-        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter); /* pixels x + 8 .. x + 15, reusing l1 */
-    }
-
-    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
-    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
-    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
-    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
-    vs1 = vec_packsu(vo1, vo2);
-    vs2 = vec_packsu(vo3, vo4);
-    vf  = vec_packsu(vs1, vs2);
-    vec_st(vf, 0, dest);
-}
-
-/*
- * Scalar vertical filter for the pixels x .. dstW - 1 of one 8-bit plane.
- * Each output byte is the dither value (scaled by 1 << 12) plus the
- * filter-weighted sum over the filterSize source lines, shifted right by 19
- * and clipped with av_clip_uint8().
- */
-static inline void yuv2planeX_u(const int16_t *filter, int filterSize,
-                                const int16_t **src, uint8_t *dest, int dstW,
-                                const uint8_t *dither, int offset, int x)
-{
-    int px = x;
-
-    while (px < dstW) {
-        int tap;
-        int acc = dither[(px + offset) & 7] << 12;
-
-        for (tap = 0; tap < filterSize; tap++)
-            acc += src[tap][px] * filter[tap];
-
-        dest[px] = av_clip_uint8(acc >> 19);
-        px++;
-    }
-}
-
-/*
- * Vertical filter for one 8-bit plane, split into three parts:
- *   1. a scalar run over the pixels before dest reaches 16-byte alignment,
- *   2. 16-pixel vector steps while at least 16 pixels remain,
- *   3. a scalar run over whatever is left up to dstW.
- */
-static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
-                               const int16_t **src, uint8_t *dest, int dstW,
-                               const uint8_t *dither, int offset)
-{
-    /* number of bytes until dest is 16-byte aligned */
-    int head = -(uintptr_t)dest & 15;
-    int pos  = head;
-
-    yuv2planeX_u(filter, filterSize, src, dest, head, dither, offset, 0);
-
-    while (pos < dstW - 15) {
-        yuv2planeX_16_altivec(filter, filterSize, src, dest + pos, dither,
-                              offset, pos);
-        pos += 16;
-    }
-
-    yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, pos);
-}
-/**
- * Horizontal scaling of 8-bit samples into 16-bit intermediates.
- *
- * For every output sample i the products
- * src[filterPos[i] + j] * filter[filterSize * i + j] are summed over
- * j < filterSize; the sum is shifted right by 7 and capped at (1 << 15) - 1.
- * A filterSize that is not a multiple of 4 takes the scalar loop; 4, 8 and
- * 16 have dedicated vector paths and the remaining multiples of 4 use the
- * generic vector loop (16 taps per step plus an optional 8-tap tail).
- *
- * NOTE(review): the generic path consumes taps in groups of 16 and 8 only,
- * so a filterSize with filterSize % 8 == 4 (other than 4 itself) would leave
- * its last four taps unused - presumably the caller pads filterSize to a
- * multiple of 8; confirm.
- *
- * @param c          not referenced in this function
- * @param dst        output, dstW samples
- * @param dstW       number of output samples
- * @param src        8-bit input samples
- * @param filter     dstW * filterSize coefficients, filterSize per output sample
- * @param filterPos  first source index for each output sample
- * @param filterSize number of taps per output sample
- */
-static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
-                                const uint8_t *src, const int16_t *filter,
-                                const int32_t *filterPos, int filterSize)
-{
-    register int i;
-    DECLARE_ALIGNED(16, int, tempo)[4]; /* aligned scratch for reading the vec_sums() result */
-
-    if (filterSize % 4) { /* scalar fallback */
-        for (i = 0; i < dstW; i++) {
-            register int j;
-            register int srcPos = filterPos[i];
-            register int val    = 0;
-            for (j = 0; j < filterSize; j++)
-                val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
-            dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
-        }
-    } else
-        switch (filterSize) {
-        case 4:
-            for (i = 0; i < dstW; i++) {
-                register int srcPos = filterPos[i];
-
-                vector unsigned char src_v0 = vec_ld(srcPos, src);
-                vector unsigned char src_v1, src_vF;
-                vector signed short src_v, filter_v;
-                vector signed int val_vEven, val_s;
-                if ((((uintptr_t)src + srcPos) % 16) > 12) { /* the 4 bytes cross into the next 16-byte block */
-                    src_v1 = vec_ld(srcPos + 16, src);
-                }
-                src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src)); /* NOTE(review): src_v1 is passed uninitialized
-                                                                           * when the load above is skipped; only lanes
-                                                                           * that are not used afterwards should depend
-                                                                           * on it - confirm */
-
-                src_v = // vec_unpackh sign-extends...
-                        (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-                // now put our elements in the even slots
-                src_v = vec_mergeh(src_v, (vector signed short)vzero);
-
-                filter_v = vec_ld(i << 3, filter);
-                // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
-
-                // The neat trick: We only care for half the elements,
-                // high or low depending on (i<<3)%16 (it's 0 or 8 here),
-                // and we're going to use vec_mule, so we choose
-                // carefully how to "unpack" the elements into the even slots.
-                if ((i << 3) % 16)
-                    filter_v = vec_mergel(filter_v, (vector signed short)vzero);
-                else
-                    filter_v = vec_mergeh(filter_v, (vector signed short)vzero);
-
-                val_vEven = vec_mule(src_v, filter_v);
-                val_s     = vec_sums(val_vEven, vzero);
-                vec_st(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); /* the sum is read from the last element */
-            }
-        break;
-
-        case 8:
-            for (i = 0; i < dstW; i++) {
-                register int srcPos = filterPos[i];
-
-                vector unsigned char src_v0 = vec_ld(srcPos, src);
-                vector unsigned char src_v1, src_vF;
-                vector signed short src_v, filter_v;
-                vector signed int val_v, val_s;
-                if ((((uintptr_t)src + srcPos) % 16) > 8) { /* the 8 bytes cross into the next 16-byte block */
-                    src_v1 = vec_ld(srcPos + 16, src);
-                }
-                src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src)); /* NOTE(review): same conditional src_v1 as in case 4 */
-
-                src_v = // vec_unpackh sign-extends...
-                        (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-                filter_v = vec_ld(i << 4, filter);
-                // the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2)
-
-                val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
-                val_s = vec_sums(val_v, vzero);
-                vec_st(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        break;
-
-        case 16:
-            for (i = 0; i < dstW; i++) {
-                register int srcPos = filterPos[i];
-
-                vector unsigned char src_v0 = vec_ld(srcPos, src);
-                vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
-                vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
-
-                vector signed short src_vA = // vec_unpackh sign-extends...
-                                             (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-                vector signed short src_vB = // vec_unpackh sign-extends...
-                                             (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
-
-                vector signed short filter_v0 = vec_ld(i << 5, filter);
-                vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
-                // the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2)
-
-                vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
-                vector signed int val_v   = vec_msums(src_vB, filter_v1, val_acc);
-
-                vector signed int val_s = vec_sums(val_v, vzero);
-
-                vec_st(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        break;
-
-        default: /* other multiples of 4: 16 taps per iteration, then an 8-tap tail */
-            for (i = 0; i < dstW; i++) {
-                register int j;
-                register int srcPos = filterPos[i];
-
-                vector signed int val_s, val_v = (vector signed int)vzero;
-                vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
-                vector unsigned char permF     = vec_lvsl((i * 2 * filterSize), filter);
-
-                vector unsigned char src_v0 = vec_ld(srcPos, src);
-                vector unsigned char permS  = vec_lvsl(srcPos, src);
-
-                for (j = 0; j < filterSize - 15; j += 16) {
-                    vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
-                    vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
-
-                    vector signed short src_vA = // vec_unpackh sign-extends...
-                                                 (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-                    vector signed short src_vB = // vec_unpackh sign-extends...
-                                                 (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
-
-                    vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
-                    vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
-                    vector signed short filter_v0  = vec_perm(filter_v0R, filter_v1R, permF);
-                    vector signed short filter_v1  = vec_perm(filter_v1R, filter_v2R, permF);
-
-                    vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
-                    val_v = vec_msums(src_vB, filter_v1, val_acc);
-
-                    filter_v0R = filter_v2R; /* carry the last loads over to the next iteration */
-                    src_v0     = src_v1;
-                }
-
-                if (j < filterSize - 7) { /* at least 8 taps left */
-                    // loading src_v0 is useless, it's already done above
-                    // vector unsigned char src_v0 = vec_ld(srcPos + j, src);
-                    vector unsigned char src_v1, src_vF;
-                    vector signed short src_v, filter_v1R, filter_v;
-                    if ((((uintptr_t)src + srcPos) % 16) > 8) {
-                        src_v1 = vec_ld(srcPos + j + 16, src);
-                    }
-                    src_vF = vec_perm(src_v0, src_v1, permS); /* NOTE(review): same conditional src_v1 as in case 4 */
-
-                    src_v = // vec_unpackh sign-extends...
-                            (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-                    // loading filter_v0R is useless, it's already done above
-                    // vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
-                    filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
-                    filter_v   = vec_perm(filter_v0R, filter_v1R, permF);
-
-                    val_v = vec_msums(src_v, filter_v, val_v);
-                }
-
-                val_s = vec_sums(val_v, vzero);
-
-                vec_st(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        }
-}
-#endif /* HAVE_ALTIVEC */
-/**
- * Install the AltiVec routines into the scaler context.
- *
- * Compiles to an empty function without HAVE_ALTIVEC, and returns without
- * touching c when av_get_cpu_flags() does not report AV_CPU_FLAG_ALTIVEC.
- * Otherwise it replaces
- *  - c->hyScale and c->hcScale when c->srcBpc == 8 and c->dstBpc <= 14,
- *  - c->yuv2planeX when the destination is neither is16BPS() nor
- *    is9_OR_10BPS(), is not NV12/NV21, and no alpha buffer is present,
- *  - c->yuv2packedX for the six RGB formats listed below, unless
- *    SWS_BITEXACT or SWS_FULL_CHR_H_INT is set or an alpha buffer is present.
- *
- * @param c scaler context to update
- */
-av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
-{
-#if HAVE_ALTIVEC
-    enum AVPixelFormat dstFormat = c->dstFormat;
-
-    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
-        return;
-
-    if (c->srcBpc == 8 && c->dstBpc <= 14) {
-        c->hyScale = c->hcScale = hScale_altivec_real;
-    }
-    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
-        dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
-        !c->alpPixBuf) {
-        c->yuv2planeX = yuv2planeX_altivec;
-    }
-
-    /* The following list of supported dstFormat values should
-     * match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
-        switch (c->dstFormat) {
-        case AV_PIX_FMT_ABGR:
-            c->yuv2packedX = ff_yuv2abgr_X_altivec;
-            break;
-        case AV_PIX_FMT_BGRA:
-            c->yuv2packedX = ff_yuv2bgra_X_altivec;
-            break;
-        case AV_PIX_FMT_ARGB:
-            c->yuv2packedX = ff_yuv2argb_X_altivec;
-            break;
-        case AV_PIX_FMT_RGBA:
-            c->yuv2packedX = ff_yuv2rgba_X_altivec;
-            break;
-        case AV_PIX_FMT_BGR24:
-            c->yuv2packedX = ff_yuv2bgr24_X_altivec;
-            break;
-        case AV_PIX_FMT_RGB24:
-            c->yuv2packedX = ff_yuv2rgb24_X_altivec;
-            break;
-        }
-    }
-#endif /* HAVE_ALTIVEC */
-}
diff --git a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c b/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c
deleted file mode 100644
index 25282bf..0000000
--- a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c
+++ /dev/null
@@ -1,868 +0,0 @@
-/*
- * AltiVec acceleration for colorspace conversion
- *
- * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/*
- * Convert I420 YV12 to RGB in various formats,
- * it rejects images that are not in 420 formats,
- * it rejects images that don't have widths of multiples of 16,
- * it rejects images that don't have heights of multiples of 2.
- * Reject defers to C simulation code.
- *
- * Lots of optimizations to be done here.
- *
- * 1. Need to fix saturation code. I just couldn't get it to fly with packs
- * and adds, so we currently use max/min to clip.
- *
- * 2. The inefficient use of chroma loading needs a bit of brushing up.
- *
- * 3. Analysis of pipeline stalls needs to be done. Use shark to identify
- * pipeline stalls.
- *
- *
- * MODIFIED to calculate coeffs from currently selected color space.
- * MODIFIED core to be a macro where you specify the output format.
- * ADDED UYVY conversion which is never called due to some thing in swscale.
- * CORRECTED algorithim selection to be strict on input formats.
- * ADDED runtime detection of AltiVec.
- *
- * ADDED altivec_yuv2packedX vertical scl + RGB converter
- *
- * March 27,2004
- * PERFORMANCE ANALYSIS
- *
- * The C version uses 25% of the processor or ~250Mips for D1 video rawvideo
- * used as test.
- * The AltiVec version uses 10% of the processor or ~100Mips for D1 video
- * same sequence.
- *
- * 720 * 480 * 30  ~10MPS
- *
- * so we have roughly 10 clocks per pixel. This is too high, something has
- * to be wrong.
- *
- * OPTIMIZED clip codes to utilize vec_max and vec_packs removing the
- * need for vec_min.
- *
- * OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to
- * have the input video frame, it was just decompressed so it probably resides
- * in L1 caches. However, we are creating the output video stream. This needs
- * to use the DSTST instruction to optimize for the cache. We couple this with
- * the fact that we are not going to be visiting the input buffer again so we
- * mark it Least Recently Used. This shaves 25% of the processor cycles off.
- *
- * Now memcpy is the largest mips consumer in the system, probably due
- * to the inefficient X11 stuff.
- *
- * GL libraries seem to be very slow on this machine 1.33Ghz PB running
- * Jaguar, this is not the case for my 1Ghz PB.  I thought it might be
- * a versioning issue, however I have libGL.1.2.dylib for both
- * machines. (We need to figure this out now.)
- *
- * GL2 libraries work now with patch for RGB32.
- *
- * NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor.
- *
- * Integrated luma prescaling adjustment for saturation/contrast/brightness
- * adjustment.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <assert.h>
-
-#include "config.h"
-#include "libswscale/rgb2rgb.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libavutil/pixdesc.h"
-#include "yuv2rgb_altivec.h"
-
-#if HAVE_ALTIVEC
-
-#undef PROFILE_THE_BEAST
-#undef INC_SCALING
-
-typedef unsigned char ubyte;
-typedef signed char   sbyte;
-
-/* RGB interleaver, 16 planar pels 8-bit samples per channel in
- * homogeneous vector registers x0,x1,x2 are interleaved with the
- * following technique:
- *
- *    o0 = vec_mergeh(x0, x1);
- *    o1 = vec_perm(o0, x2, perm_rgb_0);
- *    o2 = vec_perm(o0, x2, perm_rgb_1);
- *    o3 = vec_mergel(x0, x1);
- *    o4 = vec_perm(o3, o2, perm_rgb_2);
- *    o5 = vec_perm(o3, o2, perm_rgb_3);
- *
- * perm_rgb_0:   o0(RG).h v1(B) --> o1*
- *            0   1  2   3   4
- *           rgbr|gbrg|brgb|rgbr
- *           0010 0100 1001 0010
- *           0102 3145 2673 894A
- *
- * perm_rgb_1:   o0(RG).h v1(B) --> o2
- *            0   1  2   3   4
- *           gbrg|brgb|bbbb|bbbb
- *           0100 1001 1111 1111
- *           B5CD 6EF7 89AB CDEF
- *
- * perm_rgb_2:   o3(RG).l o2(rgbB.l) --> o4*
- *            0   1  2   3   4
- *           gbrg|brgb|rgbr|gbrg
- *           1111 1111 0010 0100
- *           89AB CDEF 0182 3945
- *
- * perm_rgb_2:   o3(RG).l o2(rgbB.l) ---> o5*
- *            0   1  2   3   4
- *           brgb|rgbr|gbrg|brgb
- *           1001 0010 0100 1001
- *           a67b 89cA BdCD eEFf
- *
- */
-static const vector unsigned char
-    perm_rgb_0 = { 0x00, 0x01, 0x10, 0x02, 0x03, 0x11, 0x04, 0x05,
-                   0x12, 0x06, 0x07, 0x13, 0x08, 0x09, 0x14, 0x0a },
-    perm_rgb_1 = { 0x0b, 0x15, 0x0c, 0x0d, 0x16, 0x0e, 0x0f, 0x17,
-                   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
-    perm_rgb_2 = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-                   0x00, 0x01, 0x18, 0x02, 0x03, 0x19, 0x04, 0x05 },
-    perm_rgb_3 = { 0x1a, 0x06, 0x07, 0x1b, 0x08, 0x09, 0x1c, 0x0a,
-                   0x0b, 0x1d, 0x0c, 0x0d, 0x1e, 0x0e, 0x0f, 0x1f };
-
-#define vec_merge3(x2, x1, x0, y0, y1, y2)     \
-    do {                                       \
-        __typeof__(x0) o0, o2, o3;             \
-        o0 = vec_mergeh(x0, x1);               \
-        y0 = vec_perm(o0, x2, perm_rgb_0);     \
-        o2 = vec_perm(o0, x2, perm_rgb_1);     \
-        o3 = vec_mergel(x0, x1);               \
-        y1 = vec_perm(o3, o2, perm_rgb_2);     \
-        y2 = vec_perm(o3, o2, perm_rgb_3);     \
-    } while (0)
-
-#define vec_mstbgr24(x0, x1, x2, ptr)          \
-    do {                                       \
-        __typeof__(x0) _0, _1, _2;             \
-        vec_merge3(x0, x1, x2, _0, _1, _2);    \
-        vec_st(_0, 0, ptr++);                  \
-        vec_st(_1, 0, ptr++);                  \
-        vec_st(_2, 0, ptr++);                  \
-    } while (0)
-
-#define vec_mstrgb24(x0, x1, x2, ptr)          \
-    do {                                       \
-        __typeof__(x0) _0, _1, _2;             \
-        vec_merge3(x2, x1, x0, _0, _1, _2);    \
-        vec_st(_0, 0, ptr++);                  \
-        vec_st(_1, 0, ptr++);                  \
-        vec_st(_2, 0, ptr++);                  \
-    } while (0)
-
-/* pack the pixels in rgb0 format
- * msb R
- * lsb 0
- */
-#define vec_mstrgb32(T, x0, x1, x2, x3, ptr)                            \
-    do {                                                                \
-        T _0, _1, _2, _3;                                               \
-        _0 = vec_mergeh(x0, x1);                                        \
-        _1 = vec_mergeh(x2, x3);                                        \
-        _2 = (T) vec_mergeh((vector unsigned short) _0,                 \
-                            (vector unsigned short) _1);                \
-        _3 = (T) vec_mergel((vector unsigned short) _0,                 \
-                            (vector unsigned short) _1);                \
-        vec_st(_2, 0 * 16, (T *) ptr);                                  \
-        vec_st(_3, 1 * 16, (T *) ptr);                                  \
-        _0 = vec_mergel(x0, x1);                                        \
-        _1 = vec_mergel(x2, x3);                                        \
-        _2 = (T) vec_mergeh((vector unsigned short) _0,                 \
-                            (vector unsigned short) _1);                \
-        _3 = (T) vec_mergel((vector unsigned short) _0,                 \
-                            (vector unsigned short) _1);                \
-        vec_st(_2, 2 * 16, (T *) ptr);                                  \
-        vec_st(_3, 3 * 16, (T *) ptr);                                  \
-        ptr += 4;                                                       \
-    } while (0)
-
-/*
- * 1     0       1.4021   | | Y |
- * 1    -0.3441 -0.7142   |x| Cb|
- * 1     1.7718  0        | | Cr|
- *
- *
- * Y:      [-128 127]
- * Cb/Cr : [-128 127]
- *
- * typical YUV conversion works on Y: 0-255 this version has been
- * optimized for JPEG decoding.
- */
-
-#define vec_unh(x)                                                      \
-    (vector signed short)                                               \
-        vec_perm(x, (__typeof__(x)) { 0 },                              \
-                 ((vector unsigned char) {                              \
-                     0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03,    \
-                     0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07 }))
-
-#define vec_unl(x)                                                      \
-    (vector signed short)                                               \
-        vec_perm(x, (__typeof__(x)) { 0 },                              \
-                 ((vector unsigned char) {                              \
-                     0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B,    \
-                     0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F }))
-
-#define vec_clip_s16(x)                                                 \
-    vec_max(vec_min(x, ((vector signed short) {                         \
-                    235, 235, 235, 235, 235, 235, 235, 235 })),         \
-            ((vector signed short) { 16, 16, 16, 16, 16, 16, 16, 16 }))
-
-#define vec_packclp(x, y)                                               \
-    (vector unsigned char)                                              \
-        vec_packs((vector unsigned short)                               \
-                      vec_max(x, ((vector signed short) { 0 })),        \
-                  (vector unsigned short)                               \
-                      vec_max(y, ((vector signed short) { 0 })))
-
-static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
-                               vector signed short U, vector signed short V,
-                               vector signed short *R, vector signed short *G,
-                               vector signed short *B)
-{
-    vector signed short vx, ux, uvx;
-
-    Y = vec_mradds(Y, c->CY, c->OY);
-    U = vec_sub(U, (vector signed short)
-                       vec_splat((vector signed short) { 128 }, 0));
-    V = vec_sub(V, (vector signed short)
-                       vec_splat((vector signed short) { 128 }, 0));
-
-    // ux  = (CBU * (u << c->CSHIFT) + 0x4000) >> 15;
-    ux = vec_sl(U, c->CSHIFT);
-    *B = vec_mradds(ux, c->CBU, Y);
-
-    // vx  = (CRV * (v << c->CSHIFT) + 0x4000) >> 15;
-    vx = vec_sl(V, c->CSHIFT);
-    *R = vec_mradds(vx, c->CRV, Y);
-
-    // uvx = ((CGU * u) + (CGV * v)) >> 15;
-    uvx = vec_mradds(U, c->CGU, Y);
-    *G  = vec_mradds(V, c->CGV, uvx);
-}
-
-/*
- * ------------------------------------------------------------------------------
- * CS converters
- * ------------------------------------------------------------------------------
- */
-
-#define DEFCSP420_CVT(name, out_pixels)                                       \
-static int altivec_ ## name(SwsContext *c, const unsigned char **in,          \
-                            int *instrides, int srcSliceY, int srcSliceH,     \
-                            unsigned char **oplanes, int *outstrides)         \
-{                                                                             \
-    int w = c->srcW;                                                          \
-    int h = srcSliceH;                                                        \
-    int i, j;                                                                 \
-    int instrides_scl[3];                                                     \
-    vector unsigned char y0, y1;                                              \
-                                                                              \
-    vector signed char u, v;                                                  \
-                                                                              \
-    vector signed short Y0, Y1, Y2, Y3;                                       \
-    vector signed short U, V;                                                 \
-    vector signed short vx, ux, uvx;                                          \
-    vector signed short vx0, ux0, uvx0;                                       \
-    vector signed short vx1, ux1, uvx1;                                       \
-    vector signed short R0, G0, B0;                                           \
-    vector signed short R1, G1, B1;                                           \
-    vector unsigned char R, G, B;                                             \
-                                                                              \
-    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                  \
-    vector unsigned char align_perm;                                          \
-                                                                              \
-    vector signed short lCY       = c->CY;                                    \
-    vector signed short lOY       = c->OY;                                    \
-    vector signed short lCRV      = c->CRV;                                   \
-    vector signed short lCBU      = c->CBU;                                   \
-    vector signed short lCGU      = c->CGU;                                   \
-    vector signed short lCGV      = c->CGV;                                   \
-    vector unsigned short lCSHIFT = c->CSHIFT;                                \
-                                                                              \
-    const ubyte *y1i = in[0];                                                 \
-    const ubyte *y2i = in[0] + instrides[0];                                  \
-    const ubyte *ui  = in[1];                                                 \
-    const ubyte *vi  = in[2];                                                 \
-                                                                              \
-    vector unsigned char *oute, *outo;                                        \
-                                                                              \
-    /* loop moves y{1, 2}i by w */                                            \
-    instrides_scl[0] = instrides[0] * 2 - w;                                  \
-    /* loop moves ui by w / 2 */                                              \
-    instrides_scl[1] = instrides[1] - w / 2;                                  \
-    /* loop moves vi by w / 2 */                                              \
-    instrides_scl[2] = instrides[2] - w / 2;                                  \
-                                                                              \
-    for (i = 0; i < h / 2; i++) {                                             \
-        oute = (vector unsigned char *)(oplanes[0] + outstrides[0] *          \
-                                        (srcSliceY + i * 2));                 \
-        outo = oute + (outstrides[0] >> 4);                                   \
-        vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0);       \
-        vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1);       \
-                                                                              \
-        for (j = 0; j < w / 16; j++) {                                        \
-            y1ivP = (const vector unsigned char *) y1i;                       \
-            y2ivP = (const vector unsigned char *) y2i;                       \
-            uivP  = (const vector unsigned char *) ui;                        \
-            vivP  = (const vector unsigned char *) vi;                        \
-                                                                              \
-            align_perm = vec_lvsl(0, y1i);                                    \
-            y0 = (vector unsigned char)                                       \
-                     vec_perm(y1ivP[0], y1ivP[1], align_perm);                \
-                                                                              \
-            align_perm = vec_lvsl(0, y2i);                                    \
-            y1 = (vector unsigned char)                                       \
-                     vec_perm(y2ivP[0], y2ivP[1], align_perm);                \
-                                                                              \
-            align_perm = vec_lvsl(0, ui);                                     \
-            u = (vector signed char)                                          \
-                    vec_perm(uivP[0], uivP[1], align_perm);                   \
-                                                                              \
-            align_perm = vec_lvsl(0, vi);                                     \
-            v = (vector signed char)                                          \
-                    vec_perm(vivP[0], vivP[1], align_perm);                   \
-                                                                              \
-            u = (vector signed char)                                          \
-                    vec_sub(u,                                                \
-                            (vector signed char)                              \
-                                vec_splat((vector signed char) { 128 }, 0));  \
-            v = (vector signed char)                                          \
-                    vec_sub(v,                                                \
-                            (vector signed char)                              \
-                                vec_splat((vector signed char) { 128 }, 0));  \
-                                                                              \
-            U = vec_unpackh(u);                                               \
-            V = vec_unpackh(v);                                               \
-                                                                              \
-            Y0 = vec_unh(y0);                                                 \
-            Y1 = vec_unl(y0);                                                 \
-            Y2 = vec_unh(y1);                                                 \
-            Y3 = vec_unl(y1);                                                 \
-                                                                              \
-            Y0 = vec_mradds(Y0, lCY, lOY);                                    \
-            Y1 = vec_mradds(Y1, lCY, lOY);                                    \
-            Y2 = vec_mradds(Y2, lCY, lOY);                                    \
-            Y3 = vec_mradds(Y3, lCY, lOY);                                    \
-                                                                              \
-            /* ux  = (CBU * (u << CSHIFT) + 0x4000) >> 15 */                  \
-            ux  = vec_sl(U, lCSHIFT);                                         \
-            ux  = vec_mradds(ux, lCBU, (vector signed short) { 0 });          \
-            ux0 = vec_mergeh(ux, ux);                                         \
-            ux1 = vec_mergel(ux, ux);                                         \
-                                                                              \
-            /* vx  = (CRV * (v << CSHIFT) + 0x4000) >> 15; */                 \
-            vx  = vec_sl(V, lCSHIFT);                                         \
-            vx  = vec_mradds(vx, lCRV, (vector signed short) { 0 });          \
-            vx0 = vec_mergeh(vx, vx);                                         \
-            vx1 = vec_mergel(vx, vx);                                         \
-                                                                              \
-            /* uvx = ((CGU * u) + (CGV * v)) >> 15 */                         \
-            uvx  = vec_mradds(U, lCGU, (vector signed short) { 0 });          \
-            uvx  = vec_mradds(V, lCGV, uvx);                                  \
-            uvx0 = vec_mergeh(uvx, uvx);                                      \
-            uvx1 = vec_mergel(uvx, uvx);                                      \
-                                                                              \
-            R0 = vec_add(Y0, vx0);                                            \
-            G0 = vec_add(Y0, uvx0);                                           \
-            B0 = vec_add(Y0, ux0);                                            \
-            R1 = vec_add(Y1, vx1);                                            \
-            G1 = vec_add(Y1, uvx1);                                           \
-            B1 = vec_add(Y1, ux1);                                            \
-                                                                              \
-            R = vec_packclp(R0, R1);                                          \
-            G = vec_packclp(G0, G1);                                          \
-            B = vec_packclp(B0, B1);                                          \
-                                                                              \
-            out_pixels(R, G, B, oute);                                        \
-                                                                              \
-            R0 = vec_add(Y2, vx0);                                            \
-            G0 = vec_add(Y2, uvx0);                                           \
-            B0 = vec_add(Y2, ux0);                                            \
-            R1 = vec_add(Y3, vx1);                                            \
-            G1 = vec_add(Y3, uvx1);                                           \
-            B1 = vec_add(Y3, ux1);                                            \
-            R  = vec_packclp(R0, R1);                                         \
-            G  = vec_packclp(G0, G1);                                         \
-            B  = vec_packclp(B0, B1);                                         \
-                                                                              \
-                                                                              \
-            out_pixels(R, G, B, outo);                                        \
-                                                                              \
-            y1i += 16;                                                        \
-            y2i += 16;                                                        \
-            ui  += 8;                                                         \
-            vi  += 8;                                                         \
-        }                                                                     \
-                                                                              \
-        ui  += instrides_scl[1];                                              \
-        vi  += instrides_scl[2];                                              \
-        y1i += instrides_scl[0];                                              \
-        y2i += instrides_scl[0];                                              \
-    }                                                                         \
-    return srcSliceH;                                                         \
-}
-
-#define out_abgr(a, b, c, ptr)                                          \
-    vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), c, b, a, ptr)
-#define out_bgra(a, b, c, ptr)                                          \
-    vec_mstrgb32(__typeof__(a), c, b, a, ((__typeof__(a)) { 255 }), ptr)
-#define out_rgba(a, b, c, ptr)                                          \
-    vec_mstrgb32(__typeof__(a), a, b, c, ((__typeof__(a)) { 255 }), ptr)
-#define out_argb(a, b, c, ptr)                                          \
-    vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, b, c, ptr)
-#define out_rgb24(a, b, c, ptr) vec_mstrgb24(a, b, c, ptr)
-#define out_bgr24(a, b, c, ptr) vec_mstbgr24(a, b, c, ptr)
-
-DEFCSP420_CVT(yuv2_abgr,  out_abgr)
-DEFCSP420_CVT(yuv2_bgra,  out_bgra)
-DEFCSP420_CVT(yuv2_rgba,  out_rgba)
-DEFCSP420_CVT(yuv2_argb,  out_argb)
-DEFCSP420_CVT(yuv2_rgb24, out_rgb24)
-DEFCSP420_CVT(yuv2_bgr24, out_bgr24)
-
-// uyvy|uyvy|uyvy|uyvy
-// 0123 4567 89ab cdef
-static const vector unsigned char
-    demux_u = { 0x10, 0x00, 0x10, 0x00,
-                0x10, 0x04, 0x10, 0x04,
-                0x10, 0x08, 0x10, 0x08,
-                0x10, 0x0c, 0x10, 0x0c },
-    demux_v = { 0x10, 0x02, 0x10, 0x02,
-                0x10, 0x06, 0x10, 0x06,
-                0x10, 0x0A, 0x10, 0x0A,
-                0x10, 0x0E, 0x10, 0x0E },
-    demux_y = { 0x10, 0x01, 0x10, 0x03,
-                0x10, 0x05, 0x10, 0x07,
-                0x10, 0x09, 0x10, 0x0B,
-                0x10, 0x0D, 0x10, 0x0F };
-
-/*
- * this is so I can play live CCIR raw video
- */
-static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in,
-                              int *instrides, int srcSliceY, int srcSliceH,
-                              unsigned char **oplanes, int *outstrides)
-{
-    int w = c->srcW;
-    int h = srcSliceH;
-    int i, j;
-    vector unsigned char uyvy;
-    vector signed short Y, U, V;
-    vector signed short R0, G0, B0, R1, G1, B1;
-    vector unsigned char R, G, B;
-    vector unsigned char *out;
-    const ubyte *img;
-
-    img = in[0];
-    out = (vector unsigned char *) (oplanes[0] + srcSliceY * outstrides[0]);
-
-    for (i = 0; i < h; i++)
-        for (j = 0; j < w / 16; j++) {
-            uyvy = vec_ld(0, img);
-
-            U = (vector signed short)
-                    vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
-            V = (vector signed short)
-                    vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
-            Y = (vector signed short)
-                    vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
-
-            cvtyuvtoRGB(c, Y, U, V, &R0, &G0, &B0);
-
-            uyvy = vec_ld(16, img);
-
-            U = (vector signed short)
-                    vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
-            V = (vector signed short)
-                    vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
-            Y = (vector signed short)
-                    vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
-
-            cvtyuvtoRGB(c, Y, U, V, &R1, &G1, &B1);
-
-            R = vec_packclp(R0, R1);
-            G = vec_packclp(G0, G1);
-            B = vec_packclp(B0, B1);
-
-            // vec_mstbgr24 (R,G,B, out);
-            out_rgba(R, G, B, out);
-
-            img += 32;
-        }
-    return srcSliceH;
-}
-
-#endif /* HAVE_ALTIVEC */
-
-/* Ok currently the acceleration routine only supports
- * inputs of widths a multiple of 16
- * and heights a multiple 2
- *
- * So we just fall back to the C codes for this.
- */
-av_cold SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c)
-{
-#if HAVE_ALTIVEC
-    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
-        return NULL;
-
-    /*
-     * and this seems not to matter too much I tried a bunch of
-     * videos with abnormal widths and MPlayer crashes elsewhere.
-     * mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
-     * boom with X11 bad match.
-     *
-     */
-    if ((c->srcW & 0xf) != 0)
-        return NULL;
-
-    switch (c->srcFormat) {
-    case AV_PIX_FMT_YUV410P:
-    case AV_PIX_FMT_YUV420P:
-    /*case IMGFMT_CLPL:        ??? */
-    case AV_PIX_FMT_GRAY8:
-    case AV_PIX_FMT_NV12:
-    case AV_PIX_FMT_NV21:
-        if ((c->srcH & 0x1) != 0)
-            return NULL;
-
-        switch (c->dstFormat) {
-        case AV_PIX_FMT_RGB24:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
-            return altivec_yuv2_rgb24;
-        case AV_PIX_FMT_BGR24:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
-            return altivec_yuv2_bgr24;
-        case AV_PIX_FMT_ARGB:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
-            return altivec_yuv2_argb;
-        case AV_PIX_FMT_ABGR:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
-            return altivec_yuv2_abgr;
-        case AV_PIX_FMT_RGBA:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
-            return altivec_yuv2_rgba;
-        case AV_PIX_FMT_BGRA:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
-            return altivec_yuv2_bgra;
-        default: return NULL;
-        }
-        break;
-
-    case AV_PIX_FMT_UYVY422:
-        switch (c->dstFormat) {
-        case AV_PIX_FMT_BGR32:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
-            return altivec_uyvy_rgb32;
-        default: return NULL;
-        }
-        break;
-    }
-#endif /* HAVE_ALTIVEC */
-
-    return NULL;
-}
-
-av_cold void ff_yuv2rgb_init_tables_ppc(SwsContext *c,
-                                        const int inv_table[4],
-                                        int brightness,
-                                        int contrast,
-                                        int saturation)
-{
-#if HAVE_ALTIVEC
-    union {
-        DECLARE_ALIGNED(16, signed short, tmp)[8];
-        vector signed short vec;
-    } buf;
-
-    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
-        return;
-
-    buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9;                               // cy
-    buf.tmp[1] = -256 * brightness;                                               // oy
-    buf.tmp[2] =   (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16);   // crv
-    buf.tmp[3] =   (inv_table[1] >> 3) * (contrast >> 16) * (saturation >> 16);   // cbu
-    buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16));  // cgu
-    buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16));  // cgv
-
-    c->CSHIFT = (vector unsigned short) vec_splat_u16(2);
-    c->CY     = vec_splat((vector signed short) buf.vec, 0);
-    c->OY     = vec_splat((vector signed short) buf.vec, 1);
-    c->CRV    = vec_splat((vector signed short) buf.vec, 2);
-    c->CBU    = vec_splat((vector signed short) buf.vec, 3);
-    c->CGU    = vec_splat((vector signed short) buf.vec, 4);
-    c->CGV    = vec_splat((vector signed short) buf.vec, 5);
-    return;
-#endif /* HAVE_ALTIVEC */
-}
-
-#if HAVE_ALTIVEC
-
-static av_always_inline void yuv2packedX_altivec(SwsContext *c,
-                                                 const int16_t *lumFilter,
-                                                 const int16_t **lumSrc,
-                                                 int lumFilterSize,
-                                                 const int16_t *chrFilter,
-                                                 const int16_t **chrUSrc,
-                                                 const int16_t **chrVSrc,
-                                                 int chrFilterSize,
-                                                 const int16_t **alpSrc,
-                                                 uint8_t *dest,
-                                                 int dstW, int dstY,
-                                                 enum AVPixelFormat target)
-{
-    int i, j;
-    vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V;
-    vector signed short R0, G0, B0, R1, G1, B1;
-
-    vector unsigned char R, G, B;
-    vector unsigned char *out, *nout;
-
-    vector signed short RND   = vec_splat_s16(1 << 3);
-    vector unsigned short SCL = vec_splat_u16(4);
-    DECLARE_ALIGNED(16, unsigned int, scratch)[16];
-
-    vector signed short *YCoeffs, *CCoeffs;
-
-    YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize;
-    CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize;
-
-    out = (vector unsigned char *) dest;
-
-    for (i = 0; i < dstW; i += 16) {
-        Y0 = RND;
-        Y1 = RND;
-        /* extract 16 coeffs from lumSrc */
-        for (j = 0; j < lumFilterSize; j++) {
-            X0 = vec_ld(0, &lumSrc[j][i]);
-            X1 = vec_ld(16, &lumSrc[j][i]);
-            Y0 = vec_mradds(X0, YCoeffs[j], Y0);
-            Y1 = vec_mradds(X1, YCoeffs[j], Y1);
-        }
-
-        U = RND;
-        V = RND;
-        /* extract 8 coeffs from U,V */
-        for (j = 0; j < chrFilterSize; j++) {
-            X = vec_ld(0, &chrUSrc[j][i / 2]);
-            U = vec_mradds(X, CCoeffs[j], U);
-            X = vec_ld(0, &chrVSrc[j][i / 2]);
-            V = vec_mradds(X, CCoeffs[j], V);
-        }
-
-        /* scale and clip signals */
-        Y0 = vec_sra(Y0, SCL);
-        Y1 = vec_sra(Y1, SCL);
-        U  = vec_sra(U, SCL);
-        V  = vec_sra(V, SCL);
-
-        Y0 = vec_clip_s16(Y0);
-        Y1 = vec_clip_s16(Y1);
-        U  = vec_clip_s16(U);
-        V  = vec_clip_s16(V);
-
-        /* now we have
-         * Y0 = y0 y1 y2 y3 y4 y5 y6 y7    Y1 = y8 y9 y10 y11 y12 y13 y14 y15
-         * U  = u0 u1 u2 u3 u4 u5 u6 u7    V  = v0 v1 v2 v3 v4 v5 v6 v7
-         *
-         * Y0 = y0 y1 y2 y3 y4 y5 y6 y7    Y1 = y8 y9 y10 y11 y12 y13 y14 y15
-         * U0 = u0 u0 u1 u1 u2 u2 u3 u3    U1 = u4 u4 u5 u5 u6 u6 u7 u7
-         * V0 = v0 v0 v1 v1 v2 v2 v3 v3    V1 = v4 v4 v5 v5 v6 v6 v7 v7
-         */
-
-        U0 = vec_mergeh(U, U);
-        V0 = vec_mergeh(V, V);
-
-        U1 = vec_mergel(U, U);
-        V1 = vec_mergel(V, V);
-
-        cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
-        cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
-
-        R = vec_packclp(R0, R1);
-        G = vec_packclp(G0, G1);
-        B = vec_packclp(B0, B1);
-
-        switch (target) {
-        case AV_PIX_FMT_ABGR:
-            out_abgr(R, G, B, out);
-            break;
-        case AV_PIX_FMT_BGRA:
-            out_bgra(R, G, B, out);
-            break;
-        case AV_PIX_FMT_RGBA:
-            out_rgba(R, G, B, out);
-            break;
-        case AV_PIX_FMT_ARGB:
-            out_argb(R, G, B, out);
-            break;
-        case AV_PIX_FMT_RGB24:
-            out_rgb24(R, G, B, out);
-            break;
-        case AV_PIX_FMT_BGR24:
-            out_bgr24(R, G, B, out);
-            break;
-        default:
-        {
-            /* If this is reached, the caller should have called yuv2packedXinC
-             * instead. */
-            static int printed_error_message;
-            if (!printed_error_message) {
-                av_log(c, AV_LOG_ERROR,
-                       "altivec_yuv2packedX doesn't support %s output\n",
-                       av_get_pix_fmt_name(c->dstFormat));
-                printed_error_message = 1;
-            }
-            return;
-        }
-        }
-    }
-
-    if (i < dstW) {
-        i -= 16;
-
-        Y0 = RND;
-        Y1 = RND;
-        /* extract 16 coeffs from lumSrc */
-        for (j = 0; j < lumFilterSize; j++) {
-            X0 = vec_ld(0, &lumSrc[j][i]);
-            X1 = vec_ld(16, &lumSrc[j][i]);
-            Y0 = vec_mradds(X0, YCoeffs[j], Y0);
-            Y1 = vec_mradds(X1, YCoeffs[j], Y1);
-        }
-
-        U = RND;
-        V = RND;
-        /* extract 8 coeffs from U,V */
-        for (j = 0; j < chrFilterSize; j++) {
-            X = vec_ld(0, &chrUSrc[j][i / 2]);
-            U = vec_mradds(X, CCoeffs[j], U);
-            X = vec_ld(0, &chrVSrc[j][i / 2]);
-            V = vec_mradds(X, CCoeffs[j], V);
-        }
-
-        /* scale and clip signals */
-        Y0 = vec_sra(Y0, SCL);
-        Y1 = vec_sra(Y1, SCL);
-        U  = vec_sra(U, SCL);
-        V  = vec_sra(V, SCL);
-
-        Y0 = vec_clip_s16(Y0);
-        Y1 = vec_clip_s16(Y1);
-        U  = vec_clip_s16(U);
-        V  = vec_clip_s16(V);
-
-        /* now we have
-         * Y0 = y0 y1 y2 y3 y4 y5 y6 y7    Y1 = y8 y9 y10 y11 y12 y13 y14 y15
-         * U  = u0 u1 u2 u3 u4 u5 u6 u7    V  = v0 v1 v2 v3 v4 v5 v6 v7
-         *
-         * Y0 = y0 y1 y2 y3 y4 y5 y6 y7    Y1 = y8 y9 y10 y11 y12 y13 y14 y15
-         * U0 = u0 u0 u1 u1 u2 u2 u3 u3    U1 = u4 u4 u5 u5 u6 u6 u7 u7
-         * V0 = v0 v0 v1 v1 v2 v2 v3 v3    V1 = v4 v4 v5 v5 v6 v6 v7 v7
-         */
-
-        U0 = vec_mergeh(U, U);
-        V0 = vec_mergeh(V, V);
-
-        U1 = vec_mergel(U, U);
-        V1 = vec_mergel(V, V);
-
-        cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
-        cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
-
-        R = vec_packclp(R0, R1);
-        G = vec_packclp(G0, G1);
-        B = vec_packclp(B0, B1);
-
-        nout = (vector unsigned char *) scratch;
-        switch (target) {
-        case AV_PIX_FMT_ABGR:
-            out_abgr(R, G, B, nout);
-            break;
-        case AV_PIX_FMT_BGRA:
-            out_bgra(R, G, B, nout);
-            break;
-        case AV_PIX_FMT_RGBA:
-            out_rgba(R, G, B, nout);
-            break;
-        case AV_PIX_FMT_ARGB:
-            out_argb(R, G, B, nout);
-            break;
-        case AV_PIX_FMT_RGB24:
-            out_rgb24(R, G, B, nout);
-            break;
-        case AV_PIX_FMT_BGR24:
-            out_bgr24(R, G, B, nout);
-            break;
-        default:
-            /* Unreachable, I think. */
-            av_log(c, AV_LOG_ERROR,
-                   "altivec_yuv2packedX doesn't support %s output\n",
-                   av_get_pix_fmt_name(c->dstFormat));
-            return;
-        }
-
-        memcpy(&((uint32_t *) dest)[i], scratch, (dstW - i) / 4);
-    }
-}
-
-#define YUV2PACKEDX_WRAPPER(suffix, pixfmt)                             \
-void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c,                     \
-                                     const int16_t *lumFilter,          \
-                                     const int16_t **lumSrc,            \
-                                     int lumFilterSize,                 \
-                                     const int16_t *chrFilter,          \
-                                     const int16_t **chrUSrc,           \
-                                     const int16_t **chrVSrc,           \
-                                     int chrFilterSize,                 \
-                                     const int16_t **alpSrc,            \
-                                     uint8_t *dest, int dstW, int dstY) \
-{                                                                       \
-    yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,            \
-                        chrFilter, chrUSrc, chrVSrc,                    \
-                        chrFilterSize, alpSrc,                          \
-                        dest, dstW, dstY, pixfmt);                      \
-}
-
-YUV2PACKEDX_WRAPPER(abgr,  AV_PIX_FMT_ABGR);
-YUV2PACKEDX_WRAPPER(bgra,  AV_PIX_FMT_BGRA);
-YUV2PACKEDX_WRAPPER(argb,  AV_PIX_FMT_ARGB);
-YUV2PACKEDX_WRAPPER(rgba,  AV_PIX_FMT_RGBA);
-YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24);
-YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24);
-
-#endif /* HAVE_ALTIVEC */
diff --git a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h b/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h
deleted file mode 100644
index aa52a47..0000000
--- a/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * AltiVec-enhanced yuv2yuvX
- *
- * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
- * based on the equivalent C code in swscale.c
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H
-#define SWSCALE_PPC_YUV2RGB_ALTIVEC_H
-
-#include <stdint.h>
-
-#include "libswscale/swscale_internal.h"
-
-#define YUV2PACKEDX_HEADER(suffix)                                  \
-    void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c,             \
-                                         const int16_t *lumFilter,  \
-                                         const int16_t **lumSrc,    \
-                                         int lumFilterSize,         \
-                                         const int16_t *chrFilter,  \
-                                         const int16_t **chrUSrc,   \
-                                         const int16_t **chrVSrc,   \
-                                         int chrFilterSize,         \
-                                         const int16_t **alpSrc,    \
-                                         uint8_t *dest,             \
-                                         int dstW, int dstY);
-
-YUV2PACKEDX_HEADER(abgr);
-YUV2PACKEDX_HEADER(bgra);
-YUV2PACKEDX_HEADER(argb);
-YUV2PACKEDX_HEADER(rgba);
-YUV2PACKEDX_HEADER(rgb24);
-YUV2PACKEDX_HEADER(bgr24);
-
-#endif /* SWSCALE_PPC_YUV2RGB_ALTIVEC_H */
diff --git a/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c b/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c
deleted file mode 100644
index 2b1c5dd..0000000
--- a/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * AltiVec-enhanced yuv-to-yuv conversion routines.
- *
- * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
- * based on the equivalent C code in swscale.c
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-
-#if HAVE_ALTIVEC
-
-static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[], int srcSliceY,
-                                       int srcSliceH, uint8_t *dstParam[],
-                                       int dstStride_a[])
-{
-    uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
-    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
-    //            srcStride[0], srcStride[1], dstStride[0]);
-    const uint8_t *ysrc   = src[0];
-    const uint8_t *usrc   = src[1];
-    const uint8_t *vsrc   = src[2];
-    const int width       = c->srcW;
-    const int height      = srcSliceH;
-    const int lumStride   = srcStride[0];
-    const int chromStride = srcStride[1];
-    const int dstStride   = dstStride_a[0];
-    const vector unsigned char yperm = vec_lvsl(0, ysrc);
-    const int vertLumPerChroma       = 2;
-    register unsigned int y;
-
-    /* This code assumes:
-     *
-     * 1) dst is 16 bytes-aligned
-     * 2) dstStride is a multiple of 16
-     * 3) width is a multiple of 16
-     * 4) lum & chrom stride are multiples of 8
-     */
-
-    for (y = 0; y < height; y++) {
-        int i;
-        for (i = 0; i < width - 31; i += 32) {
-            const unsigned int j          = i >> 1;
-            vector unsigned char v_yA     = vec_ld(i, ysrc);
-            vector unsigned char v_yB     = vec_ld(i + 16, ysrc);
-            vector unsigned char v_yC     = vec_ld(i + 32, ysrc);
-            vector unsigned char v_y1     = vec_perm(v_yA, v_yB, yperm);
-            vector unsigned char v_y2     = vec_perm(v_yB, v_yC, yperm);
-            vector unsigned char v_uA     = vec_ld(j, usrc);
-            vector unsigned char v_uB     = vec_ld(j + 16, usrc);
-            vector unsigned char v_u      = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
-            vector unsigned char v_vA     = vec_ld(j, vsrc);
-            vector unsigned char v_vB     = vec_ld(j + 16, vsrc);
-            vector unsigned char v_v      = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
-            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uv_b   = vec_mergel(v_u, v_v);
-            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
-            vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
-            vec_st(v_yuy2_0, (i << 1), dst);
-            vec_st(v_yuy2_1, (i << 1) + 16, dst);
-            vec_st(v_yuy2_2, (i << 1) + 32, dst);
-            vec_st(v_yuy2_3, (i << 1) + 48, dst);
-        }
-        if (i < width) {
-            const unsigned int j          = i >> 1;
-            vector unsigned char v_y1     = vec_ld(i, ysrc);
-            vector unsigned char v_u      = vec_ld(j, usrc);
-            vector unsigned char v_v      = vec_ld(j, vsrc);
-            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
-            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
-            vec_st(v_yuy2_0, (i << 1), dst);
-            vec_st(v_yuy2_1, (i << 1) + 16, dst);
-        }
-        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst  += dstStride;
-    }
-
-    return srcSliceH;
-}
-
-static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[], int srcSliceY,
-                                       int srcSliceH, uint8_t *dstParam[],
-                                       int dstStride_a[])
-{
-    uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
-    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
-    //            srcStride[0], srcStride[1], dstStride[0]);
-    const uint8_t *ysrc              = src[0];
-    const uint8_t *usrc              = src[1];
-    const uint8_t *vsrc              = src[2];
-    const int width                  = c->srcW;
-    const int height                 = srcSliceH;
-    const int lumStride              = srcStride[0];
-    const int chromStride            = srcStride[1];
-    const int dstStride              = dstStride_a[0];
-    const int vertLumPerChroma       = 2;
-    const vector unsigned char yperm = vec_lvsl(0, ysrc);
-    register unsigned int y;
-
-    /* This code assumes:
-     *
-     * 1) dst is 16 bytes-aligned
-     * 2) dstStride is a multiple of 16
-     * 3) width is a multiple of 16
-     * 4) lum & chrom stride are multiples of 8
-     */
-
-    for (y = 0; y < height; y++) {
-        int i;
-        for (i = 0; i < width - 31; i += 32) {
-            const unsigned int j          = i >> 1;
-            vector unsigned char v_yA     = vec_ld(i, ysrc);
-            vector unsigned char v_yB     = vec_ld(i + 16, ysrc);
-            vector unsigned char v_yC     = vec_ld(i + 32, ysrc);
-            vector unsigned char v_y1     = vec_perm(v_yA, v_yB, yperm);
-            vector unsigned char v_y2     = vec_perm(v_yB, v_yC, yperm);
-            vector unsigned char v_uA     = vec_ld(j, usrc);
-            vector unsigned char v_uB     = vec_ld(j + 16, usrc);
-            vector unsigned char v_u      = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
-            vector unsigned char v_vA     = vec_ld(j, vsrc);
-            vector unsigned char v_vB     = vec_ld(j + 16, vsrc);
-            vector unsigned char v_v      = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
-            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uv_b   = vec_mergel(v_u, v_v);
-            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
-            vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
-            vec_st(v_uyvy_0, (i << 1), dst);
-            vec_st(v_uyvy_1, (i << 1) + 16, dst);
-            vec_st(v_uyvy_2, (i << 1) + 32, dst);
-            vec_st(v_uyvy_3, (i << 1) + 48, dst);
-        }
-        if (i < width) {
-            const unsigned int j          = i >> 1;
-            vector unsigned char v_y1     = vec_ld(i, ysrc);
-            vector unsigned char v_u      = vec_ld(j, usrc);
-            vector unsigned char v_v      = vec_ld(j, vsrc);
-            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
-            vec_st(v_uyvy_0, (i << 1), dst);
-            vec_st(v_uyvy_1, (i << 1) + 16, dst);
-        }
-        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst  += dstStride;
-    }
-    return srcSliceH;
-}
-
-#endif /* HAVE_ALTIVEC */
-
-av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c)
-{
-#if HAVE_ALTIVEC
-    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
-        return;
-
-    if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) &&
-        c->srcFormat == AV_PIX_FMT_YUV420P) {
-        enum AVPixelFormat dstFormat = c->dstFormat;
-
-        // unscaled YV12 -> packed YUV, we want speed
-        if (dstFormat == AV_PIX_FMT_YUYV422)
-            c->swscale = yv12toyuy2_unscaled_altivec;
-        else if (dstFormat == AV_PIX_FMT_UYVY422)
-            c->swscale = yv12touyvy_unscaled_altivec;
-    }
-#endif /* HAVE_ALTIVEC */
-}
diff --git a/ffmpeg/libswscale/rgb2rgb.c b/ffmpeg/libswscale/rgb2rgb.c
deleted file mode 100644
index cf877fb..0000000
--- a/ffmpeg/libswscale/rgb2rgb.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * software RGB to RGB converter
- * pluralize by software PAL8 to RGB converter
- *              software YUV to YUV converter
- *              software YUV to RGB converter
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/bswap.h"
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-
-void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
-
-void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
-
-void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
-                   const uint8_t *vsrc, uint8_t *dst,
-                   int width, int height,
-                   int lumStride, int chromStride, int dstStride);
-void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
-                   const uint8_t *vsrc, uint8_t *dst,
-                   int width, int height,
-                   int lumStride, int chromStride, int dstStride);
-void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
-                      const uint8_t *vsrc, uint8_t *dst,
-                      int width, int height,
-                      int lumStride, int chromStride, int dstStride);
-void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc,
-                      const uint8_t *vsrc, uint8_t *dst,
-                      int width, int height,
-                      int lumStride, int chromStride, int dstStride);
-void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst,
-                   uint8_t *udst, uint8_t *vdst,
-                   int width, int height,
-                   int lumStride, int chromStride, int srcStride);
-void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
-                       uint8_t *udst, uint8_t *vdst,
-                       int width, int height,
-                       int lumStride, int chromStride, int srcStride,
-                       int32_t *rgb2yuv);
-void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
-                 int srcStride, int dstStride);
-void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                        int width, int height, int src1Stride,
-                        int src2Stride, int dstStride);
-void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
-                        int width, int height, int srcStride,
-                        int dst1Stride, int dst2Stride);
-void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                    uint8_t *dst1, uint8_t *dst2,
-                    int width, int height,
-                    int srcStride1, int srcStride2,
-                    int dstStride1, int dstStride2);
-void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2,
-                     const uint8_t *src3, uint8_t *dst,
-                     int width, int height,
-                     int srcStride1, int srcStride2,
-                     int srcStride3, int dstStride);
-void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                     const uint8_t *src, int width, int height,
-                     int lumStride, int chromStride, int srcStride);
-void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                     const uint8_t *src, int width, int height,
-                     int lumStride, int chromStride, int srcStride);
-void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                     const uint8_t *src, int width, int height,
-                     int lumStride, int chromStride, int srcStride);
-void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                     const uint8_t *src, int width, int height,
-                     int lumStride, int chromStride, int srcStride);
-
-#define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GY ((int)( 0.504 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GV ((int)(-0.368 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GU ((int)(-0.291 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RY ((int)( 0.257 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RV ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RU ((int)(-0.148 * (1 << RGB2YUV_SHIFT) + 0.5))
-
-//plain C versions
-#include "rgb2rgb_template.c"
-
-/*
- * RGB15->RGB16 original by Strepto/Astral
- * ported to gcc & bugfixed : A'rpi
- * MMXEXT, 3DNOW optimization by Nick Kurshev
- * 32-bit C version, and and&add trick by Michael Niedermayer
- */
-
-av_cold void sws_rgb2rgb_init(void)
-{
-    rgb2rgb_init_c();
-    if (ARCH_X86)
-        rgb2rgb_init_x86();
-}
-
-void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    int i, num_pixels = src_size >> 2;
-
-    for (i = 0; i < num_pixels; i++) {
-#if HAVE_BIGENDIAN
-        /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
-        dst[3 * i + 0] = src[4 * i + 1];
-        dst[3 * i + 1] = src[4 * i + 2];
-        dst[3 * i + 2] = src[4 * i + 3];
-#else
-        dst[3 * i + 0] = src[4 * i + 2];
-        dst[3 * i + 1] = src[4 * i + 1];
-        dst[3 * i + 2] = src[4 * i + 0];
-#endif
-    }
-}
-
-void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    int i;
-
-    for (i = 0; 3 * i < src_size; i++) {
-#if HAVE_BIGENDIAN
-        /* RGB24 (= R, G, B) -> BGR32 (= A, R, G, B) */
-        dst[4 * i + 0] = 255;
-        dst[4 * i + 1] = src[3 * i + 0];
-        dst[4 * i + 2] = src[3 * i + 1];
-        dst[4 * i + 3] = src[3 * i + 2];
-#else
-        dst[4 * i + 0] = src[3 * i + 2];
-        dst[4 * i + 1] = src[3 * i + 1];
-        dst[4 * i + 2] = src[3 * i + 0];
-        dst[4 * i + 3] = 255;
-#endif
-    }
-}
-
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-#if HAVE_BIGENDIAN
-        *d++ = 255;
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-#else
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = 255;
-#endif
-    }
-}
-
-void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint16_t rgb, r, g, b;
-    uint16_t *d         = (uint16_t *)dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        rgb  = *s++;
-        r    = rgb & 0xF00;
-        g    = rgb & 0x0F0;
-        b    = rgb & 0x00F;
-        r    = (r << 3) | ((r & 0x800) >> 1);
-        g    = (g << 2) | ((g & 0x080) >> 2);
-        b    = (b << 1) | ( b          >> 3);
-        *d++ = r | g | b;
-    }
-}
-
-void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-    }
-}
-
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    int i, num_pixels = src_size >> 1;
-
-    for (i = 0; i < num_pixels; i++) {
-        unsigned rgb         = ((const uint16_t *)src)[i];
-        ((uint16_t *)dst)[i] = (rgb >> 11) | (rgb & 0x7E0) | (rgb << 11);
-    }
-}
-
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    int i, num_pixels = src_size >> 1;
-
-    for (i = 0; i < num_pixels; i++) {
-        unsigned rgb         = ((const uint16_t *)src)[i];
-        ((uint16_t *)dst)[i] = (rgb >> 11) | ((rgb & 0x7C0) >> 1) | ((rgb & 0x1F) << 10);
-    }
-}
-
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-#if HAVE_BIGENDIAN
-        *d++ = 255;
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-#else
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = 255;
-#endif
-    }
-}
-
-void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-    }
-}
-
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    int i, num_pixels = src_size >> 1;
-
-    for (i = 0; i < num_pixels; i++) {
-        unsigned rgb         = ((const uint16_t *)src)[i];
-        ((uint16_t *)dst)[i] = ((rgb & 0x7C00) >> 10) | ((rgb & 0x3E0) << 1) | (rgb << 11);
-    }
-}
-
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    int i, num_pixels = src_size >> 1;
-
-    for (i = 0; i < num_pixels; i++) {
-        unsigned rgb         = ((const uint16_t *)src)[i];
-        unsigned br          = rgb & 0x7C1F;
-        ((uint16_t *)dst)[i] = (br >> 10) | (rgb & 0x3E0) | (br << 10);
-    }
-}
-
-void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint16_t *d = (uint16_t *)dst;
-    uint16_t *s = (uint16_t *)src;
-    int i, num_pixels = src_size >> 1;
-
-    for (i = 0; i < num_pixels; i++) {
-        unsigned rgb = s[i];
-        d[i]         = (rgb << 8 | rgb & 0xF0 | rgb >> 8) & 0xFFF;
-    }
-}
-
-
-#define DEFINE_SHUFFLE_BYTES(a, b, c, d)                                \
-void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src,             \
-                                        uint8_t *dst, int src_size)     \
-{                                                                       \
-    int i;                                                              \
-                                                                        \
-    for (i = 0; i < src_size; i += 4) {                                 \
-        dst[i + 0] = src[i + a];                                        \
-        dst[i + 1] = src[i + b];                                        \
-        dst[i + 2] = src[i + c];                                        \
-        dst[i + 3] = src[i + d];                                        \
-    }                                                                   \
-}
-
-DEFINE_SHUFFLE_BYTES(0, 3, 2, 1)
-DEFINE_SHUFFLE_BYTES(1, 2, 3, 0)
-DEFINE_SHUFFLE_BYTES(3, 0, 1, 2)
-DEFINE_SHUFFLE_BYTES(3, 2, 1, 0)
-
-#define DEFINE_RGB48TOBGR48(need_bswap, swap)                           \
-void rgb48tobgr48_ ## need_bswap(const uint8_t *src,                    \
-                                 uint8_t *dst, int src_size)            \
-{                                                                       \
-    uint16_t *d = (uint16_t *)dst;                                      \
-    uint16_t *s = (uint16_t *)src;                                      \
-    int i, num_pixels = src_size >> 1;                                  \
-                                                                        \
-    for (i = 0; i < num_pixels; i += 3) {                               \
-        d[i    ] = swap ? av_bswap16(s[i + 2]) : s[i + 2];              \
-        d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1];              \
-        d[i + 2] = swap ? av_bswap16(s[i    ]) : s[i    ];              \
-    }                                                                   \
-}
-
-DEFINE_RGB48TOBGR48(nobswap, 0)
-DEFINE_RGB48TOBGR48(bswap, 1)
-
-#define DEFINE_RGB64TOBGR48(need_bswap, swap)                           \
-void rgb64tobgr48_ ## need_bswap(const uint8_t *src,                    \
-                                 uint8_t *dst, int src_size)            \
-{                                                                       \
-    uint16_t *d = (uint16_t *)dst;                                      \
-    uint16_t *s = (uint16_t *)src;                                      \
-    int i, num_pixels = src_size >> 3;                                  \
-                                                                        \
-    for (i = 0; i < num_pixels; i++) {                                  \
-        d[3 * i    ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2];  \
-        d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1];  \
-        d[3 * i + 2] = swap ? av_bswap16(s[4 * i    ]) : s[4 * i    ];  \
-    }                                                                   \
-}
-
-DEFINE_RGB64TOBGR48(nobswap, 0)
-DEFINE_RGB64TOBGR48(bswap, 1)
-
-#define DEFINE_RGB64TO48(need_bswap, swap)                              \
-void rgb64to48_ ## need_bswap(const uint8_t *src,                       \
-                              uint8_t *dst, int src_size)               \
-{                                                                       \
-    uint16_t *d = (uint16_t *)dst;                                      \
-    uint16_t *s = (uint16_t *)src;                                      \
-    int i, num_pixels = src_size >> 3;                                  \
-                                                                        \
-    for (i = 0; i < num_pixels; i++) {                                  \
-        d[3 * i    ] = swap ? av_bswap16(s[4 * i    ]) : s[4 * i    ];  \
-        d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1];  \
-        d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2];  \
-    }                                                                   \
-}
-
-DEFINE_RGB64TO48(nobswap, 0)
-DEFINE_RGB64TO48(bswap, 1)
diff --git a/ffmpeg/libswscale/rgb2rgb.h b/ffmpeg/libswscale/rgb2rgb.h
deleted file mode 100644
index 5df5dea..0000000
--- a/ffmpeg/libswscale/rgb2rgb.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- *  software RGB to RGB converter
- *  pluralize by Software PAL8 to RGB converter
- *               Software YUV to YUV converter
- *               Software YUV to RGB converter
- *  Written by Nick Kurshev.
- *  YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_RGB2RGB_H
-#define SWSCALE_RGB2RGB_H
-
-#include <inttypes.h>
-
-#include "libavutil/avutil.h"
-#include "swscale.h"
-
-/* A full collection of RGB to RGB(BGR) converters */
-extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void    (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
-
-extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
-
-void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
-void   rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
-void   rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
-void    rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
-void      rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
-void    rgb24to32(const uint8_t *src, uint8_t *dst, int src_size);
-void    rgb32to24(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
-void    rgb16to24(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
-void    rgb15to24(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size);
-void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
-
-void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
-void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
-void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size);
-void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size);
-
-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                      uint8_t *vdst, int width, int height, int lumStride,
-                      int chromStride, int srcStride, int32_t *rgb2yuv);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          int width, int height,
-                          int lumStride, int chromStride, int dstStride);
-
-/**
- * Width should be a multiple of 16.
- */
-extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             int width, int height,
-                             int lumStride, int chromStride, int dstStride);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                          int width, int height,
-                          int lumStride, int chromStride, int srcStride);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          int width, int height,
-                          int lumStride, int chromStride, int dstStride);
-
-/**
- * Width should be a multiple of 16.
- */
-extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             int width, int height,
-                             int lumStride, int chromStride, int dstStride);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 2.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write high quality version.
- */
-extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                              int width, int height,
-                              int lumStride, int chromStride, int srcStride,
-                              int32_t *rgb2yuv);
-extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
-                        int srcStride, int dstStride);
-
-extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                               int width, int height, int src1Stride,
-                               int src2Stride, int dstStride);
-
-extern void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
-                                 int width, int height, int srcStride,
-                                 int dst1Stride, int dst2Stride);
-
-extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                           uint8_t *dst1, uint8_t *dst2,
-                           int width, int height,
-                           int srcStride1, int srcStride2,
-                           int dstStride1, int dstStride2);
-
-extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                            uint8_t *dst,
-                            int width, int height,
-                            int srcStride1, int srcStride2,
-                            int srcStride3, int dstStride);
-
-extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            int width, int height,
-                            int lumStride, int chromStride, int srcStride);
-extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            int width, int height,
-                            int lumStride, int chromStride, int srcStride);
-extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            int width, int height,
-                            int lumStride, int chromStride, int srcStride);
-extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            int width, int height,
-                            int lumStride, int chromStride, int srcStride);
-
-void sws_rgb2rgb_init(void);
-
-void rgb2rgb_init_x86(void);
-
-#endif /* SWSCALE_RGB2RGB_H */
diff --git a/ffmpeg/libswscale/rgb2rgb_template.c b/ffmpeg/libswscale/rgb2rgb_template.c
deleted file mode 100644
index 56e735f..0000000
--- a/ffmpeg/libswscale/rgb2rgb_template.c
+++ /dev/null
@@ -1,951 +0,0 @@
-/*
- * software RGB to RGB converter
- * pluralize by software PAL8 to RGB converter
- *              software YUV to YUV converter
- *              software YUV to RGB converter
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- * lot of big-endian byte order fixes by Alex Beregszaszi
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stddef.h>
-
-#include "libavutil/attributes.h"
-
-static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint8_t *dest      = dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-#if HAVE_BIGENDIAN
-        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
-        *dest++  = 255;
-        *dest++  = s[2];
-        *dest++  = s[1];
-        *dest++  = s[0];
-        s       += 3;
-#else
-        *dest++  = *s++;
-        *dest++  = *s++;
-        *dest++  = *s++;
-        *dest++  = 255;
-#endif
-    }
-}
-
-static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint8_t *dest      = dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-#if HAVE_BIGENDIAN
-        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
-        s++;
-        dest[2]  = *s++;
-        dest[1]  = *s++;
-        dest[0]  = *s++;
-        dest    += 3;
-#else
-        *dest++  = *s++;
-        *dest++  = *s++;
-        *dest++  = *s++;
-        s++;
-#endif
-    }
-}
-
-/*
- * original by Strepto/Astral
- * ported to gcc & bugfixed: A'rpi
- * MMXEXT, 3DNOW optimization by Nick Kurshev
- * 32-bit C version, and and&add trick by Michael Niedermayer
- */
-static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    register uint8_t *d         = dst;
-    register const uint8_t *s   = src;
-    register const uint8_t *end = s + src_size;
-    const uint8_t *mm_end       = end - 3;
-
-    while (s < mm_end) {
-        register unsigned x = *((const uint32_t *)s);
-        *((uint32_t *)d)    = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
-        d += 4;
-        s += 4;
-    }
-    if (s < end) {
-        register unsigned short x = *((const uint16_t *)s);
-        *((uint16_t *)d)          = (x & 0x7FFF) + (x & 0x7FE0);
-    }
-}
-
-static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    register uint8_t *d         = dst;
-    register const uint8_t *s   = src;
-    register const uint8_t *end = s + src_size;
-    const uint8_t *mm_end       = end - 3;
-
-    while (s < mm_end) {
-        register uint32_t x  = *((const uint32_t *)s);
-        *((uint32_t *)d)     = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
-        s                   += 4;
-        d                   += 4;
-    }
-    if (s < end) {
-        register uint16_t x = *((const uint16_t *)s);
-        *((uint16_t *)d)    = ((x >> 1) & 0x7FE0) | (x & 0x001F);
-    }
-}
-
-static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        register int rgb  = *(const uint32_t *)s;
-        s                += 4;
-        *d++              = ((rgb & 0xFF)     >> 3) +
-                            ((rgb & 0xFC00)   >> 5) +
-                            ((rgb & 0xF80000) >> 8);
-    }
-}
-
-static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        register int rgb  = *(const uint32_t *)s;
-        s                += 4;
-        *d++              = ((rgb & 0xF8)     << 8) +
-                            ((rgb & 0xFC00)   >> 5) +
-                            ((rgb & 0xF80000) >> 19);
-    }
-}
-
-static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        register int rgb  = *(const uint32_t *)s;
-        s                += 4;
-        *d++              = ((rgb & 0xFF)     >> 3) +
-                            ((rgb & 0xF800)   >> 6) +
-                            ((rgb & 0xF80000) >> 9);
-    }
-}
-
-static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        register int rgb  = *(const uint32_t *)s;
-        s                += 4;
-        *d++              = ((rgb & 0xF8)     <<  7) +
-                            ((rgb & 0xF800)   >>  6) +
-                            ((rgb & 0xF80000) >> 19);
-    }
-}
-
-static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        const int b = *s++;
-        const int g = *s++;
-        const int r = *s++;
-        *d++        = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
-    }
-}
-
-static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        const int r = *s++;
-        const int g = *s++;
-        const int b = *s++;
-        *d++        = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
-    }
-}
-
-static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        const int b = *s++;
-        const int g = *s++;
-        const int r = *s++;
-        *d++        = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
-    }
-}
-
-static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint16_t *d        = (uint16_t *)dst;
-    const uint8_t *s   = src;
-    const uint8_t *end = s + src_size;
-
-    while (s < end) {
-        const int r = *s++;
-        const int g = *s++;
-        const int b = *s++;
-        *d++        = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
-    }
-}
-
-static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-    }
-}
-
-static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
-                                  int src_size)
-{
-    uint8_t *d          = (uint8_t *)dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-    }
-}
-
-static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-#if HAVE_BIGENDIAN
-        *d++ = 255;
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-#else
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-        *d++ = 255;
-#endif
-    }
-}
-
-static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *d          = dst;
-    const uint16_t *s   = (const uint16_t *)src;
-    const uint16_t *end = s + src_size / 2;
-
-    while (s < end) {
-        register uint16_t bgr = *s++;
-#if HAVE_BIGENDIAN
-        *d++ = 255;
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-#else
-        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
-        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-        *d++ = 255;
-#endif
-    }
-}
-
-static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
-                                        int src_size)
-{
-    int idx          = 15  - src_size;
-    const uint8_t *s = src - idx;
-    uint8_t *d       = dst - idx;
-
-    for (; idx < 15; idx += 4) {
-        register int v        = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
-        v                    &= 0xff00ff;
-        *(uint32_t *)&d[idx]  = (v >> 16) + g + (v << 16);
-    }
-}
-
-static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    unsigned i;
-
-    for (i = 0; i < src_size; i += 3) {
-        register uint8_t x = src[i + 2];
-        dst[i + 1]         = src[i + 1];
-        dst[i + 2]         = src[i + 0];
-        dst[i + 0]         = x;
-    }
-}
-
-static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
-                                     const uint8_t *vsrc, uint8_t *dst,
-                                     int width, int height,
-                                     int lumStride, int chromStride,
-                                     int dstStride, int vertLumPerChroma)
-{
-    int y, i;
-    const int chromWidth = width >> 1;
-
-    for (y = 0; y < height; y++) {
-#if HAVE_FAST_64BIT
-        uint64_t *ldst = (uint64_t *)dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-        for (i = 0; i < chromWidth; i += 2) {
-            uint64_t k = yc[0] + (uc[0] << 8) +
-                         (yc[1] << 16) + (unsigned)(vc[0] << 24);
-            uint64_t l = yc[2] + (uc[1] << 8) +
-                         (yc[3] << 16) + (unsigned)(vc[1] << 24);
-            *ldst++ = k + (l << 32);
-            yc     += 4;
-            uc     += 2;
-            vc     += 2;
-        }
-
-#else
-        int *idst = (int32_t *)dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-
-        for (i = 0; i < chromWidth; i++) {
-#if HAVE_BIGENDIAN
-            *idst++ = (yc[0] << 24) + (uc[0] << 16) +
-                      (yc[1] <<  8) + (vc[0] <<  0);
-#else
-            *idst++ = yc[0] + (uc[0] << 8) +
-                      (yc[1] << 16) + (vc[0] << 24);
-#endif
-            yc += 2;
-            uc++;
-            vc++;
-        }
-#endif
-        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst  += dstStride;
-    }
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
-                                const uint8_t *vsrc, uint8_t *dst,
-                                int width, int height, int lumStride,
-                                int chromStride, int dstStride)
-{
-    //FIXME interpolate chroma
-    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
-                      chromStride, dstStride, 2);
-}
-
-static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
-                                     const uint8_t *vsrc, uint8_t *dst,
-                                     int width, int height,
-                                     int lumStride, int chromStride,
-                                     int dstStride, int vertLumPerChroma)
-{
-    int y, i;
-    const int chromWidth = width >> 1;
-
-    for (y = 0; y < height; y++) {
-#if HAVE_FAST_64BIT
-        uint64_t *ldst = (uint64_t *)dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-        for (i = 0; i < chromWidth; i += 2) {
-            uint64_t k = uc[0] + (yc[0] << 8) +
-                         (vc[0] << 16) + (unsigned)(yc[1] << 24);
-            uint64_t l = uc[1] + (yc[2] << 8) +
-                         (vc[1] << 16) + (unsigned)(yc[3] << 24);
-            *ldst++ = k + (l << 32);
-            yc     += 4;
-            uc     += 2;
-            vc     += 2;
-        }
-
-#else
-        int *idst = (int32_t *)dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-
-        for (i = 0; i < chromWidth; i++) {
-#if HAVE_BIGENDIAN
-            *idst++ = (uc[0] << 24) + (yc[0] << 16) +
-                      (vc[0] <<  8) + (yc[1] <<  0);
-#else
-            *idst++ = uc[0] + (yc[0] << 8) +
-                      (vc[0] << 16) + (yc[1] << 24);
-#endif
-            yc += 2;
-            uc++;
-            vc++;
-        }
-#endif
-        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst  += dstStride;
-    }
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
-                                const uint8_t *vsrc, uint8_t *dst,
-                                int width, int height, int lumStride,
-                                int chromStride, int dstStride)
-{
-    //FIXME interpolate chroma
-    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
-                      chromStride, dstStride, 2);
-}
-
-/**
- * Width should be a multiple of 16.
- */
-static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
-                                   const uint8_t *vsrc, uint8_t *dst,
-                                   int width, int height, int lumStride,
-                                   int chromStride, int dstStride)
-{
-    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
-                      chromStride, dstStride, 1);
-}
-
-/**
- * Width should be a multiple of 16.
- */
-static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
-                                   const uint8_t *vsrc, uint8_t *dst,
-                                   int width, int height, int lumStride,
-                                   int chromStride, int dstStride)
-{
-    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
-                      chromStride, dstStride, 1);
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
-                                uint8_t *udst, uint8_t *vdst,
-                                int width, int height, int lumStride,
-                                int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = width >> 1;
-
-    for (y = 0; y < height; y += 2) {
-        int i;
-        for (i = 0; i < chromWidth; i++) {
-            ydst[2 * i + 0] = src[4 * i + 0];
-            udst[i]         = src[4 * i + 1];
-            ydst[2 * i + 1] = src[4 * i + 2];
-            vdst[i]         = src[4 * i + 3];
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i = 0; i < chromWidth; i++) {
-            ydst[2 * i + 0] = src[4 * i + 0];
-            ydst[2 * i + 1] = src[4 * i + 2];
-        }
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-}
-
-static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
-                              int srcHeight, int srcStride, int dstStride)
-{
-    int x, y;
-
-    dst[0] = src[0];
-
-    // first line
-    for (x = 0; x < srcWidth - 1; x++) {
-        dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
-        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
-    }
-    dst[2 * srcWidth - 1] = src[srcWidth - 1];
-
-    dst += dstStride;
-
-    for (y = 1; y < srcHeight; y++) {
-        const int mmxSize = 1;
-
-        dst[0]         = (src[0] * 3 + src[srcStride]) >> 2;
-        dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
-
-        for (x = mmxSize - 1; x < srcWidth - 1; x++) {
-            dst[2 * x + 1]             = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
-            dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
-            dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride])     >> 2;
-            dst[2 * x + 2]             = (src[x + 1] * 3 + src[x + srcStride])     >> 2;
-        }
-        dst[srcWidth * 2 - 1]             = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
-        dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
-
-        dst += dstStride * 2;
-        src += srcStride;
-    }
-
-    // last line
-    dst[0] = src[0];
-
-    for (x = 0; x < srcWidth - 1; x++) {
-        dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
-        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
-    }
-    dst[2 * srcWidth - 1] = src[srcWidth - 1];
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write HQ version.
- */
-static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
-                                uint8_t *udst, uint8_t *vdst,
-                                int width, int height, int lumStride,
-                                int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = width >> 1;
-
-    for (y = 0; y < height; y += 2) {
-        int i;
-        for (i = 0; i < chromWidth; i++) {
-            udst[i]         = src[4 * i + 0];
-            ydst[2 * i + 0] = src[4 * i + 1];
-            vdst[i]         = src[4 * i + 2];
-            ydst[2 * i + 1] = src[4 * i + 3];
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i = 0; i < chromWidth; i++) {
-            ydst[2 * i + 0] = src[4 * i + 1];
-            ydst[2 * i + 1] = src[4 * i + 3];
-        }
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 2.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line,
- * others are ignored in the C version.
- * FIXME: Write HQ version.
- */
-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, int width, int height, int lumStride,
-                   int chromStride, int srcStride, int32_t *rgb2yuv)
-{
-    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
-    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
-    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
-    int y;
-    const int chromWidth = width >> 1;
-
-    for (y = 0; y < height; y += 2) {
-        int i;
-        for (i = 0; i < chromWidth; i++) {
-            unsigned int b = src[6 * i + 0];
-            unsigned int g = src[6 * i + 1];
-            unsigned int r = src[6 * i + 2];
-
-            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
-            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
-            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
-
-            udst[i]     = U;
-            vdst[i]     = V;
-            ydst[2 * i] = Y;
-
-            b = src[6 * i + 3];
-            g = src[6 * i + 4];
-            r = src[6 * i + 5];
-
-            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-            ydst[2 * i + 1] = Y;
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        if (y+1 == height)
-            break;
-
-        for (i = 0; i < chromWidth; i++) {
-            unsigned int b = src[6 * i + 0];
-            unsigned int g = src[6 * i + 1];
-            unsigned int r = src[6 * i + 2];
-
-            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-
-            ydst[2 * i] = Y;
-
-            b = src[6 * i + 3];
-            g = src[6 * i + 4];
-            r = src[6 * i + 5];
-
-            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-            ydst[2 * i + 1] = Y;
-        }
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-}
-
-static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                              uint8_t *dest, int width, int height,
-                              int src1Stride, int src2Stride, int dstStride)
-{
-    int h;
-
-    for (h = 0; h < height; h++) {
-        int w;
-        for (w = 0; w < width; w++) {
-            dest[2 * w + 0] = src1[w];
-            dest[2 * w + 1] = src2[w];
-        }
-        dest += dstStride;
-        src1 += src1Stride;
-        src2 += src2Stride;
-    }
-}
-
-static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
-                                int width, int height, int srcStride,
-                                int dst1Stride, int dst2Stride)
-{
-    int h;
-
-    for (h = 0; h < height; h++) {
-        int w;
-        for (w = 0; w < width; w++) {
-            dst1[w] = src[2 * w + 0];
-            dst2[w] = src[2 * w + 1];
-        }
-        src += srcStride;
-        dst1 += dst1Stride;
-        dst2 += dst2Stride;
-    }
-}
-
-static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
-                                 uint8_t *dst1, uint8_t *dst2,
-                                 int width, int height,
-                                 int srcStride1, int srcStride2,
-                                 int dstStride1, int dstStride2)
-{
-    int x, y;
-    int w = width  / 2;
-    int h = height / 2;
-
-    for (y = 0; y < h; y++) {
-        const uint8_t *s1 = src1 + srcStride1 * (y >> 1);
-        uint8_t *d        = dst1 + dstStride1 *  y;
-        for (x = 0; x < w; x++)
-            d[2 * x] = d[2 * x + 1] = s1[x];
-    }
-    for (y = 0; y < h; y++) {
-        const uint8_t *s2 = src2 + srcStride2 * (y >> 1);
-        uint8_t *d        = dst2 + dstStride2 *  y;
-        for (x = 0; x < w; x++)
-            d[2 * x] = d[2 * x + 1] = s2[x];
-    }
-}
-
-static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
-                                  const uint8_t *src3, uint8_t *dst,
-                                  int width, int height,
-                                  int srcStride1, int srcStride2,
-                                  int srcStride3, int dstStride)
-{
-    int x, y;
-    int w = width / 2;
-    int h = height;
-
-    for (y = 0; y < h; y++) {
-        const uint8_t *yp = src1 + srcStride1 *  y;
-        const uint8_t *up = src2 + srcStride2 * (y >> 2);
-        const uint8_t *vp = src3 + srcStride3 * (y >> 2);
-        uint8_t *d        = dst  + dstStride  *  y;
-        for (x = 0; x < w; x++) {
-            const int x2 = x << 2;
-            d[8 * x + 0] = yp[x2];
-            d[8 * x + 1] = up[x];
-            d[8 * x + 2] = yp[x2 + 1];
-            d[8 * x + 3] = vp[x];
-            d[8 * x + 4] = yp[x2 + 2];
-            d[8 * x + 5] = up[x];
-            d[8 * x + 6] = yp[x2 + 3];
-            d[8 * x + 7] = vp[x];
-        }
-    }
-}
-
-static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
-{
-    dst   +=  count;
-    src   +=  count * 2;
-    count  = -count;
-    while (count < 0) {
-        dst[count] = src[2 * count];
-        count++;
-    }
-}
-
-static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
-                            int count)
-{
-    dst0  +=  count;
-    dst1  +=  count;
-    src   +=  count * 4;
-    count  = -count;
-    while (count < 0) {
-        dst0[count] = src[4 * count + 0];
-        dst1[count] = src[4 * count + 2];
-        count++;
-    }
-}
-
-static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
-                               uint8_t *dst0, uint8_t *dst1, int count)
-{
-    dst0  +=  count;
-    dst1  +=  count;
-    src0  +=  count * 4;
-    src1  +=  count * 4;
-    count  = -count;
-    while (count < 0) {
-        dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
-        dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
-        count++;
-    }
-}
-
-static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
-                           int count)
-{
-    dst0  +=  count;
-    dst1  +=  count;
-    src   +=  count * 4;
-    count  = -count;
-    src++;
-    while (count < 0) {
-        dst0[count] = src[4 * count + 0];
-        dst1[count] = src[4 * count + 2];
-        count++;
-    }
-}
-
-static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
-                              uint8_t *dst0, uint8_t *dst1, int count)
-{
-    dst0  +=  count;
-    dst1  +=  count;
-    src0  +=  count * 4;
-    src1  +=  count * 4;
-    count  = -count;
-    src0++;
-    src1++;
-    while (count < 0) {
-        dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
-        dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
-        count++;
-    }
-}
-
-static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, int width, int height,
-                           int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y = 0; y < height; y++) {
-        extract_even_c(src, ydst, width);
-        if (y & 1) {
-            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
-            udst += chromStride;
-            vdst += chromStride;
-        }
-
-        src  += srcStride;
-        ydst += lumStride;
-    }
-}
-
-static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, int width, int height,
-                           int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y = 0; y < height; y++) {
-        extract_even_c(src, ydst, width);
-        extract_odd2_c(src, udst, vdst, chromWidth);
-
-        src  += srcStride;
-        ydst += lumStride;
-        udst += chromStride;
-        vdst += chromStride;
-    }
-}
-
-static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, int width, int height,
-                           int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y = 0; y < height; y++) {
-        extract_even_c(src + 1, ydst, width);
-        if (y & 1) {
-            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
-            udst += chromStride;
-            vdst += chromStride;
-        }
-
-        src  += srcStride;
-        ydst += lumStride;
-    }
-}
-
-static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, int width, int height,
-                           int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y = 0; y < height; y++) {
-        extract_even_c(src + 1, ydst, width);
-        extract_even2_c(src, udst, vdst, chromWidth);
-
-        src  += srcStride;
-        ydst += lumStride;
-        udst += chromStride;
-        vdst += chromStride;
-    }
-}
-
-static av_cold void rgb2rgb_init_c(void)
-{
-    rgb15to16          = rgb15to16_c;
-    rgb15tobgr24       = rgb15tobgr24_c;
-    rgb15to32          = rgb15to32_c;
-    rgb16tobgr24       = rgb16tobgr24_c;
-    rgb16to32          = rgb16to32_c;
-    rgb16to15          = rgb16to15_c;
-    rgb24tobgr16       = rgb24tobgr16_c;
-    rgb24tobgr15       = rgb24tobgr15_c;
-    rgb24tobgr32       = rgb24tobgr32_c;
-    rgb32to16          = rgb32to16_c;
-    rgb32to15          = rgb32to15_c;
-    rgb32tobgr24       = rgb32tobgr24_c;
-    rgb24to15          = rgb24to15_c;
-    rgb24to16          = rgb24to16_c;
-    rgb24tobgr24       = rgb24tobgr24_c;
-    shuffle_bytes_2103 = shuffle_bytes_2103_c;
-    rgb32tobgr16       = rgb32tobgr16_c;
-    rgb32tobgr15       = rgb32tobgr15_c;
-    yv12toyuy2         = yv12toyuy2_c;
-    yv12touyvy         = yv12touyvy_c;
-    yuv422ptoyuy2      = yuv422ptoyuy2_c;
-    yuv422ptouyvy      = yuv422ptouyvy_c;
-    yuy2toyv12         = yuy2toyv12_c;
-    planar2x           = planar2x_c;
-    ff_rgb24toyv12     = ff_rgb24toyv12_c;
-    interleaveBytes    = interleaveBytes_c;
-    deinterleaveBytes  = deinterleaveBytes_c;
-    vu9_to_vu12        = vu9_to_vu12_c;
-    yvu9_to_yuy2       = yvu9_to_yuy2_c;
-
-    uyvytoyuv420       = uyvytoyuv420_c;
-    uyvytoyuv422       = uyvytoyuv422_c;
-    yuyvtoyuv420       = yuyvtoyuv420_c;
-    yuyvtoyuv422       = yuyvtoyuv422_c;
-}
diff --git a/ffmpeg/libswscale/sparc/Makefile b/ffmpeg/libswscale/sparc/Makefile
deleted file mode 100644
index 2351ba4..0000000
--- a/ffmpeg/libswscale/sparc/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-VIS-OBJS += sparc/yuv2rgb_vis.o                                         \
diff --git a/ffmpeg/libswscale/sparc/yuv2rgb_vis.c b/ffmpeg/libswscale/sparc/yuv2rgb_vis.c
deleted file mode 100644
index ed00837..0000000
--- a/ffmpeg/libswscale/sparc/yuv2rgb_vis.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * VIS optimized software YUV to RGB converter
- * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-#include <stdlib.h>
-
-#include "libavutil/attributes.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-
-#define YUV2RGB_INIT                               \
-    "wr %%g0, 0x10, %%gsr \n\t"                    \
-    "ldd [%5],      %%f32 \n\t"                    \
-    "ldd [%5 +  8], %%f34 \n\t"                    \
-    "ldd [%5 + 16], %%f36 \n\t"                    \
-    "ldd [%5 + 24], %%f38 \n\t"                    \
-    "ldd [%5 + 32], %%f40 \n\t"                    \
-    "ldd [%5 + 40], %%f42 \n\t"                    \
-    "ldd [%5 + 48], %%f44 \n\t"                    \
-    "ldd [%5 + 56], %%f46 \n\t"                    \
-    "ldd [%5 + 64], %%f48 \n\t"                    \
-    "ldd [%5 + 72], %%f50 \n\t"
-
-#define YUV2RGB_KERNEL                             \
-    /* ^^^^ f0=Y f3=u f5=v */                      \
-    "fmul8x16 %%f3,  %%f48,  %%f6 \n\t"            \
-    "fmul8x16 %%f19, %%f48, %%f22 \n\t"            \
-    "fmul8x16 %%f5,  %%f44,  %%f8 \n\t"            \
-    "fmul8x16 %%f21, %%f44, %%f24 \n\t"            \
-    "fmul8x16 %%f0,  %%f42,  %%f0 \n\t"            \
-    "fmul8x16 %%f16, %%f42, %%f16 \n\t"            \
-    "fmul8x16 %%f3,  %%f50,  %%f2 \n\t"            \
-    "fmul8x16 %%f19, %%f50, %%f18 \n\t"            \
-    "fmul8x16 %%f5,  %%f46,  %%f4 \n\t"            \
-    "fmul8x16 %%f21, %%f46, %%f20 \n\t"            \
-                                                   \
-    "fpsub16 %%f6,  %%f34,  %%f6 \n\t" /* 1 */     \
-    "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */     \
-    "fpsub16 %%f8,  %%f38,  %%f8 \n\t" /* 3 */     \
-    "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */     \
-    "fpsub16 %%f0,  %%f32,  %%f0 \n\t" /* 0 */     \
-    "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */     \
-    "fpsub16 %%f2,  %%f36,  %%f2 \n\t" /* 2 */     \
-    "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */     \
-    "fpsub16 %%f4,  %%f40,  %%f4 \n\t" /* 4 */     \
-    "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */     \
-                                                   \
-    "fpadd16 %%f0,  %%f8,  %%f8  \n\t" /* Gt */    \
-    "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */    \
-    "fpadd16 %%f0,  %%f4,  %%f4  \n\t" /* R */     \
-    "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */     \
-    "fpadd16 %%f0,  %%f6,  %%f6  \n\t" /* B */     \
-    "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */     \
-    "fpadd16 %%f8,  %%f2,  %%f2  \n\t" /* G */     \
-    "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */     \
-                                                   \
-    "fpack16 %%f4,  %%f4  \n\t"                    \
-    "fpack16 %%f20, %%f20 \n\t"                    \
-    "fpack16 %%f6,  %%f6  \n\t"                    \
-    "fpack16 %%f22, %%f22 \n\t"                    \
-    "fpack16 %%f2,  %%f2  \n\t"                    \
-    "fpack16 %%f18, %%f18 \n\t"
-
-// FIXME: must be changed to set alpha to 255 instead of 0
-static int vis_420P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[],
-                           int srcSliceY, int srcSliceH,
-                           uint8_t *dst[], int dstStride[])
-{
-    int y, out1, out2, out3, out4, out5, out6;
-
-    for (y = 0; y < srcSliceH; ++y)
-        __asm__ volatile (
-            YUV2RGB_INIT
-            "wr %%g0, 0xd2, %%asi        \n\t"  /* ASI_FL16_P */
-            "1:                          \n\t"
-            "ldda [%1]     %%asi, %%f2   \n\t"
-            "ldda [%1 + 2] %%asi, %%f18  \n\t"
-            "ldda [%2]     %%asi, %%f4   \n\t"
-            "ldda [%2 + 2] %%asi, %%f20  \n\t"
-            "ld [%0], %%f0               \n\t"
-            "ld [%0+4], %%f16            \n\t"
-            "fpmerge %%f3,  %%f3,  %%f2  \n\t"
-            "fpmerge %%f19, %%f19, %%f18 \n\t"
-            "fpmerge %%f5,  %%f5,  %%f4  \n\t"
-            "fpmerge %%f21, %%f21, %%f20 \n\t"
-            YUV2RGB_KERNEL
-            "fzero %%f0                  \n\t"
-            "fpmerge %%f4,  %%f6,  %%f8  \n\t"  // r, b, t1
-            "fpmerge %%f20, %%f22, %%f24 \n\t"  // r, b, t1
-            "fpmerge %%f0,  %%f2,  %%f10 \n\t"  // 0, g, t2
-            "fpmerge %%f0,  %%f18, %%f26 \n\t"  // 0, g, t2
-            "fpmerge %%f10, %%f8,  %%f4  \n\t"  // t2, t1, msb
-            "fpmerge %%f26, %%f24, %%f20 \n\t"  // t2, t1, msb
-            "fpmerge %%f11, %%f9,  %%f6  \n\t"  // t2, t1, lsb
-            "fpmerge %%f27, %%f25, %%f22 \n\t"  // t2, t1, lsb
-            "std %%f4,  [%3]             \n\t"
-            "std %%f20, [%3 + 16]        \n\t"
-            "std %%f6,  [%3 +  8]        \n\t"
-            "std %%f22, [%3 + 24]        \n\t"
-
-            "add %0, 8, %0   \n\t"
-            "add %1, 4, %1   \n\t"
-            "add %2, 4, %2   \n\t"
-            "subcc %4, 8, %4 \n\t"
-            "bne 1b          \n\t"
-            "add %3, 32, %3  \n\t"              // delay slot
-            : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6)
-            : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + ((y + srcSliceY) >> 1) * srcStride[1]),
-            "2" (src[2] + ((y + srcSliceY) >> 1) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]),
-            "4" (c->dstW),
-            "5" (c->sparc_coeffs)
-            );
-
-    return srcSliceH;
-}
-
-// FIXME: must be changed to set alpha to 255 instead of 0
-static int vis_422P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[],
-                           int srcSliceY, int srcSliceH,
-                           uint8_t *dst[], int dstStride[])
-{
-    int y, out1, out2, out3, out4, out5, out6;
-
-    for (y = 0; y < srcSliceH; ++y)
-        __asm__ volatile (
-            YUV2RGB_INIT
-            "wr %%g0, 0xd2, %%asi        \n\t" /* ASI_FL16_P */
-            "1:                          \n\t"
-            "ldda [%1]     %%asi, %%f2   \n\t"
-            "ldda [%1 + 2] %%asi, %%f18  \n\t"
-            "ldda [%2]     %%asi, %%f4   \n\t"
-            "ldda [%2 + 2] %%asi, %%f20  \n\t"
-            "ld [%0],     %%f0           \n\t"
-            "ld [%0 + 4], %%f16          \n\t"
-            "fpmerge %%f3,  %%f3,  %%f2  \n\t"
-            "fpmerge %%f19, %%f19, %%f18 \n\t"
-            "fpmerge %%f5,  %%f5,  %%f4  \n\t"
-            "fpmerge %%f21, %%f21, %%f20 \n\t"
-            YUV2RGB_KERNEL
-            "fzero %%f0 \n\t"
-            "fpmerge %%f4,  %%f6,  %%f8  \n\t"  // r,b,t1
-            "fpmerge %%f20, %%f22, %%f24 \n\t"  // r,b,t1
-            "fpmerge %%f0,  %%f2,  %%f10 \n\t"  // 0,g,t2
-            "fpmerge %%f0,  %%f18, %%f26 \n\t"  // 0,g,t2
-            "fpmerge %%f10, %%f8,  %%f4  \n\t"  // t2,t1,msb
-            "fpmerge %%f26, %%f24, %%f20 \n\t"  // t2,t1,msb
-            "fpmerge %%f11, %%f9,  %%f6  \n\t"  // t2,t1,lsb
-            "fpmerge %%f27, %%f25, %%f22 \n\t"  // t2,t1,lsb
-            "std %%f4,  [%3]             \n\t"
-            "std %%f20, [%3 + 16]        \n\t"
-            "std %%f6,  [%3 + 8]         \n\t"
-            "std %%f22, [%3 + 24]        \n\t"
-
-            "add %0, 8, %0   \n\t"
-            "add %1, 4, %1   \n\t"
-            "add %2, 4, %2   \n\t"
-            "subcc %4, 8, %4 \n\t"
-            "bne 1b          \n\t"
-            "add %3, 32, %3  \n\t" //delay slot
-            : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6)
-            : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + (y + srcSliceY) * srcStride[1]),
-            "2" (src[2] + (y + srcSliceY) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]),
-            "4" (c->dstW),
-            "5" (c->sparc_coeffs)
-            );
-
-    return srcSliceH;
-}
-
-av_cold SwsFunc ff_yuv2rgb_init_vis(SwsContext *c)
-{
-    c->sparc_coeffs[5] = c->yCoeff;
-    c->sparc_coeffs[6] = c->vgCoeff;
-    c->sparc_coeffs[7] = c->vrCoeff;
-    c->sparc_coeffs[8] = c->ubCoeff;
-    c->sparc_coeffs[9] = c->ugCoeff;
-
-    c->sparc_coeffs[0] = (((int16_t)c->yOffset * (int16_t)c->yCoeff  >> 11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[1] = (((int16_t)c->uOffset * (int16_t)c->ubCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[2] = (((int16_t)c->uOffset * (int16_t)c->ugCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[3] = (((int16_t)c->vOffset * (int16_t)c->vgCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[4] = (((int16_t)c->vOffset * (int16_t)c->vrCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
-
-    if (c->dstFormat == AV_PIX_FMT_RGB32 && c->srcFormat == AV_PIX_FMT_YUV422P && (c->dstW & 7) == 0) {
-        av_log(c, AV_LOG_INFO,
-               "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n");
-        return vis_422P_ARGB32;
-    } else if (c->dstFormat == AV_PIX_FMT_RGB32 && c->srcFormat == AV_PIX_FMT_YUV420P && (c->dstW & 7) == 0) {
-        av_log(c, AV_LOG_INFO,
-               "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n");
-        return vis_420P_ARGB32;
-    }
-    return NULL;
-}
diff --git a/ffmpeg/libswscale/swscale-test.c b/ffmpeg/libswscale/swscale-test.c
deleted file mode 100644
index e0b887e..0000000
--- a/ffmpeg/libswscale/swscale-test.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <stdarg.h>
-
-#undef HAVE_AV_CONFIG_H
-#include "libavutil/imgutils.h"
-#include "libavutil/mem.h"
-#include "libavutil/avutil.h"
-#include "libavutil/crc.h"
-#include "libavutil/pixdesc.h"
-#include "libavutil/lfg.h"
-#include "swscale.h"
-
-/* HACK Duplicated from swscale_internal.h.
- * Should be removed when a cleaner pixel format system exists. */
-#define isGray(x)                      \
-    ((x) == AV_PIX_FMT_GRAY8       ||     \
-     (x) == AV_PIX_FMT_Y400A       ||     \
-     (x) == AV_PIX_FMT_GRAY16BE    ||     \
-     (x) == AV_PIX_FMT_GRAY16LE)
-#define hasChroma(x)                   \
-    (!(isGray(x)                ||     \
-       (x) == AV_PIX_FMT_MONOBLACK ||     \
-       (x) == AV_PIX_FMT_MONOWHITE))
-#define isALPHA(x)                     \
-    ((x) == AV_PIX_FMT_BGR32   ||         \
-     (x) == AV_PIX_FMT_BGR32_1 ||         \
-     (x) == AV_PIX_FMT_RGB32   ||         \
-     (x) == AV_PIX_FMT_RGB32_1 ||         \
-     (x) == AV_PIX_FMT_YUVA420P)
-
-static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1,
-                       int stride2, int w, int h)
-{
-    int x, y;
-    uint64_t ssd = 0;
-
-    for (y = 0; y < h; y++) {
-        for (x = 0; x < w; x++) {
-            int d = src1[x + y * stride1] - src2[x + y * stride2];
-            ssd += d * d;
-        }
-    }
-    return ssd;
-}
-
-struct Results {
-    uint64_t ssdY;
-    uint64_t ssdU;
-    uint64_t ssdV;
-    uint64_t ssdA;
-    uint32_t crc;
-};
-
-// test by ref -> src -> dst -> out & compare out against ref
-// ref & out are YV12
-static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
-                  enum AVPixelFormat srcFormat, enum AVPixelFormat dstFormat,
-                  int srcW, int srcH, int dstW, int dstH, int flags,
-                  struct Results *r)
-{
-    const AVPixFmtDescriptor *desc_yuva420p = av_pix_fmt_desc_get(AV_PIX_FMT_YUVA420P);
-    const AVPixFmtDescriptor *desc_src      = av_pix_fmt_desc_get(srcFormat);
-    const AVPixFmtDescriptor *desc_dst      = av_pix_fmt_desc_get(dstFormat);
-    static enum AVPixelFormat cur_srcFormat;
-    static int cur_srcW, cur_srcH;
-    static uint8_t *src[4];
-    static int srcStride[4];
-    uint8_t *dst[4] = { 0 };
-    uint8_t *out[4] = { 0 };
-    int dstStride[4] = {0};
-    int i;
-    uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0;
-    struct SwsContext *dstContext = NULL, *outContext = NULL;
-    uint32_t crc = 0;
-    int res      = 0;
-
-    if (cur_srcFormat != srcFormat || cur_srcW != srcW || cur_srcH != srcH) {
-        struct SwsContext *srcContext = NULL;
-        int p;
-
-        for (p = 0; p < 4; p++)
-            av_freep(&src[p]);
-
-        av_image_fill_linesizes(srcStride, srcFormat, srcW);
-        for (p = 0; p < 4; p++) {
-            srcStride[p] = FFALIGN(srcStride[p], 16);
-            if (srcStride[p])
-                src[p] = av_mallocz(srcStride[p] * srcH + 16);
-            if (srcStride[p] && !src[p]) {
-                perror("Malloc");
-                res = -1;
-                goto end;
-            }
-        }
-        srcContext = sws_getContext(w, h, AV_PIX_FMT_YUVA420P, srcW, srcH,
-                                    srcFormat, SWS_BILINEAR, NULL, NULL, NULL);
-        if (!srcContext) {
-            fprintf(stderr, "Failed to get %s ---> %s\n",
-                    desc_yuva420p->name,
-                    desc_src->name);
-            res = -1;
-            goto end;
-        }
-        sws_scale(srcContext, (const uint8_t * const*)ref, refStride, 0, h, src, srcStride);
-        sws_freeContext(srcContext);
-
-        cur_srcFormat = srcFormat;
-        cur_srcW      = srcW;
-        cur_srcH      = srcH;
-    }
-
-    av_image_fill_linesizes(dstStride, dstFormat, dstW);
-    for (i = 0; i < 4; i++) {
-        /* Image buffers passed into libswscale can be allocated any way you
-         * prefer, as long as they're aligned enough for the architecture, and
-         * they're freed appropriately (such as using av_free for buffers
-         * allocated with av_malloc). */
-        /* An extra 16 bytes is being allocated because some scalers may write
-         * out of bounds. */
-        dstStride[i] = FFALIGN(dstStride[i], 16);
-        if (dstStride[i])
-            dst[i] = av_mallocz(dstStride[i] * dstH + 16);
-        if (dstStride[i] && !dst[i]) {
-            perror("Malloc");
-            res = -1;
-
-            goto end;
-        }
-    }
-
-    dstContext = sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat,
-                                flags, NULL, NULL, NULL);
-    if (!dstContext) {
-        fprintf(stderr, "Failed to get %s ---> %s\n",
-                desc_src->name, desc_dst->name);
-        res = -1;
-        goto end;
-    }
-
-    printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
-           desc_src->name, srcW, srcH,
-           desc_dst->name, dstW, dstH,
-           flags);
-    fflush(stdout);
-
-    sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
-
-    for (i = 0; i < 4 && dstStride[i]; i++)
-        crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
-                     dstStride[i] * dstH);
-
-    if (r && crc == r->crc) {
-        ssdY = r->ssdY;
-        ssdU = r->ssdU;
-        ssdV = r->ssdV;
-        ssdA = r->ssdA;
-    } else {
-        for (i = 0; i < 4; i++) {
-            refStride[i] = FFALIGN(refStride[i], 16);
-            if (refStride[i])
-                out[i] = av_mallocz(refStride[i] * h);
-            if (refStride[i] && !out[i]) {
-                perror("Malloc");
-                res = -1;
-                goto end;
-            }
-        }
-        outContext = sws_getContext(dstW, dstH, dstFormat, w, h,
-                                    AV_PIX_FMT_YUVA420P, SWS_BILINEAR,
-                                    NULL, NULL, NULL);
-        if (!outContext) {
-            fprintf(stderr, "Failed to get %s ---> %s\n",
-                    desc_dst->name,
-                    desc_yuva420p->name);
-            res = -1;
-            goto end;
-        }
-        sws_scale(outContext, (const uint8_t * const*)dst, dstStride, 0, dstH, out, refStride);
-
-        ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
-        if (hasChroma(srcFormat) && hasChroma(dstFormat)) {
-            //FIXME check that output is really gray
-            ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1],
-                          (w + 1) >> 1, (h + 1) >> 1);
-            ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2],
-                          (w + 1) >> 1, (h + 1) >> 1);
-        }
-        if (isALPHA(srcFormat) && isALPHA(dstFormat))
-            ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h);
-
-        ssdY /= w * h;
-        ssdU /= w * h / 4;
-        ssdV /= w * h / 4;
-        ssdA /= w * h;
-
-        sws_freeContext(outContext);
-
-        for (i = 0; i < 4; i++)
-            if (refStride[i])
-                av_free(out[i]);
-    }
-
-    printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n",
-           crc, ssdY, ssdU, ssdV, ssdA);
-
-end:
-    sws_freeContext(dstContext);
-
-    for (i = 0; i < 4; i++)
-        if (dstStride[i])
-            av_free(dst[i]);
-
-    return res;
-}
-
-static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h,
-                     enum AVPixelFormat srcFormat_in,
-                     enum AVPixelFormat dstFormat_in)
-{
-    const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC,
-                          SWS_X, SWS_POINT, SWS_AREA, 0 };
-    const int srcW   = w;
-    const int srcH   = h;
-    const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 };
-    const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 };
-    enum AVPixelFormat srcFormat, dstFormat;
-    const AVPixFmtDescriptor *desc_src, *desc_dst;
-
-    for (srcFormat = srcFormat_in != AV_PIX_FMT_NONE ? srcFormat_in : 0;
-         srcFormat < AV_PIX_FMT_NB; srcFormat++) {
-        if (!sws_isSupportedInput(srcFormat) ||
-            !sws_isSupportedOutput(srcFormat))
-            continue;
-
-        desc_src = av_pix_fmt_desc_get(srcFormat);
-
-        for (dstFormat = dstFormat_in != AV_PIX_FMT_NONE ? dstFormat_in : 0;
-             dstFormat < AV_PIX_FMT_NB; dstFormat++) {
-            int i, j, k;
-            int res = 0;
-
-            if (!sws_isSupportedInput(dstFormat) ||
-                !sws_isSupportedOutput(dstFormat))
-                continue;
-
-            desc_dst = av_pix_fmt_desc_get(dstFormat);
-
-            printf("%s -> %s\n", desc_src->name, desc_dst->name);
-            fflush(stdout);
-
-            for (k = 0; flags[k] && !res; k++)
-                for (i = 0; dstW[i] && !res; i++)
-                    for (j = 0; dstH[j] && !res; j++)
-                        res = doTest(ref, refStride, w, h,
-                                     srcFormat, dstFormat,
-                                     srcW, srcH, dstW[i], dstH[j], flags[k],
-                                     NULL);
-            if (dstFormat_in != AV_PIX_FMT_NONE)
-                break;
-        }
-        if (srcFormat_in != AV_PIX_FMT_NONE)
-            break;
-    }
-}
-
-static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp,
-                    enum AVPixelFormat srcFormat_in,
-                    enum AVPixelFormat dstFormat_in)
-{
-    char buf[256];
-
-    while (fgets(buf, sizeof(buf), fp)) {
-        struct Results r;
-        enum AVPixelFormat srcFormat;
-        char srcStr[12];
-        int srcW, srcH;
-        enum AVPixelFormat dstFormat;
-        char dstStr[12];
-        int dstW, dstH;
-        int flags;
-        int ret;
-
-        ret = sscanf(buf,
-                     " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x"
-                     " SSD=%"SCNd64 ", %"SCNd64 ", %"SCNd64 ", %"SCNd64 "\n",
-                     srcStr, &srcW, &srcH, dstStr, &dstW, &dstH,
-                     &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA);
-        if (ret != 12) {
-            srcStr[0] = dstStr[0] = 0;
-            ret       = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr);
-        }
-
-        srcFormat = av_get_pix_fmt(srcStr);
-        dstFormat = av_get_pix_fmt(dstStr);
-
-        if (srcFormat == AV_PIX_FMT_NONE || dstFormat == AV_PIX_FMT_NONE ||
-            srcW > 8192U || srcH > 8192U || dstW > 8192U || dstH > 8192U) {
-            fprintf(stderr, "malformed input file\n");
-            return -1;
-        }
-        if ((srcFormat_in != AV_PIX_FMT_NONE && srcFormat_in != srcFormat) ||
-            (dstFormat_in != AV_PIX_FMT_NONE && dstFormat_in != dstFormat))
-            continue;
-        if (ret != 12) {
-            printf("%s", buf);
-            continue;
-        }
-
-        doTest(ref, refStride, w, h,
-               srcFormat, dstFormat,
-               srcW, srcH, dstW, dstH, flags,
-               &r);
-    }
-
-    return 0;
-}
-
-#define W 96
-#define H 96
-
-int main(int argc, char **argv)
-{
-    enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
-    enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
-    uint8_t *rgb_data   = av_malloc(W * H * 4);
-    const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
-    int rgb_stride[4]   = { 4 * W, 0, 0, 0 };
-    uint8_t *data       = av_malloc(4 * W * H);
-    uint8_t *src[4]     = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
-    int stride[4]       = { W, W, W, W };
-    int x, y;
-    struct SwsContext *sws;
-    AVLFG rand;
-    int res = -1;
-    int i;
-    FILE *fp = NULL;
-
-    if (!rgb_data || !data)
-        return -1;
-
-    for (i = 1; i < argc; i += 2) {
-        if (argv[i][0] != '-' || i + 1 == argc)
-            goto bad_option;
-        if (!strcmp(argv[i], "-ref")) {
-            fp = fopen(argv[i + 1], "r");
-            if (!fp) {
-                fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
-                goto error;
-            }
-        } else if (!strcmp(argv[i], "-src")) {
-            srcFormat = av_get_pix_fmt(argv[i + 1]);
-            if (srcFormat == AV_PIX_FMT_NONE) {
-                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
-                return -1;
-            }
-        } else if (!strcmp(argv[i], "-dst")) {
-            dstFormat = av_get_pix_fmt(argv[i + 1]);
-            if (dstFormat == AV_PIX_FMT_NONE) {
-                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
-                return -1;
-            }
-        } else {
-bad_option:
-            fprintf(stderr, "bad option or argument missing (%s)\n", argv[i]);
-            goto error;
-        }
-    }
-
-    sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
-                         AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
-
-    av_lfg_init(&rand, 1);
-
-    for (y = 0; y < H; y++)
-        for (x = 0; x < W * 4; x++)
-            rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
-    sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
-    sws_freeContext(sws);
-    av_free(rgb_data);
-
-    if(fp) {
-        res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
-        fclose(fp);
-    } else {
-        selfTest(src, stride, W, H, srcFormat, dstFormat);
-        res = 0;
-    }
-error:
-    av_free(data);
-
-    return res;
-}
diff --git a/ffmpeg/libswscale/swscale.c b/ffmpeg/libswscale/swscale.c
deleted file mode 100644
index 710dce3..0000000
--- a/ffmpeg/libswscale/swscale.c
+++ /dev/null
@@ -1,1114 +0,0 @@
-/*
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "libavutil/avassert.h"
-#include "libavutil/avutil.h"
-#include "libavutil/bswap.h"
-#include "libavutil/cpu.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/mathematics.h"
-#include "libavutil/pixdesc.h"
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale_internal.h"
-#include "swscale.h"
-
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = {
-    {  36, 68,  60, 92,  34, 66,  58, 90, },
-    { 100,  4, 124, 28,  98,  2, 122, 26, },
-    {  52, 84,  44, 76,  50, 82,  42, 74, },
-    { 116, 20, 108, 12, 114, 18, 106, 10, },
-    {  32, 64,  56, 88,  38, 70,  62, 94, },
-    {  96,  0, 120, 24, 102,  6, 126, 30, },
-    {  48, 80,  40, 72,  54, 86,  46, 78, },
-    { 112, 16, 104,  8, 118, 22, 110, 14, },
-    {  36, 68,  60, 92,  34, 66,  58, 90, },
-};
-
-DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = {
-    64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
-                                       int height, int y, uint8_t val)
-{
-    int i;
-    uint8_t *ptr = plane + stride * y;
-    for (i = 0; i < height; i++) {
-        memset(ptr, val, width);
-        ptr += stride;
-    }
-}
-
-static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
-                           const uint8_t *_src, const int16_t *filter,
-                           const int32_t *filterPos, int filterSize)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
-    int i;
-    int32_t *dst        = (int32_t *) _dst;
-    const uint16_t *src = (const uint16_t *) _src;
-    int bits            = desc->comp[0].depth_minus1;
-    int sh              = bits - 4;
-
-    if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth_minus1<15)
-        sh= 9;
-
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int srcPos = filterPos[i];
-        int val    = 0;
-
-        for (j = 0; j < filterSize; j++) {
-            val += src[srcPos + j] * filter[filterSize * i + j];
-        }
-        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
-        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
-    }
-}
-
-static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
-                           const uint8_t *_src, const int16_t *filter,
-                           const int32_t *filterPos, int filterSize)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
-    int i;
-    const uint16_t *src = (const uint16_t *) _src;
-    int sh              = desc->comp[0].depth_minus1;
-
-    if(sh<15)
-        sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : desc->comp[0].depth_minus1;
-
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int srcPos = filterPos[i];
-        int val    = 0;
-
-        for (j = 0; j < filterSize; j++) {
-            val += src[srcPos + j] * filter[filterSize * i + j];
-        }
-        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
-        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
-    }
-}
-
-// bilinear / bicubic scaling
-static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
-                          const uint8_t *src, const int16_t *filter,
-                          const int32_t *filterPos, int filterSize)
-{
-    int i;
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int srcPos = filterPos[i];
-        int val    = 0;
-        for (j = 0; j < filterSize; j++) {
-            val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
-        }
-        dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
-    }
-}
-
-static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
-                          const uint8_t *src, const int16_t *filter,
-                          const int32_t *filterPos, int filterSize)
-{
-    int i;
-    int32_t *dst = (int32_t *) _dst;
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int srcPos = filterPos[i];
-        int val    = 0;
-        for (j = 0; j < filterSize; j++) {
-            val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
-        }
-        dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
-    }
-}
-
-// FIXME all pal and rgb srcFormats could do this conversion as well
-// FIXME all scalers more complex than bilinear could do half of this transform
-static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264
-        dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264
-    }
-}
-
-static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469
-        dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469
-    }
-}
-
-static void lumRangeToJpeg_c(int16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
-}
-
-static void lumRangeFromJpeg_c(int16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (dst[i] * 14071 + 33561947) >> 14;
-}
-
-static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
-{
-    int i;
-    int32_t *dstU = (int32_t *) _dstU;
-    int32_t *dstV = (int32_t *) _dstV;
-    for (i = 0; i < width; i++) {
-        dstU[i] = (FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
-        dstV[i] = (FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
-    }
-}
-
-static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
-{
-    int i;
-    int32_t *dstU = (int32_t *) _dstU;
-    int32_t *dstV = (int32_t *) _dstV;
-    for (i = 0; i < width; i++) {
-        dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469
-        dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469
-    }
-}
-
-static void lumRangeToJpeg16_c(int16_t *_dst, int width)
-{
-    int i;
-    int32_t *dst = (int32_t *) _dst;
-    for (i = 0; i < width; i++)
-        dst[i] = (FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
-}
-
-static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
-{
-    int i;
-    int32_t *dst = (int32_t *) _dst;
-    for (i = 0; i < width; i++)
-        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
-}
-
-static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
-                           const uint8_t *src, int srcW, int xInc)
-{
-    int i;
-    unsigned int xpos = 0;
-    for (i = 0; i < dstWidth; i++) {
-        register unsigned int xx     = xpos >> 16;
-        register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
-        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
-        xpos  += xInc;
-    }
-    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
-        dst[i] = src[srcW-1]*128;
-}
-
-// *** horizontal scale Y line to temp buffer
-static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
-                                     const uint8_t *src_in[4],
-                                     int srcW, int xInc,
-                                     const int16_t *hLumFilter,
-                                     const int32_t *hLumFilterPos,
-                                     int hLumFilterSize,
-                                     uint8_t *formatConvBuffer,
-                                     uint32_t *pal, int isAlpha)
-{
-    void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
-        isAlpha ? c->alpToYV12 : c->lumToYV12;
-    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
-    const uint8_t *src = src_in[isAlpha ? 3 : 0];
-
-    if (toYV12) {
-        toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
-        src = formatConvBuffer;
-    } else if (c->readLumPlanar && !isAlpha) {
-        c->readLumPlanar(formatConvBuffer, src_in, srcW, c->input_rgb2yuv_table);
-        src = formatConvBuffer;
-    } else if (c->readAlpPlanar && isAlpha) {
-        c->readAlpPlanar(formatConvBuffer, src_in, srcW, NULL);
-        src = formatConvBuffer;
-    }
-
-    if (!c->hyscale_fast) {
-        c->hyScale(c, dst, dstWidth, src, hLumFilter,
-                   hLumFilterPos, hLumFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
-    }
-
-    if (convertRange)
-        convertRange(dst, dstWidth);
-}
-
-static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                           int dstWidth, const uint8_t *src1,
-                           const uint8_t *src2, int srcW, int xInc)
-{
-    int i;
-    unsigned int xpos = 0;
-    for (i = 0; i < dstWidth; i++) {
-        register unsigned int xx     = xpos >> 16;
-        register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
-        dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
-        dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
-        xpos   += xInc;
-    }
-    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-        dst1[i] = src1[srcW-1]*128;
-        dst2[i] = src2[srcW-1]*128;
-    }
-}
-
-static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
-                                     int16_t *dst2, int dstWidth,
-                                     const uint8_t *src_in[4],
-                                     int srcW, int xInc,
-                                     const int16_t *hChrFilter,
-                                     const int32_t *hChrFilterPos,
-                                     int hChrFilterSize,
-                                     uint8_t *formatConvBuffer, uint32_t *pal)
-{
-    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
-    if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer +
-                        FFALIGN(srcW*2+78, 16);
-        c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= buf2;
-    } else if (c->readChrPlanar) {
-        uint8_t *buf2 = formatConvBuffer +
-                        FFALIGN(srcW*2+78, 16);
-        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW, c->input_rgb2yuv_table);
-        src1 = formatConvBuffer;
-        src2 = buf2;
-    }
-
-    if (!c->hcscale_fast) {
-        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
-    }
-
-    if (c->chrConvertRange)
-        c->chrConvertRange(dst1, dst2, dstWidth);
-}
-
-#define DEBUG_SWSCALE_BUFFERS 0
-#define DEBUG_BUFFERS(...)                      \
-    if (DEBUG_SWSCALE_BUFFERS)                  \
-        av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
-
-static int swscale(SwsContext *c, const uint8_t *src[],
-                   int srcStride[], int srcSliceY,
-                   int srcSliceH, uint8_t *dst[], int dstStride[])
-{
-    /* load a few things into local vars to make the code more readable?
-     * and faster */
-    const int srcW                   = c->srcW;
-    const int dstW                   = c->dstW;
-    const int dstH                   = c->dstH;
-    const int chrDstW                = c->chrDstW;
-    const int chrSrcW                = c->chrSrcW;
-    const int lumXInc                = c->lumXInc;
-    const int chrXInc                = c->chrXInc;
-    const enum AVPixelFormat dstFormat = c->dstFormat;
-    const int flags                  = c->flags;
-    int32_t *vLumFilterPos           = c->vLumFilterPos;
-    int32_t *vChrFilterPos           = c->vChrFilterPos;
-    int32_t *hLumFilterPos           = c->hLumFilterPos;
-    int32_t *hChrFilterPos           = c->hChrFilterPos;
-    int16_t *hLumFilter              = c->hLumFilter;
-    int16_t *hChrFilter              = c->hChrFilter;
-    int32_t *lumMmxFilter            = c->lumMmxFilter;
-    int32_t *chrMmxFilter            = c->chrMmxFilter;
-    const int vLumFilterSize         = c->vLumFilterSize;
-    const int vChrFilterSize         = c->vChrFilterSize;
-    const int hLumFilterSize         = c->hLumFilterSize;
-    const int hChrFilterSize         = c->hChrFilterSize;
-    int16_t **lumPixBuf              = c->lumPixBuf;
-    int16_t **chrUPixBuf             = c->chrUPixBuf;
-    int16_t **chrVPixBuf             = c->chrVPixBuf;
-    int16_t **alpPixBuf              = c->alpPixBuf;
-    const int vLumBufSize            = c->vLumBufSize;
-    const int vChrBufSize            = c->vChrBufSize;
-    uint8_t *formatConvBuffer        = c->formatConvBuffer;
-    uint32_t *pal                    = c->pal_yuv;
-    yuv2planar1_fn yuv2plane1        = c->yuv2plane1;
-    yuv2planarX_fn yuv2planeX        = c->yuv2planeX;
-    yuv2interleavedX_fn yuv2nv12cX   = c->yuv2nv12cX;
-    yuv2packed1_fn yuv2packed1       = c->yuv2packed1;
-    yuv2packed2_fn yuv2packed2       = c->yuv2packed2;
-    yuv2packedX_fn yuv2packedX       = c->yuv2packedX;
-    yuv2anyX_fn yuv2anyX             = c->yuv2anyX;
-    const int chrSrcSliceY           =                srcSliceY >> c->chrSrcVSubSample;
-    const int chrSrcSliceH           = FF_CEIL_RSHIFT(srcSliceH,   c->chrSrcVSubSample);
-    int should_dither                = is9_OR_10BPS(c->srcFormat) ||
-                                       is16BPS(c->srcFormat);
-    int lastDstY;
-
-    /* vars which will change and which we need to store back in the context */
-    int dstY         = c->dstY;
-    int lumBufIndex  = c->lumBufIndex;
-    int chrBufIndex  = c->chrBufIndex;
-    int lastInLumBuf = c->lastInLumBuf;
-    int lastInChrBuf = c->lastInChrBuf;
-
-    if (!usePal(c->srcFormat)) {
-        pal = c->input_rgb2yuv_table;
-    }
-
-    if (isPacked(c->srcFormat)) {
-        src[0] =
-        src[1] =
-        src[2] =
-        src[3] = src[0];
-        srcStride[0] =
-        srcStride[1] =
-        srcStride[2] =
-        srcStride[3] = srcStride[0];
-    }
-    srcStride[1] <<= c->vChrDrop;
-    srcStride[2] <<= c->vChrDrop;
-
-    DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
-                  src[0], srcStride[0], src[1], srcStride[1],
-                  src[2], srcStride[2], src[3], srcStride[3],
-                  dst[0], dstStride[0], dst[1], dstStride[1],
-                  dst[2], dstStride[2], dst[3], dstStride[3]);
-    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
-                  srcSliceY, srcSliceH, dstY, dstH);
-    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
-                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
-
-    if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
-        dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
-        static int warnedAlready = 0; // FIXME maybe move this into the context
-        if (flags & SWS_PRINT_INFO && !warnedAlready) {
-            av_log(c, AV_LOG_WARNING,
-                   "Warning: dstStride is not aligned!\n"
-                   "         ->cannot do aligned memory accesses anymore\n");
-            warnedAlready = 1;
-        }
-    }
-
-    if (   (uintptr_t)dst[0]%16 || (uintptr_t)dst[1]%16 || (uintptr_t)dst[2]%16
-        || (uintptr_t)src[0]%16 || (uintptr_t)src[1]%16 || (uintptr_t)src[2]%16
-        || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
-        || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
-    ) {
-        static int warnedAlready=0;
-        int cpu_flags = av_get_cpu_flags();
-        if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
-            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
-            warnedAlready=1;
-        }
-    }
-
-    /* Note the user might start scaling the picture in the middle so this
-     * will not get executed. This is not really intended but works
-     * currently, so people might do it. */
-    if (srcSliceY == 0) {
-        lumBufIndex  = -1;
-        chrBufIndex  = -1;
-        dstY         = 0;
-        lastInLumBuf = -1;
-        lastInChrBuf = -1;
-    }
-
-    if (!should_dither) {
-        c->chrDither8 = c->lumDither8 = sws_pb_64;
-    }
-    lastDstY = dstY;
-
-    for (; dstY < dstH; dstY++) {
-        const int chrDstY = dstY >> c->chrDstVSubSample;
-        uint8_t *dest[4]  = {
-            dst[0] + dstStride[0] * dstY,
-            dst[1] + dstStride[1] * chrDstY,
-            dst[2] + dstStride[2] * chrDstY,
-            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
-        };
-        int use_mmx_vfilter= c->use_mmx_vfilter;
-
-        // First line needed as input
-        const int firstLumSrcY  = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
-        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
-        // First line needed as input
-        const int firstChrSrcY  = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
-
-        // Last line needed as input
-        int lastLumSrcY  = FFMIN(c->srcH,    firstLumSrcY  + vLumFilterSize) - 1;
-        int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
-        int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
-        int enough_lines;
-
-        // handle holes (FAST_BILINEAR & weird filters)
-        if (firstLumSrcY > lastInLumBuf)
-            lastInLumBuf = firstLumSrcY - 1;
-        if (firstChrSrcY > lastInChrBuf)
-            lastInChrBuf = firstChrSrcY - 1;
-        av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
-        av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
-
-        DEBUG_BUFFERS("dstY: %d\n", dstY);
-        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
-                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
-        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
-                      firstChrSrcY, lastChrSrcY, lastInChrBuf);
-
-        // Do we have enough lines in this slice to output the dstY line
-        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
-                       lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample);
-
-        if (!enough_lines) {
-            lastLumSrcY = srcSliceY + srcSliceH - 1;
-            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
-            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
-                          lastLumSrcY, lastChrSrcY);
-        }
-
-        // Do horizontal scaling
-        while (lastInLumBuf < lastLumSrcY) {
-            const uint8_t *src1[4] = {
-                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
-                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
-                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
-                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
-            };
-            lumBufIndex++;
-            av_assert0(lumBufIndex < 2 * vLumBufSize);
-            av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH);
-            av_assert0(lastInLumBuf + 1 - srcSliceY >= 0);
-            hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
-                    hLumFilter, hLumFilterPos, hLumFilterSize,
-                    formatConvBuffer, pal, 0);
-            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
-                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
-                        formatConvBuffer, pal, 1);
-            lastInLumBuf++;
-            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
-                          lumBufIndex, lastInLumBuf);
-        }
-        while (lastInChrBuf < lastChrSrcY) {
-            const uint8_t *src1[4] = {
-                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
-                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
-                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
-                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
-            };
-            chrBufIndex++;
-            av_assert0(chrBufIndex < 2 * vChrBufSize);
-            av_assert0(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
-            av_assert0(lastInChrBuf + 1 - chrSrcSliceY >= 0);
-            // FIXME replace parameters through context struct (some at least)
-
-            if (c->needs_hcscale)
-                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
-                        chrDstW, src1, chrSrcW, chrXInc,
-                        hChrFilter, hChrFilterPos, hChrFilterSize,
-                        formatConvBuffer, pal);
-            lastInChrBuf++;
-            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
-                          chrBufIndex, lastInChrBuf);
-        }
-        // wrap buf index around to stay inside the ring buffer
-        if (lumBufIndex >= vLumBufSize)
-            lumBufIndex -= vLumBufSize;
-        if (chrBufIndex >= vChrBufSize)
-            chrBufIndex -= vChrBufSize;
-        if (!enough_lines)
-            break;  // we can't output a dstY line so let's try with the next slice
-
-#if HAVE_MMX_INLINE
-        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
-                              lastInLumBuf, lastInChrBuf);
-#endif
-        if (should_dither) {
-            c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
-            c->lumDither8 = ff_dither_8x8_128[dstY    & 7];
-        }
-        if (dstY >= dstH - 2) {
-            /* hmm looks like we can't use MMX here without overwriting
-             * this array's tail */
-            ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
-                                     &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
-            use_mmx_vfilter= 0;
-        }
-
-        {
-            const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPixBuf  + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
-                                         (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            int16_t *vLumFilter = c->vLumFilter;
-            int16_t *vChrFilter = c->vChrFilter;
-
-            if (isPlanarYUV(dstFormat) ||
-                (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
-                const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
-
-                vLumFilter +=    dstY * vLumFilterSize;
-                vChrFilter += chrDstY * vChrFilterSize;
-
-//                 av_assert0(use_mmx_vfilter != (
-//                                yuv2planeX == yuv2planeX_10BE_c
-//                             || yuv2planeX == yuv2planeX_10LE_c
-//                             || yuv2planeX == yuv2planeX_9BE_c
-//                             || yuv2planeX == yuv2planeX_9LE_c
-//                             || yuv2planeX == yuv2planeX_16BE_c
-//                             || yuv2planeX == yuv2planeX_16LE_c
-//                             || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
-
-                if(use_mmx_vfilter){
-                    vLumFilter= (int16_t *)c->lumMmxFilter;
-                    vChrFilter= (int16_t *)c->chrMmxFilter;
-                }
-
-                if (vLumFilterSize == 1) {
-                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
-                } else {
-                    yuv2planeX(vLumFilter, vLumFilterSize,
-                               lumSrcPtr, dest[0],
-                               dstW, c->lumDither8, 0);
-                }
-
-                if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
-                    if (yuv2nv12cX) {
-                        yuv2nv12cX(c, vChrFilter,
-                                   vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
-                                   dest[1], chrDstW);
-                    } else if (vChrFilterSize == 1) {
-                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
-                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
-                    } else {
-                        yuv2planeX(vChrFilter,
-                                   vChrFilterSize, chrUSrcPtr, dest[1],
-                                   chrDstW, c->chrDither8, 0);
-                        yuv2planeX(vChrFilter,
-                                   vChrFilterSize, chrVSrcPtr, dest[2],
-                                   chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
-                    }
-                }
-
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    if(use_mmx_vfilter){
-                        vLumFilter= (int16_t *)c->alpMmxFilter;
-                    }
-                    if (vLumFilterSize == 1) {
-                        yuv2plane1(alpSrcPtr[0], dest[3], dstW,
-                                   c->lumDither8, 0);
-                    } else {
-                        yuv2planeX(vLumFilter,
-                                   vLumFilterSize, alpSrcPtr, dest[3],
-                                   dstW, c->lumDither8, 0);
-                    }
-                }
-            } else if (yuv2packedX) {
-                av_assert1(lumSrcPtr  + vLumFilterSize - 1 < (const int16_t **)lumPixBuf  + vLumBufSize * 2);
-                av_assert1(chrUSrcPtr + vChrFilterSize - 1 < (const int16_t **)chrUPixBuf + vChrBufSize * 2);
-                if (c->yuv2packed1 && vLumFilterSize == 1 &&
-                    vChrFilterSize <= 2) { // unscaled RGB
-                    int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
-                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
-                                alpPixBuf ? *alpSrcPtr : NULL,
-                                dest[0], dstW, chrAlpha, dstY);
-                } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
-                           vChrFilterSize == 2) { // bilinear upscale RGB
-                    int lumAlpha = vLumFilter[2 * dstY + 1];
-                    int chrAlpha = vChrFilter[2 * dstY + 1];
-                    lumMmxFilter[2] =
-                    lumMmxFilter[3] = vLumFilter[2 * dstY]    * 0x10001;
-                    chrMmxFilter[2] =
-                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
-                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
-                                alpPixBuf ? alpSrcPtr : NULL,
-                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
-                } else { // general RGB
-                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
-                                lumSrcPtr, vLumFilterSize,
-                                vChrFilter + dstY * vChrFilterSize,
-                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest[0], dstW, dstY);
-                }
-            } else {
-                av_assert1(!yuv2packed1 && !yuv2packed2);
-                yuv2anyX(c, vLumFilter + dstY * vLumFilterSize,
-                         lumSrcPtr, vLumFilterSize,
-                         vChrFilter + dstY * vChrFilterSize,
-                         chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                         alpSrcPtr, dest, dstW, dstY);
-            }
-        }
-    }
-    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) {
-        int length = dstW;
-        int height = dstY - lastDstY;
-
-        if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
-            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
-            fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
-                    1, desc->comp[3].depth_minus1,
-                    isBE(dstFormat));
-        } else
-            fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
-    }
-
-#if HAVE_MMXEXT_INLINE
-    if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
-        __asm__ volatile ("sfence" ::: "memory");
-#endif
-    emms_c();
-
-    /* store changed local vars back in the context */
-    c->dstY         = dstY;
-    c->lumBufIndex  = lumBufIndex;
-    c->chrBufIndex  = chrBufIndex;
-    c->lastInLumBuf = lastInLumBuf;
-    c->lastInChrBuf = lastInChrBuf;
-
-    return dstY - lastDstY;
-}
-
-/**
- * Fill in the function pointers of a scaler context.
- *
- * Output and input converters are set up by ff_sws_init_output_funcs() and
- * ff_sws_init_input_funcs(). This function then picks the horizontal
- * scalers from the source/destination bit depth, the luma/chroma range
- * converters when source and destination ranges differ, and finally sets
- * needs_hcscale.
- *
- * @param c scaler context; reads srcFormat, dstFormat, srcBpc, dstBpc,
- *          flags, srcRange and dstRange
- */
-static av_cold void sws_init_swscale(SwsContext *c)
-{
-    const enum AVPixelFormat srcFormat = c->srcFormat;
-
-    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
-                             &c->yuv2nv12cX, &c->yuv2packed1,
-                             &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
-
-    ff_sws_init_input_funcs(c);
-
-    /* horizontal scaler: chosen by source depth (8 bit or not) and
-     * destination depth (up to 14 bit or above) */
-    if (c->srcBpc != 8) {
-        if (c->dstBpc > 14)
-            c->hyScale = c->hcScale = hScale16To19_c;
-        else
-            c->hyScale = c->hcScale = hScale16To15_c;
-    } else if (c->dstBpc > 14) {
-        c->hyScale = c->hcScale = hScale8To19_c;
-    } else {
-        c->hyScale = c->hcScale = hScale8To15_c;
-        /* the fast bilinear routines are only installed on this path */
-        if (c->flags & SWS_FAST_BILINEAR) {
-            c->hyscale_fast = hyscale_fast_c;
-            c->hcscale_fast = hcscale_fast_c;
-        }
-    }
-
-    /* range conversion: only when the ranges differ and the destination
-     * is not an RGB format */
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->srcRange) {
-            if (c->dstBpc <= 14) {
-                c->lumConvertRange = lumRangeFromJpeg_c;
-                c->chrConvertRange = chrRangeFromJpeg_c;
-            } else {
-                c->lumConvertRange = lumRangeFromJpeg16_c;
-                c->chrConvertRange = chrRangeFromJpeg16_c;
-            }
-        } else {
-            if (c->dstBpc <= 14) {
-                c->lumConvertRange = lumRangeToJpeg_c;
-                c->chrConvertRange = chrRangeToJpeg_c;
-            } else {
-                c->lumConvertRange = lumRangeToJpeg16_c;
-                c->chrConvertRange = chrRangeToJpeg16_c;
-            }
-        }
-    }
-
-    /* needs_hcscale is set unless either side is gray or the source is
-     * a 1 bpp mono format */
-    if (!isGray(srcFormat) && !isGray(c->dstFormat) &&
-        srcFormat != AV_PIX_FMT_MONOBLACK && srcFormat != AV_PIX_FMT_MONOWHITE)
-        c->needs_hcscale = 1;
-}
-
-/**
- * Initialise the scaler function pointers of a context and return the
- * scaling entry point.
- *
- * sws_init_swscale() runs first; the PPC and x86 init hooks are then
- * called when the corresponding ARCH_* build constant is non-zero.
- *
- * @param c scaler context to initialise
- * @return  the swscale() function of this file
- */
-SwsFunc ff_getSwsFunc(SwsContext *c)
-{
-    sws_init_swscale(c);
-
-    if (ARCH_PPC) {
-        ff_sws_init_swscale_ppc(c);
-    }
-    if (ARCH_X86) {
-        ff_sws_init_swscale_x86(c);
-    }
-
-    return swscale;
-}
-
-/**
- * Clear the plane pointers that a pixel format does not use.
- *
- * Plane 3 is cleared for formats without alpha. For non-planar formats
- * planes 2 and 3 are cleared as well, and plane 1 too unless usePal()
- * reports that the format uses a palette.
- *
- * @param src    array of plane pointers, modified in place
- * @param format pixel format the pointers belong to
- */
-static void reset_ptr(const uint8_t *src[], int format)
-{
-    if (!isALPHA(format))
-        src[3] = NULL;
-
-    if (isPlanar(format))
-        return;
-
-    /* packed format: only plane 0 (and possibly plane 1) stays valid */
-    src[2] = NULL;
-    src[3] = NULL;
-    if (!usePal(format))
-        src[1] = NULL;
-}
-
-/**
- * Check that the planes referenced by a pixel format are usable.
- *
- * For each of the four component slots of the format descriptor, the
- * plane it refers to must have a non-NULL data pointer and a non-zero
- * line size.
- *
- * @param data      plane pointers
- * @param pix_fmt   pixel format describing the planes
- * @param linesizes line size of each plane
- * @return 1 if all referenced planes are set, 0 otherwise (including
- *         when pix_fmt has no descriptor)
- */
-static int check_image_pointers(const uint8_t * const data[4], enum AVPixelFormat pix_fmt,
-                                const int linesizes[4])
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    int i;
-
-    /* av_pix_fmt_desc_get() returns NULL for an unknown format; report
-     * the image as unusable instead of dereferencing NULL */
-    if (!desc)
-        return 0;
-
-    for (i = 0; i < 4; i++) {
-        int plane = desc->comp[i].plane;
-        if (!data[plane] || !linesizes[plane])
-            return 0;
-    }
-
-    return 1;
-}
-
-/**
- * Convert rows of packed 16-bit XYZ triplets to packed 16-bit RGB triplets.
- *
- * Each sample is read in the byte order of c->srcFormat, reduced to
- * 12 bits, passed through c->xyzgamma, multiplied by c->xyz2rgb_matrix
- * (result shifted right by 12), clipped to 0..4095, passed through
- * c->rgbgamma and written back shifted left by 4.
- *
- * @param c      scaler context providing the tables, matrix and srcFormat
- * @param dst    destination samples
- * @param src    source samples
- * @param stride row length in 16-bit units; used both as the per-row
- *               sample limit and as the row advance for src and dst
- * @param h      number of rows
- */
-static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst,
-                         const uint16_t *src, int stride, int h)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
-    int row, col;
-
-    for (row = 0; row < h; row++, src += stride, dst += stride) {
-        for (col = 0; col + 2 < stride; col += 3) {
-            const int big_endian = desc->flags & AV_PIX_FMT_FLAG_BE;
-            const uint16_t *in   = src + col;
-            uint16_t *out        = dst + col;
-            int x, y, z, r, g, b;
-
-            if (big_endian) {
-                x = AV_RB16(in + 0);
-                y = AV_RB16(in + 1);
-                z = AV_RB16(in + 2);
-            } else {
-                x = AV_RL16(in + 0);
-                y = AV_RL16(in + 1);
-                z = AV_RL16(in + 2);
-            }
-
-            /* drop to 12 bits and apply the XYZ gamma table */
-            x = c->xyzgamma[x >> 4];
-            y = c->xyzgamma[y >> 4];
-            z = c->xyzgamma[z >> 4];
-
-            /* linear XYZ -> linear sRGB; the whole sum is shifted */
-            r = (c->xyz2rgb_matrix[0][0] * x +
-                 c->xyz2rgb_matrix[0][1] * y +
-                 c->xyz2rgb_matrix[0][2] * z) >> 12;
-            g = (c->xyz2rgb_matrix[1][0] * x +
-                 c->xyz2rgb_matrix[1][1] * y +
-                 c->xyz2rgb_matrix[1][2] * z) >> 12;
-            b = (c->xyz2rgb_matrix[2][0] * x +
-                 c->xyz2rgb_matrix[2][1] * y +
-                 c->xyz2rgb_matrix[2][2] * z) >> 12;
-
-            /* keep the values inside the 12-bit table range */
-            r = av_clip_c(r, 0, 4095);
-            g = av_clip_c(g, 0, 4095);
-            b = av_clip_c(b, 0, 4095);
-
-            /* linear sRGB -> RGB via the gamma table, 12 bit -> 16 bit */
-            if (big_endian) {
-                AV_WB16(out + 0, c->rgbgamma[r] << 4);
-                AV_WB16(out + 1, c->rgbgamma[g] << 4);
-                AV_WB16(out + 2, c->rgbgamma[b] << 4);
-            } else {
-                AV_WL16(out + 0, c->rgbgamma[r] << 4);
-                AV_WL16(out + 1, c->rgbgamma[g] << 4);
-                AV_WL16(out + 2, c->rgbgamma[b] << 4);
-            }
-        }
-    }
-}
-
-/**
- * Convert rows of packed 16-bit RGB triplets to packed 16-bit XYZ triplets.
- *
- * Each sample is reduced to 12 bits, passed through c->rgbgammainv,
- * multiplied by c->rgb2xyz_matrix (result shifted right by 12), clipped
- * to 0..4095, passed through c->xyzgammainv and written back shifted
- * left by 4.
- *
- * sws_scale() calls this in place on the destination plane after
- * scaling, so the byte order is taken from c->dstFormat rather than
- * c->srcFormat.
- *
- * @param c      scaler context providing the tables, matrix and dstFormat
- * @param dst    destination samples (may be the same buffer as src)
- * @param src    source samples
- * @param stride row length in 16-bit units; used both as the per-row
- *               sample limit and as the row advance for src and dst
- * @param h      number of rows
- */
-static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst,
-                         const uint16_t *src, int stride, int h)
-{
-    int xp,yp;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
-
-    for (yp=0; yp<h; yp++) {
-        for (xp=0; xp+2<stride; xp+=3) {
-            int x, y, z, r, g, b;
-
-            if (desc->flags & AV_PIX_FMT_FLAG_BE) {
-                r = AV_RB16(src + xp + 0);
-                g = AV_RB16(src + xp + 1);
-                b = AV_RB16(src + xp + 2);
-            } else {
-                r = AV_RL16(src + xp + 0);
-                g = AV_RL16(src + xp + 1);
-                b = AV_RL16(src + xp + 2);
-            }
-
-            // drop to 12 bits and undo the RGB gamma
-            r = c->rgbgammainv[r>>4];
-            g = c->rgbgammainv[g>>4];
-            b = c->rgbgammainv[b>>4];
-
-            // convert from sRGBlinear to XYZlinear; the whole sum is shifted
-            x = (c->rgb2xyz_matrix[0][0] * r +
-                 c->rgb2xyz_matrix[0][1] * g +
-                 c->rgb2xyz_matrix[0][2] * b) >> 12;
-            y = (c->rgb2xyz_matrix[1][0] * r +
-                 c->rgb2xyz_matrix[1][1] * g +
-                 c->rgb2xyz_matrix[1][2] * b) >> 12;
-            z = (c->rgb2xyz_matrix[2][0] * r +
-                 c->rgb2xyz_matrix[2][1] * g +
-                 c->rgb2xyz_matrix[2][2] * b) >> 12;
-
-            // limit values to 12-bit depth
-            x = av_clip_c(x,0,4095);
-            y = av_clip_c(y,0,4095);
-            z = av_clip_c(z,0,4095);
-
-            // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
-            if (desc->flags & AV_PIX_FMT_FLAG_BE) {
-                AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4);
-                AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4);
-                AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4);
-            } else {
-                AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4);
-                AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4);
-                AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4);
-            }
-        }
-        src += stride;
-        dst += stride;
-    }
-}
-
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- *
- * Validates the arguments, builds the palette tables for paletted
- * sources, makes temporary copies of the source where the alpha or XYZ
- * handling needs one, and then calls c->swscale() either top-down or,
- * for bottom-up slices, on a vertically flipped view of the image.
- *
- * @param c         scaling context
- * @param srcSlice  plane pointers of the source slice
- * @param srcStride strides of the source planes
- * @param srcSliceY first row of the slice within the source image
- * @param srcSliceH number of rows in the slice
- * @param dst       plane pointers of the destination image
- * @param dstStride strides of the destination planes
- * @return the value returned by c->swscale(), 0 on invalid arguments or
- *         an empty slice, AVERROR(ENOMEM) if a temporary buffer cannot
- *         be allocated
- */
-int attribute_align_arg sws_scale(struct SwsContext *c,
-                                  const uint8_t * const srcSlice[],
-                                  const int srcStride[], int srcSliceY,
-                                  int srcSliceH, uint8_t *const dst[],
-                                  const int dstStride[])
-{
-    int i, ret;
-    const uint8_t *src2[4];
-    uint8_t *dst2[4];
-    uint8_t *rgb0_tmp = NULL;
-
-    /* all four array arguments are dereferenced below */
-    if (!srcSlice || !srcStride || !dst || !dstStride) {
-        av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
-        return 0;
-    }
-    /* work on local copies so the pointers can be adjusted */
-    memcpy(src2, srcSlice, sizeof(src2));
-    memcpy(dst2, dst, sizeof(dst2));
-
-    // do not mess up sliceDir if we have a "trailing" 0-size slice
-    if (srcSliceH == 0)
-        return 0;
-
-    if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
-        av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
-        return 0;
-    }
-    if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
-        av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
-        return 0;
-    }
-
-    /* the first slice of a frame must touch the top or the bottom edge */
-    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
-        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
-        return 0;
-    }
-    if (c->sliceDir == 0) {
-        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
-    }
-
-    /* fill c->pal_yuv and c->pal_rgb for all 256 palette indices */
-    if (usePal(c->srcFormat)) {
-        for (i = 0; i < 256; i++) {
-            int p, r, g, b, y, u, v, a = 0xff;
-            if (c->srcFormat == AV_PIX_FMT_PAL8) {
-                p = ((const uint32_t *)(srcSlice[1]))[i];
-                a = (p >> 24) & 0xFF;
-                r = (p >> 16) & 0xFF;
-                g = (p >>  8) & 0xFF;
-                b =  p        & 0xFF;
-            } else if (c->srcFormat == AV_PIX_FMT_RGB8) {
-                r = ( i >> 5     ) * 36;
-                g = ((i >> 2) & 7) * 36;
-                b = ( i       & 3) * 85;
-            } else if (c->srcFormat == AV_PIX_FMT_BGR8) {
-                b = ( i >> 6     ) * 85;
-                g = ((i >> 3) & 7) * 36;
-                r = ( i       & 7) * 36;
-            } else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) {
-                r = ( i >> 3     ) * 255;
-                g = ((i >> 1) & 3) * 85;
-                b = ( i       & 1) * 255;
-            } else if (c->srcFormat == AV_PIX_FMT_GRAY8 || c->srcFormat == AV_PIX_FMT_GRAY8A) {
-                r = g = b = i;
-            } else {
-                av_assert1(c->srcFormat == AV_PIX_FMT_BGR4_BYTE);
-                b = ( i >> 3     ) * 255;
-                g = ((i >> 1) & 3) * 85;
-                r = ( i       & 1) * 255;
-            }
-#define RGB2YUV_SHIFT 15
-#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-
-            y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
-            u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
-            v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
-            c->pal_yuv[i]= y + (u<<8) + (v<<16) + ((unsigned)a<<24);
-
-            /* byte layout of the RGB palette entry depends on dstFormat */
-            switch (c->dstFormat) {
-            case AV_PIX_FMT_BGR32:
-#if !HAVE_BIGENDIAN
-            case AV_PIX_FMT_RGB24:
-#endif
-                c->pal_rgb[i]=  r + (g<<8) + (b<<16) + ((unsigned)a<<24);
-                break;
-            case AV_PIX_FMT_BGR32_1:
-#if HAVE_BIGENDIAN
-            case AV_PIX_FMT_BGR24:
-#endif
-                c->pal_rgb[i]= a + (r<<8) + (g<<16) + ((unsigned)b<<24);
-                break;
-            case AV_PIX_FMT_RGB32_1:
-#if HAVE_BIGENDIAN
-            case AV_PIX_FMT_RGB24:
-#endif
-                c->pal_rgb[i]= a + (b<<8) + (g<<16) + ((unsigned)r<<24);
-                break;
-            case AV_PIX_FMT_RGB32:
-#if !HAVE_BIGENDIAN
-            case AV_PIX_FMT_BGR24:
-#endif
-            default:
-                c->pal_rgb[i]=  b + (g<<8) + (r<<16) + ((unsigned)a<<24);
-            }
-        }
-    }
-
-    /* source has an unused alpha byte but the destination wants alpha:
-     * scale from a copy whose alpha bytes are forced to 0xFF */
-    if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
-        uint8_t *base;
-        int x,y;
-        rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
-        if (!rgb0_tmp)
-            return AVERROR(ENOMEM);
-
-        /* with a negative stride row 0 is the last row of the buffer */
-        base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
-        for (y=0; y<srcSliceH; y++){
-            memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW);
-            for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) {
-                base[ srcStride[0]*y + x] = 0xFF;
-            }
-        }
-        src2[0] = base;
-    }
-
-    /* XYZ source: convert to RGB48 in a temporary buffer first.
-     * NOTE(review): this reuses rgb0_tmp; if the alpha branch above could
-     * also run for the same call its buffer would be leaked - confirm the
-     * two conditions are mutually exclusive. */
-    if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
-        uint8_t *base;
-        rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
-        if (!rgb0_tmp)
-            return AVERROR(ENOMEM);
-
-        base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
-
-        xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH);
-        src2[0] = base;
-    }
-
-    /* clear the error-diffusion state at the top of a bitexact frame */
-    if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
-        for (i = 0; i < 4; i++)
-            memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2));
-
-
-    // copy strides, so they can safely be modified
-    if (c->sliceDir == 1) {
-        // slices go from top to bottom
-        int srcStride2[4] = { srcStride[0], srcStride[1], srcStride[2],
-                              srcStride[3] };
-        int dstStride2[4] = { dstStride[0], dstStride[1], dstStride[2],
-                              dstStride[3] };
-
-        reset_ptr(src2, c->srcFormat);
-        reset_ptr((void*)dst2, c->dstFormat);
-
-        /* reset slice direction at end of frame */
-        if (srcSliceY + srcSliceH == c->srcH)
-            c->sliceDir = 0;
-
-        ret = c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2,
-                          dstStride2);
-    } else {
-        // slices go from bottom to top => we flip the image internally
-        int srcStride2[4] = { -srcStride[0], -srcStride[1], -srcStride[2],
-                              -srcStride[3] };
-        int dstStride2[4] = { -dstStride[0], -dstStride[1], -dstStride[2],
-                              -dstStride[3] };
-
-        /* point every plane at its last row; strides are negated above */
-        src2[0] += (srcSliceH - 1) * srcStride[0];
-        if (!usePal(c->srcFormat))
-            src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1];
-        src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2];
-        src2[3] += (srcSliceH - 1) * srcStride[3];
-        dst2[0] += ( c->dstH                         - 1) * dstStride[0];
-        dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1];
-        dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2];
-        dst2[3] += ( c->dstH                         - 1) * dstStride[3];
-
-        reset_ptr(src2, c->srcFormat);
-        reset_ptr((void*)dst2, c->dstFormat);
-
-        /* reset slice direction at end of frame */
-        if (!srcSliceY)
-            c->sliceDir = 0;
-
-        ret = c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH,
-                          srcSliceH, dst2, dstStride2);
-    }
-
-
-    if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
-        /* replace on the same data */
-        rgb48Toxyz12(c, (uint16_t*)dst2[0], (const uint16_t*)dst2[0], dstStride[0]/2, ret);
-    }
-
-    av_free(rgb0_tmp);
-    return ret;
-}
-
diff --git a/ffmpeg/libswscale/swscale.h b/ffmpeg/libswscale/swscale.h
deleted file mode 100644
index 25c8b5e..0000000
--- a/ffmpeg/libswscale/swscale.h
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_SWSCALE_H
-#define SWSCALE_SWSCALE_H
-
-/**
- * @file
- * @ingroup libsws
- * external API header
- */
-
-#include <stdint.h>
-
-#include "libavutil/avutil.h"
-#include "libavutil/log.h"
-#include "libavutil/pixfmt.h"
-#include "version.h"
-
-/**
- * @defgroup libsws Color conversion and scaling
- * @{
- *
- * Return the LIBSWSCALE_VERSION_INT constant.
- */
-unsigned swscale_version(void);
-
-/**
- * Return the libswscale build-time configuration.
- */
-const char *swscale_configuration(void);
-
-/**
- * Return the libswscale license.
- */
-const char *swscale_license(void);
-
-/* values for the flags, the stuff on the command line is different */
-#define SWS_FAST_BILINEAR     1
-#define SWS_BILINEAR          2
-#define SWS_BICUBIC           4
-#define SWS_X                 8
-#define SWS_POINT          0x10
-#define SWS_AREA           0x20
-#define SWS_BICUBLIN       0x40
-#define SWS_GAUSS          0x80
-#define SWS_SINC          0x100
-#define SWS_LANCZOS       0x200
-#define SWS_SPLINE        0x400
-
-#define SWS_SRC_V_CHR_DROP_MASK     0x30000
-#define SWS_SRC_V_CHR_DROP_SHIFT    16
-
-#define SWS_PARAM_DEFAULT           123456
-
-#define SWS_PRINT_INFO              0x1000
-
-//the following 3 flags are not completely implemented
-//internal chrominance subsampling info
-#define SWS_FULL_CHR_H_INT    0x2000
-//input subsampling info
-#define SWS_FULL_CHR_H_INP    0x4000
-#define SWS_DIRECT_BGR        0x8000
-#define SWS_ACCURATE_RND      0x40000
-#define SWS_BITEXACT          0x80000
-#define SWS_ERROR_DIFFUSION  0x800000
-
-#if FF_API_SWS_CPU_CAPS
-/**
- * CPU caps are autodetected now, those flags
- * are only provided for API compatibility.
- */
-#define SWS_CPU_CAPS_MMX      0x80000000
-#define SWS_CPU_CAPS_MMXEXT   0x20000000
-#define SWS_CPU_CAPS_MMX2     0x20000000
-#define SWS_CPU_CAPS_3DNOW    0x40000000
-#define SWS_CPU_CAPS_ALTIVEC  0x10000000
-#define SWS_CPU_CAPS_BFIN     0x01000000
-#define SWS_CPU_CAPS_SSE2     0x02000000
-#endif
-
-#define SWS_MAX_REDUCE_CUTOFF 0.002
-
-#define SWS_CS_ITU709         1
-#define SWS_CS_FCC            4
-#define SWS_CS_ITU601         5
-#define SWS_CS_ITU624         5
-#define SWS_CS_SMPTE170M      5
-#define SWS_CS_SMPTE240M      7
-#define SWS_CS_DEFAULT        5
-
-/**
- * Return a pointer to yuv<->rgb coefficients for the given colorspace
- * suitable for sws_setColorspaceDetails().
- *
- * @param colorspace One of the SWS_CS_* macros. If invalid,
- * SWS_CS_DEFAULT is used.
- */
-const int *sws_getCoefficients(int colorspace);
-
-// A list of double coefficients together with its length.
-// when used for filters they must have an odd number of elements
-// coeffs cannot be shared between vectors
-typedef struct SwsVector {
-    double *coeff;              ///< pointer to the list of coefficients
-    int length;                 ///< number of coefficients in the vector
-} SwsVector;
-
-// A set of four filter vectors, passed as srcFilter/dstFilter to
-// sws_init_context() and sws_getCachedContext().
-// vectors can be shared
-typedef struct SwsFilter {
-    SwsVector *lumH;    // presumably luma, horizontal (going by the name) - TODO confirm
-    SwsVector *lumV;    // presumably luma, vertical - TODO confirm
-    SwsVector *chrH;    // presumably chroma, horizontal - TODO confirm
-    SwsVector *chrV;    // presumably chroma, vertical - TODO confirm
-} SwsFilter;
-
-struct SwsContext;
-
-/**
- * Return a positive value if pix_fmt is a supported input format, 0
- * otherwise.
- */
-int sws_isSupportedInput(enum AVPixelFormat pix_fmt);
-
-/**
- * Return a positive value if pix_fmt is a supported output format, 0
- * otherwise.
- */
-int sws_isSupportedOutput(enum AVPixelFormat pix_fmt);
-
-/**
- * @param[in]  pix_fmt the pixel format
- * @return a positive value if an endianness conversion for pix_fmt is
- * supported, 0 otherwise.
- */
-int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt);
-
-/**
- * Allocate an empty SwsContext. This must be filled and passed to
- * sws_init_context(). For filling see AVOptions, options.c and
- * sws_setColorspaceDetails().
- */
-struct SwsContext *sws_alloc_context(void);
-
-/**
- * Initialize the swscaler context sws_context.
- *
- * @return zero or positive value on success, a negative value on
- * error
- */
-int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter);
-
-/**
- * Free the swscaler context swsContext.
- * If swsContext is NULL, then does nothing.
- */
-void sws_freeContext(struct SwsContext *swsContext);
-
-#if FF_API_SWS_GETCONTEXT
-/**
- * Allocate and return an SwsContext. You need it to perform
- * scaling/conversion operations using sws_scale().
- *
- * @param srcW the width of the source image
- * @param srcH the height of the source image
- * @param srcFormat the source image format
- * @param dstW the width of the destination image
- * @param dstH the height of the destination image
- * @param dstFormat the destination image format
- * @param flags specify which algorithm and options to use for rescaling
- * @return a pointer to an allocated context, or NULL in case of error
- * @note this function is to be removed after a saner alternative is
- *       written
- * @deprecated Use sws_getCachedContext() instead.
- */
-struct SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
-                                  int dstW, int dstH, enum AVPixelFormat dstFormat,
-                                  int flags, SwsFilter *srcFilter,
-                                  SwsFilter *dstFilter, const double *param);
-#endif
-
-/**
- * Scale the image slice in srcSlice and put the resulting scaled
- * slice in the image in dst. A slice is a sequence of consecutive
- * rows in an image.
- *
- * Slices have to be provided in sequential order, either in
- * top-bottom or bottom-top order. If slices are provided in
- * non-sequential order the behavior of the function is undefined.
- *
- * @param c         the scaling context previously created with
- *                  sws_getContext()
- * @param srcSlice  the array containing the pointers to the planes of
- *                  the source slice
- * @param srcStride the array containing the strides for each plane of
- *                  the source image
- * @param srcSliceY the position in the source image of the slice to
- *                  process, that is the number (counted starting from
- *                  zero) in the image of the first row of the slice
- * @param srcSliceH the height of the source slice, that is the number
- *                  of rows in the slice
- * @param dst       the array containing the pointers to the planes of
- *                  the destination image
- * @param dstStride the array containing the strides for each plane of
- *                  the destination image
- * @return          the height of the output slice
- */
-int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
-              const int srcStride[], int srcSliceY, int srcSliceH,
-              uint8_t *const dst[], const int dstStride[]);
-
-/**
- * @param dstRange flag indicating the white-black range of the output (1=jpeg / 0=mpeg)
- * @param srcRange flag indicating the white-black range of the input (1=jpeg / 0=mpeg)
- * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x]
- * @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x]
- * @param brightness 16.16 fixed point brightness correction
- * @param contrast 16.16 fixed point contrast correction
- * @param saturation 16.16 fixed point saturation correction
- * @return -1 if not supported
- */
-int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
-                             int srcRange, const int table[4], int dstRange,
-                             int brightness, int contrast, int saturation);
-
-/**
- * @return -1 if not supported
- */
-int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
-                             int *srcRange, int **table, int *dstRange,
-                             int *brightness, int *contrast, int *saturation);
-
-/**
- * Allocate and return an uninitialized vector with length coefficients.
- */
-SwsVector *sws_allocVec(int length);
-
-/**
- * Return a normalized Gaussian curve used to filter stuff
- * quality = 3 is high quality, lower is lower quality.
- */
-SwsVector *sws_getGaussianVec(double variance, double quality);
-
-/**
- * Allocate and return a vector with length coefficients, all
- * with the same value c.
- */
-SwsVector *sws_getConstVec(double c, int length);
-
-/**
- * Allocate and return a vector with just one coefficient, with
- * value 1.0.
- */
-SwsVector *sws_getIdentityVec(void);
-
-/**
- * Scale all the coefficients of a by the scalar value.
- */
-void sws_scaleVec(SwsVector *a, double scalar);
-
-/**
- * Scale all the coefficients of a so that their sum equals height.
- */
-void sws_normalizeVec(SwsVector *a, double height);
-void sws_convVec(SwsVector *a, SwsVector *b);
-void sws_addVec(SwsVector *a, SwsVector *b);
-void sws_subVec(SwsVector *a, SwsVector *b);
-void sws_shiftVec(SwsVector *a, int shift);
-
-/**
- * Allocate and return a clone of the vector a, that is a vector
- * with the same coefficients as a.
- */
-SwsVector *sws_cloneVec(SwsVector *a);
-
-/**
- * Print with av_log() a textual representation of the vector a
- * if log_level <= av_log_level.
- */
-void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
-
-void sws_freeVec(SwsVector *a);
-
-SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
-                                float lumaSharpen, float chromaSharpen,
-                                float chromaHShift, float chromaVShift,
-                                int verbose);
-void sws_freeFilter(SwsFilter *filter);
-
-/**
- * Check if context can be reused, otherwise reallocate a new one.
- *
- * If context is NULL, just calls sws_getContext() to get a new
- * context. Otherwise, checks if the parameters are the ones already
- * saved in context. If that is the case, returns the current
- * context. Otherwise, frees context and gets a new context with
- * the new parameters.
- *
- * Be warned that srcFilter and dstFilter are not checked, they
- * are assumed to remain the same.
- */
-struct SwsContext *sws_getCachedContext(struct SwsContext *context,
-                                        int srcW, int srcH, enum AVPixelFormat srcFormat,
-                                        int dstW, int dstH, enum AVPixelFormat dstFormat,
-                                        int flags, SwsFilter *srcFilter,
-                                        SwsFilter *dstFilter, const double *param);
-
-/**
- * Convert an 8-bit paletted frame into a frame with a color depth of 32 bits.
- *
- * The output frame will have the same packed format as the palette.
- *
- * @param src        source frame buffer
- * @param dst        destination frame buffer
- * @param num_pixels number of pixels to convert
- * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
- */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
-
-/**
- * Convert an 8-bit paletted frame into a frame with a color depth of 24 bits.
- *
- * With the palette format "ABCD", the destination frame ends up with the format "ABC".
- *
- * @param src        source frame buffer
- * @param dst        destination frame buffer
- * @param num_pixels number of pixels to convert
- * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
- */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
-
-/**
- * Get the AVClass for swsContext. It can be used in combination with
- * AV_OPT_SEARCH_FAKE_OBJ for examining options.
- *
- * @see av_opt_find().
- */
-const AVClass *sws_get_class(void);
-
-/**
- * @}
- */
-
-#endif /* SWSCALE_SWSCALE_H */
diff --git a/ffmpeg/libswscale/swscale_internal.h b/ffmpeg/libswscale/swscale_internal.h
deleted file mode 100644
index 443615d..0000000
--- a/ffmpeg/libswscale/swscale_internal.h
+++ /dev/null
@@ -1,878 +0,0 @@
-/*
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_SWSCALE_INTERNAL_H
-#define SWSCALE_SWSCALE_INTERNAL_H
-
-#include "config.h"
-
-#if HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
-#include "libavutil/avassert.h"
-#include "libavutil/avutil.h"
-#include "libavutil/common.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/log.h"
-#include "libavutil/pixfmt.h"
-#include "libavutil/pixdesc.h"
-
-#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
-
-#define YUVRGB_TABLE_HEADROOM 128
-
-#define MAX_FILTER_SIZE 256
-
-#define DITHER1XBPP
-
-#if HAVE_BIGENDIAN
-#define ALT32_CORR (-1)
-#else
-#define ALT32_CORR   1
-#endif
-
-#if ARCH_X86_64
-#   define APCK_PTR2  8
-#   define APCK_COEF 16
-#   define APCK_SIZE 24
-#else
-#   define APCK_PTR2  4
-#   define APCK_COEF  8
-#   define APCK_SIZE 16
-#endif
-
-struct SwsContext;
-
-typedef enum SwsDither {
-    SWS_DITHER_NONE = 0,
-    SWS_DITHER_AUTO,
-    SWS_DITHER_BAYER,
-    SWS_DITHER_ED,
-    NB_SWS_DITHER,
-} SwsDither;
-
-typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
-                       int srcStride[], int srcSliceY, int srcSliceH,
-                       uint8_t *dst[], int dstStride[]);
-
-/**
- * Write one line of horizontally scaled data to planar output
- * without any additional vertical scaling (or point-scaling).
- *
- * @param src     scaled source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param dest    pointer to the output plane. For >8bit
- *                output, this is in uint16_t
- * @param dstW    width of destination in pixels
- * @param dither  ordered dither array of type uint8_t and size 8
- * @param offset  Dither offset
- */
-typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
-                               const uint8_t *dither, int offset);
-
-/**
- * Write one line of horizontally scaled data to planar output
- * with multi-point vertical scaling between input pixels.
- *
- * @param filter        vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param src           scaled luma (Y) or alpha (A) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param filterSize    number of vertical input lines to scale
- * @param dest          pointer to output plane. For >8bit
- *                      output, this is in uint16_t
- * @param dstW          width of destination in pixels
- * @param offset        Dither offset
- */
-typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
-                               const int16_t **src, uint8_t *dest, int dstW,
-                               const uint8_t *dither, int offset);
-
-/**
- * Write one line of horizontally scaled chroma to interleaved output
- * with multi-point vertical scaling between input pixels.
- *
- * @param c             SWS scaling context
- * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrFilterSize number of vertical chroma input lines to scale
- * @param dest          pointer to the output plane. For >8bit
- *                      output, this is in uint16_t
- * @param dstW          width of chroma planes
- */
-typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
-                                    const int16_t *chrFilter,
-                                    int chrFilterSize,
-                                    const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc,
-                                    uint8_t *dest, int dstW);
-
-/**
- * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
- * output without any additional vertical scaling (or point-scaling). Note
- * that this function may do chroma scaling, see the "uvalpha" argument.
- *
- * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param dest    pointer to the output plane. For 16bit output, this is
- *                uint16_t
- * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
- *                to write into dest[]
- * @param uvalpha chroma scaling coefficient for the second line of chroma
- *                pixels, either 2048 or 0. If 0, one chroma input is used
- *                for 2 output pixels (or if the SWS_FULL_CHR_H_INT flag
- *                is set, it generates 1 output pixel). If 2048, two chroma
- *                input pixels should be averaged for 2 output pixels (this
- *                only happens if SWS_FULL_CHR_H_INT is not set)
- * @param y       vertical line number for this output. This does not need
- *                to be used to calculate the offset in the destination,
- *                but can be used to generate comfort noise using dithering
- *                for some output formats.
- */
-typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc,
-                               const int16_t *chrUSrc[2],
-                               const int16_t *chrVSrc[2],
-                               const int16_t *alpSrc, uint8_t *dest,
-                               int dstW, int uvalpha, int y);
-/**
- * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
- * output by doing bilinear scaling between two input lines.
- *
- * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param dest    pointer to the output plane. For 16bit output, this is
- *                uint16_t
- * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
- *                to write into dest[]
- * @param yalpha  luma/alpha scaling coefficients for the second input line.
- *                The first line's coefficients can be calculated by using
- *                4096 - yalpha
- * @param uvalpha chroma scaling coefficient for the second input line. The
- *                first line's coefficients can be calculated by using
- *                4096 - uvalpha
- * @param y       vertical line number for this output. This does not need
- *                to be used to calculate the offset in the destination,
- *                but can be used to generate comfort noise using dithering
- *                for some output formats.
- */
-typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2],
-                               const int16_t *chrUSrc[2],
-                               const int16_t *chrVSrc[2],
-                               const int16_t *alpSrc[2],
-                               uint8_t *dest,
-                               int dstW, int yalpha, int uvalpha, int y);
-/**
- * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
- * output by doing multi-point vertical scaling between input pixels.
- *
- * @param c             SWS scaling context
- * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param lumFilterSize number of vertical luma/alpha input lines to scale
- * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param dest          pointer to the output plane. For 16bit output, this is
- *                      uint16_t
- * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
- *                      to write into dest[]
- * @param y             vertical line number for this output. This does not need
- *                      to be used to calculate the offset in the destination,
- *                      but can be used to generate comfort noise using dithering
- *                      for some output formats.
- */
-typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter,
-                               const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter,
-                               const int16_t **chrUSrc,
-                               const int16_t **chrVSrc, int chrFilterSize,
-                               const int16_t **alpSrc, uint8_t *dest,
-                               int dstW, int y);
-
-/**
- * Write one line of horizontally scaled Y/U/V/A to YUV/RGB
- * output by doing multi-point vertical scaling between input pixels.
- *
- * @param c             SWS scaling context
- * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param lumFilterSize number of vertical luma/alpha input lines to scale
- * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param dest          pointer to the output planes. For 16bit output, this is
- *                      uint16_t
- * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
- *                      to write into dest[]
- * @param y             vertical line number for this output. This does not need
- *                      to be used to calculate the offset in the destination,
- *                      but can be used to generate comfort noise using dithering
- *                      for some output formats.
- */
-typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter,
-                            const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter,
-                            const int16_t **chrUSrc,
-                            const int16_t **chrVSrc, int chrFilterSize,
-                            const int16_t **alpSrc, uint8_t **dest,
-                            int dstW, int y);
-
-/* This struct should be aligned on at least a 32-byte boundary. */
-typedef struct SwsContext {
-    /**
-     * info on struct for av_log
-     */
-    const AVClass *av_class;
-
-    /**
-     * Note that src, dst, srcStride, dstStride will be copied in the
-     * sws_scale() wrapper so they can be freely modified here.
-     */
-    SwsFunc swscale;
-    int srcW;                     ///< Width  of source      luma/alpha planes.
-    int srcH;                     ///< Height of source      luma/alpha planes.
-    int dstH;                     ///< Height of destination luma/alpha planes.
-    int chrSrcW;                  ///< Width  of source      chroma     planes.
-    int chrSrcH;                  ///< Height of source      chroma     planes.
-    int chrDstW;                  ///< Width  of destination chroma     planes.
-    int chrDstH;                  ///< Height of destination chroma     planes.
-    int lumXInc, chrXInc;
-    int lumYInc, chrYInc;
-    enum AVPixelFormat dstFormat; ///< Destination pixel format.
-    enum AVPixelFormat srcFormat; ///< Source      pixel format.
-    int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
-    int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
-    int dstBpc, srcBpc;
-    int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
-    int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
-    int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
-    int chrDstVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in destination image.
-    int vChrDrop;                 ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
-    int sliceDir;                 ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
-    double param[2];              ///< Input parameters for scaling algorithms that need them.
-
-    uint32_t pal_yuv[256];
-    uint32_t pal_rgb[256];
-
-    /**
-     * @name Scaled horizontal lines ring buffer.
-     * The horizontal scaler keeps just enough scaled lines in a ring buffer
-     * so they may be passed to the vertical scaler. The pointers to the
-     * allocated buffers for each line are duplicated in sequence in the ring
-     * buffer to simplify indexing and avoid wrapping around between lines
-     * inside the vertical scaler code. The wrapping is done before the
-     * vertical scaler is called.
-     */
-    //@{
-    int16_t **lumPixBuf;          ///< Ring buffer for scaled horizontal luma   plane lines to be fed to the vertical scaler.
-    int16_t **chrUPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
-    int16_t **chrVPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
-    int16_t **alpPixBuf;          ///< Ring buffer for scaled horizontal alpha  plane lines to be fed to the vertical scaler.
-    int vLumBufSize;              ///< Number of vertical luma/alpha lines allocated in the ring buffer.
-    int vChrBufSize;              ///< Number of vertical chroma     lines allocated in the ring buffer.
-    int lastInLumBuf;             ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
-    int lastInChrBuf;             ///< Last scaled horizontal chroma     line from source in the ring buffer.
-    int lumBufIndex;              ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
-    int chrBufIndex;              ///< Index in ring buffer of the last scaled horizontal chroma     line from source.
-    //@}
-
-    uint8_t *formatConvBuffer;
-
-    /**
-     * @name Horizontal and vertical filters.
-     * To better understand the following fields, here is a pseudo-code of
-     * their usage in filtering a horizontal line:
-     * @code
-     * for (i = 0; i < width; i++) {
-     *     dst[i] = 0;
-     *     for (j = 0; j < filterSize; j++)
-     *         dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
-     *     dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
-     * }
-     * @endcode
-     */
-    //@{
-    int16_t *hLumFilter;          ///< Array of horizontal filter coefficients for luma/alpha planes.
-    int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
-    int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
-    int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
-    int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
-    int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
-    int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
-    int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
-    int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
-    int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
-    int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
-    int vChrFilterSize;           ///< Vertical   filter size for chroma     pixels.
-    //@}
-
-    int lumMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
-    int chrMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
-    uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
-    uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
-
-    int canMMXEXTBeUsed;
-
-    int dstY;                     ///< Last destination vertical line output from last slice.
-    int flags;                    ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
-    void *yuvTable;             // pointer to the yuv->rgb table start so it can be freed()
-    // alignment ensures the offset can be added in a single
-    // instruction on e.g. ARM
-    DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
-    uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
-    uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
-    uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
-    DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C values are always at the XY_IDX points
-#define RY_IDX 0
-#define GY_IDX 1
-#define BY_IDX 2
-#define RU_IDX 3
-#define GU_IDX 4
-#define BU_IDX 5
-#define RV_IDX 6
-#define GV_IDX 7
-#define BV_IDX 8
-#define RGB2YUV_SHIFT 15
-
-    int *dither_error[4];
-
-    //Colorspace stuff
-    int contrast, brightness, saturation;    // for sws_getColorspaceDetails
-    int srcColorspaceTable[4];
-    int dstColorspaceTable[4];
-    int srcRange;                 ///< 0 = MPG YUV range, 1 = JPG YUV range (source      image).
-    int dstRange;                 ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
-    int src0Alpha;
-    int dst0Alpha;
-    int srcXYZ;
-    int dstXYZ;
-    int src_h_chr_pos;
-    int dst_h_chr_pos;
-    int src_v_chr_pos;
-    int dst_v_chr_pos;
-    int yuv2rgb_y_offset;
-    int yuv2rgb_y_coeff;
-    int yuv2rgb_v2r_coeff;
-    int yuv2rgb_v2g_coeff;
-    int yuv2rgb_u2g_coeff;
-    int yuv2rgb_u2b_coeff;
-
-#define RED_DITHER            "0*8"
-#define GREEN_DITHER          "1*8"
-#define BLUE_DITHER           "2*8"
-#define Y_COEFF               "3*8"
-#define VR_COEFF              "4*8"
-#define UB_COEFF              "5*8"
-#define VG_COEFF              "6*8"
-#define UG_COEFF              "7*8"
-#define Y_OFFSET              "8*8"
-#define U_OFFSET              "9*8"
-#define V_OFFSET              "10*8"
-#define LUM_MMX_FILTER_OFFSET "11*8"
-#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET           "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
-#define ESP_OFFSET            "11*8+4*4*256*2+8"
-#define VROUNDER_OFFSET       "11*8+4*4*256*2+16"
-#define U_TEMP                "11*8+4*4*256*2+24"
-#define V_TEMP                "11*8+4*4*256*2+32"
-#define Y_TEMP                "11*8+4*4*256*2+40"
-#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
-#define UV_OFF_PX             "11*8+4*4*256*3+48"
-#define UV_OFF_BYTE           "11*8+4*4*256*3+56"
-#define DITHER16              "11*8+4*4*256*3+64"
-#define DITHER32              "11*8+4*4*256*3+80"
-
-    DECLARE_ALIGNED(8, uint64_t, redDither);
-    DECLARE_ALIGNED(8, uint64_t, greenDither);
-    DECLARE_ALIGNED(8, uint64_t, blueDither);
-
-    DECLARE_ALIGNED(8, uint64_t, yCoeff);
-    DECLARE_ALIGNED(8, uint64_t, vrCoeff);
-    DECLARE_ALIGNED(8, uint64_t, ubCoeff);
-    DECLARE_ALIGNED(8, uint64_t, vgCoeff);
-    DECLARE_ALIGNED(8, uint64_t, ugCoeff);
-    DECLARE_ALIGNED(8, uint64_t, yOffset);
-    DECLARE_ALIGNED(8, uint64_t, uOffset);
-    DECLARE_ALIGNED(8, uint64_t, vOffset);
-    int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
-    int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
-    int dstW;                     ///< Width  of destination luma/alpha planes.
-    DECLARE_ALIGNED(8, uint64_t, esp);
-    DECLARE_ALIGNED(8, uint64_t, vRounder);
-    DECLARE_ALIGNED(8, uint64_t, u_temp);
-    DECLARE_ALIGNED(8, uint64_t, v_temp);
-    DECLARE_ALIGNED(8, uint64_t, y_temp);
-    int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
-    // alignment of these values is not necessary, but merely here
-    // to maintain the same offset across x86-32 and x86-64. Once we
-    // use proper offset macros in the asm, they can be removed.
-    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
-    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
-    DECLARE_ALIGNED(8, uint16_t, dither16)[8];
-    DECLARE_ALIGNED(8, uint32_t, dither32)[8];
-
-    const uint8_t *chrDither8, *lumDither8;
-
-#if HAVE_ALTIVEC
-    vector signed short   CY;
-    vector signed short   CRV;
-    vector signed short   CBU;
-    vector signed short   CGU;
-    vector signed short   CGV;
-    vector signed short   OY;
-    vector unsigned short CSHIFT;
-    vector signed short  *vYCoeffsBank, *vCCoeffsBank;
-#endif
-
-#if ARCH_BFIN
-    DECLARE_ALIGNED(4, uint32_t, oy);
-    DECLARE_ALIGNED(4, uint32_t, oc);
-    DECLARE_ALIGNED(4, uint32_t, zero);
-    DECLARE_ALIGNED(4, uint32_t, cy);
-    DECLARE_ALIGNED(4, uint32_t, crv);
-    DECLARE_ALIGNED(4, uint32_t, rmask);
-    DECLARE_ALIGNED(4, uint32_t, cbu);
-    DECLARE_ALIGNED(4, uint32_t, bmask);
-    DECLARE_ALIGNED(4, uint32_t, cgu);
-    DECLARE_ALIGNED(4, uint32_t, cgv);
-    DECLARE_ALIGNED(4, uint32_t, gmask);
-#endif
-
-#if HAVE_VIS
-    DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
-#endif
-    int use_mmx_vfilter;
-
-/* pre defined color-spaces gamma */
-#define XYZ_GAMMA (2.6f)
-#define RGB_GAMMA (2.2f)
-    int16_t *xyzgamma;
-    int16_t *rgbgamma;
-    int16_t *xyzgammainv;
-    int16_t *rgbgammainv;
-    int16_t xyz2rgb_matrix[3][4];
-    int16_t rgb2xyz_matrix[3][4];
-
-    /* function pointers for swscale() */
-    yuv2planar1_fn yuv2plane1;
-    yuv2planarX_fn yuv2planeX;
-    yuv2interleavedX_fn yuv2nv12cX;
-    yuv2packed1_fn yuv2packed1;
-    yuv2packed2_fn yuv2packed2;
-    yuv2packedX_fn yuv2packedX;
-    yuv2anyX_fn yuv2anyX;
-
-    /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
-    void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
-                      int width, uint32_t *pal);
-    /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
-    void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
-                      int width, uint32_t *pal);
-    /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
-    void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
-                      const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                      int width, uint32_t *pal);
-
-    /**
-     * Functions to read planar input, such as planar RGB, and convert
-     * internally to Y/UV/A.
-     */
-    /** @{ */
-    void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv);
-    void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
-                          int width, int32_t *rgb2yuv);
-    void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv);
-    /** @} */
-
-    /**
-     * Scale one horizontal line of input data using a bilinear filter
-     * to produce one line of output data. Compared to SwsContext->hScale(),
-     * please take note of the following caveats when using these:
-     * - Scaling is done using only 7bit instead of 14bit coefficients.
-     * - You can use no more than 5 input pixels to produce 4 output
-     *   pixels. Therefore, this filter should not be used for downscaling
-     *   by more than ~20% in width (because that equals more than 5/4th
-     *   downscaling and thus more than 5 pixels input per 4 pixels output).
-     * - In general, bilinear filters create artifacts during downscaling
-     *   (even when <20%), because one output pixel will span more than one
-     *   input pixel, and thus some pixels will need edges of both neighbor
-     *   pixels to interpolate the output pixel. Since you can use at most
-     *   two input pixels per output pixel in bilinear scaling, this is
-     *   impossible and thus downscaling by any size will create artifacts.
-     * To enable this type of scaling, set SWS_FAST_BILINEAR
-     * in SwsContext->flags.
-     */
-    /** @{ */
-    void (*hyscale_fast)(struct SwsContext *c,
-                         int16_t *dst, int dstWidth,
-                         const uint8_t *src, int srcW, int xInc);
-    void (*hcscale_fast)(struct SwsContext *c,
-                         int16_t *dst1, int16_t *dst2, int dstWidth,
-                         const uint8_t *src1, const uint8_t *src2,
-                         int srcW, int xInc);
-    /** @} */
-
-    /**
-     * Scale one horizontal line of input data using a filter over the input
-     * lines, to produce one (differently sized) line of output data.
-     *
-     * @param dst        pointer to destination buffer for horizontally scaled
-     *                   data. If the number of bits per component of one
-     *                   destination pixel (SwsContext->dstBpc) is <= 10, data
-     *                   will be 15bpc in 16bits (int16_t) width. Else (i.e.
-     *                   SwsContext->dstBpc == 16), data will be 19bpc in
-     *                   32bits (int32_t) width.
-     * @param dstW       width of destination image
-     * @param src        pointer to source data to be scaled. If the number of
-     *                   bits per component of a source pixel (SwsContext->srcBpc)
-     *                   is 8, this is 8bpc in 8bits (uint8_t) width. Else
-     *                   (i.e. SwsContext->srcBpc > 8), this is native depth
-     *                   in 16bits (uint16_t) width. In other words, for 9-bit
-     *                   YUV input, this is 9bpc, for 10-bit YUV input, this is
-     *                   10bpc, and for 16-bit RGB or YUV, this is 16bpc.
-     * @param filter     filter coefficients to be used per output pixel for
-     *                   scaling. This contains 14bpp filtering coefficients.
-     *                   Guaranteed to contain dstW * filterSize entries.
-     * @param filterPos  position of the first input pixel to be used for
-     *                   each output pixel during scaling. Guaranteed to
-     *                   contain dstW entries.
-     * @param filterSize the number of input coefficients to be used (and
-     *                   thus the number of input pixels to be used) for
-     *                   creating a single output pixel. Is aligned to 4
-     *                   (and input coefficients thus padded with zeroes)
-     *                   to simplify creating SIMD code.
-     */
-    /** @{ */
-    void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
-                    const uint8_t *src, const int16_t *filter,
-                    const int32_t *filterPos, int filterSize);
-    void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
-                    const uint8_t *src, const int16_t *filter,
-                    const int32_t *filterPos, int filterSize);
-    /** @} */
-
-    /// Color range conversion function for luma plane if needed.
-    void (*lumConvertRange)(int16_t *dst, int width);
-    /// Color range conversion function for chroma planes if needed.
-    void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
-
-    int needs_hcscale; ///< Set if there are chroma planes to be converted.
-
-    SwsDither dither;
-} SwsContext;
-//FIXME check init (where 0)
-
-SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c);
-int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
-                             int fullRange, int brightness,
-                             int contrast, int saturation);
-void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4],
-                                int brightness, int contrast, int saturation);
-
-void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
-                           int lastInLumBuf, int lastInChrBuf);
-
-SwsFunc ff_yuv2rgb_init_x86(SwsContext *c);
-SwsFunc ff_yuv2rgb_init_vis(SwsContext *c);
-SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c);
-SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c);
-
-#if FF_API_SWS_FORMAT_NAME
-/**
- * @deprecated Use av_get_pix_fmt_name() instead.
- */
-attribute_deprecated
-const char *sws_format_name(enum AVPixelFormat format);
-#endif
-
-static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return desc->comp[0].depth_minus1 == 15;
-}
-
-static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return desc->comp[0].depth_minus1 >= 8 && desc->comp[0].depth_minus1 <= 13;
-}
-
-#define isNBPS(x) is9_OR_10BPS(x)
-
-static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return desc->flags & AV_PIX_FMT_FLAG_BE;
-}
-
-static av_always_inline int isYUV(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2;
-}
-
-static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt));
-}
-
-static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return (desc->flags & AV_PIX_FMT_FLAG_RGB);
-}
-
-#if 0 // FIXME
-#define isGray(x) \
-    (!(av_pix_fmt_desc_get(x)->flags & AV_PIX_FMT_FLAG_PAL) && \
-     av_pix_fmt_desc_get(x)->nb_components <= 2)
-#else
-#define isGray(x)                      \
-    ((x) == AV_PIX_FMT_GRAY8       ||  \
-     (x) == AV_PIX_FMT_Y400A       ||  \
-     (x) == AV_PIX_FMT_GRAY16BE    ||  \
-     (x) == AV_PIX_FMT_GRAY16LE)
-#endif
-
-#define isRGBinInt(x) \
-    (           \
-     (x) == AV_PIX_FMT_RGB48BE     ||  \
-     (x) == AV_PIX_FMT_RGB48LE     ||  \
-     (x) == AV_PIX_FMT_RGBA64BE    ||  \
-     (x) == AV_PIX_FMT_RGBA64LE    ||  \
-     (x) == AV_PIX_FMT_RGB32       ||  \
-     (x) == AV_PIX_FMT_RGB32_1     ||  \
-     (x) == AV_PIX_FMT_RGB24       ||  \
-     (x) == AV_PIX_FMT_RGB565BE    ||  \
-     (x) == AV_PIX_FMT_RGB565LE    ||  \
-     (x) == AV_PIX_FMT_RGB555BE    ||  \
-     (x) == AV_PIX_FMT_RGB555LE    ||  \
-     (x) == AV_PIX_FMT_RGB444BE    ||  \
-     (x) == AV_PIX_FMT_RGB444LE    ||  \
-     (x) == AV_PIX_FMT_RGB8        ||  \
-     (x) == AV_PIX_FMT_RGB4        ||  \
-     (x) == AV_PIX_FMT_RGB4_BYTE   ||  \
-     (x) == AV_PIX_FMT_MONOBLACK   ||  \
-     (x) == AV_PIX_FMT_MONOWHITE   \
-    )
-#define isBGRinInt(x) \
-    (           \
-     (x) == AV_PIX_FMT_BGR48BE     ||  \
-     (x) == AV_PIX_FMT_BGR48LE     ||  \
-     (x) == AV_PIX_FMT_BGRA64BE    ||  \
-     (x) == AV_PIX_FMT_BGRA64LE    ||  \
-     (x) == AV_PIX_FMT_BGR32       ||  \
-     (x) == AV_PIX_FMT_BGR32_1     ||  \
-     (x) == AV_PIX_FMT_BGR24       ||  \
-     (x) == AV_PIX_FMT_BGR565BE    ||  \
-     (x) == AV_PIX_FMT_BGR565LE    ||  \
-     (x) == AV_PIX_FMT_BGR555BE    ||  \
-     (x) == AV_PIX_FMT_BGR555LE    ||  \
-     (x) == AV_PIX_FMT_BGR444BE    ||  \
-     (x) == AV_PIX_FMT_BGR444LE    ||  \
-     (x) == AV_PIX_FMT_BGR8        ||  \
-     (x) == AV_PIX_FMT_BGR4        ||  \
-     (x) == AV_PIX_FMT_BGR4_BYTE   ||  \
-     (x) == AV_PIX_FMT_MONOBLACK   ||  \
-     (x) == AV_PIX_FMT_MONOWHITE   \
-    )
-
-#define isRGBinBytes(x) (           \
-           (x) == AV_PIX_FMT_RGB48BE     \
-        || (x) == AV_PIX_FMT_RGB48LE     \
-        || (x) == AV_PIX_FMT_RGBA64BE    \
-        || (x) == AV_PIX_FMT_RGBA64LE    \
-        || (x) == AV_PIX_FMT_RGBA        \
-        || (x) == AV_PIX_FMT_ARGB        \
-        || (x) == AV_PIX_FMT_RGB24       \
-    )
-#define isBGRinBytes(x) (           \
-           (x) == AV_PIX_FMT_BGR48BE     \
-        || (x) == AV_PIX_FMT_BGR48LE     \
-        || (x) == AV_PIX_FMT_BGRA64BE    \
-        || (x) == AV_PIX_FMT_BGRA64LE    \
-        || (x) == AV_PIX_FMT_BGRA        \
-        || (x) == AV_PIX_FMT_ABGR        \
-        || (x) == AV_PIX_FMT_BGR24       \
-    )
-
-#define isAnyRGB(x) \
-    (           \
-          isRGBinInt(x)       ||    \
-          isBGRinInt(x)       ||    \
-          isRGB(x)      \
-    )
-
-static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    if (pix_fmt == AV_PIX_FMT_PAL8)
-        return 1;
-    return desc->flags & AV_PIX_FMT_FLAG_ALPHA;
-}
-
-#if 1
-#define isPacked(x)         (       \
-           (x)==AV_PIX_FMT_PAL8        \
-        || (x)==AV_PIX_FMT_YUYV422     \
-        || (x)==AV_PIX_FMT_UYVY422     \
-        || (x)==AV_PIX_FMT_Y400A       \
-        ||  isRGBinInt(x)           \
-        ||  isBGRinInt(x)           \
-    )
-#else
-static av_always_inline int isPacked(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return ((desc->nb_components >= 2 && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) ||
-            pix_fmt == AV_PIX_FMT_PAL8);
-}
-
-#endif
-static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return (desc->nb_components >= 2 && (desc->flags & AV_PIX_FMT_FLAG_PLANAR));
-}
-
-static av_always_inline int isPackedRGB(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == AV_PIX_FMT_FLAG_RGB);
-}
-
-static av_always_inline int isPlanarRGB(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) ==
-            (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB));
-}
-
-static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL);
-}
-
-extern const uint64_t ff_dither4[2];
-extern const uint64_t ff_dither8[2];
-
-extern const uint8_t ff_dither_2x2_4[3][8];
-extern const uint8_t ff_dither_2x2_8[3][8];
-extern const uint8_t ff_dither_4x4_16[5][8];
-extern const uint8_t ff_dither_8x8_32[9][8];
-extern const uint8_t ff_dither_8x8_73[9][8];
-extern const uint8_t ff_dither_8x8_128[9][8];
-extern const uint8_t ff_dither_8x8_220[9][8];
-
-extern const int32_t ff_yuv2rgb_coeffs[8][4];
-
-extern const AVClass sws_context_class;
-
-/**
- * Set c->swscale to an unscaled converter if one exists for the specific
- * source and destination formats, bit depths, flags, etc.
- */
-void ff_get_unscaled_swscale(SwsContext *c);
-void ff_get_unscaled_swscale_bfin(SwsContext *c);
-void ff_get_unscaled_swscale_ppc(SwsContext *c);
-void ff_get_unscaled_swscale_arm(SwsContext *c);
-
-/**
- * Return function pointer to fastest main scaler path function depending
- * on architecture and available optimizations.
- */
-SwsFunc ff_getSwsFunc(SwsContext *c);
-
-void ff_sws_init_input_funcs(SwsContext *c);
-void ff_sws_init_output_funcs(SwsContext *c,
-                              yuv2planar1_fn *yuv2plane1,
-                              yuv2planarX_fn *yuv2planeX,
-                              yuv2interleavedX_fn *yuv2nv12cX,
-                              yuv2packed1_fn *yuv2packed1,
-                              yuv2packed2_fn *yuv2packed2,
-                              yuv2packedX_fn *yuv2packedX,
-                              yuv2anyX_fn *yuv2anyX);
-void ff_sws_init_swscale_ppc(SwsContext *c);
-void ff_sws_init_swscale_x86(SwsContext *c);
-
-static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
-                               int alpha, int bits, const int big_endian)
-{
-    int i, j;
-    uint8_t *ptr = plane + stride * y;
-    int v = alpha ? 0xFFFF>>(15-bits) : (1<<bits);
-    for (i = 0; i < height; i++) {
-#define FILL(wfunc) \
-        for (j = 0; j < width; j++) {\
-            wfunc(ptr+2*j, v);\
-        }
-        if (big_endian) {
-            FILL(AV_WB16);
-        } else {
-            FILL(AV_WL16);
-        }
-        ptr += stride;
-    }
-}
-
-#endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/ffmpeg/libswscale/swscale_unscaled.c b/ffmpeg/libswscale/swscale_unscaled.c
deleted file mode 100644
index ccf9980..0000000
--- a/ffmpeg/libswscale/swscale_unscaled.c
+++ /dev/null
@@ -1,1415 +0,0 @@
-/*
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-#include <string.h>
-#include <math.h>
-#include <stdio.h>
-#include "config.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-#include "rgb2rgb.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/cpu.h"
-#include "libavutil/avutil.h"
-#include "libavutil/mathematics.h"
-#include "libavutil/bswap.h"
-#include "libavutil/pixdesc.h"
-#include "libavutil/avassert.h"
-
-DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
-{
-  {   0,  1,  0,  1,  0,  1,  0,  1,},
-  {   1,  0,  1,  0,  1,  0,  1,  0,},
-  {   0,  1,  0,  1,  0,  1,  0,  1,},
-  {   1,  0,  1,  0,  1,  0,  1,  0,},
-  {   0,  1,  0,  1,  0,  1,  0,  1,},
-  {   1,  0,  1,  0,  1,  0,  1,  0,},
-  {   0,  1,  0,  1,  0,  1,  0,  1,},
-  {   1,  0,  1,  0,  1,  0,  1,  0,},
-},{
-  {   1,  2,  1,  2,  1,  2,  1,  2,},
-  {   3,  0,  3,  0,  3,  0,  3,  0,},
-  {   1,  2,  1,  2,  1,  2,  1,  2,},
-  {   3,  0,  3,  0,  3,  0,  3,  0,},
-  {   1,  2,  1,  2,  1,  2,  1,  2,},
-  {   3,  0,  3,  0,  3,  0,  3,  0,},
-  {   1,  2,  1,  2,  1,  2,  1,  2,},
-  {   3,  0,  3,  0,  3,  0,  3,  0,},
-},{
-  {   2,  4,  3,  5,  2,  4,  3,  5,},
-  {   6,  0,  7,  1,  6,  0,  7,  1,},
-  {   3,  5,  2,  4,  3,  5,  2,  4,},
-  {   7,  1,  6,  0,  7,  1,  6,  0,},
-  {   2,  4,  3,  5,  2,  4,  3,  5,},
-  {   6,  0,  7,  1,  6,  0,  7,  1,},
-  {   3,  5,  2,  4,  3,  5,  2,  4,},
-  {   7,  1,  6,  0,  7,  1,  6,  0,},
-},{
-  {   4,  8,  7, 11,  4,  8,  7, 11,},
-  {  12,  0, 15,  3, 12,  0, 15,  3,},
-  {   6, 10,  5,  9,  6, 10,  5,  9,},
-  {  14,  2, 13,  1, 14,  2, 13,  1,},
-  {   4,  8,  7, 11,  4,  8,  7, 11,},
-  {  12,  0, 15,  3, 12,  0, 15,  3,},
-  {   6, 10,  5,  9,  6, 10,  5,  9,},
-  {  14,  2, 13,  1, 14,  2, 13,  1,},
-},{
-  {   9, 17, 15, 23,  8, 16, 14, 22,},
-  {  25,  1, 31,  7, 24,  0, 30,  6,},
-  {  13, 21, 11, 19, 12, 20, 10, 18,},
-  {  29,  5, 27,  3, 28,  4, 26,  2,},
-  {   8, 16, 14, 22,  9, 17, 15, 23,},
-  {  24,  0, 30,  6, 25,  1, 31,  7,},
-  {  12, 20, 10, 18, 13, 21, 11, 19,},
-  {  28,  4, 26,  2, 29,  5, 27,  3,},
-},{
-  {  18, 34, 30, 46, 17, 33, 29, 45,},
-  {  50,  2, 62, 14, 49,  1, 61, 13,},
-  {  26, 42, 22, 38, 25, 41, 21, 37,},
-  {  58, 10, 54,  6, 57,  9, 53,  5,},
-  {  16, 32, 28, 44, 19, 35, 31, 47,},
-  {  48,  0, 60, 12, 51,  3, 63, 15,},
-  {  24, 40, 20, 36, 27, 43, 23, 39,},
-  {  56,  8, 52,  4, 59, 11, 55,  7,},
-},{
-  {  18, 34, 30, 46, 17, 33, 29, 45,},
-  {  50,  2, 62, 14, 49,  1, 61, 13,},
-  {  26, 42, 22, 38, 25, 41, 21, 37,},
-  {  58, 10, 54,  6, 57,  9, 53,  5,},
-  {  16, 32, 28, 44, 19, 35, 31, 47,},
-  {  48,  0, 60, 12, 51,  3, 63, 15,},
-  {  24, 40, 20, 36, 27, 43, 23, 39,},
-  {  56,  8, 52,  4, 59, 11, 55,  7,},
-},{
-  {  36, 68, 60, 92, 34, 66, 58, 90,},
-  { 100,  4,124, 28, 98,  2,122, 26,},
-  {  52, 84, 44, 76, 50, 82, 42, 74,},
-  { 116, 20,108, 12,114, 18,106, 10,},
-  {  32, 64, 56, 88, 38, 70, 62, 94,},
-  {  96,  0,120, 24,102,  6,126, 30,},
-  {  48, 80, 40, 72, 54, 86, 46, 78,},
-  { 112, 16,104,  8,118, 22,110, 14,},
-}};
-
-static const uint16_t dither_scale[15][16]={
-{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
-{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
-{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
-{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
-{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
-{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
-{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
-{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
-{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
-{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
-{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
-{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
-{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
-{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
-{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
-};
-
-
-static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
-                      uint8_t val)
-{
-    int i;
-    uint8_t *ptr = plane + stride * y;
-    for (i = 0; i < height; i++) {
-        memset(ptr, val, width);
-        ptr += stride;
-    }
-}
-
-static void copyPlane(const uint8_t *src, int srcStride,
-                      int srcSliceY, int srcSliceH, int width,
-                      uint8_t *dst, int dstStride)
-{
-    dst += dstStride * srcSliceY;
-    if (dstStride == srcStride && srcStride > 0) {
-        memcpy(dst, src, srcSliceH * dstStride);
-    } else {
-        int i;
-        for (i = 0; i < srcSliceH; i++) {
-            memcpy(dst, src, width);
-            src += srcStride;
-            dst += dstStride;
-        }
-    }
-}
-
-static int planarToNv12Wrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t *dstParam[],
-                               int dstStride[])
-{
-    uint8_t *dst = dstParam[1] + dstStride[1] * srcSliceY / 2;
-
-    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
-              dstParam[0], dstStride[0]);
-
-    if (c->dstFormat == AV_PIX_FMT_NV12)
-        interleaveBytes(src[1], src[2], dst, c->srcW / 2, srcSliceH / 2,
-                        srcStride[1], srcStride[2], dstStride[1]);
-    else
-        interleaveBytes(src[2], src[1], dst, c->srcW / 2, srcSliceH / 2,
-                        srcStride[2], srcStride[1], dstStride[1]);
-
-    return srcSliceH;
-}
-
-static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t *dstParam[],
-                               int dstStride[])
-{
-    uint8_t *dst1 = dstParam[1] + dstStride[1] * srcSliceY / 2;
-    uint8_t *dst2 = dstParam[2] + dstStride[2] * srcSliceY / 2;
-
-    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
-              dstParam[0], dstStride[0]);
-
-    if (c->srcFormat == AV_PIX_FMT_NV12)
-        deinterleaveBytes(src[1], dst1, dst2,c->srcW / 2, srcSliceH / 2,
-                          srcStride[1], dstStride[1], dstStride[2]);
-    else
-        deinterleaveBytes(src[1], dst2, dst1, c->srcW / 2, srcSliceH / 2,
-                          srcStride[1], dstStride[2], dstStride[1]);
-
-    return srcSliceH;
-}
-
-static int planarToYuy2Wrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY;
-
-    yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0],
-               srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int planarToUyvyWrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY;
-
-    yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0],
-               srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t *src[],
-                                int srcStride[], int srcSliceY, int srcSliceH,
-                                uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY;
-
-    yuv422ptoyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0],
-                  srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t *src[],
-                                int srcStride[], int srcSliceY, int srcSliceH,
-                                uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY;
-
-    yuv422ptouyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0],
-                  srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY;
-    uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY / 2;
-    uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY / 2;
-
-    yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0],
-                 dstStride[1], srcStride[0]);
-
-    if (dstParam[3])
-        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-
-    return srcSliceH;
-}
-
-static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY;
-    uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY;
-    uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY;
-
-    yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0],
-                 dstStride[1], srcStride[0]);
-
-    return srcSliceH;
-}
-
-static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY;
-    uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY / 2;
-    uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY / 2;
-
-    uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0],
-                 dstStride[1], srcStride[0]);
-
-    if (dstParam[3])
-        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-
-    return srcSliceH;
-}
-
-static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t *src[],
-                               int srcStride[], int srcSliceY, int srcSliceH,
-                               uint8_t *dstParam[], int dstStride[])
-{
-    uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY;
-    uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY;
-    uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY;
-
-    uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0],
-                 dstStride[1], srcStride[0]);
-
-    return srcSliceH;
-}
-
-static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels,
-                             const uint8_t *palette)
-{
-    int i;
-    for (i = 0; i < num_pixels; i++)
-        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i << 1]] | (src[(i << 1) + 1] << 24);
-}
-
-static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels,
-                               const uint8_t *palette)
-{
-    int i;
-
-    for (i = 0; i < num_pixels; i++)
-        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i << 1]] | src[(i << 1) + 1];
-}
-
-static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels,
-                             const uint8_t *palette)
-{
-    int i;
-
-    for (i = 0; i < num_pixels; i++) {
-        //FIXME slow?
-        dst[0] = palette[src[i << 1] * 4 + 0];
-        dst[1] = palette[src[i << 1] * 4 + 1];
-        dst[2] = palette[src[i << 1] * 4 + 2];
-        dst += 3;
-    }
-}
-
-static int packed_16bpc_bswap(SwsContext *c, const uint8_t *src[],
-                              int srcStride[], int srcSliceY, int srcSliceH,
-                              uint8_t *dst[], int dstStride[])
-{
-    int i, j, p;
-
-    for (p = 0; p < 4; p++) {
-        int srcstr = srcStride[p] / 2;
-        int dststr = dstStride[p] / 2;
-        uint16_t       *dstPtr =       (uint16_t *) dst[p];
-        const uint16_t *srcPtr = (const uint16_t *) src[p];
-        int min_stride         = FFMIN(FFABS(srcstr), FFABS(dststr));
-        if(!dstPtr || !srcPtr)
-            continue;
-        for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
-            for (j = 0; j < min_stride; j++) {
-                dstPtr[j] = av_bswap16(srcPtr[j]);
-            }
-            srcPtr += srcstr;
-            dstPtr += dststr;
-        }
-    }
-
-    return srcSliceH;
-}
-
-static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
-                           int srcSliceY, int srcSliceH, uint8_t *dst[],
-                           int dstStride[])
-{
-    const enum AVPixelFormat srcFormat = c->srcFormat;
-    const enum AVPixelFormat dstFormat = c->dstFormat;
-    void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
-                 const uint8_t *palette) = NULL;
-    int i;
-    uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
-    const uint8_t *srcPtr = src[0];
-
-    if (srcFormat == AV_PIX_FMT_GRAY8A) {
-        switch (dstFormat) {
-        case AV_PIX_FMT_RGB32  : conv = gray8aToPacked32; break;
-        case AV_PIX_FMT_BGR32  : conv = gray8aToPacked32; break;
-        case AV_PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break;
-        case AV_PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break;
-        case AV_PIX_FMT_RGB24  : conv = gray8aToPacked24; break;
-        case AV_PIX_FMT_BGR24  : conv = gray8aToPacked24; break;
-        }
-    } else if (usePal(srcFormat)) {
-        switch (dstFormat) {
-        case AV_PIX_FMT_RGB32  : conv = sws_convertPalette8ToPacked32; break;
-        case AV_PIX_FMT_BGR32  : conv = sws_convertPalette8ToPacked32; break;
-        case AV_PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break;
-        case AV_PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break;
-        case AV_PIX_FMT_RGB24  : conv = sws_convertPalette8ToPacked24; break;
-        case AV_PIX_FMT_BGR24  : conv = sws_convertPalette8ToPacked24; break;
-        }
-    }
-
-    if (!conv)
-        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
-    else {
-        for (i = 0; i < srcSliceH; i++) {
-            conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
-            srcPtr += srcStride[0];
-            dstPtr += dstStride[0];
-        }
-    }
-
-    return srcSliceH;
-}
-
-static void gbr16ptopacked16(const uint16_t *src[], int srcStride[],
-                             uint8_t *dst, int dstStride, int srcSliceH,
-                             int alpha, int swap, int bpp, int width)
-{
-    int x, h, i;
-    int src_alpha = src[3] != NULL;
-    int scale_high = 16 - bpp, scale_low = (bpp - 8) * 2;
-    for (h = 0; h < srcSliceH; h++) {
-        uint16_t *dest = (uint16_t *)(dst + dstStride * h);
-        uint16_t component;
-
-        switch(swap) {
-        case 3:
-            if (alpha && !src_alpha) {
-                for (x = 0; x < width; x++) {
-                    component = av_bswap16(src[0][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[1][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[2][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    *dest++ = 0xffff;
-                }
-            } else if (alpha && src_alpha) {
-                for (x = 0; x < width; x++) {
-                    component = av_bswap16(src[0][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[1][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[2][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[3][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                }
-            } else {
-                for (x = 0; x < width; x++) {
-                    component = av_bswap16(src[0][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[1][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                    component = av_bswap16(src[2][x]);
-                    *dest++ = av_bswap16(component << scale_high | component >> scale_low);
-                }
-            }
-            break;
-        case 2:
-            if (alpha && !src_alpha) {
-                for (x = 0; x < width; x++) {
-                    *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low);
-                    *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low);
-                    *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low);
-                    *dest++ = 0xffff;
-                }
-            } else if (alpha && src_alpha) {
-                for (x = 0; x < width; x++) {
-                    *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low);
-                    *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low);
-                    *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low);
-                    *dest++ = av_bswap16(src[3][x] << scale_high | src[3][x] >> scale_low);
-                }
-            } else {
-                for (x = 0; x < width; x++) {
-                    *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low);
-                    *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low);
-                    *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low);
-                }
-            }
-            break;
-        case 1:
-            if (alpha && !src_alpha) {
-                for (x = 0; x < width; x++) {
-                    *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low;
-                    *dest++ = 0xffff;
-                }
-            } else if (alpha && src_alpha) {
-                for (x = 0; x < width; x++) {
-                    *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[3][x]) << scale_high | av_bswap16(src[3][x]) >> scale_low;
-                }
-            } else {
-                for (x = 0; x < width; x++) {
-                    *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low;
-                    *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low;
-                }
-            }
-            break;
-        default:
-            if (alpha && !src_alpha) {
-                for (x = 0; x < width; x++) {
-                    *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low;
-                    *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low;
-                    *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low;
-                    *dest++ = 0xffff;
-                }
-            } else if (alpha && src_alpha) {
-                for (x = 0; x < width; x++) {
-                    *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low;
-                    *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low;
-                    *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low;
-                    *dest++ = src[3][x] << scale_high | src[3][x] >> scale_low;
-                }
-            } else {
-                for (x = 0; x < width; x++) {
-                    *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low;
-                    *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low;
-                    *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low;
-                }
-            }
-        }
-        for (i = 0; i < 3 + src_alpha; i++)
-            src[i] += srcStride[i] >> 1;
-    }
-}
-
-static int planarRgb16ToRgb16Wrapper(SwsContext *c, const uint8_t *src[],
-                                     int srcStride[], int srcSliceY, int srcSliceH,
-                                     uint8_t *dst[], int dstStride[])
-{
-    const uint16_t *src102[] = { (uint16_t *)src[1], (uint16_t *)src[0], (uint16_t *)src[2], (uint16_t *)src[3] };
-    const uint16_t *src201[] = { (uint16_t *)src[2], (uint16_t *)src[0], (uint16_t *)src[1], (uint16_t *)src[3] };
-    int stride102[] = { srcStride[1], srcStride[0], srcStride[2], srcStride[3] };
-    int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] };
-    const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
-    const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
-    int bits_per_sample = src_format->comp[0].depth_minus1 + 1;
-    int swap = 0;
-    if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) ||
-        !HAVE_BIGENDIAN &&   src_format->flags & AV_PIX_FMT_FLAG_BE)
-        swap++;
-    if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) ||
-        !HAVE_BIGENDIAN &&   dst_format->flags & AV_PIX_FMT_FLAG_BE)
-        swap += 2;
-
-    if ((src_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) !=
-        (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) ||
-        bits_per_sample <= 8) {
-        av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n",
-               src_format->name, dst_format->name);
-        return srcSliceH;
-    }
-    switch (c->dstFormat) {
-    case AV_PIX_FMT_BGR48LE:
-    case AV_PIX_FMT_BGR48BE:
-        gbr16ptopacked16(src102, stride102,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, 0, swap, bits_per_sample, c->srcW);
-        break;
-    case AV_PIX_FMT_RGB48LE:
-    case AV_PIX_FMT_RGB48BE:
-        gbr16ptopacked16(src201, stride201,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, 0, swap, bits_per_sample, c->srcW);
-        break;
-    case AV_PIX_FMT_RGBA64LE:
-    case AV_PIX_FMT_RGBA64BE:
-         gbr16ptopacked16(src201, stride201,
-                          dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                          srcSliceH, 1, swap, bits_per_sample, c->srcW);
-        break;
-    case AV_PIX_FMT_BGRA64LE:
-    case AV_PIX_FMT_BGRA64BE:
-        gbr16ptopacked16(src102, stride102,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, 1, swap, bits_per_sample, c->srcW);
-        break;
-    default:
-        av_log(c, AV_LOG_ERROR,
-               "unsupported planar RGB conversion %s -> %s\n",
-               src_format->name, dst_format->name);
-    }
-
-    return srcSliceH;
-}
-
-static void gbr24ptopacked24(const uint8_t *src[], int srcStride[],
-                             uint8_t *dst, int dstStride, int srcSliceH,
-                             int width)
-{
-    int x, h, i;
-    for (h = 0; h < srcSliceH; h++) {
-        uint8_t *dest = dst + dstStride * h;
-        for (x = 0; x < width; x++) {
-            *dest++ = src[0][x];
-            *dest++ = src[1][x];
-            *dest++ = src[2][x];
-        }
-
-        for (i = 0; i < 3; i++)
-            src[i] += srcStride[i];
-    }
-}
-
-static void gbr24ptopacked32(const uint8_t *src[], int srcStride[],
-                             uint8_t *dst, int dstStride, int srcSliceH,
-                             int alpha_first, int width)
-{
-    int x, h, i;
-    for (h = 0; h < srcSliceH; h++) {
-        uint8_t *dest = dst + dstStride * h;
-
-        if (alpha_first) {
-            for (x = 0; x < width; x++) {
-                *dest++ = 0xff;
-                *dest++ = src[0][x];
-                *dest++ = src[1][x];
-                *dest++ = src[2][x];
-            }
-        } else {
-            for (x = 0; x < width; x++) {
-                *dest++ = src[0][x];
-                *dest++ = src[1][x];
-                *dest++ = src[2][x];
-                *dest++ = 0xff;
-            }
-        }
-
-        for (i = 0; i < 3; i++)
-            src[i] += srcStride[i];
-    }
-}
-
-static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[],
-                                 int srcStride[], int srcSliceY, int srcSliceH,
-                                 uint8_t *dst[], int dstStride[])
-{
-    int alpha_first = 0;
-    const uint8_t *src102[] = { src[1], src[0], src[2] };
-    const uint8_t *src201[] = { src[2], src[0], src[1] };
-    int stride102[] = { srcStride[1], srcStride[0], srcStride[2] };
-    int stride201[] = { srcStride[2], srcStride[0], srcStride[1] };
-
-    if (c->srcFormat != AV_PIX_FMT_GBRP) {
-        av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n",
-               av_get_pix_fmt_name(c->srcFormat),
-               av_get_pix_fmt_name(c->dstFormat));
-        return srcSliceH;
-    }
-
-    switch (c->dstFormat) {
-    case AV_PIX_FMT_BGR24:
-        gbr24ptopacked24(src102, stride102,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, c->srcW);
-        break;
-
-    case AV_PIX_FMT_RGB24:
-        gbr24ptopacked24(src201, stride201,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, c->srcW);
-        break;
-
-    case AV_PIX_FMT_ARGB:
-        alpha_first = 1;
-    case AV_PIX_FMT_RGBA:
-        gbr24ptopacked32(src201, stride201,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, alpha_first, c->srcW);
-        break;
-
-    case AV_PIX_FMT_ABGR:
-        alpha_first = 1;
-    case AV_PIX_FMT_BGRA:
-        gbr24ptopacked32(src102, stride102,
-                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
-                         srcSliceH, alpha_first, c->srcW);
-        break;
-
-    default:
-        av_log(c, AV_LOG_ERROR,
-               "unsupported planar RGB conversion %s -> %s\n",
-               av_get_pix_fmt_name(c->srcFormat),
-               av_get_pix_fmt_name(c->dstFormat));
-    }
-
-    return srcSliceH;
-}
-
-static int planarRgbToplanarRgbWrapper(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[], int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
-              dst[0], dstStride[0]);
-    copyPlane(src[1], srcStride[1], srcSliceY, srcSliceH, c->srcW,
-              dst[1], dstStride[1]);
-    copyPlane(src[2], srcStride[2], srcSliceY, srcSliceH, c->srcW,
-              dst[2], dstStride[2]);
-    if (dst[3])
-        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-
-    return srcSliceH;
-}
-
-static void packedtogbr24p(const uint8_t *src, int srcStride,
-                           uint8_t *dst[], int dstStride[], int srcSliceH,
-                           int alpha_first, int inc_size, int width)
-{
-    uint8_t *dest[3];
-    int x, h;
-
-    dest[0] = dst[0];
-    dest[1] = dst[1];
-    dest[2] = dst[2];
-
-    if (alpha_first)
-        src++;
-
-    for (h = 0; h < srcSliceH; h++) {
-        for (x = 0; x < width; x++) {
-            dest[0][x] = src[0];
-            dest[1][x] = src[1];
-            dest[2][x] = src[2];
-
-            src += inc_size;
-        }
-        src     += srcStride - width * inc_size;
-        dest[0] += dstStride[0];
-        dest[1] += dstStride[1];
-        dest[2] += dstStride[2];
-    }
-}
-
-static int rgbToPlanarRgbWrapper(SwsContext *c, const uint8_t *src[],
-                                 int srcStride[], int srcSliceY, int srcSliceH,
-                                 uint8_t *dst[], int dstStride[])
-{
-    int alpha_first = 0;
-    int stride102[] = { dstStride[1], dstStride[0], dstStride[2] };
-    int stride201[] = { dstStride[2], dstStride[0], dstStride[1] };
-    uint8_t *dst102[] = { dst[1] + srcSliceY * dstStride[1],
-                          dst[0] + srcSliceY * dstStride[0],
-                          dst[2] + srcSliceY * dstStride[2] };
-    uint8_t *dst201[] = { dst[2] + srcSliceY * dstStride[2],
-                          dst[0] + srcSliceY * dstStride[0],
-                          dst[1] + srcSliceY * dstStride[1] };
-
-    switch (c->srcFormat) {
-    case AV_PIX_FMT_RGB24:
-        packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst201,
-                       stride201, srcSliceH, alpha_first, 3, c->srcW);
-        break;
-    case AV_PIX_FMT_BGR24:
-        packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst102,
-                       stride102, srcSliceH, alpha_first, 3, c->srcW);
-        break;
-    case AV_PIX_FMT_ARGB:
-        alpha_first = 1;
-    case AV_PIX_FMT_RGBA:
-        packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst201,
-                       stride201, srcSliceH, alpha_first, 4, c->srcW);
-        break;
-    case AV_PIX_FMT_ABGR:
-        alpha_first = 1;
-    case AV_PIX_FMT_BGRA:
-        packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst102,
-                       stride102, srcSliceH, alpha_first, 4, c->srcW);
-        break;
-    default:
-        av_log(c, AV_LOG_ERROR,
-               "unsupported planar RGB conversion %s -> %s\n",
-               av_get_pix_fmt_name(c->srcFormat),
-               av_get_pix_fmt_name(c->dstFormat));
-    }
-
-    return srcSliceH;
-}
-
-#define isRGBA32(x) (            \
-           (x) == AV_PIX_FMT_ARGB   \
-        || (x) == AV_PIX_FMT_RGBA   \
-        || (x) == AV_PIX_FMT_BGRA   \
-        || (x) == AV_PIX_FMT_ABGR   \
-        )
-
-#define isRGBA64(x) (                \
-           (x) == AV_PIX_FMT_RGBA64LE   \
-        || (x) == AV_PIX_FMT_RGBA64BE   \
-        || (x) == AV_PIX_FMT_BGRA64LE   \
-        || (x) == AV_PIX_FMT_BGRA64BE   \
-        )
-
-#define isRGB48(x) (                \
-           (x) == AV_PIX_FMT_RGB48LE   \
-        || (x) == AV_PIX_FMT_RGB48BE   \
-        || (x) == AV_PIX_FMT_BGR48LE   \
-        || (x) == AV_PIX_FMT_BGR48BE   \
-        )
-
-/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
-typedef void (* rgbConvFn) (const uint8_t *, uint8_t *, int);
-static rgbConvFn findRgbConvFn(SwsContext *c)
-{
-    const enum AVPixelFormat srcFormat = c->srcFormat;
-    const enum AVPixelFormat dstFormat = c->dstFormat;
-    const int srcId = c->srcFormatBpp;
-    const int dstId = c->dstFormatBpp;
-    rgbConvFn conv = NULL;
-
-#define IS_NOT_NE(bpp, desc) \
-    (((bpp + 7) >> 3) == 2 && \
-     (!(desc->flags & AV_PIX_FMT_FLAG_BE) != !HAVE_BIGENDIAN))
-
-#define CONV_IS(src, dst) (srcFormat == AV_PIX_FMT_##src && dstFormat == AV_PIX_FMT_##dst)
-
-    if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
-        if (     CONV_IS(ABGR, RGBA)
-              || CONV_IS(ARGB, BGRA)
-              || CONV_IS(BGRA, ARGB)
-              || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210;
-        else if (CONV_IS(ABGR, ARGB)
-              || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321;
-        else if (CONV_IS(ABGR, BGRA)
-              || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230;
-        else if (CONV_IS(BGRA, RGBA)
-              || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
-        else if (CONV_IS(BGRA, ABGR)
-              || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
-    } else if (isRGB48(srcFormat) && isRGB48(dstFormat)) {
-        if      (CONV_IS(RGB48LE, BGR48LE)
-              || CONV_IS(BGR48LE, RGB48LE)
-              || CONV_IS(RGB48BE, BGR48BE)
-              || CONV_IS(BGR48BE, RGB48BE)) conv = rgb48tobgr48_nobswap;
-        else if (CONV_IS(RGB48LE, BGR48BE)
-              || CONV_IS(BGR48LE, RGB48BE)
-              || CONV_IS(RGB48BE, BGR48LE)
-              || CONV_IS(BGR48BE, RGB48LE)) conv = rgb48tobgr48_bswap;
-    } else if (isRGBA64(srcFormat) && isRGB48(dstFormat)) {
-        if      (CONV_IS(RGBA64LE, BGR48LE)
-              || CONV_IS(BGRA64LE, RGB48LE)
-              || CONV_IS(RGBA64BE, BGR48BE)
-              || CONV_IS(BGRA64BE, RGB48BE)) conv = rgb64tobgr48_nobswap;
-        else if (CONV_IS(RGBA64LE, BGR48BE)
-              || CONV_IS(BGRA64LE, RGB48BE)
-              || CONV_IS(RGBA64BE, BGR48LE)
-              || CONV_IS(BGRA64BE, RGB48LE)) conv = rgb64tobgr48_bswap;
-        else if (CONV_IS(RGBA64LE, RGB48LE)
-              || CONV_IS(BGRA64LE, BGR48LE)
-              || CONV_IS(RGBA64BE, RGB48BE)
-              || CONV_IS(BGRA64BE, BGR48BE)) conv = rgb64to48_nobswap;
-        else if (CONV_IS(RGBA64LE, RGB48BE)
-              || CONV_IS(BGRA64LE, BGR48BE)
-              || CONV_IS(RGBA64BE, RGB48LE)
-              || CONV_IS(BGRA64BE, BGR48LE)) conv = rgb64to48_bswap;
-    } else
-    /* BGR -> BGR */
-    if ((isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) ||
-        (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) {
-        switch (srcId | (dstId << 16)) {
-        case 0x000F000C: conv = rgb12to15; break;
-        case 0x000F0010: conv = rgb16to15; break;
-        case 0x000F0018: conv = rgb24to15; break;
-        case 0x000F0020: conv = rgb32to15; break;
-        case 0x0010000F: conv = rgb15to16; break;
-        case 0x00100018: conv = rgb24to16; break;
-        case 0x00100020: conv = rgb32to16; break;
-        case 0x0018000F: conv = rgb15to24; break;
-        case 0x00180010: conv = rgb16to24; break;
-        case 0x00180020: conv = rgb32to24; break;
-        case 0x0020000F: conv = rgb15to32; break;
-        case 0x00200010: conv = rgb16to32; break;
-        case 0x00200018: conv = rgb24to32; break;
-        }
-    } else if ((isBGRinInt(srcFormat) && isRGBinInt(dstFormat)) ||
-               (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) {
-        switch (srcId | (dstId << 16)) {
-        case 0x000C000C: conv = rgb12tobgr12; break;
-        case 0x000F000F: conv = rgb15tobgr15; break;
-        case 0x000F0010: conv = rgb16tobgr15; break;
-        case 0x000F0018: conv = rgb24tobgr15; break;
-        case 0x000F0020: conv = rgb32tobgr15; break;
-        case 0x0010000F: conv = rgb15tobgr16; break;
-        case 0x00100010: conv = rgb16tobgr16; break;
-        case 0x00100018: conv = rgb24tobgr16; break;
-        case 0x00100020: conv = rgb32tobgr16; break;
-        case 0x0018000F: conv = rgb15tobgr24; break;
-        case 0x00180010: conv = rgb16tobgr24; break;
-        case 0x00180018: conv = rgb24tobgr24; break;
-        case 0x00180020: conv = rgb32tobgr24; break;
-        case 0x0020000F: conv = rgb15tobgr32; break;
-        case 0x00200010: conv = rgb16tobgr32; break;
-        case 0x00200018: conv = rgb24tobgr32; break;
-        }
-    }
-
-    if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && !isRGBA32(srcFormat) && ALT32_CORR<0)
-        return NULL;
-
-    return conv;
-}
-
-/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
-static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
-                           int srcSliceY, int srcSliceH, uint8_t *dst[],
-                           int dstStride[])
-
-{
-    const enum AVPixelFormat srcFormat = c->srcFormat;
-    const enum AVPixelFormat dstFormat = c->dstFormat;
-    const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat);
-    const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
-    const int srcBpp = (c->srcFormatBpp + 7) >> 3;
-    const int dstBpp = (c->dstFormatBpp + 7) >> 3;
-    rgbConvFn conv = findRgbConvFn(c);
-
-    if (!conv) {
-        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
-    } else {
-        const uint8_t *srcPtr = src[0];
-              uint8_t *dstPtr = dst[0];
-        int src_bswap = IS_NOT_NE(c->srcFormatBpp, desc_src);
-        int dst_bswap = IS_NOT_NE(c->dstFormatBpp, desc_dst);
-
-        if ((srcFormat == AV_PIX_FMT_RGB32_1 || srcFormat == AV_PIX_FMT_BGR32_1) &&
-            !isRGBA32(dstFormat))
-            srcPtr += ALT32_CORR;
-
-        if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) &&
-            !isRGBA32(srcFormat)) {
-            int i;
-            av_assert0(ALT32_CORR == 1);
-            for (i = 0; i < srcSliceH; i++)
-                dstPtr[dstStride[0] * (srcSliceY + i)] = 255;
-            dstPtr += ALT32_CORR;
-        }
-
-        if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 &&
-            !(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap)
-            conv(srcPtr, dstPtr + dstStride[0] * srcSliceY,
-                 srcSliceH * srcStride[0]);
-        else {
-            int i, j;
-            dstPtr += dstStride[0] * srcSliceY;
-
-            for (i = 0; i < srcSliceH; i++) {
-                if(src_bswap) {
-                    for(j=0; j<c->srcW; j++)
-                        ((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]);
-                    conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp);
-                }else
-                    conv(srcPtr, dstPtr, c->srcW * srcBpp);
-                if(dst_bswap)
-                    for(j=0; j<c->srcW; j++)
-                        ((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]);
-                srcPtr += srcStride[0];
-                dstPtr += dstStride[0];
-            }
-        }
-    }
-    return srcSliceH;
-}
-
-static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-                              int srcStride[], int srcSliceY, int srcSliceH,
-                              uint8_t *dst[], int dstStride[])
-{
-    ff_rgb24toyv12(
-        src[0],
-        dst[0] +  srcSliceY       * dstStride[0],
-        dst[1] + (srcSliceY >> 1) * dstStride[1],
-        dst[2] + (srcSliceY >> 1) * dstStride[2],
-        c->srcW, srcSliceH,
-        dstStride[0], dstStride[1], srcStride[0],
-        c->input_rgb2yuv_table);
-    if (dst[3])
-        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-    return srcSliceH;
-}
-
-static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-                             int srcStride[], int srcSliceY, int srcSliceH,
-                             uint8_t *dst[], int dstStride[])
-{
-    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
-              dst[0], dstStride[0]);
-
-    planar2x(src[1], dst[1] + dstStride[1] * (srcSliceY >> 1), c->chrSrcW,
-             srcSliceH >> 2, srcStride[1], dstStride[1]);
-    planar2x(src[2], dst[2] + dstStride[2] * (srcSliceY >> 1), c->chrSrcW,
-             srcSliceH >> 2, srcStride[2], dstStride[2]);
-    if (dst[3])
-        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-    return srcSliceH;
-}
-
-/* unscaled copy like stuff (assumes nearly identical formats) */
-static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
-                             int srcStride[], int srcSliceY, int srcSliceH,
-                             uint8_t *dst[], int dstStride[])
-{
-    if (dstStride[0] == srcStride[0] && srcStride[0] > 0)
-        memcpy(dst[0] + dstStride[0] * srcSliceY, src[0], srcSliceH * dstStride[0]);
-    else {
-        int i;
-        const uint8_t *srcPtr = src[0];
-        uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
-        int length = 0;
-
-        /* universal length finder */
-        while (length + c->srcW <= FFABS(dstStride[0]) &&
-               length + c->srcW <= FFABS(srcStride[0]))
-            length += c->srcW;
-        av_assert1(length != 0);
-
-        for (i = 0; i < srcSliceH; i++) {
-            memcpy(dstPtr, srcPtr, length);
-            srcPtr += srcStride[0];
-            dstPtr += dstStride[0];
-        }
-    }
-    return srcSliceH;
-}
-
-#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-    uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-    int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
-    for (i = 0; i < height; i++) {\
-        const uint8_t *dither= dithers[src_depth-9][i&7];\
-        for (j = 0; j < length-7; j+=8){\
-            dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-            dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-            dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-            dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-            dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-            dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-            dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-            dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
-        }\
-        for (; j < length; j++)\
-            dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
-        dst += dstStride;\
-        src += srcStride;\
-    }
-
-static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
-                             int srcStride[], int srcSliceY, int srcSliceH,
-                             uint8_t *dst[], int dstStride[])
-{
-    const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat);
-    const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
-    int plane, i, j;
-    for (plane = 0; plane < 4; plane++) {
-        int length = (plane == 0 || plane == 3) ? c->srcW  : FF_CEIL_RSHIFT(c->srcW,   c->chrDstHSubSample);
-        int y =      (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample);
-        int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample);
-        const uint8_t *srcPtr = src[plane];
-        uint8_t *dstPtr = dst[plane] + dstStride[plane] * y;
-        int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0);
-
-        if (!dst[plane])
-            continue;
-        // ignore palette for GRAY8
-        if (plane == 1 && !dst[2]) continue;
-        if (!src[plane] || (plane == 1 && !src[2])) {
-            if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) {
-                fillPlane16(dst[plane], dstStride[plane], length, height, y,
-                        plane == 3, desc_dst->comp[plane].depth_minus1,
-                        isBE(c->dstFormat));
-            } else {
-                fillPlane(dst[plane], dstStride[plane], length, height, y,
-                        (plane == 3) ? 255 : 128);
-            }
-        } else {
-            if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
-               || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
-            ) {
-                const int src_depth = desc_src->comp[plane].depth_minus1 + 1;
-                const int dst_depth = desc_dst->comp[plane].depth_minus1 + 1;
-                const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
-                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-
-                if (dst_depth == 8) {
-                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
-                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
-                    } else {
-                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
-                    }
-                } else if (src_depth == 8) {
-                    for (i = 0; i < height; i++) {
-                        #define COPY816(w)\
-                        if(shiftonly){\
-                            for (j = 0; j < length; j++)\
-                                w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\
-                        }else{\
-                            for (j = 0; j < length; j++)\
-                                w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\
-                                               (srcPtr[j]>>(2*8-dst_depth)));\
-                        }
-                        if(isBE(c->dstFormat)){
-                            COPY816(AV_WB16)
-                        } else {
-                            COPY816(AV_WL16)
-                        }
-                        dstPtr2 += dstStride[plane]/2;
-                        srcPtr  += srcStride[plane];
-                    }
-                } else if (src_depth <= dst_depth) {
-                    for (i = 0; i < height; i++) {
-                        j = 0;
-                        if(isBE(c->srcFormat) == HAVE_BIGENDIAN &&
-                           isBE(c->dstFormat) == HAVE_BIGENDIAN &&
-                           shiftonly) {
-                             unsigned shift = dst_depth - src_depth;
-#if HAVE_FAST_64BIT
-#define FAST_COPY_UP(shift) \
-    for (; j < length - 3; j += 4) { \
-        uint64_t v = AV_RN64A(srcPtr2 + j); \
-        AV_WN64A(dstPtr2 + j, v << shift); \
-    }
-#else
-#define FAST_COPY_UP(shift) \
-    for (; j < length - 1; j += 2) { \
-        uint32_t v = AV_RN32A(srcPtr2 + j); \
-        AV_WN32A(dstPtr2 + j, v << shift); \
-    }
-#endif
-                             switch (shift)
-                             {
-                             case 6: FAST_COPY_UP(6); break;
-                             case 7: FAST_COPY_UP(7); break;
-                             }
-                        }
-#define COPY_UP(r,w) \
-    if(shiftonly){\
-        for (; j < length; j++){ \
-            unsigned int v= r(&srcPtr2[j]);\
-            w(&dstPtr2[j], v<<(dst_depth-src_depth));\
-        }\
-    }else{\
-        for (; j < length; j++){ \
-            unsigned int v= r(&srcPtr2[j]);\
-            w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
-                        (v>>(2*src_depth-dst_depth)));\
-        }\
-    }
-                        if(isBE(c->srcFormat)){
-                            if(isBE(c->dstFormat)){
-                                COPY_UP(AV_RB16, AV_WB16)
-                            } else {
-                                COPY_UP(AV_RB16, AV_WL16)
-                            }
-                        } else {
-                            if(isBE(c->dstFormat)){
-                                COPY_UP(AV_RL16, AV_WB16)
-                            } else {
-                                COPY_UP(AV_RL16, AV_WL16)
-                            }
-                        }
-                        dstPtr2 += dstStride[plane]/2;
-                        srcPtr2 += srcStride[plane]/2;
-                    }
-                } else {
-                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
-                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
-                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
-                        } else {
-                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
-                        }
-                    }else{
-                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
-                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
-                        } else {
-                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
-                        }
-                    }
-                }
-            } else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) &&
-                      isBE(c->srcFormat) != isBE(c->dstFormat)) {
-
-                for (i = 0; i < height; i++) {
-                    for (j = 0; j < length; j++)
-                        ((uint16_t *) dstPtr)[j] = av_bswap16(((const uint16_t *) srcPtr)[j]);
-                    srcPtr += srcStride[plane];
-                    dstPtr += dstStride[plane];
-                }
-            } else if (dstStride[plane] == srcStride[plane] &&
-                       srcStride[plane] > 0 && srcStride[plane] == length) {
-                memcpy(dst[plane] + dstStride[plane] * y, src[plane],
-                       height * dstStride[plane]);
-            } else {
-                if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
-                    length *= 2;
-                else if (!desc_src->comp[0].depth_minus1)
-                    length >>= 3; // monowhite/black
-                for (i = 0; i < height; i++) {
-                    memcpy(dstPtr, srcPtr, length);
-                    srcPtr += srcStride[plane];
-                    dstPtr += dstStride[plane];
-                }
-            }
-        }
-    }
-    return srcSliceH;
-}
-
-
-#define IS_DIFFERENT_ENDIANESS(src_fmt, dst_fmt, pix_fmt)          \
-    ((src_fmt == pix_fmt ## BE && dst_fmt == pix_fmt ## LE) ||     \
-     (src_fmt == pix_fmt ## LE && dst_fmt == pix_fmt ## BE))
-
-
-void ff_get_unscaled_swscale(SwsContext *c)
-{
-    const enum AVPixelFormat srcFormat = c->srcFormat;
-    const enum AVPixelFormat dstFormat = c->dstFormat;
-    const int flags = c->flags;
-    const int dstH = c->dstH;
-    int needsDither;
-
-    needsDither = isAnyRGB(dstFormat) &&
-            c->dstFormatBpp < 24 &&
-           (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat)));
-
-    /* yv12_to_nv12 */
-    if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) &&
-        (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)) {
-        c->swscale = planarToNv12Wrapper;
-    }
-    /* nv12_to_yv12 */
-    if (dstFormat == AV_PIX_FMT_YUV420P &&
-        (srcFormat == AV_PIX_FMT_NV12 || srcFormat == AV_PIX_FMT_NV21)) {
-        c->swscale = nv12ToPlanarWrapper;
-    }
-    /* yuv2bgr */
-    if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P ||
-         srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
-        !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
-        c->swscale = ff_yuv2rgb_get_func_ptr(c);
-    }
-
-    if (srcFormat == AV_PIX_FMT_YUV410P &&
-        (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
-        !(flags & SWS_BITEXACT)) {
-        c->swscale = yvu9ToYv12Wrapper;
-    }
-
-    /* bgr24toYV12 */
-    if (srcFormat == AV_PIX_FMT_BGR24 &&
-        (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
-        !(flags & SWS_ACCURATE_RND))
-        c->swscale = bgr24ToYv12Wrapper;
-
-    /* RGB/BGR -> RGB/BGR (no dither needed forms) */
-    if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
-        && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
-        c->swscale = rgbToRgbWrapper;
-
-    if ((srcFormat == AV_PIX_FMT_GBRP && dstFormat == AV_PIX_FMT_GBRAP) ||
-        (srcFormat == AV_PIX_FMT_GBRAP && dstFormat == AV_PIX_FMT_GBRP))
-        c->swscale = planarRgbToplanarRgbWrapper;
-
-#define isByteRGB(f) (             \
-        f == AV_PIX_FMT_RGB32   || \
-        f == AV_PIX_FMT_RGB32_1 || \
-        f == AV_PIX_FMT_RGB24   || \
-        f == AV_PIX_FMT_BGR32   || \
-        f == AV_PIX_FMT_BGR32_1 || \
-        f == AV_PIX_FMT_BGR24)
-
-    if (srcFormat == AV_PIX_FMT_GBRP && isPlanar(srcFormat) && isByteRGB(dstFormat))
-        c->swscale = planarRgbToRgbWrapper;
-
-    if ((srcFormat == AV_PIX_FMT_GBRP9LE  || srcFormat == AV_PIX_FMT_GBRP9BE  ||
-         srcFormat == AV_PIX_FMT_GBRP16LE || srcFormat == AV_PIX_FMT_GBRP16BE ||
-         srcFormat == AV_PIX_FMT_GBRP10LE || srcFormat == AV_PIX_FMT_GBRP10BE ||
-         srcFormat == AV_PIX_FMT_GBRP12LE || srcFormat == AV_PIX_FMT_GBRP12BE ||
-         srcFormat == AV_PIX_FMT_GBRP14LE || srcFormat == AV_PIX_FMT_GBRP14BE ||
-         srcFormat == AV_PIX_FMT_GBRAP16LE || srcFormat == AV_PIX_FMT_GBRAP16BE) &&
-        (dstFormat == AV_PIX_FMT_RGB48LE  || dstFormat == AV_PIX_FMT_RGB48BE  ||
-         dstFormat == AV_PIX_FMT_BGR48LE  || dstFormat == AV_PIX_FMT_BGR48BE  ||
-         dstFormat == AV_PIX_FMT_RGBA64LE || dstFormat == AV_PIX_FMT_RGBA64BE ||
-         dstFormat == AV_PIX_FMT_BGRA64LE || dstFormat == AV_PIX_FMT_BGRA64BE))
-        c->swscale = planarRgb16ToRgb16Wrapper;
-
-    if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth_minus1 == 7 &&
-        isPackedRGB(srcFormat) && dstFormat == AV_PIX_FMT_GBRP)
-        c->swscale = rgbToPlanarRgbWrapper;
-
-    /* bswap 16 bits per pixel/component packed formats */
-    if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR48)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP9)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP10) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP12) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP14) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP16) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_XYZ12)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P9)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P10) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P12) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P14) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P16) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P9)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P10) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9)  ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P14) ||
-        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16))
-        c->swscale = packed_16bpc_bswap;
-
-    if (usePal(srcFormat) && isByteRGB(dstFormat))
-        c->swscale = palToRgbWrapper;
-
-    if (srcFormat == AV_PIX_FMT_YUV422P) {
-        if (dstFormat == AV_PIX_FMT_YUYV422)
-            c->swscale = yuv422pToYuy2Wrapper;
-        else if (dstFormat == AV_PIX_FMT_UYVY422)
-            c->swscale = yuv422pToUyvyWrapper;
-    }
-
-    /* LQ converters if -sws 0 or -sws 4*/
-    if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
-        /* yv12_to_yuy2 */
-        if (srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) {
-            if (dstFormat == AV_PIX_FMT_YUYV422)
-                c->swscale = planarToYuy2Wrapper;
-            else if (dstFormat == AV_PIX_FMT_UYVY422)
-                c->swscale = planarToUyvyWrapper;
-        }
-    }
-    if (srcFormat == AV_PIX_FMT_YUYV422 &&
-       (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))
-        c->swscale = yuyvToYuv420Wrapper;
-    if (srcFormat == AV_PIX_FMT_UYVY422 &&
-       (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))
-        c->swscale = uyvyToYuv420Wrapper;
-    if (srcFormat == AV_PIX_FMT_YUYV422 && dstFormat == AV_PIX_FMT_YUV422P)
-        c->swscale = yuyvToYuv422Wrapper;
-    if (srcFormat == AV_PIX_FMT_UYVY422 && dstFormat == AV_PIX_FMT_YUV422P)
-        c->swscale = uyvyToYuv422Wrapper;
-
-#define isPlanarGray(x) (isGray(x) && (x) != AV_PIX_FMT_GRAY8A)
-    /* simple copy */
-    if ( srcFormat == dstFormat ||
-        (srcFormat == AV_PIX_FMT_YUVA420P && dstFormat == AV_PIX_FMT_YUV420P) ||
-        (srcFormat == AV_PIX_FMT_YUV420P && dstFormat == AV_PIX_FMT_YUVA420P) ||
-        (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
-        (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) ||
-        (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) ||
-        (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) &&
-         c->chrDstHSubSample == c->chrSrcHSubSample &&
-         c->chrDstVSubSample == c->chrSrcVSubSample &&
-         dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
-         srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21))
-    {
-        if (isPacked(c->srcFormat))
-            c->swscale = packedCopyWrapper;
-        else /* Planar YUV or gray */
-            c->swscale = planarCopyWrapper;
-    }
-
-    if (ARCH_BFIN)
-        ff_get_unscaled_swscale_bfin(c);
-    if (ARCH_PPC)
-        ff_get_unscaled_swscale_ppc(c);
-    if (ARCH_ARM)
-        ff_get_unscaled_swscale_arm(c);
-
-}
-
-/* Expand 8-bit palette indices into packed 32-bit pixels: each of the num_pixels output pixels is the index's 32-bit palette entry, copied unchanged */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst,
-                                   int num_pixels, const uint8_t *palette)
-{
-    int i;
-
-    for (i = 0; i < num_pixels; i++) // src[i] is the palette index of pixel i
-        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]]; // dst and palette are both accessed as uint32_t arrays
-}
-
-/* Palette format: ABCD -> dst format: ABC (bytes 0..2 of each 4-byte palette entry are copied, byte 3 is dropped) */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst,
-                                   int num_pixels, const uint8_t *palette)
-{
-    int i;
-
-    for (i = 0; i < num_pixels; i++) {
-        //FIXME slow?
-        dst[0] = palette[src[i] * 4 + 0];
-        dst[1] = palette[src[i] * 4 + 1];
-        dst[2] = palette[src[i] * 4 + 2];
-        dst += 3; // advance to the next 3-byte output pixel
-    }
-}
diff --git a/ffmpeg/libswscale/utils.c b/ffmpeg/libswscale/utils.c
deleted file mode 100644
index b8b95a9..0000000
--- a/ffmpeg/libswscale/utils.c
+++ /dev/null
@@ -1,2093 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-
-#define _SVID_SOURCE // needed for MAP_ANONYMOUS
-#define _DARWIN_C_SOURCE // needed for MAP_ANON
-#include <inttypes.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-#if HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-#endif
-#if HAVE_VIRTUALALLOC
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#endif
-
-#include "libavutil/attributes.h"
-#include "libavutil/avassert.h"
-#include "libavutil/avutil.h"
-#include "libavutil/bswap.h"
-#include "libavutil/cpu.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/mathematics.h"
-#include "libavutil/opt.h"
-#include "libavutil/pixdesc.h"
-#include "libavutil/ppc/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/x86/cpu.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-static void handle_formats(SwsContext *c);
-
-unsigned swscale_version(void)
-{
-    av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); // sanity check on the micro version before reporting the version
-    return LIBSWSCALE_VERSION_INT; // the library version constant, returned as unsigned
-}
-
-const char *swscale_configuration(void)
-{
-    return FFMPEG_CONFIGURATION; // constant not defined in the visible part of this file; presumably supplied by config.h
-}
-
-const char *swscale_license(void)
-{
-#define LICENSE_PREFIX "libswscale license: "
-    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; // the two literals are concatenated, then the pointer is advanced past the prefix, so the caller sees only the FFMPEG_LICENSE text
-}
-
-#define RET 0xC3 // near return opcode for x86
-
-typedef struct FormatEntry {
-    uint8_t is_supported_in         :1; // value returned by sws_isSupportedInput()
-    uint8_t is_supported_out        :1; // value returned by sws_isSupportedOutput()
-    uint8_t is_supported_endianness :1; // value returned by sws_isSupportedEndiannessConversion()
-} FormatEntry;
-
-static const FormatEntry format_entries[AV_PIX_FMT_NB] = { // indexed by pixel format; initializers are { is_supported_in, is_supported_out, is_supported_endianness }; omitted fields and unlisted formats are 0
-    [AV_PIX_FMT_YUV420P]     = { 1, 1 },
-    [AV_PIX_FMT_YUYV422]     = { 1, 1 },
-    [AV_PIX_FMT_RGB24]       = { 1, 1 },
-    [AV_PIX_FMT_BGR24]       = { 1, 1 },
-    [AV_PIX_FMT_YUV422P]     = { 1, 1 },
-    [AV_PIX_FMT_YUV444P]     = { 1, 1 },
-    [AV_PIX_FMT_YUV410P]     = { 1, 1 },
-    [AV_PIX_FMT_YUV411P]     = { 1, 1 },
-    [AV_PIX_FMT_GRAY8]       = { 1, 1 },
-    [AV_PIX_FMT_MONOWHITE]   = { 1, 1 },
-    [AV_PIX_FMT_MONOBLACK]   = { 1, 1 },
-    [AV_PIX_FMT_PAL8]        = { 1, 0 }, // input only
-    [AV_PIX_FMT_YUVJ420P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVJ411P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVJ422P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVJ444P]    = { 1, 1 },
-    [AV_PIX_FMT_UYVY422]     = { 1, 1 },
-    [AV_PIX_FMT_UYYVYY411]   = { 0, 0 }, // listed explicitly, but neither input nor output
-    [AV_PIX_FMT_BGR8]        = { 1, 1 },
-    [AV_PIX_FMT_BGR4]        = { 0, 1 }, // output only
-    [AV_PIX_FMT_BGR4_BYTE]   = { 1, 1 },
-    [AV_PIX_FMT_RGB8]        = { 1, 1 },
-    [AV_PIX_FMT_RGB4]        = { 0, 1 }, // output only
-    [AV_PIX_FMT_RGB4_BYTE]   = { 1, 1 },
-    [AV_PIX_FMT_NV12]        = { 1, 1 },
-    [AV_PIX_FMT_NV21]        = { 1, 1 },
-    [AV_PIX_FMT_ARGB]        = { 1, 1 },
-    [AV_PIX_FMT_RGBA]        = { 1, 1 },
-    [AV_PIX_FMT_ABGR]        = { 1, 1 },
-    [AV_PIX_FMT_BGRA]        = { 1, 1 },
-    [AV_PIX_FMT_0RGB]        = { 1, 1 },
-    [AV_PIX_FMT_RGB0]        = { 1, 1 },
-    [AV_PIX_FMT_0BGR]        = { 1, 1 },
-    [AV_PIX_FMT_BGR0]        = { 1, 1 },
-    [AV_PIX_FMT_GRAY16BE]    = { 1, 1 },
-    [AV_PIX_FMT_GRAY16LE]    = { 1, 1 },
-    [AV_PIX_FMT_YUV440P]     = { 1, 1 },
-    [AV_PIX_FMT_YUVJ440P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVA420P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVA422P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVA444P]    = { 1, 1 },
-    [AV_PIX_FMT_YUVA420P9BE] = { 1, 1 },
-    [AV_PIX_FMT_YUVA420P9LE] = { 1, 1 },
-    [AV_PIX_FMT_YUVA422P9BE] = { 1, 1 },
-    [AV_PIX_FMT_YUVA422P9LE] = { 1, 1 },
-    [AV_PIX_FMT_YUVA444P9BE] = { 1, 1 },
-    [AV_PIX_FMT_YUVA444P9LE] = { 1, 1 },
-    [AV_PIX_FMT_YUVA420P10BE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA420P10LE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA422P10BE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA422P10LE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA444P10BE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA444P10LE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA420P16BE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA420P16LE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA422P16BE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA422P16LE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA444P16BE]= { 1, 1 },
-    [AV_PIX_FMT_YUVA444P16LE]= { 1, 1 },
-    [AV_PIX_FMT_RGB48BE]     = { 1, 1 },
-    [AV_PIX_FMT_RGB48LE]     = { 1, 1 },
-    [AV_PIX_FMT_RGBA64BE]    = { 1, 1 },
-    [AV_PIX_FMT_RGBA64LE]    = { 1, 1 },
-    [AV_PIX_FMT_RGB565BE]    = { 1, 1 },
-    [AV_PIX_FMT_RGB565LE]    = { 1, 1 },
-    [AV_PIX_FMT_RGB555BE]    = { 1, 1 },
-    [AV_PIX_FMT_RGB555LE]    = { 1, 1 },
-    [AV_PIX_FMT_BGR565BE]    = { 1, 1 },
-    [AV_PIX_FMT_BGR565LE]    = { 1, 1 },
-    [AV_PIX_FMT_BGR555BE]    = { 1, 1 },
-    [AV_PIX_FMT_BGR555LE]    = { 1, 1 },
-    [AV_PIX_FMT_YUV420P16LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV420P16BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P16LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P16BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P16LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P16BE] = { 1, 1 },
-    [AV_PIX_FMT_RGB444LE]    = { 1, 1 },
-    [AV_PIX_FMT_RGB444BE]    = { 1, 1 },
-    [AV_PIX_FMT_BGR444LE]    = { 1, 1 },
-    [AV_PIX_FMT_BGR444BE]    = { 1, 1 },
-    [AV_PIX_FMT_Y400A]       = { 1, 0 }, // input only
-    [AV_PIX_FMT_BGR48BE]     = { 1, 1 },
-    [AV_PIX_FMT_BGR48LE]     = { 1, 1 },
-    [AV_PIX_FMT_BGRA64BE]    = { 0, 0 }, // listed explicitly, but neither input nor output
-    [AV_PIX_FMT_BGRA64LE]    = { 0, 0 }, // listed explicitly, but neither input nor output
-    [AV_PIX_FMT_YUV420P9BE]  = { 1, 1 },
-    [AV_PIX_FMT_YUV420P9LE]  = { 1, 1 },
-    [AV_PIX_FMT_YUV420P10BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV420P10LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV420P12BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV420P12LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV420P14BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV420P14LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P9BE]  = { 1, 1 },
-    [AV_PIX_FMT_YUV422P9LE]  = { 1, 1 },
-    [AV_PIX_FMT_YUV422P10BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P10LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P12BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P12LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P14BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV422P14LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P9BE]  = { 1, 1 },
-    [AV_PIX_FMT_YUV444P9LE]  = { 1, 1 },
-    [AV_PIX_FMT_YUV444P10BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P10LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P12BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P12LE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P14BE] = { 1, 1 },
-    [AV_PIX_FMT_YUV444P14LE] = { 1, 1 },
-    [AV_PIX_FMT_GBRP]        = { 1, 1 },
-    [AV_PIX_FMT_GBRP9LE]     = { 1, 1 },
-    [AV_PIX_FMT_GBRP9BE]     = { 1, 1 },
-    [AV_PIX_FMT_GBRP10LE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRP10BE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRP12LE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRP12BE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRP14LE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRP14BE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRP16LE]    = { 1, 0 }, // input only
-    [AV_PIX_FMT_GBRP16BE]    = { 1, 0 }, // input only
-    [AV_PIX_FMT_XYZ12BE]     = { 1, 1, 1 }, // the XYZ12 pair are the only entries that set is_supported_endianness
-    [AV_PIX_FMT_XYZ12LE]     = { 1, 1, 1 },
-    [AV_PIX_FMT_GBRAP]       = { 1, 1 },
-    [AV_PIX_FMT_GBRAP16LE]   = { 1, 0 }, // input only
-    [AV_PIX_FMT_GBRAP16BE]   = { 1, 0 }, // input only
-};
-
-int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
-{
-    return (unsigned)pix_fmt < AV_PIX_FMT_NB ? // the unsigned cast makes negative values fail the range check too
-           format_entries[pix_fmt].is_supported_in : 0; // out-of-range formats report 0
-}
-
-int sws_isSupportedOutput(enum AVPixelFormat pix_fmt)
-{
-    return (unsigned)pix_fmt < AV_PIX_FMT_NB ? // the unsigned cast makes negative values fail the range check too
-           format_entries[pix_fmt].is_supported_out : 0; // out-of-range formats report 0
-}
-
-int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt)
-{
-    return (unsigned)pix_fmt < AV_PIX_FMT_NB ? // the unsigned cast makes negative values fail the range check too
-           format_entries[pix_fmt].is_supported_endianness : 0; // out-of-range formats report 0
-}
-
-#if FF_API_SWS_FORMAT_NAME // function is compiled only while this macro is non-zero
-const char *sws_format_name(enum AVPixelFormat format)
-{
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
-    if (desc) // a descriptor exists for this format: report its name
-        return desc->name;
-    else
-        return "Unknown format";
-}
-#endif /* FF_API_SWS_FORMAT_NAME */
-
-static double getSplineCoeff(double a, double b, double c, double d,
-                             double dist)
-{
-    if (dist <= 1.0)
-        return ((d * dist + c) * dist + b) * dist + a; // a + b*dist + c*dist^2 + d*dist^3, evaluated in Horner form
-    else
-        return getSplineCoeff(0.0, // otherwise recurse with dist reduced by 1 and the four coefficients recomputed as below
-                               b + 2.0 * c + 3.0 * d,
-                               c + 3.0 * d,
-                              -b - 3.0 * c - 6.0 * d,
-                              dist - 1.0);
-}
-
-static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir) // s and dir are not used in the body
-{
-    if (pos < 0) { // negative pos: substitute the value for which the result below is exactly 128
-        pos = (128 << chr_subsample) - 128;
-    }
-    pos += 128; // relative to ideal left edge
-    return pos >> chr_subsample;
-}
-
-typedef struct {
-    int flag;                   ///< SWS_* flag bit that selects the algorithm
-    const char *description;    ///< human-readable description
-    int size_factor;            ///< size factor read by initFilter() to derive the filter size; the table stores -1 where no fixed factor is given
-} ScaleAlgorithm;
-
-static const ScaleAlgorithm scale_algorithms[] = { // scanned in order by initFilter(): the first entry whose flag bit is set in flags supplies size_factor
-    { SWS_AREA,          "area averaging",                  1 /* downscale only, for upscale it is bilinear */ },
-    { SWS_BICUBIC,       "bicubic",                         4 },
-    { SWS_BICUBLIN,      "luma bicubic / chroma bilinear", -1 },
-    { SWS_BILINEAR,      "bilinear",                        2 },
-    { SWS_FAST_BILINEAR, "fast bilinear",                  -1 },
-    { SWS_GAUSS,         "Gaussian",                        8 /* infinite ;) */ },
-    { SWS_LANCZOS,       "Lanczos",                        -1 /* custom */ },
-    { SWS_POINT,         "nearest neighbor / point",       -1 },
-    { SWS_SINC,          "sinc",                           20 /* infinite ;) */ },
-    { SWS_SPLINE,        "bicubic spline",                 20 /* infinite :)*/ },
-    { SWS_X,             "experimental",                    8 },
-};
-
-static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
-                              int *outFilterSize, int xInc, int srcW,
-                              int dstW, int filterAlign, int one,
-                              int flags, int cpu_flags,
-                              SwsVector *srcFilter, SwsVector *dstFilter,
-                              double param[2], int srcPos, int dstPos)
-{ // builds, per output position, a start index (*filterPos) and *outFilterSize int16_t coefficients (*outFilter); returns 0 on success, -1 on failure
-    int i;
-    int filterSize;
-    int filter2Size;
-    int minFilterSize;
-    int64_t *filter    = NULL;
-    int64_t *filter2   = NULL;
-    const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8)); // fixed-point value of 1.0 for the int64_t coefficient arrays
-    int ret            = -1; // stays -1 until the very end, so every jump to fail reports failure
-
-    emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
-
-    // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
-    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail);
-
-    if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled
-        int i;
-        filterSize = 1;
-        FF_ALLOCZ_OR_GOTO(NULL, filter,
-                          dstW * sizeof(*filter) * filterSize, fail);
-
-        for (i = 0; i < dstW; i++) {
-            filter[i * filterSize] = fone;
-            (*filterPos)[i]        = i;
-        }
-    } else if (flags & SWS_POINT) { // lame looking point sampling mode
-        int i;
-        int64_t xDstInSrc;
-        filterSize = 1;
-        FF_ALLOC_OR_GOTO(NULL, filter,
-                         dstW * sizeof(*filter) * filterSize, fail);
-
-        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
-        for (i = 0; i < dstW; i++) {
-            int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
-
-            (*filterPos)[i] = xx;
-            filter[i]       = fone;
-            xDstInSrc      += xInc;
-        }
-    } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
-               (flags & SWS_FAST_BILINEAR)) { // bilinear upscale
-        int i;
-        int64_t xDstInSrc;
-        filterSize = 2;
-        FF_ALLOC_OR_GOTO(NULL, filter,
-                         dstW * sizeof(*filter) * filterSize, fail);
-
-        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
-        for (i = 0; i < dstW; i++) {
-            int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
-            int j;
-
-            (*filterPos)[i] = xx;
-            // bilinear upscale / linear interpolate / area averaging
-            for (j = 0; j < filterSize; j++) {
-                int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
-                if (coeff < 0)
-                    coeff = 0;
-                filter[i * filterSize + j] = coeff;
-                xx++;
-            }
-            xDstInSrc += xInc;
-        }
-    } else {
-        int64_t xDstInSrc;
-        int sizeFactor = -1;
-
-        for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { // first table entry whose flag is set decides sizeFactor
-            if (flags & scale_algorithms[i].flag) {
-                sizeFactor = scale_algorithms[i].size_factor;
-                break;
-            }
-        }
-        if (flags & SWS_LANCZOS) // Lanczos overrides the table value: derived from param[0], or 6 by default
-            sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
-        av_assert0(sizeFactor > 0);
-
-        if (xInc <= 1 << 16)
-            filterSize = 1 + sizeFactor;    // upscale
-        else
-            filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
-
-        filterSize = FFMIN(filterSize, srcW - 2); // cap by the source width ...
-        filterSize = FFMAX(filterSize, 1); // ... but keep at least one tap
-
-        FF_ALLOC_OR_GOTO(NULL, filter,
-                         dstW * sizeof(*filter) * filterSize, fail);
-
-        xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
-        for (i = 0; i < dstW; i++) {
-            int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
-            int j;
-            (*filterPos)[i] = xx;
-            for (j = 0; j < filterSize; j++) {
-                int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
-                double floatd;
-                int64_t coeff;
-
-                if (xInc > 1 << 16)
-                    d = d * dstW / srcW;
-                floatd = d * (1.0 / (1 << 30)); // d converted from 2^30 fixed point to double
-
-                if (flags & SWS_BICUBIC) {
-                    int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1 << 24);
-                    int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
-
-                    if (d >= 1LL << 31) {
-                        coeff = 0.0;
-                    } else {
-                        int64_t dd  = (d  * d) >> 30;
-                        int64_t ddd = (dd * d) >> 30;
-
-                        if (d < 1LL << 30)
-                            coeff =  (12 * (1 << 24) -  9 * B - 6 * C) * ddd +
-                                    (-18 * (1 << 24) + 12 * B + 6 * C) *  dd +
-                                      (6 * (1 << 24) -  2 * B)         * (1 << 30);
-                        else
-                            coeff =      (-B -  6 * C) * ddd +
-                                      (6 * B + 30 * C) * dd  +
-                                    (-12 * B - 48 * C) * d   +
-                                      (8 * B + 24 * C) * (1 << 30);
-                    }
-                    coeff /= (1LL<<54)/fone;
-                }
-#if 0
-                else if (flags & SWS_X) {
-                    double p  = param ? param * 0.01 : 0.3;
-                    coeff     = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
-                    coeff    *= pow(2.0, -p * d * d);
-                }
-#endif
-                else if (flags & SWS_X) {
-                    double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
-                    double c;
-
-                    if (floatd < 1.0)
-                        c = cos(floatd * M_PI);
-                    else
-                        c = -1.0;
-                    if (c < 0.0)
-                        c = -pow(-c, A);
-                    else
-                        c = pow(c, A);
-                    coeff = (c * 0.5 + 0.5) * fone;
-                } else if (flags & SWS_AREA) {
-                    int64_t d2 = d - (1 << 29);
-                    if (d2 * xInc < -(1LL << (29 + 16)))
-                        coeff = 1.0 * (1LL << (30 + 16));
-                    else if (d2 * xInc < (1LL << (29 + 16)))
-                        coeff = -d2 * xInc + (1LL << (29 + 16));
-                    else
-                        coeff = 0.0;
-                    coeff *= fone >> (30 + 16);
-                } else if (flags & SWS_GAUSS) {
-                    double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
-                    coeff = (pow(2.0, -p * floatd * floatd)) * fone;
-                } else if (flags & SWS_SINC) {
-                    coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
-                } else if (flags & SWS_LANCZOS) {
-                    double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
-                    coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
-                             (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
-                    if (floatd > p)
-                        coeff = 0;
-                } else if (flags & SWS_BILINEAR) {
-                    coeff = (1 << 30) - d;
-                    if (coeff < 0)
-                        coeff = 0;
-                    coeff *= fone >> 30;
-                } else if (flags & SWS_SPLINE) {
-                    double p = -2.196152422706632;
-                    coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
-                } else {
-                    av_assert0(0);
-                }
-
-                filter[i * filterSize + j] = coeff;
-                xx++;
-            }
-            xDstInSrc += 2 * xInc;
-        }
-    }
-
-    /* apply srcFilter to filter -> filter2 (dstFilter only widens filter2Size,
-     * its coefficients are not applied, see the FIXME below); filter is freed afterwards
-     */
-    av_assert0(filterSize > 0);
-    filter2Size = filterSize;
-    if (srcFilter)
-        filter2Size += srcFilter->length - 1;
-    if (dstFilter)
-        filter2Size += dstFilter->length - 1;
-    av_assert0(filter2Size > 0);
-    FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
-
-    for (i = 0; i < dstW; i++) {
-        int j, k;
-
-        if (srcFilter) {
-            for (k = 0; k < srcFilter->length; k++) {
-                for (j = 0; j < filterSize; j++)
-                    filter2[i * filter2Size + k + j] +=
-                        srcFilter->coeff[k] * filter[i * filterSize + j];
-            }
-        } else {
-            for (j = 0; j < filterSize; j++)
-                filter2[i * filter2Size + j] = filter[i * filterSize + j];
-        }
-        // FIXME dstFilter
-
-        (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2; // move the start position to account for the widened filter
-    }
-    av_freep(&filter); // filter is reallocated at its final size further down
-
-    /* try to reduce the filter-size (step1 find size and shift left) */
-    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
-    minFilterSize = 0;
-    for (i = dstW - 1; i >= 0; i--) {
-        int min = filter2Size;
-        int j;
-        int64_t cutOff = 0.0;
-
-        /* get rid of near zero elements on the left by shifting left */
-        for (j = 0; j < filter2Size; j++) {
-            int k;
-            cutOff += FFABS(filter2[i * filter2Size]);
-
-            if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
-                break;
-
-            /* preserve monotonicity because the core can't handle the
-             * filter otherwise */
-            if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
-                break;
-
-            // move filter coefficients left
-            for (k = 1; k < filter2Size; k++)
-                filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
-            filter2[i * filter2Size + k - 1] = 0;
-            (*filterPos)[i]++;
-        }
-
-        cutOff = 0;
-        /* count near zeros on the right */
-        for (j = filter2Size - 1; j > 0; j--) {
-            cutOff += FFABS(filter2[i * filter2Size + j]);
-
-            if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
-                break;
-            min--;
-        }
-
-        if (min > minFilterSize) // keep the largest per-row size needed over all rows
-            minFilterSize = min;
-    }
-
-    if (PPC_ALTIVEC(cpu_flags)) {
-        // we can handle the special case 4, so we don't want to go the full 8
-        if (minFilterSize < 5)
-            filterAlign = 4;
-
-        /* We really don't want to waste our time doing useless computation, so
-         * fall back on the scalar C code for very small filters.
-         * Vectorizing is worth it only if you have a decent-sized vector. */
-        if (minFilterSize < 3)
-            filterAlign = 1;
-    }
-
-    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
-        // special case for unscaled vertical filtering
-        if (minFilterSize == 1 && filterAlign == 2)
-            filterAlign = 1;
-    }
-
-    av_assert0(minFilterSize > 0);
-    filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1)); // round minFilterSize up to a multiple of filterAlign (the mask trick needs a power-of-two filterAlign)
-    av_assert0(filterSize > 0);
-    filter = av_malloc(filterSize * dstW * sizeof(*filter));
-    if (filterSize >= MAX_FILTER_SIZE * 16 /
-                      ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) { // this branch is also taken when av_malloc() returned NULL
-        av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreme scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize);
-        goto fail;
-    }
-    *outFilterSize = filterSize;
-
-    if (flags & SWS_PRINT_INFO)
-        av_log(NULL, AV_LOG_VERBOSE,
-               "SwScaler: reducing / aligning filtersize %d -> %d\n",
-               filter2Size, filterSize);
-    /* try to reduce the filter-size (step2 reduce it) */
-    for (i = 0; i < dstW; i++) {
-        int j;
-
-        for (j = 0; j < filterSize; j++) {
-            if (j >= filter2Size)
-                filter[i * filterSize + j] = 0;
-            else
-                filter[i * filterSize + j] = filter2[i * filter2Size + j];
-            if ((flags & SWS_BITEXACT) && j >= minFilterSize)
-                filter[i * filterSize + j] = 0;
-        }
-    }
-
-    // FIXME try to align filterPos if possible
-
-    // fix borders
-    for (i = 0; i < dstW; i++) {
-        int j;
-        if ((*filterPos)[i] < 0) {
-            // move filter coefficients left to compensate for filterPos
-            for (j = 1; j < filterSize; j++) {
-                int left = FFMAX(j + (*filterPos)[i], 0);
-                filter[i * filterSize + left] += filter[i * filterSize + j];
-                filter[i * filterSize + j]     = 0;
-            }
-            (*filterPos)[i]= 0;
-        }
-
-        if ((*filterPos)[i] + filterSize > srcW) {
-            int shift = (*filterPos)[i] + filterSize - srcW;
-            // move filter coefficients right to compensate for filterPos
-            for (j = filterSize - 2; j >= 0; j--) {
-                int right = FFMIN(j + shift, filterSize - 1);
-                filter[i * filterSize + right] += filter[i * filterSize + j];
-                filter[i * filterSize + j]      = 0;
-            }
-            (*filterPos)[i]= srcW - filterSize;
-        }
-    }
-
-    // Note the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
-    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
-    FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
-                      *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
-
-    /* normalize & store in outFilter */
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int64_t error = 0;
-        int64_t sum   = 0;
-
-        for (j = 0; j < filterSize; j++) {
-            sum += filter[i * filterSize + j];
-        }
-        sum = (sum + one / 2) / one; // rounded division: dividing by this makes the stored row sum to about 'one'
-        if (!sum) {
-            av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n");
-            sum = 1;
-        }
-        for (j = 0; j < *outFilterSize; j++) {
-            int64_t v = filter[i * filterSize + j] + error;
-            int intV  = ROUNDED_DIV(v, sum);
-            (*outFilter)[i * (*outFilterSize) + j] = intV;
-            error                                  = v - intV * sum; // carry the rounding remainder into the next coefficient
-        }
-    }
-
-    (*filterPos)[dstW + 0] =
-    (*filterPos)[dstW + 1] =
-    (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will
-                                                      * read over the end */
-    for (i = 0; i < *outFilterSize; i++) { // replicate the last row of coefficients into the 3 padding rows
-        int k = (dstW - 1) * (*outFilterSize) + i;
-        (*outFilter)[k + 1 * (*outFilterSize)] =
-        (*outFilter)[k + 2 * (*outFilterSize)] =
-        (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
-    }
-
-    ret = 0;
-
-fail: // also reached on success (ret == 0); the temporaries below are freed on both paths
-    if(ret < 0)
-        av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n");
-    av_free(filter);
-    av_free(filter2);
-    return ret;
-}
-
-#if HAVE_MMXEXT_INLINE
-static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
-                                       int16_t *filter, int32_t *filterPos,
-                                       int numSplits)
-{
-    uint8_t *fragmentA;
-    x86_reg imm8OfPShufW1A;
-    x86_reg imm8OfPShufW2A;
-    x86_reg fragmentLengthA;
-    uint8_t *fragmentB;
-    x86_reg imm8OfPShufW1B;
-    x86_reg imm8OfPShufW2B;
-    x86_reg fragmentLengthB;
-    int fragmentPos;
-
-    int xpos, i;
-
-    // create an optimized horizontal scaling routine
-    /* This scaler is made of runtime-generated MMXEXT code using specially tuned
-     * pshufw instructions. For every four output pixels, if four input pixels
-     * are enough for the fast bilinear scaling, then a chunk of fragmentB is
-     * used. If five input pixels are needed, then a chunk of fragmentA is used.
-     */
-
-    // code fragment
-
-    __asm__ volatile (
-        "jmp                         9f                 \n\t"
-        // Begin
-        "0:                                             \n\t"
-        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
-        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
-        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
-        "punpcklbw                %%mm7, %%mm1          \n\t"
-        "punpcklbw                %%mm7, %%mm0          \n\t"
-        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
-        "1:                                             \n\t"
-        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
-        "2:                                             \n\t"
-        "psubw                    %%mm1, %%mm0          \n\t"
-        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
-        "pmullw                   %%mm3, %%mm0          \n\t"
-        "psllw                       $7, %%mm1          \n\t"
-        "paddw                    %%mm1, %%mm0          \n\t"
-
-        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
-
-        "add                         $8, %%"REG_a"      \n\t"
-        // End
-        "9:                                             \n\t"
-        // "int $3                                         \n\t"
-        "lea       " LOCAL_MANGLE(0b) ", %0             \n\t"
-        "lea       " LOCAL_MANGLE(1b) ", %1             \n\t"
-        "lea       " LOCAL_MANGLE(2b) ", %2             \n\t"
-        "dec                         %1                 \n\t"
-        "dec                         %2                 \n\t"
-        "sub                         %0, %1             \n\t"
-        "sub                         %0, %2             \n\t"
-        "lea       " LOCAL_MANGLE(9b) ", %3             \n\t"
-        "sub                         %0, %3             \n\t"
-
-
-        : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
-          "=r" (fragmentLengthA)
-        );
-
-    __asm__ volatile (
-        "jmp                         9f                 \n\t"
-        // Begin
-        "0:                                             \n\t"
-        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
-        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
-        "punpcklbw                %%mm7, %%mm0          \n\t"
-        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
-        "1:                                             \n\t"
-        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
-        "2:                                             \n\t"
-        "psubw                    %%mm1, %%mm0          \n\t"
-        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
-        "pmullw                   %%mm3, %%mm0          \n\t"
-        "psllw                       $7, %%mm1          \n\t"
-        "paddw                    %%mm1, %%mm0          \n\t"
-
-        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
-
-        "add                         $8, %%"REG_a"      \n\t"
-        // End
-        "9:                                             \n\t"
-        // "int                       $3                   \n\t"
-        "lea       " LOCAL_MANGLE(0b) ", %0             \n\t"
-        "lea       " LOCAL_MANGLE(1b) ", %1             \n\t"
-        "lea       " LOCAL_MANGLE(2b) ", %2             \n\t"
-        "dec                         %1                 \n\t"
-        "dec                         %2                 \n\t"
-        "sub                         %0, %1             \n\t"
-        "sub                         %0, %2             \n\t"
-        "lea       " LOCAL_MANGLE(9b) ", %3             \n\t"
-        "sub                         %0, %3             \n\t"
-
-
-        : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
-          "=r" (fragmentLengthB)
-        );
-
-    xpos        = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
-    fragmentPos = 0;
-
-    for (i = 0; i < dstW / numSplits; i++) {
-        int xx = xpos >> 16;
-
-        if ((i & 3) == 0) {
-            int a                  = 0;
-            int b                  = ((xpos + xInc) >> 16) - xx;
-            int c                  = ((xpos + xInc * 2) >> 16) - xx;
-            int d                  = ((xpos + xInc * 3) >> 16) - xx;
-            int inc                = (d + 1 < 4);
-            uint8_t *fragment      = inc ? fragmentB : fragmentA;
-            x86_reg imm8OfPShufW1  = inc ? imm8OfPShufW1B : imm8OfPShufW1A;
-            x86_reg imm8OfPShufW2  = inc ? imm8OfPShufW2B : imm8OfPShufW2A;
-            x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA;
-            int maxShift           = 3 - (d + inc);
-            int shift              = 0;
-
-            if (filterCode) {
-                filter[i]        = ((xpos              & 0xFFFF) ^ 0xFFFF) >> 9;
-                filter[i + 1]    = (((xpos + xInc)     & 0xFFFF) ^ 0xFFFF) >> 9;
-                filter[i + 2]    = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
-                filter[i + 3]    = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
-                filterPos[i / 2] = xx;
-
-                memcpy(filterCode + fragmentPos, fragment, fragmentLength);
-
-                filterCode[fragmentPos + imm8OfPShufW1] =  (a + inc)       |
-                                                          ((b + inc) << 2) |
-                                                          ((c + inc) << 4) |
-                                                          ((d + inc) << 6);
-                filterCode[fragmentPos + imm8OfPShufW2] =  a | (b << 2) |
-                                                               (c << 4) |
-                                                               (d << 6);
-
-                if (i + 4 - inc >= dstW)
-                    shift = maxShift;               // avoid overread
-                else if ((filterPos[i / 2] & 3) <= maxShift)
-                    shift = filterPos[i / 2] & 3;   // align
-
-                if (shift && i >= shift) {
-                    filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
-                    filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
-                    filterPos[i / 2]                        -= shift;
-                }
-            }
-
-            fragmentPos += fragmentLength;
-
-            if (filterCode)
-                filterCode[fragmentPos] = RET;
-        }
-        xpos += xInc;
-    }
-    if (filterCode)
-        filterPos[((i / 2) + 1) & (~1)] = xpos >> 16;  // needed to jump to the next part
-
-    return fragmentPos + 1;
-}
-#endif /* HAVE_MMXEXT_INLINE */
-
-static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange)
-{
-    int64_t W, V, Z, Cy, Cu, Cv;
-    int64_t vr =  table[0];
-    int64_t ub =  table[1];
-    int64_t ug = -table[2];
-    int64_t vg = -table[3];
-    int64_t ONE = 65536;
-    int64_t cy = ONE;
-    uint8_t *p = (uint8_t*)c->input_rgb2yuv_table;
-    int i;
-    static const int8_t map[] = {
-    BY_IDX, GY_IDX, -1    , BY_IDX, BY_IDX, GY_IDX, -1    , BY_IDX,
-    RY_IDX, -1    , GY_IDX, RY_IDX, RY_IDX, -1    , GY_IDX, RY_IDX,
-    RY_IDX, GY_IDX, -1    , RY_IDX, RY_IDX, GY_IDX, -1    , RY_IDX,
-    BY_IDX, -1    , GY_IDX, BY_IDX, BY_IDX, -1    , GY_IDX, BY_IDX,
-    BU_IDX, GU_IDX, -1    , BU_IDX, BU_IDX, GU_IDX, -1    , BU_IDX,
-    RU_IDX, -1    , GU_IDX, RU_IDX, RU_IDX, -1    , GU_IDX, RU_IDX,
-    RU_IDX, GU_IDX, -1    , RU_IDX, RU_IDX, GU_IDX, -1    , RU_IDX,
-    BU_IDX, -1    , GU_IDX, BU_IDX, BU_IDX, -1    , GU_IDX, BU_IDX,
-    BV_IDX, GV_IDX, -1    , BV_IDX, BV_IDX, GV_IDX, -1    , BV_IDX,
-    RV_IDX, -1    , GV_IDX, RV_IDX, RV_IDX, -1    , GV_IDX, RV_IDX,
-    RV_IDX, GV_IDX, -1    , RV_IDX, RV_IDX, GV_IDX, -1    , RV_IDX,
-    BV_IDX, -1    , GV_IDX, BV_IDX, BV_IDX, -1    , GV_IDX, BV_IDX,
-    RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX,
-    BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX,
-    GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    ,
-    -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX,
-    RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX,
-    BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX,
-    GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    ,
-    -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX,
-    RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX,
-    BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX,
-    GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    ,
-    -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, //23
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //24
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //25
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //26
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //27
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //28
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //29
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //30
-    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //31
-    BY_IDX, GY_IDX, RY_IDX, -1    , -1    , -1    , -1    , -1    , //32
-    BU_IDX, GU_IDX, RU_IDX, -1    , -1    , -1    , -1    , -1    , //33
-    BV_IDX, GV_IDX, RV_IDX, -1    , -1    , -1    , -1    , -1    , //34
-    };
-
-    dstRange = 0; //FIXME range = 1 is handled elsewhere
-
-    if (!dstRange) {
-        cy = cy * 255 / 219;
-    } else {
-        vr = vr * 224 / 255;
-        ub = ub * 224 / 255;
-        ug = ug * 224 / 255;
-        vg = vg * 224 / 255;
-    }
-    W = ROUNDED_DIV(ONE*ONE*ug, ub);
-    V = ROUNDED_DIV(ONE*ONE*vg, vr);
-    Z = ONE*ONE-W-V;
-
-    Cy = ROUNDED_DIV(cy*Z, ONE);
-    Cu = ROUNDED_DIV(ub*Z, ONE);
-    Cv = ROUNDED_DIV(vr*Z, ONE);
-
-    c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V        , Cy);
-    c->input_rgb2yuv_table[GY_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE  , Cy);
-    c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W        , Cy);
-
-    c->input_rgb2yuv_table[RU_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V        , Cu);
-    c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE  , Cu);
-    c->input_rgb2yuv_table[BU_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W)    , Cu);
-
-    c->input_rgb2yuv_table[RV_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z)    , Cv);
-    c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE  , Cv);
-    c->input_rgb2yuv_table[BV_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W        , Cv);
-
-    if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) {
-        c->input_rgb2yuv_table[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-        c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
-    }
-    for(i=0; i<FF_ARRAY_ELEMS(map); i++)
-        AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0);
-}
-
-static void fill_xyztables(struct SwsContext *c)
-{
-    int i;
-    double xyzgamma = XYZ_GAMMA;
-    double rgbgamma = 1.0 / RGB_GAMMA;
-    double xyzgammainv = 1.0 / XYZ_GAMMA;
-    double rgbgammainv = RGB_GAMMA;
-    static const int16_t xyz2rgb_matrix[3][4] = {
-        {13270, -6295, -2041},
-        {-3969,  7682,   170},
-        {  228,  -835,  4329} };
-    static const int16_t rgb2xyz_matrix[3][4] = {
-        {1689, 1464,  739},
-        { 871, 2929,  296},
-        {  79,  488, 3891} };
-    static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];
-
-    memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
-    memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));
-    c->xyzgamma = xyzgamma_tab;
-    c->rgbgamma = rgbgamma_tab;
-    c->xyzgammainv = xyzgammainv_tab;
-    c->rgbgammainv = rgbgammainv_tab;
-
-    if (rgbgamma_tab[4095])
-        return;
-
-    /* set gamma vectors */
-    for (i = 0; i < 4096; i++) {
-        xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
-        rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
-        xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0);
-        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0);
-    }
-}
-
-int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
-                             int srcRange, const int table[4], int dstRange,
-                             int brightness, int contrast, int saturation)
-{
-    const AVPixFmtDescriptor *desc_dst;
-    const AVPixFmtDescriptor *desc_src;
-    memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
-    memmove(c->dstColorspaceTable, table, sizeof(int) * 4);
-
-    handle_formats(c);
-    desc_dst = av_pix_fmt_desc_get(c->dstFormat);
-    desc_src = av_pix_fmt_desc_get(c->srcFormat);
-
-    if(!isYUV(c->dstFormat) && !isGray(c->dstFormat))
-        dstRange = 0;
-    if(!isYUV(c->srcFormat) && !isGray(c->srcFormat))
-        srcRange = 0;
-
-    c->brightness = brightness;
-    c->contrast   = contrast;
-    c->saturation = saturation;
-    c->srcRange   = srcRange;
-    c->dstRange   = dstRange;
-
-    if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat)))
-        return -1;
-
-    c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
-    c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
-
-    if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
-        ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
-                                 contrast, saturation);
-        // FIXME factorize
-
-        if (ARCH_PPC)
-            ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness,
-                                       contrast, saturation);
-    }
-
-    fill_rgb2yuv_table(c, table, dstRange);
-
-    return 0;
-}
-
-int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
-                             int *srcRange, int **table, int *dstRange,
-                             int *brightness, int *contrast, int *saturation)
-{
-    if (!c )
-        return -1;
-
-    *inv_table  = c->srcColorspaceTable;
-    *table      = c->dstColorspaceTable;
-    *srcRange   = c->srcRange;
-    *dstRange   = c->dstRange;
-    *brightness = c->brightness;
-    *contrast   = c->contrast;
-    *saturation = c->saturation;
-
-    return 0;
-}
-
-static int handle_jpeg(enum AVPixelFormat *format)
-{
-    switch (*format) {
-    case AV_PIX_FMT_YUVJ420P:
-        *format = AV_PIX_FMT_YUV420P;
-        return 1;
-    case AV_PIX_FMT_YUVJ411P:
-        *format = AV_PIX_FMT_YUV411P;
-        return 1;
-    case AV_PIX_FMT_YUVJ422P:
-        *format = AV_PIX_FMT_YUV422P;
-        return 1;
-    case AV_PIX_FMT_YUVJ444P:
-        *format = AV_PIX_FMT_YUV444P;
-        return 1;
-    case AV_PIX_FMT_YUVJ440P:
-        *format = AV_PIX_FMT_YUV440P;
-        return 1;
-    case AV_PIX_FMT_GRAY8:
-        return 1;
-    default:
-        return 0;
-    }
-}
-
-static int handle_0alpha(enum AVPixelFormat *format)
-{
-    switch (*format) {
-    case AV_PIX_FMT_0BGR    : *format = AV_PIX_FMT_ABGR   ; return 1;
-    case AV_PIX_FMT_BGR0    : *format = AV_PIX_FMT_BGRA   ; return 4;
-    case AV_PIX_FMT_0RGB    : *format = AV_PIX_FMT_ARGB   ; return 1;
-    case AV_PIX_FMT_RGB0    : *format = AV_PIX_FMT_RGBA   ; return 4;
-    default:                                          return 0;
-    }
-}
-
-static int handle_xyz(enum AVPixelFormat *format)
-{
-    switch (*format) {
-    case AV_PIX_FMT_XYZ12BE : *format = AV_PIX_FMT_RGB48BE; return 1;
-    case AV_PIX_FMT_XYZ12LE : *format = AV_PIX_FMT_RGB48LE; return 1;
-    default:                                                return 0;
-    }
-}
-
-static void handle_formats(SwsContext *c)
-{
-    c->src0Alpha |= handle_0alpha(&c->srcFormat);
-    c->dst0Alpha |= handle_0alpha(&c->dstFormat);
-    c->srcXYZ    |= handle_xyz(&c->srcFormat);
-    c->dstXYZ    |= handle_xyz(&c->dstFormat);
-    if (c->srcXYZ || c->dstXYZ)
-        fill_xyztables(c);
-}
-
-SwsContext *sws_alloc_context(void)
-{
-    SwsContext *c = av_mallocz(sizeof(SwsContext));
-
-    if (c) {
-        c->av_class = &sws_context_class;
-        av_opt_set_defaults(c);
-    }
-
-    return c;
-}
-
-av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
-                             SwsFilter *dstFilter)
-{
-    int i, j;
-    int usesVFilter, usesHFilter;
-    int unscaled;
-    SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
-    int srcW              = c->srcW;
-    int srcH              = c->srcH;
-    int dstW              = c->dstW;
-    int dstH              = c->dstH;
-    int dst_stride        = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
-    int flags, cpu_flags;
-    enum AVPixelFormat srcFormat = c->srcFormat;
-    enum AVPixelFormat dstFormat = c->dstFormat;
-    const AVPixFmtDescriptor *desc_src;
-    const AVPixFmtDescriptor *desc_dst;
-
-    cpu_flags = av_get_cpu_flags();
-    flags     = c->flags;
-    emms_c();
-    if (!rgb15to16)
-        sws_rgb2rgb_init();
-
-    unscaled = (srcW == dstW && srcH == dstH);
-
-    c->srcRange |= handle_jpeg(&c->srcFormat);
-    c->dstRange |= handle_jpeg(&c->dstFormat);
-
-    if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat)
-        av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n");
-
-    if (!c->contrast && !c->saturation && !c->dstFormatBpp)
-        sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
-                                 ff_yuv2rgb_coeffs[SWS_CS_DEFAULT],
-                                 c->dstRange, 0, 1 << 16, 1 << 16);
-
-    handle_formats(c);
-    srcFormat = c->srcFormat;
-    dstFormat = c->dstFormat;
-    desc_src = av_pix_fmt_desc_get(srcFormat);
-    desc_dst = av_pix_fmt_desc_get(dstFormat);
-
-    if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) &&
-          av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) {
-    if (!sws_isSupportedInput(srcFormat)) {
-        av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n",
-               av_get_pix_fmt_name(srcFormat));
-        return AVERROR(EINVAL);
-    }
-    if (!sws_isSupportedOutput(dstFormat)) {
-        av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n",
-               av_get_pix_fmt_name(dstFormat));
-        return AVERROR(EINVAL);
-    }
-    }
-
-    i = flags & (SWS_POINT         |
-                 SWS_AREA          |
-                 SWS_BILINEAR      |
-                 SWS_FAST_BILINEAR |
-                 SWS_BICUBIC       |
-                 SWS_X             |
-                 SWS_GAUSS         |
-                 SWS_LANCZOS       |
-                 SWS_SINC          |
-                 SWS_SPLINE        |
-                 SWS_BICUBLIN);
-
-    /* provide a default scaler if not set by caller */
-    if (!i) {
-        if (dstW < srcW && dstH < srcH)
-            flags |= SWS_BICUBIC;
-        else if (dstW > srcW && dstH > srcH)
-            flags |= SWS_BICUBIC;
-        else
-            flags |= SWS_BICUBIC;
-        c->flags = flags;
-    } else if (i & (i - 1)) {
-        av_log(c, AV_LOG_ERROR,
-               "Exactly one scaler algorithm must be chosen, got %X\n", i);
-        return AVERROR(EINVAL);
-    }
-    /* sanity check */
-    if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) {
-        /* FIXME check if these are enough and try to lower them after
-         * fixing the relevant parts of the code */
-        av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n",
-               srcW, srcH, dstW, dstH);
-        return AVERROR(EINVAL);
-    }
-
-    if (!dstFilter)
-        dstFilter = &dummyFilter;
-    if (!srcFilter)
-        srcFilter = &dummyFilter;
-
-    c->lumXInc      = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
-    c->lumYInc      = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
-    c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
-    c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
-    c->vRounder     = 4 * 0x0001000100010001ULL;
-
-    usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) ||
-                  (srcFilter->chrV && srcFilter->chrV->length > 1) ||
-                  (dstFilter->lumV && dstFilter->lumV->length > 1) ||
-                  (dstFilter->chrV && dstFilter->chrV->length > 1);
-    usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) ||
-                  (srcFilter->chrH && srcFilter->chrH->length > 1) ||
-                  (dstFilter->lumH && dstFilter->lumH->length > 1) ||
-                  (dstFilter->chrH && dstFilter->chrH->length > 1);
-
-    av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample);
-    av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample);
-
-    if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
-        if (dstW&1) {
-            av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n");
-            flags |= SWS_FULL_CHR_H_INT;
-            c->flags = flags;
-        }
-
-        if (   c->chrSrcHSubSample == 0
-            && c->chrSrcVSubSample == 0
-            && c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER
-            && !(c->flags & SWS_FAST_BILINEAR)
-        ) {
-            av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n");
-            flags |= SWS_FULL_CHR_H_INT;
-            c->flags = flags;
-        }
-    }
-
-    if (c->dither == SWS_DITHER_AUTO) {
-        if (flags & SWS_ERROR_DIFFUSION)
-            c->dither = SWS_DITHER_ED;
-    }
-
-    if(dstFormat == AV_PIX_FMT_BGR4_BYTE ||
-       dstFormat == AV_PIX_FMT_RGB4_BYTE ||
-       dstFormat == AV_PIX_FMT_BGR8 ||
-       dstFormat == AV_PIX_FMT_RGB8) {
-        if (c->dither == SWS_DITHER_AUTO)
-            c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER;
-        if (!(flags & SWS_FULL_CHR_H_INT)) {
-            if (c->dither == SWS_DITHER_ED) {
-                av_log(c, AV_LOG_DEBUG,
-                    "Desired dithering only supported in full chroma interpolation for destination format '%s'\n",
-                    av_get_pix_fmt_name(dstFormat));
-                flags   |= SWS_FULL_CHR_H_INT;
-                c->flags = flags;
-            }
-        }
-        if (flags & SWS_FULL_CHR_H_INT) {
-            if (c->dither == SWS_DITHER_BAYER) {
-                av_log(c, AV_LOG_DEBUG,
-                    "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
-                    av_get_pix_fmt_name(dstFormat));
-                c->dither = SWS_DITHER_ED;
-            }
-        }
-    }
-    if (isPlanarRGB(dstFormat)) {
-        if (!(flags & SWS_FULL_CHR_H_INT)) {
-            av_log(c, AV_LOG_DEBUG,
-                   "%s output is not supported with half chroma resolution, switching to full\n",
-                   av_get_pix_fmt_name(dstFormat));
-            flags   |= SWS_FULL_CHR_H_INT;
-            c->flags = flags;
-        }
-    }
-
-    /* reuse chroma for 2 pixels RGB/BGR unless user wants full
-     * chroma interpolation */
-    if (flags & SWS_FULL_CHR_H_INT &&
-        isAnyRGB(dstFormat)        &&
-        !isPlanarRGB(dstFormat)    &&
-        dstFormat != AV_PIX_FMT_RGBA  &&
-        dstFormat != AV_PIX_FMT_ARGB  &&
-        dstFormat != AV_PIX_FMT_BGRA  &&
-        dstFormat != AV_PIX_FMT_ABGR  &&
-        dstFormat != AV_PIX_FMT_RGB24 &&
-        dstFormat != AV_PIX_FMT_BGR24 &&
-        dstFormat != AV_PIX_FMT_BGR4_BYTE &&
-        dstFormat != AV_PIX_FMT_RGB4_BYTE &&
-        dstFormat != AV_PIX_FMT_BGR8 &&
-        dstFormat != AV_PIX_FMT_RGB8
-    ) {
-        av_log(c, AV_LOG_WARNING,
-               "full chroma interpolation for destination format '%s' not yet implemented\n",
-               av_get_pix_fmt_name(dstFormat));
-        flags   &= ~SWS_FULL_CHR_H_INT;
-        c->flags = flags;
-    }
-    if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
-        c->chrDstHSubSample = 1;
-
-    // drop some chroma lines if the user wants it
-    c->vChrDrop          = (flags & SWS_SRC_V_CHR_DROP_MASK) >>
-                           SWS_SRC_V_CHR_DROP_SHIFT;
-    c->chrSrcVSubSample += c->vChrDrop;
-
-    /* drop every other pixel for chroma calculation unless user
-     * wants full chroma */
-    if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP)   &&
-        srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 &&
-        srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 &&
-        srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE &&
-        srcFormat != AV_PIX_FMT_GBRP9BE   && srcFormat != AV_PIX_FMT_GBRP9LE  &&
-        srcFormat != AV_PIX_FMT_GBRP10BE  && srcFormat != AV_PIX_FMT_GBRP10LE &&
-        srcFormat != AV_PIX_FMT_GBRP12BE  && srcFormat != AV_PIX_FMT_GBRP12LE &&
-        srcFormat != AV_PIX_FMT_GBRP14BE  && srcFormat != AV_PIX_FMT_GBRP14LE &&
-        srcFormat != AV_PIX_FMT_GBRP16BE  && srcFormat != AV_PIX_FMT_GBRP16LE &&
-        ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) ||
-         (flags & SWS_FAST_BILINEAR)))
-        c->chrSrcHSubSample = 1;
-
-    // Note the FF_CEIL_RSHIFT is so that we always round toward +inf.
-    c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample);
-    c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample);
-    c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
-    c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample);
-
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
-
-    /* unscaled special cases */
-    if (unscaled && !usesHFilter && !usesVFilter &&
-        (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
-        ff_get_unscaled_swscale(c);
-
-        if (c->swscale) {
-            if (flags & SWS_PRINT_INFO)
-                av_log(c, AV_LOG_INFO,
-                       "using unscaled %s -> %s special converter\n",
-                       av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
-            return 0;
-        }
-    }
-
-    c->srcBpc = 1 + desc_src->comp[0].depth_minus1;
-    if (c->srcBpc < 8)
-        c->srcBpc = 8;
-    c->dstBpc = 1 + desc_dst->comp[0].depth_minus1;
-    if (c->dstBpc < 8)
-        c->dstBpc = 8;
-    if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
-        c->srcBpc = 16;
-    if (c->dstBpc == 16)
-        dst_stride <<= 1;
-
-    if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) {
-        c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 &&
-                             c->chrDstW >= c->chrSrcW &&
-                             (srcW & 15) == 0;
-        if (!c->canMMXEXTBeUsed && dstW >= srcW && c->chrDstW >= c->chrSrcW && (srcW & 15) == 0
-
-            && (flags & SWS_FAST_BILINEAR)) {
-            if (flags & SWS_PRINT_INFO)
-                av_log(c, AV_LOG_INFO,
-                       "output width is not a multiple of 32 -> no MMXEXT scaler\n");
-        }
-        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat))
-            c->canMMXEXTBeUsed = 0;
-    } else
-        c->canMMXEXTBeUsed = 0;
-
-    c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
-    c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
-
-    /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
-     * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
-     * correct scaling.
-     * n-2 is the last chrominance sample available.
-     * This is not perfect, but no one should notice the difference, the more
-     * correct variant would be like the vertical one, but that would require
-     * some special code for the first and last pixel */
-    if (flags & SWS_FAST_BILINEAR) {
-        if (c->canMMXEXTBeUsed) {
-            c->lumXInc += 20;
-            c->chrXInc += 20;
-        }
-        // we don't use the x86 asm scaler if MMX is available
-        else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) {
-            c->lumXInc = ((int64_t)(srcW       - 2) << 16) / (dstW       - 2) - 20;
-            c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
-        }
-    }
-
-#define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
-
-    /* precalculate horizontal scaler filter coefficients */
-    {
-#if HAVE_MMXEXT_INLINE
-// can't downscale !!!
-        if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
-            c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL,
-                                                             NULL, NULL, 8);
-            c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc,
-                                                             NULL, NULL, NULL, 4);
-
-#if USE_MMAP
-            c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
-                                          PROT_READ | PROT_WRITE,
-                                          MAP_PRIVATE | MAP_ANONYMOUS,
-                                          -1, 0);
-            c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize,
-                                          PROT_READ | PROT_WRITE,
-                                          MAP_PRIVATE | MAP_ANONYMOUS,
-                                          -1, 0);
-#elif HAVE_VIRTUALALLOC
-            c->lumMmxextFilterCode = VirtualAlloc(NULL,
-                                                  c->lumMmxextFilterCodeSize,
-                                                  MEM_COMMIT,
-                                                  PAGE_EXECUTE_READWRITE);
-            c->chrMmxextFilterCode = VirtualAlloc(NULL,
-                                                  c->chrMmxextFilterCodeSize,
-                                                  MEM_COMMIT,
-                                                  PAGE_EXECUTE_READWRITE);
-#else
-            c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize);
-            c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize);
-#endif
-
-#ifdef MAP_ANONYMOUS
-            if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED)
-#else
-            if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode)
-#endif
-            {
-                av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
-                return AVERROR(ENOMEM);
-            }
-
-            FF_ALLOCZ_OR_GOTO(c, c->hLumFilter,    (dstW           / 8 + 8) * sizeof(int16_t), fail);
-            FF_ALLOCZ_OR_GOTO(c, c->hChrFilter,    (c->chrDstW     / 4 + 8) * sizeof(int16_t), fail);
-            FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW       / 2 / 8 + 8) * sizeof(int32_t), fail);
-            FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
-
-            init_hscaler_mmxext(      dstW, c->lumXInc, c->lumMmxextFilterCode,
-                                c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
-            init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
-                                c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
-
-#if USE_MMAP
-            if (   mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1
-                || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) {
-                av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n");
-                goto fail;
-            }
-#endif
-        } else
-#endif /* HAVE_MMXEXT_INLINE */
-        {
-            const int filterAlign = X86_MMX(cpu_flags)     ? 4 :
-                                    PPC_ALTIVEC(cpu_flags) ? 8 : 1;
-
-            if (initFilter(&c->hLumFilter, &c->hLumFilterPos,
-                           &c->hLumFilterSize, c->lumXInc,
-                           srcW, dstW, filterAlign, 1 << 14,
-                           (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
-                           cpu_flags, srcFilter->lumH, dstFilter->lumH,
-                           c->param,
-                           get_local_pos(c, 0, 0, 0),
-                           get_local_pos(c, 0, 0, 0)) < 0)
-                goto fail;
-            if (initFilter(&c->hChrFilter, &c->hChrFilterPos,
-                           &c->hChrFilterSize, c->chrXInc,
-                           c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
-                           (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
-                           cpu_flags, srcFilter->chrH, dstFilter->chrH,
-                           c->param,
-                           get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
-                           get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0)
-                goto fail;
-        }
-    } // initialize horizontal stuff
-
-    /* precalculate vertical scaler filter coefficients */
-    {
-        const int filterAlign = X86_MMX(cpu_flags)     ? 2 :
-                                PPC_ALTIVEC(cpu_flags) ? 8 : 1;
-
-        if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
-                       c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
-                       (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
-                       cpu_flags, srcFilter->lumV, dstFilter->lumV,
-                       c->param,
-                       get_local_pos(c, 0, 0, 1),
-                       get_local_pos(c, 0, 0, 1)) < 0)
-            goto fail;
-        if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
-                       c->chrYInc, c->chrSrcH, c->chrDstH,
-                       filterAlign, (1 << 12),
-                       (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
-                       cpu_flags, srcFilter->chrV, dstFilter->chrV,
-                       c->param,
-                       get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1),
-                       get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0)
-
-            goto fail;
-
-#if HAVE_ALTIVEC
-        FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH,    fail);
-        FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);
-
-        for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
-            int j;
-            short *p = (short *)&c->vYCoeffsBank[i];
-            for (j = 0; j < 8; j++)
-                p[j] = c->vLumFilter[i];
-        }
-
-        for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
-            int j;
-            short *p = (short *)&c->vCCoeffsBank[i];
-            for (j = 0; j < 8; j++)
-                p[j] = c->vChrFilter[i];
-        }
-#endif
-    }
-
-    // calculate buffer sizes so that they won't run out while handling these damn slices
-    c->vLumBufSize = c->vLumFilterSize;
-    c->vChrBufSize = c->vChrFilterSize;
-    for (i = 0; i < dstH; i++) {
-        int chrI      = (int64_t)i * c->chrDstH / dstH;
-        int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
-                              ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
-                               << c->chrSrcVSubSample));
-
-        nextSlice >>= c->chrSrcVSubSample;
-        nextSlice <<= c->chrSrcVSubSample;
-        if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
-            c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
-        if (c->vChrFilterPos[chrI] + c->vChrBufSize <
-            (nextSlice >> c->chrSrcVSubSample))
-            c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
-                             c->vChrFilterPos[chrI];
-    }
-
-    for (i = 0; i < 4; i++)
-        FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail);
-
-    /* Allocate pixbufs (we use dynamic allocation because otherwise we would
-     * need to allocate several megabytes to handle all possible cases) */
-    FF_ALLOC_OR_GOTO(c, c->lumPixBuf,  c->vLumBufSize * 3 * sizeof(int16_t *), fail);
-    FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
-    FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
-    if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
-        FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
-    /* Note we need at least one pixel more at the end because of the MMX code
-     * (just in case someone wants to replace the 4000/8000). */
-    /* align at 16 bytes for AltiVec */
-    for (i = 0; i < c->vLumBufSize; i++) {
-        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize],
-                          dst_stride + 16, fail);
-        c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
-    }
-    // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
-    c->uv_off   = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
-    c->uv_offx2 = dst_stride + 16;
-    for (i = 0; i < c->vChrBufSize; i++) {
-        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize],
-                         dst_stride * 2 + 32, fail);
-        c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
-        c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
-                         = c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
-    }
-    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-        for (i = 0; i < c->vLumBufSize; i++) {
-            FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize],
-                              dst_stride + 16, fail);
-            c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize];
-        }
-
-    // try to avoid drawing green stuff between the right end and the stride end
-    for (i = 0; i < c->vChrBufSize; i++)
-        if(desc_dst->comp[0].depth_minus1 == 15){
-            av_assert0(c->dstBpc > 14);
-            for(j=0; j<dst_stride/2+1; j++)
-                ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
-        } else
-            for(j=0; j<dst_stride+1; j++)
-                ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
-
-    av_assert0(c->chrDstH <= dstH);
-
-    if (flags & SWS_PRINT_INFO) {
-        const char *scaler = NULL, *cpucaps;
-
-        for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) {
-            if (flags & scale_algorithms[i].flag) {
-                scaler = scale_algorithms[i].description;
-                break;
-            }
-        }
-        if (!scaler)
-            scaler =  "ehh flags invalid?!";
-        av_log(c, AV_LOG_INFO, "%s scaler, from %s to %s%s ",
-               scaler,
-               av_get_pix_fmt_name(srcFormat),
-#ifdef DITHER1XBPP
-               dstFormat == AV_PIX_FMT_BGR555   || dstFormat == AV_PIX_FMT_BGR565   ||
-               dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE ||
-               dstFormat == AV_PIX_FMT_BGR444BE || dstFormat == AV_PIX_FMT_BGR444LE ?
-                                                             "dithered " : "",
-#else
-               "",
-#endif
-               av_get_pix_fmt_name(dstFormat));
-
-        if (INLINE_MMXEXT(cpu_flags))
-            cpucaps = "MMXEXT";
-        else if (INLINE_AMD3DNOW(cpu_flags))
-            cpucaps = "3DNOW";
-        else if (INLINE_MMX(cpu_flags))
-            cpucaps = "MMX";
-        else if (PPC_ALTIVEC(cpu_flags))
-            cpucaps = "AltiVec";
-        else
-            cpucaps = "C";
-
-        av_log(c, AV_LOG_INFO, "using %s\n", cpucaps);
-
-        av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
-        av_log(c, AV_LOG_DEBUG,
-               "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
-               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
-        av_log(c, AV_LOG_DEBUG,
-               "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
-               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH,
-               c->chrXInc, c->chrYInc);
-    }
-
-    c->swscale = ff_getSwsFunc(c);
-    return 0;
-fail: // FIXME replace things by appropriate error codes
-    return -1;
-}
-
-#if FF_API_SWS_GETCONTEXT
-/**
- * Allocate a scaler context, store the requested geometry, pixel formats
- * and flags in it, and run sws_init_context() on it.
- *
- * @param param optional pair of scaler tuning values; when NULL the
- *              context keeps whatever sws_alloc_context() left in param[]
- * @return the initialized context, or NULL if allocation or
- *         initialization failed (the context is freed in that case)
- */
-SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
-                           int dstW, int dstH, enum AVPixelFormat dstFormat,
-                           int flags, SwsFilter *srcFilter,
-                           SwsFilter *dstFilter, const double *param)
-{
-    SwsContext *ctx = sws_alloc_context();
-
-    if (!ctx)
-        return NULL;
-
-    /* source side */
-    ctx->srcW      = srcW;
-    ctx->srcH      = srcH;
-    ctx->srcFormat = srcFormat;
-
-    /* destination side */
-    ctx->dstW      = dstW;
-    ctx->dstH      = dstH;
-    ctx->dstFormat = dstFormat;
-
-    ctx->flags     = flags;
-
-    if (param) {
-        int k;
-        for (k = 0; k < 2; k++)
-            ctx->param[k] = param[k];
-    }
-
-    if (sws_init_context(ctx, srcFilter, dstFilter) >= 0)
-        return ctx;
-
-    sws_freeContext(ctx);
-    return NULL;
-}
-#endif
-
-/**
- * Build a filter set (horizontal/vertical, luma/chroma) from blur, sharpen
- * and chroma-shift amounts.
- *
- * Each of the four vectors starts as a Gaussian (when the matching blur
- * value is non-zero) or as the identity vector, is optionally sharpened
- * (scaled by -sharpen and summed with the identity), the chroma vectors are
- * optionally shifted, and finally all four are normalized to a sum of 1.0.
- *
- * @param verbose when non-zero, the chroma and luma horizontal vectors are
- *                printed at AV_LOG_DEBUG level
- * @return a newly allocated filter that the caller releases with
- *         sws_freeFilter(), or NULL if any allocation failed or a blur
- *         value was rejected by sws_getGaussianVec()
- */
-SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
-                                float lumaSharpen, float chromaSharpen,
-                                float chromaHShift, float chromaVShift,
-                                int verbose)
-{
-    SwsFilter *filter = av_malloc(sizeof(SwsFilter));
-    if (!filter)
-        return NULL;
-
-    if (lumaGBlur != 0.0) {
-        filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0);
-        filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0);
-    } else {
-        filter->lumH = sws_getIdentityVec();
-        filter->lumV = sws_getIdentityVec();
-    }
-
-    if (chromaGBlur != 0.0) {
-        filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0);
-        filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0);
-    } else {
-        filter->chrH = sws_getIdentityVec();
-        filter->chrV = sws_getIdentityVec();
-    }
-
-    /* All four members are assigned above, so they can be tested (and later
-     * freed) safely. Any of them is NULL on allocation failure or when the
-     * Gaussian constructor rejected a negative blur value. */
-    if (!filter->lumH || !filter->lumV || !filter->chrH || !filter->chrV)
-        goto fail;
-
-    if (chromaSharpen != 0.0) {
-        SwsVector *id = sws_getIdentityVec();
-        if (!id)
-            goto fail;
-        sws_scaleVec(filter->chrH, -chromaSharpen);
-        sws_scaleVec(filter->chrV, -chromaSharpen);
-        sws_addVec(filter->chrH, id);
-        sws_addVec(filter->chrV, id);
-        sws_freeVec(id);
-    }
-
-    if (lumaSharpen != 0.0) {
-        SwsVector *id = sws_getIdentityVec();
-        if (!id)
-            goto fail;
-        sws_scaleVec(filter->lumH, -lumaSharpen);
-        sws_scaleVec(filter->lumV, -lumaSharpen);
-        sws_addVec(filter->lumH, id);
-        sws_addVec(filter->lumV, id);
-        sws_freeVec(id);
-    }
-
-    /* shifts are rounded to whole taps */
-    if (chromaHShift != 0.0)
-        sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5));
-
-    if (chromaVShift != 0.0)
-        sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5));
-
-    sws_normalizeVec(filter->chrH, 1.0);
-    sws_normalizeVec(filter->chrV, 1.0);
-    sws_normalizeVec(filter->lumH, 1.0);
-    sws_normalizeVec(filter->lumV, 1.0);
-
-    if (verbose) {
-        sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
-        sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
-    }
-
-    return filter;
-
-fail:
-    /* sws_freeVec() accepts NULL, so partially built filters are fine here */
-    sws_freeVec(filter->lumH);
-    sws_freeVec(filter->lumV);
-    sws_freeVec(filter->chrH);
-    sws_freeVec(filter->chrV);
-    av_free(filter);
-    return NULL;
-}
-
-/**
- * Allocate a vector with room for length coefficients.
- * The coefficients are left uninitialized.
- *
- * @return the new vector, or NULL if length is not positive, is too large
- *         for the size computation, or memory could not be allocated
- */
-SwsVector *sws_allocVec(int length)
-{
-    SwsVector *v;
-
-    if (length <= 0)
-        return NULL;
-    if (length > INT_MAX/ sizeof(double))
-        return NULL;
-
-    v = av_malloc(sizeof(SwsVector));
-    if (!v)
-        return NULL;
-
-    v->length = length;
-    v->coeff  = av_malloc(sizeof(double) * length);
-    if (v->coeff)
-        return v;
-
-    /* coefficient buffer failed: drop the shell as well */
-    av_free(v);
-    return NULL;
-}
-
-/**
- * Create a Gaussian-shaped vector normalized to a coefficient sum of 1.0.
- *
- * The tap count is (int)(variance * quality + 0.5) forced to be odd.
- * Note that "variance" enters the exponent squared, i.e. it is used the
- * way a standard deviation would be.
- *
- * @return the new vector, or NULL if either argument is negative or the
- *         allocation failed
- */
-SwsVector *sws_getGaussianVec(double variance, double quality)
-{
-    const int length    = (int)(variance * quality + 0.5) | 1;
-    const double center = (length - 1) * 0.5;
-    SwsVector *gauss;
-    int k;
-
-    if (variance < 0 || quality < 0)
-        return NULL;
-
-    gauss = sws_allocVec(length);
-    if (!gauss)
-        return NULL;
-
-    for (k = 0; k < length; k++) {
-        const double dist = k - center;
-        gauss->coeff[k] = exp(-dist * dist / (2 * variance * variance)) /
-                          sqrt(2 * variance * M_PI);
-    }
-
-    /* the constant factor above is irrelevant once the sum is forced to 1 */
-    sws_normalizeVec(gauss, 1.0);
-    return gauss;
-}
-
-/**
- * Allocate a vector of the given length with every coefficient set to c.
- *
- * @return the new vector, or NULL if sws_allocVec() failed
- */
-SwsVector *sws_getConstVec(double c, int length)
-{
-    SwsVector *filled = sws_allocVec(length);
-    int k = 0;
-
-    if (!filled)
-        return NULL;
-
-    while (k < length) {
-        filled->coeff[k] = c;
-        k++;
-    }
-
-    return filled;
-}
-
-/**
- * Create the one-tap vector { 1.0 }.
- *
- * @return the new vector, or NULL on allocation failure
- */
-SwsVector *sws_getIdentityVec(void)
-{
-    SwsVector *identity = sws_getConstVec(1.0, 1);
-
-    return identity;
-}
-
-/* Return the sum of all coefficients of a, accumulated front to back. */
-static double sws_dcVec(SwsVector *a)
-{
-    double total = 0;
-    int k = 0;
-
-    while (k < a->length) {
-        total += a->coeff[k];
-        k++;
-    }
-
-    return total;
-}
-
-/**
- * Multiply every coefficient of a by scalar, in place.
- */
-void sws_scaleVec(SwsVector *a, double scalar)
-{
-    int k = 0;
-
-    while (k < a->length) {
-        a->coeff[k] *= scalar;
-        k++;
-    }
-}
-
-/**
- * Rescale a in place so that its coefficients sum to height.
- * No guard is applied for a coefficient sum of zero.
- */
-void sws_normalizeVec(SwsVector *a, double height)
-{
-    const double factor = height / sws_dcVec(a);
-
-    sws_scaleVec(a, factor);
-}
-
-/* Return a new vector holding the discrete convolution of a and b
- * (length a->length + b->length - 1), or NULL on allocation failure. */
-static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b)
-{
-    const int out_len = a->length + b->length - 1;
-    SwsVector *conv   = sws_getConstVec(0.0, out_len);
-    int ia, ib;
-
-    if (!conv)
-        return NULL;
-
-    /* outer loop over a, inner over b: keeps the accumulation order */
-    for (ia = 0; ia < a->length; ia++) {
-        const double lhs = a->coeff[ia];
-        for (ib = 0; ib < b->length; ib++)
-            conv->coeff[ia + ib] += lhs * b->coeff[ib];
-    }
-
-    return conv;
-}
-
-/* Return a new vector a + b. The shorter operand is centered inside the
- * longer one. Returns NULL on allocation failure. */
-static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b)
-{
-    const int out_len = FFMAX(a->length, b->length);
-    const int mid     = (out_len - 1) / 2;
-    const int a_off   = mid - (a->length - 1) / 2;
-    const int b_off   = mid - (b->length - 1) / 2;
-    SwsVector *sum    = sws_getConstVec(0.0, out_len);
-    int k;
-
-    if (!sum)
-        return NULL;
-
-    for (k = 0; k < a->length; k++)
-        sum->coeff[k + a_off] += a->coeff[k];
-    for (k = 0; k < b->length; k++)
-        sum->coeff[k + b_off] += b->coeff[k];
-
-    return sum;
-}
-
-/* Return a new vector a - b. The shorter operand is centered inside the
- * longer one. Returns NULL on allocation failure. */
-static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b)
-{
-    const int out_len = FFMAX(a->length, b->length);
-    const int mid     = (out_len - 1) / 2;
-    const int a_off   = mid - (a->length - 1) / 2;
-    const int b_off   = mid - (b->length - 1) / 2;
-    SwsVector *diff   = sws_getConstVec(0.0, out_len);
-    int k;
-
-    if (!diff)
-        return NULL;
-
-    for (k = 0; k < a->length; k++)
-        diff->coeff[k + a_off] += a->coeff[k];
-    for (k = 0; k < b->length; k++)
-        diff->coeff[k + b_off] -= b->coeff[k];
-
-    return diff;
-}
-
-/* Return a copy of a moved by "shift" taps: left for a positive shift,
- * right for a negative one. The result is padded with zeros to
- * a->length + 2 * |shift| taps. Returns NULL on allocation failure. */
-static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
-{
-    const int out_len  = a->length + FFABS(shift) * 2;
-    SwsVector *shifted = sws_getConstVec(0.0, out_len);
-    int k;
-
-    if (!shifted)
-        return NULL;
-
-    for (k = 0; k < a->length; k++) {
-        const int dst = k + (out_len   - 1) / 2 -
-                            (a->length - 1) / 2 - shift;
-        shifted->coeff[dst] = a->coeff[k];
-    }
-
-    return shifted;
-}
-
-/* Overwrite every coefficient of a with NaN. Used to mark a vector as
- * invalid when an in-place operation cannot report failure through its
- * void return type. NAN comes from <math.h>, which this file already
- * relies on for exp() and sqrt(). */
-static void vec_set_nan(SwsVector *a)
-{
-    int i;
-
-    for (i = 0; i < a->length; i++)
-        a->coeff[i] = NAN;
-}
-
-/* Replace the contents of dst with those of result and release the
- * now-empty result shell. If result is NULL (the helper that produced it
- * ran out of memory), dst keeps its length and is filled with NaN instead
- * of being dereferenced through a NULL pointer. */
-static void vec_adopt_or_nan(SwsVector *dst, SwsVector *result)
-{
-    if (!result) {
-        vec_set_nan(dst);
-        return;
-    }
-
-    av_free(dst->coeff);
-    dst->coeff  = result->coeff;
-    dst->length = result->length;
-    av_free(result);
-}
-
-/**
- * Shift a in place by "shift" taps (see sws_getShiftedVec()).
- * On allocation failure the coefficients of a are set to NaN.
- */
-void sws_shiftVec(SwsVector *a, int shift)
-{
-    vec_adopt_or_nan(a, sws_getShiftedVec(a, shift));
-}
-
-/**
- * Replace a with a + b.
- * On allocation failure the coefficients of a are set to NaN.
- */
-void sws_addVec(SwsVector *a, SwsVector *b)
-{
-    vec_adopt_or_nan(a, sws_sumVec(a, b));
-}
-
-/**
- * Replace a with a - b.
- * On allocation failure the coefficients of a are set to NaN.
- */
-void sws_subVec(SwsVector *a, SwsVector *b)
-{
-    vec_adopt_or_nan(a, sws_diffVec(a, b));
-}
-
-/**
- * Replace a with the convolution of a and b.
- * On allocation failure the coefficients of a are set to NaN.
- */
-void sws_convVec(SwsVector *a, SwsVector *b)
-{
-    vec_adopt_or_nan(a, sws_getConvVec(a, b));
-}
-
-/**
- * Create an independent copy of a.
- *
- * @return the copy, or NULL if sws_allocVec() failed
- */
-SwsVector *sws_cloneVec(SwsVector *a)
-{
-    SwsVector *copy = sws_allocVec(a->length);
-
-    if (copy) {
-        const size_t bytes = a->length * sizeof(*a->coeff);
-        memcpy(copy->coeff, a->coeff, bytes);
-    }
-
-    return copy;
-}
-
-/**
- * Log the coefficients of a, one per line, each followed by a crude bar
- * plot: the value, then up to 60 spaces proportional to the coefficient's
- * position between min(0, smallest) and max(0, largest), then '|'.
- *
- * @param log_ctx   passed through to av_log() as the logging context
- * @param log_level av_log() level used for every line
- */
-void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level)
-{
-    int i;
-    double max = 0;
-    double min = 0;
-    double range;
-
-    for (i = 0; i < a->length; i++) {
-        if (a->coeff[i] > max)
-            max = a->coeff[i];
-        if (a->coeff[i] < min)
-            min = a->coeff[i];
-    }
-
-    range = max - min;
-
-    for (i = 0; i < a->length; i++) {
-        /* range is 0 for an all-zero vector, and the quotient is NaN for
-         * NaN or infinite coefficients. Converting NaN to int is undefined,
-         * so only convert values known to be >= 1 and draw no bar
-         * otherwise (values below 1 truncated to 0 anyway). */
-        double bar = range > 0 ? (a->coeff[i] - min) * 60.0 / range + 0.5 : 0;
-        int x      = bar >= 1 ? (int)bar : 0;
-
-        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
-        for (; x > 0; x--)
-            av_log(log_ctx, log_level, " ");
-        av_log(log_ctx, log_level, "|\n");
-    }
-}
-
-/**
- * Release a vector and its coefficient buffer. NULL is accepted.
- */
-void sws_freeVec(SwsVector *a)
-{
-    if (a) {
-        av_freep(&a->coeff);
-        a->length = 0;
-        av_free(a);
-    }
-}
-
-/**
- * Release a filter together with its four vectors. NULL is accepted.
- */
-void sws_freeFilter(SwsFilter *filter)
-{
-    if (filter) {
-        sws_freeVec(filter->lumH);
-        sws_freeVec(filter->lumV);
-        sws_freeVec(filter->chrH);
-        sws_freeVec(filter->chrV);
-        av_free(filter);
-    }
-}
-
-/**
- * Release a scaler context and every buffer it owns. NULL is accepted.
- */
-void sws_freeContext(SwsContext *c)
-{
-    int i;
-    if (!c)
-        return;
-
-    /* sws_init_context() stores each line allocation at index
-     * i + vLumBufSize and aliases it at index i, so walking the first
-     * vLumBufSize entries frees every line exactly once. */
-    if (c->lumPixBuf) {
-        for (i = 0; i < c->vLumBufSize; i++)
-            av_freep(&c->lumPixBuf[i]);
-        av_freep(&c->lumPixBuf);
-    }
-
-    /* The chrVPixBuf entries point into the chrUPixBuf line allocations
-     * (set up in sws_init_context()), so only the U lines are freed; for
-     * chrVPixBuf just the pointer array itself is released. */
-    if (c->chrUPixBuf) {
-        for (i = 0; i < c->vChrBufSize; i++)
-            av_freep(&c->chrUPixBuf[i]);
-        av_freep(&c->chrUPixBuf);
-        av_freep(&c->chrVPixBuf);
-    }
-
-    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-        for (i = 0; i < c->vLumBufSize; i++)
-            av_freep(&c->alpPixBuf[i]);
-        av_freep(&c->alpPixBuf);
-    }
-
-    for (i = 0; i < 4; i++)
-        av_freep(&c->dither_error[i]);
-
-    /* filter coefficient tables */
-    av_freep(&c->vLumFilter);
-    av_freep(&c->vChrFilter);
-    av_freep(&c->hLumFilter);
-    av_freep(&c->hChrFilter);
-#if HAVE_ALTIVEC
-    av_freep(&c->vYCoeffsBank);
-    av_freep(&c->vCCoeffsBank);
-#endif
-
-    /* filter position tables */
-    av_freep(&c->vLumFilterPos);
-    av_freep(&c->vChrFilterPos);
-    av_freep(&c->hLumFilterPos);
-    av_freep(&c->hChrFilterPos);
-
-    /* The MMXEXT filter code buffers are released with munmap(),
-     * VirtualFree() or av_free(), depending on the build configuration. */
-#if HAVE_MMX_INLINE
-#if USE_MMAP
-    if (c->lumMmxextFilterCode)
-        munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize);
-    if (c->chrMmxextFilterCode)
-        munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize);
-#elif HAVE_VIRTUALALLOC
-    if (c->lumMmxextFilterCode)
-        VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE);
-    if (c->chrMmxextFilterCode)
-        VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE);
-#else
-    av_free(c->lumMmxextFilterCode);
-    av_free(c->chrMmxextFilterCode);
-#endif
-    c->lumMmxextFilterCode = NULL;
-    c->chrMmxextFilterCode = NULL;
-#endif /* HAVE_MMX_INLINE */
-
-    av_freep(&c->yuvTable);
-    av_freep(&c->formatConvBuffer);
-
-    av_free(c);
-}
-
-/**
- * Return a context matching the requested parameters, reusing the one
- * passed in when possible.
- *
- * If context is non-NULL and its sizes, formats, flags and both param
- * values equal the requested ones, it is returned unchanged (srcFilter and
- * dstFilter are not part of that comparison). Otherwise it is freed and a
- * new context is allocated and initialized.
- *
- * @param param optional pair of tuning values; NULL selects
- *              SWS_PARAM_DEFAULT for both
- * @return the usable context, or NULL if allocation or initialization of
- *         a new one failed
- */
-struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
-                                        int srcH, enum AVPixelFormat srcFormat,
-                                        int dstW, int dstH,
-                                        enum AVPixelFormat dstFormat, int flags,
-                                        SwsFilter *srcFilter,
-                                        SwsFilter *dstFilter,
-                                        const double *param)
-{
-    static const double default_param[2] = { SWS_PARAM_DEFAULT,
-                                             SWS_PARAM_DEFAULT };
-    const double *wanted = param ? param : default_param;
-
-    if (context) {
-        const int reusable = context->srcW      == srcW      &&
-                             context->srcH      == srcH      &&
-                             context->srcFormat == srcFormat &&
-                             context->dstW      == dstW      &&
-                             context->dstH      == dstH      &&
-                             context->dstFormat == dstFormat &&
-                             context->flags     == flags     &&
-                             context->param[0]  == wanted[0] &&
-                             context->param[1]  == wanted[1];
-        if (reusable)
-            return context;
-
-        /* stale context: discard it and build a fresh one below */
-        sws_freeContext(context);
-    }
-
-    context = sws_alloc_context();
-    if (!context)
-        return NULL;
-
-    context->srcW      = srcW;
-    context->srcH      = srcH;
-    context->srcFormat = srcFormat;
-    context->dstW      = dstW;
-    context->dstH      = dstH;
-    context->dstFormat = dstFormat;
-    context->flags     = flags;
-    context->param[0]  = wanted[0];
-    context->param[1]  = wanted[1];
-
-    if (sws_init_context(context, srcFilter, dstFilter) < 0) {
-        sws_freeContext(context);
-        return NULL;
-    }
-
-    return context;
-}
diff --git a/ffmpeg/libswscale/version.h b/ffmpeg/libswscale/version.h
deleted file mode 100644
index 99f3295..0000000
--- a/ffmpeg/libswscale/version.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_VERSION_H
-#define SWSCALE_VERSION_H
-
-/**
- * @file
- * swscale version macros
- */
-
-#include "libavutil/version.h"
-
-/* The three components of the libswscale version number. */
-#define LIBSWSCALE_VERSION_MAJOR 2
-#define LIBSWSCALE_VERSION_MINOR 5
-#define LIBSWSCALE_VERSION_MICRO 101
-
-/* Version combined through AV_VERSION_INT / AV_VERSION; those macros are
- * not defined in this header (presumably they come from the included
- * libavutil/version.h -- confirm there for the exact encoding). */
-#define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
-                                               LIBSWSCALE_VERSION_MINOR, \
-                                               LIBSWSCALE_VERSION_MICRO)
-#define LIBSWSCALE_VERSION      AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \
-                                           LIBSWSCALE_VERSION_MINOR, \
-                                           LIBSWSCALE_VERSION_MICRO)
-/* Alias of LIBSWSCALE_VERSION_INT. */
-#define LIBSWSCALE_BUILD        LIBSWSCALE_VERSION_INT
-
-/* Identification string: "SwS" followed by the stringified version. */
-#define LIBSWSCALE_IDENT        "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION)
-
-/**
- * FF_API_* defines may be placed below to indicate public API that will be
- * dropped at a future version bump. The defines themselves are not part of
- * the public API and may change, break or disappear at any time.
- */
-
-#ifndef FF_API_SWS_GETCONTEXT
-#define FF_API_SWS_GETCONTEXT  (LIBSWSCALE_VERSION_MAJOR < 3)
-#endif
-#ifndef FF_API_SWS_CPU_CAPS
-#define FF_API_SWS_CPU_CAPS    (LIBSWSCALE_VERSION_MAJOR < 3)
-#endif
-#ifndef FF_API_SWS_FORMAT_NAME
-#define FF_API_SWS_FORMAT_NAME  (LIBSWSCALE_VERSION_MAJOR < 3)
-#endif
-
-#endif /* SWSCALE_VERSION_H */
diff --git a/ffmpeg/libswscale/x86/Makefile b/ffmpeg/libswscale/x86/Makefile
deleted file mode 100644
index e767a5c..0000000
--- a/ffmpeg/libswscale/x86/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# swscale_mmx.o is built with $(NOREDZONE_FLAGS) appended to CFLAGS
-# (presumably to disable the red zone for its inline assembly -- confirm
-# against the definition of NOREDZONE_FLAGS).
-$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
-
-# Objects that are always added for x86 builds.
-OBJS                            += x86/rgb2rgb.o                        \
-                                   x86/swscale.o                        \
-                                   x86/yuv2rgb.o                        \
-
-# Added only when CONFIG_XMM_CLOBBER_TEST is enabled.
-OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
-
-# Objects appended to YASM-OBJS (the .asm sources of this directory).
-YASM-OBJS                       += x86/input.o                          \
-                                   x86/output.o                         \
-                                   x86/scale.o                          \
diff --git a/ffmpeg/libswscale/x86/input.asm b/ffmpeg/libswscale/x86/input.asm
deleted file mode 100644
index 0c4f30e..0000000
--- a/ffmpeg/libswscale/x86/input.asm
+++ /dev/null
@@ -1,696 +0,0 @@
-;******************************************************************************
-;* x86-optimized input routines; does shuffling of packed
-;* YUV formats into individual planes, and converts RGB
-;* into YUV planes also.
-;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "libavutil/x86/x86util.asm"
-
-SECTION_RODATA
-
-%define RY 0x20DE
-%define GY 0x4087
-%define BY 0x0C88
-%define RU 0xECFF
-%define GU 0xDAC8
-%define BU 0x3838
-%define RV 0x3838
-%define GV 0xD0E3
-%define BV 0xF6E4
-
-; Bias constants added to the dword sums before the final right shift.
-; 0x80100 = (16 << 15) + (1 << 8): the luma offset 16 in 15-bit fixed point
-; plus half of the 1 << 9 divisor applied by "psrad 9" in the ToY loop
-; (round to nearest). 0x400100 = (128 << 15) + (1 << 8) has the same form
-; for chroma; presumably the ToUV loop shifts by 9 as well -- confirm there.
-rgb_Yrnd:        times 4 dd 0x80100        ; ( 16 << 15) + (1 << 8)
-rgb_UVrnd:       times 4 dd 0x400100       ; (128 << 15) + (1 << 8)
-%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq
-%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq
-%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq
-%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq
-%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq
-%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq
-%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq
-%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq
-%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq
-%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq
-%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq
-%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq
-
-%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq
-%define rgba_Ycoeff_br 16*4 + 16*13 + tableq
-%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq
-%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq
-%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq
-%define rgba_Ucoeff_br 16*4 + 16*17 + tableq
-%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq
-%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq
-%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq
-%define rgba_Vcoeff_br 16*4 + 16*21 + tableq
-%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq
-%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq
-
-; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
-; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
-; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
-; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
-; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
-; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
-; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
-; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
-; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
-; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
-; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
-; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
-
-; rgba_Ycoeff_rb:  times 4 dw RY, BY
-; rgba_Ycoeff_br:  times 4 dw BY, RY
-; rgba_Ycoeff_ga:  times 4 dw GY, 0
-; rgba_Ycoeff_ag:  times 4 dw 0,  GY
-; rgba_Ucoeff_rb:  times 4 dw RU, BU
-; rgba_Ucoeff_br:  times 4 dw BU, RU
-; rgba_Ucoeff_ga:  times 4 dw GU, 0
-; rgba_Ucoeff_ag:  times 4 dw 0,  GU
-; rgba_Vcoeff_rb:  times 4 dw RV, BV
-; rgba_Vcoeff_br:  times 4 dw BV, RV
-; rgba_Vcoeff_ga:  times 4 dw GV, 0
-; rgba_Vcoeff_ag:  times 4 dw 0,  GV
-
-shuf_rgb_12x4:   db 0, 0x80, 1, 0x80,  2, 0x80,  3, 0x80, \
-                    6, 0x80, 7, 0x80,  8, 0x80,  9, 0x80
-shuf_rgb_3x56:   db 2, 0x80, 3, 0x80,  4, 0x80,  5, 0x80, \
-                    8, 0x80, 9, 0x80, 10, 0x80, 11, 0x80
-
-SECTION .text
-
-;-----------------------------------------------------------------------------
-; RGB to Y/UV.
-;
-; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w);
-; and
-; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
-;                      const uint8_t *unused, int w);
-;-----------------------------------------------------------------------------
-
-; Emits <%2>24ToY: converts packed 24-bit pixels to one word of Y per pixel,
-; mmsize/2 pixels per loop iteration.
-;
-; %1 = nr. of XMM registers
-; %2 = rgb or bgr
-; %3 = (optional) rgb or bgr name of a sibling function emitted by this
-;      macro; when given, and on x86-64 or MMX builds, only the coefficient
-;      loads are emitted here and execution jumps to that sibling's .body
-;
-; cglobal declares six arguments (dst, src, u1, u2, w, table); u1 and u2
-; are not referenced in this macro, and table is the base for the
-; *_Ycoeff_* addresses defined at the top of the file.
-%macro RGB24_TO_Y_FN 2-3
-cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
-%if mmsize == 8
-    mova           m5, [%2_Ycoeff_12x4]
-    mova           m6, [%2_Ycoeff_3x56]
-%define coeff1 m5
-%define coeff2 m6
-%elif ARCH_X86_64
-    mova           m8, [%2_Ycoeff_12x4]
-    mova           m9, [%2_Ycoeff_3x56]
-%define coeff1 m8
-%define coeff2 m9
-%else ; x86-32 && mmsize == 16
-%define coeff1 [%2_Ycoeff_12x4]
-%define coeff2 [%2_Ycoeff_3x56]
-%endif ; x86-32/64 && mmsize == 8/16
-%if (ARCH_X86_64 || mmsize == 8) && %0 == 3
-    ; coefficients are in registers: share the loop body of the %3 variant
-    jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body
-%else ; !((ARCH_X86_64 || mmsize == 8) && %0 == 3)
-.body:
-%if cpuflag(ssse3)
-    mova           m7, [shuf_rgb_12x4]
-%define shuf_rgb1 m7
-%if ARCH_X86_64
-    mova          m10, [shuf_rgb_3x56]
-%define shuf_rgb2 m10
-%else ; x86-32
-%define shuf_rgb2 [shuf_rgb_3x56]
-%endif ; x86-32/64
-%endif ; cpuflag(ssse3)
-%if ARCH_X86_64
-    movsxd         wq, wd
-%endif
-    ; w is doubled (output is one word per pixel), dst is moved to the end
-    ; of the output and w negated, so [dstq+wq] walks forward until wq
-    ; reaches zero
-    add            wq, wq
-    add          dstq, wq
-    neg            wq
-%if notcpuflag(ssse3)
-    pxor           m7, m7                 ; zero register for punpcklbw
-%endif ; !cpuflag(ssse3)
-    mova           m4, [rgb_Yrnd]
-.loop:
-%if cpuflag(ssse3)
-    movu           m0, [srcq+0]           ; (byte) { Bx, Gx, Rx }[0-3]
-    movu           m2, [srcq+12]          ; (byte) { Bx, Gx, Rx }[4-7]
-    pshufb         m1, m0, shuf_rgb2      ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
-    pshufb         m0, shuf_rgb1          ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
-    pshufb         m3, m2, shuf_rgb2      ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
-    pshufb         m2, shuf_rgb1          ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
-%else ; !cpuflag(ssse3)
-    movd           m0, [srcq+0]           ; (byte) { B0, G0, R0, B1 }
-    movd           m1, [srcq+2]           ; (byte) { R0, B1, G1, R1 }
-    movd           m2, [srcq+6]           ; (byte) { B2, G2, R2, B3 }
-    movd           m3, [srcq+8]           ; (byte) { R2, B3, G3, R3 }
-%if mmsize == 16 ; i.e. sse2
-    punpckldq      m0, m2                 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 }
-    punpckldq      m1, m3                 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 }
-    movd           m2, [srcq+12]          ; (byte) { B4, G4, R4, B5 }
-    movd           m3, [srcq+14]          ; (byte) { R4, B5, G5, R5 }
-    movd           m5, [srcq+18]          ; (byte) { B6, G6, R6, B7 }
-    movd           m6, [srcq+20]          ; (byte) { R6, B7, G7, R7 }
-    punpckldq      m2, m5                 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 }
-    punpckldq      m3, m6                 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 }
-%endif ; mmsize == 16
-    punpcklbw      m0, m7                 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
-    punpcklbw      m1, m7                 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
-    punpcklbw      m2, m7                 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
-    punpcklbw      m3, m7                 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
-%endif ; cpuflag(ssse3)
-    add          srcq, 3 * mmsize / 2     ; 3 source bytes per output word
-    pmaddwd        m0, coeff1             ; (dword) { B0*BY + G0*GY, B1*BY, B2*BY + G2*GY, B3*BY }
-    pmaddwd        m1, coeff2             ; (dword) { R0*RY, G1*GY + R1*RY, R2*RY, G3*GY + R3*RY }
-    pmaddwd        m2, coeff1             ; (dword) { B4*BY + G4*GY, B5*BY, B6*BY + G6*GY, B7*BY }
-    pmaddwd        m3, coeff2             ; (dword) { R4*RY, G5*GY + R5*RY, R6*RY, G7*GY + R7*RY }
-    paddd          m0, m1                 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[0-3]
-    paddd          m2, m3                 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7]
-    paddd          m0, m4                 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] }
-    paddd          m2, m4                 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] }
-    psrad          m0, 9
-    psrad          m2, 9
-    packssdw       m0, m2                 ; (word) { Y[0-7] }
-    mova    [dstq+wq], m0
-    add            wq, mmsize
-    jl .loop
-    REP_RET
-%endif ; !((ARCH_X86_64 || mmsize == 8) && %0 == 3)
-%endmacro
-
-; Emits <%2>24ToUV: converts packed 24-bit pixels into 16-bit U and V
-; samples, producing mmsize/2 samples of each plane per loop iteration.
-; %1 = nr. of XMM registers
-; %2 = rgb or bgr
-; %3 = (optional) name of a variant whose .body this one should share; only
-;      honoured on x86-64, where the coefficients are held in m8-m11 and the
-;      shared loop can therefore be entered with a plain jmp. On x86-32 the
-;      coefficients are memory operands named after %2, so the loop is
-;      emitted again.
-%macro RGB24_TO_UV_FN 2-3
-cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
-%if ARCH_X86_64
-    mova           m8, [%2_Ucoeff_12x4]
-    mova           m9, [%2_Ucoeff_3x56]
-    mova          m10, [%2_Vcoeff_12x4]
-    mova          m11, [%2_Vcoeff_3x56]
-%define coeffU1 m8
-%define coeffU2 m9
-%define coeffV1 m10
-%define coeffV2 m11
-%else ; x86-32
-%define coeffU1 [%2_Ucoeff_12x4]
-%define coeffU2 [%2_Ucoeff_3x56]
-%define coeffV1 [%2_Vcoeff_12x4]
-%define coeffV2 [%2_Vcoeff_3x56]
-%endif ; x86-32/64
-%if ARCH_X86_64 && %0 == 3
-    jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToUV %+ SUFFIX).body
-%else ; ARCH_X86_64 && %0 == 3
-.body:
-%if cpuflag(ssse3)
-    ; SSSE3 and later expand bytes to words with pshufb, so m7 holds a
-    ; shuffle mask instead of the zero used by the punpcklbw path
-    mova           m7, [shuf_rgb_12x4]
-%define shuf_rgb1 m7
-%if ARCH_X86_64
-    mova          m12, [shuf_rgb_3x56]
-%define shuf_rgb2 m12
-%else ; x86-32
-%define shuf_rgb2 [shuf_rgb_3x56]
-%endif ; x86-32/64
-%endif ; cpuflag(ssse3)
-    ; (re)load w from its argument slot, sign-extending the 32-bit int on x86-64
-%if ARCH_X86_64
-    movsxd         wq, dword r5m
-%else ; x86-32
-    mov            wq, r5m
-%endif
-    add            wq, wq                 ; samples -> bytes (one word per output sample)
-    add         dstUq, wq                 ; point both outputs at their end ...
-    add         dstVq, wq
-    neg            wq                     ; ... and index them with a negative offset that counts up to 0
-    mova           m6, [rgb_UVrnd]
-%if notcpuflag(ssse3)
-    pxor           m7, m7
-%endif
-.loop:
-%if cpuflag(ssse3)
-    movu           m0, [srcq+0]           ; (byte) { Bx, Gx, Rx }[0-3]
-    movu           m4, [srcq+12]          ; (byte) { Bx, Gx, Rx }[4-7]
-    pshufb         m1, m0, shuf_rgb2      ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
-    pshufb         m0, shuf_rgb1          ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
-%else ; !cpuflag(ssse3)
-    movd           m0, [srcq+0]           ; (byte) { B0, G0, R0, B1 }
-    movd           m1, [srcq+2]           ; (byte) { R0, B1, G1, R1 }
-    movd           m4, [srcq+6]           ; (byte) { B2, G2, R2, B3 }
-    movd           m5, [srcq+8]           ; (byte) { R2, B3, G3, R3 }
-%if mmsize == 16
-    punpckldq      m0, m4                 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 }
-    punpckldq      m1, m5                 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 }
-    movd           m4, [srcq+12]          ; (byte) { B4, G4, R4, B5 }
-    movd           m5, [srcq+14]          ; (byte) { R4, B5, G5, R5 }
-%endif ; mmsize == 16
-    punpcklbw      m0, m7                 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
-    punpcklbw      m1, m7                 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
-%endif ; cpuflag(ssse3)
-    pmaddwd        m2, m0, coeffV1        ; (dword) { B0*BV + G0*GV, B1*BV, B2*BV + G2*GV, B3*BV }
-    pmaddwd        m3, m1, coeffV2        ; (dword) { R0*RV, G1*GV + R1*RV, R2*RV, G3*GV + R3*RV }
-    pmaddwd        m0, coeffU1            ; (dword) { B0*BU + G0*GU, B1*BU, B2*BU + G2*GU, B3*BU }
-    pmaddwd        m1, coeffU2            ; (dword) { R0*RU, G1*GU + R1*RU, R2*RU, G3*GU + R3*RU }
-    paddd          m0, m1                 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[0-3]
-    paddd          m2, m3                 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[0-3]
-%if cpuflag(ssse3)
-    pshufb         m5, m4, shuf_rgb2      ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
-    pshufb         m4, shuf_rgb1          ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
-%else ; !cpuflag(ssse3)
-%if mmsize == 16
-    ; m1/m3 are free again here: their products were folded into m0/m2 above
-    movd           m1, [srcq+18]          ; (byte) { B6, G6, R6, B7 }
-    movd           m3, [srcq+20]          ; (byte) { R6, B7, G7, R7 }
-    punpckldq      m4, m1                 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 }
-    punpckldq      m5, m3                 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 }
-%endif ; mmsize == 16 && !cpuflag(ssse3)
-    punpcklbw      m4, m7                 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
-    punpcklbw      m5, m7                 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
-%endif ; cpuflag(ssse3)
-    add          srcq, 3 * mmsize / 2     ; mmsize/2 pixels consumed, 3 bytes each
-    pmaddwd        m1, m4, coeffU1        ; (dword) { B4*BU + G4*GU, B5*BU, B6*BU + G6*GU, B7*BU }
-    pmaddwd        m3, m5, coeffU2        ; (dword) { R4*RU, G5*GU + R5*RU, R6*RU, G7*GU + R7*RU }
-    pmaddwd        m4, coeffV1            ; (dword) { B4*BV + G4*GV, B5*BV, B6*BV + G6*GV, B7*BV }
-    pmaddwd        m5, coeffV2            ; (dword) { R4*RV, G5*GV + R5*RV, R6*RV, G7*GV + R7*RV }
-    paddd          m1, m3                 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[4-7]
-    paddd          m4, m5                 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[4-7]
-    paddd          m0, m6                 ; += rgb_UVrnd, i.e. (dword) { U[0-3] }
-    paddd          m2, m6                 ; += rgb_UVrnd, i.e. (dword) { V[0-3] }
-    paddd          m1, m6                 ; += rgb_UVrnd, i.e. (dword) { U[4-7] }
-    paddd          m4, m6                 ; += rgb_UVrnd, i.e. (dword) { V[4-7] }
-    psrad          m0, 9
-    psrad          m2, 9
-    psrad          m1, 9
-    psrad          m4, 9
-    packssdw       m0, m1                 ; (word) { U[0-7] }
-    packssdw       m2, m4                 ; (word) { V[0-7] }
-    ; NOTE(review): both arms of this %if emit the same two stores
-%if mmsize == 8
-    mova   [dstUq+wq], m0
-    mova   [dstVq+wq], m2
-%else ; mmsize == 16
-    mova   [dstUq+wq], m0
-    mova   [dstVq+wq], m2
-%endif ; mmsize == 8/16
-    add            wq, mmsize             ; one full vector of output written per plane
-    jl .loop                              ; wq is negative until the end of the line is reached
-    REP_RET
-%endif ; ARCH_X86_64 && %0 == 3
-%endmacro
-
-; Emits the four 24-bit converters (rgb/bgr x Y/UV) for the instruction set
-; selected by the preceding INIT_* line. The rgb variants are emitted first
-; because the bgr variants name them as the variant whose loop body may be
-; shared (third macro argument).
-; %1 = nr. of XMM registers for rgb-to-Y func
-; %2 = nr. of XMM registers for rgb-to-UV func
-%macro RGB24_FUNCS 2
-RGB24_TO_Y_FN %1, rgb
-RGB24_TO_Y_FN %1, bgr, rgb
-RGB24_TO_UV_FN %2, rgb
-RGB24_TO_UV_FN %2, bgr, rgb
-%endmacro
-
-; Instantiate the 24-bit converters once per instruction set.
-; The MMX versions are only assembled for 32-bit x86.
-%if ARCH_X86_32
-INIT_MMX mmx
-RGB24_FUNCS 0, 0
-%endif
-
-INIT_XMM sse2
-RGB24_FUNCS 10, 12
-
-; SSSE3/AVX request one more XMM register than SSE2 (the UV function keeps
-; an additional pshufb mask in m12 on x86-64).
-INIT_XMM ssse3
-RGB24_FUNCS 11, 13
-
-%if HAVE_AVX_EXTERNAL
-INIT_XMM avx
-RGB24_FUNCS 11, 13
-%endif
-
-; Emits <%2%3%4%5>ToY: converts packed 32-bit pixels into 16-bit luma
-; samples, mmsize/2 samples per loop iteration.
-; %1 = nr. of XMM registers
-; %2-5 = rgba, bgra, argb or abgr (in individual characters)
-; %6 = (optional) name of a variant whose .body this one should share; the
-;      function then only loads its own coefficients into m5/m6 and jumps
-%macro RGB32_TO_Y_FN 5-6
-cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
-    ; coefficients are selected by component order: bytes 0/2 of each pixel
-    ; pair up with rgba_Ycoeff_%2%4, bytes 1/3 with rgba_Ycoeff_%3%5
-    mova           m5, [rgba_Ycoeff_%2%4]
-    mova           m6, [rgba_Ycoeff_%3%5]
-%if %0 == 6
-    jmp mangle(private_prefix %+ _ %+ %6 %+ ToY %+ SUFFIX).body
-%else ; %0 == 6
-.body:
-%if ARCH_X86_64
-    movsxd         wq, wd                 ; w is a 32-bit int; sign-extend before using it as an index
-%endif
-    lea          srcq, [srcq+wq*4]        ; end of input line (4 bytes per pixel)
-    add            wq, wq                 ; samples -> bytes (one word per output sample)
-    add          dstq, wq                 ; end of output line
-    neg            wq                     ; negative byte offset that counts up to 0
-    mova           m4, [rgb_Yrnd]
-    pcmpeqb        m7, m7
-    psrlw          m7, 8                  ; (word) { 0x00ff } x4
-.loop:
-    ; FIXME check alignment and use mova
-    ; wq is the output byte offset, so the matching input offset is wq*2
-    movu           m0, [srcq+wq*2+0]      ; (byte) { Bx, Gx, Rx, xx }[0-3]
-    movu           m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
-    DEINTB          1,  0,  3,  2,  7     ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
-    pmaddwd        m1, m5                 ; (dword) { Bx*BY + Rx*RY }[0-3]
-    pmaddwd        m0, m6                 ; (dword) { Gx*GY }[0-3]
-    pmaddwd        m3, m5                 ; (dword) { Bx*BY + Rx*RY }[4-7]
-    pmaddwd        m2, m6                 ; (dword) { Gx*GY }[4-7]
-    paddd          m0, m4                 ; += rgb_Yrnd
-    paddd          m2, m4                 ; += rgb_Yrnd
-    paddd          m0, m1                 ; (dword) { Y[0-3] }
-    paddd          m2, m3                 ; (dword) { Y[4-7] }
-    psrad          m0, 9
-    psrad          m2, 9
-    packssdw       m0, m2                 ; (word) { Y[0-7] }
-    mova    [dstq+wq], m0
-    add            wq, mmsize
-    jl .loop
-    REP_RET
-%endif ; %0 == 6
-%endmacro
-
-; Emits <%2%3%4%5>ToUV: converts packed 32-bit pixels into 16-bit U and V
-; samples, mmsize/2 samples of each plane per loop iteration.
-; %1 = nr. of XMM registers
-; %2-5 = rgba, bgra, argb or abgr (in individual characters)
-; %6 = (optional) name of a variant whose .body this one should share; only
-;      honoured on x86-64, where the coefficients are held in m8-m11. On
-;      x86-32 they are memory operands named after %2-%5, so the loop is
-;      emitted again.
-%macro RGB32_TO_UV_FN 5-6
-cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
-%if ARCH_X86_64
-    mova           m8, [rgba_Ucoeff_%2%4]
-    mova           m9, [rgba_Ucoeff_%3%5]
-    mova          m10, [rgba_Vcoeff_%2%4]
-    mova          m11, [rgba_Vcoeff_%3%5]
-%define coeffU1 m8
-%define coeffU2 m9
-%define coeffV1 m10
-%define coeffV2 m11
-%else ; x86-32
-%define coeffU1 [rgba_Ucoeff_%2%4]
-%define coeffU2 [rgba_Ucoeff_%3%5]
-%define coeffV1 [rgba_Vcoeff_%2%4]
-%define coeffV2 [rgba_Vcoeff_%3%5]
-%endif ; x86-64/32
-%if ARCH_X86_64 && %0 == 6
-    jmp mangle(private_prefix %+ _ %+ %6 %+ ToUV %+ SUFFIX).body
-%else ; ARCH_X86_64 && %0 == 6
-.body:
-    ; (re)load w from its argument slot, sign-extending the 32-bit int on x86-64
-%if ARCH_X86_64
-    movsxd         wq, dword r5m
-%else ; x86-32
-    mov            wq, r5m
-%endif
-    add            wq, wq                 ; samples -> bytes (one word per output sample)
-    add         dstUq, wq
-    add         dstVq, wq
-    lea          srcq, [srcq+wq*2]        ; wq is already 2*w, so this is src + 4*w (4 bytes per pixel)
-    neg            wq                     ; negative byte offset that counts up to 0
-    pcmpeqb        m7, m7
-    psrlw          m7, 8                  ; (word) { 0x00ff } x4
-    mova           m6, [rgb_UVrnd]
-.loop:
-    ; FIXME check alignment and use mova
-    movu           m0, [srcq+wq*2+0]      ; (byte) { Bx, Gx, Rx, xx }[0-3]
-    movu           m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
-    DEINTB          1,  0,  5,  4,  7     ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
-    pmaddwd        m3, m1, coeffV1        ; (dword) { Bx*BV + Rx*RV }[0-3]
-    pmaddwd        m2, m0, coeffV2        ; (dword) { Gx*GV }[0-3]
-    pmaddwd        m1, coeffU1            ; (dword) { Bx*BU + Rx*RU }[0-3]
-    pmaddwd        m0, coeffU2            ; (dword) { Gx*GU }[0-3]
-    paddd          m3, m6                 ; += rgb_UVrnd
-    paddd          m1, m6                 ; += rgb_UVrnd
-    paddd          m2, m3                 ; (dword) { V[0-3] }
-    paddd          m0, m1                 ; (dword) { U[0-3] }
-    ; m1/m3 are reused as temporaries for pixels 4-7 from here on
-    pmaddwd        m3, m5, coeffV1        ; (dword) { Bx*BV + Rx*RV }[4-7]
-    pmaddwd        m1, m4, coeffV2        ; (dword) { Gx*GV }[4-7]
-    pmaddwd        m5, coeffU1            ; (dword) { Bx*BU + Rx*RU }[4-7]
-    pmaddwd        m4, coeffU2            ; (dword) { Gx*GU }[4-7]
-    paddd          m3, m6                 ; += rgb_UVrnd
-    paddd          m5, m6                 ; += rgb_UVrnd
-    psrad          m0, 9
-    paddd          m1, m3                 ; (dword) { V[4-7] }
-    paddd          m4, m5                 ; (dword) { U[4-7] }
-    psrad          m2, 9
-    psrad          m4, 9
-    psrad          m1, 9
-    packssdw       m0, m4                 ; (word) { U[0-7] }
-    packssdw       m2, m1                 ; (word) { V[0-7] }
-    ; NOTE(review): both arms of this %if emit the same two stores
-%if mmsize == 8
-    mova   [dstUq+wq], m0
-    mova   [dstVq+wq], m2
-%else ; mmsize == 16
-    mova   [dstUq+wq], m0
-    mova   [dstVq+wq], m2
-%endif ; mmsize == 8/16
-    add            wq, mmsize
-    jl .loop
-    REP_RET
-%endif ; ARCH_X86_64 && %0 == 6
-%endmacro
-
-; Emits the eight 32-bit converters (rgba/bgra/argb/abgr x Y/UV) for the
-; instruction set selected by the preceding INIT_* line. The rgba variants
-; come first because the other three name them as the variant whose loop
-; body may be shared (sixth macro argument).
-; %1 = nr. of XMM registers for rgb-to-Y func
-; %2 = nr. of XMM registers for rgb-to-UV func
-%macro RGB32_FUNCS 2
-RGB32_TO_Y_FN %1, r, g, b, a
-RGB32_TO_Y_FN %1, b, g, r, a, rgba
-RGB32_TO_Y_FN %1, a, r, g, b, rgba
-RGB32_TO_Y_FN %1, a, b, g, r, rgba
-
-RGB32_TO_UV_FN %2, r, g, b, a
-RGB32_TO_UV_FN %2, b, g, r, a, rgba
-RGB32_TO_UV_FN %2, a, r, g, b, rgba
-RGB32_TO_UV_FN %2, a, b, g, r, rgba
-%endmacro
-
-; Instantiate the 32-bit converters once per instruction set.
-; The MMX versions are only assembled for 32-bit x86; no SSSE3 variant is
-; generated here.
-%if ARCH_X86_32
-INIT_MMX mmx
-RGB32_FUNCS 0, 0
-%endif
-
-INIT_XMM sse2
-RGB32_FUNCS 8, 12
-
-%if HAVE_AVX_EXTERNAL
-INIT_XMM avx
-RGB32_FUNCS 8, 12
-%endif
-
-;-----------------------------------------------------------------------------
-; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
-;
-; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w);
-; and
-; void <fmt>ToUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
-;                      const uint8_t *unused, int w);
-;-----------------------------------------------------------------------------
-
-; Inner loop of the packed-YUV luma extractors: reads 2*mmsize input bytes,
-; keeps the luma byte of every 16-bit pair and stores mmsize luma bytes.
-; Expects srcq/dstq to point at the end of their lines, wq to hold the
-; negated width and, for yuyv, m2 to hold the 0x00ff word mask.
-; %1 = a (aligned) or u (unaligned)
-; %2 = yuyv or uyvy
-%macro LOOP_YUYV_TO_Y 2
-.loop_%1:
-    ; byte order in the comments below is the yuyv one
-    mov%1          m0, [srcq+wq*2]        ; (byte) { Y0, U0, Y1, V0, ... }
-    mov%1          m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
-%ifidn %2, yuyv
-    ; luma is the low byte of each word: mask it
-    pand           m0, m2                 ; (word) { Y0, Y1, ..., Y7 }
-    pand           m1, m2                 ; (word) { Y8, Y9, ..., Y15 }
-%else ; uyvy
-    ; luma is the high byte of each word: shift it down
-    psrlw          m0, 8                  ; (word) { Y0, Y1, ..., Y7 }
-    psrlw          m1, 8                  ; (word) { Y8, Y9, ..., Y15 }
-%endif ; yuyv/uyvy
-    packuswb       m0, m1                 ; (byte) { Y0, ..., Y15 }
-    mova    [dstq+wq], m0
-    add            wq, mmsize
-    jl .loop_%1
-    REP_RET
-%endmacro
-
-; Emits <%2>ToY: extracts the luma bytes from a packed yuyv/uyvy line.
-; With 16-byte vectors the source alignment is tested once and either the
-; aligned-load or the unaligned-load copy of the loop is run.
-; %1 = nr. of XMM registers
-; %2 = yuyv or uyvy
-; %3 = if specified, it means that unaligned and aligned code in loop
-;      will be the same (i.e. YUYV+AVX), and thus we don't need to
-;      split the loop in an aligned and unaligned case
-; NOTE(review): %3 is accepted but never tested in this macro body; the
-; loop is always split when mmsize == 16.
-%macro YUYV_TO_Y_FN 2-3
-cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
-%if ARCH_X86_64
-    movsxd         wq, wd
-%endif
-    add          dstq, wq                 ; end of output line (1 byte per sample)
-%if mmsize == 16
-    ; the flags set here are consumed by the jnz further down; the lea and
-    ; the SIMD instructions in between leave EFLAGS untouched
-    test         srcq, 15
-%endif
-    lea          srcq, [srcq+wq*2]        ; end of input line (2 bytes per sample)
-%ifidn %2, yuyv
-    pcmpeqb        m2, m2                 ; (byte) { 0xff } x 16
-    psrlw          m2, 8                  ; (word) { 0x00ff } x 8
-%endif ; yuyv
-%if mmsize == 16
-    jnz .loop_u_start
-    neg            wq
-    LOOP_YUYV_TO_Y  a, %2
-.loop_u_start:
-    neg            wq
-    LOOP_YUYV_TO_Y  u, %2
-%else ; mmsize == 8
-    neg            wq
-    LOOP_YUYV_TO_Y  a, %2
-%endif ; mmsize == 8/16
-%endmacro
-
-; Inner loop of the packed-YUV chroma extractors: reads 2*mmsize input
-; bytes, drops the luma bytes and splits the interleaved chroma into
-; mmsize/2 U bytes and mmsize/2 V bytes.
-; Expects srcq/dstUq/dstVq to point at the end of their lines, wq to hold
-; the negated width and m2 to hold the 0x00ff word mask.
-; %1 = a (aligned) or u (unaligned)
-; %2 = yuyv or uyvy
-%macro LOOP_YUYV_TO_UV 2
-.loop_%1:
-%ifidn %2, yuyv
-    ; chroma is the high byte of each word: shift it down
-    mov%1          m0, [srcq+wq*4]        ; (byte) { Y0, U0, Y1, V0, ... }
-    mov%1          m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
-    psrlw          m0, 8                  ; (word) { U0, V0, ..., U3, V3 }
-    psrlw          m1, 8                  ; (word) { U4, V4, ..., U7, V7 }
-%else ; uyvy
-    ; chroma is the low byte of each word: mask it
-%if cpuflag(avx)
-    ; load and mask in one instruction; %1 is not used on this path
-    vpand          m0, m2, [srcq+wq*4]        ; (word) { U0, V0, ..., U3, V3 }
-    vpand          m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 }
-%else
-    mov%1          m0, [srcq+wq*4]        ; (byte) { U0, Y0, V0, Y1, ... }
-    mov%1          m1, [srcq+wq*4+mmsize] ; (byte) { U4, Y8, V4, Y9, ... }
-    pand           m0, m2                 ; (word) { U0, V0, ..., U3, V3 }
-    pand           m1, m2                 ; (word) { U4, V4, ..., U7, V7 }
-%endif
-%endif ; yuyv/uyvy
-    packuswb       m0, m1                 ; (byte) { U0, V0, ..., U7, V7 }
-    pand           m1, m0, m2             ; (word) { U0, U1, ..., U7 }
-    psrlw          m0, 8                  ; (word) { V0, V1, ..., V7 }
-%if mmsize == 16
-    ; U ends up in the low half and V in the high half of one register
-    packuswb       m1, m0                 ; (byte) { U0, ... U7, V0, ... V7 }
-    movh   [dstUq+wq], m1
-    movhps [dstVq+wq], m1
-%else ; mmsize == 8
-    packuswb       m1, m1                 ; (byte) { U0, ... U3 }
-    packuswb       m0, m0                 ; (byte) { V0, ... V3 }
-    movh   [dstUq+wq], m1
-    movh   [dstVq+wq], m0
-%endif ; mmsize == 8/16
-    add            wq, mmsize / 2         ; half a vector of bytes written per plane
-    jl .loop_%1
-    REP_RET
-%endmacro
-
-; Emits <%2>ToUV: extracts the U and V bytes from a packed yuyv/uyvy line
-; into two separate planes.
-; %1 = nr. of XMM registers
-; %2 = yuyv or uyvy
-; %3 = if specified, it means that unaligned and aligned code in loop
-;      will be the same (i.e. UYVY+AVX), and thus we don't need to
-;      split the loop in an aligned and unaligned case
-%macro YUYV_TO_UV_FN 2-3
-cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
-    ; w is read from its argument slot, sign-extending the 32-bit int on x86-64
-%if ARCH_X86_64
-    movsxd         wq, dword r5m
-%else ; x86-32
-    mov            wq, r5m
-%endif
-    add         dstUq, wq                 ; end of each output line (1 byte per sample)
-    add         dstVq, wq
-%if mmsize == 16 && %0 == 2
-    ; the flags set here are consumed by the jnz further down; the lea and
-    ; the SIMD instructions in between leave EFLAGS untouched
-    test         srcq, 15
-%endif
-    lea          srcq, [srcq+wq*4]        ; end of input line (4 bytes per chroma pair)
-    pcmpeqb        m2, m2                 ; (byte) { 0xff } x 16
-    psrlw          m2, 8                  ; (word) { 0x00ff } x 8
-    ; NOTE: if uyvy+avx, u/a are identical
-%if mmsize == 16 && %0 == 2
-    jnz .loop_u_start
-    neg            wq
-    LOOP_YUYV_TO_UV a, %2
-.loop_u_start:
-    neg            wq
-    LOOP_YUYV_TO_UV u, %2
-%else ; mmsize == 8, or %3 given (single loop)
-    neg            wq
-    LOOP_YUYV_TO_UV a, %2
-%endif ; mmsize == 8/16
-%endmacro
-
-; Inner loop of the NV12/NV21 chroma de-interleavers: reads 2*mmsize bytes
-; of interleaved chroma and stores mmsize bytes to each output plane.
-; Expects srcq/dstUq/dstVq to point at the end of their lines, wq to hold
-; the negated width and m5 to hold the 0x00ff word mask.
-; %1 = a (aligned) or u (unaligned)
-; %2 = nv12 or nv21
-%macro LOOP_NVXX_TO_UV 2
-.loop_%1:
-    ; byte order in the comments below is the nv12 one
-    mov%1          m0, [srcq+wq*2]        ; (byte) { U0, V0, U1, V1, ... }
-    mov%1          m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... }
-    pand           m2, m0, m5             ; (word) { U0, U1, ..., U7 }
-    pand           m3, m1, m5             ; (word) { U8, U9, ..., U15 }
-    psrlw          m0, 8                  ; (word) { V0, V1, ..., V7 }
-    psrlw          m1, 8                  ; (word) { V8, V9, ..., V15 }
-    packuswb       m2, m3                 ; (byte) { U0, ..., U15 }
-    packuswb       m0, m1                 ; (byte) { V0, ..., V15 }
-    ; nv21 stores the same two registers with the destinations swapped
-%ifidn %2, nv12
-    mova   [dstUq+wq], m2
-    mova   [dstVq+wq], m0
-%else ; nv21
-    mova   [dstVq+wq], m2
-    mova   [dstUq+wq], m0
-%endif ; nv12/21
-    add            wq, mmsize
-    jl .loop_%1
-    REP_RET
-%endmacro
-
-; Emits <%2>ToUV: splits an interleaved NV12/NV21 chroma line into separate
-; U and V planes. With 16-byte vectors the source alignment is tested once
-; and either the aligned-load or the unaligned-load copy of the loop is run.
-; %1 = nr. of XMM registers
-; %2 = nv12 or nv21
-%macro NVXX_TO_UV_FN 2
-cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
-    ; w is read from its argument slot, sign-extending the 32-bit int on x86-64
-%if ARCH_X86_64
-    movsxd         wq, dword r5m
-%else ; x86-32
-    mov            wq, r5m
-%endif
-    add         dstUq, wq                 ; end of each output line (1 byte per sample)
-    add         dstVq, wq
-%if mmsize == 16
-    ; the flags set here are consumed by the jnz further down; the lea and
-    ; the SIMD instructions in between leave EFLAGS untouched
-    test         srcq, 15
-%endif
-    lea          srcq, [srcq+wq*2]        ; end of input line (2 bytes per chroma pair)
-    pcmpeqb        m5, m5                 ; (byte) { 0xff } x 16
-    psrlw          m5, 8                  ; (word) { 0x00ff } x 8
-%if mmsize == 16
-    jnz .loop_u_start
-    neg            wq
-    LOOP_NVXX_TO_UV a, %2
-.loop_u_start:
-    neg            wq
-    LOOP_NVXX_TO_UV u, %2
-%else ; mmsize == 8
-    neg            wq
-    LOOP_NVXX_TO_UV a, %2
-%endif ; mmsize == 8/16
-%endmacro
-
-; Instantiate the packed-YUV shufflers once per instruction set.
-; The MMX versions are only assembled for 32-bit x86.
-%if ARCH_X86_32
-INIT_MMX mmx
-YUYV_TO_Y_FN  0, yuyv
-YUYV_TO_Y_FN  0, uyvy
-YUYV_TO_UV_FN 0, yuyv
-YUYV_TO_UV_FN 0, uyvy
-NVXX_TO_UV_FN 0, nv12
-NVXX_TO_UV_FN 0, nv21
-%endif
-
-INIT_XMM sse2
-; uyvy-to-Y needs no mask register (it shifts instead), hence 2 rather than 3
-YUYV_TO_Y_FN  3, yuyv
-YUYV_TO_Y_FN  2, uyvy
-YUYV_TO_UV_FN 3, yuyv
-YUYV_TO_UV_FN 3, uyvy
-NVXX_TO_UV_FN 5, nv12
-NVXX_TO_UV_FN 5, nv21
-
-%if HAVE_AVX_EXTERNAL
-INIT_XMM avx
-; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but
-; that's not faster in practice
-YUYV_TO_UV_FN 3, yuyv
-; the third argument suppresses the aligned/unaligned loop split
-YUYV_TO_UV_FN 3, uyvy, 1
-NVXX_TO_UV_FN 5, nv12
-NVXX_TO_UV_FN 5, nv21
-%endif
diff --git a/ffmpeg/libswscale/x86/output.asm b/ffmpeg/libswscale/x86/output.asm
deleted file mode 100644
index 9ea4af9..0000000
--- a/ffmpeg/libswscale/x86/output.asm
+++ /dev/null
@@ -1,413 +0,0 @@
-;******************************************************************************
-;* x86-optimized vertical line scaling functions
-;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
-;*                    Kieran Kunhya <kieran@kunhya.com>
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "libavutil/x86/x86util.asm"
-
-SECTION_RODATA
-
-minshort:      times 8 dw 0x8000
-yuv2yuvX_16_start:  times 4 dd 0x4000 - 0x40000000
-yuv2yuvX_10_start:  times 4 dd 0x10000
-yuv2yuvX_9_start:   times 4 dd 0x20000
-yuv2yuvX_10_upper:  times 8 dw 0x3ff
-yuv2yuvX_9_upper:   times 8 dw 0x1ff
-pd_4:          times 4 dd 4
-pd_4min0x40000:times 4 dd 4 - (0x40000)
-pw_16:         times 8 dw 16
-pw_32:         times 8 dw 32
-pw_512:        times 8 dw 512
-pw_1024:       times 8 dw 1024
-
-SECTION .text
-
-;-----------------------------------------------------------------------------
-; vertical line scaling
-;
-; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
-;                                     const uint8_t *dither, int offset)
-; and
-; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize,
-;                                     const int16_t **src, uint8_t *dst, int dstW,
-;                                     const uint8_t *dither, int offset)
-;
-; Scale one or $filterSize lines of source data to generate one line of output
-; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in
-; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
-; of 2. $offset is either 0 or 3. $dither holds 8 values.
-;-----------------------------------------------------------------------------
-
-; Emits yuv2planeX_<%1>: multi-tap vertical scaler. For every output vector
-; it accumulates, two filter taps at a time, coefficient * source-line
-; products, then shifts, clamps and stores the result.
-; %1 = output bit depth (8, 9, 10 or 16)
-; %2 = nr. of XMM registers
-; %3 = forwarded as cglobal's first numeric argument
-;      (presumably the number of arguments preloaded into registers -- confirm
-;      against x86inc)
-%macro yuv2planeX_fn 3
-
-; NOTE: these single-line %defines are not local to the macro; they stay in
-; effect for the rest of the file.
-%if ARCH_X86_32
-%define cntr_reg fltsizeq
-%define movsx mov
-%else
-%define cntr_reg r7
-%define movsx movsxd
-%endif
-
-cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
-%if %1 == 8 || %1 == 9 || %1 == 10
-    pxor            m6,  m6              ; zero, used for unpacking and as the lower clamp
-%endif ; %1 == 8/9/10
-
-%if %1 == 8
-%if ARCH_X86_32
-    ; x86-32 has too few vector registers to keep the expanded dither values
-    ; live, so they are spilled to 32 bytes of stack scratch space.
-    ; NOTE(review): pad is derived from stack_offset, presumably so that rsp is
-    ; suitably aligned for the mova stores below -- confirm against x86inc.
-%assign pad 0x2c - (stack_offset & 15)
-    SUB             rsp, pad
-%define m_dith m7
-%else ; x86-64
-%define m_dith m9
-%endif ; x86-32
-
-    ; create registers holding dither
-    movq        m_dith, [ditherq]        ; dither
-    test        offsetd, offsetd
-    jz              .no_rot
-    ; offset != 0: rotate the 8 dither bytes by 3 positions
-%if mmsize == 16
-    punpcklqdq  m_dith,  m_dith
-%endif ; mmsize == 16
-    PALIGNR     m_dith,  m_dith,  3,  m0
-.no_rot:
-    ; widen the dither bytes to dwords and scale them by 1 << 12 so that they
-    ; can seed the dword accumulators directly
-%if mmsize == 16
-    punpcklbw   m_dith,  m6
-%if ARCH_X86_64
-    punpcklwd       m8,  m_dith,  m6
-    pslld           m8,  12
-%else ; x86-32
-    punpcklwd       m5,  m_dith,  m6
-    pslld           m5,  12
-%endif ; x86-32/64
-    punpckhwd   m_dith,  m6
-    pslld       m_dith,  12
-%if ARCH_X86_32
-    mova      [rsp+ 0],  m5
-    mova      [rsp+16],  m_dith
-%endif
-%else ; mmsize == 8
-    punpcklbw       m5,  m_dith,  m6
-    punpckhbw   m_dith,  m6
-    punpcklwd       m4,  m5,  m6
-    punpckhwd       m5,  m6
-    punpcklwd       m3,  m_dith,  m6
-    punpckhwd   m_dith,  m6
-    pslld           m4,  12
-    pslld           m5,  12
-    pslld           m3,  12
-    pslld       m_dith,  12
-    mova      [rsp+ 0],  m4
-    mova      [rsp+ 8],  m5
-    mova      [rsp+16],  m3
-    mova      [rsp+24],  m_dith
-%endif ; mmsize == 8/16
-%endif ; %1 == 8
-
-    xor             r5,  r5              ; r5 = index of the first output sample of this vector
-
-.pixelloop:
-%assign %%i 0
-    ; the rep here is for the 8bit output mmx case, where dither covers
-    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
-    ; pixels per iteration. In order to not have to keep track of where
-    ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
-%if %1 == 8
-%assign %%repcnt 16/mmsize
-%else
-%assign %%repcnt 1
-%endif
-
-%rep %%repcnt
-
-    ; seed the two dword accumulators (m2 = low half, m1 = high half of the
-    ; output vector) with the dither values or the per-depth start constant
-%if %1 == 8
-%if ARCH_X86_32
-    mova            m2, [rsp+mmsize*(0+%%i)]
-    mova            m1, [rsp+mmsize*(1+%%i)]
-%else ; x86-64
-    mova            m2,  m8
-    mova            m1,  m_dith
-%endif ; x86-32/64
-%else ; %1 == 9/10/16
-    mova            m1, [yuv2yuvX_%1_start]
-    mova            m2,  m1
-%endif ; %1 == 8/9/10/16
-    movsx     cntr_reg,  fltsizem
-    ; walk the filter from its last tap pair down to the first, two taps
-    ; (and two source lines) per iteration
-.filterloop_ %+ %%i:
-    ; input pixels
-    mov             r6, [srcq+gprsize*cntr_reg-2*gprsize]
-%if %1 == 16
-    mova            m3, [r6+r5*4]
-    mova            m5, [r6+r5*4+mmsize]
-%else ; %1 == 8/9/10
-    mova            m3, [r6+r5*2]
-%endif ; %1 == 8/9/10/16
-    mov             r6, [srcq+gprsize*cntr_reg-gprsize]
-%if %1 == 16
-    mova            m4, [r6+r5*4]
-    mova            m6, [r6+r5*4+mmsize]
-%else ; %1 == 8/9/10
-    mova            m4, [r6+r5*2]
-%endif ; %1 == 8/9/10/16
-
-    ; coefficients
-    movd            m0, [filterq+2*cntr_reg-4] ; coeff[0], coeff[1]
-%if %1 == 16
-    ; dword input samples: multiply each line by its own sign-extended coefficient
-    pshuflw         m7,  m0,  0          ; coeff[0]
-    pshuflw         m0,  m0,  0x55       ; coeff[1]
-    pmovsxwd        m7,  m7              ; word -> dword
-    pmovsxwd        m0,  m0              ; word -> dword
-
-    pmulld          m3,  m7
-    pmulld          m5,  m7
-    pmulld          m4,  m0
-    pmulld          m6,  m0
-
-    paddd           m2,  m3
-    paddd           m1,  m5
-    paddd           m2,  m4
-    paddd           m1,  m6
-%else ; %1 == 10/9/8
-    ; word input samples: interleave the two lines so that one pmaddwd yields
-    ; line0*coeff[0] + line1*coeff[1] per pixel
-    punpcklwd       m5,  m3,  m4
-    punpckhwd       m3,  m4
-    SPLATD          m0
-
-    pmaddwd         m5,  m0
-    pmaddwd         m3,  m0
-
-    paddd           m2,  m5
-    paddd           m1,  m3
-%endif ; %1 == 8/9/10/16
-
-    sub       cntr_reg,  2
-    jg .filterloop_ %+ %%i
-
-%if %1 == 16
-    psrad           m2,  31 - %1
-    psrad           m1,  31 - %1
-%else ; %1 == 10/9/8
-    psrad           m2,  27 - %1
-    psrad           m1,  27 - %1
-%endif ; %1 == 8/9/10/16
-
-%if %1 == 8
-    packssdw        m2,  m1
-    packuswb        m2,  m2
-    movh   [dstq+r5*1],  m2
-%else ; %1 == 9/10/16
-%if %1 == 16
-    ; the start constant biases the sum by -0x40000000, i.e. -0x8000 after the
-    ; >> 15, so the signed pack does not clip; adding 0x8000 removes the bias
-    packssdw        m2,  m1
-    paddw           m2, [minshort]
-%else ; %1 == 9/10
-%if cpuflag(sse4)
-    packusdw        m2,  m1
-%else ; mmxext/sse2
-    packssdw        m2,  m1
-    pmaxsw          m2,  m6
-%endif ; mmxext/sse2/sse4/avx
-    pminsw          m2, [yuv2yuvX_%1_upper]
-%endif ; %1 == 9/10/16
-    mova   [dstq+r5*2],  m2
-%endif ; %1 == 8/9/10/16
-
-    add             r5,  mmsize/2
-    sub             wd,  mmsize/2
-
-%assign %%i %%i+2
-%endrep
-    ; flags still come from the last "sub wd" inside the %rep body
-    jg .pixelloop
-
-%if %1 == 8
-%if ARCH_X86_32
-    ADD             rsp, pad
-    RET
-%else ; x86-64
-    REP_RET
-%endif ; x86-32/64
-%else ; %1 == 9/10/16
-    REP_RET
-%endif ; %1 == 8/9/10/16
-%endmacro
-
-; Instantiate yuv2planeX per output depth and instruction set
-; (arguments: output depth, nr. of XMM registers, cglobal argument count).
-; The MMXEXT versions are only assembled for 32-bit x86.
-%if ARCH_X86_32
-INIT_MMX mmxext
-yuv2planeX_fn  8,  0, 7
-yuv2planeX_fn  9,  0, 5
-yuv2planeX_fn 10,  0, 5
-%endif
-
-INIT_XMM sse2
-yuv2planeX_fn  8, 10, 7
-yuv2planeX_fn  9,  7, 5
-yuv2planeX_fn 10,  7, 5
-
-; the 16-bit variant is only generated for SSE4 (its code path uses
-; pmulld/pmovsxwd)
-INIT_XMM sse4
-yuv2planeX_fn  8, 10, 7
-yuv2planeX_fn  9,  7, 5
-yuv2planeX_fn 10,  7, 5
-yuv2planeX_fn 16,  8, 5
-
-%if HAVE_AVX_EXTERNAL
-INIT_XMM avx
-yuv2planeX_fn  8, 10, 7
-yuv2planeX_fn  9,  7, 5
-yuv2planeX_fn 10,  7, 5
-%endif
-
-; Inner loop of yuv2plane1: adds the rounding/dither term, shifts down to
-; the output depth, clamps and stores mmsize output samples per iteration.
-; The constant registers (m2-m5, depending on the depth) are set up by
-; yuv2plane1_fn. %2 only selects the store instruction; the source is read
-; through memory operands of the add instructions.
-; %1=output-bpc, %2=alignment (u/a)
-%macro yuv2plane1_mainloop 2
-.loop_%2:
-%if %1 == 8
-    ; m2/m3 = dither words for the first/second source vector
-    paddsw          m0, m2, [srcq+wq*2+mmsize*0]
-    paddsw          m1, m3, [srcq+wq*2+mmsize*1]
-    psraw           m0, 7
-    psraw           m1, 7
-    packuswb        m0, m1
-    mov%2    [dstq+wq], m0
-%elif %1 == 16
-    ; dword input, m4 = rounding term (plus a bias on the non-SSE4 path)
-    paddd           m0, m4, [srcq+wq*4+mmsize*0]
-    paddd           m1, m4, [srcq+wq*4+mmsize*1]
-    paddd           m2, m4, [srcq+wq*4+mmsize*2]
-    paddd           m3, m4, [srcq+wq*4+mmsize*3]
-    psrad           m0, 3
-    psrad           m1, 3
-    psrad           m2, 3
-    psrad           m3, 3
-%if cpuflag(sse4) ; avx/sse4
-    packusdw        m0, m1
-    packusdw        m2, m3
-%else ; mmx/sse2
-    ; no unsigned dword pack available: pack the biased values with signed
-    ; saturation, then add m5 to undo the bias
-    packssdw        m0, m1
-    packssdw        m2, m3
-    paddw           m0, m5
-    paddw           m2, m5
-%endif ; mmx/sse2/sse4/avx
-    mov%2    [dstq+wq*2+mmsize*0], m0
-    mov%2    [dstq+wq*2+mmsize*1], m2
-%else ; %1 == 9/10
-    ; m2 = rounding term, m4 = lower clamp, m3 = upper clamp
-    paddsw          m0, m2, [srcq+wq*2+mmsize*0]
-    paddsw          m1, m2, [srcq+wq*2+mmsize*1]
-    psraw           m0, 15 - %1
-    psraw           m1, 15 - %1
-    pmaxsw          m0, m4
-    pmaxsw          m1, m4
-    pminsw          m0, m3
-    pminsw          m1, m3
-    mov%2    [dstq+wq*2+mmsize*0], m0
-    mov%2    [dstq+wq*2+mmsize*1], m1
-%endif
-    add             wq, mmsize
-    jl .loop_%2
-%endmacro
-
-; Emits yuv2plane1_<%1>: single-line vertical "scaler" that only rounds,
-; shifts and clamps one source line to the output depth.
-; %1 = output bit depth (8, 9, 10 or 16)
-; %2 = nr. of XMM registers
-; %3 = forwarded as both numeric cglobal arguments that precede the XMM count
-%macro yuv2plane1_fn 3
-cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
-    movsxdifnidn    wq, wd
-    ; round w up to a multiple of mmsize: the loop always processes whole
-    ; iterations, so up to mmsize-1 samples past w are read and written
-    add             wq, mmsize - 1
-    and             wq, ~(mmsize - 1)
-    ; point src/dst at the (rounded) end of their lines; wq is negated below
-    ; and counts up to 0
-%if %1 == 8
-    add           dstq, wq
-%else ; %1 != 8
-    lea           dstq, [dstq+wq*2]
-%endif ; %1 == 8
-%if %1 == 16
-    lea           srcq, [srcq+wq*4]
-%else ; %1 != 16
-    lea           srcq, [srcq+wq*2]
-%endif ; %1 == 16
-    neg             wq
-
-%if %1 == 8
-    pxor            m4, m4               ; zero
-
-    ; create registers holding dither
-    movq            m3, [ditherq]        ; dither
-    test       offsetd, offsetd
-    jz              .no_rot
-    ; offset != 0: rotate the 8 dither bytes by 3 positions
-%if mmsize == 16
-    punpcklqdq      m3, m3
-%endif ; mmsize == 16
-    PALIGNR         m3, m3, 3, m2
-.no_rot:
-%if mmsize == 8
-    ; 4 words per register: m2 = dither[0-3], m3 = dither[4-7]
-    mova            m2, m3
-    punpckhbw       m3, m4               ; byte->word
-    punpcklbw       m2, m4               ; byte->word
-%else
-    ; 8 words per register: both source vectors use the same 8 dither values
-    punpcklbw       m3, m4
-    mova            m2, m3
-%endif
-%elif %1 == 9
-    pxor            m4, m4               ; lower clamp
-    mova            m3, [pw_512]         ; upper clamp
-    mova            m2, [pw_32]          ; rounding term for the >> 6
-%elif %1 == 10
-    pxor            m4, m4               ; lower clamp
-    mova            m3, [pw_1024]        ; upper clamp
-    mova            m2, [pw_16]          ; rounding term for the >> 5
-%else ; %1 == 16
-%if cpuflag(sse4) ; sse4/avx
-    mova            m4, [pd_4]           ; rounding term for the >> 3
-%else ; mmx/sse2
-    ; rounding term biased by -0x40000 (= -0x8000 after the >> 3) so that the
-    ; signed pack does not clip; m5 (0x8000 per word) removes the bias again
-    mova            m4, [pd_4min0x40000]
-    mova            m5, [minshort]
-%endif ; mmx/sse2/sse4/avx
-%endif ; %1 == ..
-
-    ; actual pixel scaling
-%if mmsize == 8
-    yuv2plane1_mainloop %1, a
-%else ; mmsize == 16
-    ; pick aligned or unaligned stores once, based on the destination pointer
-    test          dstq, 15
-    jnz .unaligned
-    yuv2plane1_mainloop %1, a
-    REP_RET
-.unaligned:
-    yuv2plane1_mainloop %1, u
-%endif ; mmsize == 8/16
-    REP_RET
-%endmacro
-
-; Instantiate yuv2plane1 per output depth and instruction set
-; (arguments: output depth, nr. of XMM registers, cglobal argument count).
-; The MMX/MMXEXT versions are only assembled for 32-bit x86; the 9/10-bit
-; ones are built as mmxext rather than plain mmx.
-%if ARCH_X86_32
-INIT_MMX mmx
-yuv2plane1_fn  8, 0, 5
-yuv2plane1_fn 16, 0, 3
-
-INIT_MMX mmxext
-yuv2plane1_fn  9, 0, 3
-yuv2plane1_fn 10, 0, 3
-%endif
-
-INIT_XMM sse2
-yuv2plane1_fn  8, 5, 5
-yuv2plane1_fn  9, 5, 3
-yuv2plane1_fn 10, 5, 3
-; 6 registers: the non-SSE4 16-bit path also needs m5 for the bias removal
-yuv2plane1_fn 16, 6, 3
-
-; only the 16-bit variant has an SSE4-specific code path (packusdw)
-INIT_XMM sse4
-yuv2plane1_fn 16, 5, 3
-
-%if HAVE_AVX_EXTERNAL
-INIT_XMM avx
-yuv2plane1_fn  8, 5, 5
-yuv2plane1_fn  9, 5, 3
-yuv2plane1_fn 10, 5, 3
-yuv2plane1_fn 16, 5, 3
-%endif
diff --git a/ffmpeg/libswscale/x86/rgb2rgb.c b/ffmpeg/libswscale/x86/rgb2rgb.c
deleted file mode 100644
index 8cc99c6..0000000
--- a/ffmpeg/libswscale/x86/rgb2rgb.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * software RGB to RGB converter
- * pluralize by software PAL8 to RGB converter
- *              software YUV to YUV converter
- *              software YUV to RGB converter
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/x86/cpu.h"
-#include "libavutil/cpu.h"
-#include "libavutil/bswap.h"
-#include "libswscale/rgb2rgb.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-
-#if HAVE_INLINE_ASM
-
-DECLARE_ASM_CONST(8, uint64_t, mmx_ff)       = 0x00000000000000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32g)      = 0x0000FF000000FF00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32r)      = 0x00FF000000FF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32a)      = 0xFF000000FF000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32)       = 0x00FFFFFF00FFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3216br)   = 0x00F800F800F800F8ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3216g)    = 0x0000FC000000FC00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3215g)    = 0x0000F8000000F800ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul3216)      = 0x2000000420000004ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul3215)      = 0x2000000820000008ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24b)      = 0x00FF0000FF0000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24g)      = 0xFF0000FF0000FF00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24r)      = 0x0000FF0000FF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24l)      = 0x0000000000FFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24h)      = 0x0000FFFFFF000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hh)     = 0xffff000000000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hhh)    = 0xffffffff00000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hhhh)   = 0xffffffffffff0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
-DECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
-DECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15g)      = 0x03E003E003E003E0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15r)      = 0x7C007C007C007C00ULL;
-#define mask16b mask15b
-DECLARE_ASM_CONST(8, uint64_t, mask16g)      = 0x07E007E007E007E0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask16r)      = 0xF800F800F800F800ULL;
-DECLARE_ASM_CONST(8, uint64_t, red_16mask)   = 0x0000f8000000f800ULL;
-DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, blue_16mask)  = 0x0000001f0000001fULL;
-DECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
-DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
-DECLARE_ASM_CONST(8, uint64_t, mul15_mid)    = 0x4200420042004200ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul15_hi)     = 0x0210021002100210ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL;
-
-#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
-#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
-#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
-#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
-#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
-#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
-#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
-
-// Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one.
-
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
-#define COMPILE_TEMPLATE_SSE2 0
-#define COMPILE_TEMPLATE_AVX 0
-
-//MMX versions
-#undef RENAME
-#define RENAME(a) a ## _mmx
-#include "rgb2rgb_template.c"
-
-// MMXEXT versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "rgb2rgb_template.c"
-
-//SSE2 versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_SSE2
-#define COMPILE_TEMPLATE_SSE2 1
-#define RENAME(a) a ## _sse2
-#include "rgb2rgb_template.c"
-
-//AVX versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_AVX
-#define COMPILE_TEMPLATE_AVX 1
-#define RENAME(a) a ## _avx
-#include "rgb2rgb_template.c"
-
-//3DNOW versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#undef COMPILE_TEMPLATE_SSE2
-#undef COMPILE_TEMPLATE_AVX
-#undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_SSE2 0
-#define COMPILE_TEMPLATE_AVX 0
-#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3dnow
-#include "rgb2rgb_template.c"
-
-/*
- RGB15->RGB16 original by Strepto/Astral
- ported to gcc & bugfixed : A'rpi
- MMXEXT, 3DNOW optimization by Nick Kurshev
- 32-bit C version, and and&add trick by Michael Niedermayer
-*/
-
-#endif /* HAVE_INLINE_ASM */
-
-av_cold void rgb2rgb_init_x86(void)
-{
-#if HAVE_INLINE_ASM
-    int cpu_flags = av_get_cpu_flags();
-
-    if (INLINE_MMX(cpu_flags))
-        rgb2rgb_init_mmx();
-    if (INLINE_AMD3DNOW(cpu_flags))
-        rgb2rgb_init_3dnow();
-    if (INLINE_MMXEXT(cpu_flags))
-        rgb2rgb_init_mmxext();
-    if (INLINE_SSE2(cpu_flags))
-        rgb2rgb_init_sse2();
-    if (INLINE_AVX(cpu_flags))
-        rgb2rgb_init_avx();
-#endif /* HAVE_INLINE_ASM */
-}
diff --git a/ffmpeg/libswscale/x86/rgb2rgb_template.c b/ffmpeg/libswscale/x86/rgb2rgb_template.c
deleted file mode 100644
index d58219b..0000000
--- a/ffmpeg/libswscale/x86/rgb2rgb_template.c
+++ /dev/null
@@ -1,2533 +0,0 @@
-/*
- * software RGB to RGB converter
- * pluralize by software PAL8 to RGB converter
- *              software YUV to YUV converter
- *              software YUV to RGB converter
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- * lot of big-endian byte order fixes by Alex Beregszaszi
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stddef.h>
-
-#include "libavutil/attributes.h"
-
-#undef PREFETCH
-#undef MOVNTQ
-#undef EMMS
-#undef SFENCE
-#undef PAVGB
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#define PAVGB     "pavgusb"
-#elif COMPILE_TEMPLATE_MMXEXT
-#define PREFETCH "prefetchnta"
-#define PAVGB     "pavgb"
-#else
-#define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#if COMPILE_TEMPLATE_MMXEXT
-#define MOVNTQ "movntq"
-#define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
-
-#if !COMPILE_TEMPLATE_SSE2
-
-#if !COMPILE_TEMPLATE_AMD3DNOW
-
-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *dest = dst;
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 23;
-    __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movd        (%1), %%mm0    \n\t"
-            "punpckldq  3(%1), %%mm0    \n\t"
-            "movd       6(%1), %%mm1    \n\t"
-            "punpckldq  9(%1), %%mm1    \n\t"
-            "movd      12(%1), %%mm2    \n\t"
-            "punpckldq 15(%1), %%mm2    \n\t"
-            "movd      18(%1), %%mm3    \n\t"
-            "punpckldq 21(%1), %%mm3    \n\t"
-            "por        %%mm7, %%mm0    \n\t"
-            "por        %%mm7, %%mm1    \n\t"
-            "por        %%mm7, %%mm2    \n\t"
-            "por        %%mm7, %%mm3    \n\t"
-            MOVNTQ"     %%mm0,   (%0)   \n\t"
-            MOVNTQ"     %%mm1,  8(%0)   \n\t"
-            MOVNTQ"     %%mm2, 16(%0)   \n\t"
-            MOVNTQ"     %%mm3, 24(%0)"
-            :: "r"(dest), "r"(s)
-            :"memory");
-        dest += 32;
-        s += 24;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = 255;
-    }
-}
-
-#define STORE_BGR24_MMX \
-            "psrlq         $8, %%mm2    \n\t" \
-            "psrlq         $8, %%mm3    \n\t" \
-            "psrlq         $8, %%mm6    \n\t" \
-            "psrlq         $8, %%mm7    \n\t" \
-            "pand "MANGLE(mask24l)", %%mm0\n\t" \
-            "pand "MANGLE(mask24l)", %%mm1\n\t" \
-            "pand "MANGLE(mask24l)", %%mm4\n\t" \
-            "pand "MANGLE(mask24l)", %%mm5\n\t" \
-            "pand "MANGLE(mask24h)", %%mm2\n\t" \
-            "pand "MANGLE(mask24h)", %%mm3\n\t" \
-            "pand "MANGLE(mask24h)", %%mm6\n\t" \
-            "pand "MANGLE(mask24h)", %%mm7\n\t" \
-            "por        %%mm2, %%mm0    \n\t" \
-            "por        %%mm3, %%mm1    \n\t" \
-            "por        %%mm6, %%mm4    \n\t" \
-            "por        %%mm7, %%mm5    \n\t" \
- \
-            "movq       %%mm1, %%mm2    \n\t" \
-            "movq       %%mm4, %%mm3    \n\t" \
-            "psllq        $48, %%mm2    \n\t" \
-            "psllq        $32, %%mm3    \n\t" \
-            "por        %%mm2, %%mm0    \n\t" \
-            "psrlq        $16, %%mm1    \n\t" \
-            "psrlq        $32, %%mm4    \n\t" \
-            "psllq        $16, %%mm5    \n\t" \
-            "por        %%mm3, %%mm1    \n\t" \
-            "por        %%mm5, %%mm4    \n\t" \
- \
-            MOVNTQ"     %%mm0,   (%0)    \n\t" \
-            MOVNTQ"     %%mm1,  8(%0)    \n\t" \
-            MOVNTQ"     %%mm4, 16(%0)"
-
-
-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    uint8_t *dest = dst;
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 31;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movq        (%1), %%mm0    \n\t"
-            "movq       8(%1), %%mm1    \n\t"
-            "movq      16(%1), %%mm4    \n\t"
-            "movq      24(%1), %%mm5    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm1, %%mm3    \n\t"
-            "movq       %%mm4, %%mm6    \n\t"
-            "movq       %%mm5, %%mm7    \n\t"
-            STORE_BGR24_MMX
-            :: "r"(dest), "r"(s)
-            :"memory");
-        dest += 24;
-        s += 32;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = *s++;
-        s++;
-    }
-}
-
-/*
- original by Strepto/Astral
- ported to gcc & bugfixed: A'rpi
- MMXEXT, 3DNOW optimization by Nick Kurshev
- 32-bit C version, and and&add trick by Michael Niedermayer
-*/
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    register const uint8_t* s=src;
-    register uint8_t* d=dst;
-    register const uint8_t *end;
-    const uint8_t *mm_end;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
-    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
-    mm_end = end - 15;
-    while (s<mm_end) {
-        __asm__ volatile(
-            PREFETCH" 32(%1)        \n\t"
-            "movq      (%1), %%mm0  \n\t"
-            "movq     8(%1), %%mm2  \n\t"
-            "movq     %%mm0, %%mm1  \n\t"
-            "movq     %%mm2, %%mm3  \n\t"
-            "pand     %%mm4, %%mm0  \n\t"
-            "pand     %%mm4, %%mm2  \n\t"
-            "paddw    %%mm1, %%mm0  \n\t"
-            "paddw    %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  (%0)  \n\t"
-            MOVNTQ"   %%mm2, 8(%0)"
-            :: "r"(d), "r"(s)
-        );
-        d+=16;
-        s+=16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    mm_end = end - 3;
-    while (s < mm_end) {
-        register unsigned x= *((const uint32_t *)s);
-        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
-        d+=4;
-        s+=4;
-    }
-    if (s < end) {
-        register unsigned short x= *((const uint16_t *)s);
-        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
-    }
-}
-
-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    register const uint8_t* s=src;
-    register uint8_t* d=dst;
-    register const uint8_t *end;
-    const uint8_t *mm_end;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
-    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
-    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
-    mm_end = end - 15;
-    while (s<mm_end) {
-        __asm__ volatile(
-            PREFETCH" 32(%1)        \n\t"
-            "movq      (%1), %%mm0  \n\t"
-            "movq     8(%1), %%mm2  \n\t"
-            "movq     %%mm0, %%mm1  \n\t"
-            "movq     %%mm2, %%mm3  \n\t"
-            "psrlq       $1, %%mm0  \n\t"
-            "psrlq       $1, %%mm2  \n\t"
-            "pand     %%mm7, %%mm0  \n\t"
-            "pand     %%mm7, %%mm2  \n\t"
-            "pand     %%mm6, %%mm1  \n\t"
-            "pand     %%mm6, %%mm3  \n\t"
-            "por      %%mm1, %%mm0  \n\t"
-            "por      %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  (%0)  \n\t"
-            MOVNTQ"   %%mm2, 8(%0)"
-            :: "r"(d), "r"(s)
-        );
-        d+=16;
-        s+=16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    mm_end = end - 3;
-    while (s < mm_end) {
-        register uint32_t x= *((const uint32_t*)s);
-        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
-        s+=4;
-        d+=4;
-    }
-    if (s < end) {
-        register uint16_t x= *((const uint16_t*)s);
-        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
-    }
-}
-
-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    mm_end = end - 15;
-    __asm__ volatile(
-        "movq           %3, %%mm5   \n\t"
-        "movq           %4, %%mm6   \n\t"
-        "movq           %5, %%mm7   \n\t"
-        "jmp 2f                     \n\t"
-        ".p2align        4          \n\t"
-        "1:                         \n\t"
-        PREFETCH"   32(%1)          \n\t"
-        "movd         (%1), %%mm0   \n\t"
-        "movd        4(%1), %%mm3   \n\t"
-        "punpckldq   8(%1), %%mm0   \n\t"
-        "punpckldq  12(%1), %%mm3   \n\t"
-        "movq        %%mm0, %%mm1   \n\t"
-        "movq        %%mm3, %%mm4   \n\t"
-        "pand        %%mm6, %%mm0   \n\t"
-        "pand        %%mm6, %%mm3   \n\t"
-        "pmaddwd     %%mm7, %%mm0   \n\t"
-        "pmaddwd     %%mm7, %%mm3   \n\t"
-        "pand        %%mm5, %%mm1   \n\t"
-        "pand        %%mm5, %%mm4   \n\t"
-        "por         %%mm1, %%mm0   \n\t"
-        "por         %%mm4, %%mm3   \n\t"
-        "psrld          $5, %%mm0   \n\t"
-        "pslld         $11, %%mm3   \n\t"
-        "por         %%mm3, %%mm0   \n\t"
-        MOVNTQ"      %%mm0, (%0)    \n\t"
-        "add           $16,  %1     \n\t"
-        "add            $8,  %0     \n\t"
-        "2:                         \n\t"
-        "cmp            %2,  %1     \n\t"
-        " jb            1b          \n\t"
-        : "+r" (d), "+r"(s)
-        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
-    );
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
-    }
-}
-
-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movd        (%1), %%mm0    \n\t"
-            "movd       4(%1), %%mm3    \n\t"
-            "punpckldq  8(%1), %%mm0    \n\t"
-            "punpckldq 12(%1), %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psllq         $8, %%mm0    \n\t"
-            "psllq         $8, %%mm3    \n\t"
-            "pand       %%mm7, %%mm0    \n\t"
-            "pand       %%mm7, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq        $19, %%mm2    \n\t"
-            "psrlq        $19, %%mm5    \n\t"
-            "pand          %2, %%mm2    \n\t"
-            "pand          %2, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, (%0)     \n\t"
-            :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
-    }
-}
-
-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    mm_end = end - 15;
-    __asm__ volatile(
-        "movq           %3, %%mm5   \n\t"
-        "movq           %4, %%mm6   \n\t"
-        "movq           %5, %%mm7   \n\t"
-        "jmp            2f          \n\t"
-        ".p2align        4          \n\t"
-        "1:                         \n\t"
-        PREFETCH"   32(%1)          \n\t"
-        "movd         (%1), %%mm0   \n\t"
-        "movd        4(%1), %%mm3   \n\t"
-        "punpckldq   8(%1), %%mm0   \n\t"
-        "punpckldq  12(%1), %%mm3   \n\t"
-        "movq        %%mm0, %%mm1   \n\t"
-        "movq        %%mm3, %%mm4   \n\t"
-        "pand        %%mm6, %%mm0   \n\t"
-        "pand        %%mm6, %%mm3   \n\t"
-        "pmaddwd     %%mm7, %%mm0   \n\t"
-        "pmaddwd     %%mm7, %%mm3   \n\t"
-        "pand        %%mm5, %%mm1   \n\t"
-        "pand        %%mm5, %%mm4   \n\t"
-        "por         %%mm1, %%mm0   \n\t"
-        "por         %%mm4, %%mm3   \n\t"
-        "psrld          $6, %%mm0   \n\t"
-        "pslld         $10, %%mm3   \n\t"
-        "por         %%mm3, %%mm0   \n\t"
-        MOVNTQ"      %%mm0, (%0)    \n\t"
-        "add           $16,  %1     \n\t"
-        "add            $8,  %0     \n\t"
-        "2:                         \n\t"
-        "cmp            %2,  %1     \n\t"
-        " jb            1b          \n\t"
-        : "+r" (d), "+r"(s)
-        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
-    );
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
-    }
-}
-
-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movd        (%1), %%mm0    \n\t"
-            "movd       4(%1), %%mm3    \n\t"
-            "punpckldq  8(%1), %%mm0    \n\t"
-            "punpckldq 12(%1), %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psllq         $7, %%mm0    \n\t"
-            "psllq         $7, %%mm3    \n\t"
-            "pand       %%mm7, %%mm0    \n\t"
-            "pand       %%mm7, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq        $19, %%mm2    \n\t"
-            "psrlq        $19, %%mm5    \n\t"
-            "pand          %2, %%mm2    \n\t"
-            "pand          %2, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, (%0)     \n\t"
-            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
-    }
-}
-
-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 11;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movd        (%1), %%mm0    \n\t"
-            "movd       3(%1), %%mm3    \n\t"
-            "punpckldq  6(%1), %%mm0    \n\t"
-            "punpckldq  9(%1), %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "psrlq         $8, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, (%0)     \n\t"
-            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        const int b = *s++;
-        const int g = *s++;
-        const int r = *s++;
-        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
-    }
-}
-
-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movd        (%1), %%mm0    \n\t"
-            "movd       3(%1), %%mm3    \n\t"
-            "punpckldq  6(%1), %%mm0    \n\t"
-            "punpckldq  9(%1), %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psllq         $8, %%mm0    \n\t"
-            "psllq         $8, %%mm3    \n\t"
-            "pand       %%mm7, %%mm0    \n\t"
-            "pand       %%mm7, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq        $19, %%mm2    \n\t"
-            "psrlq        $19, %%mm5    \n\t"
-            "pand          %2, %%mm2    \n\t"
-            "pand          %2, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, (%0)     \n\t"
-            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        const int r = *s++;
-        const int g = *s++;
-        const int b = *s++;
-        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
-    }
-}
-
-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 11;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movd        (%1), %%mm0    \n\t"
-            "movd       3(%1), %%mm3    \n\t"
-            "punpckldq  6(%1), %%mm0    \n\t"
-            "punpckldq  9(%1), %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $9, %%mm2    \n\t"
-            "psrlq         $9, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, (%0)     \n\t"
-            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        const int b = *s++;
-        const int g = *s++;
-        const int r = *s++;
-        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
-    }
-}
-
-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-    const uint8_t *mm_end;
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH" 32(%1)            \n\t"
-            "movd       (%1), %%mm0     \n\t"
-            "movd      3(%1), %%mm3     \n\t"
-            "punpckldq 6(%1), %%mm0     \n\t"
-            "punpckldq 9(%1), %%mm3     \n\t"
-            "movq      %%mm0, %%mm1     \n\t"
-            "movq      %%mm0, %%mm2     \n\t"
-            "movq      %%mm3, %%mm4     \n\t"
-            "movq      %%mm3, %%mm5     \n\t"
-            "psllq        $7, %%mm0     \n\t"
-            "psllq        $7, %%mm3     \n\t"
-            "pand      %%mm7, %%mm0     \n\t"
-            "pand      %%mm7, %%mm3     \n\t"
-            "psrlq        $6, %%mm1     \n\t"
-            "psrlq        $6, %%mm4     \n\t"
-            "pand      %%mm6, %%mm1     \n\t"
-            "pand      %%mm6, %%mm4     \n\t"
-            "psrlq       $19, %%mm2     \n\t"
-            "psrlq       $19, %%mm5     \n\t"
-            "pand         %2, %%mm2     \n\t"
-            "pand         %2, %%mm5     \n\t"
-            "por       %%mm1, %%mm0     \n\t"
-            "por       %%mm4, %%mm3     \n\t"
-            "por       %%mm2, %%mm0     \n\t"
-            "por       %%mm5, %%mm3     \n\t"
-            "psllq       $16, %%mm3     \n\t"
-            "por       %%mm3, %%mm0     \n\t"
-            MOVNTQ"    %%mm0, (%0)      \n\t"
-            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        const int r = *s++;
-        const int g = *s++;
-        const int b = *s++;
-        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
-    }
-}
-
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint16_t *end;
-    const uint16_t *mm_end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t*)src;
-    end = s + src_size/2;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 7;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movq        (%1), %%mm0    \n\t"
-            "movq        (%1), %%mm1    \n\t"
-            "movq        (%1), %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $5, %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul15_mid)", %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul15_mid)", %%mm1    \n\t"
-            "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-
-            "movq       %%mm0, %%mm6    \n\t"
-            "movq       %%mm3, %%mm7    \n\t"
-
-            "movq       8(%1), %%mm0    \n\t"
-            "movq       8(%1), %%mm1    \n\t"
-            "movq       8(%1), %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $5, %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul15_mid)", %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul15_mid)", %%mm1    \n\t"
-            "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-
-            :"=m"(*d)
-            :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
-            :"memory");
-        /* borrowed 32 to 24 */
-        __asm__ volatile(
-            "movq       %%mm0, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "movq       %%mm6, %%mm0    \n\t"
-            "movq       %%mm7, %%mm1    \n\t"
-
-            "movq       %%mm4, %%mm6    \n\t"
-            "movq       %%mm5, %%mm7    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm1, %%mm3    \n\t"
-
-            STORE_BGR24_MMX
-
-            :: "r"(d), "m"(*s)
-            :"memory");
-        d += 24;
-        s += 8;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
-        *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-    }
-}
-
-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint16_t *end;
-    const uint16_t *mm_end;
-    uint8_t *d = (uint8_t *)dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 7;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movq        (%1), %%mm0    \n\t"
-            "movq        (%1), %%mm1    \n\t"
-            "movq        (%1), %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $5, %%mm0    \n\t"
-            "psrlq         $1, %%mm2    \n\t"
-            "pmulhw        "MANGLE(mul15_mid)", %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul16_mid)", %%mm1    \n\t"
-            "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-
-            "movq       %%mm0, %%mm6    \n\t"
-            "movq       %%mm3, %%mm7    \n\t"
-
-            "movq       8(%1), %%mm0    \n\t"
-            "movq       8(%1), %%mm1    \n\t"
-            "movq       8(%1), %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $5, %%mm0    \n\t"
-            "psrlq         $1, %%mm2    \n\t"
-            "pmulhw        "MANGLE(mul15_mid)", %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul16_mid)", %%mm1    \n\t"
-            "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            :"=m"(*d)
-            :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
-            :"memory");
-        /* borrowed 32 to 24 */
-        __asm__ volatile(
-            "movq       %%mm0, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "movq       %%mm6, %%mm0    \n\t"
-            "movq       %%mm7, %%mm1    \n\t"
-
-            "movq       %%mm4, %%mm6    \n\t"
-            "movq       %%mm5, %%mm7    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm1, %%mm3    \n\t"
-
-            STORE_BGR24_MMX
-
-            :: "r"(d), "m"(*s)
-            :"memory");
-        d += 24;
-        s += 8;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
-        *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-    }
-}
-
-/*
- * mm0 = 00 B3 00 B2 00 B1 00 B0
- * mm1 = 00 G3 00 G2 00 G1 00 G0
- * mm2 = 00 R3 00 R2 00 R1 00 R0
- * mm6 = FF FF FF FF FF FF FF FF
- * mm7 = 00 00 00 00 00 00 00 00
- */
-#define PACK_RGB32 \
-    "packuswb   %%mm7, %%mm0    \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
-    "packuswb   %%mm7, %%mm1    \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
-    "packuswb   %%mm7, %%mm2    \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
-    "punpcklbw  %%mm1, %%mm0    \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
-    "punpcklbw  %%mm6, %%mm2    \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
-    "movq       %%mm0, %%mm3    \n\t"                               \
-    "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
-    "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
-    MOVNTQ"     %%mm0,  (%0)    \n\t"                               \
-    MOVNTQ"     %%mm3, 8(%0)    \n\t"                               \
-
-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint16_t *end;
-    const uint16_t *mm_end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
-    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
-    mm_end = end - 3;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movq        (%1), %%mm0    \n\t"
-            "movq        (%1), %%mm1    \n\t"
-            "movq        (%1), %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $5, %%mm0    \n\t"
-            "pmulhw        %5, %%mm0    \n\t"
-            "pmulhw        %5, %%mm1    \n\t"
-            "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
-            PACK_RGB32
-            ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
-            :"memory");
-        d += 16;
-        s += 4;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
-        *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
-        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
-        *d++ = 255;
-    }
-}
-
-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    const uint16_t *end;
-    const uint16_t *mm_end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t*)src;
-    end = s + src_size/2;
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
-    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
-    mm_end = end - 3;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32(%1)           \n\t"
-            "movq        (%1), %%mm0    \n\t"
-            "movq        (%1), %%mm1    \n\t"
-            "movq        (%1), %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $5, %%mm0    \n\t"
-            "psrlq         $1, %%mm2    \n\t"
-            "pmulhw        %5, %%mm0    \n\t"
-            "pmulhw        "MANGLE(mul16_mid)", %%mm1    \n\t"
-            "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
-            PACK_RGB32
-            ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
-            :"memory");
-        d += 16;
-        s += 4;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-    while (s < end) {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
-        *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
-        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
-        *d++ = 255;
-    }
-}
-
-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    x86_reg idx = 15 - src_size;
-    const uint8_t *s = src-idx;
-    uint8_t *d = dst-idx;
-    __asm__ volatile(
-        "test          %0, %0           \n\t"
-        "jns           2f               \n\t"
-        PREFETCH"       (%1, %0)        \n\t"
-        "movq          %3, %%mm7        \n\t"
-        "pxor          %4, %%mm7        \n\t"
-        "movq       %%mm7, %%mm6        \n\t"
-        "pxor          %5, %%mm7        \n\t"
-        ".p2align       4               \n\t"
-        "1:                             \n\t"
-        PREFETCH"     32(%1, %0)        \n\t"
-        "movq           (%1, %0), %%mm0 \n\t"
-        "movq          8(%1, %0), %%mm1 \n\t"
-# if COMPILE_TEMPLATE_MMXEXT
-        "pshufw      $177, %%mm0, %%mm3 \n\t"
-        "pshufw      $177, %%mm1, %%mm5 \n\t"
-        "pand       %%mm7, %%mm0        \n\t"
-        "pand       %%mm6, %%mm3        \n\t"
-        "pand       %%mm7, %%mm1        \n\t"
-        "pand       %%mm6, %%mm5        \n\t"
-        "por        %%mm3, %%mm0        \n\t"
-        "por        %%mm5, %%mm1        \n\t"
-# else
-        "movq       %%mm0, %%mm2        \n\t"
-        "movq       %%mm1, %%mm4        \n\t"
-        "pand       %%mm7, %%mm0        \n\t"
-        "pand       %%mm6, %%mm2        \n\t"
-        "pand       %%mm7, %%mm1        \n\t"
-        "pand       %%mm6, %%mm4        \n\t"
-        "movq       %%mm2, %%mm3        \n\t"
-        "movq       %%mm4, %%mm5        \n\t"
-        "pslld        $16, %%mm2        \n\t"
-        "psrld        $16, %%mm3        \n\t"
-        "pslld        $16, %%mm4        \n\t"
-        "psrld        $16, %%mm5        \n\t"
-        "por        %%mm2, %%mm0        \n\t"
-        "por        %%mm4, %%mm1        \n\t"
-        "por        %%mm3, %%mm0        \n\t"
-        "por        %%mm5, %%mm1        \n\t"
-# endif
-        MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
-        MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
-        "add          $16, %0           \n\t"
-        "js            1b               \n\t"
-        SFENCE"                         \n\t"
-        EMMS"                           \n\t"
-        "2:                             \n\t"
-        : "+&r"(idx)
-        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
-        : "memory");
-    for (; idx<15; idx+=4) {
-        register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
-        v &= 0xff00ff;
-        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
-    }
-}
-
-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    unsigned i;
-    x86_reg mmx_size= 23 - src_size;
-    __asm__ volatile (
-        "test             %%"REG_a", %%"REG_a"          \n\t"
-        "jns                     2f                     \n\t"
-        "movq     "MANGLE(mask24r)", %%mm5              \n\t"
-        "movq     "MANGLE(mask24g)", %%mm6              \n\t"
-        "movq     "MANGLE(mask24b)", %%mm7              \n\t"
-        ".p2align                 4                     \n\t"
-        "1:                                             \n\t"
-        PREFETCH" 32(%1, %%"REG_a")                     \n\t"
-        "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-        "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
-        "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
-        "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
-        "pand                 %%mm5, %%mm0              \n\t"
-        "pand                 %%mm6, %%mm1              \n\t"
-        "pand                 %%mm7, %%mm2              \n\t"
-        "por                  %%mm0, %%mm1              \n\t"
-        "por                  %%mm2, %%mm1              \n\t"
-        "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-        MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
-        "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
-        "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
-        "pand                 %%mm7, %%mm0              \n\t"
-        "pand                 %%mm5, %%mm1              \n\t"
-        "pand                 %%mm6, %%mm2              \n\t"
-        "por                  %%mm0, %%mm1              \n\t"
-        "por                  %%mm2, %%mm1              \n\t"
-        "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
-        MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
-        "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
-        "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
-        "pand                 %%mm6, %%mm0              \n\t"
-        "pand                 %%mm7, %%mm1              \n\t"
-        "pand                 %%mm5, %%mm2              \n\t"
-        "por                  %%mm0, %%mm1              \n\t"
-        "por                  %%mm2, %%mm1              \n\t"
-        MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
-        "add                    $24, %%"REG_a"          \n\t"
-        " js                     1b                     \n\t"
-        "2:                                             \n\t"
-        : "+a" (mmx_size)
-        : "r" (src-mmx_size), "r"(dst-mmx_size)
-    );
-
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-
-    if (mmx_size==23) return; //finished, was multiple of 8
-
-    src+= src_size;
-    dst+= src_size;
-    src_size= 23-mmx_size;
-    src-= src_size;
-    dst-= src_size;
-    for (i=0; i<src_size; i+=3) {
-        register uint8_t x;
-        x          = src[i + 2];
-        dst[i + 1] = src[i + 1];
-        dst[i + 2] = src[i + 0];
-        dst[i + 0] = x;
-    }
-}
-
-static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           int width, int height,
-                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
-{
-    int y;
-    const x86_reg chromWidth= width>>1;
-    for (y=0; y<height; y++) {
-        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                         \n\t"
-            PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
-            PREFETCH"    32(%2, %%"REG_a")              \n\t"
-            PREFETCH"    32(%3, %%"REG_a")              \n\t"
-            "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
-            "movq                    %%mm0, %%mm2       \n\t" // U(0)
-            "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
-            "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
-            "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
-
-            "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
-            "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
-            "movq                    %%mm3, %%mm4       \n\t" // Y(0)
-            "movq                    %%mm5, %%mm6       \n\t" // Y(8)
-            "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
-            "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
-            "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
-            "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
-
-            MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
-
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
-            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-            : "%"REG_a
-        );
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst  += dstStride;
-    }
-    __asm__(EMMS"       \n\t"
-            SFENCE"     \n\t"
-            :::"memory");
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      int width, int height,
-                                      int lumStride, int chromStride, int dstStride)
-{
-    //FIXME interpolate chroma
-    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
-}
-
-static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           int width, int height,
-                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
-{
-    int y;
-    const x86_reg chromWidth= width>>1;
-    for (y=0; y<height; y++) {
-        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
-        __asm__ volatile(
-            "xor                %%"REG_a", %%"REG_a"    \n\t"
-            ".p2align                   4               \n\t"
-            "1:                                         \n\t"
-            PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
-            PREFETCH"   32(%2, %%"REG_a")               \n\t"
-            PREFETCH"   32(%3, %%"REG_a")               \n\t"
-            "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
-            "movq                   %%mm0, %%mm2        \n\t" // U(0)
-            "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
-            "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
-            "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
-
-            "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
-            "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
-            "movq                   %%mm0, %%mm4        \n\t" // Y(0)
-            "movq                   %%mm2, %%mm6        \n\t" // Y(8)
-            "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0)
-            "punpckhbw              %%mm3, %%mm4        \n\t" // YUYV YUYV(4)
-            "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8)
-            "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12)
-
-            MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
-
-            "add                       $8, %%"REG_a"    \n\t"
-            "cmp                       %4, %%"REG_a"    \n\t"
-            " jb                       1b               \n\t"
-            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-            : "%"REG_a
-        );
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst += dstStride;
-    }
-    __asm__(EMMS"       \n\t"
-            SFENCE"     \n\t"
-            :::"memory");
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      int width, int height,
-                                      int lumStride, int chromStride, int dstStride)
-{
-    //FIXME interpolate chroma
-    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
-}
-
-/**
- * Width should be a multiple of 16.
- */
-static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         int width, int height,
-                                         int lumStride, int chromStride, int dstStride)
-{
-    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
-}
-
-/**
- * Width should be a multiple of 16.
- */
-static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         int width, int height,
-                                         int lumStride, int chromStride, int dstStride)
-{
-    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      int width, int height,
-                                      int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const x86_reg chromWidth= width>>1;
-    for (y=0; y<height; y+=2) {
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            "pcmpeqw                 %%mm7, %%mm7       \n\t"
-            "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
-            ".p2align                    4              \n\t"
-            "1:                \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-            "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
-            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
-            "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
-            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
-            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
-            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
-            "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
-            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
-
-            MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
-
-            "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
-            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
-            "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
-            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
-            "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
-            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
-            "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
-            "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
-
-            "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
-            "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
-            "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
-            "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
-            "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
-            "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
-            "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
-            "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
-
-            MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
-            MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
-
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-
-        ydst += lumStride;
-        src  += srcStride;
-
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                         \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-            "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
-            "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
-            "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
-            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
-            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
-            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
-            MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
-
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
-
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
-{
-    int x,y;
-
-    dst[0]= src[0];
-
-    // first line
-    for (x=0; x<srcWidth-1; x++) {
-        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
-        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
-    }
-    dst[2*srcWidth-1]= src[srcWidth-1];
-
-    dst+= dstStride;
-
-    for (y=1; y<srcHeight; y++) {
-        const x86_reg mmxSize= srcWidth&~15;
-        __asm__ volatile(
-            "mov           %4, %%"REG_a"            \n\t"
-            "movq        "MANGLE(mmx_ff)", %%mm0    \n\t"
-            "movq         (%0, %%"REG_a"), %%mm4    \n\t"
-            "movq                   %%mm4, %%mm2    \n\t"
-            "psllq                     $8, %%mm4    \n\t"
-            "pand                   %%mm0, %%mm2    \n\t"
-            "por                    %%mm2, %%mm4    \n\t"
-            "movq         (%1, %%"REG_a"), %%mm5    \n\t"
-            "movq                   %%mm5, %%mm3    \n\t"
-            "psllq                     $8, %%mm5    \n\t"
-            "pand                   %%mm0, %%mm3    \n\t"
-            "por                    %%mm3, %%mm5    \n\t"
-            "1:                                     \n\t"
-            "movq         (%0, %%"REG_a"), %%mm0    \n\t"
-            "movq         (%1, %%"REG_a"), %%mm1    \n\t"
-            "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
-            "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
-            PAVGB"                  %%mm0, %%mm5    \n\t"
-            PAVGB"                  %%mm0, %%mm3    \n\t"
-            PAVGB"                  %%mm0, %%mm5    \n\t"
-            PAVGB"                  %%mm0, %%mm3    \n\t"
-            PAVGB"                  %%mm1, %%mm4    \n\t"
-            PAVGB"                  %%mm1, %%mm2    \n\t"
-            PAVGB"                  %%mm1, %%mm4    \n\t"
-            PAVGB"                  %%mm1, %%mm2    \n\t"
-            "movq                   %%mm5, %%mm7    \n\t"
-            "movq                   %%mm4, %%mm6    \n\t"
-            "punpcklbw              %%mm3, %%mm5    \n\t"
-            "punpckhbw              %%mm3, %%mm7    \n\t"
-            "punpcklbw              %%mm2, %%mm4    \n\t"
-            "punpckhbw              %%mm2, %%mm6    \n\t"
-            MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-            "add                       $8, %%"REG_a"            \n\t"
-            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
-            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
-            " js                       1b                       \n\t"
-            :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
-               "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
-               "g" (-mmxSize)
-            : "%"REG_a
-        );
-
-        for (x=mmxSize-1; x<srcWidth-1; x++) {
-            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
-            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
-            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
-            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
-        }
-        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
-        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
-
-        dst+=dstStride*2;
-        src+=srcStride;
-    }
-
-    // last line
-    dst[0]= src[0];
-
-    for (x=0; x<srcWidth-1; x++) {
-        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
-        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
-    }
-    dst[2*srcWidth-1]= src[srcWidth-1];
-
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-}
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
-
-#if !COMPILE_TEMPLATE_AMD3DNOW
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write HQ version.
- */
-static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      int width, int height,
-                                      int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const x86_reg chromWidth= width>>1;
-    for (y=0; y<height; y+=2) {
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            "pcmpeqw             %%mm7, %%mm7   \n\t"
-            "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
-            ".p2align                4          \n\t"
-            "1:                                 \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
-            "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
-            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
-            "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
-            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
-            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
-            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
-            "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
-            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
-
-            MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
-
-            "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
-            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
-            "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
-            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
-            "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
-            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
-            "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
-            "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
-
-            "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
-            "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
-            "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
-            "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
-            "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
-            "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
-            "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
-            "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
-
-            MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
-            MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
-
-            "add                    $8, %%"REG_a"   \n\t"
-            "cmp                    %4, %%"REG_a"   \n\t"
-            " jb                    1b          \n\t"
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-
-        ydst += lumStride;
-        src  += srcStride;
-
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                 \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // YUYV YUYV(4)
-            "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // YUYV YUYV(12)
-            "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
-            "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
-            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
-            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
-            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
-            MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
-
-            "add                    $8, %%"REG_a"   \n\t"
-            "cmp                    %4, %%"REG_a"   \n\t"
-            " jb                    1b          \n\t"
-
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 2.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line,
- * others are ignored in the C version.
- * FIXME: Write HQ version.
- */
-#if HAVE_7REGS
-static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                       int width, int height,
-                                       int lumStride, int chromStride, int srcStride,
-                                       int32_t *rgb2yuv)
-{
-#define BGR2Y_IDX "16*4+16*32"
-#define BGR2U_IDX "16*4+16*33"
-#define BGR2V_IDX "16*4+16*34"
-    int y;
-    const x86_reg chromWidth= width>>1;
-    for (y=0; y<height-2; y+=2) {
-        int i;
-        for (i=0; i<2; i++) {
-            __asm__ volatile(
-                "mov                        %2, %%"REG_a"   \n\t"
-                "movq          "BGR2Y_IDX"(%3), %%mm6       \n\t"
-                "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-                "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-                ".p2align                    4              \n\t"
-                "1:                                         \n\t"
-                PREFETCH"    64(%0, %%"REG_d")              \n\t"
-                "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-                "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
-                "punpcklbw               %%mm7, %%mm0       \n\t"
-                "punpcklbw               %%mm7, %%mm1       \n\t"
-                "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
-                "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
-                "punpcklbw               %%mm7, %%mm2       \n\t"
-                "punpcklbw               %%mm7, %%mm3       \n\t"
-                "pmaddwd                 %%mm6, %%mm0       \n\t"
-                "pmaddwd                 %%mm6, %%mm1       \n\t"
-                "pmaddwd                 %%mm6, %%mm2       \n\t"
-                "pmaddwd                 %%mm6, %%mm3       \n\t"
-                "psrad                      $8, %%mm0       \n\t"
-                "psrad                      $8, %%mm1       \n\t"
-                "psrad                      $8, %%mm2       \n\t"
-                "psrad                      $8, %%mm3       \n\t"
-                "packssdw                %%mm1, %%mm0       \n\t"
-                "packssdw                %%mm3, %%mm2       \n\t"
-                "pmaddwd                 %%mm5, %%mm0       \n\t"
-                "pmaddwd                 %%mm5, %%mm2       \n\t"
-                "packssdw                %%mm2, %%mm0       \n\t"
-                "psraw                      $7, %%mm0       \n\t"
-
-                "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-                "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
-                "punpcklbw               %%mm7, %%mm4       \n\t"
-                "punpcklbw               %%mm7, %%mm1       \n\t"
-                "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
-                "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
-                "punpcklbw               %%mm7, %%mm2       \n\t"
-                "punpcklbw               %%mm7, %%mm3       \n\t"
-                "pmaddwd                 %%mm6, %%mm4       \n\t"
-                "pmaddwd                 %%mm6, %%mm1       \n\t"
-                "pmaddwd                 %%mm6, %%mm2       \n\t"
-                "pmaddwd                 %%mm6, %%mm3       \n\t"
-                "psrad                      $8, %%mm4       \n\t"
-                "psrad                      $8, %%mm1       \n\t"
-                "psrad                      $8, %%mm2       \n\t"
-                "psrad                      $8, %%mm3       \n\t"
-                "packssdw                %%mm1, %%mm4       \n\t"
-                "packssdw                %%mm3, %%mm2       \n\t"
-                "pmaddwd                 %%mm5, %%mm4       \n\t"
-                "pmaddwd                 %%mm5, %%mm2       \n\t"
-                "add                       $24, %%"REG_d"   \n\t"
-                "packssdw                %%mm2, %%mm4       \n\t"
-                "psraw                      $7, %%mm4       \n\t"
-
-                "packuswb                %%mm4, %%mm0       \n\t"
-                "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
-
-                MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
-                "add                        $8,      %%"REG_a"  \n\t"
-                " js                        1b                  \n\t"
-                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
-                : "%"REG_a, "%"REG_d
-            );
-            ydst += lumStride;
-            src  += srcStride;
-        }
-        src -= srcStride*2;
-        __asm__ volatile(
-            "mov                        %4, %%"REG_a"   \n\t"
-            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-            "movq          "BGR2U_IDX"(%5), %%mm6       \n\t"
-            "pxor                    %%mm7, %%mm7       \n\t"
-            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-            "add                 %%"REG_d", %%"REG_d"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                         \n\t"
-            PREFETCH"    64(%0, %%"REG_d")              \n\t"
-            PREFETCH"    64(%1, %%"REG_d")              \n\t"
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
-            "movq          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movq          (%1, %%"REG_d"), %%mm1       \n\t"
-            "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
-            "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
-            PAVGB"                   %%mm1, %%mm0       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "movq                    %%mm0, %%mm1       \n\t"
-            "movq                    %%mm2, %%mm3       \n\t"
-            "psrlq                     $24, %%mm0       \n\t"
-            "psrlq                     $24, %%mm2       \n\t"
-            PAVGB"                   %%mm1, %%mm0       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm0       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movd          (%1, %%"REG_d"), %%mm1       \n\t"
-            "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm0       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm0       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm2, %%mm0       \n\t"
-            "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm4       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm4, %%mm2       \n\t"
-            "psrlw                      $2, %%mm0       \n\t"
-            "psrlw                      $2, %%mm2       \n\t"
-#endif
-            "movq          "BGR2V_IDX"(%5), %%mm1       \n\t"
-            "movq          "BGR2V_IDX"(%5), %%mm3       \n\t"
-
-            "pmaddwd                 %%mm0, %%mm1       \n\t"
-            "pmaddwd                 %%mm2, %%mm3       \n\t"
-            "pmaddwd                 %%mm6, %%mm0       \n\t"
-            "pmaddwd                 %%mm6, %%mm2       \n\t"
-            "psrad                      $8, %%mm0       \n\t"
-            "psrad                      $8, %%mm1       \n\t"
-            "psrad                      $8, %%mm2       \n\t"
-            "psrad                      $8, %%mm3       \n\t"
-            "packssdw                %%mm2, %%mm0       \n\t"
-            "packssdw                %%mm3, %%mm1       \n\t"
-            "pmaddwd                 %%mm5, %%mm0       \n\t"
-            "pmaddwd                 %%mm5, %%mm1       \n\t"
-            "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
-            "psraw                      $7, %%mm0       \n\t"
-
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
-            "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
-            "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
-            "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
-            PAVGB"                   %%mm1, %%mm4       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "movq                    %%mm4, %%mm1       \n\t"
-            "movq                    %%mm2, %%mm3       \n\t"
-            "psrlq                     $24, %%mm4       \n\t"
-            "psrlq                     $24, %%mm2       \n\t"
-            PAVGB"                   %%mm1, %%mm4       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm4       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm2, %%mm4       \n\t"
-            "movd        18(%0, %%"REG_d"), %%mm5       \n\t"
-            "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm5       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm5       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm5, %%mm2       \n\t"
-            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-            "psrlw                      $2, %%mm4       \n\t"
-            "psrlw                      $2, %%mm2       \n\t"
-#endif
-            "movq          "BGR2V_IDX"(%5), %%mm1       \n\t"
-            "movq          "BGR2V_IDX"(%5), %%mm3       \n\t"
-
-            "pmaddwd                 %%mm4, %%mm1       \n\t"
-            "pmaddwd                 %%mm2, %%mm3       \n\t"
-            "pmaddwd                 %%mm6, %%mm4       \n\t"
-            "pmaddwd                 %%mm6, %%mm2       \n\t"
-            "psrad                      $8, %%mm4       \n\t"
-            "psrad                      $8, %%mm1       \n\t"
-            "psrad                      $8, %%mm2       \n\t"
-            "psrad                      $8, %%mm3       \n\t"
-            "packssdw                %%mm2, %%mm4       \n\t"
-            "packssdw                %%mm3, %%mm1       \n\t"
-            "pmaddwd                 %%mm5, %%mm4       \n\t"
-            "pmaddwd                 %%mm5, %%mm1       \n\t"
-            "add                       $24, %%"REG_d"   \n\t"
-            "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
-            "psraw                      $7, %%mm4       \n\t"
-
-            "movq                    %%mm0, %%mm1           \n\t"
-            "punpckldq               %%mm4, %%mm0           \n\t"
-            "punpckhdq               %%mm4, %%mm1           \n\t"
-            "packsswb                %%mm1, %%mm0           \n\t"
-            "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
-            "movd                    %%mm0, (%2, %%"REG_a") \n\t"
-            "punpckhdq               %%mm0, %%mm0           \n\t"
-            "movd                    %%mm0, (%3, %%"REG_a") \n\t"
-            "add                        $4, %%"REG_a"       \n\t"
-            " js                        1b                  \n\t"
-            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
-            : "%"REG_a, "%"REG_d
-        );
-
-        udst += chromStride;
-        vdst += chromStride;
-        src  += srcStride*2;
-    }
-
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-
-     ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv);
-}
-#endif /* HAVE_7REGS */
-#endif /* !COMPILE_TEMPLATE_SSE2 */
-
-#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
-static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
-                                    int width, int height, int src1Stride,
-                                    int src2Stride, int dstStride)
-{
-    int h;
-
-    for (h=0; h < height; h++) {
-        int w;
-
-#if COMPILE_TEMPLATE_SSE2
-        __asm__(
-            "xor              %%"REG_a", %%"REG_a"  \n\t"
-            "1:                                     \n\t"
-            PREFETCH" 64(%1, %%"REG_a")             \n\t"
-            PREFETCH" 64(%2, %%"REG_a")             \n\t"
-            "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
-            "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
-            "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
-            "punpcklbw           %%xmm2, %%xmm0     \n\t"
-            "punpckhbw           %%xmm2, %%xmm1     \n\t"
-            "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
-            "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
-            "add                    $16, %%"REG_a"  \n\t"
-            "cmp                     %3, %%"REG_a"  \n\t"
-            " jb                     1b             \n\t"
-            ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", "%"REG_a""
-        );
-#else
-        __asm__(
-            "xor %%"REG_a", %%"REG_a"               \n\t"
-            "1:                                     \n\t"
-            PREFETCH" 64(%1, %%"REG_a")             \n\t"
-            PREFETCH" 64(%2, %%"REG_a")             \n\t"
-            "movq       (%1, %%"REG_a"), %%mm0      \n\t"
-            "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
-            "movq                 %%mm0, %%mm1      \n\t"
-            "movq                 %%mm2, %%mm3      \n\t"
-            "movq       (%2, %%"REG_a"), %%mm4      \n\t"
-            "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
-            "punpcklbw            %%mm4, %%mm0      \n\t"
-            "punpckhbw            %%mm4, %%mm1      \n\t"
-            "punpcklbw            %%mm5, %%mm2      \n\t"
-            "punpckhbw            %%mm5, %%mm3      \n\t"
-            MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
-            "add                    $16, %%"REG_a"  \n\t"
-            "cmp                     %3, %%"REG_a"  \n\t"
-            " jb                     1b             \n\t"
-            ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", "%"REG_a
-        );
-#endif
-        for (w= (width&(~15)); w < width; w++) {
-            dest[2*w+0] = src1[w];
-            dest[2*w+1] = src2[w];
-        }
-        dest += dstStride;
-        src1 += src1Stride;
-        src2 += src2Stride;
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-            );
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX*/
-
-#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
-void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                const uint8_t *unused0,
-                                const uint8_t *src1,
-                                const uint8_t *src2,
-                                int w, uint32_t *unused);
-static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
-                                      int width, int height, int srcStride,
-                                      int dst1Stride, int dst2Stride)
-{
-    int h;
-
-    for (h=0; h < height; h++) {
-        RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL);
-        src += srcStride;
-        dst1 += dst1Stride;
-        dst2 += dst2Stride;
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-            );
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
-
-#if !COMPILE_TEMPLATE_SSE2
-#if !COMPILE_TEMPLATE_AMD3DNOW
-static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                                       uint8_t *dst1, uint8_t *dst2,
-                                       int width, int height,
-                                       int srcStride1, int srcStride2,
-                                       int dstStride1, int dstStride2)
-{
-    x86_reg x, y;
-    int w,h;
-    w=width/2; h=height/2;
-    __asm__ volatile(
-        PREFETCH" %0    \n\t"
-        PREFETCH" %1    \n\t"
-        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
-    for (y=0;y<h;y++) {
-        const uint8_t* s1=src1+srcStride1*(y>>1);
-        uint8_t* d=dst1+dstStride1*y;
-        x=0;
-        for (;x<w-31;x+=32) {
-            __asm__ volatile(
-                PREFETCH"   32(%1,%2)        \n\t"
-                "movq         (%1,%2), %%mm0 \n\t"
-                "movq        8(%1,%2), %%mm2 \n\t"
-                "movq       16(%1,%2), %%mm4 \n\t"
-                "movq       24(%1,%2), %%mm6 \n\t"
-                "movq      %%mm0, %%mm1 \n\t"
-                "movq      %%mm2, %%mm3 \n\t"
-                "movq      %%mm4, %%mm5 \n\t"
-                "movq      %%mm6, %%mm7 \n\t"
-                "punpcklbw %%mm0, %%mm0 \n\t"
-                "punpckhbw %%mm1, %%mm1 \n\t"
-                "punpcklbw %%mm2, %%mm2 \n\t"
-                "punpckhbw %%mm3, %%mm3 \n\t"
-                "punpcklbw %%mm4, %%mm4 \n\t"
-                "punpckhbw %%mm5, %%mm5 \n\t"
-                "punpcklbw %%mm6, %%mm6 \n\t"
-                "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm7, 56(%0,%2,2)"
-                :: "r"(d), "r"(s1), "r"(x)
-                :"memory");
-        }
-        for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
-    }
-    for (y=0;y<h;y++) {
-        const uint8_t* s2=src2+srcStride2*(y>>1);
-        uint8_t* d=dst2+dstStride2*y;
-        x=0;
-        for (;x<w-31;x+=32) {
-            __asm__ volatile(
-                PREFETCH"   32(%1,%2)        \n\t"
-                "movq         (%1,%2), %%mm0 \n\t"
-                "movq        8(%1,%2), %%mm2 \n\t"
-                "movq       16(%1,%2), %%mm4 \n\t"
-                "movq       24(%1,%2), %%mm6 \n\t"
-                "movq      %%mm0, %%mm1 \n\t"
-                "movq      %%mm2, %%mm3 \n\t"
-                "movq      %%mm4, %%mm5 \n\t"
-                "movq      %%mm6, %%mm7 \n\t"
-                "punpcklbw %%mm0, %%mm0 \n\t"
-                "punpckhbw %%mm1, %%mm1 \n\t"
-                "punpcklbw %%mm2, %%mm2 \n\t"
-                "punpckhbw %%mm3, %%mm3 \n\t"
-                "punpcklbw %%mm4, %%mm4 \n\t"
-                "punpckhbw %%mm5, %%mm5 \n\t"
-                "punpcklbw %%mm6, %%mm6 \n\t"
-                "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t"
-                MOVNTQ"    %%mm7, 56(%0,%2,2)"
-                :: "r"(d), "r"(s2), "r"(x)
-                :"memory");
-        }
-        for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-}
-
-static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                                        uint8_t *dst,
-                                        int width, int height,
-                                        int srcStride1, int srcStride2,
-                                        int srcStride3, int dstStride)
-{
-    x86_reg x;
-    int y,w,h;
-    w=width/2; h=height;
-    for (y=0;y<h;y++) {
-        const uint8_t* yp=src1+srcStride1*y;
-        const uint8_t* up=src2+srcStride2*(y>>2);
-        const uint8_t* vp=src3+srcStride3*(y>>2);
-        uint8_t* d=dst+dstStride*y;
-        x=0;
-        for (;x<w-7;x+=8) {
-            __asm__ volatile(
-                PREFETCH"   32(%1, %0)          \n\t"
-                PREFETCH"   32(%2, %0)          \n\t"
-                PREFETCH"   32(%3, %0)          \n\t"
-                "movq      (%1, %0, 4), %%mm0   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-                "movq         (%2, %0), %%mm1   \n\t" /* U0U1U2U3U4U5U6U7 */
-                "movq         (%3, %0), %%mm2   \n\t" /* V0V1V2V3V4V5V6V7 */
-                "movq            %%mm0, %%mm3   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-                "movq            %%mm1, %%mm4   \n\t" /* U0U1U2U3U4U5U6U7 */
-                "movq            %%mm2, %%mm5   \n\t" /* V0V1V2V3V4V5V6V7 */
-                "punpcklbw       %%mm1, %%mm1   \n\t" /* U0U0 U1U1 U2U2 U3U3 */
-                "punpcklbw       %%mm2, %%mm2   \n\t" /* V0V0 V1V1 V2V2 V3V3 */
-                "punpckhbw       %%mm4, %%mm4   \n\t" /* U4U4 U5U5 U6U6 U7U7 */
-                "punpckhbw       %%mm5, %%mm5   \n\t" /* V4V4 V5V5 V6V6 V7V7 */
-
-                "movq            %%mm1, %%mm6   \n\t"
-                "punpcklbw       %%mm2, %%mm1   \n\t" /* U0V0 U0V0 U1V1 U1V1*/
-                "punpcklbw       %%mm1, %%mm0   \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
-                "punpckhbw       %%mm1, %%mm3   \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
-                MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t"
-                MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t"
-
-                "punpckhbw       %%mm2, %%mm6   \n\t" /* U2V2 U2V2 U3V3 U3V3*/
-                "movq     8(%1, %0, 4), %%mm0   \n\t"
-                "movq            %%mm0, %%mm3   \n\t"
-                "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U2 Y V2 Y U2 Y V2*/
-                "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U3 Y V3 Y U3 Y V3*/
-                MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t"
-                MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t"
-
-                "movq            %%mm4, %%mm6   \n\t"
-                "movq    16(%1, %0, 4), %%mm0   \n\t"
-                "movq            %%mm0, %%mm3   \n\t"
-                "punpcklbw       %%mm5, %%mm4   \n\t"
-                "punpcklbw       %%mm4, %%mm0   \n\t" /* Y U4 Y V4 Y U4 Y V4*/
-                "punpckhbw       %%mm4, %%mm3   \n\t" /* Y U5 Y V5 Y U5 Y V5*/
-                MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t"
-                MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t"
-
-                "punpckhbw       %%mm5, %%mm6   \n\t"
-                "movq    24(%1, %0, 4), %%mm0   \n\t"
-                "movq            %%mm0, %%mm3   \n\t"
-                "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U6 Y V6 Y U6 Y V6*/
-                "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U7 Y V7 Y U7 Y V7*/
-                MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t"
-                MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t"
-
-                : "+r" (x)
-                : "r"(yp), "r" (up), "r"(vp), "r"(d)
-                :"memory");
-        }
-        for (; x<w; x++) {
-            const int x2 = x<<2;
-            d[8*x+0] = yp[x2];
-            d[8*x+1] = up[x];
-            d[8*x+2] = yp[x2+1];
-            d[8*x+3] = vp[x];
-            d[8*x+4] = yp[x2+2];
-            d[8*x+5] = up[x];
-            d[8*x+6] = yp[x2+3];
-            d[8*x+7] = vp[x];
-        }
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
-{
-    dst +=   count;
-    src += 2*count;
-    count= - count;
-
-    if(count <= -16) {
-        count += 15;
-        __asm__ volatile(
-            "pcmpeqw       %%mm7, %%mm7        \n\t"
-            "psrlw            $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq -30(%1, %0, 2), %%mm0        \n\t"
-            "movq -22(%1, %0, 2), %%mm1        \n\t"
-            "movq -14(%1, %0, 2), %%mm2        \n\t"
-            "movq  -6(%1, %0, 2), %%mm3        \n\t"
-            "pand          %%mm7, %%mm0        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm2        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm1, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm2        \n\t"
-            MOVNTQ"        %%mm0,-15(%2, %0)   \n\t"
-            MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t"
-            "add             $16, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src), "r"(dst)
-        );
-        count -= 15;
-    }
-    while(count<0) {
-        dst[count]= src[2*count];
-        count++;
-    }
-}
-
-#if !COMPILE_TEMPLATE_AMD3DNOW
-static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
-{
-    dst0+=   count;
-    dst1+=   count;
-    src += 4*count;
-    count= - count;
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw       %%mm7, %%mm7        \n\t"
-            "psrlw            $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq -28(%1, %0, 4), %%mm0        \n\t"
-            "movq -20(%1, %0, 4), %%mm1        \n\t"
-            "movq -12(%1, %0, 4), %%mm2        \n\t"
-            "movq  -4(%1, %0, 4), %%mm3        \n\t"
-            "pand          %%mm7, %%mm0        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm2        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm1, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm2        \n\t"
-            "movq          %%mm0, %%mm1        \n\t"
-            "movq          %%mm2, %%mm3        \n\t"
-            "psrlw            $8, %%mm0        \n\t"
-            "psrlw            $8, %%mm2        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm2, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm1        \n\t"
-            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
-            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
-            "add              $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-    while(count<0) {
-        dst0[count]= src[4*count+0];
-        dst1[count]= src[4*count+2];
-        count++;
-    }
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
-{
-    dst0 +=   count;
-    dst1 +=   count;
-    src0 += 4*count;
-    src1 += 4*count;
-    count= - count;
-#ifdef PAVGB
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw        %%mm7, %%mm7        \n\t"
-            "psrlw             $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq  -28(%1, %0, 4), %%mm0        \n\t"
-            "movq  -20(%1, %0, 4), %%mm1        \n\t"
-            "movq  -12(%1, %0, 4), %%mm2        \n\t"
-            "movq   -4(%1, %0, 4), %%mm3        \n\t"
-            PAVGB" -28(%2, %0, 4), %%mm0        \n\t"
-            PAVGB" -20(%2, %0, 4), %%mm1        \n\t"
-            PAVGB" -12(%2, %0, 4), %%mm2        \n\t"
-            PAVGB" - 4(%2, %0, 4), %%mm3        \n\t"
-            "pand           %%mm7, %%mm0        \n\t"
-            "pand           %%mm7, %%mm1        \n\t"
-            "pand           %%mm7, %%mm2        \n\t"
-            "pand           %%mm7, %%mm3        \n\t"
-            "packuswb       %%mm1, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm2        \n\t"
-            "movq           %%mm0, %%mm1        \n\t"
-            "movq           %%mm2, %%mm3        \n\t"
-            "psrlw             $8, %%mm0        \n\t"
-            "psrlw             $8, %%mm2        \n\t"
-            "pand           %%mm7, %%mm1        \n\t"
-            "pand           %%mm7, %%mm3        \n\t"
-            "packuswb       %%mm2, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm1        \n\t"
-            MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t"
-            MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t"
-            "add               $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-#endif
-    while(count<0) {
-        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
-        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
-        count++;
-    }
-}
-
-#if !COMPILE_TEMPLATE_AMD3DNOW
-static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
-{
-    dst0+=   count;
-    dst1+=   count;
-    src += 4*count;
-    count= - count;
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw       %%mm7, %%mm7        \n\t"
-            "psrlw            $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq -28(%1, %0, 4), %%mm0        \n\t"
-            "movq -20(%1, %0, 4), %%mm1        \n\t"
-            "movq -12(%1, %0, 4), %%mm2        \n\t"
-            "movq  -4(%1, %0, 4), %%mm3        \n\t"
-            "psrlw            $8, %%mm0        \n\t"
-            "psrlw            $8, %%mm1        \n\t"
-            "psrlw            $8, %%mm2        \n\t"
-            "psrlw            $8, %%mm3        \n\t"
-            "packuswb      %%mm1, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm2        \n\t"
-            "movq          %%mm0, %%mm1        \n\t"
-            "movq          %%mm2, %%mm3        \n\t"
-            "psrlw            $8, %%mm0        \n\t"
-            "psrlw            $8, %%mm2        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm2, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm1        \n\t"
-            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
-            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
-            "add              $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-    src++;
-    while(count<0) {
-        dst0[count]= src[4*count+0];
-        dst1[count]= src[4*count+2];
-        count++;
-    }
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
-{
-    dst0 +=   count;
-    dst1 +=   count;
-    src0 += 4*count;
-    src1 += 4*count;
-    count= - count;
-#ifdef PAVGB
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw        %%mm7, %%mm7        \n\t"
-            "psrlw             $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq  -28(%1, %0, 4), %%mm0        \n\t"
-            "movq  -20(%1, %0, 4), %%mm1        \n\t"
-            "movq  -12(%1, %0, 4), %%mm2        \n\t"
-            "movq   -4(%1, %0, 4), %%mm3        \n\t"
-            PAVGB" -28(%2, %0, 4), %%mm0        \n\t"
-            PAVGB" -20(%2, %0, 4), %%mm1        \n\t"
-            PAVGB" -12(%2, %0, 4), %%mm2        \n\t"
-            PAVGB" - 4(%2, %0, 4), %%mm3        \n\t"
-            "psrlw             $8, %%mm0        \n\t"
-            "psrlw             $8, %%mm1        \n\t"
-            "psrlw             $8, %%mm2        \n\t"
-            "psrlw             $8, %%mm3        \n\t"
-            "packuswb       %%mm1, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm2        \n\t"
-            "movq           %%mm0, %%mm1        \n\t"
-            "movq           %%mm2, %%mm3        \n\t"
-            "psrlw             $8, %%mm0        \n\t"
-            "psrlw             $8, %%mm2        \n\t"
-            "pand           %%mm7, %%mm1        \n\t"
-            "pand           %%mm7, %%mm3        \n\t"
-            "packuswb       %%mm2, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm1        \n\t"
-            MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t"
-            MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t"
-            "add               $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-#endif
-    src0++;
-    src1++;
-    while(count<0) {
-        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
-        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
-        count++;
-    }
-}
-
-static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 int width, int height,
-                                 int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y=0; y<height; y++) {
-        RENAME(extract_even)(src, ydst, width);
-        if(y&1) {
-            RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
-            udst+= chromStride;
-            vdst+= chromStride;
-        }
-
-        src += srcStride;
-        ydst+= lumStride;
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-}
-
-#if !COMPILE_TEMPLATE_AMD3DNOW
-static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 int width, int height,
-                                 int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y=0; y<height; y++) {
-        RENAME(extract_even)(src, ydst, width);
-        RENAME(extract_odd2)(src, udst, vdst, chromWidth);
-
-        src += srcStride;
-        ydst+= lumStride;
-        udst+= chromStride;
-        vdst+= chromStride;
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 int width, int height,
-                                 int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y=0; y<height; y++) {
-        RENAME(extract_even)(src+1, ydst, width);
-        if(y&1) {
-            RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
-            udst+= chromStride;
-            vdst+= chromStride;
-        }
-
-        src += srcStride;
-        ydst+= lumStride;
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-}
-
-#if !COMPILE_TEMPLATE_AMD3DNOW
-static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 int width, int height,
-                                 int lumStride, int chromStride, int srcStride)
-{
-    int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
-
-    for (y=0; y<height; y++) {
-        RENAME(extract_even)(src+1, ydst, width);
-        RENAME(extract_even2)(src, udst, vdst, chromWidth);
-
-        src += srcStride;
-        ydst+= lumStride;
-        udst+= chromStride;
-        vdst+= chromStride;
-    }
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-#endif /* !COMPILE_TEMPLATE_SSE2 */
-
-static av_cold void RENAME(rgb2rgb_init)(void)
-{
-#if !COMPILE_TEMPLATE_SSE2
-#if !COMPILE_TEMPLATE_AMD3DNOW
-    rgb15to16          = RENAME(rgb15to16);
-    rgb15tobgr24       = RENAME(rgb15tobgr24);
-    rgb15to32          = RENAME(rgb15to32);
-    rgb16tobgr24       = RENAME(rgb16tobgr24);
-    rgb16to32          = RENAME(rgb16to32);
-    rgb16to15          = RENAME(rgb16to15);
-    rgb24tobgr16       = RENAME(rgb24tobgr16);
-    rgb24tobgr15       = RENAME(rgb24tobgr15);
-    rgb24tobgr32       = RENAME(rgb24tobgr32);
-    rgb32to16          = RENAME(rgb32to16);
-    rgb32to15          = RENAME(rgb32to15);
-    rgb32tobgr24       = RENAME(rgb32tobgr24);
-    rgb24to15          = RENAME(rgb24to15);
-    rgb24to16          = RENAME(rgb24to16);
-    rgb24tobgr24       = RENAME(rgb24tobgr24);
-    shuffle_bytes_2103 = RENAME(shuffle_bytes_2103);
-    rgb32tobgr16       = RENAME(rgb32tobgr16);
-    rgb32tobgr15       = RENAME(rgb32tobgr15);
-    yv12toyuy2         = RENAME(yv12toyuy2);
-    yv12touyvy         = RENAME(yv12touyvy);
-    yuv422ptoyuy2      = RENAME(yuv422ptoyuy2);
-    yuv422ptouyvy      = RENAME(yuv422ptouyvy);
-    yuy2toyv12         = RENAME(yuy2toyv12);
-    vu9_to_vu12        = RENAME(vu9_to_vu12);
-    yvu9_to_yuy2       = RENAME(yvu9_to_yuy2);
-    uyvytoyuv422       = RENAME(uyvytoyuv422);
-    yuyvtoyuv422       = RENAME(yuyvtoyuv422);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
-    planar2x           = RENAME(planar2x);
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
-#if HAVE_7REGS
-    ff_rgb24toyv12     = RENAME(rgb24toyv12);
-#endif /* HAVE_7REGS */
-
-    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
-    uyvytoyuv420       = RENAME(uyvytoyuv420);
-#endif /* !COMPILE_TEMPLATE_SSE2 */
-
-#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
-    interleaveBytes    = RENAME(interleaveBytes);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW  && !COMPILE_TEMPLATE_AVX*/
-#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
-    deinterleaveBytes  = RENAME(deinterleaveBytes);
-#endif
-#endif
-}
diff --git a/ffmpeg/libswscale/x86/scale.asm b/ffmpeg/libswscale/x86/scale.asm
deleted file mode 100644
index 940f357..0000000
--- a/ffmpeg/libswscale/x86/scale.asm
+++ /dev/null
@@ -1,431 +0,0 @@
-;******************************************************************************
-;* x86-optimized horizontal line scaling functions
-;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "libavutil/x86/x86util.asm"
-
-SECTION_RODATA
-
-max_19bit_int: times 4 dd 0x7ffff
-max_19bit_flt: times 4 dd 524287.0
-minshort:      times 8 dw 0x8000
-unicoeff:      times 4 dd 0x20000000
-
-SECTION .text
-
-;-----------------------------------------------------------------------------
-; horizontal line scaling
-;
-; void hscale<source_width>to<intermediate_nbits>_<filterSize>_<opt>
-;                               (SwsContext *c, int{16,32}_t *dst,
-;                                int dstW, const uint{8,16}_t *src,
-;                                const int16_t *filter,
-;                                const int32_t *filterPos, int filterSize);
-;
-; Scale one horizontal line. Input is either 8-bits width or 16-bits width
-; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
-; downscale before multiplying). Filter is 14-bits. Output is either 15bits
-; (in int16_t) or 19bits (in int32_t), as given in $intermediate_nbits. Each
-; output pixel is generated from $filterSize input pixels, the position of
-; the first pixel is given in filterPos[nOutputPixel].
-;-----------------------------------------------------------------------------
-
-; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, n_args, n_xmm
-%macro SCALE_FUNC 6
-%ifnidn %3, X
-cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1
-%else
-cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
-%endif
-%if ARCH_X86_64
-    movsxd        wq, wd
-%define mov32 movsxd
-%else ; x86-32
-%define mov32 mov
-%endif ; x86-64
-%if %2 == 19
-%if mmsize == 8 ; mmx
-    mova          m2, [max_19bit_int]
-%elif cpuflag(sse4)
-    mova          m2, [max_19bit_int]
-%else ; ssse3/sse2
-    mova          m2, [max_19bit_flt]
-%endif ; mmx/sse2/ssse3/sse4
-%endif ; %2 == 19
-%if %1 == 16
-    mova          m6, [minshort]
-    mova          m7, [unicoeff]
-%elif %1 == 8
-    pxor          m3, m3
-%endif ; %1 == 8/16
-
-%if %1 == 8
-%define movlh movd
-%define movbh movh
-%define srcmul 1
-%else ; %1 == 9-16
-%define movlh movq
-%define movbh movu
-%define srcmul 2
-%endif ; %1 == 8/9-16
-
-%ifnidn %3, X
-
-    ; setup loop
-%if %3 == 8
-    shl           wq, 1                         ; this allows *16 (i.e. now *8) in lea instructions for the 8-tap filter
-%define wshr 1
-%else ; %3 == 4
-%define wshr 0
-%endif ; %3 == 8
-    lea      filterq, [filterq+wq*8]
-%if %2 == 15
-    lea         dstq, [dstq+wq*(2>>wshr)]
-%else ; %2 == 19
-    lea         dstq, [dstq+wq*(4>>wshr)]
-%endif ; %2 == 15/19
-    lea      fltposq, [fltposq+wq*(4>>wshr)]
-    neg           wq
-
-.loop:
-%if %3 == 4 ; filterSize == 4 scaling
-    ; load 2x4 or 4x4 source pixels into m0/m1
-    mov32      pos0q, dword [fltposq+wq*4+ 0]   ; filterPos[0]
-    mov32      pos1q, dword [fltposq+wq*4+ 4]   ; filterPos[1]
-    movlh         m0, [srcq+pos0q*srcmul]       ; src[filterPos[0] + {0,1,2,3}]
-%if mmsize == 8
-    movlh         m1, [srcq+pos1q*srcmul]       ; src[filterPos[1] + {0,1,2,3}]
-%else ; mmsize == 16
-%if %1 > 8
-    movhps        m0, [srcq+pos1q*srcmul]       ; src[filterPos[1] + {0,1,2,3}]
-%else ; %1 == 8
-    movd          m4, [srcq+pos1q*srcmul]       ; src[filterPos[1] + {0,1,2,3}]
-%endif
-    mov32      pos0q, dword [fltposq+wq*4+ 8]   ; filterPos[2]
-    mov32      pos1q, dword [fltposq+wq*4+12]   ; filterPos[3]
-    movlh         m1, [srcq+pos0q*srcmul]       ; src[filterPos[2] + {0,1,2,3}]
-%if %1 > 8
-    movhps        m1, [srcq+pos1q*srcmul]       ; src[filterPos[3] + {0,1,2,3}]
-%else ; %1 == 8
-    movd          m5, [srcq+pos1q*srcmul]       ; src[filterPos[3] + {0,1,2,3}]
-    punpckldq     m0, m4
-    punpckldq     m1, m5
-%endif ; %1 == 8
-%endif ; mmsize == 8/16
-%if %1 == 8
-    punpcklbw     m0, m3                        ; byte -> word
-    punpcklbw     m1, m3                        ; byte -> word
-%endif ; %1 == 8
-
-    ; multiply with filter coefficients
-%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
-             ; add back 0x8000 * sum(coeffs) after the horizontal add
-    psubw         m0, m6
-    psubw         m1, m6
-%endif ; %1 == 16
-    pmaddwd       m0, [filterq+wq*8+mmsize*0]   ; *= filter[{0,1,..,6,7}]
-    pmaddwd       m1, [filterq+wq*8+mmsize*1]   ; *= filter[{8,9,..,14,15}]
-
-    ; add up horizontally (4 srcpix * 4 coefficients -> 1 dstpix)
-%if mmsize == 8 ; mmx
-    movq          m4, m0
-    punpckldq     m0, m1
-    punpckhdq     m4, m1
-    paddd         m0, m4
-%elif notcpuflag(ssse3) ; sse2
-    mova          m4, m0
-    shufps        m0, m1, 10001000b
-    shufps        m4, m1, 11011101b
-    paddd         m0, m4
-%else ; ssse3/sse4
-    phaddd        m0, m1                        ; filter[{ 0, 1, 2, 3}]*src[filterPos[0]+{0,1,2,3}],
-                                                ; filter[{ 4, 5, 6, 7}]*src[filterPos[1]+{0,1,2,3}],
-                                                ; filter[{ 8, 9,10,11}]*src[filterPos[2]+{0,1,2,3}],
-                                                ; filter[{12,13,14,15}]*src[filterPos[3]+{0,1,2,3}]
-%endif ; mmx/sse2/ssse3/sse4
-%else ; %3 == 8, i.e. filterSize == 8 scaling
-    ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
-    mov32      pos0q, dword [fltposq+wq*2+0]    ; filterPos[0]
-    mov32      pos1q, dword [fltposq+wq*2+4]    ; filterPos[1]
-    movbh         m0, [srcq+ pos0q   *srcmul]   ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
-%if mmsize == 8
-    movbh         m1, [srcq+(pos0q+4)*srcmul]   ; src[filterPos[0] + {4,5,6,7}]
-    movbh         m4, [srcq+ pos1q   *srcmul]   ; src[filterPos[1] + {0,1,2,3}]
-    movbh         m5, [srcq+(pos1q+4)*srcmul]   ; src[filterPos[1] + {4,5,6,7}]
-%else ; mmsize == 16
-    movbh         m1, [srcq+ pos1q   *srcmul]   ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
-    mov32      pos0q, dword [fltposq+wq*2+8]    ; filterPos[2]
-    mov32      pos1q, dword [fltposq+wq*2+12]   ; filterPos[3]
-    movbh         m4, [srcq+ pos0q   *srcmul]   ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
-    movbh         m5, [srcq+ pos1q   *srcmul]   ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
-%endif ; mmsize == 8/16
-%if %1 == 8
-    punpcklbw     m0, m3                        ; byte -> word
-    punpcklbw     m1, m3                        ; byte -> word
-    punpcklbw     m4, m3                        ; byte -> word
-    punpcklbw     m5, m3                        ; byte -> word
-%endif ; %1 == 8
-
-    ; multiply
-%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
-             ; add back 0x8000 * sum(coeffs) after the horizontal add
-    psubw         m0, m6
-    psubw         m1, m6
-    psubw         m4, m6
-    psubw         m5, m6
-%endif ; %1 == 16
-    pmaddwd       m0, [filterq+wq*8+mmsize*0]   ; *= filter[{0,1,..,6,7}]
-    pmaddwd       m1, [filterq+wq*8+mmsize*1]   ; *= filter[{8,9,..,14,15}]
-    pmaddwd       m4, [filterq+wq*8+mmsize*2]   ; *= filter[{16,17,..,22,23}]
-    pmaddwd       m5, [filterq+wq*8+mmsize*3]   ; *= filter[{24,25,..,30,31}]
-
-    ; add up horizontally (8 srcpix * 8 coefficients -> 1 dstpix)
-%if mmsize == 8
-    paddd         m0, m1
-    paddd         m4, m5
-    movq          m1, m0
-    punpckldq     m0, m4
-    punpckhdq     m1, m4
-    paddd         m0, m1
-%elif notcpuflag(ssse3) ; sse2
-%if %1 == 8
-%define mex m6
-%else
-%define mex m3
-%endif
-    ; emulate horizontal add as transpose + vertical add
-    mova         mex, m0
-    punpckldq     m0, m1
-    punpckhdq    mex, m1
-    paddd         m0, mex
-    mova          m1, m4
-    punpckldq     m4, m5
-    punpckhdq     m1, m5
-    paddd         m4, m1
-    mova          m1, m0
-    punpcklqdq    m0, m4
-    punpckhqdq    m1, m4
-    paddd         m0, m1
-%else ; ssse3/sse4
-    ; FIXME if we rearrange the filter in pairs of 4, we can
-    ; load pixels likewise and use 2 x paddd + phaddd instead
-    ; of 3 x phaddd here, faster on older cpus
-    phaddd        m0, m1
-    phaddd        m4, m5
-    phaddd        m0, m4                        ; filter[{ 0, 1,..., 6, 7}]*src[filterPos[0]+{0,1,...,6,7}],
-                                                ; filter[{ 8, 9,...,14,15}]*src[filterPos[1]+{0,1,...,6,7}],
-                                                ; filter[{16,17,...,22,23}]*src[filterPos[2]+{0,1,...,6,7}],
-                                                ; filter[{24,25,...,30,31}]*src[filterPos[3]+{0,1,...,6,7}]
-%endif ; mmx/sse2/ssse3/sse4
-%endif ; %3 == 4/8
-
-%else ; %3 == X, i.e. any filterSize scaling
-
-%ifidn %4, X4
-%define dlt 4
-%else ; %4 == X || %4 == X8
-%define dlt 0
-%endif ; %4 ==/!= X4
-%if ARCH_X86_64
-%define srcq    r8
-%define pos1q   r7
-%define srcendq r9
-    movsxd  fltsizeq, fltsized                  ; filterSize
-    lea      srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
-%else ; x86-32
-%define srcq    srcmemq
-%define pos1q   dstq
-%define srcendq r6m
-    lea        pos0q, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
-    mov      srcendq, pos0q
-%endif ; x86-32/64
-    lea      fltposq, [fltposq+wq*4]
-%if %2 == 15
-    lea         dstq, [dstq+wq*2]
-%else ; %2 == 19
-    lea         dstq, [dstq+wq*4]
-%endif ; %2 == 15/19
-    movifnidn  dstmp, dstq
-    neg           wq
-
-.loop:
-    mov32      pos0q, dword [fltposq+wq*4+0]    ; filterPos[0]
-    mov32      pos1q, dword [fltposq+wq*4+4]    ; filterPos[1]
-    ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
-    pxor          m4, m4
-    pxor          m5, m5
-    mov         srcq, srcmemmp
-
-.innerloop:
-    ; load 2x4 (mmx) or 2x8 (sse) source pixels into m0/m1 -> m4/m5
-    movbh         m0, [srcq+ pos0q     *srcmul] ; src[filterPos[0] + {0,1,2,3(,4,5,6,7)}]
-    movbh         m1, [srcq+(pos1q+dlt)*srcmul] ; src[filterPos[1] + {0,1,2,3(,4,5,6,7)}]
-%if %1 == 8
-    punpcklbw     m0, m3
-    punpcklbw     m1, m3
-%endif ; %1 == 8
-
-    ; multiply
-%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
-             ; add back 0x8000 * sum(coeffs) after the horizontal add
-    psubw         m0, m6
-    psubw         m1, m6
-%endif ; %1 == 16
-    pmaddwd       m0, [filterq]                 ; filter[{0,1,2,3(,4,5,6,7)}]
-    pmaddwd       m1, [filterq+(fltsizeq+dlt)*2]; filter[filtersize+{0,1,2,3(,4,5,6,7)}]
-    paddd         m4, m0
-    paddd         m5, m1
-    add      filterq, mmsize
-    add         srcq, srcmul*mmsize/2
-    cmp         srcq, srcendq                   ; while (src += 4) < &src[filterSize]
-    jl .innerloop
-
-%ifidn %4, X4
-    mov32      pos1q, dword [fltposq+wq*4+4]    ; filterPos[1]
-    movlh         m0, [srcq+ pos0q     *srcmul] ; split last 4 srcpx of dstpx[0]
-    sub        pos1q, fltsizeq                  ; and first 4 srcpx of dstpx[1]
-%if %1 > 8
-    movhps        m0, [srcq+(pos1q+dlt)*srcmul]
-%else ; %1 == 8
-    movd          m1, [srcq+(pos1q+dlt)*srcmul]
-    punpckldq     m0, m1
-%endif ; %1 == 8
-%if %1 == 8
-    punpcklbw     m0, m3
-%endif ; %1 == 8
-%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
-             ; add back 0x8000 * sum(coeffs) after the horizontal add
-    psubw         m0, m6
-%endif ; %1 == 16
-    pmaddwd       m0, [filterq]
-%endif ; %4 == X4
-
-    lea      filterq, [filterq+(fltsizeq+dlt)*2]
-
-%if mmsize == 8 ; mmx
-    movq          m0, m4
-    punpckldq     m4, m5
-    punpckhdq     m0, m5
-    paddd         m0, m4
-%else ; mmsize == 16
-%if notcpuflag(ssse3) ; sse2
-    mova          m1, m4
-    punpcklqdq    m4, m5
-    punpckhqdq    m1, m5
-    paddd         m4, m1
-%else ; ssse3/sse4
-    phaddd        m4, m5
-%endif ; sse2/ssse3/sse4
-%ifidn %4, X4
-    paddd         m4, m0
-%endif ; %3 == X4
-%if notcpuflag(ssse3) ; sse2
-    pshufd        m4, m4, 11011000b
-    movhlps       m0, m4
-    paddd         m0, m4
-%else ; ssse3/sse4
-    phaddd        m4, m4
-    SWAP           0, 4
-%endif ; sse2/ssse3/sse4
-%endif ; mmsize == 8/16
-%endif ; %3 ==/!= X
-
-%if %1 == 16 ; add 0x8000 * sum(coeffs), i.e. back from signed -> unsigned
-    paddd         m0, m7
-%endif ; %1 == 16
-
-    ; clip, store
-    psrad         m0, 14 + %1 - %2
-%ifidn %3, X
-    movifnidn   dstq, dstmp
-%endif ; %3 == X
-%if %2 == 15
-    packssdw      m0, m0
-%ifnidn %3, X
-    movh [dstq+wq*(2>>wshr)], m0
-%else ; %3 == X
-    movd [dstq+wq*2], m0
-%endif ; %3 ==/!= X
-%else ; %2 == 19
-%if mmsize == 8
-    PMINSD_MMX    m0, m2, m4
-%elif cpuflag(sse4)
-    pminsd        m0, m2
-%else ; sse2/ssse3
-    cvtdq2ps      m0, m0
-    minps         m0, m2
-    cvtps2dq      m0, m0
-%endif ; mmx/sse2/ssse3/sse4
-%ifnidn %3, X
-    mova [dstq+wq*(4>>wshr)], m0
-%else ; %3 == X
-    movq [dstq+wq*4], m0
-%endif ; %3 ==/!= X
-%endif ; %2 == 15/19
-%ifnidn %3, X
-    add           wq, (mmsize<<wshr)/4          ; both 8tap and 4tap really only do 4 pixels (or for mmx: 2 pixels)
-                                                ; per iteration. see "shl wq,1" above as for why we do this
-%else ; %3 == X
-    add           wq, 2
-%endif ; %3 ==/!= X
-    jl .loop
-    REP_RET
-%endmacro
-
-; SCALE_FUNCS source_width, intermediate_nbits, n_xmm
-%macro SCALE_FUNCS 3
-SCALE_FUNC %1, %2, 4, 4,  6, %3
-SCALE_FUNC %1, %2, 8, 8,  6, %3
-%if mmsize == 8
-SCALE_FUNC %1, %2, X, X,  7, %3
-%else
-SCALE_FUNC %1, %2, X, X4, 7, %3
-SCALE_FUNC %1, %2, X, X8, 7, %3
-%endif
-%endmacro
-
-; SCALE_FUNCS2 8_xmm_args, 9to10_xmm_args, 16_xmm_args
-%macro SCALE_FUNCS2 3
-%if notcpuflag(sse4)
-SCALE_FUNCS  8, 15, %1
-SCALE_FUNCS  9, 15, %2
-SCALE_FUNCS 10, 15, %2
-SCALE_FUNCS 12, 15, %2
-SCALE_FUNCS 14, 15, %2
-SCALE_FUNCS 16, 15, %3
-%endif ; !sse4
-SCALE_FUNCS  8, 19, %1
-SCALE_FUNCS  9, 19, %2
-SCALE_FUNCS 10, 19, %2
-SCALE_FUNCS 12, 19, %2
-SCALE_FUNCS 14, 19, %2
-SCALE_FUNCS 16, 19, %3
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX mmx
-SCALE_FUNCS2 0, 0, 0
-%endif
-INIT_XMM sse2
-SCALE_FUNCS2 6, 7, 8
-INIT_XMM ssse3
-SCALE_FUNCS2 6, 6, 8
-INIT_XMM sse4
-SCALE_FUNCS2 6, 6, 8
diff --git a/ffmpeg/libswscale/x86/swscale.c b/ffmpeg/libswscale/x86/swscale.c
deleted file mode 100644
index 2f7e4f7..0000000
--- a/ffmpeg/libswscale/x86/swscale.c
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-#include "config.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-#include "libavutil/attributes.h"
-#include "libavutil/avassert.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/x86/cpu.h"
-#include "libavutil/cpu.h"
-#include "libavutil/pixdesc.h"
-
-#if HAVE_INLINE_ASM
-
-#define DITHER1XBPP
-
-/* 64-bit constants for the inline-asm code. */
-/* 0xF8 / 0xFC in every byte: clears the low 3 / low 2 bits of each byte. */
-DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
-DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
-/* The value 16 / 2 replicated in each of the four 16-bit words. */
-DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
-DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
-
-/* Two-entry dither tables; updateMMXDitherTables() indexes them with the
- * parity of the output line. */
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-    0x0103010301030103LL,
-    0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-    0x0602060206020602LL,
-    0x0004000400040004LL,};
-
-/* Per-16-bit-word field masks: low 5 bits, middle 6 bits, top 5 bits. */
-DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
-/* Per-16-bit-word field masks: three 5-bit fields, top bit unused. */
-DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
-
-/* Byte masks selecting every third byte of a quadword, at three phases:
- * bytes 0/3/6, bytes 1/4/7 and bytes 2/5. */
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
-
-/* Three signed 16-bit coefficients per quadword (top word zero).
- * NOTE(review): presumably the B, G, R weights for Y / U / V - confirm
- * against the asm that loads them. */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
-
-/* 16 and 128 replicated in every byte; 1 replicated in every 16-bit word. */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
-
-
-/* swscale_template.c is compiled once per flavour: RENAME() appends the
- * flavour suffix to every templated name and COMPILE_TEMPLATE_MMXEXT
- * selects the MMXEXT-specific code paths inside the template. */
-//MMX versions
-#if HAVE_MMX_INLINE
-#undef RENAME
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "swscale_template.c"
-#endif
-
-// MMXEXT versions
-#if HAVE_MMXEXT_INLINE
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "swscale_template.c"
-#endif
-
-/**
- * Refresh the per-line dither constants and the lum/chr/alp "MmxFilter"
- * tables stored in the context for output line dstY.
- *
- * The dither fields are always updated from the parity of dstY. The filter
- * tables are only rebuilt while dstY < dstH - 2; each entry receives the
- * address of one buffered source line together with its vertical filter
- * coefficient.
- *
- * @param c            scaler context; dither fields and the MmxFilter
- *                     tables are written, everything else is only read
- * @param dstY         output line the tables are built for
- * @param lumBufIndex  current index into c->lumPixBuf
- * @param chrBufIndex  current index into c->chrUPixBuf
- * @param lastInLumBuf source line number of the newest buffered luma line
- * @param lastInChrBuf source line number of the newest buffered chroma line
- */
-void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
-                           int lastInLumBuf, int lastInChrBuf)
-{
-    const int dstH= c->dstH;
-    const int flags= c->flags;
-    int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrUPixBuf= c->chrUPixBuf;
-    int16_t **alpPixBuf= c->alpPixBuf;
-    const int vLumBufSize= c->vLumBufSize;
-    const int vChrBufSize= c->vChrBufSize;
-    int32_t *vLumFilterPos= c->vLumFilterPos;
-    int32_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
-    int32_t *lumMmxFilter= c->lumMmxFilter;
-    int32_t *chrMmxFilter= c->chrMmxFilter;
-    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
-    const int vLumFilterSize= c->vLumFilterSize;
-    const int vChrFilterSize= c->vChrFilterSize;
-    const int chrDstY= dstY>>c->chrDstVSubSample;
-    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
-    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-
-    /* Dither rows alternate with the output line parity; red uses the
-     * opposite row to blue. Green uses the dither8 table for the 555
-     * formats and the dither4 table otherwise. */
-    c->blueDither= ff_dither8[dstY&1];
-    if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
-        c->greenDither= ff_dither8[dstY&1];
-    else
-        c->greenDither= ff_dither4[dstY&1];
-    c->redDither= ff_dither8[(dstY+1)&1];
-    if (dstY < dstH - 2) {
-        /* Pointers to the buffered line that holds source line first*SrcY. */
-        const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-        const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-        int i;
-
-        /* The luma filter window sticks out above or below the source
-         * picture: build a temporary pointer list in which the missing
-         * lines repeat the nearest existing one.
-         * NOTE(review): assumes the pointer arrays have at least
-         * vLumFilterSize spare entries after index 2 * vLumBufSize -
-         * confirm against the allocation. */
-        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
-            const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
-            int neg = -firstLumSrcY, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
-            for (i = 0; i < neg;            i++)
-                tmpY[i] = lumSrcPtr[neg];   /* lines above the picture: repeat source line 0 */
-            for (     ; i < end;            i++)
-                tmpY[i] = lumSrcPtr[i];
-            for (     ; i < vLumFilterSize; i++)
-                tmpY[i] = tmpY[i-1];        /* lines below the picture: repeat the last one */
-            lumSrcPtr = tmpY;
-
-            if (alpSrcPtr) {
-                const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
-                for (i = 0; i < neg;            i++)
-                    tmpA[i] = alpSrcPtr[neg];
-                for (     ; i < end;            i++)
-                    tmpA[i] = alpSrcPtr[i];
-                for (     ; i < vLumFilterSize; i++)
-                    tmpA[i] = tmpA[i - 1];
-                alpSrcPtr = tmpA;
-            }
-        }
-        /* Same edge handling for the chroma filter window. */
-        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
-            const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
-            int neg = -firstChrSrcY, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
-            for (i = 0; i < neg;            i++) {
-                tmpU[i] = chrUSrcPtr[neg];
-            }
-            for (     ; i < end;            i++) {
-                tmpU[i] = chrUSrcPtr[i];
-            }
-            for (     ; i < vChrFilterSize; i++) {
-                tmpU[i] = tmpU[i - 1];
-            }
-            chrUSrcPtr = tmpU;
-        }
-
-        if (flags & SWS_ACCURATE_RND) {
-            /* Accurate rounding: one table entry per pair of taps, holding
-             * two line pointers and the two coefficients combined into one
-             * 32-bit value (stored twice). The high coefficient is scaled
-             * with a multiplication because left-shifting a negative
-             * value is undefined behaviour in C.
-             * NOTE(review): for an odd filter size > 1 the last iteration
-             * reads tap i+1 beyond the filter - presumably the callers
-             * guarantee padding; confirm. */
-            int s= APCK_SIZE / 8;
-            for (i=0; i<vLumFilterSize; i+=2) {
-                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
-                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
-                lumMmxFilter[s*i+APCK_COEF/4  ]=
-                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
-                + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    /* alpha shares the luma coefficients */
-                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
-                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
-                    alpMmxFilter[s*i+APCK_COEF/4  ]=
-                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
-                }
-            }
-            for (i=0; i<vChrFilterSize; i+=2) {
-                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
-                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
-                chrMmxFilter[s*i+APCK_COEF/4  ]=
-                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
-                + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
-            }
-        } else {
-            /* Default layout: four int32 slots per tap - the line pointer
-             * at slot 0, then the 16-bit coefficient replicated into both
-             * halves of slots 2 and 3. */
-            for (i=0; i<vLumFilterSize; i++) {
-                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
-                lumMmxFilter[4*i+2]=
-                lumMmxFilter[4*i+3]=
-                ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
-                    alpMmxFilter[4*i+2]=
-                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
-                }
-            }
-            for (i=0; i<vChrFilterSize; i++) {
-                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
-                chrMmxFilter[4*i+2]=
-                chrMmxFilter[4*i+3]=
-                ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
-            }
-        }
-    }
-}
-
-#if HAVE_MMXEXT
-/**
- * Vertical filter producing 8-bit output, 16 pixels per iteration, using
- * inline SSE asm. Falls back to yuv2yuvX_mmxext() when dest is not
- * 16-byte aligned.
- *
- * @param filter     packed filter table walked by the asm in 16-byte steps
- *                   (a line pointer at offset 0, coefficients at offset 8),
- *                   terminated by a NULL line pointer
- * @param filterSize number of filter taps
- * @param src        forwarded to the fallback only; the asm reads the line
- *                   pointers from filter instead
- * @param dest       output pixels
- * @param dstW       number of output pixels
- * @param dither     8 dither bytes
- * @param offset     start offset; when non-zero the dither bytes are
- *                   rotated by 3 before use
- */
-static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
-                           const int16_t **src, uint8_t *dest, int dstW,
-                           const uint8_t *dither, int offset)
-{
-    /* The movntdq store below needs a 16-byte aligned address. Test the
-     * alignment on the full pointer width instead of truncating to int. */
-    if (((uintptr_t)dest) & 15) {
-        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
-        return;
-    }
-    /* NOTE(review): xmm3/xmm4/xmm7 carry state between the separate asm
-     * statements below without being declared as outputs or clobbers; this
-     * relies on the compiler emitting no SSE code in between. */
-    if (offset) {
-        /* xmm3 = the 8 dither bytes rotated right by 3 bytes */
-        __asm__ volatile("movq       (%0), %%xmm3\n\t"
-                         "movdqa    %%xmm3, %%xmm4\n\t"
-                         "psrlq       $24, %%xmm3\n\t"
-                         "psllq       $40, %%xmm4\n\t"
-                         "por       %%xmm4, %%xmm3\n\t"
-                         :: "r"(dither)
-                         );
-    } else {
-        __asm__ volatile("movq       (%0), %%xmm3\n\t"
-                         :: "r"(dither)
-                         );
-    }
-    filterSize--;
-    /* xmm3 = (dither widened to words + 8 * (filterSize - 1)) >> 4 */
-    __asm__ volatile(
-        "pxor      %%xmm0, %%xmm0\n\t"
-        "punpcklbw %%xmm0, %%xmm3\n\t"
-        "movd          %0, %%xmm1\n\t"
-        "punpcklwd %%xmm1, %%xmm1\n\t"
-        "punpckldq %%xmm1, %%xmm1\n\t"
-        "punpcklqdq %%xmm1, %%xmm1\n\t"
-        "psllw         $3, %%xmm1\n\t"
-        "paddw     %%xmm1, %%xmm3\n\t"
-        "psraw         $4, %%xmm3\n\t"
-        ::"m"(filterSize)
-     );
-    /* Outer loop: 16 output pixels per pass (REG_c counts from offset up to
-     * dstW + offset). Inner loop "1:" accumulates coefficient * source over
-     * the filter table until a NULL line pointer is read; xmm7 keeps the
-     * rounding/dither start value for the next pass. */
-    __asm__ volatile(
-        "movdqa    %%xmm3, %%xmm4\n\t"
-        "movdqa    %%xmm3, %%xmm7\n\t"
-        "movl %3, %%ecx\n\t"
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        ".p2align                             4             \n\t" /* FIXME Unroll? */\
-        "1:                                                 \n\t"\
-        "movddup                  8(%%"REG_d"), %%xmm0      \n\t" /* filterCoeff */\
-        "movdqa              (%%"REG_S", %%"REG_c", 2), %%xmm2      \n\t" /* srcData */\
-        "movdqa            16(%%"REG_S", %%"REG_c", 2), %%xmm5      \n\t" /* srcData */\
-        "add                                $16, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
-        "pmulhw                           %%xmm0, %%xmm2      \n\t"\
-        "pmulhw                           %%xmm0, %%xmm5      \n\t"\
-        "paddw                            %%xmm2, %%xmm3      \n\t"\
-        "paddw                            %%xmm5, %%xmm4      \n\t"\
-        " jnz                                1b             \n\t"\
-        "psraw                               $3, %%xmm3      \n\t"\
-        "psraw                               $3, %%xmm4      \n\t"\
-        "packuswb                         %%xmm4, %%xmm3      \n\t"
-        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"
-        "add                         $16, %%"REG_c"         \n\t"\
-        "cmp                          %2, %%"REG_c"         \n\t"\
-        "movdqa    %%xmm7, %%xmm3\n\t"
-        "movdqa    %%xmm7, %%xmm4\n\t"
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "jb                                  1b             \n\t"\
-        :: "g" (filter),
-           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
-        : "%"REG_d, "%"REG_S, "%"REG_c
-    );
-}
-#endif
-
-#endif /* HAVE_INLINE_ASM */
-
-/* Prototype generator for the horizontal scalers; the pasted name is
- * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>. */
-#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
-void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
-                                                SwsContext *c, int16_t *data, \
-                                                int dstW, const uint8_t *src, \
-                                                const int16_t *filter, \
-                                                const int32_t *filterPos, int filterSize)
-
-/* Declares all source depths (8, 9, 10, 12, 14, 16) for both intermediate
- * depths (15 and 19) of one filter-size variant. */
-#define SCALE_FUNCS(filter_n, opt) \
-    SCALE_FUNC(filter_n,  8, 15, opt); \
-    SCALE_FUNC(filter_n,  9, 15, opt); \
-    SCALE_FUNC(filter_n, 10, 15, opt); \
-    SCALE_FUNC(filter_n, 12, 15, opt); \
-    SCALE_FUNC(filter_n, 14, 15, opt); \
-    SCALE_FUNC(filter_n, 16, 15, opt); \
-    SCALE_FUNC(filter_n,  8, 19, opt); \
-    SCALE_FUNC(filter_n,  9, 19, opt); \
-    SCALE_FUNC(filter_n, 10, 19, opt); \
-    SCALE_FUNC(filter_n, 12, 19, opt); \
-    SCALE_FUNC(filter_n, 14, 19, opt); \
-    SCALE_FUNC(filter_n, 16, 19, opt)
-
-/* MMX has a single generic variant (X) ... */
-#define SCALE_FUNCS_MMX(opt) \
-    SCALE_FUNCS(4, opt); \
-    SCALE_FUNCS(8, opt); \
-    SCALE_FUNCS(X, opt)
-
-/* ... the SSE flavours have two (X4 and X8). */
-#define SCALE_FUNCS_SSE(opt) \
-    SCALE_FUNCS(4, opt); \
-    SCALE_FUNCS(8, opt); \
-    SCALE_FUNCS(X4, opt); \
-    SCALE_FUNCS(X8, opt)
-
-/* The MMX functions are only declared for 32-bit x86. */
-#if ARCH_X86_32
-SCALE_FUNCS_MMX(mmx);
-#endif
-SCALE_FUNCS_SSE(sse2);
-SCALE_FUNCS_SSE(ssse3);
-SCALE_FUNCS_SSE(sse4);
-
-/* Prototype generator for the multi-tap vertical scalers,
- * ff_yuv2planeX_<size>_<opt>. */
-#define VSCALEX_FUNC(size, opt) \
-void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
-                                        const int16_t **src, uint8_t *dest, int dstW, \
-                                        const uint8_t *dither, int offset)
-/* Declares the 8-, 9- and 10-bit variants of one flavour. */
-#define VSCALEX_FUNCS(opt) \
-    VSCALEX_FUNC(8,  opt); \
-    VSCALEX_FUNC(9,  opt); \
-    VSCALEX_FUNC(10, opt)
-
-#if ARCH_X86_32
-VSCALEX_FUNCS(mmxext);
-#endif
-VSCALEX_FUNCS(sse2);
-VSCALEX_FUNCS(sse4);
-/* a 16-bit variant is declared for sse4 only */
-VSCALEX_FUNC(16, sse4);
-VSCALEX_FUNCS(avx);
-
-/* Prototype generator for the single-line vertical output functions,
- * ff_yuv2plane1_<size>_<opt>. */
-#define VSCALE_FUNC(size, opt) \
-void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
-                                        const uint8_t *dither, int offset)
-/* The 8- and 16-bit variants use flavour opt1, the 9- and 10-bit variants
- * use flavour opt2. */
-#define VSCALE_FUNCS(opt1, opt2) \
-    VSCALE_FUNC(8,  opt1); \
-    VSCALE_FUNC(9,  opt2); \
-    VSCALE_FUNC(10, opt2); \
-    VSCALE_FUNC(16, opt1)
-
-#if ARCH_X86_32
-VSCALE_FUNCS(mmx, mmxext);
-#endif
-VSCALE_FUNCS(sse2, sse2);
-VSCALE_FUNC(16, sse4);
-VSCALE_FUNCS(avx, avx);
-
-/* Prototype generators for the input converters: ff_<fmt>ToY_<opt> reads
- * one source pointer, ff_<fmt>ToUV_<opt> reads two and writes separate U
- * and V destinations. */
-#define INPUT_Y_FUNC(fmt, opt) \
-void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
-                                const uint8_t *unused1, const uint8_t *unused2, \
-                                int w, uint32_t *unused)
-#define INPUT_UV_FUNC(fmt, opt) \
-void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
-                                const uint8_t *unused0, \
-                                const uint8_t *src1, \
-                                const uint8_t *src2, \
-                                int w, uint32_t *unused)
-#define INPUT_FUNC(fmt, opt) \
-    INPUT_Y_FUNC(fmt, opt); \
-    INPUT_UV_FUNC(fmt, opt)
-/* nv12 / nv21 only get a ToUV converter; every other format gets both. */
-#define INPUT_FUNCS(opt) \
-    INPUT_FUNC(uyvy, opt); \
-    INPUT_FUNC(yuyv, opt); \
-    INPUT_UV_FUNC(nv12, opt); \
-    INPUT_UV_FUNC(nv21, opt); \
-    INPUT_FUNC(rgba, opt); \
-    INPUT_FUNC(bgra, opt); \
-    INPUT_FUNC(argb, opt); \
-    INPUT_FUNC(abgr, opt); \
-    INPUT_FUNC(rgb24, opt); \
-    INPUT_FUNC(bgr24, opt)
-
-#if ARCH_X86_32
-INPUT_FUNCS(mmx);
-#endif
-INPUT_FUNCS(sse2);
-INPUT_FUNCS(ssse3);
-INPUT_FUNCS(avx);
-
-/**
- * Install the x86-optimised function pointers into the scaler context.
- *
- * The CPU flags are queried once and each instruction-set section below
- * runs in turn (MMX, MMXEXT, SSE2, SSSE3, SSE4, AVX). A later section
- * overwrites pointers set by an earlier one, so for every pointer the last
- * matching section wins.
- *
- * @param c scaler context whose function pointers are updated
- */
-av_cold void ff_sws_init_swscale_x86(SwsContext *c)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    /* Inline-asm implementations generated from swscale_template.c. */
-#if HAVE_MMX_INLINE
-    if (cpu_flags & AV_CPU_FLAG_MMX)
-        sws_init_swscale_mmx(c);
-#endif
-#if HAVE_MMXEXT_INLINE
-    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
-        sws_init_swscale_mmxext(c);
-    /* The SSE3 vertical filter is only used when the MMX vertical filter
-     * path is enabled and accurate rounding was not requested. */
-    if (cpu_flags & AV_CPU_FLAG_SSE3){
-        if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
-            c->yuv2planeX = yuv2yuvX_sse3;
-    }
-#endif
-
-/* Pick the horizontal scaler matching c->srcBpc. The "to15" function
- * (flavour opt2) is used when c->dstBpc <= 14, otherwise the "to19"
- * function (flavour opt1). PAL8 / RGB sources whose first component is
- * less than 16 bits deep use the 14-bit functions. */
-#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
-    if (c->srcBpc == 8) { \
-        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
-                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
-    } else if (c->srcBpc == 9) { \
-        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
-                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
-    } else if (c->srcBpc == 10) { \
-        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
-                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
-    } else if (c->srcBpc == 12) { \
-        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
-                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
-    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \
-        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
-                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
-    } else { /* c->srcBpc == 16 */ \
-        av_assert0(c->srcBpc == 16);\
-        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
-                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
-    } \
-} while (0)
-/* MMX: filter sizes 4 and 8 have dedicated functions, everything else uses
- * the generic X variant. */
-#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
-    switch (filtersize) { \
-    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
-    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
-    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
-    }
-/* Multi-tap vertical scaler selection by c->dstBpc. The 9/10-bit functions
- * are only installed for non-big-endian destination formats. The
- * assignment in the default (8-bit) case is commented out, so "break"
- * becomes the body of the if and nothing is assigned there. */
-#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
-switch(c->dstBpc){ \
-    case 16:                          do_16_case;                          break; \
-    case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
-    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
-    default: if (condition_8bit)    /*vscalefn = ff_yuv2planeX_8_  ## opt;*/ break; \
-    }
-/* Single-line vertical output selection by c->dstBpc. The 9/10-bit
- * functions additionally require opt2chk to be true. */
-#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
-    switch(c->dstBpc){ \
-    case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
-    case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
-    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2;  break; \
-    case 8:                                      vscalefn = ff_yuv2plane1_8_  ## opt1;  break; \
-    default: av_assert0(c->dstBpc>8); \
-    }
-/* RGB input: always install the luma converter; install the chroma
- * converter only when c->chrSrcHSubSample is zero. */
-#define case_rgb(x, X, opt) \
-        case AV_PIX_FMT_ ## X: \
-            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
-            if (!c->chrSrcHSubSample) \
-                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
-            break
-#if ARCH_X86_32
-    if (EXTERNAL_MMX(cpu_flags)) {
-        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
-        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
-
-        switch (c->srcFormat) {
-        /* Y400A reuses the packed-YUV luma extractors: yuyvToY for the
-         * luma bytes and uyvyToY for the alpha bytes. */
-        case AV_PIX_FMT_Y400A:
-            c->lumToYV12 = ff_yuyvToY_mmx;
-            if (c->alpPixBuf)
-                c->alpToYV12 = ff_uyvyToY_mmx;
-            break;
-        case AV_PIX_FMT_YUYV422:
-            c->lumToYV12 = ff_yuyvToY_mmx;
-            c->chrToYV12 = ff_yuyvToUV_mmx;
-            break;
-        case AV_PIX_FMT_UYVY422:
-            c->lumToYV12 = ff_uyvyToY_mmx;
-            c->chrToYV12 = ff_uyvyToUV_mmx;
-            break;
-        case AV_PIX_FMT_NV12:
-            c->chrToYV12 = ff_nv12ToUV_mmx;
-            break;
-        case AV_PIX_FMT_NV21:
-            c->chrToYV12 = ff_nv21ToUV_mmx;
-            break;
-        case_rgb(rgb24, RGB24, mmx);
-        case_rgb(bgr24, BGR24, mmx);
-        case_rgb(bgra,  BGRA,  mmx);
-        case_rgb(rgba,  RGBA,  mmx);
-        case_rgb(abgr,  ABGR,  mmx);
-        case_rgb(argb,  ARGB,  mmx);
-        default:
-            break;
-        }
-    }
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
-    }
-#endif /* ARCH_X86_32 */
-/* SSE: as for MMX, but generic filter sizes are split into the X4 variant
- * (filtersize & 4 set) and the X8 variant. */
-#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
-    switch (filtersize) { \
-    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
-    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
-    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
-             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
-             break; \
-    }
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
-        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
-
-        switch (c->srcFormat) {
-        case AV_PIX_FMT_Y400A:
-            c->lumToYV12 = ff_yuyvToY_sse2;
-            if (c->alpPixBuf)
-                c->alpToYV12 = ff_uyvyToY_sse2;
-            break;
-        case AV_PIX_FMT_YUYV422:
-            c->lumToYV12 = ff_yuyvToY_sse2;
-            c->chrToYV12 = ff_yuyvToUV_sse2;
-            break;
-        case AV_PIX_FMT_UYVY422:
-            c->lumToYV12 = ff_uyvyToY_sse2;
-            c->chrToYV12 = ff_uyvyToUV_sse2;
-            break;
-        case AV_PIX_FMT_NV12:
-            c->chrToYV12 = ff_nv12ToUV_sse2;
-            break;
-        case AV_PIX_FMT_NV21:
-            c->chrToYV12 = ff_nv21ToUV_sse2;
-            break;
-        case_rgb(rgb24, RGB24, sse2);
-        case_rgb(bgr24, BGR24, sse2);
-        case_rgb(bgra,  BGRA,  sse2);
-        case_rgb(rgba,  RGBA,  sse2);
-        case_rgb(abgr,  ABGR,  sse2);
-        case_rgb(argb,  ARGB,  sse2);
-        default:
-            break;
-        }
-    }
-    /* SSSE3 replaces the horizontal scalers and the 24-bit RGB readers. */
-    if (EXTERNAL_SSSE3(cpu_flags)) {
-        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
-        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
-        switch (c->srcFormat) {
-        case_rgb(rgb24, RGB24, ssse3);
-        case_rgb(bgr24, BGR24, ssse3);
-        default:
-            break;
-        }
-    }
-    if (EXTERNAL_SSE4(cpu_flags)) {
-        /* Xto15 don't need special sse4 functions */
-        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
-        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
-        /* 16-bit output: the sse4 functions are only installed for
-         * non-big-endian destination formats. */
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
-                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
-        if (c->dstBpc == 16 && !isBE(c->dstFormat))
-            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
-    }
-
-    if (EXTERNAL_AVX(cpu_flags)) {
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
-
-        /* For the packed / semi-planar YUV formats only the chroma reader
-         * is replaced here; luma keeps whatever an earlier section set. */
-        switch (c->srcFormat) {
-        case AV_PIX_FMT_YUYV422:
-            c->chrToYV12 = ff_yuyvToUV_avx;
-            break;
-        case AV_PIX_FMT_UYVY422:
-            c->chrToYV12 = ff_uyvyToUV_avx;
-            break;
-        case AV_PIX_FMT_NV12:
-            c->chrToYV12 = ff_nv12ToUV_avx;
-            break;
-        case AV_PIX_FMT_NV21:
-            c->chrToYV12 = ff_nv21ToUV_avx;
-            break;
-        case_rgb(rgb24, RGB24, avx);
-        case_rgb(bgr24, BGR24, avx);
-        case_rgb(bgra,  BGRA,  avx);
-        case_rgb(rgba,  RGBA,  avx);
-        case_rgb(abgr,  ABGR,  avx);
-        case_rgb(argb,  ARGB,  avx);
-        default:
-            break;
-        }
-    }
-}
diff --git a/ffmpeg/libswscale/x86/swscale_template.c b/ffmpeg/libswscale/x86/swscale_template.c
deleted file mode 100644
index c7a1bb4..0000000
--- a/ffmpeg/libswscale/x86/swscale_template.c
+++ /dev/null
@@ -1,1717 +0,0 @@
-/*
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#undef REAL_MOVNTQ
-#undef MOVNTQ
-#undef MOVNTQ2
-#undef PREFETCH
-
-#if COMPILE_TEMPLATE_MMXEXT
-#define PREFETCH "prefetchnta"
-#else
-#define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_MMXEXT
-#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
-#define MOVNTQ2 "movntq "
-#else
-#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
-#define MOVNTQ2 "movq "
-#endif
-#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
-
-#if !COMPILE_TEMPLATE_MMXEXT
-static av_always_inline void
-dither_8to16(const uint8_t *srcDither, int rot)
-{
-    if (rot) {
-        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
-                         "movq       (%0), %%mm3\n\t"
-                         "movq      %%mm3, %%mm4\n\t"
-                         "psrlq       $24, %%mm3\n\t"
-                         "psllq       $40, %%mm4\n\t"
-                         "por       %%mm4, %%mm3\n\t"
-                         "movq      %%mm3, %%mm4\n\t"
-                         "punpcklbw %%mm0, %%mm3\n\t"
-                         "punpckhbw %%mm0, %%mm4\n\t"
-                         :: "r"(srcDither)
-                         );
-    } else {
-        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
-                         "movq       (%0), %%mm3\n\t"
-                         "movq      %%mm3, %%mm4\n\t"
-                         "punpcklbw %%mm0, %%mm3\n\t"
-                         "punpckhbw %%mm0, %%mm4\n\t"
-                         :: "r"(srcDither)
-                         );
-    }
-}
-#endif
-
-static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
-                           const int16_t **src, uint8_t *dest, int dstW,
-                           const uint8_t *dither, int offset)
-{
-    dither_8to16(dither, offset);
-    filterSize--;
-    __asm__ volatile(
-        "movd %0, %%mm1\n\t"
-        "punpcklwd %%mm1, %%mm1\n\t"
-        "punpckldq %%mm1, %%mm1\n\t"
-        "psllw        $3, %%mm1\n\t"
-        "paddw     %%mm1, %%mm3\n\t"
-        "paddw     %%mm1, %%mm4\n\t"
-        "psraw        $4, %%mm3\n\t"
-        "psraw        $4, %%mm4\n\t"
-        ::"m"(filterSize)
-     );
-
-    __asm__ volatile(\
-        "movq    %%mm3, %%mm6\n\t"
-        "movq    %%mm4, %%mm7\n\t"
-        "movl %3, %%ecx\n\t"
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        ".p2align                             4             \n\t" /* FIXME Unroll? */\
-        "1:                                                 \n\t"\
-        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
-        "movq                (%%"REG_S", %%"REG_c", 2), %%mm2      \n\t" /* srcData */\
-        "movq               8(%%"REG_S", %%"REG_c", 2), %%mm5      \n\t" /* srcData */\
-        "add                                $16, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
-        "pmulhw                           %%mm0, %%mm2      \n\t"\
-        "pmulhw                           %%mm0, %%mm5      \n\t"\
-        "paddw                            %%mm2, %%mm3      \n\t"\
-        "paddw                            %%mm5, %%mm4      \n\t"\
-        " jnz                                1b             \n\t"\
-        "psraw                               $3, %%mm3      \n\t"\
-        "psraw                               $3, %%mm4      \n\t"\
-        "packuswb                         %%mm4, %%mm3      \n\t"
-        MOVNTQ2 "                         %%mm3, (%1, %%"REG_c")\n\t"
-        "add                          $8, %%"REG_c"         \n\t"\
-        "cmp                          %2, %%"REG_c"         \n\t"\
-        "movq    %%mm6, %%mm3\n\t"
-        "movq    %%mm7, %%mm4\n\t"
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "jb                                  1b             \n\t"\
-        :: "g" (filter),
-           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
-        : "%"REG_d, "%"REG_S, "%"REG_c
-    );
-}
-
-#define YSCALEYUV2PACKEDX_UV \
-    __asm__ volatile(\
-        "xor                   %%"REG_a", %%"REG_a"     \n\t"\
-        ".p2align                      4                \n\t"\
-        "nop                                            \n\t"\
-        "1:                                             \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-        "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
-        "movq                      %%mm3, %%mm4         \n\t"\
-        ".p2align                      4                \n\t"\
-        "2:                                             \n\t"\
-        "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-        "add                          %6, %%"REG_S"     \n\t" \
-        "movq     (%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
-        "add                         $16, %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-        "pmulhw                    %%mm0, %%mm2         \n\t"\
-        "pmulhw                    %%mm0, %%mm5         \n\t"\
-        "paddw                     %%mm2, %%mm3         \n\t"\
-        "paddw                     %%mm5, %%mm4         \n\t"\
-        "test                  %%"REG_S", %%"REG_S"     \n\t"\
-        " jnz                         2b                \n\t"\
-
-#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
-    "movq                    "#dst1", "#dst2"       \n\t"\
-    ".p2align                      4                \n\t"\
-    "2:                                             \n\t"\
-    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
-    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
-    "add                         $16, %%"REG_d"            \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pmulhw                 "#coeff", "#src1"       \n\t"\
-    "pmulhw                 "#coeff", "#src2"       \n\t"\
-    "paddw                   "#src1", "#dst1"       \n\t"\
-    "paddw                   "#src2", "#dst2"       \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    " jnz                         2b                \n\t"\
-
-#define YSCALEYUV2PACKEDX \
-    YSCALEYUV2PACKEDX_UV \
-    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
-
-#define YSCALEYUV2PACKEDX_END                     \
-        :: "r" (&c->redDither),                   \
-            "m" (dummy), "m" (dummy), "m" (dummy),\
-            "r" (dest), "m" (dstW_reg), "m"(uv_off) \
-        : "%"REG_a, "%"REG_d, "%"REG_S            \
-    );
-
-#define YSCALEYUV2PACKEDX_ACCURATE_UV \
-    __asm__ volatile(\
-        "xor %%"REG_a", %%"REG_a"                       \n\t"\
-        ".p2align                      4                \n\t"\
-        "nop                                            \n\t"\
-        "1:                                             \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-        "pxor                      %%mm4, %%mm4         \n\t"\
-        "pxor                      %%mm5, %%mm5         \n\t"\
-        "pxor                      %%mm6, %%mm6         \n\t"\
-        "pxor                      %%mm7, %%mm7         \n\t"\
-        ".p2align                      4                \n\t"\
-        "2:                                             \n\t"\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
-        "add                          %6, %%"REG_S"      \n\t" \
-        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
-        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
-        "movq                      %%mm0, %%mm3         \n\t"\
-        "punpcklwd                 %%mm1, %%mm0         \n\t"\
-        "punpckhwd                 %%mm1, %%mm3         \n\t"\
-        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
-        "pmaddwd                   %%mm1, %%mm0         \n\t"\
-        "pmaddwd                   %%mm1, %%mm3         \n\t"\
-        "paddd                     %%mm0, %%mm4         \n\t"\
-        "paddd                     %%mm3, %%mm5         \n\t"\
-        "add                          %6, %%"REG_S"      \n\t" \
-        "movq     (%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
-        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-        "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-        "test                  %%"REG_S", %%"REG_S"     \n\t"\
-        "movq                      %%mm2, %%mm0         \n\t"\
-        "punpcklwd                 %%mm3, %%mm2         \n\t"\
-        "punpckhwd                 %%mm3, %%mm0         \n\t"\
-        "pmaddwd                   %%mm1, %%mm2         \n\t"\
-        "pmaddwd                   %%mm1, %%mm0         \n\t"\
-        "paddd                     %%mm2, %%mm6         \n\t"\
-        "paddd                     %%mm0, %%mm7         \n\t"\
-        " jnz                         2b                \n\t"\
-        "psrad                       $16, %%mm4         \n\t"\
-        "psrad                       $16, %%mm5         \n\t"\
-        "psrad                       $16, %%mm6         \n\t"\
-        "psrad                       $16, %%mm7         \n\t"\
-        "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
-        "packssdw                  %%mm5, %%mm4         \n\t"\
-        "packssdw                  %%mm7, %%mm6         \n\t"\
-        "paddw                     %%mm0, %%mm4         \n\t"\
-        "paddw                     %%mm0, %%mm6         \n\t"\
-        "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
-        "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
-
-#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pxor                      %%mm1, %%mm1         \n\t"\
-    "pxor                      %%mm5, %%mm5         \n\t"\
-    "pxor                      %%mm7, %%mm7         \n\t"\
-    "pxor                      %%mm6, %%mm6         \n\t"\
-    ".p2align                      4                \n\t"\
-    "2:                                             \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
-    "movq                      %%mm0, %%mm3         \n\t"\
-    "punpcklwd                 %%mm4, %%mm0         \n\t"\
-    "punpckhwd                 %%mm4, %%mm3         \n\t"\
-    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
-    "pmaddwd                   %%mm4, %%mm0         \n\t"\
-    "pmaddwd                   %%mm4, %%mm3         \n\t"\
-    "paddd                     %%mm0, %%mm1         \n\t"\
-    "paddd                     %%mm3, %%mm5         \n\t"\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    "movq                      %%mm2, %%mm0         \n\t"\
-    "punpcklwd                 %%mm3, %%mm2         \n\t"\
-    "punpckhwd                 %%mm3, %%mm0         \n\t"\
-    "pmaddwd                   %%mm4, %%mm2         \n\t"\
-    "pmaddwd                   %%mm4, %%mm0         \n\t"\
-    "paddd                     %%mm2, %%mm7         \n\t"\
-    "paddd                     %%mm0, %%mm6         \n\t"\
-    " jnz                         2b                \n\t"\
-    "psrad                       $16, %%mm1         \n\t"\
-    "psrad                       $16, %%mm5         \n\t"\
-    "psrad                       $16, %%mm7         \n\t"\
-    "psrad                       $16, %%mm6         \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
-    "packssdw                  %%mm5, %%mm1         \n\t"\
-    "packssdw                  %%mm6, %%mm7         \n\t"\
-    "paddw                     %%mm0, %%mm1         \n\t"\
-    "paddw                     %%mm0, %%mm7         \n\t"\
-    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
-    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
-
-#define YSCALEYUV2PACKEDX_ACCURATE \
-    YSCALEYUV2PACKEDX_ACCURATE_UV \
-    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
-
-#define YSCALEYUV2RGBX \
-    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
-    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
-    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
-    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
-    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
-    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
-    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw           %%mm3, %%mm4       \n\t"\
-    "movq            %%mm2, %%mm0       \n\t"\
-    "movq            %%mm5, %%mm6       \n\t"\
-    "movq            %%mm4, %%mm3       \n\t"\
-    "punpcklwd       %%mm2, %%mm2       \n\t"\
-    "punpcklwd       %%mm5, %%mm5       \n\t"\
-    "punpcklwd       %%mm4, %%mm4       \n\t"\
-    "paddw           %%mm1, %%mm2       \n\t"\
-    "paddw           %%mm1, %%mm5       \n\t"\
-    "paddw           %%mm1, %%mm4       \n\t"\
-    "punpckhwd       %%mm0, %%mm0       \n\t"\
-    "punpckhwd       %%mm6, %%mm6       \n\t"\
-    "punpckhwd       %%mm3, %%mm3       \n\t"\
-    "paddw           %%mm7, %%mm0       \n\t"\
-    "paddw           %%mm7, %%mm6       \n\t"\
-    "paddw           %%mm7, %%mm3       \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb        %%mm0, %%mm2       \n\t"\
-    "packuswb        %%mm6, %%mm5       \n\t"\
-    "packuswb        %%mm3, %%mm4       \n\t"\
-
-#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
-    "movq       "#b", "#q2"     \n\t" /* B */\
-    "movq       "#r", "#t"      \n\t" /* R */\
-    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
-    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
-    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
-    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
-    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
-    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
-    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
-\
-    MOVNTQ(   q0,   (dst, index, 4))\
-    MOVNTQ(    b,  8(dst, index, 4))\
-    MOVNTQ(   q2, 16(dst, index, 4))\
-    MOVNTQ(   q3, 24(dst, index, 4))\
-\
-    "add      $8, "#index"      \n\t"\
-    "cmp "#dstw", "#index"      \n\t"\
-    " jb      1b                \n\t"
-#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
-
-static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                   const int16_t **lumSrc, int lumFilterSize,
-                                   const int16_t *chrFilter, const int16_t **chrUSrc,
-                                   const int16_t **chrVSrc,
-                                   int chrFilterSize, const int16_t **alpSrc,
-                                   uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-        YSCALEYUV2PACKEDX_ACCURATE
-        YSCALEYUV2RGBX
-        "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
-        "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
-        "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
-        YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
-        "movq               "Y_TEMP"(%0), %%mm5         \n\t"
-        "psraw                        $3, %%mm1         \n\t"
-        "psraw                        $3, %%mm7         \n\t"
-        "packuswb                  %%mm7, %%mm1         \n\t"
-        WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
-        YSCALEYUV2PACKEDX_END
-    } else {
-        YSCALEYUV2PACKEDX_ACCURATE
-        YSCALEYUV2RGBX
-        "pcmpeqd %%mm7, %%mm7 \n\t"
-        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-        YSCALEYUV2PACKEDX_END
-    }
-}
-
-static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
-                                const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrUSrc,
-                                const int16_t **chrVSrc,
-                                int chrFilterSize, const int16_t **alpSrc,
-                                uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-        YSCALEYUV2PACKEDX
-        YSCALEYUV2RGBX
-        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
-        "psraw                        $3, %%mm1         \n\t"
-        "psraw                        $3, %%mm7         \n\t"
-        "packuswb                  %%mm7, %%mm1         \n\t"
-        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-        YSCALEYUV2PACKEDX_END
-    } else {
-        YSCALEYUV2PACKEDX
-        YSCALEYUV2RGBX
-        "pcmpeqd %%mm7, %%mm7 \n\t"
-        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-        YSCALEYUV2PACKEDX_END
-    }
-}
-
-#define REAL_WRITERGB16(dst, dstw, index) \
-    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
-    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
-    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
-    "psrlq           $3, %%mm2  \n\t"\
-\
-    "movq         %%mm2, %%mm1  \n\t"\
-    "movq         %%mm4, %%mm3  \n\t"\
-\
-    "punpcklbw    %%mm7, %%mm3  \n\t"\
-    "punpcklbw    %%mm5, %%mm2  \n\t"\
-    "punpckhbw    %%mm7, %%mm4  \n\t"\
-    "punpckhbw    %%mm5, %%mm1  \n\t"\
-\
-    "psllq           $3, %%mm3  \n\t"\
-    "psllq           $3, %%mm4  \n\t"\
-\
-    "por          %%mm3, %%mm2  \n\t"\
-    "por          %%mm4, %%mm1  \n\t"\
-\
-    MOVNTQ(%%mm2,  (dst, index, 2))\
-    MOVNTQ(%%mm1, 8(dst, index, 2))\
-\
-    "add             $8, "#index"   \n\t"\
-    "cmp        "#dstw", "#index"   \n\t"\
-    " jb             1b             \n\t"
-#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
-
-static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                    const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc,
-                                    int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX_ACCURATE
-    YSCALEYUV2RGBX
-    "pxor %%mm7, %%mm7 \n\t"
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-    WRITERGB16(%4, %5, %%REGa)
-    YSCALEYUV2PACKEDX_END
-}
-
-static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
-                                 const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrUSrc,
-                                 const int16_t **chrVSrc,
-                                 int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX
-    YSCALEYUV2RGBX
-    "pxor %%mm7, %%mm7 \n\t"
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-    WRITERGB16(%4, %5, %%REGa)
-    YSCALEYUV2PACKEDX_END
-}
-
-#define REAL_WRITERGB15(dst, dstw, index) \
-    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
-    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
-    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
-    "psrlq           $3, %%mm2  \n\t"\
-    "psrlq           $1, %%mm5  \n\t"\
-\
-    "movq         %%mm2, %%mm1  \n\t"\
-    "movq         %%mm4, %%mm3  \n\t"\
-\
-    "punpcklbw    %%mm7, %%mm3  \n\t"\
-    "punpcklbw    %%mm5, %%mm2  \n\t"\
-    "punpckhbw    %%mm7, %%mm4  \n\t"\
-    "punpckhbw    %%mm5, %%mm1  \n\t"\
-\
-    "psllq           $2, %%mm3  \n\t"\
-    "psllq           $2, %%mm4  \n\t"\
-\
-    "por          %%mm3, %%mm2  \n\t"\
-    "por          %%mm4, %%mm1  \n\t"\
-\
-    MOVNTQ(%%mm2,  (dst, index, 2))\
-    MOVNTQ(%%mm1, 8(dst, index, 2))\
-\
-    "add             $8, "#index"   \n\t"\
-    "cmp        "#dstw", "#index"   \n\t"\
-    " jb             1b             \n\t"
-#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
-
-static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                    const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc,
-                                    int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX_ACCURATE
-    YSCALEYUV2RGBX
-    "pxor %%mm7, %%mm7 \n\t"
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-    WRITERGB15(%4, %5, %%REGa)
-    YSCALEYUV2PACKEDX_END
-}
-
-static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
-                                 const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrUSrc,
-                                 const int16_t **chrVSrc,
-                                 int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX
-    YSCALEYUV2RGBX
-    "pxor %%mm7, %%mm7 \n\t"
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-    WRITERGB15(%4, %5, %%REGa)
-    YSCALEYUV2PACKEDX_END
-}
-
-#define WRITEBGR24MMX(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq      %%mm2, %%mm1     \n\t" /* B */\
-    "movq      %%mm5, %%mm6     \n\t" /* R */\
-    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
-    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
-    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
-    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
-    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
-    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
-    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
-\
-    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
-    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
-    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
-    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
-\
-    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
-    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
-    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
-    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
-\
-    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
-    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
-    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
-    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
-\
-    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
-    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
-    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
-    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
-    MOVNTQ(%%mm0, (dst))\
-\
-    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
-    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
-    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
-    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
-    MOVNTQ(%%mm6, 8(dst))\
-\
-    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
-    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
-    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
-    MOVNTQ(%%mm5, 16(dst))\
-\
-    "add         $24, "#dst"    \n\t"\
-\
-    "add          $8, "#index"  \n\t"\
-    "cmp     "#dstw", "#index"  \n\t"\
-    " jb          1b            \n\t"
-
-#define WRITEBGR24MMXEXT(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
-    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
-    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
-    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
-    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
-\
-    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
-    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
-    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
-\
-    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
-    "por    %%mm1, %%mm6        \n\t"\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, (dst))\
-\
-    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
-    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
-    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
-    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
-\
-    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
-    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
-    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
-\
-    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, 8(dst))\
-\
-    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B6 B7 */\
-    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
-    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
-\
-    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
-    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
-    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
-\
-    "por    %%mm1, %%mm3        \n\t"\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, 16(dst))\
-\
-    "add      $24, "#dst"       \n\t"\
-\
-    "add       $8, "#index"     \n\t"\
-    "cmp  "#dstw", "#index"     \n\t"\
-    " jb       1b               \n\t"
-
-#if COMPILE_TEMPLATE_MMXEXT
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMXEXT(dst, dstw, index)
-#else
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
-#endif
-
-static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                   const int16_t **lumSrc, int lumFilterSize,
-                                   const int16_t *chrFilter, const int16_t **chrUSrc,
-                                   const int16_t **chrVSrc,
-                                   int chrFilterSize, const int16_t **alpSrc,
-                                   uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX_ACCURATE
-    YSCALEYUV2RGBX
-    "pxor %%mm7, %%mm7 \n\t"
-    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
-    "add %4, %%"REG_c"                        \n\t"
-    WRITEBGR24(%%REGc, %5, %%REGa)
-    :: "r" (&c->redDither),
-       "m" (dummy), "m" (dummy), "m" (dummy),
-       "r" (dest), "m" (dstW_reg), "m"(uv_off)
-    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-    );
-}
-/**
- * Emit one output line as packed 24-bit pixels using MMX inline assembly.
- * Same structure as yuv2bgr24_X_ar, but expands YSCALEYUV2PACKEDX rather
- * than YSCALEYUV2PACKEDX_ACCURATE.
- *
- * Asm operands: %0 = &c->redDither, %1-%3 = dummy, %4 = dest,
- * %5 = dstW_reg, %6 = uv_off. The filter and source-row arguments are not
- * passed as operands.
- * NOTE(review): presumably YSCALEYUV2PACKEDX (defined outside this section)
- * opens the __asm__ statement and reaches the filter data through %0 --
- * confirm against its definition.
- *
- * @param c     scaler context; supplies the address of redDither and uv_offx2
- * @param dest  start of the output line
- * @param dstW  passed (via dstW_reg) as the dstw argument of WRITEBGR24
- */
-static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
-                                const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrUSrc,
-                                const int16_t **chrVSrc,
-                                int chrFilterSize, const int16_t **alpSrc,
-                                uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0; /* filler for the operand slots %1-%3 */
-    x86_reg dstW_reg = dstW; /* x86_reg copy of dstW for the "m" operand %5 */
-    x86_reg uv_off = c->uv_offx2; /* operand %6 */
-
-    YSCALEYUV2PACKEDX
-    YSCALEYUV2RGBX
-    "pxor                    %%mm7, %%mm7       \n\t" /* mm7 = 0 */
-    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" // REG_c = 3 * REG_a; FIXME optimize
-    "add                        %4, %%"REG_c"   \n\t" /* REG_c = dest + 3 * REG_a */
-    WRITEBGR24(%%REGc, %5, %%REGa)
-    :: "r" (&c->redDither),
-       "m" (dummy), "m" (dummy), "m" (dummy),
-       "r" (dest),  "m" (dstW_reg), "m"(uv_off)
-    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-    );
-}
-/**
- * Asm fragment that finishes one loop iteration of the packed 4:2:2 writers.
- * It saturate-packs the 16-bit lanes of mm1/mm7 and of mm3/mm4 to bytes,
- * interleaves them, stores 16 bytes at dst + 2*index through MOVNTQ, adds 8
- * to index and jumps back to local label 1 while index is below dstw
- * (unsigned compare).
- * NOTE(review): going by the YUY2 name, mm1/mm7 presumably carry luma and
- * mm3/mm4 the two chroma components -- confirm against the producers.
- * WRITEYUY2 is the expanding wrapper: its arguments are macro-expanded
- * before REAL_WRITEYUY2 stringifies them.
- */
-#define REAL_WRITEYUY2(dst, dstw, index) \
-    "packuswb  %%mm3, %%mm3     \n\t" /* mm3 words -> bytes */\
-    "packuswb  %%mm4, %%mm4     \n\t" /* mm4 words -> bytes */\
-    "packuswb  %%mm7, %%mm1     \n\t" /* mm1 = bytes of mm1 (low half) and mm7 (high half) */\
-    "punpcklbw %%mm4, %%mm3     \n\t" /* mm3 = low bytes of mm3 and mm4, interleaved */\
-    "movq      %%mm1, %%mm7     \n\t" /* keep a copy for the high half */\
-    "punpcklbw %%mm3, %%mm1     \n\t" /* low 4 bytes of mm1 interleaved with mm3 */\
-    "punpckhbw %%mm3, %%mm7     \n\t" /* high 4 bytes of mm7 interleaved with mm3 */\
-\
-    MOVNTQ(%%mm1, (dst, index, 2))\
-    MOVNTQ(%%mm7, 8(dst, index, 2))\
-\
-    "add          $8, "#index"  \n\t" /* 8 more index units done */\
-    "cmp     "#dstw", "#index"  \n\t"\
-    " jb          1b            \n\t" /* loop while index < dstw (unsigned) */
-#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
-/**
- * Emit one output line in packed YUYV 4:2:2 using MMX inline assembly.
- * This "_ar" flavour expands YSCALEYUV2PACKEDX_ACCURATE; yuv2yuyv422_X
- * expands YSCALEYUV2PACKEDX instead and is otherwise the same.
- *
- * The visible asm shifts mm1, mm3, mm4 and mm7 right by 3 and hands them to
- * WRITEYUY2 with %4 as destination and %5 as width operand.
- * NOTE(review): the asm statement is opened and closed by the
- * YSCALEYUV2PACKEDX* / YSCALEYUV2PACKEDX_END macros defined outside this
- * section; presumably their operand list is what uses dummy, dstW_reg and
- * uv_off -- confirm against their definitions.
- */
-static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                     const int16_t **lumSrc, int lumFilterSize,
-                                     const int16_t *chrFilter, const int16_t **chrUSrc,
-                                     const int16_t **chrVSrc,
-                                     int chrFilterSize, const int16_t **alpSrc,
-                                     uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX_ACCURATE
-    /* NOTE(review): the previous "mm2=B, mm4=G, mm5=R, mm7=0" note looked copied from the RGB writers; here mm1/mm7 and mm3/mm4 are the values WRITEYUY2 packs */
-    "psraw $3, %%mm3    \n\t"
-    "psraw $3, %%mm4    \n\t"
-    "psraw $3, %%mm1    \n\t"
-    "psraw $3, %%mm7    \n\t"
-    WRITEYUY2(%4, %5, %%REGa)
-    YSCALEYUV2PACKEDX_END
-}
-/**
- * Emit one output line in packed YUYV 4:2:2 using MMX inline assembly.
- * Same structure as yuv2yuyv422_X_ar, but expands YSCALEYUV2PACKEDX rather
- * than YSCALEYUV2PACKEDX_ACCURATE.
- *
- * The visible asm shifts mm1, mm3, mm4 and mm7 right by 3 and hands them to
- * WRITEYUY2 with %4 as destination and %5 as width operand.
- * NOTE(review): the asm statement is opened and closed by the
- * YSCALEYUV2PACKEDX / YSCALEYUV2PACKEDX_END macros defined outside this
- * section; presumably their operand list is what uses dummy, dstW_reg and
- * uv_off -- confirm against their definitions.
- */
-static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
-                                  const int16_t **lumSrc, int lumFilterSize,
-                                  const int16_t *chrFilter, const int16_t **chrUSrc,
-                                  const int16_t **chrVSrc,
-                                  int chrFilterSize, const int16_t **alpSrc,
-                                  uint8_t *dest, int dstW, int dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_offx2;
-
-    YSCALEYUV2PACKEDX
-    /* NOTE(review): the previous "mm2=B, mm4=G, mm5=R, mm7=0" note looked copied from the RGB writers; here mm1/mm7 and mm3/mm4 are the values WRITEYUY2 packs */
-    "psraw $3, %%mm3    \n\t"
-    "psraw $3, %%mm4    \n\t"
-    "psraw $3, %%mm1    \n\t"
-    "psraw $3, %%mm7    \n\t"
-    WRITEYUY2(%4, %5, %%REGa)
-    YSCALEYUV2PACKEDX_END
-}
-/**
- * Building blocks for the two-row ("_2") YUV -> RGB asm loops.
- *
- * REAL_YSCALEYUV2RGB_UV(index, c): zeroes index, places local label 1 and
- *   blends the chroma words read through operands %2 and %3, once at index
- *   and once at index + UV_OFF_BYTE(c), using the weight stored at
- *   CHR_MMX_FILTER_OFFSET+8(c). It then subtracts U_OFFSET/V_OFFSET and
- *   starts the green products; see its closing register summary.
- * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2): blends eight 16-bit samples of
- *   rows b1 and b2 with the weight stored at LUM_MMX_FILTER_OFFSET+8(c);
- *   results land in mm1 (first four samples) and mm7 (next four).
- * REAL_YSCALEYUV2RGB_COEFF(c): applies Y_OFFSET, Y_COEFF, UB_COEFF and
- *   VR_COEFF, adds luma to the chroma terms and packs to bytes in
- *   mm2, mm4 and mm5 (B, G and R according to the callers' comments).
- * YSCALEYUV2RGB(index, c): the three stages in sequence, luma rows %0/%1.
- * YSCALEYUV2RGB_YA: expanding wrapper around REAL_YSCALEYUV2RGB_YA.
- *
- * The loop opened at label 1 is not closed here; callers in this file
- * follow these macros with a WRITE* macro.
- */
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index] */\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index] */\
-    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* step to the second chroma component */ \
-    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[index + uv_off] */\
-    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[index + uv_off] */\
-    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* restore index */ \
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[index] - uvbuf1[index] */\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[index + uv_off] - uvbuf1[index + uv_off] */\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t" /* chroma weight (uvalpha1) */\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[index] - uvbuf1[index])uvalpha1>>16 */\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[index + uv_off] - uvbuf1[index + uv_off])uvalpha1>>16 */\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf1[index] >>4 */\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf1[index + uv_off] >>4 */\
-    "paddw             %%mm2, %%mm3     \n\t" /* weighted difference + uvbuf1[index]>>4 */\
-    "paddw             %%mm5, %%mm4     \n\t" /* weighted difference + uvbuf1[index + uv_off]>>4 */\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-
-#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) /* blend rows b1/b2 into mm1/mm7 */ \
-    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /* b1[index] */\
-    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /* b2[index] */\
-    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /* b1[index+4] */\
-    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /* b2[index+4] */\
-    "psubw             %%mm1, %%mm0     \n\t" /* b1[index] - b2[index] */\
-    "psubw             %%mm7, %%mm6     \n\t" /* b1[index+4] - b2[index+4] */\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (b1[index] - b2[index])yalpha1>>16 */\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (b1[index+4] - b2[index+4])yalpha1>>16 */\
-    "psraw                $4, %%mm1     \n\t" /* b2[index] >>4 */\
-    "psraw                $4, %%mm7     \n\t" /* b2[index+4] >>4 */\
-    "paddw             %%mm0, %%mm1     \n\t" /* weighted difference + b2[index]>>4 */\
-    "paddw             %%mm6, %%mm7     \n\t" /* weighted difference + b2[index+4]>>4 */\
-
-#define REAL_YSCALEYUV2RGB_COEFF(c) /* chroma/luma terms -> packed bytes in mm2, mm4, mm5 */ \
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate the low two words: one chroma term per two luma samples */\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t" /* add Y1 (mm1) */\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two words */\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t" /* add Y2 (mm7) */\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t" /* mm2 = 8 saturated bytes from mm2 and mm0 */\
-    "packuswb          %%mm6, %%mm5     \n\t" /* mm5 = 8 saturated bytes from mm5 and mm6 */\
-    "packuswb          %%mm3, %%mm4     \n\t" /* mm4 = 8 saturated bytes from mm4 and mm3 */\
-
-#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
-
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
-    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
-    REAL_YSCALEYUV2RGB_COEFF(c)
-
-/**
- * vertical bilinear scale YV12 to RGB
- *
- * Blends two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0],
- * ubuf[1]) with YSCALEYUV2RGB and writes 32-bit pixels with WRITEBGR32.
- * When CONFIG_SWSCALE_ALPHA is set and c->alpPixBuf is non-NULL the alpha
- * rows abuf[0]/abuf[1] are blended as well; otherwise the alpha register is
- * filled with all-one bits.
- *
- * vbuf, dstW, yalpha, uvalpha and y are not referenced by the code below:
- * the asm reads the second chroma component at UV_OFF_BYTE past ubuf, the
- * width from 8280(%5) and the blend weights from the *_MMX_FILTER_OFFSET+8
- * slots relative to %5.
- * NOTE(review): presumably the caller stores the weights derived from
- * yalpha/uvalpha in the context beforehand -- confirm.
- *
- * Asm operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
- * %5 = &c->redDither (plus %6 = abuf0, %7 = abuf1 in the x86-64 alpha path).
- */
-static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
-                                const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                const int16_t *abuf[2], uint8_t *dest,
-                                int dstW, int yalpha, int uvalpha, int y)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
-
-    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-        const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
-#if ARCH_X86_64
-        __asm__ volatile(
-            YSCALEYUV2RGB(%%r8, %5)
-            YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
-            "psraw                  $3, %%mm1       \n\t" /* alpha: >>4 inside YSCALEYUV2RGB_YA plus >>3 here, >>7 in total */
-            "psraw                  $3, %%mm7       \n\t" /* alpha: >>4 inside YSCALEYUV2RGB_YA plus >>3 here, >>7 in total */
-            "packuswb            %%mm7, %%mm1       \n\t" /* mm1 = 8 alpha bytes */
-            WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
-               "a" (&c->redDither),
-               "r" (abuf0), "r" (abuf1)
-            : "%r8"
-        );
-#else
-        c->u_temp=(intptr_t)abuf0; /* alpha row pointers are passed through the context in this path */
-        c->v_temp=(intptr_t)abuf1;
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB(%%REGBP, %5)
-            "push                   %0              \n\t" /* save the luma row pointers */
-            "push                   %1              \n\t"
-            "mov          "U_TEMP"(%5), %0          \n\t" /* %0 = abuf0 (stored in c->u_temp above) */
-            "mov          "V_TEMP"(%5), %1          \n\t" /* %1 = abuf1 (stored in c->v_temp above) */
-            YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
-            "psraw                  $3, %%mm1       \n\t" /* alpha: >>4 inside YSCALEYUV2RGB_YA plus >>3 here, >>7 in total */
-            "psraw                  $3, %%mm7       \n\t" /* alpha: >>4 inside YSCALEYUV2RGB_YA plus >>3 here, >>7 in total */
-            "packuswb            %%mm7, %%mm1       \n\t" /* mm1 = 8 alpha bytes */
-            "pop                    %1              \n\t" /* restore the luma row pointers */
-            "pop                    %0              \n\t"
-            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-#endif
-    } else {
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB(%%REGBP, %5)
-            "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones, used in the alpha slot of WRITEBGR32 */
-            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    }
-}
-/**
- * Blend two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0],
- * ubuf[1]) with YSCALEYUV2RGB and write the line with WRITEBGR24.
- *
- * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced below; the asm
- * takes the width from 8280(%5) and the blend weights from the context
- * slots addressed relative to %5.
- * Asm operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
- * %5 = &c->redDither. REG_b and REG_BP are saved and restored by hand.
- */
-static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
-                                const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                const int16_t *abuf[2], uint8_t *dest,
-                                int dstW, int yalpha, int uvalpha, int y)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
-
-    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-    __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-        "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-        "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-        YSCALEYUV2RGB(%%REGBP, %5)
-        "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-        WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither)
-    );
-}
-/**
- * Blend two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0],
- * ubuf[1]) with YSCALEYUV2RGB and write the line with WRITERGB15.
- * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER values addressed
- * relative to %5 are added to the B/G/R bytes with unsigned saturation
- * before writing.
- *
- * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced below; the asm
- * takes the width from 8280(%5) and the blend weights from the context
- * slots addressed relative to %5.
- * Asm operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
- * %5 = &c->redDither. REG_b and REG_BP are saved and restored by hand.
- */
-static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
-                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                 const int16_t *abuf[2], uint8_t *dest,
-                                 int dstW, int yalpha, int uvalpha, int y)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
-
-    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-    __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-        "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-        "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-        YSCALEYUV2RGB(%%REGBP, %5)
-        "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-        WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither)
-    );
-}
-/**
- * Blend two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0],
- * ubuf[1]) with YSCALEYUV2RGB and write the line with WRITERGB16.
- * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER values addressed
- * relative to %5 are added to the B/G/R bytes with unsigned saturation
- * before writing.
- *
- * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced below; the asm
- * takes the width from 8280(%5) and the blend weights from the context
- * slots addressed relative to %5.
- * Asm operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
- * %5 = &c->redDither. REG_b and REG_BP are saved and restored by hand.
- */
-static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
-                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                 const int16_t *abuf[2], uint8_t *dest,
-                                 int dstW, int yalpha, int uvalpha, int y)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
-
-    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-    __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-        "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-        "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-        YSCALEYUV2RGB(%%REGBP, %5)
-        "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-        WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither)
-    );
-}
-/**
- * Loop head for the two-row packed output path (used by yuv2yuyv422_2).
- *
- * Before the loop it loads the weights stored at CHR_MMX_FILTER_OFFSET+8(c)
- * and LUM_MMX_FILTER_OFFSET+8(c), shifts them right by 3 and writes them
- * back, so those context slots are modified by every expansion that runs.
- * Inside the loop (local label 1) it blends the rows read through %2/%3
- * (chroma, at index and at index + UV_OFF_BYTE(c)) and %0/%1 (luma).
- * Results: mm3 and mm4 hold the two blended chroma components, mm1 and mm7
- * the blended luma for the first and next four samples.
- *
- * The loop is not closed here; yuv2yuyv422_2 follows this macro with
- * WRITEYUY2. YSCALEYUV2PACKED is the expanding wrapper.
- */
-#define REAL_YSCALEYUV2PACKED(index, c) \
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
-    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
-    "psraw                $3, %%mm0                           \n\t"\
-    "psraw                $3, %%mm1                           \n\t"\
-    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t" /* write the shifted chroma weight back */\
-    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t" /* write the shifted luma weight back */\
-    "xor            "#index", "#index"                        \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index] */\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index] */\
-    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* step to the second chroma component */ \
-    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[index + uv_off] */\
-    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[index + uv_off] */\
-    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* restore index */ \
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[index] - uvbuf1[index] */\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[index + uv_off] - uvbuf1[index + uv_off] */\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t" /* shifted chroma weight */\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[index] - uvbuf1[index]) * weight >>16 */\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[index + uv_off] - uvbuf1[index + uv_off]) * weight >>16 */\
-    "psraw                $7, %%mm3     \n\t" /* uvbuf1[index] >>7 */\
-    "psraw                $7, %%mm4     \n\t" /* uvbuf1[index + uv_off] >>7 */\
-    "paddw             %%mm2, %%mm3     \n\t" /* weighted difference + uvbuf1[index]>>7 */\
-    "paddw             %%mm5, %%mm4     \n\t" /* weighted difference + uvbuf1[index + uv_off]>>7 */\
-    "movq  (%0, "#index", 2), %%mm0     \n\t" /* buf0[index] */\
-    "movq  (%1, "#index", 2), %%mm1     \n\t" /* buf1[index] */\
-    "movq 8(%0, "#index", 2), %%mm6     \n\t" /* buf0[index+4] */\
-    "movq 8(%1, "#index", 2), %%mm7     \n\t" /* buf1[index+4] */\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[index] - buf1[index] */\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[index+4] - buf1[index+4] */\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[index] - buf1[index]) * weight >>16 */\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[index+4] - buf1[index+4]) * weight >>16 */\
-    "psraw                $7, %%mm1     \n\t" /* buf1[index] >>7 */\
-    "psraw                $7, %%mm7     \n\t" /* buf1[index+4] >>7 */\
-    "paddw             %%mm0, %%mm1     \n\t" /* weighted difference + buf1[index]>>7 */\
-    "paddw             %%mm6, %%mm7     \n\t" /* weighted difference + buf1[index+4]>>7 */\
-
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
-/**
- * Blend two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0],
- * ubuf[1]) with YSCALEYUV2PACKED and write the line with WRITEYUY2.
- *
- * Side effect: YSCALEYUV2PACKED rewrites the weights stored at the
- * CHR/LUM_MMX_FILTER_OFFSET+8 slots addressed relative to %5 (shifted right
- * by 3) before the loop starts.
- *
- * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced below; the asm
- * takes the width from 8280(%5).
- * Asm operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
- * %5 = &c->redDither. REG_b and REG_BP are saved and restored by hand.
- */
-static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
-                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                  const int16_t *abuf[2], uint8_t *dest,
-                                  int dstW, int yalpha, int uvalpha, int y)
-{
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
-
-    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-    __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-        "mov %4, %%"REG_b"                        \n\t" /* REG_b = dest */
-        "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-        YSCALEYUV2PACKED(%%REGBP, %5)
-        WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither)
-    );
-}
-/**
- * Loop head for the single-row ("_1") YUV -> RGB paths that use only one
- * chroma row. It zeroes index, places local label 1, reads chroma through
- * operand %2 (at index and at index + UV_OFF_BYTE(c)) and luma through
- * operand %0, shifts each right by 4 and converts with the offsets and
- * coefficients stored relative to c. Operands %1 and %3 are not referenced.
- * Results: packed bytes in mm2, mm4 and mm5 (B, G and R according to the
- * callers' comments).
- *
- * The loop is not closed here; callers follow this macro with a WRITE*
- * macro. YSCALEYUV2RGB1 is the expanding wrapper.
- */
-#define REAL_YSCALEYUV2RGB1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[index] */\
-    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* step to the second chroma component */ \
-    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[index + uv_off] */\
-    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* restore index */ \
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[index] >>4 */\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[index + uv_off] >>4 */\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index] */\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
-    "psraw                $4, %%mm1     \n\t" /* buf0[index] >>4 */\
-    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4 */\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate the low two words: one chroma term per two luma samples */\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t" /* add Y1 (mm1) */\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two words */\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t" /* add Y2 (mm7) */\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t" /* mm2 = 8 saturated bytes from mm2 and mm0 */\
-    "packuswb          %%mm6, %%mm5     \n\t" /* mm5 = 8 saturated bytes from mm5 and mm6 */\
-    "packuswb          %%mm3, %%mm4     \n\t" /* mm4 = 8 saturated bytes from mm4 and mm3 */\
-
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
-
-/* do vertical chrominance interpolation
- *
- * Variant of REAL_YSCALEYUV2RGB1 that reads chroma through both %2 and %3,
- * adds the two rows and shifts the sum right by 5 with a logical shift
- * (see the FIXME about overflow) before the colour conversion. Luma is
- * still read through %0 only. Results: packed bytes in mm2, mm4 and mm5
- * (B, G and R according to the callers' comments). The loop opened at
- * local label 1 is not closed here; callers follow with a WRITE* macro.
- * YSCALEYUV2RGB1b is the expanding wrapper.
- */
-#define REAL_YSCALEYUV2RGB1b(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index] */\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index] */\
-    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* step to the second chroma component */ \
-    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[index + uv_off] */\
-    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[index + uv_off] */\
-    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" /* restore index */ \
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[index] + uvbuf1[index] */\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[index + uv_off] + uvbuf1[index + uv_off] */\
-    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
-    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index] */\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
-    "psraw                $4, %%mm1     \n\t" /* buf0[index] >>4 */\
-    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4 */\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate the low two words: one chroma term per two luma samples */\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t" /* add Y1 (mm1) */\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two words */\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t" /* add Y2 (mm7) */\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t" /* mm2 = 8 saturated bytes from mm2 and mm0 */\
-    "packuswb          %%mm6, %%mm5     \n\t" /* mm5 = 8 saturated bytes from mm5 and mm6 */\
-    "packuswb          %%mm3, %%mm4     \n\t" /* mm4 = 8 saturated bytes from mm4 and mm3 */\
-
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
-/**
- * Asm fragment for the single-row alpha path: loads eight 16-bit alpha
- * samples through operand %1, shifts them right by 7 and saturate-packs
- * them into the eight bytes of mm7 (mm1 is used as scratch).
- * Callers must therefore bind the alpha row to operand %1; yuv2rgb32_1
- * does so with "d" (abuf0). YSCALEYUV2RGB1_ALPHA is the expanding wrapper.
- */
-#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
-    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
-    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
-    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
-    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
-    "packuswb          %%mm1, %%mm7     \n\t" /* mm7 = 8 alpha bytes */
-#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
-
-/**
- * YV12 to RGB without scaling or interpolating
- *
- * Converts a single luma row (buf0) to 32-bit pixels with WRITEBGR32.
- * Chroma handling depends on uvalpha:
- *  - uvalpha < 2048: YSCALEYUV2RGB1, which reads chroma from ubuf[0] only;
- *  - otherwise: YSCALEYUV2RGB1b, which combines ubuf[0] and ubuf[1].
- * When CONFIG_SWSCALE_ALPHA is set and c->alpPixBuf is non-NULL, operand %1
- * carries abuf0 and YSCALEYUV2RGB1_ALPHA loads alpha from it; otherwise the
- * alpha register mm7 is filled with all-one bits.
- *
- * vbuf, dstW and y are not referenced below; the asm takes the width from
- * 8280(%5).
- * Asm operands: %0 = buf0, %1 = abuf0 or buf1, %2 = ubuf0, %3 = ubuf1,
- * %4 = dest, %5 = &c->redDither. REG_b and REG_BP are saved and restored by
- * hand.
- */
-static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
-                                const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                const int16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, int y)
-{
-    const int16_t *ubuf0 = ubuf[0];
-    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0]; /* only fills operand %3, which YSCALEYUV2RGB1 does not read */
-        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither)
-            );
-        } else {
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones, used in the alpha slot of WRITEBGR32 */
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither)
-            );
-        }
-    } else {
-        const int16_t *ubuf1 = ubuf[1]; /* second chroma row, read by YSCALEYUV2RGB1b through %3 */
-        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither)
-            );
-        } else {
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones, used in the alpha slot of WRITEBGR32 */
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither)
-            );
-        }
-    }
-}
-/**
- * Convert a single luma row (buf0) to packed 24-bit pixels with WRITEBGR24.
- * Chroma handling depends on uvalpha:
- *  - uvalpha < 2048: YSCALEYUV2RGB1, which reads chroma from ubuf[0] only;
- *  - otherwise: YSCALEYUV2RGB1b, which combines ubuf[0] and ubuf[1].
- *
- * vbuf, abuf0, dstW and y are not referenced below; the asm takes the width
- * from 8280(%5).
- * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
- * %3 = ubuf1, %4 = dest, %5 = &c->redDither. REG_b and REG_BP are saved and
- * restored by hand.
- */
-static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
-                                const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                const int16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, int y)
-{
-    const int16_t *ubuf0 = ubuf[0];
-    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0]; /* only fills operand %3, which YSCALEYUV2RGB1 does not read */
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB1(%%REGBP, %5)
-            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    } else {
-        const int16_t *ubuf1 = ubuf[1]; /* second chroma row, read by YSCALEYUV2RGB1b through %3 */
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB1b(%%REGBP, %5)
-            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    }
-}
-/**
- * Convert a single luma row (buf0) to 16-bit pixels with WRITERGB15.
- * Chroma handling depends on uvalpha:
- *  - uvalpha < 2048: YSCALEYUV2RGB1, which reads chroma from ubuf[0] only;
- *  - otherwise: YSCALEYUV2RGB1b, which combines ubuf[0] and ubuf[1].
- * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER values addressed
- * relative to %5 are added to the B/G/R bytes with unsigned saturation
- * before writing.
- *
- * vbuf, abuf0, dstW and y are not referenced below; the asm takes the width
- * from 8280(%5).
- * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
- * %3 = ubuf1, %4 = dest, %5 = &c->redDither. REG_b and REG_BP are saved and
- * restored by hand.
- */
-static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                 const int16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, int y)
-{
-    const int16_t *ubuf0 = ubuf[0];
-    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0]; /* only fills operand %3, which YSCALEYUV2RGB1 does not read */
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB1(%%REGBP, %5)
-            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    } else {
-        const int16_t *ubuf1 = ubuf[1]; /* second chroma row, read by YSCALEYUV2RGB1b through %3 */
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB1b(%%REGBP, %5)
-            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    }
-}
-/**
- * Convert a single luma row (buf0) to 16-bit pixels with WRITERGB16.
- * Chroma handling depends on uvalpha:
- *  - uvalpha < 2048: YSCALEYUV2RGB1, which reads chroma from ubuf[0] only;
- *  - otherwise: YSCALEYUV2RGB1b, which combines ubuf[0] and ubuf[1].
- * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER values addressed
- * relative to %5 are added to the B/G/R bytes with unsigned saturation
- * before writing.
- *
- * vbuf, abuf0, dstW and y are not referenced below; the asm takes the width
- * from 8280(%5).
- * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
- * %3 = ubuf1, %4 = dest, %5 = &c->redDither. REG_b and REG_BP are saved and
- * restored by hand.
- */
-static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                 const int16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, int y)
-{
-    const int16_t *ubuf0 = ubuf[0];
-    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0]; /* only fills operand %3, which YSCALEYUV2RGB1 does not read */
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB1(%%REGBP, %5)
-            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    } else {
-        const int16_t *ubuf1 = ubuf[1]; /* second chroma row, read by YSCALEYUV2RGB1b through %3 */
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the ESP_OFFSET slot */
-            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
-            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
-            YSCALEYUV2RGB1b(%%REGBP, %5)
-            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
-            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    }
-}
-
-#define REAL_YSCALEYUV2PACKED1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
-    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
-    "psraw                $7, %%mm3     \n\t" \
-    "psraw                $7, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t" \
-
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
-
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
-    "xor "#index", "#index"             \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
-    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $8, %%mm3     \n\t" \
-    "psrlw                $8, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
-
-static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
-                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
-                                  const int16_t *abuf0, uint8_t *dest,
-                                  int dstW, int uvalpha, int y)
-{
-    const int16_t *ubuf0 = ubuf[0];
-    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0];
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5)
-            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    } else {
-        const int16_t *ubuf1 = ubuf[1];
-        __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5)
-            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither)
-        );
-    }
-}
-
-#if COMPILE_TEMPLATE_MMXEXT
-static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                 int dstWidth, const uint8_t *src,
-                                 int srcW, int xInc)
-{
-    int32_t *filterPos = c->hLumFilterPos;
-    int16_t *filter    = c->hLumFilter;
-    void    *mmxextFilterCode = c->lumMmxextFilterCode;
-    int i;
-#if defined(PIC)
-    uint64_t ebxsave;
-#endif
-#if ARCH_X86_64
-    uint64_t retsave;
-#endif
-
-    __asm__ volatile(
-#if defined(PIC)
-        "mov               %%"REG_b", %5        \n\t"
-#if ARCH_X86_64
-        "mov               -8(%%rsp), %%"REG_a" \n\t"
-        "mov               %%"REG_a", %6        \n\t"
-#endif
-#else
-#if ARCH_X86_64
-        "mov               -8(%%rsp), %%"REG_a" \n\t"
-        "mov               %%"REG_a", %5        \n\t"
-#endif
-#endif
-        "pxor                  %%mm7, %%mm7     \n\t"
-        "mov                      %0, %%"REG_c" \n\t"
-        "mov                      %1, %%"REG_D" \n\t"
-        "mov                      %2, %%"REG_d" \n\t"
-        "mov                      %3, %%"REG_b" \n\t"
-        "xor               %%"REG_a", %%"REG_a" \n\t" // i
-        PREFETCH"        (%%"REG_c")            \n\t"
-        PREFETCH"      32(%%"REG_c")            \n\t"
-        PREFETCH"      64(%%"REG_c")            \n\t"
-
-#if ARCH_X86_64
-#define CALL_MMXEXT_FILTER_CODE \
-        "movl            (%%"REG_b"), %%esi     \n\t"\
-        "call                    *%4            \n\t"\
-        "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
-        "add               %%"REG_S", %%"REG_c" \n\t"\
-        "add               %%"REG_a", %%"REG_D" \n\t"\
-        "xor               %%"REG_a", %%"REG_a" \n\t"\
-
-#else
-#define CALL_MMXEXT_FILTER_CODE \
-        "movl (%%"REG_b"), %%esi        \n\t"\
-        "call         *%4                       \n\t"\
-        "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-        "add               %%"REG_a", %%"REG_D" \n\t"\
-        "xor               %%"REG_a", %%"REG_a" \n\t"\
-
-#endif /* ARCH_X86_64 */
-
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-
-#if defined(PIC)
-        "mov                      %5, %%"REG_b" \n\t"
-#if ARCH_X86_64
-        "mov                      %6, %%"REG_a" \n\t"
-        "mov               %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#else
-#if ARCH_X86_64
-        "mov                      %5, %%"REG_a" \n\t"
-        "mov               %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#endif
-        :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
-           "m" (mmxextFilterCode)
-#if defined(PIC)
-          ,"m" (ebxsave)
-#endif
-#if ARCH_X86_64
-          ,"m"(retsave)
-#endif
-        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
-         ,"%"REG_b
-#endif
-    );
-
-    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
-        dst[i] = src[srcW-1]*128;
-}
-
-static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                 int dstWidth, const uint8_t *src1,
-                                 const uint8_t *src2, int srcW, int xInc)
-{
-    int32_t *filterPos = c->hChrFilterPos;
-    int16_t *filter    = c->hChrFilter;
-    void    *mmxextFilterCode = c->chrMmxextFilterCode;
-    int i;
-#if defined(PIC)
-    DECLARE_ALIGNED(8, uint64_t, ebxsave);
-#endif
-#if ARCH_X86_64
-    DECLARE_ALIGNED(8, uint64_t, retsave);
-#endif
-
-    __asm__ volatile(
-#if defined(PIC)
-        "mov          %%"REG_b", %7         \n\t"
-#if ARCH_X86_64
-        "mov          -8(%%rsp), %%"REG_a"  \n\t"
-        "mov          %%"REG_a", %8         \n\t"
-#endif
-#else
-#if ARCH_X86_64
-        "mov          -8(%%rsp), %%"REG_a"  \n\t"
-        "mov          %%"REG_a", %7         \n\t"
-#endif
-#endif
-        "pxor             %%mm7, %%mm7      \n\t"
-        "mov                 %0, %%"REG_c"  \n\t"
-        "mov                 %1, %%"REG_D"  \n\t"
-        "mov                 %2, %%"REG_d"  \n\t"
-        "mov                 %3, %%"REG_b"  \n\t"
-        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-        PREFETCH"   (%%"REG_c")             \n\t"
-        PREFETCH" 32(%%"REG_c")             \n\t"
-        PREFETCH" 64(%%"REG_c")             \n\t"
-
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-        "mov                 %5, %%"REG_c"  \n\t" // src
-        "mov                 %6, %%"REG_D"  \n\t" // buf2
-        PREFETCH"   (%%"REG_c")             \n\t"
-        PREFETCH" 32(%%"REG_c")             \n\t"
-        PREFETCH" 64(%%"REG_c")             \n\t"
-
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-        CALL_MMXEXT_FILTER_CODE
-
-#if defined(PIC)
-        "mov %7, %%"REG_b"    \n\t"
-#if ARCH_X86_64
-        "mov                 %8, %%"REG_a"  \n\t"
-        "mov          %%"REG_a", -8(%%rsp)  \n\t"
-#endif
-#else
-#if ARCH_X86_64
-        "mov                 %7, %%"REG_a"  \n\t"
-        "mov          %%"REG_a", -8(%%rsp)  \n\t"
-#endif
-#endif
-        :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
-           "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
-#if defined(PIC)
-          ,"m" (ebxsave)
-#endif
-#if ARCH_X86_64
-          ,"m"(retsave)
-#endif
-        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
-         ,"%"REG_b
-#endif
-    );
-
-    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-        dst1[i] = src1[srcW-1]*128;
-        dst2[i] = src2[srcW-1]*128;
-    }
-}
-#endif /* COMPILE_TEMPLATE_MMXEXT */
-
-static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
-{
-    enum AVPixelFormat dstFormat = c->dstFormat;
-
-    c->use_mmx_vfilter= 0;
-    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12
-        && dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
-            if (c->flags & SWS_ACCURATE_RND) {
-                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-                    switch (c->dstFormat) {
-                    case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
-                    case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
-                    case AV_PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
-                    case AV_PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
-                    case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
-                    default: break;
-                    }
-                }
-            } else {
-                c->use_mmx_vfilter= 1;
-                c->yuv2planeX = RENAME(yuv2yuvX    );
-                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-                    switch (c->dstFormat) {
-                    case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
-                    case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
-                    case AV_PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
-                    case AV_PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
-                    case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
-                    default: break;
-                    }
-                }
-            }
-        if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-            switch (c->dstFormat) {
-            case AV_PIX_FMT_RGB32:
-                c->yuv2packed1 = RENAME(yuv2rgb32_1);
-                c->yuv2packed2 = RENAME(yuv2rgb32_2);
-                break;
-            case AV_PIX_FMT_BGR24:
-                c->yuv2packed1 = RENAME(yuv2bgr24_1);
-                c->yuv2packed2 = RENAME(yuv2bgr24_2);
-                break;
-            case AV_PIX_FMT_RGB555:
-                c->yuv2packed1 = RENAME(yuv2rgb555_1);
-                c->yuv2packed2 = RENAME(yuv2rgb555_2);
-                break;
-            case AV_PIX_FMT_RGB565:
-                c->yuv2packed1 = RENAME(yuv2rgb565_1);
-                c->yuv2packed2 = RENAME(yuv2rgb565_2);
-                break;
-            case AV_PIX_FMT_YUYV422:
-                c->yuv2packed1 = RENAME(yuv2yuyv422_1);
-                c->yuv2packed2 = RENAME(yuv2yuyv422_2);
-                break;
-            default:
-                break;
-            }
-        }
-    }
-
-    if (c->srcBpc == 8 && c->dstBpc <= 14) {
-    // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
-#if COMPILE_TEMPLATE_MMXEXT
-    if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
-        c->hyscale_fast = RENAME(hyscale_fast);
-        c->hcscale_fast = RENAME(hcscale_fast);
-    } else {
-#endif /* COMPILE_TEMPLATE_MMXEXT */
-        c->hyscale_fast = NULL;
-        c->hcscale_fast = NULL;
-#if COMPILE_TEMPLATE_MMXEXT
-    }
-#endif /* COMPILE_TEMPLATE_MMXEXT */
-    }
-}
diff --git a/ffmpeg/libswscale/x86/w64xmmtest.c b/ffmpeg/libswscale/x86/w64xmmtest.c
deleted file mode 100644
index 88143d9..0000000
--- a/ffmpeg/libswscale/x86/w64xmmtest.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * check XMM registers for clobbers on Win64
- * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86/w64xmmtest.h"
-#include "libswscale/swscale.h"
-
-wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
-               const int srcStride[], int srcSliceY, int srcSliceH,
-               uint8_t *const dst[], const int dstStride[]))
-{
-    testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY,
-                    srcSliceH, dst, dstStride);
-}
diff --git a/ffmpeg/libswscale/x86/yuv2rgb.c b/ffmpeg/libswscale/x86/yuv2rgb.c
deleted file mode 100644
index e4315ef..0000000
--- a/ffmpeg/libswscale/x86/yuv2rgb.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * software YUV to RGB converter
- *
- * Copyright (C) 2009 Konstantin Shishkov
- *
- * MMX/MMXEXT template stuff (needed for fast movntq support),
- * 1,4,8bpp support and context / deglobalize stuff
- * by Michael Niedermayer (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "config.h"
-#include "libswscale/rgb2rgb.h"
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-#include "libavutil/attributes.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/cpu.h"
-
-#if HAVE_INLINE_ASM
-
-#define DITHER1XBPP // only for MMX
-
-/* hope these constant values are cache line aligned */
-DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw)   = 0x00ff00ff00ff00ffULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
-DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
-DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
-
-//MMX versions
-#if HAVE_MMX_INLINE
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE */
-
-// MMXEXT versions
-#if HAVE_MMXEXT_INLINE
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE */
-
-#endif /* HAVE_INLINE_ASM */
-
-av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
-{
-#if HAVE_MMX_INLINE
-    int cpu_flags = av_get_cpu_flags();
-
-#if HAVE_MMXEXT_INLINE
-    if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
-        switch (c->dstFormat) {
-        case AV_PIX_FMT_RGB24:
-            return yuv420_rgb24_mmxext;
-        case AV_PIX_FMT_BGR24:
-            return yuv420_bgr24_mmxext;
-        }
-    }
-#endif
-
-    if (cpu_flags & AV_CPU_FLAG_MMX) {
-        switch (c->dstFormat) {
-            case AV_PIX_FMT_RGB32:
-                if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-                    return yuva420_rgb32_mmx;
-#endif
-                    break;
-                } else
-                    return yuv420_rgb32_mmx;
-            case AV_PIX_FMT_BGR32:
-                if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-                    return yuva420_bgr32_mmx;
-#endif
-                    break;
-                } else
-                    return yuv420_bgr32_mmx;
-            case AV_PIX_FMT_RGB24:
-                return yuv420_rgb24_mmx;
-            case AV_PIX_FMT_BGR24:
-                return yuv420_bgr24_mmx;
-            case AV_PIX_FMT_RGB565:
-                return yuv420_rgb16_mmx;
-            case AV_PIX_FMT_RGB555:
-                return yuv420_rgb15_mmx;
-        }
-    }
-#endif /* HAVE_MMX_INLINE */
-
-    return NULL;
-}
diff --git a/ffmpeg/libswscale/x86/yuv2rgb_template.c b/ffmpeg/libswscale/x86/yuv2rgb_template.c
deleted file mode 100644
index c879102..0000000
--- a/ffmpeg/libswscale/x86/yuv2rgb_template.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * software YUV to RGB converter
- *
- * Copyright (C) 2001-2007 Michael Niedermayer
- *           (c) 2010 Konstantin Shishkov
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#undef MOVNTQ
-#undef EMMS
-#undef SFENCE
-
-#if COMPILE_TEMPLATE_MMXEXT
-#define MOVNTQ "movntq"
-#define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
-
-#define REG_BLUE  "0"
-#define REG_RED   "1"
-#define REG_GREEN "2"
-#define REG_ALPHA "3"
-
-#define YUV2RGB_LOOP(depth)                                          \
-    h_size = (c->dstW + 7) & ~7;                                     \
-    if (h_size * depth > FFABS(dstStride[0]))                        \
-        h_size -= 8;                                                 \
-                                                                     \
-    vshift = c->srcFormat != AV_PIX_FMT_YUV422P;                        \
-                                                                     \
-    __asm__ volatile ("pxor %mm4, %mm4\n\t");                        \
-    for (y = 0; y < srcSliceH; y++) {                                \
-        uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
-        const uint8_t *py = src[0] +               y * srcStride[0]; \
-        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
-        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
-        x86_reg index = -h_size / 2;                                 \
-
-#define YUV2RGB_INITIAL_LOAD          \
-    __asm__ volatile (                \
-        "movq (%5, %0, 2), %%mm6\n\t" \
-        "movd    (%2, %0), %%mm0\n\t" \
-        "movd    (%3, %0), %%mm1\n\t" \
-        "1: \n\t"                     \
-
-/* YUV2RGB core
- * Conversion is performed in usual way:
- * R = Y' * Ycoef + Vred * V'
- * G = Y' * Ycoef + Vgreen * V' + Ugreen * U'
- * B = Y' * Ycoef               + Ublue * U'
- *
- * where X' = X * 8 - Xoffset (multiplication is performed to increase
- * precision a bit).
- * Since it operates in YUV420 colorspace, Y component is additionally
- * split into Y1 and Y2 for even and odd pixels.
- *
- * Input:
- * mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register
- * Output:
- * mm1 - R, mm2 - G, mm0 - B
- */
-#define YUV2RGB                                  \
-    /* convert Y, U, V into Y1', Y2', U', V' */  \
-    "movq      %%mm6, %%mm7\n\t"                 \
-    "punpcklbw %%mm4, %%mm0\n\t"                 \
-    "punpcklbw %%mm4, %%mm1\n\t"                 \
-    "pand     "MANGLE(mmx_00ffw)", %%mm6\n\t"    \
-    "psrlw     $8,    %%mm7\n\t"                 \
-    "psllw     $3,    %%mm0\n\t"                 \
-    "psllw     $3,    %%mm1\n\t"                 \
-    "psllw     $3,    %%mm6\n\t"                 \
-    "psllw     $3,    %%mm7\n\t"                 \
-    "psubsw   "U_OFFSET"(%4), %%mm0\n\t"         \
-    "psubsw   "V_OFFSET"(%4), %%mm1\n\t"         \
-    "psubw    "Y_OFFSET"(%4), %%mm6\n\t"         \
-    "psubw    "Y_OFFSET"(%4), %%mm7\n\t"         \
-\
-     /* multiply by coefficients */              \
-    "movq      %%mm0, %%mm2\n\t"                 \
-    "movq      %%mm1, %%mm3\n\t"                 \
-    "pmulhw   "UG_COEFF"(%4), %%mm2\n\t"         \
-    "pmulhw   "VG_COEFF"(%4), %%mm3\n\t"         \
-    "pmulhw   "Y_COEFF" (%4), %%mm6\n\t"         \
-    "pmulhw   "Y_COEFF" (%4), %%mm7\n\t"         \
-    "pmulhw   "UB_COEFF"(%4), %%mm0\n\t"         \
-    "pmulhw   "VR_COEFF"(%4), %%mm1\n\t"         \
-    "paddsw    %%mm3, %%mm2\n\t"                 \
-    /* now: mm0 = UB, mm1 = VR, mm2 = CG */      \
-    /*      mm6 = Y1, mm7 = Y2 */                \
-\
-    /* produce RGB */                            \
-    "movq      %%mm7, %%mm3\n\t"                 \
-    "movq      %%mm7, %%mm5\n\t"                 \
-    "paddsw    %%mm0, %%mm3\n\t"                 \
-    "paddsw    %%mm1, %%mm5\n\t"                 \
-    "paddsw    %%mm2, %%mm7\n\t"                 \
-    "paddsw    %%mm6, %%mm0\n\t"                 \
-    "paddsw    %%mm6, %%mm1\n\t"                 \
-    "paddsw    %%mm6, %%mm2\n\t"                 \
-
-#define RGB_PACK_INTERLEAVE                  \
-    /* pack and interleave even/odd pixels */    \
-    "packuswb  %%mm1, %%mm0\n\t"                 \
-    "packuswb  %%mm5, %%mm3\n\t"                 \
-    "packuswb  %%mm2, %%mm2\n\t"                 \
-    "movq      %%mm0, %%mm1\n\n"                 \
-    "packuswb  %%mm7, %%mm7\n\t"                 \
-    "punpcklbw %%mm3, %%mm0\n\t"                 \
-    "punpckhbw %%mm3, %%mm1\n\t"                 \
-    "punpcklbw %%mm7, %%mm2\n\t"                 \
-
-#define YUV2RGB_ENDLOOP(depth)                   \
-    "movq 8 (%5, %0, 2), %%mm6\n\t"              \
-    "movd 4 (%3, %0),    %%mm1\n\t"              \
-    "movd 4 (%2, %0),    %%mm0\n\t"              \
-    "add $"AV_STRINGIFY(depth * 8)", %1\n\t"     \
-    "add  $4, %0\n\t"                            \
-    "js   1b\n\t"                                \
-
-#define YUV2RGB_OPERANDS                                          \
-        : "+r" (index), "+r" (image)                              \
-        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
-          "r" (py - 2*index)                                      \
-        : "memory"                                                \
-        );                                                        \
-    }                                                             \
-
-#define YUV2RGB_OPERANDS_ALPHA                                    \
-        : "+r" (index), "+r" (image)                              \
-        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
-          "r" (py - 2*index), "r" (pa - 2*index)                  \
-        : "memory"                                                \
-        );                                                        \
-    }                                                             \
-
-#define YUV2RGB_ENDFUNC                          \
-    __asm__ volatile (SFENCE"\n\t"               \
-                    "emms    \n\t");             \
-    return srcSliceH;                            \
-
-#define IF0(x)
-#define IF1(x) x
-
-#define RGB_PACK16(gmask, is15)                  \
-    "pand      "MANGLE(mmx_redmask)", %%mm0\n\t" \
-    "pand      "MANGLE(mmx_redmask)", %%mm1\n\t" \
-    "movq      %%mm2,     %%mm3\n\t"             \
-    "psllw   $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \
-    "psrlw   $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \
-    "psrlw     $3,        %%mm0\n\t"             \
-    IF##is15("psrlw  $1,  %%mm1\n\t")            \
-    "pand "MANGLE(pb_e0)", %%mm2\n\t"            \
-    "pand "MANGLE(gmask)", %%mm3\n\t"            \
-    "por       %%mm2,     %%mm0\n\t"             \
-    "por       %%mm3,     %%mm1\n\t"             \
-    "movq      %%mm0,     %%mm2\n\t"             \
-    "punpcklbw %%mm1,     %%mm0\n\t"             \
-    "punpckhbw %%mm1,     %%mm2\n\t"             \
-    MOVNTQ "   %%mm0,      (%1)\n\t"             \
-    MOVNTQ "   %%mm2,     8(%1)\n\t"             \
-
-#define DITHER_RGB                               \
-    "paddusb "BLUE_DITHER"(%4),  %%mm0\n\t"      \
-    "paddusb "GREEN_DITHER"(%4), %%mm2\n\t"      \
-    "paddusb "RED_DITHER"(%4),   %%mm1\n\t"      \
-
-#if !COMPILE_TEMPLATE_MMXEXT
-static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[],
-                                       int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(2)
-
-#ifdef DITHER1XBPP
-        c->blueDither  = ff_dither8[y       & 1];
-        c->greenDither = ff_dither8[y       & 1];
-        c->redDither   = ff_dither8[(y + 1) & 1];
-#endif
-
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK_INTERLEAVE
-#ifdef DITHER1XBPP
-        DITHER_RGB
-#endif
-        RGB_PACK16(pb_03, 1)
-
-    YUV2RGB_ENDLOOP(2)
-    YUV2RGB_OPERANDS
-    YUV2RGB_ENDFUNC
-}
-
-static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[],
-                                       int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(2)
-
-#ifdef DITHER1XBPP
-        c->blueDither  = ff_dither8[y       & 1];
-        c->greenDither = ff_dither4[y       & 1];
-        c->redDither   = ff_dither8[(y + 1) & 1];
-#endif
-
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK_INTERLEAVE
-#ifdef DITHER1XBPP
-        DITHER_RGB
-#endif
-        RGB_PACK16(pb_07, 0)
-
-    YUV2RGB_ENDLOOP(2)
-    YUV2RGB_OPERANDS
-    YUV2RGB_ENDFUNC
-}
-#endif /* !COMPILE_TEMPLATE_MMXEXT */
-
-#define RGB_PACK24(blue, red)\
-    "packuswb  %%mm3,      %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
-    "packuswb  %%mm5,      %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\
-    "packuswb  %%mm7,      %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\
-    "movq      %%mm"red",  %%mm3 \n"\
-    "movq      %%mm"blue", %%mm6 \n"\
-    "psrlq     $32,        %%mm"red" \n" /* R1 R3 R5 R7 */\
-    "punpcklbw %%mm2,      %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\
-    "punpcklbw %%mm"red",  %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\
-    "movq      %%mm3,      %%mm5 \n"\
-    "punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7 */\
-    "punpcklwd %%mm6,      %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\
-    "punpckhwd %%mm6,      %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
-    RGB_PACK24_B
-
-#if COMPILE_TEMPLATE_MMXEXT
-DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
-DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
-DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
-DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1};
-DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0};
-#undef RGB_PACK24_B
-#define RGB_PACK24_B\
-    "pshufw    $0xc6,  %%mm2, %%mm1 \n"\
-    "pshufw    $0x84,  %%mm3, %%mm6 \n"\
-    "pshufw    $0x38,  %%mm5, %%mm7 \n"\
-    "pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\
-    "movq      %%mm1,         %%mm0 \n"\
-    "pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\
-    "movq      %%mm1,         %%mm2 \n"\
-    "pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\
-    "psrlq       $48,         %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\
-    "pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\
-    "psllq       $32,         %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\
-    "pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\
-    "por       %%mm3,         %%mm1 \n"\
-    "por       %%mm6,         %%mm0 \n"\
-    "por       %%mm5,         %%mm1 \n"\
-    "por       %%mm7,         %%mm2 \n"\
-    MOVNTQ"    %%mm0,          (%1) \n"\
-    MOVNTQ"    %%mm1,         8(%1) \n"\
-    MOVNTQ"    %%mm2,        16(%1) \n"\
-
-#else
-#undef RGB_PACK24_B
-#define RGB_PACK24_B\
-    "movd      %%mm3,       (%1) \n" /* R0 G0 B0 R1 */\
-    "movd      %%mm2,      4(%1) \n" /* G1 B1 */\
-    "psrlq     $32,        %%mm3 \n"\
-    "psrlq     $16,        %%mm2 \n"\
-    "movd      %%mm3,      6(%1) \n" /* R2 G2 B2 R3 */\
-    "movd      %%mm2,     10(%1) \n" /* G3 B3 */\
-    "psrlq     $16,        %%mm2 \n"\
-    "movd      %%mm5,     12(%1) \n" /* R4 G4 B4 R5 */\
-    "movd      %%mm2,     16(%1) \n" /* G5 B5 */\
-    "psrlq     $32,        %%mm5 \n"\
-    "movd      %%mm2,     20(%1) \n" /* -- -- G7 B7 */\
-    "movd      %%mm5,     18(%1) \n" /* R6 G6 B6 R7 */\
-
-#endif
-
-static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[],
-                                       int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(3)
-
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK24(REG_BLUE, REG_RED)
-
-    YUV2RGB_ENDLOOP(3)
-    YUV2RGB_OPERANDS
-    YUV2RGB_ENDFUNC
-}
-
-static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[],
-                                       int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(3)
-
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK24(REG_RED, REG_BLUE)
-
-    YUV2RGB_ENDLOOP(3)
-    YUV2RGB_OPERANDS
-    YUV2RGB_ENDFUNC
-}
-
-
-#define SET_EMPTY_ALPHA                                                      \
-    "pcmpeqd   %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */ \
-
-#define LOAD_ALPHA                                   \
-    "movq      (%6, %0, 2),     %%mm"REG_ALPHA"\n\t" \
-
-#define RGB_PACK32(red, green, blue, alpha)  \
-    "movq      %%mm"blue",  %%mm5\n\t"       \
-    "movq      %%mm"red",   %%mm6\n\t"       \
-    "punpckhbw %%mm"green", %%mm5\n\t"       \
-    "punpcklbw %%mm"green", %%mm"blue"\n\t"  \
-    "punpckhbw %%mm"alpha", %%mm6\n\t"       \
-    "punpcklbw %%mm"alpha", %%mm"red"\n\t"   \
-    "movq      %%mm"blue",  %%mm"green"\n\t" \
-    "movq      %%mm5,       %%mm"alpha"\n\t" \
-    "punpcklwd %%mm"red",   %%mm"blue"\n\t"  \
-    "punpckhwd %%mm"red",   %%mm"green"\n\t" \
-    "punpcklwd %%mm6,       %%mm5\n\t"       \
-    "punpckhwd %%mm6,       %%mm"alpha"\n\t" \
-    MOVNTQ "   %%mm"blue",   0(%1)\n\t"      \
-    MOVNTQ "   %%mm"green",  8(%1)\n\t"      \
-    MOVNTQ "   %%mm5,       16(%1)\n\t"      \
-    MOVNTQ "   %%mm"alpha", 24(%1)\n\t"      \
-
-#if !COMPILE_TEMPLATE_MMXEXT
-static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[],
-                                       int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(4)
-
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK_INTERLEAVE
-        SET_EMPTY_ALPHA
-        RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA)
-
-    YUV2RGB_ENDLOOP(4)
-    YUV2RGB_OPERANDS
-    YUV2RGB_ENDFUNC
-}
-
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
-                                        int srcStride[],
-                                        int srcSliceY, int srcSliceH,
-                                        uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(4)
-
-        const uint8_t *pa = src[3] + y * srcStride[3];
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK_INTERLEAVE
-        LOAD_ALPHA
-        RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA)
-
-    YUV2RGB_ENDLOOP(4)
-    YUV2RGB_OPERANDS_ALPHA
-    YUV2RGB_ENDFUNC
-}
-#endif
-
-static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
-                                       int srcStride[],
-                                       int srcSliceY, int srcSliceH,
-                                       uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(4)
-
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK_INTERLEAVE
-        SET_EMPTY_ALPHA
-        RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
-
-    YUV2RGB_ENDLOOP(4)
-    YUV2RGB_OPERANDS
-    YUV2RGB_ENDFUNC
-}
-
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
-                                        int srcStride[],
-                                        int srcSliceY, int srcSliceH,
-                                        uint8_t *dst[], int dstStride[])
-{
-    int y, h_size, vshift;
-
-    YUV2RGB_LOOP(4)
-
-        const uint8_t *pa = src[3] + y * srcStride[3];
-        YUV2RGB_INITIAL_LOAD
-        YUV2RGB
-        RGB_PACK_INTERLEAVE
-        LOAD_ALPHA
-        RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
-
-    YUV2RGB_ENDLOOP(4)
-    YUV2RGB_OPERANDS_ALPHA
-    YUV2RGB_ENDFUNC
-}
-#endif
-
-#endif /* !COMPILE_TEMPLATE_MMXEXT */
diff --git a/ffmpeg/libswscale/yuv2rgb.c b/ffmpeg/libswscale/yuv2rgb.c
deleted file mode 100644
index 77c56a9..0000000
--- a/ffmpeg/libswscale/yuv2rgb.c
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
- * software YUV to RGB converter
- *
- * Copyright (C) 2009 Konstantin Shishkov
- *
- * 1,4,8bpp support and context / deglobalize stuff
- * by Michael Niedermayer (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "libavutil/cpu.h"
-#include "libavutil/bswap.h"
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-#include "libavutil/pixdesc.h"
-
-const int32_t ff_yuv2rgb_coeffs[8][4] = {
-    { 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */
-    { 117504, 138453, 13954, 34903 }, /* ITU-R Rec. 709 (1990) */
-    { 104597, 132201, 25675, 53279 }, /* unspecified */
-    { 104597, 132201, 25675, 53279 }, /* reserved */
-    { 104448, 132798, 24759, 53109 }, /* FCC */
-    { 104597, 132201, 25675, 53279 }, /* ITU-R Rec. 624-4 System B, G */
-    { 104597, 132201, 25675, 53279 }, /* SMPTE 170M */
-    { 117579, 136230, 16907, 35559 }  /* SMPTE 240M (1987) */
-};
-
-const int *sws_getCoefficients(int colorspace)
-{
-    if (colorspace > 7 || colorspace < 0)
-        colorspace = SWS_CS_DEFAULT;
-    return ff_yuv2rgb_coeffs[colorspace];
-}
-
-#define LOADCHROMA(i)                               \
-    U = pu[i];                                      \
-    V = pv[i];                                      \
-    r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM];                     \
-    g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]);  \
-    b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
-
-#define PUTRGB(dst, src, i)                         \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y] + g[Y] + b[Y];            \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y] + g[Y] + b[Y];
-
-#define PUTRGB24(dst, src, i)                       \
-    Y              = src[2 * i];                    \
-    dst[6 * i + 0] = r[Y];                          \
-    dst[6 * i + 1] = g[Y];                          \
-    dst[6 * i + 2] = b[Y];                          \
-    Y              = src[2 * i + 1];                \
-    dst[6 * i + 3] = r[Y];                          \
-    dst[6 * i + 4] = g[Y];                          \
-    dst[6 * i + 5] = b[Y];
-
-#define PUTBGR24(dst, src, i)                       \
-    Y              = src[2 * i];                    \
-    dst[6 * i + 0] = b[Y];                          \
-    dst[6 * i + 1] = g[Y];                          \
-    dst[6 * i + 2] = r[Y];                          \
-    Y              = src[2 * i + 1];                \
-    dst[6 * i + 3] = b[Y];                          \
-    dst[6 * i + 4] = g[Y];                          \
-    dst[6 * i + 5] = r[Y];
-
-#define PUTRGBA(dst, ysrc, asrc, i, s)                                  \
-    Y              = ysrc[2 * i];                                       \
-    dst[2 * i]     = r[Y] + g[Y] + b[Y] + (asrc[2 * i]     << s);       \
-    Y              = ysrc[2 * i + 1];                                   \
-    dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s);
-
-#define PUTRGB48(dst, src, i)                       \
-    Y                = src[ 2 * i];                 \
-    dst[12 * i +  0] = dst[12 * i +  1] = r[Y];     \
-    dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
-    dst[12 * i +  4] = dst[12 * i +  5] = b[Y];     \
-    Y                = src[ 2 * i + 1];             \
-    dst[12 * i +  6] = dst[12 * i +  7] = r[Y];     \
-    dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
-    dst[12 * i + 10] = dst[12 * i + 11] = b[Y];
-
-#define PUTBGR48(dst, src, i)                       \
-    Y                = src[2 * i];                  \
-    dst[12 * i +  0] = dst[12 * i +  1] = b[Y];     \
-    dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
-    dst[12 * i +  4] = dst[12 * i +  5] = r[Y];     \
-    Y                = src[2  * i +  1];            \
-    dst[12 * i +  6] = dst[12 * i +  7] = b[Y];     \
-    dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
-    dst[12 * i + 10] = dst[12 * i + 11] = r[Y];
-
-#define YUV2RGBFUNC(func_name, dst_type, alpha)                             \
-    static int func_name(SwsContext *c, const uint8_t *src[],               \
-                         int srcStride[], int srcSliceY, int srcSliceH,     \
-                         uint8_t *dst[], int dstStride[])                   \
-    {                                                                       \
-        int y;                                                              \
-                                                                            \
-        if (!alpha && c->srcFormat == AV_PIX_FMT_YUV422P) {                    \
-            srcStride[1] *= 2;                                              \
-            srcStride[2] *= 2;                                              \
-        }                                                                   \
-        for (y = 0; y < srcSliceH; y += 2) {                                \
-            dst_type *dst_1 =                                               \
-                (dst_type *)(dst[0] + (y + srcSliceY)     * dstStride[0]);  \
-            dst_type *dst_2 =                                               \
-                (dst_type *)(dst[0] + (y + srcSliceY + 1) * dstStride[0]);  \
-            dst_type av_unused *r, *g, *b;                                  \
-            const uint8_t *py_1 = src[0] +  y       * srcStride[0];         \
-            const uint8_t *py_2 = py_1   +            srcStride[0];         \
-            const uint8_t *pu   = src[1] + (y >> 1) * srcStride[1];         \
-            const uint8_t *pv   = src[2] + (y >> 1) * srcStride[2];         \
-            const uint8_t av_unused *pa_1, *pa_2;                           \
-            unsigned int h_size = c->dstW >> 3;                             \
-            if (alpha) {                                                    \
-                pa_1 = src[3] + y * srcStride[3];                           \
-                pa_2 = pa_1   +     srcStride[3];                           \
-            }                                                               \
-            while (h_size--) {                                              \
-                int av_unused U, V, Y;                                      \
-
-#define ENDYUV2RGBLINE(dst_delta, ss)               \
-    pu    += 4 >> ss;                               \
-    pv    += 4 >> ss;                               \
-    py_1  += 8 >> ss;                               \
-    py_2  += 8 >> ss;                               \
-    dst_1 += dst_delta >> ss;                       \
-    dst_2 += dst_delta >> ss;                       \
-    }                                               \
-    if (c->dstW & (4 >> ss)) {                      \
-        int av_unused Y, U, V;                      \
-
-#define ENDYUV2RGBFUNC()                            \
-            }                                       \
-        }                                           \
-        return srcSliceH;                           \
-    }
-
-#define CLOSEYUV2RGBFUNC(dst_delta)                 \
-    ENDYUV2RGBLINE(dst_delta, 0)                    \
-    ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB48(dst_2, py_2, 1);
-    PUTRGB48(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB48(dst_1, py_1, 2);
-    PUTRGB48(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB48(dst_2, py_2, 3);
-    PUTRGB48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB48(dst_2, py_2, 1);
-    PUTRGB48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR48(dst_2, py_2, 1);
-    PUTBGR48(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTBGR48(dst_1, py_1, 2);
-    PUTBGR48(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTBGR48(dst_2, py_2, 3);
-    PUTBGR48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR48(dst_2, py_2, 1);
-    PUTBGR48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2, py_2, 1);
-    PUTRGB(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1, py_1, 2);
-    PUTRGB(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2, py_2, 3);
-    PUTRGB(dst_1, py_1, 3);
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2, py_2, 1);
-    PUTRGB(dst_1, py_1, 1);
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-
-    LOADCHROMA(2);
-    PUTRGBA(dst_1, py_1, pa_1, 2, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 2, 24);
-
-    LOADCHROMA(3);
-    PUTRGBA(dst_2, py_2, pa_2, 3, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 3, 24);
-    pa_1 += 8;
-    pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-    pa_1 += 4;
-    pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-
-    LOADCHROMA(2);
-    PUTRGBA(dst_1, py_1, pa_1, 2, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 2, 0);
-
-    LOADCHROMA(3);
-    PUTRGBA(dst_2, py_2, pa_2, 3, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 3, 0);
-    pa_1 += 8;
-    pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-    pa_1 += 4;
-    pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2, py_2, 1);
-    PUTRGB24(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB24(dst_1, py_1, 2);
-    PUTRGB24(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB24(dst_2, py_2, 3);
-    PUTRGB24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2, py_2, 1);
-    PUTRGB24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-// only trivial mods from yuv2rgb_c_24_rgb
-YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2, py_2, 1);
-    PUTBGR24(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTBGR24(dst_1, py_1, 2);
-    PUTBGR24(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTBGR24(dst_2, py_2, 3);
-    PUTBGR24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2, py_2, 1);
-    PUTBGR24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
-    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
-
-#define PUTRGB16(dst, src, i, o)                    \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d16[0 + o]] +            \
-                     g[Y + e16[0 + o]] +            \
-                     b[Y + f16[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
-                     g[Y + e16[1 + o]] +            \
-                     b[Y + f16[1 + o]];
-    LOADCHROMA(0);
-    PUTRGB16(dst_1, py_1, 0, 0);
-    PUTRGB16(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB16(dst_2, py_2, 1, 2 + 8);
-    PUTRGB16(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB16(dst_1, py_1, 2, 4);
-    PUTRGB16(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB16(dst_2, py_2, 3, 6 + 8);
-    PUTRGB16(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
-
-#define PUTRGB15(dst, src, i, o)                    \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d16[0 + o]] +            \
-                     g[Y + d16[1 + o]] +            \
-                     b[Y + e16[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
-                     g[Y + d16[0 + o]] +            \
-                     b[Y + e16[1 + o]];
-    LOADCHROMA(0);
-    PUTRGB15(dst_1, py_1, 0, 0);
-    PUTRGB15(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB15(dst_2, py_2, 1, 2 + 8);
-    PUTRGB15(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB15(dst_1, py_1, 2, 4);
-    PUTRGB15(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB15(dst_2, py_2, 3, 6 + 8);
-    PUTRGB15(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_4x4_16[y & 3];
-
-#define PUTRGB12(dst, src, i, o)                    \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d16[0 + o]] +            \
-                     g[Y + d16[0 + o]] +            \
-                     b[Y + d16[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
-                     g[Y + d16[1 + o]] +            \
-                     b[Y + d16[1 + o]];
-
-    LOADCHROMA(0);
-    PUTRGB12(dst_1, py_1, 0, 0);
-    PUTRGB12(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB12(dst_2, py_2, 1, 2 + 8);
-    PUTRGB12(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB12(dst_1, py_1, 2, 4);
-    PUTRGB12(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB12(dst_2, py_2, 3, 6 + 8);
-    PUTRGB12(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
-    const uint8_t *d32 = ff_dither_8x8_32[y & 7];
-    const uint8_t *d64 = ff_dither_8x8_73[y & 7];
-
-#define PUTRGB8(dst, src, i, o)                     \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d32[0 + o]] +            \
-                     g[Y + d32[0 + o]] +            \
-                     b[Y + d64[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d32[1 + o]] +            \
-                     g[Y + d32[1 + o]] +            \
-                     b[Y + d64[1 + o]];
-
-    LOADCHROMA(0);
-    PUTRGB8(dst_1, py_1, 0, 0);
-    PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB8(dst_2, py_2, 1, 2 + 8);
-    PUTRGB8(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB8(dst_1, py_1, 2, 4);
-    PUTRGB8(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB8(dst_2, py_2, 3, 6 + 8);
-    PUTRGB8(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
-    const uint8_t * d64 = ff_dither_8x8_73[y & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[y & 7];
-    int acc;
-
-#define PUTRGB4D(dst, src, i, o)                    \
-    Y      = src[2 * i];                            \
-    acc    = r[Y + d128[0 + o]] +                   \
-             g[Y +  d64[0 + o]] +                   \
-             b[Y + d128[0 + o]];                    \
-    Y      = src[2 * i + 1];                        \
-    acc   |= (r[Y + d128[1 + o]] +                  \
-              g[Y +  d64[1 + o]] +                  \
-              b[Y + d128[1 + o]]) << 4;             \
-    dst[i] = acc;
-
-    LOADCHROMA(0);
-    PUTRGB4D(dst_1, py_1, 0, 0);
-    PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4D(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4D(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB4D(dst_1, py_1, 2, 4);
-    PUTRGB4D(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB4D(dst_2, py_2, 3, 6 + 8);
-    PUTRGB4D(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(4)
-
-YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
-    const uint8_t *d64  = ff_dither_8x8_73[y & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[y & 7];
-
-#define PUTRGB4DB(dst, src, i, o)                   \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d128[0 + o]] +           \
-                     g[Y +  d64[0 + o]] +           \
-                     b[Y + d128[0 + o]];            \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d128[1 + o]] +           \
-                     g[Y +  d64[1 + o]] +           \
-                     b[Y + d128[1 + o]];
-
-    LOADCHROMA(0);
-    PUTRGB4DB(dst_1, py_1, 0, 0);
-    PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4DB(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB4DB(dst_1, py_1, 2, 4);
-    PUTRGB4DB(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB4DB(dst_2, py_2, 3, 6 + 8);
-    PUTRGB4DB(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
-    const uint8_t *d128 = ff_dither_8x8_220[y & 7];
-    char out_1 = 0, out_2 = 0;
-    g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
-
-#define PUTRGB1(out, src, i, o)                     \
-    Y    = src[2 * i];                              \
-    out += out + g[Y + d128[0 + o]];                \
-    Y    = src[2 * i + 1];                          \
-    out += out + g[Y + d128[1 + o]];
-
-    PUTRGB1(out_1, py_1, 0, 0);
-    PUTRGB1(out_2, py_2, 0, 0 + 8);
-
-    PUTRGB1(out_2, py_2, 1, 2 + 8);
-    PUTRGB1(out_1, py_1, 1, 2);
-
-    PUTRGB1(out_1, py_1, 2, 4);
-    PUTRGB1(out_2, py_2, 2, 4 + 8);
-
-    PUTRGB1(out_2, py_2, 3, 6 + 8);
-    PUTRGB1(out_1, py_1, 3, 6);
-
-    dst_1[0] = out_1;
-    dst_2[0] = out_2;
-CLOSEYUV2RGBFUNC(1)
-
-SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
-{
-    SwsFunc t = NULL;
-
-    if (ARCH_BFIN)
-        t = ff_yuv2rgb_init_bfin(c);
-    if (ARCH_PPC)
-        t = ff_yuv2rgb_init_ppc(c);
-    if (HAVE_VIS)
-        t = ff_yuv2rgb_init_vis(c);
-    if (ARCH_X86)
-        t = ff_yuv2rgb_init_x86(c);
-
-    if (t)
-        return t;
-
-    av_log(c, AV_LOG_WARNING,
-           "No accelerated colorspace conversion found from %s to %s.\n",
-           av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
-
-    switch (c->dstFormat) {
-    case AV_PIX_FMT_BGR48BE:
-    case AV_PIX_FMT_BGR48LE:
-        return yuv2rgb_c_bgr48;
-    case AV_PIX_FMT_RGB48BE:
-    case AV_PIX_FMT_RGB48LE:
-        return yuv2rgb_c_48;
-    case AV_PIX_FMT_ARGB:
-    case AV_PIX_FMT_ABGR:
-        if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
-            return yuva2argb_c;
-    case AV_PIX_FMT_RGBA:
-    case AV_PIX_FMT_BGRA:
-        return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
-    case AV_PIX_FMT_RGB24:
-        return yuv2rgb_c_24_rgb;
-    case AV_PIX_FMT_BGR24:
-        return yuv2rgb_c_24_bgr;
-    case AV_PIX_FMT_RGB565:
-    case AV_PIX_FMT_BGR565:
-        return yuv2rgb_c_16_ordered_dither;
-    case AV_PIX_FMT_RGB555:
-    case AV_PIX_FMT_BGR555:
-        return yuv2rgb_c_15_ordered_dither;
-    case AV_PIX_FMT_RGB444:
-    case AV_PIX_FMT_BGR444:
-        return yuv2rgb_c_12_ordered_dither;
-    case AV_PIX_FMT_RGB8:
-    case AV_PIX_FMT_BGR8:
-        return yuv2rgb_c_8_ordered_dither;
-    case AV_PIX_FMT_RGB4:
-    case AV_PIX_FMT_BGR4:
-        return yuv2rgb_c_4_ordered_dither;
-    case AV_PIX_FMT_RGB4_BYTE:
-    case AV_PIX_FMT_BGR4_BYTE:
-        return yuv2rgb_c_4b_ordered_dither;
-    case AV_PIX_FMT_MONOBLACK:
-        return yuv2rgb_c_1_ordered_dither;
-    }
-    return NULL;
-}
-
-static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize,
-                       const int64_t inc, void *y_tab)
-{
-    int i;
-    uint8_t *y_table = y_tab;
-
-    y_table -= elemsize * (inc >> 9);
-
-    for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
-        int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
-        table[i] = y_table + elemsize * (cb >> 16);
-    }
-}
-
-static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int64_t inc)
-{
-    int i;
-    int off    = -(inc >> 9);
-
-    for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
-        int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
-        table[i] = elemsize * (off + (cb >> 16));
-    }
-}
-
-static uint16_t roundToInt16(int64_t f)
-{
-    int r = (f + (1 << 15)) >> 16;
-
-    if (r < -0x7FFF)
-        return 0x8000;
-    else if (r > 0x7FFF)
-        return 0x7FFF;
-    else
-        return r;
-}
-
-av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
-                                     int fullRange, int brightness,
-                                     int contrast, int saturation)
-{
-    const int isRgb = c->dstFormat == AV_PIX_FMT_RGB32     ||
-                      c->dstFormat == AV_PIX_FMT_RGB32_1   ||
-                      c->dstFormat == AV_PIX_FMT_BGR24     ||
-                      c->dstFormat == AV_PIX_FMT_RGB565BE  ||
-                      c->dstFormat == AV_PIX_FMT_RGB565LE  ||
-                      c->dstFormat == AV_PIX_FMT_RGB555BE  ||
-                      c->dstFormat == AV_PIX_FMT_RGB555LE  ||
-                      c->dstFormat == AV_PIX_FMT_RGB444BE  ||
-                      c->dstFormat == AV_PIX_FMT_RGB444LE  ||
-                      c->dstFormat == AV_PIX_FMT_RGB8      ||
-                      c->dstFormat == AV_PIX_FMT_RGB4      ||
-                      c->dstFormat == AV_PIX_FMT_RGB4_BYTE ||
-                      c->dstFormat == AV_PIX_FMT_MONOBLACK;
-    const int isNotNe = c->dstFormat == AV_PIX_FMT_NE(RGB565LE, RGB565BE) ||
-                        c->dstFormat == AV_PIX_FMT_NE(RGB555LE, RGB555BE) ||
-                        c->dstFormat == AV_PIX_FMT_NE(RGB444LE, RGB444BE) ||
-                        c->dstFormat == AV_PIX_FMT_NE(BGR565LE, BGR565BE) ||
-                        c->dstFormat == AV_PIX_FMT_NE(BGR555LE, BGR555BE) ||
-                        c->dstFormat == AV_PIX_FMT_NE(BGR444LE, BGR444BE);
-    const int bpp = c->dstFormatBpp;
-    uint8_t *y_table;
-    uint16_t *y_table16;
-    uint32_t *y_table32;
-    int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha;
-    const int yoffs = fullRange ? 384 : 326;
-
-    int64_t crv =  inv_table[0];
-    int64_t cbu =  inv_table[1];
-    int64_t cgu = -inv_table[2];
-    int64_t cgv = -inv_table[3];
-    int64_t cy  = 1 << 16;
-    int64_t oy  = 0;
-    int64_t yb  = 0;
-
-    if (!fullRange) {
-        cy = (cy * 255) / 219;
-        oy = 16 << 16;
-    } else {
-        crv = (crv * 224) / 255;
-        cbu = (cbu * 224) / 255;
-        cgu = (cgu * 224) / 255;
-        cgv = (cgv * 224) / 255;
-    }
-
-    cy   = (cy  * contrast)              >> 16;
-    crv  = (crv * contrast * saturation) >> 32;
-    cbu  = (cbu * contrast * saturation) >> 32;
-    cgu  = (cgu * contrast * saturation) >> 32;
-    cgv  = (cgv * contrast * saturation) >> 32;
-    oy  -= 256 * brightness;
-
-    c->uOffset = 0x0400040004000400LL;
-    c->vOffset = 0x0400040004000400LL;
-    c->yCoeff  = roundToInt16(cy  * 8192) * 0x0001000100010001ULL;
-    c->vrCoeff = roundToInt16(crv * 8192) * 0x0001000100010001ULL;
-    c->ubCoeff = roundToInt16(cbu * 8192) * 0x0001000100010001ULL;
-    c->vgCoeff = roundToInt16(cgv * 8192) * 0x0001000100010001ULL;
-    c->ugCoeff = roundToInt16(cgu * 8192) * 0x0001000100010001ULL;
-    c->yOffset = roundToInt16(oy  *    8) * 0x0001000100010001ULL;
-
-    c->yuv2rgb_y_coeff   = (int16_t)roundToInt16(cy  << 13);
-    c->yuv2rgb_y_offset  = (int16_t)roundToInt16(oy  <<  9);
-    c->yuv2rgb_v2r_coeff = (int16_t)roundToInt16(crv << 13);
-    c->yuv2rgb_v2g_coeff = (int16_t)roundToInt16(cgv << 13);
-    c->yuv2rgb_u2g_coeff = (int16_t)roundToInt16(cgu << 13);
-    c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu << 13);
-
-    //scale coefficients by cy
-    crv = ((crv << 16) + 0x8000) / FFMAX(cy, 1);
-    cbu = ((cbu << 16) + 0x8000) / FFMAX(cy, 1);
-    cgu = ((cgu << 16) + 0x8000) / FFMAX(cy, 1);
-    cgv = ((cgv << 16) + 0x8000) / FFMAX(cy, 1);
-
-    av_freep(&c->yuvTable);
-
-    switch (bpp) {
-    case 1:
-        c->yuvTable = av_malloc(1024);
-        y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024 - 110; i++) {
-            y_table[i + 110]  = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
-            yb               += cy;
-        }
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 4:
-    case 4 | 128:
-        rbase       = isRgb ? 3 : 0;
-        gbase       = 1;
-        bbase       = isRgb ? 0 : 3;
-        c->yuvTable = av_malloc(1024 * 3);
-        y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024 - 110; i++) {
-            int yval                = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table[i + 110]        = (yval >> 7)        << rbase;
-            y_table[i +  37 + 1024] = ((yval + 43) / 85) << gbase;
-            y_table[i + 110 + 2048] = (yval >> 7)        << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 8:
-        rbase       = isRgb ? 5 : 0;
-        gbase       = isRgb ? 2 : 3;
-        bbase       = isRgb ? 0 : 6;
-        c->yuvTable = av_malloc(1024 * 3);
-        y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024 - 38; i++) {
-            int yval               = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table[i + 16]        = ((yval + 18) / 36) << rbase;
-            y_table[i + 16 + 1024] = ((yval + 18) / 36) << gbase;
-            y_table[i + 37 + 2048] = ((yval + 43) / 85) << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 12:
-        rbase       = isRgb ? 8 : 0;
-        gbase       = 4;
-        bbase       = isRgb ? 0 : 8;
-        c->yuvTable = av_malloc(1024 * 3 * 2);
-        y_table16   = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
-            uint8_t yval        = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table16[i]        = (yval >> 4) << rbase;
-            y_table16[i + 1024] = (yval >> 4) << gbase;
-            y_table16[i + 2048] = (yval >> 4) << bbase;
-            yb += cy;
-        }
-        if (isNotNe)
-            for (i = 0; i < 1024 * 3; i++)
-                y_table16[i] = av_bswap16(y_table16[i]);
-        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
-        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
-        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
-        fill_gv_table(c->table_gV, 2, cgv);
-        break;
-    case 15:
-    case 16:
-        rbase       = isRgb ? bpp - 5 : 0;
-        gbase       = 5;
-        bbase       = isRgb ? 0 : (bpp - 5);
-        c->yuvTable = av_malloc(1024 * 3 * 2);
-        y_table16   = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
-            uint8_t yval        = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table16[i]        = (yval >> 3)          << rbase;
-            y_table16[i + 1024] = (yval >> (18 - bpp)) << gbase;
-            y_table16[i + 2048] = (yval >> 3)          << bbase;
-            yb += cy;
-        }
-        if (isNotNe)
-            for (i = 0; i < 1024 * 3; i++)
-                y_table16[i] = av_bswap16(y_table16[i]);
-        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
-        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
-        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
-        fill_gv_table(c->table_gV, 2, cgv);
-        break;
-    case 24:
-    case 48:
-        c->yuvTable = av_malloc(1024);
-        y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
-            y_table[i]  = av_clip_uint8((yb + 0x8000) >> 16);
-            yb         += cy;
-        }
-        fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 32:
-    case 64:
-        base      = (c->dstFormat == AV_PIX_FMT_RGB32_1 ||
-                     c->dstFormat == AV_PIX_FMT_BGR32_1) ? 8 : 0;
-        rbase     = base + (isRgb ? 16 : 0);
-        gbase     = base + 8;
-        bbase     = base + (isRgb ? 0 : 16);
-        needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
-        if (!needAlpha)
-            abase = (base + 24) & 31;
-        c->yuvTable = av_malloc(1024 * 3 * 4);
-        y_table32   = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
-            unsigned yval       = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table32[i]        = (yval << rbase) +
-                                  (needAlpha ? 0 : (255u << abase));
-            y_table32[i + 1024] =  yval << gbase;
-            y_table32[i + 2048] =  yval << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
-        fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
-        fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
-        fill_gv_table(c->table_gV, 4, cgv);
-        break;
-    default:
-        if(!isPlanar(c->dstFormat) || bpp <= 24)
-            av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
-        return -1;
-    }
-    return 0;
-}
author	Tim Redfern <tim@eclectronics.org>	2014-02-17 13:36:38 +0000
committer	Tim Redfern <tim@eclectronics.org>	2014-02-17 13:36:38 +0000
commit	22e28216336da876e1fd17f380ce42eaf1446769 (patch)
tree	444dad3dc7e2656992d29f34f7bce31970c122a5 /ffmpeg/libswscale
parent	ae5e8541f6e06e64c28719467cdf366ac57aff31 (diff)