41 files changed, 610 insertions, 578 deletions
diff --git a/ffmpeg/libavcodec/arm/Makefile b/ffmpeg/libavcodec/arm/Makefile
index 011404c..277abd9 100644
--- a/ffmpeg/libavcodec/arm/Makefile
+++ b/ffmpeg/libavcodec/arm/Makefile
@@ -1,116 +1,98 @@
 ARCH_HEADERS = mathops.h
 
+OBJS                                   += arm/fmtconvert_init_arm.o
+
+OBJS-$(CONFIG_AAC_DECODER)             += arm/aacpsdsp_init_arm.o       \
+                                          arm/sbrdsp_init_arm.o
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
                                           arm/ac3dsp_arm.o
-
-OBJS-$(CONFIG_AAC_DECODER)             += arm/sbrdsp_init_arm.o         \
-                                          arm/aacpsdsp_init_arm.o
-
-OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
-
-ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
-
+OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o
+OBJS-$(CONFIG_DSPUTIL)                 += arm/dsputil_init_arm.o        \
+                                          arm/dsputil_arm.o             \
+                                          arm/jrevdct_arm.o             \
+                                          arm/simple_idct_arm.o
+OBJS-$(CONFIG_FFT)                     += arm/fft_init_arm.o            \
+                                          arm/fft_fixed_init_arm.o
 OBJS-$(CONFIG_FLAC_DECODER)            += arm/flacdsp_init_arm.o        \
-                                          arm/flacdsp_arm.o             \
-
+                                          arm/flacdsp_arm.o
+OBJS-$(CONFIG_H264CHROMA)              += arm/h264chroma_init_arm.o
+OBJS-$(CONFIG_H264DSP)                 += arm/h264dsp_init_arm.o
+OBJS-$(CONFIG_H264PRED)                += arm/h264pred_init_arm.o
+OBJS-$(CONFIG_H264QPEL)                += arm/h264qpel_init_arm.o
+OBJS-$(CONFIG_HPELDSP)                 += arm/hpeldsp_init_arm.o        \
+                                          arm/hpeldsp_arm.o
 OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
-ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
-
 OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
+OBJS-$(CONFIG_VC1_DECODER)             += arm/vc1dsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
 OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
-OBJS-$(CONFIG_VP5_DECODER)             += arm/vp56dsp_init_arm.o
-OBJS-$(CONFIG_VP6_DECODER)             += arm/vp56dsp_init_arm.o
+OBJS-$(CONFIG_VP6_DECODER)             += arm/vp6dsp_init_arm.o
 OBJS-$(CONFIG_VP8_DECODER)             += arm/vp8dsp_init_arm.o
-ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
-                                          arm/vp8dsp_init_armv6.o       \
-                                          arm/vp8dsp_armv6.o
-
-OBJS-$(CONFIG_H264CHROMA)              += arm/h264chroma_init_arm.o
-OBJS-$(CONFIG_H264DSP)                 += arm/h264dsp_init_arm.o
-OBJS-$(CONFIG_H264PRED)                += arm/h264pred_init_arm.o
-OBJS-$(CONFIG_H264QPEL)                += arm/h264qpel_init_arm.o
-
-OBJS-$(CONFIG_HPELDSP)                 += arm/hpeldsp_arm.o             \
-                                          arm/hpeldsp_init_arm.o
-
 OBJS-$(CONFIG_RV30_DECODER)            += arm/rv34dsp_init_arm.o
 OBJS-$(CONFIG_RV40_DECODER)            += arm/rv34dsp_init_arm.o        \
-                                          arm/rv40dsp_init_arm.o        \
-
+                                          arm/rv40dsp_init_arm.o
 OBJS-$(CONFIG_VIDEODSP)                += arm/videodsp_init_arm.o       \
 
-OBJS                                   += arm/dsputil_init_arm.o        \
-                                          arm/dsputil_arm.o             \
-                                          arm/fft_init_arm.o            \
-                                          arm/fft_fixed_init_arm.o      \
-                                          arm/fmtconvert_init_arm.o     \
-                                          arm/jrevdct_arm.o             \
-                                          arm/simple_idct_arm.o         \
-
+ARMV5TE-OBJS-$(CONFIG_DSPUTIL)         += arm/dsputil_init_armv5te.o    \
+                                          arm/simple_idct_armv5te.o
 ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO)       += arm/mpegvideo_armv5te.o       \
-                                          arm/mpegvideo_armv5te_s.o     \
-
+                                          arm/mpegvideo_armv5te_s.o
 ARMV5TE-OBJS-$(CONFIG_VIDEODSP)        += arm/videodsp_init_armv5te.o   \
-                                          arm/videodsp_armv5te.o        \
-
-ARMV5TE-OBJS                           += arm/dsputil_init_armv5te.o    \
-                                          arm/simple_idct_armv5te.o     \
+                                          arm/videodsp_armv5te.o
 
-ARMV6-OBJS                             += arm/dsputil_init_armv6.o      \
+ARMV6-OBJS-$(CONFIG_DSPUTIL)           += arm/dsputil_init_armv6.o      \
                                           arm/dsputil_armv6.o           \
                                           arm/simple_idct_armv6.o       \
 
-ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_armv6.o           \
-                                          arm/hpeldsp_init_armv6.o
-
-VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o
+ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
+ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/h264dsp_armv6.o
+ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_init_armv6.o      \
+                                          arm/hpeldsp_armv6.o
+ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
+ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
+                                          arm/vp8dsp_init_armv6.o       \
+                                          arm/vp8dsp_armv6.o
 
-NEON-OBJS-$(CONFIG_FFT)                += arm/fft_neon.o                \
-                                          arm/fft_fixed_neon.o          \
+VFP-OBJS                               += arm/fmtconvert_vfp.o
 
-NEON-OBJS-$(CONFIG_MDCT)               += arm/mdct_neon.o               \
-                                          arm/mdct_fixed_neon.o         \
+VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
+                                          arm/synth_filter_vfp.o
+VFP-OBJS-$(CONFIG_FFT)                 += arm/fft_vfp.o
+VFP-OBJS-$(CONFIG_MDCT)                += arm/mdct_vfp.o
+VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp_armv6.o
 
-NEON-OBJS-$(CONFIG_RDFT)               += arm/rdft_neon.o               \
+NEON-OBJS                              += arm/fmtconvert_neon.o
 
+NEON-OBJS-$(CONFIG_AC3DSP)             += arm/ac3dsp_neon.o
+NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/aacpsdsp_neon.o           \
+                                          arm/sbrdsp_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o             \
+                                          arm/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_DSPUTIL)            += arm/dsputil_init_neon.o       \
+                                          arm/dsputil_neon.o            \
+                                          arm/int_neon.o                \
+                                          arm/simple_idct_neon.o
+NEON-OBJS-$(CONFIG_FFT)                += arm/fft_neon.o                \
+                                          arm/fft_fixed_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)         += arm/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP)            += arm/h264dsp_neon.o            \
-                                          arm/h264idct_neon.o           \
-
-NEON-OBJS-$(CONFIG_H264PRED)           += arm/h264pred_neon.o           \
-
+                                          arm/h264idct_neon.o
+NEON-OBJS-$(CONFIG_H264PRED)           += arm/h264pred_neon.o
 NEON-OBJS-$(CONFIG_H264QPEL)           += arm/h264qpel_neon.o           \
-
-NEON-OBJS-$(CONFIG_HPELDSP)            += arm/hpeldsp_neon.o            \
-                                          arm/hpeldsp_init_neon.o
-
-NEON-OBJS-$(CONFIG_AC3DSP)             += arm/ac3dsp_neon.o
-
-NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/sbrdsp_neon.o             \
-                                          arm/aacpsdsp_neon.o
-
-NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o             \
-                                          arm/synth_filter_neon.o       \
-
+                                          arm/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_HPELDSP)            += arm/hpeldsp_init_neon.o       \
+                                          arm/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_MDCT)               += arm/mdct_neon.o               \
+                                          arm/mdct_fixed_neon.o
 NEON-OBJS-$(CONFIG_MPEGVIDEO)          += arm/mpegvideo_neon.o
+NEON-OBJS-$(CONFIG_RDFT)               += arm/rdft_neon.o
 NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
 NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
-                                          arm/rv40dsp_neon.o            \
-
+                                          arm/rv40dsp_neon.o
+NEON-OBJS-$(CONFIG_VC1_DECODER)        += arm/vc1dsp_init_neon.o        \
+                                          arm/vc1dsp_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
-
 NEON-OBJS-$(CONFIG_VP3DSP)             += arm/vp3dsp_neon.o
-
-NEON-OBJS-$(CONFIG_VP5_DECODER)        += arm/vp56dsp_neon.o            \
-
-NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp56dsp_neon.o            \
-
+NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp6dsp_neon.o
 NEON-OBJS-$(CONFIG_VP8_DECODER)        += arm/vp8dsp_init_neon.o        \
                                           arm/vp8dsp_neon.o
-
-NEON-OBJS                              += arm/dsputil_init_neon.o       \
-                                          arm/dsputil_neon.o            \
-                                          arm/fmtconvert_neon.o         \
-                                          arm/int_neon.o                \
-                                          arm/simple_idct_neon.o        \
diff --git a/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c b/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c
index 6326376..e04787c 100644
--- a/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/aacpsdsp_neon.S b/ffmpeg/libavcodec/arm/aacpsdsp_neon.S
index fb00900..a93bbfe 100644
--- a/ffmpeg/libavcodec/arm/aacpsdsp_neon.S
+++ b/ffmpeg/libavcodec/arm/aacpsdsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_arm.S b/ffmpeg/libavcodec/arm/ac3dsp_arm.S
index ed8eb37..1aea190 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_arm.S
+++ b/ffmpeg/libavcodec/arm/ac3dsp_arm.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_armv6.S b/ffmpeg/libavcodec/arm/ac3dsp_armv6.S
index 2028d0b..1d2563d 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_armv6.S
+++ b/ffmpeg/libavcodec/arm/ac3dsp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c b/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c
index ffe0747..a3c32ff 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c
@@ -31,6 +31,8 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift);
 void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift);
 void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
 void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
+                                const int16_t *window, unsigned n);
 void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
                                             const int32_t *coef0,
                                             const int32_t *coef1,
@@ -64,6 +66,7 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
         c->ac3_rshift_int32      = ff_ac3_rshift_int32_neon;
         c->float_to_fixed24      = ff_float_to_fixed24_neon;
         c->extract_exponents     = ff_ac3_extract_exponents_neon;
+        c->apply_window_int16    = ff_apply_window_int16_neon;
         c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
         c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
     }
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_neon.S b/ffmpeg/libavcodec/arm/ac3dsp_neon.S
index 42f35e3..89d0ae8 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/ac3dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -109,6 +109,29 @@ function ff_ac3_extract_exponents_neon, export=1
         bx              lr
 endfunc
 
+function ff_apply_window_int16_neon, export=1
+        push            {r4,lr}
+        add             r4,  r1,  r3,  lsl #1
+        add             lr,  r0,  r3,  lsl #1
+        sub             r4,  r4,  #16
+        sub             lr,  lr,  #16
+        mov             r12, #-16
+1:
+        vld1.16         {q0},     [r1,:128]!
+        vld1.16         {q2},     [r2,:128]!
+        vld1.16         {q1},     [r4,:128], r12
+        vrev64.16       q3,  q2
+        vqrdmulh.s16    q0,  q0,  q2
+        vqrdmulh.s16    d2,  d2,  d7
+        vqrdmulh.s16    d3,  d3,  d6
+        vst1.16         {q0},     [r0,:128]!
+        vst1.16         {q1},     [lr,:128], r12
+        subs            r3,  r3,  #16
+        bgt             1b
+
+        pop             {r4,pc}
+endfunc
+
 function ff_ac3_sum_square_butterfly_int32_neon, export=1
         vmov.i64        q0,  #0
         vmov.i64        q1,  #0
diff --git a/ffmpeg/libavcodec/arm/dca.h b/ffmpeg/libavcodec/arm/dca.h
index 2cfd18a..35971a8 100644
--- a/ffmpeg/libavcodec/arm/dca.h
+++ b/ffmpeg/libavcodec/arm/dca.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,50 +30,48 @@
 
 #define decode_blockcodes decode_blockcodes
 static inline int decode_blockcodes(int code1, int code2, int levels,
-                                    int *values)
+                                    int32_t *values)
 {
-    int v0, v1, v2, v3, v4, v5;
+    int32_t v0, v1, v2, v3, v4, v5;
 
-    __asm__ ("smmul   %8,  %14, %18           \n"
-             "smmul   %11, %15, %18           \n"
-             "smlabb  %14, %8,  %17, %14      \n"
-             "smlabb  %15, %11, %17, %15      \n"
-             "smmul   %9,  %8,  %18           \n"
-             "smmul   %12, %11, %18           \n"
-             "sub     %14, %14, %16, lsr #1   \n"
-             "sub     %15, %15, %16, lsr #1   \n"
-             "smlabb  %8,  %9,  %17, %8       \n"
-             "smlabb  %11, %12, %17, %11      \n"
-             "smmul   %10, %9,  %18           \n"
-             "smmul   %13, %12, %18           \n"
-             "str     %14, %0                 \n"
-             "str     %15, %4                 \n"
-             "sub     %8,  %8,  %16, lsr #1   \n"
-             "sub     %11, %11, %16, lsr #1   \n"
-             "smlabb  %9,  %10, %17, %9       \n"
-             "smlabb  %12, %13, %17, %12      \n"
-             "smmul   %14, %10, %18           \n"
-             "smmul   %15, %13, %18           \n"
-             "str     %8,  %1                 \n"
-             "str     %11, %5                 \n"
-             "sub     %9,  %9,  %16, lsr #1   \n"
-             "sub     %12, %12, %16, lsr #1   \n"
-             "smlabb  %10, %14, %17, %10      \n"
-             "smlabb  %13, %15, %17, %13      \n"
-             "str     %9,  %2                 \n"
-             "str     %12, %6                 \n"
-             "sub     %10, %10, %16, lsr #1   \n"
-             "sub     %13, %13, %16, lsr #1   \n"
-             "str     %10, %3                 \n"
-             "str     %13, %7                 \n"
-             : "=m"(values[0]), "=m"(values[1]),
-               "=m"(values[2]), "=m"(values[3]),
-               "=m"(values[4]), "=m"(values[5]),
-               "=m"(values[6]), "=m"(values[7]),
-               "=&r"(v0), "=&r"(v1), "=&r"(v2),
+    __asm__ ("smmul   %0,  %6,  %10           \n"
+             "smmul   %3,  %7,  %10           \n"
+             "smlabb  %6,  %0,  %9,  %6       \n"
+             "smlabb  %7,  %3,  %9,  %7       \n"
+             "smmul   %1,  %0,  %10           \n"
+             "smmul   %4,  %3,  %10           \n"
+             "sub     %6,  %6,  %8,  lsr #1   \n"
+             "sub     %7,  %7,  %8,  lsr #1   \n"
+             "smlabb  %0,  %1,  %9,  %0       \n"
+             "smlabb  %3,  %4,  %9,  %3       \n"
+             "smmul   %2,  %1,  %10           \n"
+             "smmul   %5,  %4,  %10           \n"
+             "str     %6,  [%11, #0]          \n"
+             "str     %7,  [%11, #16]         \n"
+             "sub     %0,  %0,  %8,  lsr #1   \n"
+             "sub     %3,  %3,  %8,  lsr #1   \n"
+             "smlabb  %1,  %2,  %9,  %1       \n"
+             "smlabb  %4,  %5,  %9,  %4       \n"
+             "smmul   %6,  %2,  %10           \n"
+             "smmul   %7,  %5,  %10           \n"
+             "str     %0,  [%11, #4]          \n"
+             "str     %3,  [%11, #20]         \n"
+             "sub     %1,  %1,  %8,  lsr #1   \n"
+             "sub     %4,  %4,  %8,  lsr #1   \n"
+             "smlabb  %2,  %6,  %9,  %2       \n"
+             "smlabb  %5,  %7,  %9,  %5       \n"
+             "str     %1,  [%11, #8]          \n"
+             "str     %4,  [%11, #24]         \n"
+             "sub     %2,  %2,  %8,  lsr #1   \n"
+             "sub     %5,  %5,  %8,  lsr #1   \n"
+             "str     %2,  [%11, #12]         \n"
+             "str     %5,  [%11, #28]         \n"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2),
                "=&r"(v3), "=&r"(v4), "=&r"(v5),
                "+&r"(code1), "+&r"(code2)
-             : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels]));
+             : "r"(levels - 1), "r"(-levels),
+               "r"(ff_inverse[levels]), "r"(values)
+             : "memory");
 
     return code1 | code2;
 }
diff --git a/ffmpeg/libavcodec/arm/dcadsp_init_arm.c b/ffmpeg/libavcodec/arm/dcadsp_init_arm.c
index 56568e0..8893f48 100644
--- a/ffmpeg/libavcodec/arm/dcadsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/dcadsp_init_arm.c
@@ -24,13 +24,47 @@
 #include "libavutil/attributes.h"
 #include "libavcodec/dcadsp.h"
 
+void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
+                        int decifactor, float scale);
+void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
+                                SynthFilterContext *synth, FFTContext *imdct,
+                                float synth_buf_ptr[512],
+                                int *synth_buf_offset, float synth_buf2[32],
+                                const float window[512], float *samples_out,
+                                float raXin[32], float scale);
 void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
                          int decifactor, float scale);
 
+void ff_synth_filter_float_vfp(FFTContext *imdct,
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float synth_buf2[32], const float window[512],
+                               float out[32], const float in[32],
+                               float scale);
+
+void ff_synth_filter_float_neon(FFTContext *imdct,
+                                float *synth_buf_ptr, int *synth_buf_offset,
+                                float synth_buf2[32], const float window[512],
+                                float out[32], const float in[32],
+                                float scale);
+
 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
+        s->lfe_fir = ff_dca_lfe_fir_vfp;
+        s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
+    }
     if (have_neon(cpu_flags))
         s->lfe_fir = ff_dca_lfe_fir_neon;
 }
+
+av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags))
+        s->synth_filter_float = ff_synth_filter_float_vfp;
+    if (have_neon(cpu_flags))
+        s->synth_filter_float = ff_synth_filter_float_neon;
+}
diff --git a/ffmpeg/libavcodec/arm/dsputil_init_neon.c b/ffmpeg/libavcodec/arm/dsputil_init_neon.c
index 6d19af7..c1f250a 100644
--- a/ffmpeg/libavcodec/arm/dsputil_init_neon.c
+++ b/ffmpeg/libavcodec/arm/dsputil_init_neon.c
@@ -45,9 +45,6 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le
 int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                              const int16_t *v3, int len, int mul);
 
-void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
-                                const int16_t *window, unsigned n);
-
 av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
@@ -76,6 +73,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 
     c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
     c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
-
-    c->apply_window_int16 = ff_apply_window_int16_neon;
 }
diff --git a/ffmpeg/libavcodec/arm/dsputil_neon.S b/ffmpeg/libavcodec/arm/dsputil_neon.S
index 307e122..6c8231e 100644
--- a/ffmpeg/libavcodec/arm/dsputil_neon.S
+++ b/ffmpeg/libavcodec/arm/dsputil_neon.S
@@ -169,29 +169,6 @@ NOVFP   ldr             r2,  [sp]
         bx              lr
 endfunc
 
-function ff_apply_window_int16_neon, export=1
-        push            {r4,lr}
-        add             r4,  r1,  r3,  lsl #1
-        add             lr,  r0,  r3,  lsl #1
-        sub             r4,  r4,  #16
-        sub             lr,  lr,  #16
-        mov             r12, #-16
-1:
-        vld1.16         {q0},     [r1,:128]!
-        vld1.16         {q2},     [r2,:128]!
-        vld1.16         {q1},     [r4,:128], r12
-        vrev64.16       q3,  q2
-        vqrdmulh.s16    q0,  q0,  q2
-        vqrdmulh.s16    d2,  d2,  d7
-        vqrdmulh.s16    d3,  d3,  d6
-        vst1.16         {q0},     [r0,:128]!
-        vst1.16         {q1},     [lr,:128], r12
-        subs            r3,  r3,  #16
-        bgt             1b
-
-        pop             {r4,pc}
-endfunc
-
 function ff_vector_clip_int32_neon, export=1
         vdup.32         q0,  r2
         vdup.32         q1,  r3
diff --git a/ffmpeg/libavcodec/arm/fft_fixed_neon.S b/ffmpeg/libavcodec/arm/fft_fixed_neon.S
index fa33eac..d4a38a2 100644
--- a/ffmpeg/libavcodec/arm/fft_fixed_neon.S
+++ b/ffmpeg/libavcodec/arm/fft_fixed_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/fft_init_arm.c b/ffmpeg/libavcodec/arm/fft_init_arm.c
index 8c98abc..7e49b9c 100644
--- a/ffmpeg/libavcodec/arm/fft_init_arm.c
+++ b/ffmpeg/libavcodec/arm/fft_init_arm.c
@@ -26,22 +26,25 @@
 void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
 
+void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input);
+
 void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
 
 void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
 
-void ff_synth_filter_float_neon(FFTContext *imdct,
-                                float *synth_buf_ptr, int *synth_buf_offset,
-                                float synth_buf2[32], const float window[512],
-                                float out[32], const float in[32],
-                                float scale);
-
 av_cold void ff_fft_init_arm(FFTContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (have_vfp(cpu_flags)) {
+#if CONFIG_MDCT
+        if (!have_vfpv3(cpu_flags))
+            s->imdct_half   = ff_imdct_half_vfp;
+#endif
+    }
+
     if (have_neon(cpu_flags)) {
 #if CONFIG_FFT
         s->fft_permute  = ff_fft_permute_neon;
@@ -65,13 +68,3 @@ av_cold void ff_rdft_init_arm(RDFTContext *s)
         s->rdft_calc    = ff_rdft_calc_neon;
 }
 #endif
-
-#if CONFIG_DCA_DECODER
-av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_neon(cpu_flags))
-        s->synth_filter_float = ff_synth_filter_float_neon;
-}
-#endif
diff --git a/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c b/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c
index 1d99c97..37319ed 100644
--- a/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c
+++ b/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c
@@ -25,9 +25,15 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/fmtconvert.h"
 
-void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
+void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
                                         float mul, int len);
 
+void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src,
+                                       float mul, int len);
+void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst,
+                                       const int32_t *src, const float *mul,
+                                       int len);
+
 void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);
 void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
 
@@ -37,8 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
-        c->float_to_int16 = ff_float_to_int16_vfp;
+    if (have_vfp(cpu_flags)) {
+        if (!have_vfpv3(cpu_flags)) {
+            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
+            c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
+        }
+
+        if (have_armv6(cpu_flags)) {
+            c->float_to_int16 = ff_float_to_int16_vfp;
+        }
     }
 
     if (have_neon(cpu_flags)) {
diff --git a/ffmpeg/libavcodec/arm/fmtconvert_vfp.S b/ffmpeg/libavcodec/arm/fmtconvert_vfp.S
index 7b012bc..b14af45 100644
--- a/ffmpeg/libavcodec/arm/fmtconvert_vfp.S
+++ b/ffmpeg/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
  *
  * This file is part of FFmpeg.
  *
@@ -22,57 +22,200 @@
 #include "libavutil/arm/asm.S"
 
 /**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little-endian byte sex.
+ * ARM VFP optimised int32 to float conversion.
+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
+ * (16 bytes alignment is best for BCM2835), little-endian.
  */
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
-        push            {r4-r8,lr}
-        vpush           {d8-d11}
-        vldmia          r1!, {s16-s23}
-        vcvt.s32.f32    s0,  s16
-        vcvt.s32.f32    s1,  s17
-        vcvt.s32.f32    s2,  s18
-        vcvt.s32.f32    s3,  s19
-        vcvt.s32.f32    s4,  s20
-        vcvt.s32.f32    s5,  s21
-        vcvt.s32.f32    s6,  s22
-        vcvt.s32.f32    s7,  s23
+@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, const int32_t *src, const float *mul, int len)
+function ff_int32_to_float_fmul_array8_vfp, export=1
+        push    {lr}
+        ldr     a1, [sp, #4]
+        subs    lr, a1, #3*8
+        bcc     50f                        @ too short to pipeline
+        @ Now need to find (len / 8) % 3. The approximation
+        @ x / 24 = (x * 0xAB) >> 12
+        @ is good for x < 4096, which is true for both AC3 and DCA.
+        mov     a1, #0xAB
+        ldr     ip, =0x03070000            @ RunFast mode, short vectors of length 8, stride 1
+        mul     a1, lr, a1
+        vpush   {s16-s31}
+        mov     a1, a1, lsr #12
+        add     a1, a1, a1, lsl #1
+        rsb     a1, a1, lr, lsr #3
+        cmp     a1, #1
+        fmrx    a1, FPSCR
+        fmxr    FPSCR, ip
+        beq     11f
+        blo     10f
+        @ Array is (2 + multiple of 3) x 8 floats long
+        @ drop through...
+        vldmia          a3!, {s16-s23}
+        vldmia          a4!, {s2,s3}
+        vldmia          a3!, {s24-s31}
+        vcvt.f32.s32    s16, s16
+        vcvt.f32.s32    s17, s17
+        vcvt.f32.s32    s18, s18
+        vcvt.f32.s32    s19, s19
+        vcvt.f32.s32    s20, s20
+        vcvt.f32.s32    s21, s21
+        vcvt.f32.s32    s22, s22
+        vcvt.f32.s32    s23, s23
+        vmul.f32        s16, s16, s2
+        @ drop through...
+3:
+        vldmia          a3!, {s8-s15}
+        vldmia          a4!, {s1}
+        vcvt.f32.s32    s24, s24
+        vcvt.f32.s32    s25, s25
+        vcvt.f32.s32    s26, s26
+        vcvt.f32.s32    s27, s27
+        vcvt.f32.s32    s28, s28
+        vcvt.f32.s32    s29, s29
+        vcvt.f32.s32    s30, s30
+        vcvt.f32.s32    s31, s31
+        vmul.f32        s24, s24, s3
+        vstmia          a2!, {s16-s19}
+        vstmia          a2!, {s20-s23}
+2:
+        vldmia          a3!, {s16-s23}
+        vldmia          a4!, {s2}
+        vcvt.f32.s32    s8, s8
+        vcvt.f32.s32    s9, s9
+        vcvt.f32.s32    s10, s10
+        vcvt.f32.s32    s11, s11
+        vcvt.f32.s32    s12, s12
+        vcvt.f32.s32    s13, s13
+        vcvt.f32.s32    s14, s14
+        vcvt.f32.s32    s15, s15
+        vmul.f32        s8, s8, s1
+        vstmia          a2!, {s24-s27}
+        vstmia          a2!, {s28-s31}
 1:
-        subs            r2,  r2,  #8
-        vmov            r3,  r4,  s0, s1
-        vmov            r5,  r6,  s2, s3
-        vmov            r7,  r8,  s4, s5
-        vmov            ip,  lr,  s6, s7
-        it              gt
-        vldmiagt        r1!, {s16-s23}
-        ssat            r4,  #16, r4
-        ssat            r3,  #16, r3
-        ssat            r6,  #16, r6
-        ssat            r5,  #16, r5
-        pkhbt           r3,  r3,  r4, lsl #16
-        pkhbt           r4,  r5,  r6, lsl #16
-        itttt           gt
-        vcvtgt.s32.f32  s0,  s16
-        vcvtgt.s32.f32  s1,  s17
-        vcvtgt.s32.f32  s2,  s18
-        vcvtgt.s32.f32  s3,  s19
-        itttt           gt
-        vcvtgt.s32.f32  s4,  s20
-        vcvtgt.s32.f32  s5,  s21
-        vcvtgt.s32.f32  s6,  s22
-        vcvtgt.s32.f32  s7,  s23
-        ssat            r8,  #16, r8
-        ssat            r7,  #16, r7
-        ssat            lr,  #16, lr
-        ssat            ip,  #16, ip
-        pkhbt           r5,  r7,  r8, lsl #16
-        pkhbt           r6,  ip,  lr, lsl #16
-        stmia           r0!, {r3-r6}
-        bgt             1b
+        vldmia          a3!, {s24-s31}
+        vldmia          a4!, {s3}
+        vcvt.f32.s32    s16, s16
+        vcvt.f32.s32    s17, s17
+        vcvt.f32.s32    s18, s18
+        vcvt.f32.s32    s19, s19
+        vcvt.f32.s32    s20, s20
+        vcvt.f32.s32    s21, s21
+        vcvt.f32.s32    s22, s22
+        vcvt.f32.s32    s23, s23
+        vmul.f32        s16, s16, s2
+        vstmia          a2!, {s8-s11}
+        vstmia          a2!, {s12-s15}
 
-        vpop            {d8-d11}
-        pop             {r4-r8,pc}
+        subs            lr, lr, #8*3
+        bpl             3b
+
+        vcvt.f32.s32    s24, s24
+        vcvt.f32.s32    s25, s25
+        vcvt.f32.s32    s26, s26
+        vcvt.f32.s32    s27, s27
+        vcvt.f32.s32    s28, s28
+        vcvt.f32.s32    s29, s29
+        vcvt.f32.s32    s30, s30
+        vcvt.f32.s32    s31, s31
+        vmul.f32        s24, s24, s3
+        vstmia          a2!, {s16-s19}
+        vstmia          a2!, {s20-s23}
+        vstmia          a2!, {s24-s27}
+        vstmia          a2!, {s28-s31}
+
+        fmxr    FPSCR, a1
+        vpop    {s16-s31}
+        pop     {pc}
+
+10:     @ Array is (multiple of 3) x 8 floats long
+        vldmia          a3!, {s8-s15}
+        vldmia          a4!, {s1,s2}
+        vldmia          a3!, {s16-s23}
+        vcvt.f32.s32    s8, s8
+        vcvt.f32.s32    s9, s9
+        vcvt.f32.s32    s10, s10
+        vcvt.f32.s32    s11, s11
+        vcvt.f32.s32    s12, s12
+        vcvt.f32.s32    s13, s13
+        vcvt.f32.s32    s14, s14
+        vcvt.f32.s32    s15, s15
+        vmul.f32        s8, s8, s1
+        b               1b
+
+11:     @ Array is (1 + multiple of 3) x 8 floats long
+        vldmia          a3!, {s24-s31}
+        vldmia          a4!, {s3}
+        vldmia          a3!, {s8-s15}
+        vldmia          a4!, {s1}
+        vcvt.f32.s32    s24, s24
+        vcvt.f32.s32    s25, s25
+        vcvt.f32.s32    s26, s26
+        vcvt.f32.s32    s27, s27
+        vcvt.f32.s32    s28, s28
+        vcvt.f32.s32    s29, s29
+        vcvt.f32.s32    s30, s30
+        vcvt.f32.s32    s31, s31
+        vmul.f32        s24, s24, s3
+        b               2b
+
+50:
+        ldr     lr, =0x03070000         @ RunFast mode, short vectors of length 8, stride 1
+        fmrx    ip, FPSCR
+        fmxr    FPSCR, lr
+51:
+        vldmia          a3!, {s8-s15}
+        vldmia          a4!, {s0}
+        vcvt.f32.s32    s8, s8
+        vcvt.f32.s32    s9, s9
+        vcvt.f32.s32    s10, s10
+        vcvt.f32.s32    s11, s11
+        vcvt.f32.s32    s12, s12
+        vcvt.f32.s32    s13, s13
+        vcvt.f32.s32    s14, s14
+        vcvt.f32.s32    s15, s15
+        vmul.f32        s8, s8, s0
+        subs            a1, a1, #8
+        vstmia          a2!, {s8-s11}
+        vstmia          a2!, {s12-s15}
+        bne             51b
+
+        fmxr    FPSCR, ip
+        pop     {pc}
+endfunc
+
+/**
+ * ARM VFP optimised int32 to float conversion.
+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
+ * (16 bytes alignment is best for BCM2835), little-endian.
+ * TODO: could be further optimised by unrolling and interleaving, as above
+ */
+@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len)
+function ff_int32_to_float_fmul_scalar_vfp, export=1
+VFP     tmp     .req    a4
+VFP     len     .req    a3
+NOVFP   tmp     .req    a3
+NOVFP   len     .req    a4
+NOVFP   vmov    s0, a3
+        ldr     tmp, =0x03070000           @ RunFast mode, short vectors of length 8, stride 1
+        fmrx    ip, FPSCR
+        fmxr    FPSCR, tmp
+1:
+        vldmia          a2!, {s8-s15}
+        vcvt.f32.s32    s8, s8
+        vcvt.f32.s32    s9, s9
+        vcvt.f32.s32    s10, s10
+        vcvt.f32.s32    s11, s11
+        vcvt.f32.s32    s12, s12
+        vcvt.f32.s32    s13, s13
+        vcvt.f32.s32    s14, s14
+        vcvt.f32.s32    s15, s15
+        vmul.f32        s8, s8, s0
+        subs            len, len, #8
+        vstmia          a1!, {s8-s11}
+        vstmia          a1!, {s12-s15}
+        bne             1b
+
+        fmxr    FPSCR, ip
+        bx      lr
 endfunc
+        .unreq  tmp
+        .unreq  len
diff --git a/ffmpeg/libavcodec/arm/h264cmc_neon.S b/ffmpeg/libavcodec/arm/h264cmc_neon.S
index 3427e36..0bcae11 100644
--- a/ffmpeg/libavcodec/arm/h264cmc_neon.S
+++ b/ffmpeg/libavcodec/arm/h264cmc_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,6 +39,9 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
         add             r6,  r6,  r7,  lsl #1
         vld1.16         {d22[],d23[]}, [r6,:16]
   .endif
+  .ifc \codec,vc1
+        vmov.u16        q11, #28
+  .endif
 
 A       muls            r7,  r4,  r5
 T       mul             r7,  r4,  r5
@@ -183,6 +186,9 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
         add             r6,  r6,  r7,  lsl #1
         vld1.16         {d22[],d23[]}, [r6,:16]
   .endif
+  .ifc \codec,vc1
+        vmov.u16        q11, #28
+  .endif
 
 A       muls            r7,  r4,  r5
 T       mul             r7,  r4,  r5
@@ -376,14 +382,12 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
 endfunc
 .endm
 
-#if CONFIG_H264_DECODER
         h264_chroma_mc8 put
         h264_chroma_mc8 avg
         h264_chroma_mc4 put
         h264_chroma_mc4 avg
         h264_chroma_mc2 put
         h264_chroma_mc2 avg
-#endif
 
 #if CONFIG_RV40_DECODER
 const   rv40bias
@@ -398,3 +402,10 @@ endconst
         h264_chroma_mc4 put, rv40
         h264_chroma_mc4 avg, rv40
 #endif
+
+#if CONFIG_VC1_DECODER
+        h264_chroma_mc8 put, vc1
+        h264_chroma_mc8 avg, vc1
+        h264_chroma_mc4 put, vc1
+        h264_chroma_mc4 avg, vc1
+#endif
diff --git a/ffmpeg/libavcodec/arm/h264dsp_init_arm.c b/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
index 785b604..2cafbaf 100644
--- a/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
@@ -24,6 +24,8 @@
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/h264dsp.h"
 
+int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size);
+
 void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
                                      int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
@@ -68,8 +70,8 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
                              int16_t *block, int stride,
                              const uint8_t nnzc[6*8]);
 
-static av_cold void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
-                                         const int chroma_format_idc)
+static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
+                                      const int chroma_format_idc)
 {
 #if HAVE_NEON
     if (bit_depth == 8) {
@@ -106,6 +108,8 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (have_armv6(cpu_flags))
+        c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6;
     if (have_neon(cpu_flags))
-        ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
+        h264dsp_init_neon(c, bit_depth, chroma_format_idc);
 }
diff --git a/ffmpeg/libavcodec/arm/h264idct_neon.S b/ffmpeg/libavcodec/arm/h264idct_neon.S
index fa5b90c..2edeca2 100644
--- a/ffmpeg/libavcodec/arm/h264idct_neon.S
+++ b/ffmpeg/libavcodec/arm/h264idct_neon.S
@@ -187,8 +187,8 @@ endfunc
         vshr.s16        q2,  q10, #1
         vadd.i16        q0,  q8,  q12
         vld1.16         {q14-q15},[r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
         vsub.i16        q1,  q8,  q12
         vshr.s16        q3,  q14, #1
         vsub.i16        q2,  q2,  q14
@@ -267,16 +267,16 @@ endfunc
 .endm
 
 function ff_h264_idct8_add_neon, export=1
-        vmov.i16        q7,       #0
+        vmov.i16        q3,       #0
         vld1.16         {q8-q9},  [r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
         vld1.16         {q10-q11},[r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
         vld1.16         {q12-q13},[r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
 
         idct8x8_cols    0
         idct8x8_cols    1
diff --git a/ffmpeg/libavcodec/arm/h264pred_init_arm.c b/ffmpeg/libavcodec/arm/h264pred_init_arm.c
index 5ec39ce..1562f0b 100644
--- a/ffmpeg/libavcodec/arm/h264pred_init_arm.c
+++ b/ffmpeg/libavcodec/arm/h264pred_init_arm.c
@@ -45,9 +45,9 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
 void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
 
-static av_cold void ff_h264_pred_init_neon(H264PredContext *h, int codec_id,
-                                           const int bit_depth,
-                                           const int chroma_format_idc)
+static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
+                                        const int bit_depth,
+                                        const int chroma_format_idc)
 {
 #if HAVE_NEON
     const int high_depth = bit_depth > 8;
@@ -88,5 +88,5 @@ av_cold void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
     int cpu_flags = av_get_cpu_flags();
 
     if (have_neon(cpu_flags))
-        ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
+        h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
 }
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_arm.h b/ffmpeg/libavcodec/arm/hpeldsp_arm.h
index e79bc6f..3f18c62 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_arm.h
+++ b/ffmpeg/libavcodec/arm/hpeldsp_arm.h
@@ -23,7 +23,7 @@
 
 #include "libavcodec/hpeldsp.h"
 
-void ff_hpeldsp_init_armv6(HpelDSPContext* c, int flags);
+void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags);
 
 #endif /* AVCODEC_ARM_HPELDSP_H */
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c b/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c
index bae93eb..2cc2b78 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c
@@ -20,7 +20,9 @@
  */
 
 #include "libavutil/arm/cpu.h"
+#include "libavutil/attributes.h"
 #include "libavcodec/bit_depth_template.c" // for CALL_2X_PIXELS
+#include "libavcodec/rnd_avg.h"
 #include "hpeldsp_arm.h"
 
 void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
@@ -41,7 +43,7 @@ CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm,  ff_put_no_rnd_pixels8_x2_arm, 8)
 CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm,  ff_put_no_rnd_pixels8_y2_arm, 8)
 CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8)
 
-void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags)
+av_cold void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags)
 {
     int cpu_flags = av_get_cpu_flags();
 
@@ -63,6 +65,8 @@ void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags)
     c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
     c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
 
-    if (have_armv6(cpu_flags))   ff_hpeldsp_init_armv6(c, flags);
-    if (have_neon(cpu_flags))    ff_hpeldsp_init_neon(c, flags);
+    if (have_armv6(cpu_flags))
+        ff_hpeldsp_init_armv6(c, flags);
+    if (have_neon(cpu_flags))
+        ff_hpeldsp_init_neon(c, flags);
 }
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c b/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c
index da4caf8..967a8e0 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c
+++ b/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stddef.h>
 #include <stdint.h>
 
 #include "libavutil/attributes.h"
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c b/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c
index d577735..d9feadd 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c
+++ b/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c
@@ -19,8 +19,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stddef.h>
 #include <stdint.h>
 
+#include "libavutil/attributes.h"
 #include "hpeldsp_arm.h"
 
 void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
@@ -50,7 +52,7 @@ void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
 void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
 void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
 
-void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags)
+av_cold void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags)
 {
     c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
diff --git a/ffmpeg/libavcodec/arm/int_neon.S b/ffmpeg/libavcodec/arm/int_neon.S
index 6b28a97..b3f5a69 100644
--- a/ffmpeg/libavcodec/arm/int_neon.S
+++ b/ffmpeg/libavcodec/arm/int_neon.S
@@ -1,6 +1,6 @@
 /*
  * ARM NEON optimised integer operations
- * Copyright (c) 2009 Kostya Shishkov
+ * Copyright (c) 2009 Konstantin Shishkov
  *
  * This file is part of FFmpeg.
  *
@@ -41,10 +41,10 @@ function ff_scalarproduct_int16_neon, export=1
 
         vpadd.s32       d16, d0,   d1
         vpadd.s32       d17, d2,   d3
-        vpadd.s32       d10, d4,   d5
-        vpadd.s32       d11, d6,   d7
+        vpadd.s32       d18, d4,   d5
+        vpadd.s32       d19, d6,   d7
         vpadd.s32       d0,  d16,  d17
-        vpadd.s32       d1,  d10,  d11
+        vpadd.s32       d1,  d18,  d19
         vpadd.s32       d2,  d0,   d1
         vpaddl.s32      d3,  d2
         vmov.32         r0,  d3[0]
@@ -81,10 +81,10 @@ function ff_scalarproduct_and_madd_int16_neon, export=1
 
         vpadd.s32       d16, d0,   d1
         vpadd.s32       d17, d2,   d3
-        vpadd.s32       d10, d4,   d5
-        vpadd.s32       d11, d6,   d7
+        vpadd.s32       d18, d4,   d5
+        vpadd.s32       d19, d6,   d7
         vpadd.s32       d0,  d16,  d17
-        vpadd.s32       d1,  d10,  d11
+        vpadd.s32       d1,  d18,  d19
         vpadd.s32       d2,  d0,   d1
         vpaddl.s32      d3,  d2
         vmov.32         r0,  d3[0]
diff --git a/ffmpeg/libavcodec/arm/mdct_fixed_neon.S b/ffmpeg/libavcodec/arm/mdct_fixed_neon.S
index c77be59..365c5e7 100644
--- a/ffmpeg/libavcodec/arm/mdct_fixed_neon.S
+++ b/ffmpeg/libavcodec/arm/mdct_fixed_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
index 49bd0bc..977abb6 100644
--- a/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
+++ b/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c b/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c
index e73aee6..98e0c8a 100644
--- a/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/neon.S b/ffmpeg/libavcodec/arm/neon.S
index 716a607..787bc4b 100644
--- a/ffmpeg/libavcodec/arm/neon.S
+++ b/ffmpeg/libavcodec/arm/neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/rv34dsp_neon.S b/ffmpeg/libavcodec/arm/rv34dsp_neon.S
index a29123f..3d4a83d 100644
--- a/ffmpeg/libavcodec/arm/rv34dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/rv34dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c b/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c
index fec3702..3bf9ac7 100644
--- a/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c
@@ -70,7 +70,7 @@ void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_
                                      int filter_q1, int alpha, int beta,
                                      int lim_p0q0, int lim_q1, int lim_p1);
 
-static av_cold void ff_rv40dsp_init_neon(RV34DSPContext *c)
+static av_cold void rv40dsp_init_neon(RV34DSPContext *c)
 {
     c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
     c->put_pixels_tab[0][ 3] = ff_put_rv40_qpel16_mc30_neon;
@@ -144,5 +144,5 @@ av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
     int cpu_flags = av_get_cpu_flags();
 
     if (have_neon(cpu_flags))
-        ff_rv40dsp_init_neon(c);
+        rv40dsp_init_neon(c);
 }
diff --git a/ffmpeg/libavcodec/arm/rv40dsp_neon.S b/ffmpeg/libavcodec/arm/rv40dsp_neon.S
index 6bd45eb..099f88c 100644
--- a/ffmpeg/libavcodec/arm/rv40dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/rv40dsp_neon.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c b/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c
index 4da7967..4fb69f9 100644
--- a/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/sbrdsp_neon.S b/ffmpeg/libavcodec/arm/sbrdsp_neon.S
index 610397f..e66abd6 100644
--- a/ffmpeg/libavcodec/arm/sbrdsp_neon.S
+++ b/ffmpeg/libavcodec/arm/sbrdsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/simple_idct_arm.S b/ffmpeg/libavcodec/arm/simple_idct_arm.S
index dd1c815..50d20c9 100644
--- a/ffmpeg/libavcodec/arm/simple_idct_arm.S
+++ b/ffmpeg/libavcodec/arm/simple_idct_arm.S
@@ -83,7 +83,7 @@ __row_loop:
         orrs r5, r5, r7          @ R5=R4 | R3 | R2 | R7
         beq __almost_empty_row
 
-__b_evaluation:
+@@ __b_evaluation:
         @@ at this point, R0=block (temp),  R1(free), R2=ROWr32[1], R3=ROWr32[2], R4=ROWr32[3],
         @@     R5=(temp), R6=ROWr16[0], R7=ROWr16[1], R8-R11 free,
         @@     R12=__const_ptr_, R14=&block[n]
@@ -159,7 +159,7 @@ __end_b_evaluation:
         @@     R5=b2, R6=ROWr16[0], R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
         @@     R12=__const_ptr_, R14=&block[n]
 
-__a_evaluation:
+@@ __a_evaluation:
         @@ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
         @@ a1 = a0 + W6 * row[2];
         @@ a2 = a0 - W6 * row[2];
@@ -295,7 +295,7 @@ __end_row_loop:
         add r14, r0, #14        @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
 __col_loop:
 
-__b_evaluation2:
+@@ __b_evaluation2:
         @@ at this point, R0=block (temp),  R1-R11 (free)
         @@     R12=__const_ptr_, R14=&block[n]
         @@ proceed with b0-b3 first, followed by a0-a3
@@ -357,12 +357,12 @@ __b_evaluation2:
         it    ne
         mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1
         @@ R4 is free now
-__end_b_evaluation2:
+@@ __end_b_evaluation2:
         @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
         @@     R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
         @@     R12=__const_ptr_, R14=&block[n]
 
-__a_evaluation2:
+@@ __a_evaluation2:
         @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
         @@ a1 = a0 + W6 * row[2];
         @@ a2 = a0 - W6 * row[2];
@@ -414,7 +414,7 @@ __a_evaluation2:
         itt   ne
         subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
         addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
-__end_a_evaluation2:
+@@ __end_a_evaluation2:
         @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
         @@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
         @@     R12=__const_ptr_, R14=&block[n]
@@ -452,7 +452,7 @@ __end_a_evaluation2:
         strh r8, [r14, #96]
         strh r9, [r14, #112]
 
-__end_col_loop:
+@@ __end_col_loop:
         @@ at this point, R0-R11 (free)
         @@     R12=__const_ptr_, R14=&block[n]
         ldr r0, [sp, #0]         @ R0=block
@@ -463,7 +463,7 @@ __end_col_loop:
 
 
 
-__end_simple_idct_arm:
+@@ __end_simple_idct_arm:
         @@ restore registers to previous status!
         add sp, sp, #8 @@ the local variables!
         ldmfd sp!, {r4-r11, r15} @@ update PC with LR content.
diff --git a/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c b/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c
deleted file mode 100644
index f53cbae..0000000
--- a/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/arm/cpu.h"
-#include "libavcodec/avcodec.h"
-#include "libavcodec/vp56dsp.h"
-
-void ff_vp6_edge_filter_hor_neon(uint8_t *yuv, int stride, int t);
-void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, int stride, int t);
-
-av_cold void ff_vp56dsp_init_arm(VP56DSPContext *s, enum AVCodecID codec)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (codec != AV_CODEC_ID_VP5 && have_neon(cpu_flags)) {
-        s->edge_filter_hor = ff_vp6_edge_filter_hor_neon;
-        s->edge_filter_ver = ff_vp6_edge_filter_ver_neon;
-    }
-}
diff --git a/ffmpeg/libavcodec/arm/vp56dsp_neon.S b/ffmpeg/libavcodec/arm/vp56dsp_neon.S
deleted file mode 100644
index 03dd28d..0000000
--- a/ffmpeg/libavcodec/arm/vp56dsp_neon.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-.macro  vp6_edge_filter
-        vdup.16         q3,  r2                 @ t
-        vmov.i16        q13, #1
-        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
-        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
-        vsubl.u8        q14, d21, d19
-        vsubl.u8        q15, d17, d23
-        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
-        vadd.i16        d29, d28, d28
-        vadd.i16        q0,  q0,  q1            @    p[0]-p[-s]  + p[-2*s]-p[s]
-        vadd.i16        d28, d28, d30
-        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
-        vadd.i16        d28, d28, d29
-        vrshr.s16       q0,  q0,  #3            @ v
-        vrshr.s16       d28, d28, #3
-        vsub.i16        q8,  q3,  q13           @ t-1
-        vabs.s16        q1,  q0                 @ V
-        vshr.s16        q2,  q0,  #15           @ s
-        vabs.s16        d30, d28
-        vshr.s16        d29, d28, #15
-        vsub.i16        q12, q1,  q3            @ V-t
-        vsub.i16        d31, d30, d6
-        vsub.i16        q12, q12, q13           @ V-t-1
-        vsub.i16        d31, d31, d26
-        vcge.u16        q12, q12, q8            @ V-t-1 >= t-1
-        vcge.u16        d31, d31, d16
-        vadd.i16        q13, q3,  q3            @ 2*t
-        vadd.i16        d16, d6,  d6
-        vsub.i16        q13, q13, q1            @ 2*t - V
-        vsub.i16        d16, d16, d30
-        vadd.i16        q13, q13, q2            @ += s
-        vadd.i16        d16, d16, d29
-        veor            q13, q13, q2            @ ^= s
-        veor            d16, d16, d29
-        vbif            q0,  q13, q12
-        vbif            d28, d16, d31
-        vmovl.u8        q1,  d20
-        vmovl.u8        q15, d21
-        vaddw.u8        q2,  q0,  d18
-        vaddw.u8        q3,  q14, d19
-        vsub.i16        q1,  q1,  q0
-        vsub.i16        d30, d30, d28
-        vqmovun.s16     d18, q2
-        vqmovun.s16     d19, q3
-        vqmovun.s16     d20, q1
-        vqmovun.s16     d21, q15
-.endm
-
-function ff_vp6_edge_filter_ver_neon, export=1
-        sub             r0,  r0,  r1,  lsl #1
-        vld1.8          {q8},     [r0], r1      @ p[-2*s]
-        vld1.8          {q9},     [r0], r1      @ p[-s]
-        vld1.8          {q10},    [r0], r1      @ p[0]
-        vld1.8          {q11},    [r0]          @ p[s]
-        vp6_edge_filter
-        sub             r0,  r0,  r1,  lsl #1
-        sub             r1,  r1,  #8
-        vst1.8          {d18},    [r0]!
-        vst1.32         {d19[0]}, [r0], r1
-        vst1.8          {d20},    [r0]!
-        vst1.32         {d21[0]}, [r0]
-        bx              lr
-endfunc
-
-function ff_vp6_edge_filter_hor_neon, export=1
-        sub             r3,  r0,  #1
-        sub             r0,  r0,  #2
-        vld1.32         {d16[0]}, [r0], r1
-        vld1.32         {d18[0]}, [r0], r1
-        vld1.32         {d20[0]}, [r0], r1
-        vld1.32         {d22[0]}, [r0], r1
-        vld1.32         {d16[1]}, [r0], r1
-        vld1.32         {d18[1]}, [r0], r1
-        vld1.32         {d20[1]}, [r0], r1
-        vld1.32         {d22[1]}, [r0], r1
-        vld1.32         {d17[0]}, [r0], r1
-        vld1.32         {d19[0]}, [r0], r1
-        vld1.32         {d21[0]}, [r0], r1
-        vld1.32         {d23[0]}, [r0], r1
-        vtrn.8          q8,  q9
-        vtrn.8          q10, q11
-        vtrn.16         q8,  q10
-        vtrn.16         q9,  q11
-        vp6_edge_filter
-        vtrn.8          q9,  q10
-        vst1.16         {d18[0]}, [r3], r1
-        vst1.16         {d20[0]}, [r3], r1
-        vst1.16         {d18[1]}, [r3], r1
-        vst1.16         {d20[1]}, [r3], r1
-        vst1.16         {d18[2]}, [r3], r1
-        vst1.16         {d20[2]}, [r3], r1
-        vst1.16         {d18[3]}, [r3], r1
-        vst1.16         {d20[3]}, [r3], r1
-        vst1.16         {d19[0]}, [r3], r1
-        vst1.16         {d21[0]}, [r3], r1
-        vst1.16         {d19[1]}, [r3], r1
-        vst1.16         {d21[1]}, [r3], r1
-        bx              lr
-endfunc
diff --git a/ffmpeg/libavcodec/arm/vp8dsp.h b/ffmpeg/libavcodec/arm/vp8dsp.h
index ce00e4a..6041ef1 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp.h
+++ b/ffmpeg/libavcodec/arm/vp8dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_armv6.S b/ffmpeg/libavcodec/arm/vp8dsp_armv6.S
index 5207758..a14b188 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_armv6.S
+++ b/ffmpeg/libavcodec/arm/vp8dsp_armv6.S
@@ -5,20 +5,20 @@
  * Copyright (c) 2010 Rob Clark <rob@ti.com>
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * This code was partially ported from libvpx, which uses this license:
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c b/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c
index e15e191..563268e 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c
+++ b/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c b/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c
index 0468181..ae045a6 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c
+++ b/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_neon.S b/ffmpeg/libavcodec/arm/vp8dsp_neon.S
index 04e7c5c..436b340 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/vp8dsp_neon.S
@@ -1576,18 +1576,19 @@ endconst
 /* Bilinear MC */
 
 function ff_put_vp8_bilin16_h_neon, export=1
-        ldr             r3,  [sp, #4]           @ mx
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #8]           @ mx
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r12, [sp]               @ h
+        ldr             r12, [sp, #4]           @ h
 1:
         subs            r12, r12, #2
-        vld1.8          {d2-d4},  [r2], r1
+        vld1.8          {d2-d4},  [r2], r3
         vext.8          q2,  q1,  q2,  #1
         vmull.u8        q8,  d2,  d1
         vmlal.u8        q8,  d4,  d0
-        vld1.8          {d18-d20},[r2], r1
+        vld1.8          {d18-d20},[r2], r3
         vmull.u8        q3,  d3,  d1
         vmlal.u8        q3,  d5,  d0
         vext.8          q10, q9,  q10, #1
@@ -1603,24 +1604,25 @@ function ff_put_vp8_bilin16_h_neon, export=1
         vst1.8          {q3},     [r0,:128], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin16_v_neon, export=1
-        ldr             r3,  [sp, #8]           @ my
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #12]          @ my
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r12, [sp]               @ h
-        vld1.8          {q1},     [r2], r1
+        ldr             r12, [sp, #4]           @ h
+        vld1.8          {q1},     [r2], r3
 1:
         subs            r12, r12, #2
-        vld1.8          {q2},     [r2], r1
+        vld1.8          {q2},     [r2], r3
         vmull.u8        q3,  d2,  d1
         vmlal.u8        q3,  d4,  d0
         vmull.u8        q8,  d3,  d1
         vmlal.u8        q8,  d5,  d0
-        vld1.8          {q1},     [r2], r1
+        vld1.8          {q1},     [r2], r3
         vmull.u8        q9,  d4,  d1
         vmlal.u8        q9,  d2,  d0
         vmull.u8        q10, d5,  d1
@@ -1633,21 +1635,22 @@ function ff_put_vp8_bilin16_v_neon, export=1
         vst1.8          {q3},     [r0,:128], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin16_hv_neon, export=1
-        ldr             r3,  [sp, #4]           @ mx
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #8]           @ mx
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r3,  [sp, #8]           @ my
-        rsb             r12, r3,  #8
-        vdup.8          d2,  r3
+        ldr             lr,  [sp, #12]          @ my
+        rsb             r12, lr,  #8
+        vdup.8          d2,  lr
         vdup.8          d3,  r12
-        ldr             r12, [sp]               @ h
+        ldr             r12, [sp, #4]           @ h
 
-        vld1.8          {d4-d6},  [r2], r1
+        vld1.8          {d4-d6},  [r2], r3
         vext.8          q3,  q2,  q3,  #1
         vmull.u8        q8,  d4,  d1
         vmlal.u8        q8,  d6,  d0
@@ -1657,11 +1660,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1
         vrshrn.u16      d5,  q9,  #3
 1:
         subs            r12, r12, #2
-        vld1.8          {d18-d20},[r2], r1
+        vld1.8          {d18-d20},[r2], r3
         vext.8          q10, q9,  q10, #1
         vmull.u8        q11, d18, d1
         vmlal.u8        q11, d20, d0
-        vld1.8          {d26-d28},[r2], r1
+        vld1.8          {d26-d28},[r2], r3
         vmull.u8        q12, d19, d1
         vmlal.u8        q12, d21, d0
         vext.8          q14, q13, q14, #1
@@ -1689,22 +1692,23 @@ function ff_put_vp8_bilin16_hv_neon, export=1
         vst1.8          {q10},    [r0,:128], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin8_h_neon, export=1
-        ldr             r3,  [sp, #4]           @ mx
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #8]           @ mx
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r12, [sp]               @ h
+        ldr             r12, [sp, #4]           @ h
 1:
         subs            r12, r12, #2
-        vld1.8          {q1},     [r2], r1
+        vld1.8          {q1},     [r2], r3
         vext.8          d3,  d2,  d3,  #1
         vmull.u8        q2,  d2,  d1
         vmlal.u8        q2,  d3,  d0
-        vld1.8          {q3},     [r2], r1
+        vld1.8          {q3},     [r2], r3
         vext.8          d7,  d6,  d7,  #1
         vmull.u8        q8,  d6,  d1
         vmlal.u8        q8,  d7,  d0
@@ -1714,22 +1718,23 @@ function ff_put_vp8_bilin8_h_neon, export=1
         vst1.8          {d16},    [r0,:64], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin8_v_neon, export=1
-        ldr             r3,  [sp, #8]           @ my
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #12]          @ my
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r12, [sp]               @ h
-        vld1.8          {d2},     [r2], r1
+        ldr             r12, [sp, #4]           @ h
+        vld1.8          {d2},     [r2], r3
 1:
         subs            r12, r12, #2
-        vld1.8          {d3},     [r2], r1
+        vld1.8          {d3},     [r2], r3
         vmull.u8        q2,  d2,  d1
         vmlal.u8        q2,  d3,  d0
-        vld1.8          {d2},     [r2], r1
+        vld1.8          {d2},     [r2], r3
         vmull.u8        q3,  d3,  d1
         vmlal.u8        q3,  d2,  d0
         vrshrn.u16      d4,  q2,  #3
@@ -1738,32 +1743,33 @@ function ff_put_vp8_bilin8_v_neon, export=1
         vst1.8          {d6},     [r0,:64], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin8_hv_neon, export=1
-        ldr             r3,  [sp, #4]           @ mx
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #8]           @ mx
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r3,  [sp, #8]           @ my
-        rsb             r12, r3,  #8
-        vdup.8          d2,  r3
+        ldr             lr,  [sp, #12]          @ my
+        rsb             r12, lr,  #8
+        vdup.8          d2,  lr
         vdup.8          d3,  r12
-        ldr             r12, [sp]               @ h
+        ldr             r12, [sp, #4]           @ h
 
-        vld1.8          {q2},     [r2], r1
+        vld1.8          {q2},     [r2], r3
         vext.8          d5,  d4,  d5,  #1
         vmull.u8        q9,  d4,  d1
         vmlal.u8        q9,  d5,  d0
         vrshrn.u16      d22, q9,  #3
 1:
         subs            r12, r12, #2
-        vld1.8          {q3},     [r2], r1
+        vld1.8          {q3},     [r2], r3
         vext.8          d7,  d6,  d7,  #1
         vmull.u8        q8,  d6,  d1
         vmlal.u8        q8,  d7,  d0
-        vld1.8          {q2},     [r2], r1
+        vld1.8          {q2},     [r2], r3
         vext.8          d5,  d4,  d5,  #1
         vmull.u8        q9,  d4,  d1
         vmlal.u8        q9,  d5,  d0
@@ -1779,20 +1785,21 @@ function ff_put_vp8_bilin8_hv_neon, export=1
         vst1.8          {d23},    [r0,:64], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin4_h_neon, export=1
-        ldr             r3,  [sp, #4]           @ mx
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #8]           @ mx
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r12, [sp]               @ h
+        ldr             r12, [sp, #4]           @ h
 1:
         subs            r12, r12, #2
-        vld1.8          {d2},     [r2], r1
+        vld1.8          {d2},     [r2], r3
         vext.8          d3,  d2,  d3,  #1
-        vld1.8          {d6},     [r2], r1
+        vld1.8          {d6},     [r2], r3
         vext.8          d7,  d6,  d7,  #1
         vtrn.32         q1,  q3
         vmull.u8        q2,  d2,  d1
@@ -1802,20 +1809,21 @@ function ff_put_vp8_bilin4_h_neon, export=1
         vst1.32         {d4[1]}, [r0,:32], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin4_v_neon, export=1
-        ldr             r3,  [sp, #8]           @ my
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #12]           @ my
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r12, [sp]               @ h
-        vld1.32         {d2[]},   [r2], r1
+        ldr             r12, [sp, #4]            @ h
+        vld1.32         {d2[]},   [r2], r3
 1:
         vld1.32         {d3[]},   [r2]
-        vld1.32         {d2[1]},  [r2], r1
-        vld1.32         {d3[1]},  [r2], r1
+        vld1.32         {d2[1]},  [r2], r3
+        vld1.32         {d3[1]},  [r2], r3
         vmull.u8        q2,  d2,  d1
         vmlal.u8        q2,  d3,  d0
         vtrn.32         d3,  d2
@@ -1825,30 +1833,31 @@ function ff_put_vp8_bilin4_v_neon, export=1
         subs            r12, r12, #2
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc
 
 function ff_put_vp8_bilin4_hv_neon, export=1
-        ldr             r3,  [sp, #4]           @ mx
-        rsb             r12, r3,  #8
-        vdup.8          d0,  r3
+        push            {lr}
+        ldr             lr,  [sp, #8]           @ mx
+        rsb             r12, lr,  #8
+        vdup.8          d0,  lr
         vdup.8          d1,  r12
-        ldr             r3,  [sp, #8]           @ my
-        rsb             r12, r3,  #8
-        vdup.8          d2,  r3
+        ldr             lr,  [sp, #12]          @ my
+        rsb             r12, lr,  #8
+        vdup.8          d2,  lr
         vdup.8          d3,  r12
-        ldr             r12, [sp]               @ h
+        ldr             r12, [sp, #4]           @ h
 
-        vld1.8          {d4},     [r2], r1
+        vld1.8          {d4},     [r2], r3
         vext.8          d5,  d4,  d4,  #1
         vmull.u8        q9,  d4,  d1
         vmlal.u8        q9,  d5,  d0
         vrshrn.u16      d22, q9,  #3
 1:
         subs            r12, r12, #2
-        vld1.8          {d6},     [r2], r1
+        vld1.8          {d6},     [r2], r3
         vext.8          d7,  d6,  d6,  #1
-        vld1.8          {d4},     [r2], r1
+        vld1.8          {d4},     [r2], r3
         vext.8          d5,  d4,  d4,  #1
         vtrn.32         q3,  q2
         vmull.u8        q8,  d6,  d1
@@ -1863,5 +1872,5 @@ function ff_put_vp8_bilin4_hv_neon, export=1
         vst1.32         {d20[1]}, [r0,:32], r1
         bgt             1b
 
-        bx              lr
+        pop             {pc}
 endfunc