summary | refs | log | tree | commit | diff
path: root/ffmpeg/libavcodec/arm
diff options
context:
space:
mode:
Diffstat (limited to 'ffmpeg/libavcodec/arm')
-rw-r--r-- ffmpeg/libavcodec/arm/Makefile 146
-rw-r--r-- ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c 8
-rw-r--r-- ffmpeg/libavcodec/arm/aacpsdsp_neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/ac3dsp_arm.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/ac3dsp_armv6.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/ac3dsp_init_arm.c 3
-rw-r--r-- ffmpeg/libavcodec/arm/ac3dsp_neon.S 31
-rw-r--r-- ffmpeg/libavcodec/arm/dca.h 86
-rw-r--r-- ffmpeg/libavcodec/arm/dcadsp_init_arm.c 34
-rw-r--r-- ffmpeg/libavcodec/arm/dsputil_init_neon.c 5
-rw-r--r-- ffmpeg/libavcodec/arm/dsputil_neon.S 23
-rw-r--r-- ffmpeg/libavcodec/arm/fft_fixed_neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/fft_init_arm.c 25
-rw-r--r-- ffmpeg/libavcodec/arm/fmtconvert_init_arm.c 19
-rw-r--r-- ffmpeg/libavcodec/arm/fmtconvert_vfp.S 245
-rw-r--r-- ffmpeg/libavcodec/arm/h264cmc_neon.S 23
-rw-r--r-- ffmpeg/libavcodec/arm/h264dsp_init_arm.c 10
-rw-r--r-- ffmpeg/libavcodec/arm/h264idct_neon.S 18
-rw-r--r-- ffmpeg/libavcodec/arm/h264pred_init_arm.c 8
-rw-r--r-- ffmpeg/libavcodec/arm/hpeldsp_arm.h 2
-rw-r--r-- ffmpeg/libavcodec/arm/hpeldsp_init_arm.c 10
-rw-r--r-- ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c 1
-rw-r--r-- ffmpeg/libavcodec/arm/hpeldsp_init_neon.c 4
-rw-r--r-- ffmpeg/libavcodec/arm/int_neon.S 14
-rw-r--r-- ffmpeg/libavcodec/arm/mdct_fixed_neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c 8
-rw-r--r-- ffmpeg/libavcodec/arm/neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/rv34dsp_neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/rv40dsp_init_arm.c 4
-rw-r--r-- ffmpeg/libavcodec/arm/rv40dsp_neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/sbrdsp_init_arm.c 8
-rw-r--r-- ffmpeg/libavcodec/arm/sbrdsp_neon.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/simple_idct_arm.S 16
-rw-r--r-- ffmpeg/libavcodec/arm/vp56dsp_init_arm.c 39
-rw-r--r-- ffmpeg/libavcodec/arm/vp56dsp_neon.S 121
-rw-r--r-- ffmpeg/libavcodec/arm/vp8dsp.h 8
-rw-r--r-- ffmpeg/libavcodec/arm/vp8dsp_armv6.S 8
-rw-r--r-- ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c 8
-rw-r--r-- ffmpeg/libavcodec/arm/vp8dsp_init_neon.c 8
-rw-r--r-- ffmpeg/libavcodec/arm/vp8dsp_neon.S 165
41 files changed, 610 insertions, 578 deletions
diff --git a/ffmpeg/libavcodec/arm/Makefile b/ffmpeg/libavcodec/arm/Makefile
index 011404c..277abd9 100644
--- a/ffmpeg/libavcodec/arm/Makefile
+++ b/ffmpeg/libavcodec/arm/Makefile
@@ -1,116 +1,98 @@
ARCH_HEADERS = mathops.h
+OBJS += arm/fmtconvert_init_arm.o
+
+OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_init_arm.o \
+ arm/sbrdsp_init_arm.o
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
arm/ac3dsp_arm.o
-
-OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_init_arm.o \
- arm/aacpsdsp_init_arm.o
-
-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \
-
-ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
-
+OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o
+OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \
+ arm/dsputil_arm.o \
+ arm/jrevdct_arm.o \
+ arm/simple_idct_arm.o
+OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o \
+ arm/fft_fixed_init_arm.o
OBJS-$(CONFIG_FLAC_DECODER) += arm/flacdsp_init_arm.o \
- arm/flacdsp_arm.o \
-
+ arm/flacdsp_arm.o
+OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o
+OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
+OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
+OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
+OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \
+ arm/hpeldsp_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
-ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
-
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
+OBJS-$(CONFIG_VC1_DECODER) += arm/vc1dsp_init_arm.o
OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o
-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o
+OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_init_arm.o
OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o
-ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
- arm/vp8dsp_init_armv6.o \
- arm/vp8dsp_armv6.o
-
-OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o
-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
-OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
-
-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_arm.o \
- arm/hpeldsp_init_arm.o
-
OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \
- arm/rv40dsp_init_arm.o \
-
+ arm/rv40dsp_init_arm.o
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o \
-OBJS += arm/dsputil_init_arm.o \
- arm/dsputil_arm.o \
- arm/fft_init_arm.o \
- arm/fft_fixed_init_arm.o \
- arm/fmtconvert_init_arm.o \
- arm/jrevdct_arm.o \
- arm/simple_idct_arm.o \
-
+ARMV5TE-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv5te.o \
+ arm/simple_idct_armv5te.o
ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \
- arm/mpegvideo_armv5te_s.o \
-
+ arm/mpegvideo_armv5te_s.o
ARMV5TE-OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_armv5te.o \
- arm/videodsp_armv5te.o \
-
-ARMV5TE-OBJS += arm/dsputil_init_armv5te.o \
- arm/simple_idct_armv5te.o \
+ arm/videodsp_armv5te.o
-ARMV6-OBJS += arm/dsputil_init_armv6.o \
+ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \
arm/dsputil_armv6.o \
arm/simple_idct_armv6.o \
-ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_armv6.o \
- arm/hpeldsp_init_armv6.o
-
-VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
+ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
+ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
+ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
+ arm/hpeldsp_armv6.o
+ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
+ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
+ arm/vp8dsp_init_armv6.o \
+ arm/vp8dsp_armv6.o
-NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
- arm/fft_fixed_neon.o \
+VFP-OBJS += arm/fmtconvert_vfp.o
-NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \
- arm/mdct_fixed_neon.o \
+VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
+ arm/synth_filter_vfp.o
+VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o
+VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
+VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp_armv6.o
-NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o \
+NEON-OBJS += arm/fmtconvert_neon.o
+NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
+NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_neon.o \
+ arm/sbrdsp_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \
+ arm/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \
+ arm/dsputil_neon.o \
+ arm/int_neon.o \
+ arm/simple_idct_neon.o
+NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
+ arm/fft_fixed_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_neon.o \
- arm/h264idct_neon.o \
-
-NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \
-
+ arm/h264idct_neon.o
+NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o
NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \
-
-NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_neon.o \
- arm/hpeldsp_init_neon.o
-
-NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
-
-NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_neon.o \
- arm/aacpsdsp_neon.o
-
-NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \
- arm/synth_filter_neon.o \
-
+ arm/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \
+ arm/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \
+ arm/mdct_fixed_neon.o
NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
+NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
- arm/rv40dsp_neon.o \
-
+ arm/rv40dsp_neon.o
+NEON-OBJS-$(CONFIG_VC1_DECODER) += arm/vc1dsp_init_neon.o \
+ arm/vc1dsp_neon.o
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o
-
NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o
-
-NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \
-
-NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \
-
+NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_neon.o
NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_neon.o \
arm/vp8dsp_neon.o
-
-NEON-OBJS += arm/dsputil_init_neon.o \
- arm/dsputil_neon.o \
- arm/fmtconvert_neon.o \
- arm/int_neon.o \
- arm/simple_idct_neon.o \
diff --git a/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c b/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c
index 6326376..e04787c 100644
--- a/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2012 Mans Rullgard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/aacpsdsp_neon.S b/ffmpeg/libavcodec/arm/aacpsdsp_neon.S
index fb00900..a93bbfe 100644
--- a/ffmpeg/libavcodec/arm/aacpsdsp_neon.S
+++ b/ffmpeg/libavcodec/arm/aacpsdsp_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2012 Mans Rullgard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_arm.S b/ffmpeg/libavcodec/arm/ac3dsp_arm.S
index ed8eb37..1aea190 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_arm.S
+++ b/ffmpeg/libavcodec/arm/ac3dsp_arm.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_armv6.S b/ffmpeg/libavcodec/arm/ac3dsp_armv6.S
index 2028d0b..1d2563d 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_armv6.S
+++ b/ffmpeg/libavcodec/arm/ac3dsp_armv6.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c b/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c
index ffe0747..a3c32ff 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c
@@ -31,6 +31,8 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift);
void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
+ const int16_t *window, unsigned n);
void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
const int32_t *coef0,
const int32_t *coef1,
@@ -64,6 +66,7 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
c->extract_exponents = ff_ac3_extract_exponents_neon;
+ c->apply_window_int16 = ff_apply_window_int16_neon;
c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
}
diff --git a/ffmpeg/libavcodec/arm/ac3dsp_neon.S b/ffmpeg/libavcodec/arm/ac3dsp_neon.S
index 42f35e3..89d0ae8 100644
--- a/ffmpeg/libavcodec/arm/ac3dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/ac3dsp_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -109,6 +109,29 @@ function ff_ac3_extract_exponents_neon, export=1
bx lr
endfunc
+function ff_apply_window_int16_neon, export=1
+ push {r4,lr} @ windowing of int16 samples from both ends of the buffer at once; C prototype is (dst, src, window, n), taken here as r0, r1, r2, r3
+ add r4, r1, r3, lsl #1 @ r4 = r1 + 2*r3: one past the last int16 of the input
+ add lr, r0, r3, lsl #1 @ lr = r0 + 2*r3: one past the last int16 of the output
+ sub r4, r4, #16 @ step back 16 bytes so r4 addresses the final 8 input samples
+ sub lr, lr, #16 @ likewise for the final 8 output samples
+ mov r12, #-16 @ post-index step that makes the two tail pointers walk backwards
+1:
+ vld1.16 {q0}, [r1,:128]! @ next 8 samples from the front (:128 requires a 16-byte aligned address)
+ vld1.16 {q2}, [r2,:128]! @ next 8 window coefficients, shared by the front and back halves
+ vld1.16 {q1}, [r4,:128], r12 @ 8 samples from the back, then move the tail pointer down
+ vrev64.16 q3, q2 @ q3 = coefficients reversed within each 64-bit half
+ vqrdmulh.s16 q0, q0, q2 @ front samples * coefficients (saturating, rounding, doubling, high half)
+ vqrdmulh.s16 d2, d2, d7 @ back samples: crossing the halves (d2 with d7, d3 with d6) on top of vrev64
+ vqrdmulh.s16 d3, d3, d6 @ gives the 8 coefficients in fully reversed order
+ vst1.16 {q0}, [r0,:128]!
+ vst1.16 {q1}, [lr,:128], r12
+ subs r3, r3, #16 @ 16 samples done per iteration (8 front + 8 back)
+ bgt 1b @ body always runs at least once; presumably n is a positive multiple of 16 -- TODO confirm with callers
+
+ pop {r4,pc}
+endfunc
+
function ff_ac3_sum_square_butterfly_int32_neon, export=1
vmov.i64 q0, #0
vmov.i64 q1, #0
diff --git a/ffmpeg/libavcodec/arm/dca.h b/ffmpeg/libavcodec/arm/dca.h
index 2cfd18a..35971a8 100644
--- a/ffmpeg/libavcodec/arm/dca.h
+++ b/ffmpeg/libavcodec/arm/dca.h
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -30,50 +30,48 @@
#define decode_blockcodes decode_blockcodes
static inline int decode_blockcodes(int code1, int code2, int levels, /* splits each code into four base-`levels` digits: code1 -> values[0..3], code2 -> values[4..7] */
- int *values)
+ int32_t *values)
{
- int v0, v1, v2, v3, v4, v5;
+ int32_t v0, v1, v2, v3, v4, v5;
- __asm__ ("smmul %8, %14, %18 \n"
- "smmul %11, %15, %18 \n"
- "smlabb %14, %8, %17, %14 \n"
- "smlabb %15, %11, %17, %15 \n"
- "smmul %9, %8, %18 \n"
- "smmul %12, %11, %18 \n"
- "sub %14, %14, %16, lsr #1 \n"
- "sub %15, %15, %16, lsr #1 \n"
- "smlabb %8, %9, %17, %8 \n"
- "smlabb %11, %12, %17, %11 \n"
- "smmul %10, %9, %18 \n"
- "smmul %13, %12, %18 \n"
- "str %14, %0 \n"
- "str %15, %4 \n"
- "sub %8, %8, %16, lsr #1 \n"
- "sub %11, %11, %16, lsr #1 \n"
- "smlabb %9, %10, %17, %9 \n"
- "smlabb %12, %13, %17, %12 \n"
- "smmul %14, %10, %18 \n"
- "smmul %15, %13, %18 \n"
- "str %8, %1 \n"
- "str %11, %5 \n"
- "sub %9, %9, %16, lsr #1 \n"
- "sub %12, %12, %16, lsr #1 \n"
- "smlabb %10, %14, %17, %10 \n"
- "smlabb %13, %15, %17, %13 \n"
- "str %9, %2 \n"
- "str %12, %6 \n"
- "sub %10, %10, %16, lsr #1 \n"
- "sub %13, %13, %16, lsr #1 \n"
- "str %10, %3 \n"
- "str %13, %7 \n"
- : "=m"(values[0]), "=m"(values[1]),
- "=m"(values[2]), "=m"(values[3]),
- "=m"(values[4]), "=m"(values[5]),
- "=m"(values[6]), "=m"(values[7]),
- "=&r"(v0), "=&r"(v1), "=&r"(v2),
+ __asm__ ("smmul %0, %6, %10 \n" /* v0 = high 32 bits of code1 * ff_inverse[levels]; presumably code1 / levels (ff_inverse is defined elsewhere -- confirm) */
+ "smmul %3, %7, %10 \n" /* v3: the same step for code2; the two chains are interleaved throughout */
+ "smlabb %6, %0, %9, %6 \n" /* code1 += v0 * -levels (bottom 16 bits of each factor): the remainder */
+ "smlabb %7, %3, %9, %7 \n"
+ "smmul %1, %0, %10 \n" /* v1 = next quotient, computed from v0 */
+ "smmul %4, %3, %10 \n"
+ "sub %6, %6, %8, lsr #1 \n" /* remainder -= (levels - 1) >> 1 */
+ "sub %7, %7, %8, lsr #1 \n"
+ "smlabb %0, %1, %9, %0 \n"
+ "smlabb %3, %4, %9, %3 \n"
+ "smmul %2, %1, %10 \n"
+ "smmul %5, %4, %10 \n"
+ "str %6, [%11, #0] \n" /* values[0] */
+ "str %7, [%11, #16] \n" /* values[4] */
+ "sub %0, %0, %8, lsr #1 \n"
+ "sub %3, %3, %8, lsr #1 \n"
+ "smlabb %1, %2, %9, %1 \n"
+ "smlabb %4, %5, %9, %4 \n"
+ "smmul %6, %2, %10 \n" /* last quotient of the code1 chain, written back into code1 */
+ "smmul %7, %5, %10 \n" /* last quotient of the code2 chain, written back into code2 */
+ "str %0, [%11, #4] \n" /* values[1] */
+ "str %3, [%11, #20] \n" /* values[5] */
+ "sub %1, %1, %8, lsr #1 \n"
+ "sub %4, %4, %8, lsr #1 \n"
+ "smlabb %2, %6, %9, %2 \n"
+ "smlabb %5, %7, %9, %5 \n"
+ "str %1, [%11, #8] \n" /* values[2] */
+ "str %4, [%11, #24] \n" /* values[6] */
+ "sub %2, %2, %8, lsr #1 \n"
+ "sub %5, %5, %8, lsr #1 \n"
+ "str %2, [%11, #12] \n" /* values[3] */
+ "str %5, [%11, #28] \n" /* values[7] */
+ : "=&r"(v0), "=&r"(v1), "=&r"(v2), /* operands %0-%5: scratch registers, early-clobber */
"=&r"(v3), "=&r"(v4), "=&r"(v5),
"+&r"(code1), "+&r"(code2) /* %6, %7: read and rewritten in place */
- : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels]));
+ : "r"(levels - 1), "r"(-levels), /* %8, %9 */
+ "r"(ff_inverse[levels]), "r"(values) /* %10, %11 */
+ : "memory"); /* the str instructions write through the values pointer, which is only a register input */
return code1 | code2; /* OR of the two final quotients; presumably non-zero flags an out-of-range code -- confirm with the caller */
}
diff --git a/ffmpeg/libavcodec/arm/dcadsp_init_arm.c b/ffmpeg/libavcodec/arm/dcadsp_init_arm.c
index 56568e0..8893f48 100644
--- a/ffmpeg/libavcodec/arm/dcadsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/dcadsp_init_arm.c
@@ -24,13 +24,47 @@
#include "libavutil/attributes.h"
#include "libavcodec/dcadsp.h"
+void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
+ int decifactor, float scale);
+void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
+ SynthFilterContext *synth, FFTContext *imdct,
+ float synth_buf_ptr[512],
+ int *synth_buf_offset, float synth_buf2[32],
+ const float window[512], float *samples_out,
+ float raXin[32], float scale);
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
int decifactor, float scale);
+void ff_synth_filter_float_vfp(FFTContext *imdct,
+ float *synth_buf_ptr, int *synth_buf_offset,
+ float synth_buf2[32], const float window[512],
+ float out[32], const float in[32],
+ float scale);
+
+void ff_synth_filter_float_neon(FFTContext *imdct,
+ float *synth_buf_ptr, int *synth_buf_offset,
+ float synth_buf2[32], const float window[512],
+ float out[32], const float in[32],
+ float scale);
+
av_cold void ff_dcadsp_init_arm(DCADSPContext *s) /* installs ARM implementations into *s according to the run-time CPU flags */
{
int cpu_flags = av_get_cpu_flags();
+ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { /* VFP routines only on cores that report VFP without VFPv3 */
+ s->lfe_fir = ff_dca_lfe_fir_vfp;
+ s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
+ }
if (have_neon(cpu_flags)) /* tested last, so a NEON lfe_fir replaces any VFP one set above */
s->lfe_fir = ff_dca_lfe_fir_neon;
}
+
+av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) /* selects the ARM synth_filter_float implementation from the run-time CPU flags */
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) /* VFP version only on cores that report VFP without VFPv3 */
+ s->synth_filter_float = ff_synth_filter_float_vfp;
+ if (have_neon(cpu_flags)) /* tested last, so NEON wins when both conditions hold */
+ s->synth_filter_float = ff_synth_filter_float_neon;
+}
diff --git a/ffmpeg/libavcodec/arm/dsputil_init_neon.c b/ffmpeg/libavcodec/arm/dsputil_init_neon.c
index 6d19af7..c1f250a 100644
--- a/ffmpeg/libavcodec/arm/dsputil_init_neon.c
+++ b/ffmpeg/libavcodec/arm/dsputil_init_neon.c
@@ -45,9 +45,6 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
const int16_t *v3, int len, int mul);
-void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
- const int16_t *window, unsigned n);
-
av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
@@ -76,6 +73,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
-
- c->apply_window_int16 = ff_apply_window_int16_neon;
}
diff --git a/ffmpeg/libavcodec/arm/dsputil_neon.S b/ffmpeg/libavcodec/arm/dsputil_neon.S
index 307e122..6c8231e 100644
--- a/ffmpeg/libavcodec/arm/dsputil_neon.S
+++ b/ffmpeg/libavcodec/arm/dsputil_neon.S
@@ -169,29 +169,6 @@ NOVFP ldr r2, [sp]
bx lr
endfunc
-function ff_apply_window_int16_neon, export=1
- push {r4,lr}
- add r4, r1, r3, lsl #1
- add lr, r0, r3, lsl #1
- sub r4, r4, #16
- sub lr, lr, #16
- mov r12, #-16
-1:
- vld1.16 {q0}, [r1,:128]!
- vld1.16 {q2}, [r2,:128]!
- vld1.16 {q1}, [r4,:128], r12
- vrev64.16 q3, q2
- vqrdmulh.s16 q0, q0, q2
- vqrdmulh.s16 d2, d2, d7
- vqrdmulh.s16 d3, d3, d6
- vst1.16 {q0}, [r0,:128]!
- vst1.16 {q1}, [lr,:128], r12
- subs r3, r3, #16
- bgt 1b
-
- pop {r4,pc}
-endfunc
-
function ff_vector_clip_int32_neon, export=1
vdup.32 q0, r2
vdup.32 q1, r3
diff --git a/ffmpeg/libavcodec/arm/fft_fixed_neon.S b/ffmpeg/libavcodec/arm/fft_fixed_neon.S
index fa33eac..d4a38a2 100644
--- a/ffmpeg/libavcodec/arm/fft_fixed_neon.S
+++ b/ffmpeg/libavcodec/arm/fft_fixed_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/fft_init_arm.c b/ffmpeg/libavcodec/arm/fft_init_arm.c
index 8c98abc..7e49b9c 100644
--- a/ffmpeg/libavcodec/arm/fft_init_arm.c
+++ b/ffmpeg/libavcodec/arm/fft_init_arm.c
@@ -26,22 +26,25 @@
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
+void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input);
+
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
-void ff_synth_filter_float_neon(FFTContext *imdct,
- float *synth_buf_ptr, int *synth_buf_offset,
- float synth_buf2[32], const float window[512],
- float out[32], const float in[32],
- float scale);
-
av_cold void ff_fft_init_arm(FFTContext *s)
{
int cpu_flags = av_get_cpu_flags();
+ if (have_vfp(cpu_flags)) {
+#if CONFIG_MDCT
+ if (!have_vfpv3(cpu_flags))
+ s->imdct_half = ff_imdct_half_vfp;
+#endif
+ }
+
if (have_neon(cpu_flags)) {
#if CONFIG_FFT
s->fft_permute = ff_fft_permute_neon;
@@ -65,13 +68,3 @@ av_cold void ff_rdft_init_arm(RDFTContext *s)
s->rdft_calc = ff_rdft_calc_neon;
}
#endif
-
-#if CONFIG_DCA_DECODER
-av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
-{
- int cpu_flags = av_get_cpu_flags();
-
- if (have_neon(cpu_flags))
- s->synth_filter_float = ff_synth_filter_float_neon;
-}
-#endif
diff --git a/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c b/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c
index 1d99c97..37319ed 100644
--- a/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c
+++ b/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c
@@ -25,9 +25,15 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/fmtconvert.h"
-void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
+void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
float mul, int len);
+void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src,
+ float mul, int len);
+void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst,
+ const int32_t *src, const float *mul,
+ int len);
+
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
@@ -37,8 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
{
int cpu_flags = av_get_cpu_flags();
- if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
- c->float_to_int16 = ff_float_to_int16_vfp;
+ if (have_vfp(cpu_flags)) {
+ if (!have_vfpv3(cpu_flags)) {
+ c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
+ c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
+ }
+
+ if (have_armv6(cpu_flags)) {
+ c->float_to_int16 = ff_float_to_int16_vfp;
+ }
}
if (have_neon(cpu_flags)) {
diff --git a/ffmpeg/libavcodec/arm/fmtconvert_vfp.S b/ffmpeg/libavcodec/arm/fmtconvert_vfp.S
index 7b012bc..b14af45 100644
--- a/ffmpeg/libavcodec/arm/fmtconvert_vfp.S
+++ b/ffmpeg/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
*
* This file is part of FFmpeg.
*
@@ -22,57 +22,200 @@
#include "libavutil/arm/asm.S"
/**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little-endian byte sex.
+ * ARM VFP optimised int32 to float conversion.
+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
+ * (16 bytes alignment is best for BCM2835), little-endian.
*/
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
- push {r4-r8,lr}
- vpush {d8-d11}
- vldmia r1!, {s16-s23}
- vcvt.s32.f32 s0, s16
- vcvt.s32.f32 s1, s17
- vcvt.s32.f32 s2, s18
- vcvt.s32.f32 s3, s19
- vcvt.s32.f32 s4, s20
- vcvt.s32.f32 s5, s21
- vcvt.s32.f32 s6, s22
- vcvt.s32.f32 s7, s23
+@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, const int32_t *src, const float *mul, int len)
+function ff_int32_to_float_fmul_array8_vfp, export=1
+ push {lr}
+ ldr a1, [sp, #4] @ a1 = len: fifth argument, the first stack word above the lr just pushed (the incoming a1 is overwritten unread)
+ subs lr, a1, #3*8 @ lr = len - 24
+ bcc 50f @ too short to pipeline
+ @ Now need to find (len / 8) % 3. The approximation
+ @ x / 24 = (x * 0xAB) >> 12
+ @ is good for x < 4096, which is true for both AC3 and DCA.
+ mov a1, #0xAB
+ ldr ip, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1
+ mul a1, lr, a1
+ vpush {s16-s31} @ this path uses s16-s31; they are restored by the vpop before returning
+ mov a1, a1, lsr #12 @ a1 = (len - 24) / 24, using the approximation above
+ add a1, a1, a1, lsl #1 @ a1 = 3 * a1
+ rsb a1, a1, lr, lsr #3 @ a1 = (len - 24) / 8 - a1 = number of 8-sample blocks modulo 3
+ cmp a1, #1
+ fmrx a1, FPSCR @ a1 now holds the caller's FPSCR until the exit sequence
+ fmxr FPSCR, ip
+ beq 11f @ branches on the cmp above: remainder 1
+ blo 10f @ remainder 0
+ @ Array is (2 + multiple of 3) x 8 floats long
+ @ drop through...
+ vldmia a3!, {s16-s23}
+ vldmia a4!, {s2,s3} @ multipliers for the first two blocks
+ vldmia a3!, {s24-s31}
+ vcvt.f32.s32 s16, s16
+ vcvt.f32.s32 s17, s17
+ vcvt.f32.s32 s18, s18
+ vcvt.f32.s32 s19, s19
+ vcvt.f32.s32 s20, s20
+ vcvt.f32.s32 s21, s21
+ vcvt.f32.s32 s22, s22
+ vcvt.f32.s32 s23, s23
+ vmul.f32 s16, s16, s2 @ one instruction for the whole block: with the length-8 short-vector mode set above it scales s16-s23 by s2
+ @ drop through...
+3: @ loop body, three stages; each stage loads one block, converts and scales the block loaded before it, and stores the one before that
+ vldmia a3!, {s8-s15}
+ vldmia a4!, {s1} @ one multiplier per block of 8 samples
+ vcvt.f32.s32 s24, s24
+ vcvt.f32.s32 s25, s25
+ vcvt.f32.s32 s26, s26
+ vcvt.f32.s32 s27, s27
+ vcvt.f32.s32 s28, s28
+ vcvt.f32.s32 s29, s29
+ vcvt.f32.s32 s30, s30
+ vcvt.f32.s32 s31, s31
+ vmul.f32 s24, s24, s3
+ vstmia a2!, {s16-s19}
+ vstmia a2!, {s20-s23}
+2:
+ vldmia a3!, {s16-s23}
+ vldmia a4!, {s2}
+ vcvt.f32.s32 s8, s8
+ vcvt.f32.s32 s9, s9
+ vcvt.f32.s32 s10, s10
+ vcvt.f32.s32 s11, s11
+ vcvt.f32.s32 s12, s12
+ vcvt.f32.s32 s13, s13
+ vcvt.f32.s32 s14, s14
+ vcvt.f32.s32 s15, s15
+ vmul.f32 s8, s8, s1
+ vstmia a2!, {s24-s27}
+ vstmia a2!, {s28-s31}
1:
- subs r2, r2, #8
- vmov r3, r4, s0, s1
- vmov r5, r6, s2, s3
- vmov r7, r8, s4, s5
- vmov ip, lr, s6, s7
- it gt
- vldmiagt r1!, {s16-s23}
- ssat r4, #16, r4
- ssat r3, #16, r3
- ssat r6, #16, r6
- ssat r5, #16, r5
- pkhbt r3, r3, r4, lsl #16
- pkhbt r4, r5, r6, lsl #16
- itttt gt
- vcvtgt.s32.f32 s0, s16
- vcvtgt.s32.f32 s1, s17
- vcvtgt.s32.f32 s2, s18
- vcvtgt.s32.f32 s3, s19
- itttt gt
- vcvtgt.s32.f32 s4, s20
- vcvtgt.s32.f32 s5, s21
- vcvtgt.s32.f32 s6, s22
- vcvtgt.s32.f32 s7, s23
- ssat r8, #16, r8
- ssat r7, #16, r7
- ssat lr, #16, lr
- ssat ip, #16, ip
- pkhbt r5, r7, r8, lsl #16
- pkhbt r6, ip, lr, lsl #16
- stmia r0!, {r3-r6}
- bgt 1b
+ vldmia a3!, {s24-s31}
+ vldmia a4!, {s3}
+ vcvt.f32.s32 s16, s16
+ vcvt.f32.s32 s17, s17
+ vcvt.f32.s32 s18, s18
+ vcvt.f32.s32 s19, s19
+ vcvt.f32.s32 s20, s20
+ vcvt.f32.s32 s21, s21
+ vcvt.f32.s32 s22, s22
+ vcvt.f32.s32 s23, s23
+ vmul.f32 s16, s16, s2
+ vstmia a2!, {s8-s11}
+ vstmia a2!, {s12-s15}
- vpop {d8-d11}
- pop {r4-r8,pc}
+ subs lr, lr, #8*3 @ three blocks (24 samples) consumed per trip round the loop
+ bpl 3b
+
+ vcvt.f32.s32 s24, s24 @ drain: s24-s31 is loaded but not yet converted, s16-s23 is scaled but not yet stored
+ vcvt.f32.s32 s25, s25
+ vcvt.f32.s32 s26, s26
+ vcvt.f32.s32 s27, s27
+ vcvt.f32.s32 s28, s28
+ vcvt.f32.s32 s29, s29
+ vcvt.f32.s32 s30, s30
+ vcvt.f32.s32 s31, s31
+ vmul.f32 s24, s24, s3
+ vstmia a2!, {s16-s19}
+ vstmia a2!, {s20-s23}
+ vstmia a2!, {s24-s27}
+ vstmia a2!, {s28-s31}
+
+ fmxr FPSCR, a1 @ put back the caller's FPSCR saved above
+ vpop {s16-s31}
+ pop {pc}
+
+10: @ Array is (multiple of 3) x 8 floats long
+ vldmia a3!, {s8-s15}
+ vldmia a4!, {s1,s2}
+ vldmia a3!, {s16-s23}
+ vcvt.f32.s32 s8, s8
+ vcvt.f32.s32 s9, s9
+ vcvt.f32.s32 s10, s10
+ vcvt.f32.s32 s11, s11
+ vcvt.f32.s32 s12, s12
+ vcvt.f32.s32 s13, s13
+ vcvt.f32.s32 s14, s14
+ vcvt.f32.s32 s15, s15
+ vmul.f32 s8, s8, s1
+ b 1b @ join the loop at the stage that stores s8-s15 and converts s16-s23
+
+11: @ Array is (1 + multiple of 3) x 8 floats long
+ vldmia a3!, {s24-s31}
+ vldmia a4!, {s3}
+ vldmia a3!, {s8-s15}
+ vldmia a4!, {s1}
+ vcvt.f32.s32 s24, s24
+ vcvt.f32.s32 s25, s25
+ vcvt.f32.s32 s26, s26
+ vcvt.f32.s32 s27, s27
+ vcvt.f32.s32 s28, s28
+ vcvt.f32.s32 s29, s29
+ vcvt.f32.s32 s30, s30
+ vcvt.f32.s32 s31, s31
+ vmul.f32 s24, s24, s3
+ b 2b @ join the loop at the stage that stores s24-s31 and converts s8-s15
+
+50:
+ ldr lr, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1
+ fmrx ip, FPSCR @ caller's FPSCR kept in ip on this path; lr is free as scratch because its value is on the stack
+ fmxr FPSCR, lr
+51:
+ vldmia a3!, {s8-s15}
+ vldmia a4!, {s0}
+ vcvt.f32.s32 s8, s8
+ vcvt.f32.s32 s9, s9
+ vcvt.f32.s32 s10, s10
+ vcvt.f32.s32 s11, s11
+ vcvt.f32.s32 s12, s12
+ vcvt.f32.s32 s13, s13
+ vcvt.f32.s32 s14, s14
+ vcvt.f32.s32 s15, s15
+ vmul.f32 s8, s8, s0
+ subs a1, a1, #8 @ a1 still holds len on this path
+ vstmia a2!, {s8-s11}
+ vstmia a2!, {s12-s15}
+ bne 51b @ leaves only when the count reaches exactly 0, so len must be a non-zero multiple of 8
+
+ fmxr FPSCR, ip
+ pop {pc}
+endfunc
+
+/**
+ * ARM VFP optimised int32 to float conversion.
+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
+ * (16 bytes alignment is best for BCM2835), little-endian.
+ * TODO: could be further optimised by unrolling and interleaving, as above
+ */
+@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len)
+function ff_int32_to_float_fmul_scalar_vfp, export=1
+VFP tmp .req a4
+VFP len .req a3
+NOVFP tmp .req a3
+NOVFP len .req a4
+NOVFP vmov s0, a3 @ NOVFP variant: mul arrives in a3 and is copied to s0, which then becomes tmp; presumably VFP/NOVFP select the hard-/soft-float argument layout (defined in asm.S -- confirm)
+ ldr tmp, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1
+ fmrx ip, FPSCR @ keep the caller's FPSCR in ip
+ fmxr FPSCR, tmp
+1:
+ vldmia a2!, {s8-s15} @ next 8 int32 values from src
+ vcvt.f32.s32 s8, s8 @ each of the 8 values gets its own vcvt; only the vmul below is issued once per block
+ vcvt.f32.s32 s9, s9
+ vcvt.f32.s32 s10, s10
+ vcvt.f32.s32 s11, s11
+ vcvt.f32.s32 s12, s12
+ vcvt.f32.s32 s13, s13
+ vcvt.f32.s32 s14, s14
+ vcvt.f32.s32 s15, s15
+ vmul.f32 s8, s8, s0 @ with the length-8 short-vector mode set above this scales s8-s15 by the scalar s0
+ subs len, len, #8
+ vstmia a1!, {s8-s11} @ 8 floats to dst
+ vstmia a1!, {s12-s15}
+ bne 1b @ leaves only when len reaches exactly 0, so len must be a non-zero multiple of 8
+
+ fmxr FPSCR, ip @ put back the caller's FPSCR
+ bx lr
endfunc
+ .unreq tmp
+ .unreq len
diff --git a/ffmpeg/libavcodec/arm/h264cmc_neon.S b/ffmpeg/libavcodec/arm/h264cmc_neon.S
index 3427e36..0bcae11 100644
--- a/ffmpeg/libavcodec/arm/h264cmc_neon.S
+++ b/ffmpeg/libavcodec/arm/h264cmc_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -39,6 +39,9 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
add r6, r6, r7, lsl #1
vld1.16 {d22[],d23[]}, [r6,:16]
.endif
+ .ifc \codec,vc1
+ vmov.u16 q11, #28
+ .endif
A muls r7, r4, r5
T mul r7, r4, r5
@@ -183,6 +186,9 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
add r6, r6, r7, lsl #1
vld1.16 {d22[],d23[]}, [r6,:16]
.endif
+ .ifc \codec,vc1
+ vmov.u16 q11, #28
+ .endif
A muls r7, r4, r5
T mul r7, r4, r5
@@ -376,14 +382,12 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
endfunc
.endm
-#if CONFIG_H264_DECODER
h264_chroma_mc8 put
h264_chroma_mc8 avg
h264_chroma_mc4 put
h264_chroma_mc4 avg
h264_chroma_mc2 put
h264_chroma_mc2 avg
-#endif
#if CONFIG_RV40_DECODER
const rv40bias
@@ -398,3 +402,10 @@ endconst
h264_chroma_mc4 put, rv40
h264_chroma_mc4 avg, rv40
#endif
+
+#if CONFIG_VC1_DECODER
+ h264_chroma_mc8 put, vc1
+ h264_chroma_mc8 avg, vc1
+ h264_chroma_mc4 put, vc1
+ h264_chroma_mc4 avg, vc1
+#endif
diff --git a/ffmpeg/libavcodec/arm/h264dsp_init_arm.c b/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
index 785b604..2cafbaf 100644
--- a/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/h264dsp_init_arm.c
@@ -24,6 +24,8 @@
#include "libavutil/arm/cpu.h"
#include "libavcodec/h264dsp.h"
+int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size);
+
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
@@ -68,8 +70,8 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
int16_t *block, int stride,
const uint8_t nnzc[6*8]);
-static av_cold void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
- const int chroma_format_idc)
+static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc)
{
#if HAVE_NEON
if (bit_depth == 8) {
@@ -106,6 +108,8 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
{
int cpu_flags = av_get_cpu_flags();
+ if (have_armv6(cpu_flags))
+ c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6;
if (have_neon(cpu_flags))
- ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
+ h264dsp_init_neon(c, bit_depth, chroma_format_idc);
}
diff --git a/ffmpeg/libavcodec/arm/h264idct_neon.S b/ffmpeg/libavcodec/arm/h264idct_neon.S
index fa5b90c..2edeca2 100644
--- a/ffmpeg/libavcodec/arm/h264idct_neon.S
+++ b/ffmpeg/libavcodec/arm/h264idct_neon.S
@@ -187,8 +187,8 @@ endfunc
vshr.s16 q2, q10, #1
vadd.i16 q0, q8, q12
vld1.16 {q14-q15},[r1,:128]
- vst1.16 {q7}, [r1,:128]!
- vst1.16 {q7}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
vsub.i16 q1, q8, q12
vshr.s16 q3, q14, #1
vsub.i16 q2, q2, q14
@@ -267,16 +267,16 @@ endfunc
.endm
function ff_h264_idct8_add_neon, export=1
- vmov.i16 q7, #0
+ vmov.i16 q3, #0
vld1.16 {q8-q9}, [r1,:128]
- vst1.16 {q7}, [r1,:128]!
- vst1.16 {q7}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
vld1.16 {q10-q11},[r1,:128]
- vst1.16 {q7}, [r1,:128]!
- vst1.16 {q7}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
vld1.16 {q12-q13},[r1,:128]
- vst1.16 {q7}, [r1,:128]!
- vst1.16 {q7}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
+ vst1.16 {q3}, [r1,:128]!
idct8x8_cols 0
idct8x8_cols 1
diff --git a/ffmpeg/libavcodec/arm/h264pred_init_arm.c b/ffmpeg/libavcodec/arm/h264pred_init_arm.c
index 5ec39ce..1562f0b 100644
--- a/ffmpeg/libavcodec/arm/h264pred_init_arm.c
+++ b/ffmpeg/libavcodec/arm/h264pred_init_arm.c
@@ -45,9 +45,9 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
-static av_cold void ff_h264_pred_init_neon(H264PredContext *h, int codec_id,
- const int bit_depth,
- const int chroma_format_idc)
+static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
+ const int bit_depth,
+ const int chroma_format_idc)
{
#if HAVE_NEON
const int high_depth = bit_depth > 8;
@@ -88,5 +88,5 @@ av_cold void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags))
- ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
+ h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
}
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_arm.h b/ffmpeg/libavcodec/arm/hpeldsp_arm.h
index e79bc6f..3f18c62 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_arm.h
+++ b/ffmpeg/libavcodec/arm/hpeldsp_arm.h
@@ -23,7 +23,7 @@
#include "libavcodec/hpeldsp.h"
-void ff_hpeldsp_init_armv6(HpelDSPContext* c, int flags);
+void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags);
#endif /* AVCODEC_ARM_HPELDSP_H */
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c b/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c
index bae93eb..2cc2b78 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c
@@ -20,7 +20,9 @@
*/
#include "libavutil/arm/cpu.h"
+#include "libavutil/attributes.h"
#include "libavcodec/bit_depth_template.c" // for CALL_2X_PIXELS
+#include "libavcodec/rnd_avg.h"
#include "hpeldsp_arm.h"
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
@@ -41,7 +43,7 @@ CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm, ff_put_no_rnd_pixels8_x2_arm, 8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm, ff_put_no_rnd_pixels8_y2_arm, 8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8)
-void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags)
+av_cold void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags)
{
int cpu_flags = av_get_cpu_flags();
@@ -63,6 +65,8 @@ void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags)
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
- if (have_armv6(cpu_flags)) ff_hpeldsp_init_armv6(c, flags);
- if (have_neon(cpu_flags)) ff_hpeldsp_init_neon(c, flags);
+ if (have_armv6(cpu_flags))
+ ff_hpeldsp_init_armv6(c, flags);
+ if (have_neon(cpu_flags))
+ ff_hpeldsp_init_neon(c, flags);
}
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c b/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c
index da4caf8..967a8e0 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c
+++ b/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <stddef.h>
#include <stdint.h>
#include "libavutil/attributes.h"
diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c b/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c
index d577735..d9feadd 100644
--- a/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c
+++ b/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c
@@ -19,8 +19,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <stddef.h>
#include <stdint.h>
+#include "libavutil/attributes.h"
#include "hpeldsp_arm.h"
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
@@ -50,7 +52,7 @@ void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
-void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags)
+av_cold void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags)
{
c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
diff --git a/ffmpeg/libavcodec/arm/int_neon.S b/ffmpeg/libavcodec/arm/int_neon.S
index 6b28a97..b3f5a69 100644
--- a/ffmpeg/libavcodec/arm/int_neon.S
+++ b/ffmpeg/libavcodec/arm/int_neon.S
@@ -1,6 +1,6 @@
/*
* ARM NEON optimised integer operations
- * Copyright (c) 2009 Kostya Shishkov
+ * Copyright (c) 2009 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
@@ -41,10 +41,10 @@ function ff_scalarproduct_int16_neon, export=1
vpadd.s32 d16, d0, d1
vpadd.s32 d17, d2, d3
- vpadd.s32 d10, d4, d5
- vpadd.s32 d11, d6, d7
+ vpadd.s32 d18, d4, d5
+ vpadd.s32 d19, d6, d7
vpadd.s32 d0, d16, d17
- vpadd.s32 d1, d10, d11
+ vpadd.s32 d1, d18, d19
vpadd.s32 d2, d0, d1
vpaddl.s32 d3, d2
vmov.32 r0, d3[0]
@@ -81,10 +81,10 @@ function ff_scalarproduct_and_madd_int16_neon, export=1
vpadd.s32 d16, d0, d1
vpadd.s32 d17, d2, d3
- vpadd.s32 d10, d4, d5
- vpadd.s32 d11, d6, d7
+ vpadd.s32 d18, d4, d5
+ vpadd.s32 d19, d6, d7
vpadd.s32 d0, d16, d17
- vpadd.s32 d1, d10, d11
+ vpadd.s32 d1, d18, d19
vpadd.s32 d2, d0, d1
vpaddl.s32 d3, d2
vmov.32 r0, d3[0]
diff --git a/ffmpeg/libavcodec/arm/mdct_fixed_neon.S b/ffmpeg/libavcodec/arm/mdct_fixed_neon.S
index c77be59..365c5e7 100644
--- a/ffmpeg/libavcodec/arm/mdct_fixed_neon.S
+++ b/ffmpeg/libavcodec/arm/mdct_fixed_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
index 49bd0bc..977abb6 100644
--- a/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
+++ b/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c b/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c
index e73aee6..98e0c8a 100644
--- a/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Mans Rullgard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/neon.S b/ffmpeg/libavcodec/arm/neon.S
index 716a607..787bc4b 100644
--- a/ffmpeg/libavcodec/arm/neon.S
+++ b/ffmpeg/libavcodec/arm/neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/rv34dsp_neon.S b/ffmpeg/libavcodec/arm/rv34dsp_neon.S
index a29123f..3d4a83d 100644
--- a/ffmpeg/libavcodec/arm/rv34dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/rv34dsp_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c b/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c
index fec3702..3bf9ac7 100644
--- a/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c
@@ -70,7 +70,7 @@ void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_
int filter_q1, int alpha, int beta,
int lim_p0q0, int lim_q1, int lim_p1);
-static av_cold void ff_rv40dsp_init_neon(RV34DSPContext *c)
+static av_cold void rv40dsp_init_neon(RV34DSPContext *c)
{
c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
c->put_pixels_tab[0][ 3] = ff_put_rv40_qpel16_mc30_neon;
@@ -144,5 +144,5 @@ av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags))
- ff_rv40dsp_init_neon(c);
+ rv40dsp_init_neon(c);
}
diff --git a/ffmpeg/libavcodec/arm/rv40dsp_neon.S b/ffmpeg/libavcodec/arm/rv40dsp_neon.S
index 6bd45eb..099f88c 100644
--- a/ffmpeg/libavcodec/arm/rv40dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/rv40dsp_neon.S
@@ -2,20 +2,20 @@
* Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c b/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c
index 4da7967..4fb69f9 100644
--- a/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c
+++ b/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2012 Mans Rullgard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/sbrdsp_neon.S b/ffmpeg/libavcodec/arm/sbrdsp_neon.S
index 610397f..e66abd6 100644
--- a/ffmpeg/libavcodec/arm/sbrdsp_neon.S
+++ b/ffmpeg/libavcodec/arm/sbrdsp_neon.S
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2012 Mans Rullgard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/simple_idct_arm.S b/ffmpeg/libavcodec/arm/simple_idct_arm.S
index dd1c815..50d20c9 100644
--- a/ffmpeg/libavcodec/arm/simple_idct_arm.S
+++ b/ffmpeg/libavcodec/arm/simple_idct_arm.S
@@ -83,7 +83,7 @@ __row_loop:
orrs r5, r5, r7 @ R5=R4 | R3 | R2 | R7
beq __almost_empty_row
-__b_evaluation:
+@@ __b_evaluation:
@@ at this point, R0=block (temp), R1(free), R2=ROWr32[1], R3=ROWr32[2], R4=ROWr32[3],
@@ R5=(temp), R6=ROWr16[0], R7=ROWr16[1], R8-R11 free,
@@ R12=__const_ptr_, R14=&block[n]
@@ -159,7 +159,7 @@ __end_b_evaluation:
@@ R5=b2, R6=ROWr16[0], R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
-__a_evaluation:
+@@ __a_evaluation:
@@ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
@@ a1 = a0 + W6 * row[2];
@@ a2 = a0 - W6 * row[2];
@@ -295,7 +295,7 @@ __end_row_loop:
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
__col_loop:
-__b_evaluation2:
+@@ __b_evaluation2:
@@ at this point, R0=block (temp), R1-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
@@ proceed with b0-b3 first, followed by a0-a3
@@ -357,12 +357,12 @@ __b_evaluation2:
it ne
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
@@ R4 is free now
-__end_b_evaluation2:
+@@ __end_b_evaluation2:
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
-__a_evaluation2:
+@@ __a_evaluation2:
@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
@@ a1 = a0 + W6 * row[2];
@@ a2 = a0 - W6 * row[2];
@@ -414,7 +414,7 @@ __a_evaluation2:
itt ne
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
-__end_a_evaluation2:
+@@ __end_a_evaluation2:
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ -452,7 +452,7 @@ __end_a_evaluation2:
strh r8, [r14, #96]
strh r9, [r14, #112]
-__end_col_loop:
+@@ __end_col_loop:
@@ at this point, R0-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
ldr r0, [sp, #0] @ R0=block
@@ -463,7 +463,7 @@ __end_col_loop:
-__end_simple_idct_arm:
+@@ __end_simple_idct_arm:
@@ restore registers to previous status!
add sp, sp, #8 @@ the local variables!
ldmfd sp!, {r4-r11, r15} @@ update PC with LR content.
diff --git a/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c b/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c
deleted file mode 100644
index f53cbae..0000000
--- a/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/arm/cpu.h"
-#include "libavcodec/avcodec.h"
-#include "libavcodec/vp56dsp.h"
-
-void ff_vp6_edge_filter_hor_neon(uint8_t *yuv, int stride, int t);
-void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, int stride, int t);
-
-av_cold void ff_vp56dsp_init_arm(VP56DSPContext *s, enum AVCodecID codec)
-{
- int cpu_flags = av_get_cpu_flags();
-
- if (codec != AV_CODEC_ID_VP5 && have_neon(cpu_flags)) {
- s->edge_filter_hor = ff_vp6_edge_filter_hor_neon;
- s->edge_filter_ver = ff_vp6_edge_filter_ver_neon;
- }
-}
diff --git a/ffmpeg/libavcodec/arm/vp56dsp_neon.S b/ffmpeg/libavcodec/arm/vp56dsp_neon.S
deleted file mode 100644
index 03dd28d..0000000
--- a/ffmpeg/libavcodec/arm/vp56dsp_neon.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-.macro vp6_edge_filter
- vdup.16 q3, r2 @ t
- vmov.i16 q13, #1
- vsubl.u8 q0, d20, d18 @ p[ 0] - p[-s]
- vsubl.u8 q1, d16, d22 @ p[-2*s] - p[ s]
- vsubl.u8 q14, d21, d19
- vsubl.u8 q15, d17, d23
- vadd.i16 q2, q0, q0 @ 2*(p[0]-p[-s])
- vadd.i16 d29, d28, d28
- vadd.i16 q0, q0, q1 @ p[0]-p[-s] + p[-2*s]-p[s]
- vadd.i16 d28, d28, d30
- vadd.i16 q0, q0, q2 @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
- vadd.i16 d28, d28, d29
- vrshr.s16 q0, q0, #3 @ v
- vrshr.s16 d28, d28, #3
- vsub.i16 q8, q3, q13 @ t-1
- vabs.s16 q1, q0 @ V
- vshr.s16 q2, q0, #15 @ s
- vabs.s16 d30, d28
- vshr.s16 d29, d28, #15
- vsub.i16 q12, q1, q3 @ V-t
- vsub.i16 d31, d30, d6
- vsub.i16 q12, q12, q13 @ V-t-1
- vsub.i16 d31, d31, d26
- vcge.u16 q12, q12, q8 @ V-t-1 >= t-1
- vcge.u16 d31, d31, d16
- vadd.i16 q13, q3, q3 @ 2*t
- vadd.i16 d16, d6, d6
- vsub.i16 q13, q13, q1 @ 2*t - V
- vsub.i16 d16, d16, d30
- vadd.i16 q13, q13, q2 @ += s
- vadd.i16 d16, d16, d29
- veor q13, q13, q2 @ ^= s
- veor d16, d16, d29
- vbif q0, q13, q12
- vbif d28, d16, d31
- vmovl.u8 q1, d20
- vmovl.u8 q15, d21
- vaddw.u8 q2, q0, d18
- vaddw.u8 q3, q14, d19
- vsub.i16 q1, q1, q0
- vsub.i16 d30, d30, d28
- vqmovun.s16 d18, q2
- vqmovun.s16 d19, q3
- vqmovun.s16 d20, q1
- vqmovun.s16 d21, q15
-.endm
-
-function ff_vp6_edge_filter_ver_neon, export=1
- sub r0, r0, r1, lsl #1
- vld1.8 {q8}, [r0], r1 @ p[-2*s]
- vld1.8 {q9}, [r0], r1 @ p[-s]
- vld1.8 {q10}, [r0], r1 @ p[0]
- vld1.8 {q11}, [r0] @ p[s]
- vp6_edge_filter
- sub r0, r0, r1, lsl #1
- sub r1, r1, #8
- vst1.8 {d18}, [r0]!
- vst1.32 {d19[0]}, [r0], r1
- vst1.8 {d20}, [r0]!
- vst1.32 {d21[0]}, [r0]
- bx lr
-endfunc
-
-function ff_vp6_edge_filter_hor_neon, export=1
- sub r3, r0, #1
- sub r0, r0, #2
- vld1.32 {d16[0]}, [r0], r1
- vld1.32 {d18[0]}, [r0], r1
- vld1.32 {d20[0]}, [r0], r1
- vld1.32 {d22[0]}, [r0], r1
- vld1.32 {d16[1]}, [r0], r1
- vld1.32 {d18[1]}, [r0], r1
- vld1.32 {d20[1]}, [r0], r1
- vld1.32 {d22[1]}, [r0], r1
- vld1.32 {d17[0]}, [r0], r1
- vld1.32 {d19[0]}, [r0], r1
- vld1.32 {d21[0]}, [r0], r1
- vld1.32 {d23[0]}, [r0], r1
- vtrn.8 q8, q9
- vtrn.8 q10, q11
- vtrn.16 q8, q10
- vtrn.16 q9, q11
- vp6_edge_filter
- vtrn.8 q9, q10
- vst1.16 {d18[0]}, [r3], r1
- vst1.16 {d20[0]}, [r3], r1
- vst1.16 {d18[1]}, [r3], r1
- vst1.16 {d20[1]}, [r3], r1
- vst1.16 {d18[2]}, [r3], r1
- vst1.16 {d20[2]}, [r3], r1
- vst1.16 {d18[3]}, [r3], r1
- vst1.16 {d20[3]}, [r3], r1
- vst1.16 {d19[0]}, [r3], r1
- vst1.16 {d21[0]}, [r3], r1
- vst1.16 {d19[1]}, [r3], r1
- vst1.16 {d21[1]}, [r3], r1
- bx lr
-endfunc
diff --git a/ffmpeg/libavcodec/arm/vp8dsp.h b/ffmpeg/libavcodec/arm/vp8dsp.h
index ce00e4a..6041ef1 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp.h
+++ b/ffmpeg/libavcodec/arm/vp8dsp.h
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_armv6.S b/ffmpeg/libavcodec/arm/vp8dsp_armv6.S
index 5207758..a14b188 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_armv6.S
+++ b/ffmpeg/libavcodec/arm/vp8dsp_armv6.S
@@ -5,20 +5,20 @@
* Copyright (c) 2010 Rob Clark <rob@ti.com>
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* This code was partially ported from libvpx, which uses this license:
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c b/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c
index e15e191..563268e 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c
+++ b/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c b/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c
index 0468181..ae045a6 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c
+++ b/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/ffmpeg/libavcodec/arm/vp8dsp_neon.S b/ffmpeg/libavcodec/arm/vp8dsp_neon.S
index 04e7c5c..436b340 100644
--- a/ffmpeg/libavcodec/arm/vp8dsp_neon.S
+++ b/ffmpeg/libavcodec/arm/vp8dsp_neon.S
@@ -1576,18 +1576,19 @@ endconst
/* Bilinear MC */
function ff_put_vp8_bilin16_h_neon, export=1
- ldr r3, [sp, #4] @ mx
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #8] @ mx
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r12, [sp] @ h
+ ldr r12, [sp, #4] @ h
1:
subs r12, r12, #2
- vld1.8 {d2-d4}, [r2], r1
+ vld1.8 {d2-d4}, [r2], r3
vext.8 q2, q1, q2, #1
vmull.u8 q8, d2, d1
vmlal.u8 q8, d4, d0
- vld1.8 {d18-d20},[r2], r1
+ vld1.8 {d18-d20},[r2], r3
vmull.u8 q3, d3, d1
vmlal.u8 q3, d5, d0
vext.8 q10, q9, q10, #1
@@ -1603,24 +1604,25 @@ function ff_put_vp8_bilin16_h_neon, export=1
vst1.8 {q3}, [r0,:128], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin16_v_neon, export=1
- ldr r3, [sp, #8] @ my
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #12] @ my
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r12, [sp] @ h
- vld1.8 {q1}, [r2], r1
+ ldr r12, [sp, #4] @ h
+ vld1.8 {q1}, [r2], r3
1:
subs r12, r12, #2
- vld1.8 {q2}, [r2], r1
+ vld1.8 {q2}, [r2], r3
vmull.u8 q3, d2, d1
vmlal.u8 q3, d4, d0
vmull.u8 q8, d3, d1
vmlal.u8 q8, d5, d0
- vld1.8 {q1}, [r2], r1
+ vld1.8 {q1}, [r2], r3
vmull.u8 q9, d4, d1
vmlal.u8 q9, d2, d0
vmull.u8 q10, d5, d1
@@ -1633,21 +1635,22 @@ function ff_put_vp8_bilin16_v_neon, export=1
vst1.8 {q3}, [r0,:128], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin16_hv_neon, export=1
- ldr r3, [sp, #4] @ mx
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #8] @ mx
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r3, [sp, #8] @ my
- rsb r12, r3, #8
- vdup.8 d2, r3
+ ldr lr, [sp, #12] @ my
+ rsb r12, lr, #8
+ vdup.8 d2, lr
vdup.8 d3, r12
- ldr r12, [sp] @ h
+ ldr r12, [sp, #4] @ h
- vld1.8 {d4-d6}, [r2], r1
+ vld1.8 {d4-d6}, [r2], r3
vext.8 q3, q2, q3, #1
vmull.u8 q8, d4, d1
vmlal.u8 q8, d6, d0
@@ -1657,11 +1660,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1
vrshrn.u16 d5, q9, #3
1:
subs r12, r12, #2
- vld1.8 {d18-d20},[r2], r1
+ vld1.8 {d18-d20},[r2], r3
vext.8 q10, q9, q10, #1
vmull.u8 q11, d18, d1
vmlal.u8 q11, d20, d0
- vld1.8 {d26-d28},[r2], r1
+ vld1.8 {d26-d28},[r2], r3
vmull.u8 q12, d19, d1
vmlal.u8 q12, d21, d0
vext.8 q14, q13, q14, #1
@@ -1689,22 +1692,23 @@ function ff_put_vp8_bilin16_hv_neon, export=1
vst1.8 {q10}, [r0,:128], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin8_h_neon, export=1
- ldr r3, [sp, #4] @ mx
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #8] @ mx
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r12, [sp] @ h
+ ldr r12, [sp, #4] @ h
1:
subs r12, r12, #2
- vld1.8 {q1}, [r2], r1
+ vld1.8 {q1}, [r2], r3
vext.8 d3, d2, d3, #1
vmull.u8 q2, d2, d1
vmlal.u8 q2, d3, d0
- vld1.8 {q3}, [r2], r1
+ vld1.8 {q3}, [r2], r3
vext.8 d7, d6, d7, #1
vmull.u8 q8, d6, d1
vmlal.u8 q8, d7, d0
@@ -1714,22 +1718,23 @@ function ff_put_vp8_bilin8_h_neon, export=1
vst1.8 {d16}, [r0,:64], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin8_v_neon, export=1
- ldr r3, [sp, #8] @ my
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #12] @ my
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r12, [sp] @ h
- vld1.8 {d2}, [r2], r1
+ ldr r12, [sp, #4] @ h
+ vld1.8 {d2}, [r2], r3
1:
subs r12, r12, #2
- vld1.8 {d3}, [r2], r1
+ vld1.8 {d3}, [r2], r3
vmull.u8 q2, d2, d1
vmlal.u8 q2, d3, d0
- vld1.8 {d2}, [r2], r1
+ vld1.8 {d2}, [r2], r3
vmull.u8 q3, d3, d1
vmlal.u8 q3, d2, d0
vrshrn.u16 d4, q2, #3
@@ -1738,32 +1743,33 @@ function ff_put_vp8_bilin8_v_neon, export=1
vst1.8 {d6}, [r0,:64], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin8_hv_neon, export=1
- ldr r3, [sp, #4] @ mx
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #8] @ mx
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r3, [sp, #8] @ my
- rsb r12, r3, #8
- vdup.8 d2, r3
+ ldr lr, [sp, #12] @ my
+ rsb r12, lr, #8
+ vdup.8 d2, lr
vdup.8 d3, r12
- ldr r12, [sp] @ h
+ ldr r12, [sp, #4] @ h
- vld1.8 {q2}, [r2], r1
+ vld1.8 {q2}, [r2], r3
vext.8 d5, d4, d5, #1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d5, d0
vrshrn.u16 d22, q9, #3
1:
subs r12, r12, #2
- vld1.8 {q3}, [r2], r1
+ vld1.8 {q3}, [r2], r3
vext.8 d7, d6, d7, #1
vmull.u8 q8, d6, d1
vmlal.u8 q8, d7, d0
- vld1.8 {q2}, [r2], r1
+ vld1.8 {q2}, [r2], r3
vext.8 d5, d4, d5, #1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d5, d0
@@ -1779,20 +1785,21 @@ function ff_put_vp8_bilin8_hv_neon, export=1
vst1.8 {d23}, [r0,:64], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin4_h_neon, export=1
- ldr r3, [sp, #4] @ mx
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #8] @ mx
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r12, [sp] @ h
+ ldr r12, [sp, #4] @ h
1:
subs r12, r12, #2
- vld1.8 {d2}, [r2], r1
+ vld1.8 {d2}, [r2], r3
vext.8 d3, d2, d3, #1
- vld1.8 {d6}, [r2], r1
+ vld1.8 {d6}, [r2], r3
vext.8 d7, d6, d7, #1
vtrn.32 q1, q3
vmull.u8 q2, d2, d1
@@ -1802,20 +1809,21 @@ function ff_put_vp8_bilin4_h_neon, export=1
vst1.32 {d4[1]}, [r0,:32], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin4_v_neon, export=1
- ldr r3, [sp, #8] @ my
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #12] @ my
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r12, [sp] @ h
- vld1.32 {d2[]}, [r2], r1
+ ldr r12, [sp, #4] @ h
+ vld1.32 {d2[]}, [r2], r3
1:
vld1.32 {d3[]}, [r2]
- vld1.32 {d2[1]}, [r2], r1
- vld1.32 {d3[1]}, [r2], r1
+ vld1.32 {d2[1]}, [r2], r3
+ vld1.32 {d3[1]}, [r2], r3
vmull.u8 q2, d2, d1
vmlal.u8 q2, d3, d0
vtrn.32 d3, d2
@@ -1825,30 +1833,31 @@ function ff_put_vp8_bilin4_v_neon, export=1
subs r12, r12, #2
bgt 1b
- bx lr
+ pop {pc}
endfunc
function ff_put_vp8_bilin4_hv_neon, export=1
- ldr r3, [sp, #4] @ mx
- rsb r12, r3, #8
- vdup.8 d0, r3
+ push {lr}
+ ldr lr, [sp, #8] @ mx
+ rsb r12, lr, #8
+ vdup.8 d0, lr
vdup.8 d1, r12
- ldr r3, [sp, #8] @ my
- rsb r12, r3, #8
- vdup.8 d2, r3
+ ldr lr, [sp, #12] @ my
+ rsb r12, lr, #8
+ vdup.8 d2, lr
vdup.8 d3, r12
- ldr r12, [sp] @ h
+ ldr r12, [sp, #4] @ h
- vld1.8 {d4}, [r2], r1
+ vld1.8 {d4}, [r2], r3
vext.8 d5, d4, d4, #1
vmull.u8 q9, d4, d1
vmlal.u8 q9, d5, d0
vrshrn.u16 d22, q9, #3
1:
subs r12, r12, #2
- vld1.8 {d6}, [r2], r1
+ vld1.8 {d6}, [r2], r3
vext.8 d7, d6, d6, #1
- vld1.8 {d4}, [r2], r1
+ vld1.8 {d4}, [r2], r3
vext.8 d5, d4, d4, #1
vtrn.32 q3, q2
vmull.u8 q8, d6, d1
@@ -1863,5 +1872,5 @@ function ff_put_vp8_bilin4_hv_neon, export=1
vst1.32 {d20[1]}, [r0,:32], r1
bgt 1b
- bx lr
+ pop {pc}
endfunc