diff options
Diffstat (limited to 'ffmpeg/libavcodec/arm')
41 files changed, 610 insertions, 578 deletions
diff --git a/ffmpeg/libavcodec/arm/Makefile b/ffmpeg/libavcodec/arm/Makefile index 011404c..277abd9 100644 --- a/ffmpeg/libavcodec/arm/Makefile +++ b/ffmpeg/libavcodec/arm/Makefile @@ -1,116 +1,98 @@ ARCH_HEADERS = mathops.h +OBJS += arm/fmtconvert_init_arm.o + +OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_init_arm.o \ + arm/sbrdsp_init_arm.o OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ arm/ac3dsp_arm.o - -OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_init_arm.o \ - arm/aacpsdsp_init_arm.o - -OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ - -ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o - +OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o +OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ + arm/dsputil_arm.o \ + arm/jrevdct_arm.o \ + arm/simple_idct_arm.o +OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o \ + arm/fft_fixed_init_arm.o OBJS-$(CONFIG_FLAC_DECODER) += arm/flacdsp_init_arm.o \ - arm/flacdsp_arm.o \ - + arm/flacdsp_arm.o +OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o +OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o +OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o +OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o +OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \ + arm/hpeldsp_arm.o OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o -ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o - OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o +OBJS-$(CONFIG_VC1_DECODER) += arm/vc1dsp_init_arm.o OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o -OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o -OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o +OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o -ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ - arm/vp8dsp_init_armv6.o \ - arm/vp8dsp_armv6.o - -OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o -OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o -OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o -OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o - -OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_arm.o \ - arm/hpeldsp_init_arm.o - OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \ - arm/rv40dsp_init_arm.o \ - + arm/rv40dsp_init_arm.o OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o \ -OBJS += arm/dsputil_init_arm.o \ - arm/dsputil_arm.o \ - arm/fft_init_arm.o \ - arm/fft_fixed_init_arm.o \ - arm/fmtconvert_init_arm.o \ - arm/jrevdct_arm.o \ - arm/simple_idct_arm.o \ - +ARMV5TE-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv5te.o \ + arm/simple_idct_armv5te.o ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \ - arm/mpegvideo_armv5te_s.o \ - + arm/mpegvideo_armv5te_s.o ARMV5TE-OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_armv5te.o \ - arm/videodsp_armv5te.o \ - -ARMV5TE-OBJS += arm/dsputil_init_armv5te.o \ - arm/simple_idct_armv5te.o \ + arm/videodsp_armv5te.o -ARMV6-OBJS += arm/dsputil_init_armv6.o \ +ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ arm/dsputil_armv6.o \ arm/simple_idct_armv6.o \ -ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_armv6.o \ - arm/hpeldsp_init_armv6.o - -VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o +ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o +ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o +ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ + arm/hpeldsp_armv6.o +ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o +ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ + arm/vp8dsp_init_armv6.o \ + arm/vp8dsp_armv6.o -NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ - arm/fft_fixed_neon.o \ +VFP-OBJS += arm/fmtconvert_vfp.o -NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ - arm/mdct_fixed_neon.o \ +VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ + arm/synth_filter_vfp.o +VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o +VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o +VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp_armv6.o -NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o \ +NEON-OBJS += arm/fmtconvert_neon.o +NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o +NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_neon.o \ + arm/sbrdsp_neon.o +NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \ + arm/synth_filter_neon.o +NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ + arm/dsputil_neon.o \ + arm/int_neon.o \ + arm/simple_idct_neon.o +NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ + arm/fft_fixed_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_neon.o \ - arm/h264idct_neon.o \ - -NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \ - + arm/h264idct_neon.o +NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \ - -NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_neon.o \ - arm/hpeldsp_init_neon.o - -NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o - -NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_neon.o \ - arm/aacpsdsp_neon.o - -NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \ - arm/synth_filter_neon.o \ - + arm/hpeldsp_neon.o +NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \ + arm/hpeldsp_neon.o +NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ + arm/mdct_fixed_neon.o NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o +NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ - arm/rv40dsp_neon.o \ - + arm/rv40dsp_neon.o +NEON-OBJS-$(CONFIG_VC1_DECODER) += arm/vc1dsp_init_neon.o \ + arm/vc1dsp_neon.o NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o - NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o - -NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ - -NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \ - +NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_neon.o NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_neon.o \ arm/vp8dsp_neon.o - -NEON-OBJS += arm/dsputil_init_neon.o \ - arm/dsputil_neon.o \ - arm/fmtconvert_neon.o \ - arm/int_neon.o \ - arm/simple_idct_neon.o \ diff --git a/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c b/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c index 6326376..e04787c 100644 --- a/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/aacpsdsp_init_arm.c @@ -1,20 +1,20 @@ /* * Copyright (c) 2012 Mans Rullgard * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/aacpsdsp_neon.S b/ffmpeg/libavcodec/arm/aacpsdsp_neon.S index fb00900..a93bbfe 100644 --- a/ffmpeg/libavcodec/arm/aacpsdsp_neon.S +++ b/ffmpeg/libavcodec/arm/aacpsdsp_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2012 Mans Rullgard * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/ac3dsp_arm.S b/ffmpeg/libavcodec/arm/ac3dsp_arm.S index ed8eb37..1aea190 100644 --- a/ffmpeg/libavcodec/arm/ac3dsp_arm.S +++ b/ffmpeg/libavcodec/arm/ac3dsp_arm.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/ac3dsp_armv6.S b/ffmpeg/libavcodec/arm/ac3dsp_armv6.S index 2028d0b..1d2563d 100644 --- a/ffmpeg/libavcodec/arm/ac3dsp_armv6.S +++ b/ffmpeg/libavcodec/arm/ac3dsp_armv6.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c b/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c index ffe0747..a3c32ff 100644 --- a/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/ac3dsp_init_arm.c @@ -31,6 +31,8 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); +void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, + const int16_t *window, unsigned n); void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4], const int32_t *coef0, const int32_t *coef1, @@ -64,6 +66,7 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon; c->float_to_fixed24 = ff_float_to_fixed24_neon; c->extract_exponents = ff_ac3_extract_exponents_neon; + c->apply_window_int16 = ff_apply_window_int16_neon; c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon; c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon; } diff --git a/ffmpeg/libavcodec/arm/ac3dsp_neon.S b/ffmpeg/libavcodec/arm/ac3dsp_neon.S index 42f35e3..89d0ae8 100644 --- a/ffmpeg/libavcodec/arm/ac3dsp_neon.S +++ b/ffmpeg/libavcodec/arm/ac3dsp_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -109,6 +109,29 @@ function ff_ac3_extract_exponents_neon, export=1 bx lr endfunc +function ff_apply_window_int16_neon, export=1 + push {r4,lr} + add r4, r1, r3, lsl #1 + add lr, r0, r3, lsl #1 + sub r4, r4, #16 + sub lr, lr, #16 + mov r12, #-16 +1: + vld1.16 {q0}, [r1,:128]! + vld1.16 {q2}, [r2,:128]! + vld1.16 {q1}, [r4,:128], r12 + vrev64.16 q3, q2 + vqrdmulh.s16 q0, q0, q2 + vqrdmulh.s16 d2, d2, d7 + vqrdmulh.s16 d3, d3, d6 + vst1.16 {q0}, [r0,:128]! + vst1.16 {q1}, [lr,:128], r12 + subs r3, r3, #16 + bgt 1b + + pop {r4,pc} +endfunc + function ff_ac3_sum_square_butterfly_int32_neon, export=1 vmov.i64 q0, #0 vmov.i64 q1, #0 diff --git a/ffmpeg/libavcodec/arm/dca.h b/ffmpeg/libavcodec/arm/dca.h index 2cfd18a..35971a8 100644 --- a/ffmpeg/libavcodec/arm/dca.h +++ b/ffmpeg/libavcodec/arm/dca.h @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -30,50 +30,48 @@ #define decode_blockcodes decode_blockcodes static inline int decode_blockcodes(int code1, int code2, int levels, - int *values) + int32_t *values) { - int v0, v1, v2, v3, v4, v5; + int32_t v0, v1, v2, v3, v4, v5; - __asm__ ("smmul %8, %14, %18 \n" - "smmul %11, %15, %18 \n" - "smlabb %14, %8, %17, %14 \n" - "smlabb %15, %11, %17, %15 \n" - "smmul %9, %8, %18 \n" - "smmul %12, %11, %18 \n" - "sub %14, %14, %16, lsr #1 \n" - "sub %15, %15, %16, lsr #1 \n" - "smlabb %8, %9, %17, %8 \n" - "smlabb %11, %12, %17, %11 \n" - "smmul %10, %9, %18 \n" - "smmul %13, %12, %18 \n" - "str %14, %0 \n" - "str %15, %4 \n" - "sub %8, %8, %16, lsr #1 \n" - "sub %11, %11, %16, lsr #1 \n" - "smlabb %9, %10, %17, %9 \n" - "smlabb %12, %13, %17, %12 \n" - "smmul %14, %10, %18 \n" - "smmul %15, %13, %18 \n" - "str %8, %1 \n" - "str %11, %5 \n" - "sub %9, %9, %16, lsr #1 \n" - "sub %12, %12, %16, lsr #1 \n" - "smlabb %10, %14, %17, %10 \n" - "smlabb %13, %15, %17, %13 \n" - "str %9, %2 \n" - "str %12, %6 \n" - "sub %10, %10, %16, lsr #1 \n" - "sub %13, %13, %16, lsr #1 \n" - "str %10, %3 \n" - "str %13, %7 \n" - : "=m"(values[0]), "=m"(values[1]), - "=m"(values[2]), "=m"(values[3]), - "=m"(values[4]), "=m"(values[5]), - "=m"(values[6]), "=m"(values[7]), - "=&r"(v0), "=&r"(v1), "=&r"(v2), + __asm__ ("smmul %0, %6, %10 \n" + "smmul %3, %7, %10 \n" + "smlabb %6, %0, %9, %6 \n" + "smlabb %7, %3, %9, %7 \n" + "smmul %1, %0, %10 \n" + "smmul %4, %3, %10 \n" + "sub %6, %6, %8, lsr #1 \n" + "sub %7, %7, %8, lsr #1 \n" + "smlabb %0, %1, %9, %0 \n" + "smlabb %3, %4, %9, %3 \n" + "smmul %2, %1, %10 \n" + "smmul %5, %4, %10 \n" + "str %6, [%11, #0] \n" + "str %7, [%11, #16] \n" + "sub %0, %0, %8, lsr #1 \n" + "sub %3, %3, %8, lsr #1 \n" + "smlabb %1, %2, %9, %1 \n" + "smlabb %4, %5, %9, %4 \n" + "smmul %6, %2, %10 \n" + "smmul %7, %5, %10 \n" + "str %0, [%11, #4] \n" + "str %3, [%11, #20] \n" + "sub %1, %1, %8, lsr #1 \n" + "sub %4, %4, %8, lsr #1 \n" + "smlabb %2, %6, %9, %2 \n" + "smlabb %5, %7, %9, %5 \n" + "str %1, [%11, #8] \n" + "str %4, [%11, #24] \n" + "sub %2, %2, %8, lsr #1 \n" + "sub %5, %5, %8, lsr #1 \n" + "str %2, [%11, #12] \n" + "str %5, [%11, #28] \n" + : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "=&r"(v4), "=&r"(v5), "+&r"(code1), "+&r"(code2) - : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels])); + : "r"(levels - 1), "r"(-levels), + "r"(ff_inverse[levels]), "r"(values) + : "memory"); return code1 | code2; } diff --git a/ffmpeg/libavcodec/arm/dcadsp_init_arm.c b/ffmpeg/libavcodec/arm/dcadsp_init_arm.c index 56568e0..8893f48 100644 --- a/ffmpeg/libavcodec/arm/dcadsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/dcadsp_init_arm.c @@ -24,13 +24,47 @@ #include "libavutil/attributes.h" #include "libavcodec/dcadsp.h" +void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, + int decifactor, float scale); +void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, + SynthFilterContext *synth, FFTContext *imdct, + float synth_buf_ptr[512], + int *synth_buf_offset, float synth_buf2[32], + const float window[512], float *samples_out, + float raXin[32], float scale); void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, int decifactor, float scale); +void ff_synth_filter_float_vfp(FFTContext *imdct, + float *synth_buf_ptr, int *synth_buf_offset, + float synth_buf2[32], const float window[512], + float out[32], const float in[32], + float scale); + +void ff_synth_filter_float_neon(FFTContext *imdct, + float *synth_buf_ptr, int *synth_buf_offset, + float synth_buf2[32], const float window[512], + float out[32], const float in[32], + float scale); + av_cold void ff_dcadsp_init_arm(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); + if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { + s->lfe_fir = ff_dca_lfe_fir_vfp; + s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; + } if (have_neon(cpu_flags)) s->lfe_fir = ff_dca_lfe_fir_neon; } + +av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) + s->synth_filter_float = ff_synth_filter_float_vfp; + if (have_neon(cpu_flags)) + s->synth_filter_float = ff_synth_filter_float_neon; +} diff --git a/ffmpeg/libavcodec/arm/dsputil_init_neon.c b/ffmpeg/libavcodec/arm/dsputil_init_neon.c index 6d19af7..c1f250a 100644 --- a/ffmpeg/libavcodec/arm/dsputil_init_neon.c +++ b/ffmpeg/libavcodec/arm/dsputil_init_neon.c @@ -45,9 +45,6 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul); -void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, - const int16_t *window, unsigned n); - av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { const int high_bit_depth = avctx->bits_per_raw_sample > 8; @@ -76,6 +73,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->scalarproduct_int16 = ff_scalarproduct_int16_neon; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; - - c->apply_window_int16 = ff_apply_window_int16_neon; } diff --git a/ffmpeg/libavcodec/arm/dsputil_neon.S b/ffmpeg/libavcodec/arm/dsputil_neon.S index 307e122..6c8231e 100644 --- a/ffmpeg/libavcodec/arm/dsputil_neon.S +++ b/ffmpeg/libavcodec/arm/dsputil_neon.S @@ -169,29 +169,6 @@ NOVFP ldr r2, [sp] bx lr endfunc -function ff_apply_window_int16_neon, export=1 - push {r4,lr} - add r4, r1, r3, lsl #1 - add lr, r0, r3, lsl #1 - sub r4, r4, #16 - sub lr, lr, #16 - mov r12, #-16 -1: - vld1.16 {q0}, [r1,:128]! - vld1.16 {q2}, [r2,:128]! - vld1.16 {q1}, [r4,:128], r12 - vrev64.16 q3, q2 - vqrdmulh.s16 q0, q0, q2 - vqrdmulh.s16 d2, d2, d7 - vqrdmulh.s16 d3, d3, d6 - vst1.16 {q0}, [r0,:128]! - vst1.16 {q1}, [lr,:128], r12 - subs r3, r3, #16 - bgt 1b - - pop {r4,pc} -endfunc - function ff_vector_clip_int32_neon, export=1 vdup.32 q0, r2 vdup.32 q1, r3 diff --git a/ffmpeg/libavcodec/arm/fft_fixed_neon.S b/ffmpeg/libavcodec/arm/fft_fixed_neon.S index fa33eac..d4a38a2 100644 --- a/ffmpeg/libavcodec/arm/fft_fixed_neon.S +++ b/ffmpeg/libavcodec/arm/fft_fixed_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/fft_init_arm.c b/ffmpeg/libavcodec/arm/fft_init_arm.c index 8c98abc..7e49b9c 100644 --- a/ffmpeg/libavcodec/arm/fft_init_arm.c +++ b/ffmpeg/libavcodec/arm/fft_init_arm.c @@ -26,22 +26,25 @@ void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); +void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input); + void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z); -void ff_synth_filter_float_neon(FFTContext *imdct, - float *synth_buf_ptr, int *synth_buf_offset, - float synth_buf2[32], const float window[512], - float out[32], const float in[32], - float scale); - av_cold void ff_fft_init_arm(FFTContext *s) { int cpu_flags = av_get_cpu_flags(); + if (have_vfp(cpu_flags)) { +#if CONFIG_MDCT + if (!have_vfpv3(cpu_flags)) + s->imdct_half = ff_imdct_half_vfp; +#endif + } + if (have_neon(cpu_flags)) { #if CONFIG_FFT s->fft_permute = ff_fft_permute_neon; @@ -65,13 +68,3 @@ av_cold void ff_rdft_init_arm(RDFTContext *s) s->rdft_calc = ff_rdft_calc_neon; } #endif - -#if CONFIG_DCA_DECODER -av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) -{ - int cpu_flags = av_get_cpu_flags(); - - if (have_neon(cpu_flags)) - s->synth_filter_float = ff_synth_filter_float_neon; -} -#endif diff --git a/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c b/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c index 1d99c97..37319ed 100644 --- a/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c +++ b/ffmpeg/libavcodec/arm/fmtconvert_init_arm.c @@ -25,9 +25,15 @@ #include "libavcodec/avcodec.h" #include "libavcodec/fmtconvert.h" -void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, +void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src, float mul, int len); +void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, + float mul, int len); +void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, + const int32_t *src, const float *mul, + int len); + void ff_float_to_int16_neon(int16_t *dst, const float *src, long len); void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); @@ -37,8 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx { int cpu_flags = av_get_cpu_flags(); - if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { - c->float_to_int16 = ff_float_to_int16_vfp; + if (have_vfp(cpu_flags)) { + if (!have_vfpv3(cpu_flags)) { + c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; + c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp; + } + + if (have_armv6(cpu_flags)) { + c->float_to_int16 = ff_float_to_int16_vfp; + } } if (have_neon(cpu_flags)) { diff --git a/ffmpeg/libavcodec/arm/fmtconvert_vfp.S b/ffmpeg/libavcodec/arm/fmtconvert_vfp.S index 7b012bc..b14af45 100644 --- a/ffmpeg/libavcodec/arm/fmtconvert_vfp.S +++ b/ffmpeg/libavcodec/arm/fmtconvert_vfp.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> + * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org> * * This file is part of FFmpeg. * @@ -22,57 +22,200 @@ #include "libavutil/arm/asm.S" /** - * ARM VFP optimized float to int16 conversion. - * Assume that len is a positive number and is multiple of 8, destination - * buffer is at least 4 bytes aligned (8 bytes alignment is better for - * performance), little-endian byte sex. + * ARM VFP optimised int32 to float conversion. + * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned + * (16 bytes alignment is best for BCM2835), little-endian. */ -@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) -function ff_float_to_int16_vfp, export=1 - push {r4-r8,lr} - vpush {d8-d11} - vldmia r1!, {s16-s23} - vcvt.s32.f32 s0, s16 - vcvt.s32.f32 s1, s17 - vcvt.s32.f32 s2, s18 - vcvt.s32.f32 s3, s19 - vcvt.s32.f32 s4, s20 - vcvt.s32.f32 s5, s21 - vcvt.s32.f32 s6, s22 - vcvt.s32.f32 s7, s23 +@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, const int32_t *src, const float *mul, int len) +function ff_int32_to_float_fmul_array8_vfp, export=1 + push {lr} + ldr a1, [sp, #4] + subs lr, a1, #3*8 + bcc 50f @ too short to pipeline + @ Now need to find (len / 8) % 3. The approximation + @ x / 24 = (x * 0xAB) >> 12 + @ is good for x < 4096, which is true for both AC3 and DCA. + mov a1, #0xAB + ldr ip, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 + mul a1, lr, a1 + vpush {s16-s31} + mov a1, a1, lsr #12 + add a1, a1, a1, lsl #1 + rsb a1, a1, lr, lsr #3 + cmp a1, #1 + fmrx a1, FPSCR + fmxr FPSCR, ip + beq 11f + blo 10f + @ Array is (2 + multiple of 3) x 8 floats long + @ drop through... + vldmia a3!, {s16-s23} + vldmia a4!, {s2,s3} + vldmia a3!, {s24-s31} + vcvt.f32.s32 s16, s16 + vcvt.f32.s32 s17, s17 + vcvt.f32.s32 s18, s18 + vcvt.f32.s32 s19, s19 + vcvt.f32.s32 s20, s20 + vcvt.f32.s32 s21, s21 + vcvt.f32.s32 s22, s22 + vcvt.f32.s32 s23, s23 + vmul.f32 s16, s16, s2 + @ drop through... +3: + vldmia a3!, {s8-s15} + vldmia a4!, {s1} + vcvt.f32.s32 s24, s24 + vcvt.f32.s32 s25, s25 + vcvt.f32.s32 s26, s26 + vcvt.f32.s32 s27, s27 + vcvt.f32.s32 s28, s28 + vcvt.f32.s32 s29, s29 + vcvt.f32.s32 s30, s30 + vcvt.f32.s32 s31, s31 + vmul.f32 s24, s24, s3 + vstmia a2!, {s16-s19} + vstmia a2!, {s20-s23} +2: + vldmia a3!, {s16-s23} + vldmia a4!, {s2} + vcvt.f32.s32 s8, s8 + vcvt.f32.s32 s9, s9 + vcvt.f32.s32 s10, s10 + vcvt.f32.s32 s11, s11 + vcvt.f32.s32 s12, s12 + vcvt.f32.s32 s13, s13 + vcvt.f32.s32 s14, s14 + vcvt.f32.s32 s15, s15 + vmul.f32 s8, s8, s1 + vstmia a2!, {s24-s27} + vstmia a2!, {s28-s31} 1: - subs r2, r2, #8 - vmov r3, r4, s0, s1 - vmov r5, r6, s2, s3 - vmov r7, r8, s4, s5 - vmov ip, lr, s6, s7 - it gt - vldmiagt r1!, {s16-s23} - ssat r4, #16, r4 - ssat r3, #16, r3 - ssat r6, #16, r6 - ssat r5, #16, r5 - pkhbt r3, r3, r4, lsl #16 - pkhbt r4, r5, r6, lsl #16 - itttt gt - vcvtgt.s32.f32 s0, s16 - vcvtgt.s32.f32 s1, s17 - vcvtgt.s32.f32 s2, s18 - vcvtgt.s32.f32 s3, s19 - itttt gt - vcvtgt.s32.f32 s4, s20 - vcvtgt.s32.f32 s5, s21 - vcvtgt.s32.f32 s6, s22 - vcvtgt.s32.f32 s7, s23 - ssat r8, #16, r8 - ssat r7, #16, r7 - ssat lr, #16, lr - ssat ip, #16, ip - pkhbt r5, r7, r8, lsl #16 - pkhbt r6, ip, lr, lsl #16 - stmia r0!, {r3-r6} - bgt 1b + vldmia a3!, {s24-s31} + vldmia a4!, {s3} + vcvt.f32.s32 s16, s16 + vcvt.f32.s32 s17, s17 + vcvt.f32.s32 s18, s18 + vcvt.f32.s32 s19, s19 + vcvt.f32.s32 s20, s20 + vcvt.f32.s32 s21, s21 + vcvt.f32.s32 s22, s22 + vcvt.f32.s32 s23, s23 + vmul.f32 s16, s16, s2 + vstmia a2!, {s8-s11} + vstmia a2!, {s12-s15} - vpop {d8-d11} - pop {r4-r8,pc} + subs lr, lr, #8*3 + bpl 3b + + vcvt.f32.s32 s24, s24 + vcvt.f32.s32 s25, s25 + vcvt.f32.s32 s26, s26 + vcvt.f32.s32 s27, s27 + vcvt.f32.s32 s28, s28 + vcvt.f32.s32 s29, s29 + vcvt.f32.s32 s30, s30 + vcvt.f32.s32 s31, s31 + vmul.f32 s24, s24, s3 + vstmia a2!, {s16-s19} + vstmia a2!, {s20-s23} + vstmia a2!, {s24-s27} + vstmia a2!, {s28-s31} + + fmxr FPSCR, a1 + vpop {s16-s31} + pop {pc} + +10: @ Array is (multiple of 3) x 8 floats long + vldmia a3!, {s8-s15} + vldmia a4!, {s1,s2} + vldmia a3!, {s16-s23} + vcvt.f32.s32 s8, s8 + vcvt.f32.s32 s9, s9 + vcvt.f32.s32 s10, s10 + vcvt.f32.s32 s11, s11 + vcvt.f32.s32 s12, s12 + vcvt.f32.s32 s13, s13 + vcvt.f32.s32 s14, s14 + vcvt.f32.s32 s15, s15 + vmul.f32 s8, s8, s1 + b 1b + +11: @ Array is (1 + multiple of 3) x 8 floats long + vldmia a3!, {s24-s31} + vldmia a4!, {s3} + vldmia a3!, {s8-s15} + vldmia a4!, {s1} + vcvt.f32.s32 s24, s24 + vcvt.f32.s32 s25, s25 + vcvt.f32.s32 s26, s26 + vcvt.f32.s32 s27, s27 + vcvt.f32.s32 s28, s28 + vcvt.f32.s32 s29, s29 + vcvt.f32.s32 s30, s30 + vcvt.f32.s32 s31, s31 + vmul.f32 s24, s24, s3 + b 2b + +50: + ldr lr, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 + fmrx ip, FPSCR + fmxr FPSCR, lr +51: + vldmia a3!, {s8-s15} + vldmia a4!, {s0} + vcvt.f32.s32 s8, s8 + vcvt.f32.s32 s9, s9 + vcvt.f32.s32 s10, s10 + vcvt.f32.s32 s11, s11 + vcvt.f32.s32 s12, s12 + vcvt.f32.s32 s13, s13 + vcvt.f32.s32 s14, s14 + vcvt.f32.s32 s15, s15 + vmul.f32 s8, s8, s0 + subs a1, a1, #8 + vstmia a2!, {s8-s11} + vstmia a2!, {s12-s15} + bne 51b + + fmxr FPSCR, ip + pop {pc} +endfunc + +/** + * ARM VFP optimised int32 to float conversion. + * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned + * (16 bytes alignment is best for BCM2835), little-endian. + * TODO: could be further optimised by unrolling and interleaving, as above + */ +@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len) +function ff_int32_to_float_fmul_scalar_vfp, export=1 +VFP tmp .req a4 +VFP len .req a3 +NOVFP tmp .req a3 +NOVFP len .req a4 +NOVFP vmov s0, a3 + ldr tmp, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 + fmrx ip, FPSCR + fmxr FPSCR, tmp +1: + vldmia a2!, {s8-s15} + vcvt.f32.s32 s8, s8 + vcvt.f32.s32 s9, s9 + vcvt.f32.s32 s10, s10 + vcvt.f32.s32 s11, s11 + vcvt.f32.s32 s12, s12 + vcvt.f32.s32 s13, s13 + vcvt.f32.s32 s14, s14 + vcvt.f32.s32 s15, s15 + vmul.f32 s8, s8, s0 + subs len, len, #8 + vstmia a1!, {s8-s11} + vstmia a1!, {s12-s15} + bne 1b + + fmxr FPSCR, ip + bx lr endfunc + .unreq tmp + .unreq len diff --git a/ffmpeg/libavcodec/arm/h264cmc_neon.S b/ffmpeg/libavcodec/arm/h264cmc_neon.S index 3427e36..0bcae11 100644 --- a/ffmpeg/libavcodec/arm/h264cmc_neon.S +++ b/ffmpeg/libavcodec/arm/h264cmc_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -39,6 +39,9 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 add r6, r6, r7, lsl #1 vld1.16 {d22[],d23[]}, [r6,:16] .endif + .ifc \codec,vc1 + vmov.u16 q11, #28 + .endif A muls r7, r4, r5 T mul r7, r4, r5 @@ -183,6 +186,9 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 add r6, r6, r7, lsl #1 vld1.16 {d22[],d23[]}, [r6,:16] .endif + .ifc \codec,vc1 + vmov.u16 q11, #28 + .endif A muls r7, r4, r5 T mul r7, r4, r5 @@ -376,14 +382,12 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1 endfunc .endm -#if CONFIG_H264_DECODER h264_chroma_mc8 put h264_chroma_mc8 avg h264_chroma_mc4 put h264_chroma_mc4 avg h264_chroma_mc2 put h264_chroma_mc2 avg -#endif #if CONFIG_RV40_DECODER const rv40bias @@ -398,3 +402,10 @@ endconst h264_chroma_mc4 put, rv40 h264_chroma_mc4 avg, rv40 #endif + +#if CONFIG_VC1_DECODER + h264_chroma_mc8 put, vc1 + h264_chroma_mc8 avg, vc1 + h264_chroma_mc4 put, vc1 + h264_chroma_mc4 avg, vc1 +#endif diff --git a/ffmpeg/libavcodec/arm/h264dsp_init_arm.c b/ffmpeg/libavcodec/arm/h264dsp_init_arm.c index 785b604..2cafbaf 100644 --- a/ffmpeg/libavcodec/arm/h264dsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/h264dsp_init_arm.c @@ -24,6 +24,8 @@ #include "libavutil/arm/cpu.h" #include "libavcodec/h264dsp.h" +int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size); + void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, @@ -68,8 +70,8 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[6*8]); -static av_cold void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, - const int chroma_format_idc) +static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc) { #if HAVE_NEON if (bit_depth == 8) { @@ -106,6 +108,8 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, { int cpu_flags = av_get_cpu_flags(); + if (have_armv6(cpu_flags)) + c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6; if (have_neon(cpu_flags)) - ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); + h264dsp_init_neon(c, bit_depth, chroma_format_idc); } diff --git a/ffmpeg/libavcodec/arm/h264idct_neon.S b/ffmpeg/libavcodec/arm/h264idct_neon.S index fa5b90c..2edeca2 100644 --- a/ffmpeg/libavcodec/arm/h264idct_neon.S +++ b/ffmpeg/libavcodec/arm/h264idct_neon.S @@ -187,8 +187,8 @@ endfunc vshr.s16 q2, q10, #1 vadd.i16 q0, q8, q12 vld1.16 {q14-q15},[r1,:128] - vst1.16 {q7}, [r1,:128]! - vst1.16 {q7}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! vsub.i16 q1, q8, q12 vshr.s16 q3, q14, #1 vsub.i16 q2, q2, q14 @@ -267,16 +267,16 @@ endfunc .endm function ff_h264_idct8_add_neon, export=1 - vmov.i16 q7, #0 + vmov.i16 q3, #0 vld1.16 {q8-q9}, [r1,:128] - vst1.16 {q7}, [r1,:128]! - vst1.16 {q7}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! vld1.16 {q10-q11},[r1,:128] - vst1.16 {q7}, [r1,:128]! - vst1.16 {q7}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! vld1.16 {q12-q13},[r1,:128] - vst1.16 {q7}, [r1,:128]! - vst1.16 {q7}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! + vst1.16 {q3}, [r1,:128]! idct8x8_cols 0 idct8x8_cols 1 diff --git a/ffmpeg/libavcodec/arm/h264pred_init_arm.c b/ffmpeg/libavcodec/arm/h264pred_init_arm.c index 5ec39ce..1562f0b 100644 --- a/ffmpeg/libavcodec/arm/h264pred_init_arm.c +++ b/ffmpeg/libavcodec/arm/h264pred_init_arm.c @@ -45,9 +45,9 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride); -static av_cold void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, - const int bit_depth, - const int chroma_format_idc) +static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id, + const int bit_depth, + const int chroma_format_idc) { #if HAVE_NEON const int high_depth = bit_depth > 8; @@ -88,5 +88,5 @@ av_cold void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int cpu_flags = av_get_cpu_flags(); if (have_neon(cpu_flags)) - ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc); + h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc); } diff --git a/ffmpeg/libavcodec/arm/hpeldsp_arm.h b/ffmpeg/libavcodec/arm/hpeldsp_arm.h index e79bc6f..3f18c62 100644 --- a/ffmpeg/libavcodec/arm/hpeldsp_arm.h +++ b/ffmpeg/libavcodec/arm/hpeldsp_arm.h @@ -23,7 +23,7 @@ #include "libavcodec/hpeldsp.h" -void ff_hpeldsp_init_armv6(HpelDSPContext* c, int flags); +void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags); void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags); #endif /* AVCODEC_ARM_HPELDSP_H */ diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c b/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c index bae93eb..2cc2b78 100644 --- a/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/hpeldsp_init_arm.c @@ -20,7 +20,9 @@ */ #include "libavutil/arm/cpu.h" +#include "libavutil/attributes.h" #include "libavcodec/bit_depth_template.c" // for CALL_2X_PIXELS +#include "libavcodec/rnd_avg.h" #include "hpeldsp_arm.h" void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); @@ -41,7 +43,7 @@ CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm, ff_put_no_rnd_pixels8_x2_arm, 8) CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm, ff_put_no_rnd_pixels8_y2_arm, 8) CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8) -void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags) +av_cold void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags) { int cpu_flags = av_get_cpu_flags(); @@ -63,6 +65,8 @@ void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags) c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm; c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm; - if (have_armv6(cpu_flags)) ff_hpeldsp_init_armv6(c, flags); - if (have_neon(cpu_flags)) ff_hpeldsp_init_neon(c, flags); + if (have_armv6(cpu_flags)) + ff_hpeldsp_init_armv6(c, flags); + if (have_neon(cpu_flags)) + ff_hpeldsp_init_neon(c, flags); } diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c b/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c index da4caf8..967a8e0 100644 --- a/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c +++ b/ffmpeg/libavcodec/arm/hpeldsp_init_armv6.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <stddef.h> #include <stdint.h> #include "libavutil/attributes.h" diff --git a/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c b/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c index d577735..d9feadd 100644 --- a/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c +++ b/ffmpeg/libavcodec/arm/hpeldsp_init_neon.c @@ -19,8 +19,10 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <stddef.h> #include <stdint.h> +#include "libavutil/attributes.h" #include "hpeldsp_arm.h" void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); @@ -50,7 +52,7 @@ void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); -void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags) +av_cold void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags) { c->put_pixels_tab[0][0] = ff_put_pixels16_neon; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon; diff --git a/ffmpeg/libavcodec/arm/int_neon.S b/ffmpeg/libavcodec/arm/int_neon.S index 6b28a97..b3f5a69 100644 --- a/ffmpeg/libavcodec/arm/int_neon.S +++ b/ffmpeg/libavcodec/arm/int_neon.S @@ -1,6 +1,6 @@ /* * ARM NEON optimised integer operations - * Copyright (c) 2009 Kostya Shishkov + * Copyright (c) 2009 Konstantin Shishkov * * This file is part of FFmpeg. * @@ -41,10 +41,10 @@ function ff_scalarproduct_int16_neon, export=1 vpadd.s32 d16, d0, d1 vpadd.s32 d17, d2, d3 - vpadd.s32 d10, d4, d5 - vpadd.s32 d11, d6, d7 + vpadd.s32 d18, d4, d5 + vpadd.s32 d19, d6, d7 vpadd.s32 d0, d16, d17 - vpadd.s32 d1, d10, d11 + vpadd.s32 d1, d18, d19 vpadd.s32 d2, d0, d1 vpaddl.s32 d3, d2 vmov.32 r0, d3[0] @@ -81,10 +81,10 @@ function ff_scalarproduct_and_madd_int16_neon, export=1 vpadd.s32 d16, d0, d1 vpadd.s32 d17, d2, d3 - vpadd.s32 d10, d4, d5 - vpadd.s32 d11, d6, d7 + vpadd.s32 d18, d4, d5 + vpadd.s32 d19, d6, d7 vpadd.s32 d0, d16, d17 - vpadd.s32 d1, d10, d11 + vpadd.s32 d1, d18, d19 vpadd.s32 d2, d0, d1 vpaddl.s32 d3, d2 vmov.32 r0, d3[0] diff --git a/ffmpeg/libavcodec/arm/mdct_fixed_neon.S b/ffmpeg/libavcodec/arm/mdct_fixed_neon.S index c77be59..365c5e7 100644 --- a/ffmpeg/libavcodec/arm/mdct_fixed_neon.S +++ b/ffmpeg/libavcodec/arm/mdct_fixed_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S index 49bd0bc..977abb6 100644 --- a/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S +++ b/ffmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c b/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c index e73aee6..98e0c8a 100644 --- a/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/mpegaudiodsp_init_arm.c @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Mans Rullgard * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/neon.S b/ffmpeg/libavcodec/arm/neon.S index 716a607..787bc4b 100644 --- a/ffmpeg/libavcodec/arm/neon.S +++ b/ffmpeg/libavcodec/arm/neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/rv34dsp_neon.S b/ffmpeg/libavcodec/arm/rv34dsp_neon.S index a29123f..3d4a83d 100644 --- a/ffmpeg/libavcodec/arm/rv34dsp_neon.S +++ b/ffmpeg/libavcodec/arm/rv34dsp_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c b/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c index fec3702..3bf9ac7 100644 --- a/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/rv40dsp_init_arm.c @@ -70,7 +70,7 @@ void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_ int filter_q1, int alpha, int beta, int lim_p0q0, int lim_q1, int lim_p1); -static av_cold void ff_rv40dsp_init_neon(RV34DSPContext *c) +static av_cold void rv40dsp_init_neon(RV34DSPContext *c) { c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon; c->put_pixels_tab[0][ 3] = ff_put_rv40_qpel16_mc30_neon; @@ -144,5 +144,5 @@ av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c) int cpu_flags = av_get_cpu_flags(); if (have_neon(cpu_flags)) - ff_rv40dsp_init_neon(c); + rv40dsp_init_neon(c); } diff --git a/ffmpeg/libavcodec/arm/rv40dsp_neon.S b/ffmpeg/libavcodec/arm/rv40dsp_neon.S index 6bd45eb..099f88c 100644 --- a/ffmpeg/libavcodec/arm/rv40dsp_neon.S +++ b/ffmpeg/libavcodec/arm/rv40dsp_neon.S @@ -2,20 +2,20 @@ * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net> * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c b/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c index 4da7967..4fb69f9 100644 --- a/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c +++ b/ffmpeg/libavcodec/arm/sbrdsp_init_arm.c @@ -1,20 +1,20 @@ /* * Copyright (c) 2012 Mans Rullgard * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/sbrdsp_neon.S b/ffmpeg/libavcodec/arm/sbrdsp_neon.S index 610397f..e66abd6 100644 --- a/ffmpeg/libavcodec/arm/sbrdsp_neon.S +++ b/ffmpeg/libavcodec/arm/sbrdsp_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2012 Mans Rullgard * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/simple_idct_arm.S b/ffmpeg/libavcodec/arm/simple_idct_arm.S index dd1c815..50d20c9 100644 --- a/ffmpeg/libavcodec/arm/simple_idct_arm.S +++ b/ffmpeg/libavcodec/arm/simple_idct_arm.S @@ -83,7 +83,7 @@ __row_loop: orrs r5, r5, r7 @ R5=R4 | R3 | R2 | R7 beq __almost_empty_row -__b_evaluation: +@@ __b_evaluation: @@ at this point, R0=block (temp), R1(free), R2=ROWr32[1], R3=ROWr32[2], R4=ROWr32[3], @@ R5=(temp), R6=ROWr16[0], R7=ROWr16[1], R8-R11 free, @@ R12=__const_ptr_, R14=&block[n] @@ -159,7 +159,7 @@ __end_b_evaluation: @@ R5=b2, R6=ROWr16[0], R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), @@ R12=__const_ptr_, R14=&block[n] -__a_evaluation: +@@ __a_evaluation: @@ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); @@ a1 = a0 + W6 * row[2]; @@ a2 = a0 - W6 * row[2]; @@ -295,7 +295,7 @@ __end_row_loop: add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block. __col_loop: -__b_evaluation2: +@@ __b_evaluation2: @@ at this point, R0=block (temp), R1-R11 (free) @@ R12=__const_ptr_, R14=&block[n] @@ proceed with b0-b3 first, followed by a0-a3 @@ -357,12 +357,12 @@ __b_evaluation2: it ne mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1 @@ R4 is free now -__end_b_evaluation2: +@@ __end_b_evaluation2: @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), @@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), @@ R12=__const_ptr_, R14=&block[n] -__a_evaluation2: +@@ __a_evaluation2: @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1)); @@ a1 = a0 + W6 * row[2]; @@ a2 = a0 - W6 * row[2]; @@ -414,7 +414,7 @@ __a_evaluation2: itt ne subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1) addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2) -__end_a_evaluation2: +@@ __end_a_evaluation2: @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), @@ R12=__const_ptr_, R14=&block[n] @@ -452,7 +452,7 @@ __end_a_evaluation2: strh r8, [r14, #96] strh r9, [r14, #112] -__end_col_loop: +@@ __end_col_loop: @@ at this point, R0-R11 (free) @@ R12=__const_ptr_, R14=&block[n] ldr r0, [sp, #0] @ R0=block @@ -463,7 +463,7 @@ __end_col_loop: -__end_simple_idct_arm: +@@ __end_simple_idct_arm: @@ restore registers to previous status! add sp, sp, #8 @@ the local variables! ldmfd sp!, {r4-r11, r15} @@ update PC with LR content. diff --git a/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c b/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c deleted file mode 100644 index f53cbae..0000000 --- a/ffmpeg/libavcodec/arm/vp56dsp_init_arm.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "libavutil/attributes.h" -#include "libavutil/arm/cpu.h" -#include "libavcodec/avcodec.h" -#include "libavcodec/vp56dsp.h" - -void ff_vp6_edge_filter_hor_neon(uint8_t *yuv, int stride, int t); -void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, int stride, int t); - -av_cold void ff_vp56dsp_init_arm(VP56DSPContext *s, enum AVCodecID codec) -{ - int cpu_flags = av_get_cpu_flags(); - - if (codec != AV_CODEC_ID_VP5 && have_neon(cpu_flags)) { - s->edge_filter_hor = ff_vp6_edge_filter_hor_neon; - s->edge_filter_ver = ff_vp6_edge_filter_ver_neon; - } -} diff --git a/ffmpeg/libavcodec/arm/vp56dsp_neon.S b/ffmpeg/libavcodec/arm/vp56dsp_neon.S deleted file mode 100644 index 03dd28d..0000000 --- a/ffmpeg/libavcodec/arm/vp56dsp_neon.S +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/arm/asm.S" - -.macro vp6_edge_filter - vdup.16 q3, r2 @ t - vmov.i16 q13, #1 - vsubl.u8 q0, d20, d18 @ p[ 0] - p[-s] - vsubl.u8 q1, d16, d22 @ p[-2*s] - p[ s] - vsubl.u8 q14, d21, d19 - vsubl.u8 q15, d17, d23 - vadd.i16 q2, q0, q0 @ 2*(p[0]-p[-s]) - vadd.i16 d29, d28, d28 - vadd.i16 q0, q0, q1 @ p[0]-p[-s] + p[-2*s]-p[s] - vadd.i16 d28, d28, d30 - vadd.i16 q0, q0, q2 @ 3*(p[0]-p[-s]) + p[-2*s]-p[s] - vadd.i16 d28, d28, d29 - vrshr.s16 q0, q0, #3 @ v - vrshr.s16 d28, d28, #3 - vsub.i16 q8, q3, q13 @ t-1 - vabs.s16 q1, q0 @ V - vshr.s16 q2, q0, #15 @ s - vabs.s16 d30, d28 - vshr.s16 d29, d28, #15 - vsub.i16 q12, q1, q3 @ V-t - vsub.i16 d31, d30, d6 - vsub.i16 q12, q12, q13 @ V-t-1 - vsub.i16 d31, d31, d26 - vcge.u16 q12, q12, q8 @ V-t-1 >= t-1 - vcge.u16 d31, d31, d16 - vadd.i16 q13, q3, q3 @ 2*t - vadd.i16 d16, d6, d6 - vsub.i16 q13, q13, q1 @ 2*t - V - vsub.i16 d16, d16, d30 - vadd.i16 q13, q13, q2 @ += s - vadd.i16 d16, d16, d29 - veor q13, q13, q2 @ ^= s - veor d16, d16, d29 - vbif q0, q13, q12 - vbif d28, d16, d31 - vmovl.u8 q1, d20 - vmovl.u8 q15, d21 - vaddw.u8 q2, q0, d18 - vaddw.u8 q3, q14, d19 - vsub.i16 q1, q1, q0 - vsub.i16 d30, d30, d28 - vqmovun.s16 d18, q2 - vqmovun.s16 d19, q3 - vqmovun.s16 d20, q1 - vqmovun.s16 d21, q15 -.endm - -function ff_vp6_edge_filter_ver_neon, export=1 - sub r0, r0, r1, lsl #1 - vld1.8 {q8}, [r0], r1 @ p[-2*s] - vld1.8 {q9}, [r0], r1 @ p[-s] - vld1.8 {q10}, [r0], r1 @ p[0] - vld1.8 {q11}, [r0] @ p[s] - vp6_edge_filter - sub r0, r0, r1, lsl #1 - sub r1, r1, #8 - vst1.8 {d18}, [r0]! - vst1.32 {d19[0]}, [r0], r1 - vst1.8 {d20}, [r0]! - vst1.32 {d21[0]}, [r0] - bx lr -endfunc - -function ff_vp6_edge_filter_hor_neon, export=1 - sub r3, r0, #1 - sub r0, r0, #2 - vld1.32 {d16[0]}, [r0], r1 - vld1.32 {d18[0]}, [r0], r1 - vld1.32 {d20[0]}, [r0], r1 - vld1.32 {d22[0]}, [r0], r1 - vld1.32 {d16[1]}, [r0], r1 - vld1.32 {d18[1]}, [r0], r1 - vld1.32 {d20[1]}, [r0], r1 - vld1.32 {d22[1]}, [r0], r1 - vld1.32 {d17[0]}, [r0], r1 - vld1.32 {d19[0]}, [r0], r1 - vld1.32 {d21[0]}, [r0], r1 - vld1.32 {d23[0]}, [r0], r1 - vtrn.8 q8, q9 - vtrn.8 q10, q11 - vtrn.16 q8, q10 - vtrn.16 q9, q11 - vp6_edge_filter - vtrn.8 q9, q10 - vst1.16 {d18[0]}, [r3], r1 - vst1.16 {d20[0]}, [r3], r1 - vst1.16 {d18[1]}, [r3], r1 - vst1.16 {d20[1]}, [r3], r1 - vst1.16 {d18[2]}, [r3], r1 - vst1.16 {d20[2]}, [r3], r1 - vst1.16 {d18[3]}, [r3], r1 - vst1.16 {d20[3]}, [r3], r1 - vst1.16 {d19[0]}, [r3], r1 - vst1.16 {d21[0]}, [r3], r1 - vst1.16 {d19[1]}, [r3], r1 - vst1.16 {d21[1]}, [r3], r1 - bx lr -endfunc diff --git a/ffmpeg/libavcodec/arm/vp8dsp.h b/ffmpeg/libavcodec/arm/vp8dsp.h index ce00e4a..6041ef1 100644 --- a/ffmpeg/libavcodec/arm/vp8dsp.h +++ b/ffmpeg/libavcodec/arm/vp8dsp.h @@ -1,18 +1,18 @@ /* - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/vp8dsp_armv6.S b/ffmpeg/libavcodec/arm/vp8dsp_armv6.S index 5207758..a14b188 100644 --- a/ffmpeg/libavcodec/arm/vp8dsp_armv6.S +++ b/ffmpeg/libavcodec/arm/vp8dsp_armv6.S @@ -5,20 +5,20 @@ * Copyright (c) 2010 Rob Clark <rob@ti.com> * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * This code was partially ported from libvpx, which uses this license: diff --git a/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c b/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c index e15e191..563268e 100644 --- a/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c +++ b/ffmpeg/libavcodec/arm/vp8dsp_init_armv6.c @@ -1,18 +1,18 @@ /* - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c b/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c index 0468181..ae045a6 100644 --- a/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c +++ b/ffmpeg/libavcodec/arm/vp8dsp_init_neon.c @@ -1,18 +1,18 @@ /* - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/ffmpeg/libavcodec/arm/vp8dsp_neon.S b/ffmpeg/libavcodec/arm/vp8dsp_neon.S index 04e7c5c..436b340 100644 --- a/ffmpeg/libavcodec/arm/vp8dsp_neon.S +++ b/ffmpeg/libavcodec/arm/vp8dsp_neon.S @@ -1576,18 +1576,19 @@ endconst /* Bilinear MC */ function ff_put_vp8_bilin16_h_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #8] @ mx + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r12, [sp] @ h + ldr r12, [sp, #4] @ h 1: subs r12, r12, #2 - vld1.8 {d2-d4}, [r2], r1 + vld1.8 {d2-d4}, [r2], r3 vext.8 q2, q1, q2, #1 vmull.u8 q8, d2, d1 vmlal.u8 q8, d4, d0 - vld1.8 {d18-d20},[r2], r1 + vld1.8 {d18-d20},[r2], r3 vmull.u8 q3, d3, d1 vmlal.u8 q3, d5, d0 vext.8 q10, q9, q10, #1 @@ -1603,24 +1604,25 @@ function ff_put_vp8_bilin16_h_neon, export=1 vst1.8 {q3}, [r0,:128], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin16_v_neon, export=1 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #12] @ my + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r12, [sp] @ h - vld1.8 {q1}, [r2], r1 + ldr r12, [sp, #4] @ h + vld1.8 {q1}, [r2], r3 1: subs r12, r12, #2 - vld1.8 {q2}, [r2], r1 + vld1.8 {q2}, [r2], r3 vmull.u8 q3, d2, d1 vmlal.u8 q3, d4, d0 vmull.u8 q8, d3, d1 vmlal.u8 q8, d5, d0 - vld1.8 {q1}, [r2], r1 + vld1.8 {q1}, [r2], r3 vmull.u8 q9, d4, d1 vmlal.u8 q9, d2, d0 vmull.u8 q10, d5, d1 @@ -1633,21 +1635,22 @@ function ff_put_vp8_bilin16_v_neon, export=1 vst1.8 {q3}, [r0,:128], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin16_hv_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #8] @ mx + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d2, r3 + ldr lr, [sp, #12] @ my + rsb r12, lr, #8 + vdup.8 d2, lr vdup.8 d3, r12 - ldr r12, [sp] @ h + ldr r12, [sp, #4] @ h - vld1.8 {d4-d6}, [r2], r1 + vld1.8 {d4-d6}, [r2], r3 vext.8 q3, q2, q3, #1 vmull.u8 q8, d4, d1 vmlal.u8 q8, d6, d0 @@ -1657,11 +1660,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1 vrshrn.u16 d5, q9, #3 1: subs r12, r12, #2 - vld1.8 {d18-d20},[r2], r1 + vld1.8 {d18-d20},[r2], r3 vext.8 q10, q9, q10, #1 vmull.u8 q11, d18, d1 vmlal.u8 q11, d20, d0 - vld1.8 {d26-d28},[r2], r1 + vld1.8 {d26-d28},[r2], r3 vmull.u8 q12, d19, d1 vmlal.u8 q12, d21, d0 vext.8 q14, q13, q14, #1 @@ -1689,22 +1692,23 @@ function ff_put_vp8_bilin16_hv_neon, export=1 vst1.8 {q10}, [r0,:128], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin8_h_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #8] @ mx + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r12, [sp] @ h + ldr r12, [sp, #4] @ h 1: subs r12, r12, #2 - vld1.8 {q1}, [r2], r1 + vld1.8 {q1}, [r2], r3 vext.8 d3, d2, d3, #1 vmull.u8 q2, d2, d1 vmlal.u8 q2, d3, d0 - vld1.8 {q3}, [r2], r1 + vld1.8 {q3}, [r2], r3 vext.8 d7, d6, d7, #1 vmull.u8 q8, d6, d1 vmlal.u8 q8, d7, d0 @@ -1714,22 +1718,23 @@ function ff_put_vp8_bilin8_h_neon, export=1 vst1.8 {d16}, [r0,:64], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin8_v_neon, export=1 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #12] @ my + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r12, [sp] @ h - vld1.8 {d2}, [r2], r1 + ldr r12, [sp, #4] @ h + vld1.8 {d2}, [r2], r3 1: subs r12, r12, #2 - vld1.8 {d3}, [r2], r1 + vld1.8 {d3}, [r2], r3 vmull.u8 q2, d2, d1 vmlal.u8 q2, d3, d0 - vld1.8 {d2}, [r2], r1 + vld1.8 {d2}, [r2], r3 vmull.u8 q3, d3, d1 vmlal.u8 q3, d2, d0 vrshrn.u16 d4, q2, #3 @@ -1738,32 +1743,33 @@ function ff_put_vp8_bilin8_v_neon, export=1 vst1.8 {d6}, [r0,:64], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin8_hv_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #8] @ mx + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d2, r3 + ldr lr, [sp, #12] @ my + rsb r12, lr, #8 + vdup.8 d2, lr vdup.8 d3, r12 - ldr r12, [sp] @ h + ldr r12, [sp, #4] @ h - vld1.8 {q2}, [r2], r1 + vld1.8 {q2}, [r2], r3 vext.8 d5, d4, d5, #1 vmull.u8 q9, d4, d1 vmlal.u8 q9, d5, d0 vrshrn.u16 d22, q9, #3 1: subs r12, r12, #2 - vld1.8 {q3}, [r2], r1 + vld1.8 {q3}, [r2], r3 vext.8 d7, d6, d7, #1 vmull.u8 q8, d6, d1 vmlal.u8 q8, d7, d0 - vld1.8 {q2}, [r2], r1 + vld1.8 {q2}, [r2], r3 vext.8 d5, d4, d5, #1 vmull.u8 q9, d4, d1 vmlal.u8 q9, d5, d0 @@ -1779,20 +1785,21 @@ function ff_put_vp8_bilin8_hv_neon, export=1 vst1.8 {d23}, [r0,:64], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin4_h_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #8] @ mx + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r12, [sp] @ h + ldr r12, [sp, #4] @ h 1: subs r12, r12, #2 - vld1.8 {d2}, [r2], r1 + vld1.8 {d2}, [r2], r3 vext.8 d3, d2, d3, #1 - vld1.8 {d6}, [r2], r1 + vld1.8 {d6}, [r2], r3 vext.8 d7, d6, d7, #1 vtrn.32 q1, q3 vmull.u8 q2, d2, d1 @@ -1802,20 +1809,21 @@ function ff_put_vp8_bilin4_h_neon, export=1 vst1.32 {d4[1]}, [r0,:32], r1 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin4_v_neon, export=1 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #12] @ my + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r12, [sp] @ h - vld1.32 {d2[]}, [r2], r1 + ldr r12, [sp, #4] @ h + vld1.32 {d2[]}, [r2], r3 1: vld1.32 {d3[]}, [r2] - vld1.32 {d2[1]}, [r2], r1 - vld1.32 {d3[1]}, [r2], r1 + vld1.32 {d2[1]}, [r2], r3 + vld1.32 {d3[1]}, [r2], r3 vmull.u8 q2, d2, d1 vmlal.u8 q2, d3, d0 vtrn.32 d3, d2 @@ -1825,30 +1833,31 @@ function ff_put_vp8_bilin4_v_neon, export=1 subs r12, r12, #2 bgt 1b - bx lr + pop {pc} endfunc function ff_put_vp8_bilin4_hv_neon, export=1 - ldr r3, [sp, #4] @ mx - rsb r12, r3, #8 - vdup.8 d0, r3 + push {lr} + ldr lr, [sp, #8] @ mx + rsb r12, lr, #8 + vdup.8 d0, lr vdup.8 d1, r12 - ldr r3, [sp, #8] @ my - rsb r12, r3, #8 - vdup.8 d2, r3 + ldr lr, [sp, #12] @ my + rsb r12, lr, #8 + vdup.8 d2, lr vdup.8 d3, r12 - ldr r12, [sp] @ h + ldr r12, [sp, #4] @ h - vld1.8 {d4}, [r2], r1 + vld1.8 {d4}, [r2], r3 vext.8 d5, d4, d4, #1 vmull.u8 q9, d4, d1 vmlal.u8 q9, d5, d0 vrshrn.u16 d22, q9, #3 1: subs r12, r12, #2 - vld1.8 {d6}, [r2], r1 + vld1.8 {d6}, [r2], r3 vext.8 d7, d6, d6, #1 - vld1.8 {d4}, [r2], r1 + vld1.8 {d4}, [r2], r3 vext.8 d5, d4, d4, #1 vtrn.32 q3, q2 vmull.u8 q8, d6, d1 @@ -1863,5 +1872,5 @@ function ff_put_vp8_bilin4_hv_neon, export=1 vst1.32 {d20[1]}, [r0,:32], r1 bgt 1b - bx lr + pop {pc} endfunc |
