diff options
Diffstat (limited to 'ffmpeg/libavcodec/x86/dsputilenc_mmx.c')
| -rw-r--r-- | ffmpeg/libavcodec/x86/dsputilenc_mmx.c | 115 |
1 files changed, 58 insertions, 57 deletions
diff --git a/ffmpeg/libavcodec/x86/dsputilenc_mmx.c b/ffmpeg/libavcodec/x86/dsputilenc_mmx.c
index a3f268e..5de8ade 100644
--- a/ffmpeg/libavcodec/x86/dsputilenc_mmx.c
+++ b/ffmpeg/libavcodec/x86/dsputilenc_mmx.c
@@ -3,6 +3,8 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -18,8 +20,6 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  */
 
 #include "libavutil/attributes.h"
@@ -30,7 +30,7 @@
 #include "libavcodec/dsputil.h"
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/mathops.h"
-#include "dsputil_mmx.h"
+#include "dsputil_x86.h"
 
 void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
@@ -946,11 +946,13 @@ hadamard_func(ssse3)
 
 av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
 {
-    int mm_flags = av_get_cpu_flags();
-    int bit_depth = avctx->bits_per_raw_sample;
+    int cpu_flags = av_get_cpu_flags();
+    const int dct_algo = avctx->dct_algo;
 
 #if HAVE_YASM
-    if (EXTERNAL_MMX(mm_flags)) {
+    int bit_depth = avctx->bits_per_raw_sample;
+
+    if (EXTERNAL_MMX(cpu_flags)) {
         if (bit_depth <= 8)
             c->get_pixels = ff_get_pixels_mmx;
         c->diff_pixels = ff_diff_pixels_mmx;
@@ -958,25 +960,16 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
         c->pix_norm1 = ff_pix_norm1_mmx;
     }
 
-    if (EXTERNAL_SSE2(mm_flags))
+    if (EXTERNAL_SSE2(cpu_flags))
         if (bit_depth <= 8)
             c->get_pixels = ff_get_pixels_sse2;
 #endif /* HAVE_YASM */
 
 #if HAVE_INLINE_ASM
-    if (mm_flags & AV_CPU_FLAG_MMX) {
-        const int dct_algo = avctx->dct_algo;
+    if (INLINE_MMX(cpu_flags)) {
         if (avctx->bits_per_raw_sample <= 8 &&
-            (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
-            if(mm_flags & AV_CPU_FLAG_SSE2){
-                c->fdct = ff_fdct_sse2;
-            } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-                c->fdct = ff_fdct_mmxext;
-            }else{
-                c->fdct = ff_fdct_mmx;
-            }
-        }
-
+            (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
+            c->fdct = ff_fdct_mmx;
 
         c->diff_bytes= diff_bytes_mmx;
         c->sum_abs_dctelem= sum_abs_dctelem_mmx;
@@ -997,63 +990,71 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
         c->add_8x8basis= add_8x8basis_mmx;
 
         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
+    }
 
-        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-            c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
-            c->vsad[4] = vsad_intra16_mmxext;
+    if (INLINE_AMD3DNOW(cpu_flags)) {
+        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+            c->try_8x8basis = try_8x8basis_3dnow;
+        }
+        c->add_8x8basis = add_8x8basis_3dnow;
+    }
 
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-                c->vsad[0] = vsad16_mmxext;
-            }
+    if (INLINE_MMXEXT(cpu_flags)) {
+        if (avctx->bits_per_raw_sample <= 8 &&
+            (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
+            c->fdct = ff_fdct_mmxext;
 
-            c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;
-        }
+        c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
+        c->vsad[4] = vsad_intra16_mmxext;
 
-        if(mm_flags & AV_CPU_FLAG_SSE2){
-            c->sum_abs_dctelem= sum_abs_dctelem_sse2;
+        if (!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->vsad[0] = vsad16_mmxext;
         }
 
-#if HAVE_SSSE3_INLINE
-        if(mm_flags & AV_CPU_FLAG_SSSE3){
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-                c->try_8x8basis= try_8x8basis_ssse3;
-            }
-            c->add_8x8basis= add_8x8basis_ssse3;
-            c->sum_abs_dctelem= sum_abs_dctelem_ssse3;
-        }
-#endif
+        c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;
+    }
+
+    if (INLINE_SSE2(cpu_flags)) {
+        if (avctx->bits_per_raw_sample <= 8 &&
+            (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
+            c->fdct = ff_fdct_sse2;
+
+        c->sum_abs_dctelem= sum_abs_dctelem_sse2;
+    }
 
-        if(mm_flags & AV_CPU_FLAG_3DNOW){
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-                c->try_8x8basis= try_8x8basis_3dnow;
-            }
-            c->add_8x8basis= add_8x8basis_3dnow;
+#if HAVE_SSSE3_INLINE
+    if (INLINE_SSSE3(cpu_flags)) {
+        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+            c->try_8x8basis = try_8x8basis_ssse3;
         }
+        c->add_8x8basis = add_8x8basis_ssse3;
+        c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
     }
+#endif
 #endif /* HAVE_INLINE_ASM */
 
-    if (EXTERNAL_MMX(mm_flags)) {
+    if (EXTERNAL_MMX(cpu_flags)) {
         c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
         c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
+    }
 
-        if (EXTERNAL_MMXEXT(mm_flags)) {
-            c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
-            c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
-        }
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
+        c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
+    }
 
-        if (EXTERNAL_SSE2(mm_flags)) {
-            c->sse[0] = ff_sse16_sse2;
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->sse[0] = ff_sse16_sse2;
 
 #if HAVE_ALIGNED_STACK
-            c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
-            c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
+        c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
+        c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
 #endif
-        }
+    }
 
-        if (EXTERNAL_SSSE3(mm_flags) && HAVE_ALIGNED_STACK) {
-            c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
-            c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
-        }
+    if (EXTERNAL_SSSE3(cpu_flags) && HAVE_ALIGNED_STACK) {
+        c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
+        c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
     }
 
     ff_dsputil_init_pix_mmx(c, avctx);
