diff options
Diffstat (limited to 'ffmpeg/libavcodec/x86/vorbisdsp.asm')
| -rw-r--r-- | ffmpeg/libavcodec/x86/vorbisdsp.asm | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/ffmpeg/libavcodec/x86/vorbisdsp.asm b/ffmpeg/libavcodec/x86/vorbisdsp.asm new file mode 100644 index 0000000..b25d838 --- /dev/null +++ b/ffmpeg/libavcodec/x86/vorbisdsp.asm @@ -0,0 +1,83 @@ +;****************************************************************************** +;* Vorbis x86 optimizations +;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +pdw_80000000: times 4 dd 0x80000000 + +SECTION .text + +%if ARCH_X86_32 +INIT_MMX 3dnow +cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size + pxor m7, m7 + lea magq, [magq+block_sizeq*4] + lea angq, [angq+block_sizeq*4] + neg block_sizeq +.loop: + mova m0, [magq+block_sizeq*4] + mova m1, [angq+block_sizeq*4] + mova m2, m0 + mova m3, m1 + pfcmpge m2, m7 ; m <= 0.0 + pfcmpge m3, m7 ; a <= 0.0 + pslld m2, 31 ; keep only the sign bit + pxor m1, m2 + mova m4, m3 + pand m3, m1 + pandn m4, m1 + pfadd m3, m0 ; a = m + ((a < 0) & (a ^ sign(m))) + pfsub m0, m4 ; m = m + ((a > 0) & (a ^ sign(m))) + mova [angq+block_sizeq*4], m3 + mova [magq+block_sizeq*4], m0 + add block_sizeq, 2 + jl .loop + femms + RET +%endif + +INIT_XMM sse +cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr + mova m5, [pdw_80000000] + xor cntrq, cntrq +align 16 +.loop: + mova m0, [magq+cntrq*4] + mova m1, [angq+cntrq*4] + xorps m2, m2 + xorps m3, m3 + cmpleps m2, m0 ; m <= 0.0 + cmpleps m3, m1 ; a <= 0.0 + andps m2, m5 ; keep only the sign bit + xorps m1, m2 + mova m4, m3 + andps m3, m1 + andnps m4, m1 + addps m3, m0 ; a = m + ((a < 0) & (a ^ sign(m))) + subps m0, m4 ; m = m + ((a > 0) & (a ^ sign(m))) + mova [angq+cntrq*4], m3 + mova [magq+cntrq*4], m0 + add cntrq, 4 + cmp cntrq, block_sizeq + jl .loop + RET |
