| author | Tim Redfern <tim@eclectronics.org> | 2014-02-17 13:36:38 +0000 |
|---|---|---|
| committer | Tim Redfern <tim@eclectronics.org> | 2014-02-17 13:36:38 +0000 |
| commit | 22e28216336da876e1fd17f380ce42eaf1446769 (patch) | |
| tree | 444dad3dc7e2656992d29f34f7bce31970c122a5 /ffmpeg/libavcodec/x86/dwt_yasm.asm | |
| parent | ae5e8541f6e06e64c28719467cdf366ac57aff31 (diff) | |
chasing indexing error
Diffstat (limited to 'ffmpeg/libavcodec/x86/dwt_yasm.asm')
| -rw-r--r-- | ffmpeg/libavcodec/x86/dwt_yasm.asm | 306 |
|---|---|---|

1 file changed, 0 insertions, 306 deletions
diff --git a/ffmpeg/libavcodec/x86/dwt_yasm.asm b/ffmpeg/libavcodec/x86/dwt_yasm.asm
deleted file mode 100644
index 5253abc..0000000
--- a/ffmpeg/libavcodec/x86/dwt_yasm.asm
+++ /dev/null
@@ -1,306 +0,0 @@
-;******************************************************************************
-;* MMX optimized discrete wavelet transform
-;* Copyright (c) 2010 David Conrad
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "libavutil/x86/x86util.asm"
-
-SECTION_RODATA
-pw_1:    times 8 dw 1
-pw_2:    times 8 dw 2
-pw_8:    times 8 dw 8
-pw_16:   times 8 dw 16
-pw_1991: times 4 dw 9,-1
-
-section .text
-
-; %1 -= (%2 + %3 + 2)>>2     %4 is pw_2
-%macro COMPOSE_53iL0 4
-    paddw %2, %3
-    paddw %2, %4
-    psraw %2, 2
-    psubw %1, %2
-%endm
-
-; m1 = %1 + (-m0 + 9*m1 + 9*%2 - %3 + 8)>>4
-; if %4 is supplied, %1 is loaded unaligned from there
-; m2: clobbered  m3: pw_8  m4: pw_1991
-%macro COMPOSE_DD97iH0 3-4
-    paddw m0, %3
-    paddw m1, %2
-    psubw m0, m3
-    mova m2, m1
-    punpcklwd m1, m0
-    punpckhwd m2, m0
-    pmaddwd m1, m4
-    pmaddwd m2, m4
-%if %0 > 3
-    movu %1, %4
-%endif
-    psrad m1, 4
-    psrad m2, 4
-    packssdw m1, m2
-    paddw m1, %1
-%endm
-
-%macro COMPOSE_VERTICAL 1
-; void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-;                            int width)
-cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
-    mova m2, [pw_2]
-%if ARCH_X86_64
-    mov widthd, widthd
-%endif
-.loop:
-    sub widthq, mmsize/2
-    mova m1, [b0q+2*widthq]
-    mova m0, [b1q+2*widthq]
-    COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
-    mova [b1q+2*widthq], m0
-    jg .loop
-    REP_RET
-
-; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-;                                  int width)
-cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
-    mova m1, [pw_1]
-%if ARCH_X86_64
-    mov widthd, widthd
-%endif
-.loop:
-    sub widthq, mmsize/2
-    mova m0, [b0q+2*widthq]
-    paddw m0, [b2q+2*widthq]
-    paddw m0, m1
-    psraw m0, 1
-    paddw m0, [b1q+2*widthq]
-    mova [b1q+2*widthq], m0
-    jg .loop
-    REP_RET
-
-; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-;                               IDWTELEM *b3, IDWTELEM *b4, int width)
-cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
-    mova m3, [pw_8]
-    mova m4, [pw_1991]
-%if ARCH_X86_64
-    mov widthd, widthd
-%endif
-.loop:
-    sub widthq, mmsize/2
-    mova m0, [b0q+2*widthq]
-    mova m1, [b1q+2*widthq]
-    COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
-    mova [b2q+2*widthq], m1
-    jg .loop
-    REP_RET
-
-; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-;                                IDWTELEM *b3, IDWTELEM *b4, int width)
-cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
-    mova m3, [pw_16]
-    mova m4, [pw_1991]
-%if ARCH_X86_64
-    mov widthd, widthd
-%endif
-.loop:
-    sub widthq, mmsize/2
-    mova m0, [b0q+2*widthq]
-    mova m1, [b1q+2*widthq]
-    mova m5, [b2q+2*widthq]
-    paddw m0, [b4q+2*widthq]
-    paddw m1, [b3q+2*widthq]
-    psubw m0, m3
-    mova m2, m1
-    punpcklwd m1, m0
-    punpckhwd m2, m0
-    pmaddwd m1, m4
-    pmaddwd m2, m4
-    psrad m1, 5
-    psrad m2, 5
-    packssdw m1, m2
-    psubw m5, m1
-    mova [b2q+2*widthq], m5
-    jg .loop
-    REP_RET
-
-; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
-cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
-    mova m3, [pw_1]
-%if ARCH_X86_64
-    mov widthd, widthd
-%endif
-.loop:
-    sub widthq, mmsize/2
-    mova m1, [b1q+2*widthq]
-    mova m0, [b0q+2*widthq]
-    mova m2, m1
-    paddw m1, m3
-    psraw m1, 1
-    psubw m0, m1
-    mova [b0q+2*widthq], m0
-    paddw m2, m0
-    mova [b1q+2*widthq], m2
-    jg .loop
-    REP_RET
-%endmacro
-
-; extend the left and right edges of the tmp array by %1 and %2 respectively
-%macro EDGE_EXTENSION 3
-    mov %3, [tmpq]
-%assign %%i 1
-%rep %1
-    mov [tmpq-2*%%i], %3
-    %assign %%i %%i+1
-%endrep
-    mov %3, [tmpq+2*w2q-2]
-%assign %%i 0
-%rep %2
-    mov [tmpq+2*w2q+2*%%i], %3
-    %assign %%i %%i+1
-%endrep
-%endmacro
-
-
-%macro HAAR_HORIZONTAL 2
-; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)
-cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
-    mov w2d, wd
-    xor xq, xq
-    shr w2d, 1
-    lea b_w2q, [bq+wq]
-    mova m3, [pw_1]
-.lowpass_loop:
-    movu m1, [b_w2q + 2*xq]
-    mova m0, [bq + 2*xq]
-    paddw m1, m3
-    psraw m1, 1
-    psubw m0, m1
-    mova [tmpq + 2*xq], m0
-    add xq, mmsize/2
-    cmp xq, w2q
-    jl .lowpass_loop
-
-    xor xq, xq
-    and w2q, ~(mmsize/2 - 1)
-    cmp w2q, mmsize/2
-    jl .end
-
-.highpass_loop:
-    movu m1, [b_w2q + 2*xq]
-    mova m0, [tmpq + 2*xq]
-    paddw m1, m0
-
-    ; shift and interleave
-%if %2 == 1
-    paddw m0, m3
-    paddw m1, m3
-    psraw m0, 1
-    psraw m1, 1
-%endif
-    mova m2, m0
-    punpcklwd m0, m1
-    punpckhwd m2, m1
-    mova [bq+4*xq], m0
-    mova [bq+4*xq+mmsize], m2
-
-    add xq, mmsize/2
-    cmp xq, w2q
-    jl .highpass_loop
-.end:
-    REP_RET
-%endmacro
-
-
-INIT_XMM
-; void horizontal_compose_dd97i(IDWTELEM *b, IDWTELEM *tmp, int width)
-cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
-    mov w2d, wd
-    xor xd, xd
-    shr w2d, 1
-    lea b_w2q, [bq+wq]
-    movu m4, [bq+wq]
-    mova m7, [pw_2]
-    pslldq m4, 14
-.lowpass_loop:
-    movu m1, [b_w2q + 2*xq]
-    mova m0, [bq + 2*xq]
-    mova m2, m1
-    palignr m1, m4, 14
-    mova m4, m2
-    COMPOSE_53iL0 m0, m1, m2, m7
-    mova [tmpq + 2*xq], m0
-    add xd, mmsize/2
-    cmp xd, w2d
-    jl .lowpass_loop
-
-    EDGE_EXTENSION 1, 2, xw
-    ; leave the last up to 7 (sse) or 3 (mmx) values for C
-    xor xd, xd
-    and w2d, ~(mmsize/2 - 1)
-    cmp w2d, mmsize/2
-    jl .end
-
-    mova m7, [tmpq-mmsize]
-    mova m0, [tmpq]
-    mova m5, [pw_1]
-    mova m3, [pw_8]
-    mova m4, [pw_1991]
-.highpass_loop:
-    mova m6, m0
-    palignr m0, m7, 14
-    mova m7, [tmpq + 2*xq + 16]
-    mova m1, m7
-    mova m2, m7
-    palignr m1, m6, 2
-    palignr m2, m6, 4
-    COMPOSE_DD97iH0 m0, m6, m2, [b_w2q + 2*xq]
-    mova m0, m7
-    mova m7, m6
-
-    ; shift and interleave
-    paddw m6, m5
-    paddw m1, m5
-    psraw m6, 1
-    psraw m1, 1
-    mova m2, m6
-    punpcklwd m6, m1
-    punpckhwd m2, m1
-    mova [bq+4*xq], m6
-    mova [bq+4*xq+mmsize], m2
-
-    add xd, mmsize/2
-    cmp xd, w2d
-    jl .highpass_loop
-.end:
-    REP_RET
-
-
-%if ARCH_X86_64 == 0
-INIT_MMX
-COMPOSE_VERTICAL mmx
-HAAR_HORIZONTAL mmx, 0
-HAAR_HORIZONTAL mmx, 1
-%endif
-
-;;INIT_XMM
-INIT_XMM
-COMPOSE_VERTICAL sse2
-HAAR_HORIZONTAL sse2, 0
-HAAR_HORIZONTAL sse2, 1
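For orientation, the routines removed here are SIMD implementations of Dirac's inverse-DWT lifting steps; the exact arithmetic is spelled out in the macro comments inside the deleted file. The following scalar C sketch restates the vertical passes, assuming IDWTELEM is a 16-bit coefficient type; the `dwt_elem` typedef and function names are illustrative only, not identifiers from this tree.

```c
/*
 * Scalar sketch (illustrative, not from this tree) of the vertical lifting
 * steps that the deleted SIMD routines implement. "dwt_elem" stands in for
 * IDWTELEM, assumed to be a 16-bit coefficient.
 */
#include <stdint.h>

typedef int16_t dwt_elem;

/* LeGall 5/3 low-pass update: b1[i] -= (b0[i] + b2[i] + 2) >> 2 */
void compose53iL0(dwt_elem *b0, dwt_elem *b1, dwt_elem *b2, int width)
{
    for (int i = 0; i < width; i++)
        b1[i] -= (b0[i] + b2[i] + 2) >> 2;
}

/* Dirac 5/3 high-pass update: b1[i] += (b0[i] + b2[i] + 1) >> 1 */
void compose_dirac53iH0(dwt_elem *b0, dwt_elem *b1, dwt_elem *b2, int width)
{
    for (int i = 0; i < width; i++)
        b1[i] += (b0[i] + b2[i] + 1) >> 1;
}

/* Deslauriers-Dubuc 9/7 high-pass update, matching COMPOSE_DD97iH0:
 * b2[i] += (-b0[i] + 9*b1[i] + 9*b3[i] - b4[i] + 8) >> 4 */
void compose_dd97iH0(dwt_elem *b0, dwt_elem *b1, dwt_elem *b2,
                     dwt_elem *b3, dwt_elem *b4, int width)
{
    for (int i = 0; i < width; i++)
        b2[i] += (-b0[i] + 9 * b1[i] + 9 * b3[i] - b4[i] + 8) >> 4;
}

/* Deslauriers-Dubuc 13/7 low-pass update (note the >>5 and psubw in the asm):
 * b2[i] -= (-b0[i] + 9*b1[i] + 9*b3[i] - b4[i] + 16) >> 5 */
void compose_dd137iL0(dwt_elem *b0, dwt_elem *b1, dwt_elem *b2,
                      dwt_elem *b3, dwt_elem *b4, int width)
{
    for (int i = 0; i < width; i++)
        b2[i] -= (-b0[i] + 9 * b1[i] + 9 * b3[i] - b4[i] + 16) >> 5;
}

/* Haar vertical step: update b0 first, then b1 from the updated b0 */
void compose_haar(dwt_elem *b0, dwt_elem *b1, int width)
{
    for (int i = 0; i < width; i++) {
        b0[i] -= (b1[i] + 1) >> 1;
        b1[i] += b0[i];
    }
}
```

Each output element depends only on same-index elements of the other rows, which is why the assembly can step through `mmsize/2` coefficients per iteration with packed 16-bit arithmetic (`paddw`/`psraw`) and, for the 9/7 and 13/7 filters, `pmaddwd` against the interleaved 9,-1 constants in `pw_1991`.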
