mirror of
https://github.com/gnif/LookingGlass.git
synced 2024-11-22 05:27:20 +00:00
[common] tune windows memcpySSE asm implementation
This commit is contained in:
parent
e9d77e6c52
commit
e8b1b8fbdf
@ -1,44 +1,27 @@
|
|||||||
.code
|
.code
|
||||||
|
|
||||||
memcpySSE proc
|
memcpySSE proc
|
||||||
; dst = rcx
|
; dst = rcx
|
||||||
; src = rdx
|
; src = rdx
|
||||||
; len = r8
|
; len = r8
|
||||||
|
|
||||||
test r8 , r8
|
test r8, r8
|
||||||
jne OK
|
jz @Exit
|
||||||
ret
|
cmp rcx, rdx
|
||||||
|
je @Exit
|
||||||
|
|
||||||
OK:
|
sub rsp, 8 + 2*16 + 4*8
|
||||||
; void * end = dst + (length & ~0xFF);
|
movdqa oword ptr [rsp + 4*8 + 00 ], xmm6
|
||||||
|
movdqa oword ptr [rsp + 4*8 + 16 ], xmm7
|
||||||
|
|
||||||
|
; void * end = dst + (length & ~0x7F);  NOTE(review): ~0x7F is -080h; the -07Fh mask used below also keeps bit 0
|
||||||
; end = r10
|
; end = r10
|
||||||
mov r9 , r8
|
mov r9 , r8
|
||||||
and r9 , -0100h
|
and r9 , -07Fh
|
||||||
|
jz @RemainingBlocks
|
||||||
mov r10, rcx
|
mov r10, rcx
|
||||||
add r10, r9
|
add r10, r9
|
||||||
|
|
||||||
; size_t rem = (length & 0xFF) >> 4;
|
@FullLoop:
|
||||||
; rem = r11
|
|
||||||
mov r11, r8
|
|
||||||
and r11, 0FFh
|
|
||||||
shr r11, 4
|
|
||||||
|
|
||||||
sub rsp, 8 + 10*16 + 4*8
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 00 ], xmm6
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 16 ], xmm7
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 32 ], xmm8
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 48 ], xmm9
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 64 ], xmm10
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 80 ], xmm11
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 96 ], xmm12
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 112], xmm13
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 128], xmm14
|
|
||||||
movdqa oword ptr [rsp + 4*8 + 144], xmm15
|
|
||||||
|
|
||||||
cmp rcx, r10
|
|
||||||
je RemainingBlocks
|
|
||||||
|
|
||||||
FullLoop:
|
|
||||||
vmovaps xmm0 , xmmword ptr [rdx + 000h]
|
vmovaps xmm0 , xmmword ptr [rdx + 000h]
|
||||||
vmovaps xmm1 , xmmword ptr [rdx + 010h]
|
vmovaps xmm1 , xmmword ptr [rdx + 010h]
|
||||||
vmovaps xmm2 , xmmword ptr [rdx + 020h]
|
vmovaps xmm2 , xmmword ptr [rdx + 020h]
|
||||||
@ -47,14 +30,6 @@ memcpySSE proc
|
|||||||
vmovaps xmm5 , xmmword ptr [rdx + 050h]
|
vmovaps xmm5 , xmmword ptr [rdx + 050h]
|
||||||
vmovaps xmm6 , xmmword ptr [rdx + 060h]
|
vmovaps xmm6 , xmmword ptr [rdx + 060h]
|
||||||
vmovaps xmm7 , xmmword ptr [rdx + 070h]
|
vmovaps xmm7 , xmmword ptr [rdx + 070h]
|
||||||
vmovaps xmm8 , xmmword ptr [rdx + 080h]
|
|
||||||
vmovaps xmm9 , xmmword ptr [rdx + 090h]
|
|
||||||
vmovaps xmm10, xmmword ptr [rdx + 0A0h]
|
|
||||||
vmovaps xmm11, xmmword ptr [rdx + 0B0h]
|
|
||||||
vmovaps xmm12, xmmword ptr [rdx + 0C0h]
|
|
||||||
vmovaps xmm13, xmmword ptr [rdx + 0D0h]
|
|
||||||
vmovaps xmm14, xmmword ptr [rdx + 0E0h]
|
|
||||||
vmovaps xmm15, xmmword ptr [rdx + 0F0h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 000h], xmm0
|
vmovntdq xmmword ptr [rcx + 000h], xmm0
|
||||||
vmovntdq xmmword ptr [rcx + 010h], xmm1
|
vmovntdq xmmword ptr [rcx + 010h], xmm1
|
||||||
vmovntdq xmmword ptr [rcx + 020h], xmm2
|
vmovntdq xmmword ptr [rcx + 020h], xmm2
|
||||||
@ -63,140 +38,191 @@ memcpySSE proc
|
|||||||
vmovntdq xmmword ptr [rcx + 050h], xmm5
|
vmovntdq xmmword ptr [rcx + 050h], xmm5
|
||||||
vmovntdq xmmword ptr [rcx + 060h], xmm6
|
vmovntdq xmmword ptr [rcx + 060h], xmm6
|
||||||
vmovntdq xmmword ptr [rcx + 070h], xmm7
|
vmovntdq xmmword ptr [rcx + 070h], xmm7
|
||||||
vmovntdq xmmword ptr [rcx + 080h], xmm8
|
add rdx, 080h
|
||||||
vmovntdq xmmword ptr [rcx + 090h], xmm9
|
add rcx, 080h
|
||||||
vmovntdq xmmword ptr [rcx + 0A0h], xmm10
|
|
||||||
vmovntdq xmmword ptr [rcx + 0B0h], xmm11
|
|
||||||
vmovntdq xmmword ptr [rcx + 0C0h], xmm12
|
|
||||||
vmovntdq xmmword ptr [rcx + 0D0h], xmm13
|
|
||||||
vmovntdq xmmword ptr [rcx + 0E0h], xmm14
|
|
||||||
vmovntdq xmmword ptr [rcx + 0F0h], xmm15
|
|
||||||
add rdx, 0100h
|
|
||||||
add rcx, 0100h
|
|
||||||
cmp rcx, r10
|
cmp rcx, r10
|
||||||
jne FullLoop
|
jne @FullLoop
|
||||||
|
|
||||||
RemainingBlocks:
|
@RemainingBlocks:
|
||||||
lea r9 , JumpTable
|
; size_t rem = (length & 0x7F) >> 4;
|
||||||
mov r10, 15
|
; rem = r11
|
||||||
|
mov r11, r8
|
||||||
|
and r11, 07Fh
|
||||||
|
jz @RestoreExit
|
||||||
|
shr r11, 4
|
||||||
|
|
||||||
|
mov r10, 7
|
||||||
sub r10, r11
|
sub r10, r11
|
||||||
imul r10, 5
|
imul r10, 10
|
||||||
|
lea r9 , @FinalBlocks
|
||||||
add r9 , r10
|
add r9 , r10
|
||||||
jmp r9
|
jmp r9
|
||||||
|
|
||||||
JumpTable:
|
@RestoreExit:
|
||||||
jmp Block15
|
|
||||||
jmp Block14
|
|
||||||
jmp Block13
|
|
||||||
jmp Block12
|
|
||||||
jmp Block11
|
|
||||||
jmp Block10
|
|
||||||
jmp Block9
|
|
||||||
jmp Block8
|
|
||||||
jmp Block7
|
|
||||||
jmp Block6
|
|
||||||
jmp Block5
|
|
||||||
jmp Block4
|
|
||||||
jmp Block3
|
|
||||||
jmp Block2
|
|
||||||
jmp Block1
|
|
||||||
jmp Block0
|
|
||||||
|
|
||||||
; ensure we generate near jumps
|
|
||||||
padding1 db 127 dup(090h)
|
|
||||||
|
|
||||||
Block15:
|
|
||||||
vmovaps xmm14, xmmword ptr [rdx + 0E0h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 0E0h], xmm14
|
|
||||||
Block14:
|
|
||||||
vmovaps xmm13, xmmword ptr [rdx + 0D0h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 0D0h], xmm13
|
|
||||||
Block13:
|
|
||||||
vmovaps xmm12, xmmword ptr [rdx + 0C0h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 0C0h], xmm12
|
|
||||||
Block12:
|
|
||||||
vmovaps xmm11, xmmword ptr [rdx + 0B0h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 0B0h], xmm11
|
|
||||||
Block11:
|
|
||||||
vmovaps xmm10, xmmword ptr [rdx + 0A0h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 0A0h], xmm10
|
|
||||||
Block10:
|
|
||||||
vmovaps xmm9 , xmmword ptr [rdx + 090h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 090h], xmm9
|
|
||||||
Block9:
|
|
||||||
vmovaps xmm8 , xmmword ptr [rdx + 080h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 080h], xmm8
|
|
||||||
Block8:
|
|
||||||
vmovaps xmm7 , xmmword ptr [rdx + 070h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 070h], xmm7
|
|
||||||
Block7:
|
|
||||||
vmovaps xmm6 , xmmword ptr [rdx + 060h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 060h], xmm6
|
|
||||||
Block6:
|
|
||||||
vmovaps xmm5 , xmmword ptr [rdx + 050h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 050h], xmm5
|
|
||||||
Block5:
|
|
||||||
vmovaps xmm4 , xmmword ptr [rdx + 040h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 040h], xmm4
|
|
||||||
Block4:
|
|
||||||
vmovaps xmm3 , xmmword ptr [rdx + 030h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 030h], xmm3
|
|
||||||
Block3:
|
|
||||||
vmovaps xmm2 , xmmword ptr [rdx + 020h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 020h], xmm2
|
|
||||||
Block2:
|
|
||||||
vmovaps xmm1 , xmmword ptr [rdx + 010h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 010h], xmm1
|
|
||||||
Block1:
|
|
||||||
vmovaps xmm0 , xmmword ptr [rdx + 000h]
|
|
||||||
vmovntdq xmmword ptr [rcx + 000h], xmm0
|
|
||||||
|
|
||||||
imul r11, 16
|
|
||||||
add rdx, r11
|
|
||||||
add rcx, r11
|
|
||||||
|
|
||||||
Block0:
|
|
||||||
movdqa xmm6 , oword ptr [rsp + 4*8 + 00 ]
|
movdqa xmm6 , oword ptr [rsp + 4*8 + 00 ]
|
||||||
movdqa xmm7 , oword ptr [rsp + 4*8 + 16 ]
|
movdqa xmm7 , oword ptr [rsp + 4*8 + 16 ]
|
||||||
movdqa xmm8 , oword ptr [rsp + 4*8 + 32 ]
|
add rsp, 8 + 2*16 + 4*8
|
||||||
movdqa xmm9 , oword ptr [rsp + 4*8 + 48 ]
|
|
||||||
movdqa xmm10, oword ptr [rsp + 4*8 + 64 ]
|
|
||||||
movdqa xmm11, oword ptr [rsp + 4*8 + 80 ]
|
|
||||||
movdqa xmm12, oword ptr [rsp + 4*8 + 96 ]
|
|
||||||
movdqa xmm13, oword ptr [rsp + 4*8 + 112]
|
|
||||||
movdqa xmm14, oword ptr [rsp + 4*8 + 128]
|
|
||||||
movdqa xmm15, oword ptr [rsp + 4*8 + 144]
|
|
||||||
add rsp, 8 + 10*16 + 4*8
|
|
||||||
|
|
||||||
and r8, 0Fh
|
@Exit:
|
||||||
imul r8, 5
|
sfence
|
||||||
lea r9, CopyTable
|
|
||||||
add r9, r8
|
|
||||||
jmp r9
|
|
||||||
|
|
||||||
CopyTable:
|
|
||||||
ret
|
ret
|
||||||
nop
|
|
||||||
nop
|
@FinalBlocks:
|
||||||
|
vmovaps xmm6 , xmmword ptr [rdx + 060h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 060h], xmm6
|
||||||
|
vmovaps xmm5 , xmmword ptr [rdx + 050h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 050h], xmm5
|
||||||
|
vmovaps xmm4 , xmmword ptr [rdx + 040h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 040h], xmm4
|
||||||
|
vmovaps xmm3 , xmmword ptr [rdx + 030h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 030h], xmm3
|
||||||
|
vmovaps xmm2 , xmmword ptr [rdx + 020h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 020h], xmm2
|
||||||
|
vmovaps xmm1 , xmmword ptr [rdx + 010h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 010h], xmm1
|
||||||
|
vmovaps xmm0 , xmmword ptr [rdx + 000h]
|
||||||
|
vmovntdq xmmword ptr [rcx + 000h], xmm0
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
|
|
||||||
jmp Copy1
|
imul r9, 16
|
||||||
jmp Copy2
|
add rdx, r9
|
||||||
jmp Copy3
|
add rcx, r9
|
||||||
jmp Copy4
|
|
||||||
jmp Copy5
|
@EndBlocks:
|
||||||
jmp Copy6
|
and r8, 0Fh
|
||||||
jmp Copy7
|
test r8, r8
|
||||||
jmp Copy8
|
je @RestoreExit
|
||||||
jmp Copy9
|
|
||||||
jmp Copy10
|
cmp r8, 2
|
||||||
jmp Copy11
|
je @Copy2
|
||||||
jmp Copy12
|
cmp r8, 3
|
||||||
jmp Copy13
|
je @Copy3
|
||||||
jmp Copy14
|
cmp r8, 4
|
||||||
|
je @Copy4
|
||||||
|
cmp r8, 5
|
||||||
|
je @Copy5
|
||||||
|
cmp r8, 6
|
||||||
|
je @Copy6
|
||||||
|
cmp r8, 7
|
||||||
|
je @Copy7
|
||||||
|
cmp r8, 8
|
||||||
|
je @Copy8
|
||||||
|
cmp r8, 9
|
||||||
|
je @Copy9
|
||||||
|
cmp r8, 10
|
||||||
|
je @Copy10
|
||||||
|
cmp r8, 11
|
||||||
|
je @Copy11
|
||||||
|
cmp r8, 12
|
||||||
|
je @Copy12
|
||||||
|
cmp r8, 13
|
||||||
|
je @Copy13
|
||||||
|
cmp r8, 14
|
||||||
|
je @Copy14
|
||||||
|
cmp r8, 15
|
||||||
|
je @Copy15
|
||||||
|
|
||||||
|
; fall through - 1 byte
|
||||||
|
mov al, byte ptr [rdx]
|
||||||
|
mov byte ptr [rcx], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy2:
|
||||||
|
mov r10w, word ptr [rdx]
|
||||||
|
mov word ptr [rcx], r10w
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy3:
|
||||||
|
mov r10w, word ptr [rdx]
|
||||||
|
mov word ptr [rcx], r10w
|
||||||
|
mov al, byte ptr [rdx + 02h]
|
||||||
|
mov byte ptr [rcx + 02h], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy4:
|
||||||
|
mov r9d, dword ptr [rdx]
|
||||||
|
mov dword ptr [rcx], r9d
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy5:
|
||||||
|
mov r9d, dword ptr [rdx ]
|
||||||
|
mov al , byte ptr [rdx + 04h]
|
||||||
|
mov dword ptr [rcx ], r9d
|
||||||
|
mov byte ptr [rcx + 04h], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy6:
|
||||||
|
mov r9d , dword ptr [rdx ]
|
||||||
|
mov r10w, word ptr [rdx + 04h]
|
||||||
|
mov dword ptr [rcx ], r9d
|
||||||
|
mov word ptr [rcx + 04h], r10w
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy7:
|
||||||
|
mov r9d , dword ptr [rdx ]
|
||||||
|
mov r10w, word ptr [rdx + 04h]
|
||||||
|
mov al , byte ptr [rdx + 06h]
|
||||||
|
mov dword ptr [rcx ], r9d
|
||||||
|
mov word ptr [rcx + 04h], r10w
|
||||||
|
mov byte ptr [rcx + 06h], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy8:
|
||||||
|
mov r8, qword ptr [rdx]
|
||||||
|
mov qword ptr [rcx], r8
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy9:
|
||||||
|
mov r8, qword ptr [rdx ]
|
||||||
|
mov al, byte ptr [rdx + 08h]
|
||||||
|
mov qword ptr [rcx ], r8
|
||||||
|
mov byte ptr [rcx + 08h], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy10:
|
||||||
|
mov r8 , qword ptr [rdx ]
|
||||||
|
mov r10w, word ptr [rdx + 08h]
|
||||||
|
mov qword ptr [rcx ], r8
|
||||||
|
mov word ptr [rcx + 08h], r10w
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy11:
|
||||||
|
mov r8 , qword ptr [rdx ]
|
||||||
|
mov r10w, word ptr [rdx + 08h]
|
||||||
|
mov al , byte ptr [rdx + 0Ah]
|
||||||
|
mov qword ptr [rcx ], r8
|
||||||
|
mov word ptr [rcx + 08h], r10w
|
||||||
|
mov byte ptr [rcx + 0Ah], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy12:
|
||||||
|
mov r8 , qword ptr [rdx ]
|
||||||
|
mov r9d, dword ptr [rdx + 08h]
|
||||||
|
mov qword ptr [rcx ], r8
|
||||||
|
mov dword ptr [rcx + 08h], r9d
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy13:
|
||||||
|
mov r8 , qword ptr [rdx ]
|
||||||
|
mov r9d, dword ptr [rdx + 08h]
|
||||||
|
mov al , byte ptr [rdx + 0Ch]
|
||||||
|
mov qword ptr [rcx ], r8
|
||||||
|
mov dword ptr [rcx + 08h], r9d
|
||||||
|
mov byte ptr [rcx + 0Ch], al
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
|
@Copy14:
|
||||||
|
mov r8 , qword ptr [rdx ]
|
||||||
|
mov r9d , dword ptr [rdx + 08h]
|
||||||
|
mov r10w, word ptr [rdx + 0Ch]
|
||||||
|
mov qword ptr [rcx ], r8
|
||||||
|
mov dword ptr [rcx + 08h], r9d
|
||||||
|
mov word ptr [rcx + 0Ch], r10w
|
||||||
|
jmp @RestoreExit
|
||||||
|
|
||||||
; copy 15
|
; copy 15
|
||||||
|
@Copy15:
|
||||||
mov r8 , qword ptr [rdx + 00h]
|
mov r8 , qword ptr [rdx + 00h]
|
||||||
mov r9d , dword ptr [rdx + 08h]
|
mov r9d , dword ptr [rdx + 08h]
|
||||||
mov r10w, word ptr [rdx + 0Ch]
|
mov r10w, word ptr [rdx + 0Ch]
|
||||||
@ -205,108 +231,7 @@ memcpySSE proc
|
|||||||
mov dword ptr [rcx + 08h], r9d
|
mov dword ptr [rcx + 08h], r9d
|
||||||
mov word ptr [rcx + 0Ch], r10w
|
mov word ptr [rcx + 0Ch], r10w
|
||||||
mov byte ptr [rcx + 0Eh], al
|
mov byte ptr [rcx + 0Eh], al
|
||||||
ret
|
jmp @RestoreExit
|
||||||
|
|
||||||
; ensure we generate near jumps
|
|
||||||
padding2 db 127 dup(090h)
|
|
||||||
|
|
||||||
Copy1:
|
|
||||||
mov al, byte ptr [rdx]
|
|
||||||
mov byte ptr [rcx], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy2:
|
|
||||||
mov r10w, word ptr [rdx]
|
|
||||||
mov word ptr [rcx], r10w
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy3:
|
|
||||||
mov r10w, word ptr [rdx]
|
|
||||||
mov word ptr [rcx], r10w
|
|
||||||
mov al, byte ptr [rdx + 02h]
|
|
||||||
mov byte ptr [rcx + 02h], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy4:
|
|
||||||
mov r9d , dword ptr [rdx]
|
|
||||||
mov dword ptr [rcx], r9d
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy5:
|
|
||||||
mov r9d , dword ptr [rdx]
|
|
||||||
mov dword ptr [rcx], r9d
|
|
||||||
mov al, byte ptr [rdx + 04h]
|
|
||||||
mov byte ptr [rcx + 04h], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy6:
|
|
||||||
mov r9d , dword ptr [rdx]
|
|
||||||
mov dword ptr [rcx], r9d
|
|
||||||
mov r10w, word ptr [rdx + 04h]
|
|
||||||
mov word ptr [rcx + 04h], r10w
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy7:
|
|
||||||
mov r9d , dword ptr [rdx]
|
|
||||||
mov dword ptr [rcx], r9d
|
|
||||||
mov r10w, word ptr [rdx + 04h]
|
|
||||||
mov word ptr [rcx + 04h], r10w
|
|
||||||
mov al, byte ptr [rdx + 06h]
|
|
||||||
mov byte ptr [rcx + 06h], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy8:
|
|
||||||
mov r8, qword ptr [rdx]
|
|
||||||
mov qword ptr [rcx], r8
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy9:
|
|
||||||
mov r8, qword ptr [rdx]
|
|
||||||
mov qword ptr [rcx], r8
|
|
||||||
mov al, byte ptr [rdx + 08h]
|
|
||||||
mov byte ptr [rcx + 08h], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy10:
|
|
||||||
mov r8, qword ptr [rdx]
|
|
||||||
mov qword ptr [rcx], r8
|
|
||||||
mov r10w, word ptr [rdx + 08h]
|
|
||||||
mov word ptr [rcx + 08h], r10w
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy11:
|
|
||||||
mov r8, qword ptr [rdx]
|
|
||||||
mov qword ptr [rcx], r8
|
|
||||||
mov r10w, word ptr [rdx + 08h]
|
|
||||||
mov word ptr [rcx + 08h], r10w
|
|
||||||
mov al, byte ptr [rdx + 0Ah]
|
|
||||||
mov byte ptr [rcx + 0Ah], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy12:
|
|
||||||
mov r8, qword ptr [rdx]
|
|
||||||
mov qword ptr [rcx], r8
|
|
||||||
mov r9d , dword ptr [rdx + 08h]
|
|
||||||
mov dword ptr [rcx + 08h], r9d
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy13:
|
|
||||||
mov r8, qword ptr [rdx]
|
|
||||||
mov qword ptr [rcx], r8
|
|
||||||
mov r9d , dword ptr [rdx + 08h]
|
|
||||||
mov dword ptr [rcx + 08h], r9d
|
|
||||||
mov al, byte ptr [rdx + 0Ch]
|
|
||||||
mov byte ptr [rcx + 0Ch], al
|
|
||||||
ret
|
|
||||||
|
|
||||||
Copy14:
|
|
||||||
mov r8 , qword ptr [rdx ]
|
|
||||||
mov r9d , dword ptr [rdx + 08h]
|
|
||||||
mov r10w, word ptr [rdx + 0Ch]
|
|
||||||
mov qword ptr [rcx ], r8
|
|
||||||
mov dword ptr [rcx + 08h], r9d
|
|
||||||
mov word ptr [rcx + 0Ch], r10w
|
|
||||||
ret
|
|
||||||
|
|
||||||
memcpySSE endp
|
memcpySSE endp
|
||||||
end
|
end
|
@ -28,7 +28,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
|
|||||||
|
|
||||||
#if defined(NATIVE_MEMCPY)
|
#if defined(NATIVE_MEMCPY)
|
||||||
#define memcpySSE memcpy
|
#define memcpySSE memcpy
|
||||||
#elif defined(MSVC)
|
#elif defined(_MSC_VER)
|
||||||
extern "C" void memcpySSE(void *dst, const void * src, size_t length);
|
extern "C" void memcpySSE(void *dst, const void * src, size_t length);
|
||||||
#elif (defined(__GNUC__) || defined(__GNUG__)) && defined(__i386__)
|
#elif (defined(__GNUC__) || defined(__GNUG__)) && defined(__i386__)
|
||||||
inline static void memcpySSE(void *dst, const void * src, size_t length)
|
inline static void memcpySSE(void *dst, const void * src, size_t length)
|
||||||
|
Loading…
Reference in New Issue
Block a user