# Fragment of a memory-copy routine body. NOTE(review): this text appears to be
# patch content -- lines starting with '-' are the old label names and lines
# starting with '+' are their replacements; the function entry and return are
# not part of this fragment.
# Register roles as used below: rsi = source pointer, rdi = destination pointer,
# r8 = byte count, rax = return value (the original destination).
lea r9, [rsi + r8 - 1] # r9 <- address of last Source byte
cmp rsi, rdi
mov rax, rdi # rax <- Destination as return value
- jae _InternalMemCopyMem_al_0000
+ jae L0 # Copy forward if Source >= Destination (flags from cmp rsi, rdi; mov leaves them intact)
cmp r9, rdi
- jae _atSym_CopyBackward # Copy backward if overlapped
-_InternalMemCopyMem_al_0000:
+ jae L_CopyBackward # Copy backward if overlapped (last Source byte >= Destination)
+L0:
# Forward path: split the count into whole Qwords (rcx) and a 0..7 byte tail (r8).
# NOTE(review): assumes the direction flag is clear on entry -- confirm against the ABI in use.
mov rcx, r8
and r8, 7
shr rcx, 3
rep movsq # Copy as many Qwords as possible
- jmp _atSym_CopyBytes
-_atSym_CopyBackward:
+ jmp L_CopyBytes
+L_CopyBackward:
# Backward path: point both registers at the last byte and copy all r8 bytes downward.
mov rsi, r9 # rsi <- End of Source
lea rdi, [rdi + r8 - 1] # rdi <- End of Destination
std # set direction flag
-_atSym_CopyBytes:
+L_CopyBytes:
# Shared tail: r8 is the 0..7 byte remainder (forward) or the full count (backward).
mov rcx, r8
rep movsb # Copy bytes in the direction selected by the direction flag
cld # leave the direction flag clear
# Fragment of a memory-copy routine body that uses 16-byte non-temporal stores.
# NOTE(review): this text appears to be patch content -- lines starting with '-'
# are the old label names and lines starting with '+' are their replacements;
# the function entry and return are not part of this fragment.
# Register roles as used below: rsi = source pointer, rdi = destination pointer,
# r8 = byte count, rax = return value (the original destination).
lea r9, [rsi + r8 - 1] # r9 <- Last byte of Source
cmp rsi, rdi
mov rax, rdi # rax <- Destination as return value
- jae _InternalMemCopyMem_al_0000 # Copy forward if Source > Destination
+ jae L0 # Copy forward if Source >= Destination
cmp r9, rdi # Overlapped?
- jae _atSym_CopyBackward # Copy backward if overlapped
-_InternalMemCopyMem_al_0000:
+ jae L_CopyBackward # Copy backward if overlapped
+L0:
# Forward path, step 1: copy single bytes until rdi reaches a 16-byte boundary.
xor rcx, rcx
sub rcx, rdi # rcx <- -rdi
and rcx, 15 # rcx + rdi should be 16 bytes aligned
- jz _InternalMemCopyMem_al_0001 # skip if rcx == 0
+ jz L1 # skip if rcx == 0
cmp rcx, r8
cmova rcx, r8 # rcx <- min(rcx, r8): never copy more than the requested count
sub r8, rcx # r8 <- bytes left after the alignment prefix
rep movsb
-_InternalMemCopyMem_al_0001:
+L1:
# Forward path, step 2: split what is left into 16-byte blocks (rcx) and a 0..15 byte tail (r8).
mov rcx, r8
and r8, 15
shr rcx, 4 # rcx <- # of DQwords to copy
- jz _atSym_CopyBytes
- movdqa [rsp + 0x18], xmm0 # save xmm0 on stack
-_InternalMemCopyMem_al_0002:
+ jz L_CopyBytes # no whole DQword: go straight to the byte tail
# NOTE(review): movdqa needs rsp + 0x18 to be 16-byte aligned and the slot to be
# free for this routine's use -- neither is established in this fragment; confirm.
+ movdqa [rsp + 0x18], xmm0 # save xmm0 on stack
+L2:
movdqu xmm0, [rsi] # rsi may not be 16-byte aligned
movntdq [rdi], xmm0 # rdi should be 16-byte aligned
add rsi, 16
add rdi, 16
- loop _InternalMemCopyMem_al_0002
+ loop L2 # decrement rcx, repeat while rcx != 0
mfence # order the non-temporal stores before continuing
- movdqa xmm0, [rsp + 0x18] # restore xmm0
- jmp _atSym_CopyBytes # copy remaining bytes
-_atSym_CopyBackward:
+ movdqa xmm0, [rsp + 0x18] # restore xmm0
+ jmp L_CopyBytes # copy remaining bytes
+L_CopyBackward:
# Backward path: point both registers at the last byte and copy all r8 bytes downward.
mov rsi, r9 # rsi <- Last byte of Source
lea rdi, [rdi + r8 - 1] # rdi <- Last byte of Destination
std # set direction flag so movsb walks downward
-_atSym_CopyBytes:
+L_CopyBytes:
# Shared tail: r8 is the 0..15 byte remainder (forward) or the full count (backward).
mov rcx, r8
rep movsb
cld # leave the direction flag clear
# Fragment of a memory-fill routine body. NOTE(review): this text appears to be
# patch content -- lines starting with '-' are old text and lines starting with
# '+' are their replacements; the function entry, the set-up of rdi/al/r9 and
# the return are not part of this fragment.
# Registers as used below: rdx = byte count, al = fill value, rdi = write pointer.
mov rcx, rdx
and rdx, 15 # rdx <- 0..15 byte tail
shr rcx, 4 # rcx <- # of 16-byte blocks
- jz _SetBytes_L2
+ jz L_SetBytes # no whole 16-byte block: fill byte by byte
mov ah, al # ax <- Value repeats twice
- movdqa [rsp + 0x10], xmm0 # save xmm0
# NOTE(review): movdqa needs rsp + 0x10 to be 16-byte aligned and the slot to be
# free for this routine's use -- neither is established in this fragment; confirm.
+ movdqa [rsp + 0x10], xmm0 # save xmm0
movd xmm0, eax # xmm0[0..16] <- Value repeats twice
pshuflw xmm0, xmm0, 0 # xmm0[0..63] <- Value repeats 8 times
movlhps xmm0, xmm0 # xmm0 <- Value repeats 16 times
# NOTE(review): the 16-byte store and the loop-target label are not visible in
# this fragment; verify that the label named by the loop instruction below
# exists in the full routine and still matches after the label renames.
add rdi, 16
loop L1
mfence
- movdqa xmm0, [rsp + 0x10] # restore xmm0
-_SetBytes_L2:
+ movdqa xmm0, [rsp + 0x10] # restore xmm0
+L_SetBytes:
mov ecx, edx # high 32 bits of rcx are always zero
rep stosb # fill the remaining 0..15 bytes with al
# NOTE(review): r9 is not written in this fragment; presumably it holds the
# original buffer pointer saved earlier -- confirm.
mov rax, r9 # rax <- Return value