#
#------------------------------------------------------------------------------
#
-# Copyright (c) 2006, Intel Corporation
+# Copyright (c) 2006 - 2009, Intel Corporation
# All rights reserved. This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# VOID *
# EFIAPI
# InternalMemCopyMem (
#   IN VOID   *Destination,
#   IN VOID   *Source,
#   IN UINTN  Count
#   )
#------------------------------------------------------------------------------
-.intel_syntax noprefix
-.globl ASM_PFX(InternalMemCopyMem)
+ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
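# UEFI X64 follows the Microsoft x64 calling convention:
#   rcx = Destination, rdx = Source, r8 = Count; rax returns Destination.
# Strategy: copy forward with 16-byte non-temporal stores unless the buffers
# overlap destructively, in which case copy backward byte by byte.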
- push rsi
- push rdi
- mov rsi, rdx # rsi <- Source
- mov rdi, rcx # rdi <- Destination
- lea r9, [rsi + r8 - 1] # r9 <- Last byte of Source
- cmp rsi, rdi
- mov rax, rdi # rax <- Destination as return value
+ pushq %rsi
+ pushq %rdi
+ movq %rdx, %rsi # rsi <- Source
+ movq %rcx, %rdi # rdi <- Destination
+ leaq -1(%rsi, %r8,), %r9 # r9 <- Last byte of Source
+ cmpq %rdi, %rsi
+ movq %rdi, %rax # rax <- Destination as return value
jae L0 # Copy forward if Source >= Destination
- cmp r9, rdi # Overlapped?
+ cmpq %rdi, %r9 # Overlapped?
jae L_CopyBackward # Copy backward if overlapped
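# Forward copy is also safe when Source < Destination but the last byte of
# Source still lies below Destination; only when Destination falls inside
# [Source, Source + Count) does the copy have to run backward.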
L0:
- xor rcx, rcx
- sub rcx, rdi # rcx <- -rdi
- and rcx, 15 # rcx + rsi should be 16 bytes aligned
+ xorq %rcx, %rcx
+ subq %rdi, %rcx # rcx <- -rdi
+ andq $15, %rcx # rcx + rdi should be 16 bytes aligned
jz L1 # skip if rcx == 0
- cmp rcx, r8
- cmova rcx, r8
- sub r8, rcx
+ cmpq %r8, %rcx
+ cmova %r8, %rcx # rcx <- min(rcx, Count)
+ subq %rcx, %r8
rep movsb
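# rdi is now 16-byte aligned (unless the head copy already consumed all of Count)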
L1:
- mov rcx, r8
- and r8, 15
- shr rcx, 4 # rcx <- # of DQwords to copy
+ movq %r8, %rcx
+ andq $15, %r8
+ shrq $4, %rcx # rcx <- # of DQwords to copy
jz L_CopyBytes
- movdqa [rsp + 0x18], xmm0 # save xmm0 on stack
+ movdqa %xmm0, 0x18(%rsp) # save xmm0 on stack
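# 0x18(%rsp) lies in the caller-allocated 32-byte shadow space (the return
# address sits at 0x10(%rsp) after the two pushes) and is 16-byte aligned,
# as movdqa requires.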
L2:
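# Main loop: move one 16-byte block per iteration; the non-temporal store
# bypasses the cache so a large copy does not evict useful cache lines.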
- movdqu xmm0, [rsi] # rsi may not be 16-byte aligned
- movntdq [rdi], xmm0 # rdi should be 16-byte aligned
- add rsi, 16
- add rdi, 16
+ movdqu (%rsi), %xmm0 # rsi may not be 16-byte aligned
+ movntdq %xmm0, (%rdi) # rdi should be 16-byte aligned
+ addq $16, %rsi
+ addq $16, %rdi
loop L2
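# Non-temporal stores are weakly ordered; fence so they are globally visible
# before any later loads and stores (the byte tail copy and the caller's code).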
mfence
- movdqa xmm0, [rsp + 0x18] # restore xmm0
+ movdqa 0x18(%rsp), %xmm0 # restore xmm0
jmp L_CopyBytes # copy remaining bytes
L_CopyBackward:
- mov rsi, r9 # rsi <- Last byte of Source
- lea rdi, [rdi + r8 - 1] # rdi <- Last byte of Destination
+ movq %r9, %rsi # rsi <- Last byte of Source
+ leaq -1(%rdi, %r8,), %rdi # rdi <- Last byte of Destination
std
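# DF = 1: the rep movsb below decrements rsi/rdi, copying from the last byte down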
L_CopyBytes:
- mov rcx, r8
+ movq %r8, %rcx
rep movsb
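# clear DF again; the calling convention expects the direction flag clear on return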
cld
- pop rdi
- pop rsi
+ popq %rdi
+ popq %rsi
ret