-#
-# ConvertAsm.py: Automatically generated from CopyMem.asm
-#
-#------------------------------------------------------------------------------
-#
-# Copyright (c) 2006, Intel Corporation
-# All rights reserved. This program and the accompanying materials
-# are licensed and made available under the terms and conditions of the BSD License
-# which accompanies this distribution. The full text of the license may be found at
-# http://opensource.org/licenses/bsd-license.php
-#
-# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
-# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
-#
-# Module Name:
-#
-# CopyMem.S
-#
-# Abstract:
-#
-# CopyMem function
-#
-# Notes:
-#
-#------------------------------------------------------------------------------
-
-
-#------------------------------------------------------------------------------
-# VOID *
-# EFIAPI
-# InternalMemCopyMem (
-# IN VOID *Destination,
-# IN VOID *Source,
-# IN UINTN Count
-# )
-#------------------------------------------------------------------------------
-.intel_syntax noprefix
-.globl ASM_PFX(InternalMemCopyMem)
-ASM_PFX(InternalMemCopyMem):
- push rsi
- push rdi
- mov rsi, rdx # rsi <- Source
- mov rdi, rcx # rdi <- Destination
- lea r9, [rsi + r8 - 1] # r9 <- End of Source
- cmp rsi, rdi
- mov rax, rdi # rax <- Destination as return value
- jae L0
- cmp r9, rdi
- jae L_CopyBackward # Copy backward if overlapped
-L0:
- mov rcx, r8
- and r8, 7
- shr rcx, 3 # rcx <- # of Qwords to copy
- jz L_CopyBytes
- movd r10, mm0 # (Save mm0 in r10)
-L1:
- movq mm0, [rsi]
- movntq [rdi], mm0
- add rsi, 8
- add rdi, 8
- loop L1
- mfence
- movd mm0, r10 # (Restore mm0)
- jmp L_CopyBytes
-L_CopyBackward:
- mov rsi, r9 # rsi <- End of Source
- lea rdi, [rdi + r8 - 1] # rdi <- End of Destination
- std # set direction flag
-L_CopyBytes:
- mov rcx, r8
- rep movsb # Copy bytes backward
- cld
- pop rdi
- pop rsi
- ret
+#\r
+# ConvertAsm.py: Automatically generated from CopyMem.asm\r
+#\r
+#------------------------------------------------------------------------------\r
+#\r
+# Copyright (c) 2006 - 2018, Intel Corporation. All rights reserved.<BR>\r
+# This program and the accompanying materials\r
+# are licensed and made available under the terms and conditions of the BSD License\r
+# which accompanies this distribution. The full text of the license may be found at\r
+# http://opensource.org/licenses/bsd-license.php.\r
+#\r
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
+#\r
+# Module Name:\r
+#\r
+# CopyMem.S\r
+#\r
+# Abstract:\r
+#\r
+# CopyMem function\r
+#\r
+# Notes:\r
+#\r
+#------------------------------------------------------------------------------\r
+\r
+#------------------------------------------------------------------------------\r
+# VOID *\r
+# EFIAPI\r
+# InternalMemCopyMem (\r
+# IN VOID *Destination,\r
+# IN VOID *Source,\r
+# IN UINTN Count\r
+# )\r
+#------------------------------------------------------------------------------\r
+ASM_GLOBAL ASM_PFX(InternalMemCopyMem)\r
+ASM_PFX(InternalMemCopyMem):\r
+ pushq %rsi # rsi/rdi are callee-saved in the MS x64 ABI (args arrive in rcx/rdx/r8)\r
+ pushq %rdi\r
+ movq %rdx, %rsi # rsi <- Source (arg 2)\r
+ movq %rcx, %rdi # rdi <- Destination (arg 1)\r
+ leaq -1(%rsi,%r8,), %r9 # r9 <- Last byte of Source (r8 = Count)\r
+ cmpq %rdi, %rsi\r
+ movq %rdi, %rax # rax <- Destination as return value\r
+ jae L0 # Copy forward if Source >= Destination\r
+ cmpq %rdi, %r9 # Source < Destination: do the regions overlap?\r
+ jae L_CopyBackward # Copy backward if overlapped\r
+L0:\r
+ xorq %rcx, %rcx\r
+ subq %rdi, %rcx # rcx <- -rdi\r
+ andq $15, %rcx # rcx <- head bytes so rdi becomes 16-byte aligned (rcx + rdi is 16-byte aligned)\r
+ jz L1 # skip if rdi is already aligned\r
+ cmpq %r8, %rcx\r
+ cmova %r8, %rcx # never copy more than Count bytes\r
+ subq %rcx, %r8\r
+ rep movsb # copy the unaligned head bytes\r
+L1:\r
+ movq %r8, %rcx\r
+ andq $15, %r8 # r8 <- tail byte count (< 16)\r
+ shrq $4, %rcx # rcx <- # of DQwords to copy\r
+ jz L_CopyBytes\r
+ movdqu %xmm0, 0x18(%rsp) # save xmm0 in caller's shadow space: 2 pushes + ret addr put it at rsp+0x18, which is 16-byte aligned here\r
+L2:\r
+ movdqu (%rsi), %xmm0 # rsi may not be 16-byte aligned\r
+ movntdq %xmm0, (%rdi) # non-temporal store; rdi is 16-byte aligned after the head copy\r
+ addq $16, %rsi\r
+ addq $16, %rdi\r
+ loop L2\r
+ mfence # make the non-temporal stores globally visible before returning\r
+ movdqa 0x18(%rsp), %xmm0 # restore xmm0 (aligned load is safe, see save slot above)\r
+ jmp L_CopyBytes # copy remaining bytes\r
+L_CopyBackward:\r
+ movq %r9, %rsi # rsi <- Last byte of Source\r
+ leaq -1(%rdi, %r8,), %rdi # rdi <- Last byte of Destination\r
+ std # DF=1: rep movsb decrements rsi/rdi (copies backward)\r
+L_CopyBytes:\r
+ movq %r8, %rcx\r
+ rep movsb # forward path: tail bytes; backward path: all Count bytes\r
+ cld # restore DF=0 as callers expect\r
+ popq %rdi\r
+ popq %rsi\r
+ ret\r