git.proxmox.com Git - mirror_edk2.git/commitdiff
MdePkg BaseMemoryLibSse2: Convert X64/CopyMem.asm to NASM
author: Jordan Justen <jordan.l.justen@intel.com>
Tue, 31 May 2016 01:52:13 +0000 (18:52 -0700)
committer: Liming Gao <liming.gao@intel.com>
Tue, 28 Jun 2016 01:51:42 +0000 (09:51 +0800)
The BaseTools/Scripts/ConvertMasmToNasm.py script was used to convert
X64/CopyMem.asm to X64/CopyMem.nasm

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf
MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.nasm [new file with mode: 0644]

index 8aefe4fe153972b28e1ae42e2a846aeeb9f68750..a78d823a3f311da6d914e87674ed385582798514 100644 (file)
   X64/SetMem16.asm\r
   X64/SetMem.nasm\r
   X64/SetMem.asm\r
+  X64/CopyMem.nasm\r
   X64/CopyMem.asm\r
   X64/ScanMem64.nasm\r
   X64/ScanMem64.S\r
   X64/SetMem16.S\r
   X64/SetMem.nasm\r
   X64/SetMem.S\r
+  X64/CopyMem.nasm\r
   X64/CopyMem.S\r
 \r
 [Packages]\r
diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.nasm
new file mode 100644 (file)
index 0000000..d312c2d
--- /dev/null
@@ -0,0 +1,83 @@
+;------------------------------------------------------------------------------\r
+;\r
+; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>\r
+; This program and the accompanying materials\r
+; are licensed and made available under the terms and conditions of the BSD License\r
+; which accompanies this distribution.  The full text of the license may be found at\r
+; http://opensource.org/licenses/bsd-license.php.\r
+;\r
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
+;\r
+; Module Name:\r
+;\r
+;   CopyMem.nasm\r
+;\r
+; Abstract:\r
+;\r
+;   CopyMem function\r
+;\r
+; Notes:\r
+;\r
+;------------------------------------------------------------------------------\r
+\r
+    DEFAULT REL\r
+    SECTION .text\r
+\r
+;------------------------------------------------------------------------------\r
+;  VOID *\r
+;  EFIAPI\r
+;  InternalMemCopyMem (\r
+;    IN VOID   *Destination,\r
+;    IN VOID   *Source,\r
+;    IN UINTN  Count\r
+;    );\r
+;\r
+;  Copies Count bytes from Source to Destination and returns Destination in\r
+;  rax. Overlap where Source lies below Destination is handled by copying\r
+;  backward, one byte at a time; every other case copies forward.\r
+;\r
+;  In:    rcx = Destination, rdx = Source, r8 = Count\r
+;  Out:   rax = Destination\r
+;  Saved: rsi, rdi (push/pop); xmm0 (spilled to [rsp + 0x18] and reloaded)\r
+;  Used:  rcx, r8, r9, flags\r
+;\r
+;  NOTE(review): argument registers match the Microsoft x64 convention.\r
+;  [rsp + 0x18] after the two pushes is entry rsp + 8, i.e. memory above the\r
+;  return address - presumably the caller-provided shadow space; confirm that\r
+;  every caller reserves it and enters with the usual stack alignment, since\r
+;  movdqa requires a 16-byte aligned address.\r
+;------------------------------------------------------------------------------\r
+global ASM_PFX(InternalMemCopyMem)\r
+ASM_PFX(InternalMemCopyMem):\r
+    push    rsi\r
+    push    rdi\r
+    mov     rsi, rdx                    ; rsi <- Source\r
+    mov     rdi, rcx                    ; rdi <- Destination\r
+    lea     r9, [rsi + r8 - 1]          ; r9 <- Last byte of Source\r
+    cmp     rsi, rdi\r
+    mov     rax, rdi                    ; rax <- Destination as return value (mov leaves flags intact)\r
+    jae     .0                          ; Copy forward if Source >= Destination (unsigned)\r
+    cmp     r9, rdi                     ; Source < Destination here: does Source's last byte reach Destination?\r
+    jae     @CopyBackward               ; Copy backward if overlapped\r
+.0:\r
+; Forward copy, step 1: byte-copy until rdi is 16-byte aligned.\r
+    xor     rcx, rcx\r
+    sub     rcx, rdi                    ; rcx <- -rdi\r
+    and     rcx, 15                     ; rcx <- bytes needed so that rcx + rdi is 16-byte aligned\r
+    jz      .1                          ; skip if rcx == 0\r
+    cmp     rcx, r8\r
+    cmova   rcx, r8                     ; never copy more than Count bytes in the prefix\r
+    sub     r8, rcx                     ; r8 <- bytes left after the prefix\r
+    rep     movsb                       ; direction follows DF; not cleared here - presumably clear on entry\r
+.1:\r
+; Forward copy, step 2: 16-byte chunks; r8 keeps the tail length (< 16).\r
+    mov     rcx, r8\r
+    and     r8, 15\r
+    shr     rcx, 4                      ; rcx <- # of DQwords to copy\r
+    jz      @CopyBytes\r
+    movdqa  [rsp + 0x18], xmm0           ; save xmm0 on stack\r
+.2:\r
+    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned\r
+    movntdq [rdi], xmm0                 ; rdi should be 16-byte aligned; non-temporal store\r
+    add     rsi, 16\r
+    add     rdi, 16\r
+    loop    .2                          ; rcx <- rcx - 1, repeat while rcx != 0\r
+    mfence                              ; fence after the non-temporal stores, before returning\r
+    movdqa  xmm0, [rsp + 0x18]           ; restore xmm0\r
+    jmp     @CopyBytes                  ; copy remaining bytes\r
+@CopyBackward:\r
+; Overlapping case: copy all Count bytes from the last byte down.\r
+    mov     rsi, r9                     ; rsi <- Last byte of Source\r
+    lea     rdi, [rdi + r8 - 1]         ; rdi <- Last byte of Destination\r
+    std                                 ; DF <- 1 so rep movsb decrements rsi/rdi\r
+@CopyBytes:\r
+; Shared tail: copies r8 bytes in the current direction, then returns.\r
+    mov     rcx, r8\r
+    rep     movsb\r
+    cld                                 ; leave DF clear for the caller (also runs on the forward path)\r
+    pop     rdi\r
+    pop     rsi\r
+    ret\r
+\r