MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.S

   1 #
   2 # ConvertAsm.py: Automatically generated from CopyMem.asm
   3 #
   4 #------------------------------------------------------------------------------
   5 #
   6 # Copyright (c) 2006 - 2018, Intel Corporation. All rights reserved.<BR>
   7 # This program and the accompanying materials
   8 # are licensed and made available under the terms and conditions of the BSD License
   9 # which accompanies this distribution.  The full text of the license may be found at
  10 # http://opensource.org/licenses/bsd-license.php.
  11 #
  12 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  14 #
  15 # Module Name:
  16 #
  17 #   CopyMem.S
  18 #
  19 # Abstract:
  20 #
  21 #   CopyMem function
  22 #
  23 # Notes:
  24 #
  25 #------------------------------------------------------------------------------
  26
  27
  28 #------------------------------------------------------------------------------
  29 #  VOID *
  30 #  EFIAPI
  31 #  InternalMemCopyMem (
  32 #    IN VOID   *Destination,
  33 #    IN VOID   *Source,
  34 #    IN UINTN  Count
  35 #    )
  36 #
  37 #  Copies Count bytes from Source to Destination and returns Destination.
  38 #  Overlapping buffers are supported: when Source is below Destination and
  39 #  the last Source byte reaches into Destination, the copy runs backward one
  40 #  byte at a time.  Otherwise it runs forward: bytes until Destination is
  41 #  16-byte aligned, then 16-byte non-temporal stores, then the tail bytes.
  42 #
  43 #  In:        rcx = Destination, rdx = Source, r8 = Count
  44 #  Out:       rax = Destination
  45 #  Preserved: rsi, rdi (pushed and popped), xmm0 (spilled to the stack)
  46 #  Clobbers:  rcx, r8, r9, flags (the direction flag is clear on return)
  47 #  Stack:     xmm0 is spilled with movdqa to 0x18(%rsp) after the two pushes,
  48 #             which is the 16 bytes directly above the return address.  That
  49 #             slot must be 16-byte aligned and owned by this function -
  50 #             presumably guaranteed by the EFIAPI (Microsoft x64) convention
  51 #             via the caller shadow space; confirm for other ABIs.
  52 #------------------------------------------------------------------------------
  53 ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
  54 ASM_PFX(InternalMemCopyMem):
  55     pushq   %rsi                        # rsi and rdi are restored before ret
  56     pushq   %rdi
  57     movq    %rdx, %rsi                  # rsi <- Source
  58     movq    %rcx, %rdi                  # rdi <- Destination
  59     leaq    -1(%rsi,%r8,1), %r9         # r9 <- Last byte of Source
  60     cmpq    %rdi, %rsi
  61     movq    %rdi, %rax                  # rax <- Destination as return value
  62     jae     L0                          # Copy forward if Source >= Destination
  63     cmpq    %rdi, %r9                   # Overlapped?
  64     jae     L_CopyBackward              # Copy backward if overlapped
  65 L0:
  66     xorq    %rcx, %rcx
  67     subq    %rdi, %rcx                  # rcx <- -rdi
  68     andq    $15, %rcx                   # rcx + rdi should be 16 bytes aligned
  69     jz      L1                          # skip if rcx == 0
  70     cmpq    %r8, %rcx
  71     cmova   %r8, %rcx                   # rcx <- min(rcx, Count), unsigned
  72     subq    %rcx, %r8                   # r8 <- bytes left after the head copy
  73     rep     movsb                       # byte copy up to the aligned boundary
  74 L1:
  75     movq    %r8, %rcx
  76     andq    $15, %r8                    # r8 <- tail bytes after the DQwords
  77     shrq    $4, %rcx                    # rcx <- # of DQwords to copy
  78     jz      L_CopyBytes
  79     movdqa  %xmm0, 0x18(%rsp)           # save xmm0 on stack
  80 L2:
  81     movdqu  (%rsi), %xmm0               # rsi may not be 16-byte aligned
  82     movntdq %xmm0, (%rdi)               # rdi should be 16-byte aligned
  83     addq    $16, %rsi
  84     addq    $16, %rdi
  85     decq    %rcx                        # dec/jnz replaces the slow legacy loop;
  86     jnz     L2                          # flags are not consumed afterwards
  87     mfence                              # fence after the non-temporal stores
  88     movdqa  0x18(%rsp), %xmm0           # restore xmm0
  89     jmp     L_CopyBytes                 # copy remaining bytes
  90 L_CopyBackward:
  91     movq    %r9, %rsi                   # rsi <- Last byte of Source
  92     leaq    -1(%rdi,%r8,1), %rdi        # rdi <- Last byte of Destination
  93     std                                 # rep movsb below walks downward
  94 L_CopyBytes:
  95     movq    %r8, %rcx                   # rcx <- remaining byte count
  96     rep     movsb
  97     cld                                 # direction flag clear on both paths
  98     popq    %rdi
  99     popq    %rsi
 100     ret