[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibSse2 / X64 / CopyMem.S

#
# ConvertAsm.py: Automatically generated from CopyMem.asm
#
#------------------------------------------------------------------------------
#
# Copyright (c) 2006, Intel Corporation
# All rights reserved. This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution.  The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
# Module Name:
#
#   CopyMem.S
#
# Abstract:
#
#   CopyMem function
#
# Notes:
#
#------------------------------------------------------------------------------


#------------------------------------------------------------------------------
#  VOID *
#  EFIAPI
#  InternalMemCopyMem (
#    IN VOID   *Destination,
#    IN VOID   *Source,
#    IN UINTN  Count
#    )
#
#  Copies Count bytes from Source to Destination and returns Destination.
#  The two buffers may overlap.
#
#  Registers (as used by the code below):
#    In:      rcx = Destination, rdx = Source, r8 = Count
#    Out:     rax = Destination
#    Saved:   rsi, rdi (pushed/popped); xmm0 (spilled around the 16-byte loop)
#    Changed: rcx, r8, r9, arithmetic flags; DF is cleared on exit
#
#  Stack: xmm0 is spilled to the 16 bytes at [rsp + 0x18], i.e. entry rsp + 8
#  once rsi and rdi have been pushed.
#  NOTE(review): presumably this is the caller-allocated home (shadow) area of
#  the Microsoft x64 convention behind EFIAPI, and movdqa needs the slot to be
#  16-byte aligned -- confirm against the calling convention in use.
#
#  Strategy:
#    - If Source < Destination and the last Source byte lies at or above
#      Destination, copy backward one byte at a time (DF = 1).
#    - Otherwise copy forward: bytes until Destination is 16-byte aligned,
#      then 16-byte non-temporal stores, then the remaining (< 16) bytes.
#------------------------------------------------------------------------------
.intel_syntax noprefix
.globl ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    # rsi <- Source
    mov     rdi, rcx                    # rdi <- Destination
    lea     r9, [rsi + r8 - 1]          # r9 <- Last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    # rax <- Destination as return value (mov keeps the cmp flags)
    jae     L0                          # Copy forward if Source >= Destination
    cmp     r9, rdi                     # Does the end of Source reach Destination?
    jae     L_CopyBackward              # Copy backward if overlapped
L0:
    # Forward copy, step 1: byte-copy until rdi is 16-byte aligned.
    xor     rcx, rcx
    sub     rcx, rdi                    # rcx <- -rdi
    and     rcx, 15                     # rcx <- bytes needed so that rcx + rdi is 16-byte aligned
    jz      L1                          # skip if rdi is already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     # never copy more than Count bytes
    sub     r8, rcx                     # r8 <- bytes left after the alignment prefix
    rep     movsb
L1:
    # Forward copy, step 2: split the remainder into 16-byte blocks and a tail.
    mov     rcx, r8
    and     r8, 15                      # r8 <- tail bytes (< 16)
    shr     rcx, 4                      # rcx <- # of DQwords to copy
    jz      L_CopyBytes                 # no full block: only the tail is left
    movdqa  [rsp + 0x18], xmm0          # save xmm0 on stack
L2:
    movdqu  xmm0, [rsi]                 # rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 # rdi is 16-byte aligned here; non-temporal store
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         # dec/jnz instead of the slow legacy `loop`;
    jnz     L2                          # no later instruction depends on these flags
    mfence                              # order the non-temporal stores before continuing
    movdqa  xmm0, [rsp + 0x18]          # restore xmm0
    jmp     L_CopyBytes                 # copy remaining bytes
L_CopyBackward:
    mov     rsi, r9                     # rsi <- Last byte of Source
    lea     rdi, [rdi + r8 - 1]         # rdi <- Last byte of Destination
    std                                 # DF = 1: rep movsb walks downward
L_CopyBytes:
    # Shared tail: r8 bytes in the direction selected by DF.
    mov     rcx, r8
    rep     movsb
    cld                                 # always leave DF clear for the caller
    pop     rdi
    pop     rsi
    ret
Commit	Line	Data
4df876ad	1	#
	2	# ConvertAsm.py: Automatically generated from CopyMem.asm
	3	#
	4	#------------------------------------------------------------------------------
	5	#
	6	# Copyright (c) 2006, Intel Corporation
	7	# All rights reserved. This program and the accompanying materials
	8	# are licensed and made available under the terms and conditions of the BSD License
	9	# which accompanies this distribution. The full text of the license may be found at
	10	# http://opensource.org/licenses/bsd-license.php
	11	#
	12	# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
	13	# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
	14	#
	15	# Module Name:
	16	#
	17	# CopyMem.S
	18	#
	19	# Abstract:
	20	#
	21	# CopyMem function
	22	#
	23	# Notes:
	24	#
	25	#------------------------------------------------------------------------------
	26
	27
	28	#------------------------------------------------------------------------------
	29	# VOID *
	30	# EFIAPI
	31	# InternalMemCopyMem (
	32	# IN VOID *Destination,
	33	# IN VOID *Source,
	34	# IN UINTN Count
	35	# )
	36	#------------------------------------------------------------------------------
	37	.intel_syntax noprefix
	38	.globl ASM_PFX(InternalMemCopyMem)
	39	ASM_PFX(InternalMemCopyMem):
	40	push rsi
	41	push rdi
	42	mov rsi, rdx # rsi <- Source
	43	mov rdi, rcx # rdi <- Destination
	44	lea r9, [rsi + r8 - 1] # r9 <- Last byte of Source
	45	cmp rsi, rdi
	46	mov rax, rdi # rax <- Destination as return value
852f5d96	47	jae L0 # Copy forward if Source >= Destination
4df876ad	48	cmp r9, rdi # Overlapped?
852f5d96	49	jae L_CopyBackward # Copy backward if overlapped
852f5d96	50	L0:
4df876ad	51	xor rcx, rcx
	52	sub rcx, rdi # rcx <- -rdi
	53	and rcx, 15 # rcx + rdi should be 16 bytes aligned
852f5d96	54	jz L1 # skip if rcx == 0
4df876ad	55	cmp rcx, r8
	56	cmova rcx, r8
	57	sub r8, rcx
	58	rep movsb
852f5d96	59	L1:
4df876ad	60	mov rcx, r8
	61	and r8, 15
	62	shr rcx, 4 # rcx <- # of DQwords to copy
852f5d96	63	jz L_CopyBytes
	64	movdqa [rsp + 0x18], xmm0 # save xmm0 on stack
	65	L2:
4df876ad	66	movdqu xmm0, [rsi] # rsi may not be 16-byte aligned
	67	movntdq [rdi], xmm0 # rdi should be 16-byte aligned
	68	add rsi, 16
	69	add rdi, 16
852f5d96	70	loop L2
4df876ad	71	mfence
852f5d96	72	movdqa xmm0, [rsp + 0x18] # restore xmm0
	73	jmp L_CopyBytes # copy remaining bytes
	74	L_CopyBackward:
4df876ad	75	mov rsi, r9 # rsi <- Last byte of Source
	76	lea rdi, [rdi + r8 - 1] # rdi <- Last byte of Destination
	77	std
852f5d96	78	L_CopyBytes:
4df876ad	79	mov rcx, r8
	80	rep movsb
	81	cld
	82	pop rdi
	83	pop rsi
	84	ret