[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / X64 / CopyMem.S

#\r
# ConvertAsm.py: Automatically generated from CopyMem.asm\r
#\r
#------------------------------------------------------------------------------\r
#\r
# Copyright (c) 2006 - 2008, Intel Corporation\r
# All rights reserved. This program and the accompanying materials\r
# are licensed and made available under the terms and conditions of the BSD License\r
# which accompanies this distribution.  The full text of the license may be found at\r
# http://opensource.org/licenses/bsd-license.php\r
#\r
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
#\r
# Module Name:\r
#\r
#   CopyMem.S\r
#\r
# Abstract:\r
#\r
#   CopyMem function\r
#\r
# Notes:\r
#\r
#------------------------------------------------------------------------------\r
\r
#------------------------------------------------------------------------------\r
#  VOID *\r
#  EFIAPI\r
#  InternalMemCopyMem (\r
#    IN VOID   *Destination,\r
#    IN VOID   *Source,\r
#    IN UINTN  Count\r
#    )\r
#------------------------------------------------------------------------------\r
.intel_syntax noprefix
.globl ASM_PFX(InternalMemCopyMem)
#------------------------------------------------------------------------------
#  Copies Count bytes from Source to Destination; the two buffers may overlap.
#
#  In:    rcx = Destination, rdx = Source, r8 = Count
#  Out:   rax = Destination
#  Saved: rsi, rdi (push/pop), xmm0 (spilled to the stack around the bulk loop)
#  Uses:  rcx, r8, r9, flags
#
#  Strategy:
#    - If Source < Destination and the ranges overlap, copy byte-wise from the
#      last byte down to the first (direction flag set, then cleared again).
#    - Otherwise copy forward: bytes until Destination is 16-byte aligned,
#      then 16-byte non-temporal stores, then the remaining (< 16) bytes.
#
#  The forward path relies on the direction flag being clear on entry.
#------------------------------------------------------------------------------
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    # rsi <- Source
    mov     rdi, rcx                    # rdi <- Destination
    lea     r9, [rsi + r8 - 1]          # r9 <- Last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    # rax <- Destination as return value
    jae     L_CopyForward               # Copy forward if Source >= Destination
    cmp     r9, rdi                     # Overlapped?
    jae     L_CopyBackward              # Copy backward if overlapped
L_CopyForward:
    xor     rcx, rcx
    sub     rcx, rdi                    # rcx <- -rdi
    and     rcx, 15                     # rcx + rdi should be 16 bytes aligned
    jz      L_CopyDqwords               # skip if rdi is already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     # rcx <- min (rcx, Count)
    sub     r8, rcx                     # r8 <- bytes left after the head copy
    rep     movsb
L_CopyDqwords:
    mov     rcx, r8
    and     r8, 15                      # r8 <- tail bytes after the DQword loop
    shr     rcx, 4                      # rcx <- # of DQwords to copy
    jz      L_CopyBytes
    #
    # Two pushes moved rsp down by 0x10, so [rsp + 0x18] is the 8 bytes above
    # the return address, i.e. inside the caller-provided home area of the
    # x64 calling convention. movdqa needs this slot 16-byte aligned, which
    # holds when the caller's stack was 16-byte aligned at the call.
    #
    movdqa  [rsp + 0x18], xmm0          # save xmm0 on stack
L_DqwordLoop:
    movdqu  xmm0, [rsi]                 # rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 # rdi should be 16-byte aligned
    add     rsi, 16
    add     rdi, 16
    loop    L_DqwordLoop
    mfence                              # order the non-temporal stores
    movdqa  xmm0, [rsp + 0x18]          # restore xmm0
    jmp     L_CopyBytes                 # copy remaining bytes
L_CopyBackward:
    mov     rsi, r9                     # rsi <- Last byte of Source
    lea     rdi, [rdi + r8 - 1]         # rdi <- Last byte of Destination
    std                                 # movsb now walks downwards
L_CopyBytes:
    mov     rcx, r8
    rep     movsb
    cld                                 # leave the direction flag clear
    pop     rdi
    pop     rsi
    ret
Commit	Line	Data
b1ff428c	1	#\r
	2	# ConvertAsm.py: Automatically generated from CopyMem.asm\r
	3	#\r
	4	#------------------------------------------------------------------------------\r
	5	#\r
373ade0e	6	# Copyright (c) 2006 - 2008, Intel Corporation\r
b1ff428c	7	# All rights reserved. This program and the accompanying materials\r
	8	# are licensed and made available under the terms and conditions of the BSD License\r
	9	# which accompanies this distribution. The full text of the license may be found at\r
	10	# http://opensource.org/licenses/bsd-license.php\r
	11	#\r
	12	# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
	13	# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
	14	#\r
	15	# Module Name:\r
	16	#\r
	17	# CopyMem.S\r
	18	#\r
	19	# Abstract:\r
	20	#\r
	21	# CopyMem function\r
	22	#\r
	23	# Notes:\r
	24	#\r
	25	#------------------------------------------------------------------------------\r
	26	\r
	27	#------------------------------------------------------------------------------\r
	28	# VOID *\r
	29	# EFIAPI\r
	30	# InternalMemCopyMem (\r
	31	# IN VOID *Destination,\r
	32	# IN VOID *Source,\r
	33	# IN UINTN Count\r
	34	# )\r
2f3d6fb5	35	#------------------------------------------------------------------------------\r
	36	.intel_syntax noprefix\r
	37	.globl ASM_PFX(InternalMemCopyMem)\r
	38	ASM_PFX(InternalMemCopyMem):\r
	39	push rsi\r
	40	push rdi\r
	41	mov rsi, rdx # rsi <- Source\r
	42	mov rdi, rcx # rdi <- Destination\r
	43	lea r9, [rsi + r8 - 1] # r9 <- Last byte of Source\r
	44	cmp rsi, rdi\r
	45	mov rax, rdi # rax <- Destination as return value\r
	46	jae L0 # Copy forward if Source > Destination\r
	47	cmp r9, rdi # Overlapped?\r
	48	jae @CopyBackward # Copy backward if overlapped\r
	49	L0:\r
	50	xor rcx, rcx\r
	51	sub rcx, rdi # rcx <- -rdi\r
	52	and rcx, 15 # rcx + rsi should be 16 bytes aligned\r
	53	jz L1 # skip if rcx == 0\r
	54	cmp rcx, r8\r
	55	cmova rcx, r8\r
	56	sub r8, rcx\r
	57	rep movsb\r
	58	L1:\r
	59	mov rcx, r8\r
	60	and r8, 15\r
	61	shr rcx, 4 # rcx <- # of DQwords to copy\r
	62	jz L_CopyBytes\r
	63	movdqa [rsp + 0x18], xmm0 # save xmm0 on stack\r
	64	L2:\r
	65	movdqu xmm0, [rsi] # rsi may not be 16-byte aligned\r
	66	movntdq [rdi], xmm0 # rdi should be 16-byte aligned\r
	67	add rsi, 16\r
	68	add rdi, 16\r
	69	loop L2\r
	70	mfence\r
	71	movdqa xmm0, [rsp + 0x18] # restore xmm0\r
	72	jmp L_CopyBytes # copy remaining bytes\r
	73	L_CopyBackward:\r
	74	mov rsi, r9 # rsi <- Last byte of Source\r
	75	lea rdi, [rdi + r8 - 1] # rdi <- Last byte of Destination\r
	76	std\r
	77	L_CopyBytes:\r
	78	mov rcx, r8\r
	79	rep movsb\r
	80	cld\r
	81	pop rdi\r
	82	pop rsi\r
	83	ret\r