]> git.proxmox.com Git - mirror_edk2.git/blame - MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S
MdePkg/BaseMemoryLibOptDxe: Apply BSD-2-Clause-Patent
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / AArch64 / CompareMem.S
CommitLineData
c86cd1e1
AB
1//\r
2// Copyright (c) 2013, Linaro Limited\r
3// All rights reserved.\r
aa1b377e 4// SPDX-License-Identifier: BSD-2-Clause-Patent\r
c86cd1e1
AB
5//\r
6\r
7// Assumptions:\r
8//\r
9// ARMv8-a, AArch64\r
10//\r
11\r
12\r
13// Parameters and result.\r
// src1 and src2 are the base addresses used by the loads in the routine\r
// below; limit is the number of bytes to compare. result shares x0 with\r
// src1, so src1 is no longer available once result has been written.\r
14#define src1 x0\r
15#define src2 x1\r
16#define limit x2\r
17#define result x0\r
18\r
19// Internal variables.\r
//   data1, data2   - data most recently loaded from src1 / src2.\r
//   data1w, data2w - 32-bit views of data1 / data2, used as the\r
//                    destination of the byte loads in the misaligned path.\r
//   diff           - data1 XOR data2; non-zero bits mark differences.\r
//   endloop        - non-zero when the aligned loop has to stop.\r
//   tmp1, tmp2     - scratch for the alignment tests and mask building.\r
//   pos            - leading-zero count of the byte-reversed diff.\r
//   limit_wd       - limit rounded up to a count of 8-byte words.\r
//   mask           - bits of the final word that lie beyond limit.\r
20#define data1 x3\r
21#define data1w w3\r
22#define data2 x4\r
23#define data2w w4\r
24#define diff x6\r
25#define endloop x7\r
26#define tmp1 x8\r
27#define tmp2 x9\r
28#define pos x11\r
29#define limit_wd x12\r
30#define mask x13\r
31\r
// InternalMemCompareMem\r
//\r
// Compares limit bytes starting at src1 with limit bytes starting at src2.\r
//\r
// In:    x0 = src1, x1 = src2, x2 = limit (byte count).\r
// Out:   x0 = result. Zero when no difference is found within limit\r
//        bytes; otherwise non-zero, computed as a data1 - data2\r
//        subtraction taken at the first difference.\r
// Uses:  x3, x4, x6-x9, x11-x13 and the condition flags. No stack access.\r
//\r
// Three paths, selected from the low three bits of the two addresses:\r
//   - both 8-byte aligned:          word-at-a-time loop (.Lloop_aligned);\r
//   - same misalignment in both:    round down, neutralise the leading\r
//                                   bytes, then join the word loop;\r
//   - different misalignment:       byte-at-a-time loop (.Lmisaligned8).\r
//\r
// The word paths always load whole aligned 8-byte words, so they can read\r
// bytes that lie before src1/src2 or after the last requested byte within\r
// the same aligned word; those bytes are masked out before being used.\r
//\r
// NOTE(review): nothing in this routine tests limit for zero before the\r
// first load or the first decrement -- presumably the caller guarantees\r
// limit > 0; confirm against the call site.\r
// NOTE(review): on the word paths the shift amount comes from clz, i.e.\r
// it is a bit position rather than a byte position, so the magnitude of\r
// result is not necessarily the plain difference of the two mismatching\r
// bytes (its sign is). The byte path returns the plain byte difference.\r
// Confirm that callers only rely on zero / sign.\r
32 .p2align 6\r
33ASM_GLOBAL ASM_PFX(InternalMemCompareMem)\r
34ASM_PFX(InternalMemCompareMem):\r
// Low three bits of (src1 ^ src2) non-zero => the two pointers can never\r
// be 8-byte aligned at the same time.\r
35 eor tmp1, src1, src2\r
36 tst tmp1, #7\r
37 b.ne .Lmisaligned8\r
// Same misalignment in both; tmp1 = bytes past the alignment boundary.\r
38 ands tmp1, src1, #7\r
39 b.ne .Lmutual_align\r
// limit_wd = (limit + 7) / 8: number of 8-byte words to examine.\r
40 add limit_wd, limit, #7\r
41 lsr limit_wd, limit_wd, #3\r
42\r
43 // Start of performance-critical section -- one 64B cache line.\r
44.Lloop_aligned:\r
45 ldr data1, [src1], #8\r
46 ldr data2, [src2], #8\r
47.Lstart_realigned:\r
// The flags from this subs feed the csinv two instructions below.\r
48 subs limit_wd, limit_wd, #1\r
49 eor diff, data1, data2 // Non-zero if differences found.\r
// endloop = diff while words remain, all-ones once the count hits zero.\r
50 csinv endloop, diff, xzr, ne // Last Dword or differences.\r
51 cbz endloop, .Lloop_aligned\r
52 // End of performance-critical section -- one 64B cache line.\r
53\r
54 // Not reached the limit, must have found a diff.\r
55 cbnz limit_wd, .Lnot_limit\r
56\r
57 // Limit % 8 == 0 => all bytes significant.\r
58 ands limit, limit, #7\r
59 b.eq .Lnot_limit\r
60\r
// Partial last word: build a mask of the bits beyond the limit, clear\r
// them in both operands and set them in diff so that the scan below\r
// stops at the end of the significant data.\r
61 lsl limit, limit, #3 // Bytes -> bits.\r
62 mov mask, #~0\r
63 lsl mask, mask, limit\r
64 bic data1, data1, mask\r
65 bic data2, data2, mask\r
66\r
67 orr diff, diff, mask\r
68\r
69.Lnot_limit:\r
// Byte-reverse so that the byte at the lowest address becomes the most\r
// significant byte.\r
70 rev diff, diff\r
71 rev data1, data1\r
72 rev data2, data2\r
73\r
74 // The MS-non-zero bit of DIFF marks either the first bit\r
75 // that is different, or the end of the significant data.\r
76 // Shifting left now will bring the critical information into the\r
77 // top bits.\r
78 clz pos, diff\r
79 lsl data1, data1, pos\r
80 lsl data2, data2, pos\r
81\r
82 // But we need to zero-extend (char is unsigned) the top byte of each\r
83 // value and then subtract; the subtraction uses the full x registers.\r
84 lsr data1, data1, #56\r
85 sub result, data1, data2, lsr #56\r
86 ret\r
87\r
88.Lmutual_align:\r
89 // Sources are mutually aligned, but are not currently at an\r
90 // alignment boundary. Round down the addresses and then mask off\r
91 // the bytes that precede the start point.\r
92 bic src1, src1, #7\r
93 bic src2, src2, #7\r
94 add limit, limit, tmp1 // Adjust the limit for the extra.\r
95 lsl tmp1, tmp1, #3 // Bytes beyond alignment -> bits.\r
96 ldr data1, [src1], #8\r
97 neg tmp1, tmp1 // Bits to alignment -64.\r
98 ldr data2, [src2], #8\r
99 mov tmp2, #~0\r
100\r
101 // Little-endian. Early bytes are at LSB.\r
// tmp2 ends up with ones in the low bits that cover the bytes before the\r
// start point; OR-ing it into both words makes those bytes compare equal.\r
102 lsr tmp2, tmp2, tmp1 // Shift (tmp1 & 63).\r
103 add limit_wd, limit, #7\r
104 orr data1, data1, tmp2\r
105 orr data2, data2, tmp2\r
106 lsr limit_wd, limit_wd, #3\r
107 b .Lstart_realigned\r
108\r
109 .p2align 6\r
110.Lmisaligned8:\r
// Byte-at-a-time path. limit is biased by -1 so that the subs inside the\r
// loop borrows (carry clear) on the iteration that loads the last byte.\r
111 sub limit, limit, #1\r
1121:\r
113 // Perhaps we can do better than this.\r
114 ldrb data1w, [src1], #1\r
115 ldrb data2w, [src2], #1\r
116 subs limit, limit, #1\r
// Carry set (bytes remain): compare the two bytes. Carry clear (limit\r
// exhausted): force flags to 0b0000, i.e. "not equal", to leave the loop.\r
117 ccmp data1w, data2w, #0, cs // NZCV = 0b0000.\r
118 b.eq 1b\r
// Difference of the last pair of bytes loaded (zero if they matched).\r
119 sub result, data1, data2\r
120 ret\r
113 // Perhaps we can do better than this.\r
114 ldrb data1w, [src1], #1\r
115 ldrb data2w, [src2], #1\r
116 subs limit, limit, #1\r
117 ccmp data1w, data2w, #0, cs // NZCV = 0b0000.\r
118 b.eq 1b\r
119 sub result, data1, data2\r
120 ret\r