//
// MdePkg/Library/BaseMemoryLibOptDxe/AArch64/CompareMem.S
// Optimized memory-compare routine, imported from the Linaro
// cortex-strings memcmp implementation.
//
//
// Copyright (c) 2013, Linaro Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of the Linaro nor the
//       names of its contributors may be used to endorse or promote products
//       derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

// Assumptions:
//
// ARMv8-a, AArch64
//

// Parameters and result.
#define src1      x0            // first buffer  (DestinationBuffer)
#define src2      x1            // second buffer (SourceBuffer)
#define limit     x2            // number of bytes to compare
#define result    x0            // return value (aliases src1)

// Internal variables.
#define data1     x3
#define data1w    w3
#define data2     x4
#define data2w    w4
#define diff      x6            // non-zero bits mark differing bytes
#define endloop   x7            // loop-exit sentinel (diff | last-Dword flag)
#define tmp1      x8
#define tmp2      x9
#define pos       x11           // bit position of first difference
#define limit_wd  x12           // remaining whole/partial Dwords
#define mask      x13           // masks off bytes past the limit

//
// INTN
// EFIAPI
// InternalMemCompareMem (
//   IN CONST VOID   *DestinationBuffer,   // src1 = x0
//   IN CONST VOID   *SourceBuffer,        // src2 = x1
//   IN UINTN        Length                // limit = x2, in bytes
//   );
//
// Returns 0 if the two buffers are identical over Length bytes; otherwise
// the difference (first byte of src1) - (first byte of src2) at the first
// mismatching position, with both bytes zero-extended before the subtract.
//
// Strategy: when both pointers have the same alignment mod 8, compare a
// Dword (8 bytes) per iteration; otherwise fall back to a byte loop.
// Little-endian only (the REV-based difference location relies on it).
// Clobbers: x3, x4, x6-x9, x11-x13 and the condition flags.
// NOTE(review): the byte-loop path decrements Length before its first
// limit check, so this assumes Length >= 1 — confirm the C CompareMem
// wrapper rejects Length == 0 before dispatching here.
//
    .p2align 6
ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
ASM_PFX(InternalMemCompareMem):
    eor     tmp1, src1, src2
    tst     tmp1, #7
    b.ne    .Lmisaligned8           // alignments differ mod 8 -> byte loop
    ands    tmp1, src1, #7
    b.ne    .Lmutual_align          // same misalignment -> align both first
    add     limit_wd, limit, #7     // limit_wd = ceil(limit / 8):
    lsr     limit_wd, limit_wd, #3  //   Dwords to examine, last may be partial

    // Start of performance-critical section  -- one 64B cache line.
.Lloop_aligned:
    ldr     data1, [src1], #8
    ldr     data2, [src2], #8
.Lstart_realigned:
    subs    limit_wd, limit_wd, #1
    eor     diff, data1, data2      // Non-zero if differences found.
    csinv   endloop, diff, xzr, ne  // Last Dword or differences.
    cbz     endloop, .Lloop_aligned
    // End of performance-critical section  -- one 64B cache line.

    // Not reached the limit, must have found a diff.
    cbnz    limit_wd, .Lnot_limit

    // Limit % 8 == 0 => all bytes significant.
    ands    limit, limit, #7
    b.eq    .Lnot_limit

    lsl     limit, limit, #3        // Bits -> bytes.
    mov     mask, #~0
    lsl     mask, mask, limit       // mask covers bytes beyond the limit
    bic     data1, data1, mask      // ignore trailing bytes past Length
    bic     data2, data2, mask

    orr     diff, diff, mask        // force a "difference" at the limit

.Lnot_limit:
    rev     diff, diff              // byte-reverse so the first differing
    rev     data1, data1            //   byte becomes the most significant
    rev     data2, data2

    // The MS-non-zero bit of DIFF marks either the first bit
    // that is different, or the end of the significant data.
    // Shifting left now will bring the critical information into the
    // top bits.
    clz     pos, diff
    lsl     data1, data1, pos
    lsl     data2, data2, pos

    // But we need to zero-extend (char is unsigned) the value and then
    // perform a signed 32-bit subtraction.
    lsr     data1, data1, #56       // isolate the differing byte of src1
    sub     result, data1, data2, lsr #56
    ret

.Lmutual_align:
    // Sources are mutually aligned, but are not currently at an
    // alignment boundary.  Round down the addresses and then mask off
    // the bytes that precede the start point.
    bic     src1, src1, #7
    bic     src2, src2, #7
    add     limit, limit, tmp1      // Adjust the limit for the extra.
    lsl     tmp1, tmp1, #3          // Bytes beyond alignment -> bits.
    ldr     data1, [src1], #8
    neg     tmp1, tmp1              // Bits to alignment -64.
    ldr     data2, [src2], #8
    mov     tmp2, #~0

    // Little-endian.  Early bytes are at LSB.
    lsr     tmp2, tmp2, tmp1        // Shift (tmp1 & 63).
    add     limit_wd, limit, #7
    orr     data1, data1, tmp2      // force pre-start bytes equal in both
    orr     data2, data2, tmp2
    lsr     limit_wd, limit_wd, #3
    b       .Lstart_realigned

    .p2align 6
.Lmisaligned8:
    sub     limit, limit, #1
1:
    // Perhaps we can do better than this.
    ldrb    data1w, [src1], #1
    ldrb    data2w, [src2], #1
    subs    limit, limit, #1        // C set while bytes remain
    ccmp    data1w, data2w, #0, cs  // NZCV = 0b0000 once limit exhausted
    b.eq    1b                      // loop while equal and bytes remain
    sub     result, data1, data2    // both zero-extended by ldrb
    ret