2 // Copyright (c) 2012 - 2016, Linaro Limited
3 // All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright
10 // notice, this list of conditions and the following disclaimer in the
11 // documentation and/or other materials provided with the distribution.
12 // * Neither the name of the Linaro nor the
13 // names of its contributors may be used to endorse or promote products
14 // derived from this software without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 // Copyright (c) 2015 ARM Ltd
31 // All rights reserved.
33 // Redistribution and use in source and binary forms, with or without
34 // modification, are permitted provided that the following conditions are met:
36 // 1. Redistributions of source code must retain the above copyright
37 // notice, this list of conditions and the following disclaimer.
38 // 2. Redistributions in binary form must reproduce the above copyright
39 // notice, this list of conditions and the following disclaimer in the
40 // documentation and/or other materials provided with the distribution.
41 // 3. The name of the company may not be used to endorse or promote
42 // products derived from this software without specific prior written permission.
45 // THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
46 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
47 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48 // IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
49 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
50 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
51 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
52 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
53 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
54 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59 // ARMv8-a, AArch64, unaligned accesses
// Exported entry point InternalMemSetMem16.
// NOTE(review): no instructions for this label are visible in this listing
// (the leading line numbers skip ahead), so as shown it would simply fall
// through to the next label. Presumably the elided lines set up a 16-bit
// fill pattern and scale the element count to bytes - confirm against the
// complete source.
78 ASM_GLOBAL ASM_PFX(InternalMemSetMem16)
79 ASM_PFX(InternalMemSetMem16):
// Exported entry point InternalMemSetMem32.
// NOTE(review): no instructions for this label are visible in this listing
// (the leading line numbers skip ahead), so as shown it would simply fall
// through to the next label. Presumably the elided lines set up a 32-bit
// fill pattern and scale the element count to bytes - confirm against the
// complete source.
83 ASM_GLOBAL ASM_PFX(InternalMemSetMem32)
84 ASM_PFX(InternalMemSetMem32):
// Exported entry point InternalMemSetMem64.
// NOTE(review): no instructions for this label are visible in this listing
// (the leading line numbers skip ahead), so as shown it would simply fall
// through to the next label. Presumably the elided lines set up a 64-bit
// fill pattern and scale the element count to bytes - confirm against the
// complete source.
88 ASM_GLOBAL ASM_PFX(InternalMemSetMem64)
89 ASM_PFX(InternalMemSetMem64):
// Exported entry point InternalMemZeroMem.
// NOTE(review): no instructions for this label are visible in this listing
// (the leading line numbers skip ahead), so as shown it would simply fall
// through to the next label. Presumably the elided lines select an all-zero
// fill pattern - confirm against the complete source.
93 ASM_GLOBAL ASM_PFX(InternalMemZeroMem)
94 ASM_PFX(InternalMemZeroMem):
// InternalMemSetMem: exported entry point, followed by the store sequences
// that fill the range [dstin, dstin + count) from q0 / val / valw.
//
// NOTE(review): the decimal at the start of each line looks like a
// line-number artifact of the original file, and the gaps between
// consecutive numbers suggest that instructions, labels and branches are
// elided from this listing (for example L(set96) and L(no_zva) are branch
// targets below but are never defined here). Confirm every comment in this
// block against the complete source before relying on it.
//
// The register names used below (dstin, dst, dstend, count, val, valw, tmp1,
// tmp1w, tmp2, tmp2w, zva_len, zva_lenw) are not defined in this listing;
// presumably they are preprocessor aliases for general-purpose registers.
98 ASM_GLOBAL ASM_PFX(InternalMemSetMem)
99 ASM_PFX(InternalMemSetMem):
// Numeric local label 0 (presumably the join point for the other entry
// points; their branches are not visible here).
// dstend = dstin + count, so count is a byte count at this point. Later
// stores reach the tail of the range through negative offsets from dstend.
101 0: add dstend, dstin, count
// Small-size tail stores: val at dstend - 8, valw at dstend - 4, and the low
// halfword of valw at dstend - 2. Assumes val is a 64-bit and valw a 32-bit
// register view, so that each store ends exactly at dstend - TODO confirm.
// The size tests that select between these stores, and the matching stores
// at the start of the range, are not visible here.
112 str val, [dstend, -8]
117 str valw, [dstend, -4]
122 strh valw, [dstend, -2]
// Branch to L(set96) when bit 6 of count is set (count & 64).
128 tbnz count, 6, L(set96)
// Otherwise store the 16-byte q0 so that it ends at dstend ...
129 str q0, [dstend, -16]
// ... and another 16 bytes at [dstend - 32, dstend - 16). Presumably guarded
// by a size test on an elided line.
132 str q0, [dstend, -32]
136 // Set 64..96 bytes. Write 64 bytes from the start and
137 // 32 bytes from the end.
// The two stp below cover [dstin + 32, dstin + 64) and the final 32 bytes;
// the stores covering the first 32 bytes are not visible in this listing.
140 stp q0, q0, [dstin, 32]
141 stp q0, q0, [dstend, -32]
// Bulk store loop, 64 bytes per iteration (presumably the path taken when
// DC ZVA is not used; its label is not visible here).
153 sub count, dstend, dst // Count is 16 too large.
155 sub count, count, 64 + 16 // Adjust count and bias for loop.
// Post-indexed stp: write 32 bytes at dst, then advance dst by 64. The
// second stp fills the upper half of that 64-byte chunk, now at dst - 32.
156 1: stp q0, q0, [dst], 64
157 stp q0, q0, [dst, -32]
// Subtract one chunk and set the flags; the conditional branch that consumes
// them is not visible here.
159 subs count, count, 64
// Write the final 64 bytes ending at dstend. Presumably this may overlap
// bytes already written by the loop - confirm.
161 2: stp q0, q0, [dstend, -64]
162 stp q0, q0, [dstend, -32]
// Branch to L(no_zva) when bit 4 of tmp1w is set. Presumably tmp1 holds
// DCZID_EL0 here, where bit 4 (DZP) means DC ZVA is prohibited; the
// instruction that loads tmp1 is not visible.
168 tbnz tmp1w, 4, L(no_zva)
// Presumably tmp1w now holds only the DCZID_EL0 block-size field (log2 of
// the block size in 4-byte words), so 4 corresponds to 64 bytes.
170 cmp tmp1w, 4 // ZVA size is 64 bytes.
173 // Write the first and last 64 byte aligned block using stp rather
174 // than using DC ZVA. This is faster on some cores.
// 32-byte stores at dst + 32, dst + 64 and dst + 96.
177 stp q0, q0, [dst, 32]
179 stp q0, q0, [dst, 64]
180 stp q0, q0, [dst, 96]
181 sub count, dstend, dst // Count is now 128 too large.
182 sub count, count, 128+64+64 // Adjust count and bias for loop.
// Loop counter update for the 64-byte path; the DC ZVA instruction and the
// loop branch are presumably on elided lines.
187 subs count, count, 64
// Store 32 bytes at dst + 32, then the final 64 bytes ending at dstend.
190 stp q0, q0, [dst, 32]
191 stp q0, q0, [dstend, -64]
192 stp q0, q0, [dstend, -32]
197 cmp tmp1w, 5 // ZVA size is 128 bytes.
// 32-byte stores at dst + 32, dst + 64 and dst + 96.
201 stp q0, q0, [dst, 32]
202 stp q0, q0, [dst, 64]
203 stp q0, q0, [dst, 96]
205 sub count, dstend, dst // Count is now 128 too large.
206 sub count, count, 128+128 // Adjust count and bias for loop.
// Loop counter update for the 128-byte path; the DC ZVA instruction and the
// loop branch are presumably on elided lines.
210 subs count, count, 128
// Write the final 128 bytes ending at dstend.
212 stp q0, q0, [dstend, -128]
213 stp q0, q0, [dstend, -96]
214 stp q0, q0, [dstend, -64]
215 stp q0, q0, [dstend, -32]
// Generic ZVA size: zva_lenw = tmp2w << tmp1w. Presumably tmp2w was set to 4
// on an elided line, which would give the block size in bytes - TODO confirm.
220 lsl zva_lenw, tmp2w, tmp1w
221 add tmp1, zva_len, 64 // Max alignment bytes written.
// tmp1 = dst + zva_len.
226 add tmp1, dst, zva_len
228 subs count, tmp1, dst // Actual alignment bytes to write.
// bic clears in tmp1 every bit that is set in tmp2; presumably tmp2 is
// zva_len - 1 at this point - confirm.
229 bic tmp1, tmp1, tmp2 // Aligned dc zva start address.
// Alignment loop: 64 bytes per iteration using the same post-indexed stp
// pair as the bulk loop. The branch closing the loop is not visible here.
231 1: stp q0, q0, [dst], 64
232 stp q0, q0, [dst, -32]
233 subs count, count, 64
236 sub count, dstend, tmp1 // Remaining bytes to write.
// Subtract one ZVA block and set the flags; the branch that consumes them is
// not visible here.
237 subs count, count, zva_len
// Advance dst by one ZVA block and subtract it from count. The DC ZVA
// instruction and loop branch are presumably on elided lines.
240 add dst, dst, zva_len
241 subs count, count, zva_len
// Add zva_len back to count, undoing the last subtraction. The listing ends
// here; the code that consumes count afterwards is not visible.
243 4: add count, count, zva_len