/* SPDX-License-Identifier: GPL-2.0 */
/* NG4memcpy.S: Niagara-4 optimized memcpy.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF		0x04

/* On T4 it is very expensive to access ASRs like %fprs and
 * %asi, avoiding a read or a write can save ~50 cycles.
 */
#define FPU_ENTER			\
	rd	%fprs, %o5;		\
	andcc	%o5, FPRS_FEF, %g0;	\
	be,a,pn	%icc, 999f;		\
	 wr	%g0, FPRS_FEF, %fprs;	\
	999:

#ifdef MEMCPY_DEBUG
/* Debug flavor clears the scratch globals on entry so that any stale
 * value accidentally carried across the copy is caught quickly.
 */
#define VISEntryHalf FPU_ENTER; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif

#define GLOBAL_SPARE	%g5
#endif

/* ASI used by STORE_INIT for the 64-byte-aligned large-copy loop.
 * The block-init quad ASI avoids fetching the destination lines;
 * SIMULATE_NIAGARA_ON_NON_NIAGARA falls back to the plain primary ASI.
 */
#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

/* If the including file supplies no exception wrappers, this build is a
 * plain in-kernel memcpy rather than a copy_{to,from}_user variant.
 */
#if !defined(EX_LD) && !defined(EX_ST)
#define NON_USER_COPY
#endif

/* EX_LD/EX_ST(x,y): x is the memory access, y names the retl fixup run
 * if the access faults (the _FP forms are used while VIS is live).
 * They default to the bare access when no fault handling is requested.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif
#ifndef EX_LD_FP
#define EX_LD_FP(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif
#ifndef EX_ST_FP
#define EX_ST_FP(x,y)	x
#endif


#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
/* Debug builds go through %asi (set to ASI_P in FUNC_NAME's prologue). */
#define STORE(type,src,addr)	type##a src, [addr] %asi
#endif
#endif

#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif
ae2c6ca6 DM |
99 | .align 64 |
100 | ||
101 | .globl FUNC_NAME | |
102 | .type FUNC_NAME,#function | |
103 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |
104 | #ifdef MEMCPY_DEBUG | |
105 | wr %g0, 0x80, %asi | |
106 | #endif | |
107 | srlx %o2, 31, %g2 | |
108 | cmp %g2, 0 | |
109 | tne %XCC, 5 | |
110 | PREAMBLE | |
111 | mov %o0, %o3 | |
112 | brz,pn %o2, .Lexit | |
113 | cmp %o2, 3 | |
114 | ble,pn %icc, .Ltiny | |
115 | cmp %o2, 19 | |
116 | ble,pn %icc, .Lsmall | |
117 | or %o0, %o1, %g2 | |
118 | cmp %o2, 128 | |
119 | bl,pn %icc, .Lmedium | |
120 | nop | |
121 | ||
122 | .Llarge:/* len >= 0x80 */ | |
123 | /* First get dest 8 byte aligned. */ | |
124 | sub %g0, %o0, %g1 | |
125 | and %g1, 0x7, %g1 | |
126 | brz,pt %g1, 51f | |
127 | sub %o2, %g1, %o2 | |
42a4172b | 128 | |
95707704 | 129 | |
1ab32693 | 130 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) |
ae2c6ca6 DM |
131 | add %o1, 1, %o1 |
132 | subcc %g1, 1, %g1 | |
133 | add %o0, 1, %o0 | |
134 | bne,pt %icc, 1b | |
1ab32693 | 135 | EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) |
ae2c6ca6 DM |
136 | |
137 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) | |
138 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) | |
139 | LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) | |
140 | LOAD(prefetch, %o1 + 0x100, #n_reads_strong) | |
141 | LOAD(prefetch, %o1 + 0x140, #n_reads_strong) | |
142 | LOAD(prefetch, %o1 + 0x180, #n_reads_strong) | |
143 | LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) | |
144 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | |
145 | ||
146 | /* Check if we can use the straight fully aligned | |
147 | * loop, or we require the alignaddr/faligndata variant. | |
148 | */ | |
149 | andcc %o1, 0x7, %o5 | |
150 | bne,pn %icc, .Llarge_src_unaligned | |
151 | sub %g0, %o0, %g1 | |
152 | ||
153 | /* Legitimize the use of initializing stores by getting dest | |
154 | * to be 64-byte aligned. | |
155 | */ | |
156 | and %g1, 0x3f, %g1 | |
157 | brz,pt %g1, .Llarge_aligned | |
158 | sub %o2, %g1, %o2 | |
42a4172b | 159 | |
1ab32693 | 160 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) |
ae2c6ca6 DM |
161 | add %o1, 8, %o1 |
162 | subcc %g1, 8, %g1 | |
163 | add %o0, 8, %o0 | |
164 | bne,pt %icc, 1b | |
1ab32693 | 165 | EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8) |
ae2c6ca6 DM |
166 | |
167 | .Llarge_aligned: | |
168 | /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ | |
169 | andn %o2, 0x3f, %o4 | |
170 | sub %o2, %o4, %o2 | |
171 | ||
1ab32693 | 172 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4) |
ae2c6ca6 | 173 | add %o1, 0x40, %o1 |
1ab32693 | 174 | EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4) |
ae2c6ca6 | 175 | subcc %o4, 0x40, %o4 |
1ab32693 BM |
176 | EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64) |
177 | EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64) | |
178 | EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64) | |
179 | EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64) | |
ae2c6ca6 | 180 | add %o0, 0x08, %o0 |
1ab32693 | 181 | EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56) |
ae2c6ca6 | 182 | add %o0, 0x08, %o0 |
1ab32693 BM |
183 | EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48) |
184 | EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48) | |
ae2c6ca6 | 185 | add %o0, 0x08, %o0 |
1ab32693 BM |
186 | EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40) |
187 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40) | |
ae2c6ca6 | 188 | add %o0, 0x08, %o0 |
1ab32693 BM |
189 | EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32) |
190 | EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32) | |
ae2c6ca6 | 191 | add %o0, 0x08, %o0 |
1ab32693 | 192 | EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24) |
ae2c6ca6 | 193 | add %o0, 0x08, %o0 |
1ab32693 | 194 | EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16) |
ae2c6ca6 | 195 | add %o0, 0x08, %o0 |
1ab32693 | 196 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8) |
ae2c6ca6 DM |
197 | add %o0, 0x08, %o0 |
198 | bne,pt %icc, 1b | |
199 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | |
200 | ||
201 | membar #StoreLoad | #StoreStore | |
202 | ||
203 | brz,pn %o2, .Lexit | |
204 | cmp %o2, 19 | |
205 | ble,pn %icc, .Lsmall_unaligned | |
206 | nop | |
207 | ba,a,pt %icc, .Lmedium_noprefetch | |
208 | ||
209 | .Lexit: retl | |
210 | mov EX_RETVAL(%o3), %o0 | |
211 | ||
212 | .Llarge_src_unaligned: | |
f4da3628 DM |
213 | #ifdef NON_USER_COPY |
214 | VISEntryHalfFast(.Lmedium_vis_entry_fail) | |
215 | #else | |
216 | VISEntryHalf | |
217 | #endif | |
ae2c6ca6 DM |
218 | andn %o2, 0x3f, %o4 |
219 | sub %o2, %o4, %o2 | |
ae2c6ca6 DM |
220 | alignaddr %o1, %g0, %g1 |
221 | add %o1, %o4, %o1 | |
1ab32693 BM |
222 | EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4) |
223 | 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4) | |
ae2c6ca6 | 224 | subcc %o4, 0x40, %o4 |
1ab32693 BM |
225 | EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64) |
226 | EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64) | |
227 | EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64) | |
228 | EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64) | |
229 | EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64) | |
230 | EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64) | |
ae2c6ca6 | 231 | faligndata %f0, %f2, %f16 |
1ab32693 | 232 | EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64) |
ae2c6ca6 DM |
233 | faligndata %f2, %f4, %f18 |
234 | add %g1, 0x40, %g1 | |
235 | faligndata %f4, %f6, %f20 | |
236 | faligndata %f6, %f8, %f22 | |
237 | faligndata %f8, %f10, %f24 | |
238 | faligndata %f10, %f12, %f26 | |
239 | faligndata %f12, %f14, %f28 | |
240 | faligndata %f14, %f0, %f30 | |
1ab32693 BM |
241 | EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64) |
242 | EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56) | |
243 | EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48) | |
244 | EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40) | |
245 | EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32) | |
246 | EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24) | |
247 | EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16) | |
248 | EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8) | |
ae2c6ca6 DM |
249 | add %o0, 0x40, %o0 |
250 | bne,pt %icc, 1b | |
251 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) | |
44922150 DM |
252 | #ifdef NON_USER_COPY |
253 | VISExitHalfFast | |
254 | #else | |
ae2c6ca6 | 255 | VISExitHalf |
44922150 | 256 | #endif |
ae2c6ca6 DM |
257 | brz,pn %o2, .Lexit |
258 | cmp %o2, 19 | |
259 | ble,pn %icc, .Lsmall_unaligned | |
260 | nop | |
261 | ba,a,pt %icc, .Lmedium_unaligned | |
262 | ||
f4da3628 DM |
263 | #ifdef NON_USER_COPY |
264 | .Lmedium_vis_entry_fail: | |
265 | or %o0, %o1, %g2 | |
266 | #endif | |
ae2c6ca6 DM |
267 | .Lmedium: |
268 | LOAD(prefetch, %o1 + 0x40, #n_reads_strong) | |
269 | andcc %g2, 0x7, %g0 | |
270 | bne,pn %icc, .Lmedium_unaligned | |
271 | nop | |
272 | .Lmedium_noprefetch: | |
273 | andncc %o2, 0x20 - 1, %o5 | |
274 | be,pn %icc, 2f | |
275 | sub %o2, %o5, %o2 | |
1ab32693 BM |
276 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) |
277 | EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5) | |
278 | EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5) | |
279 | EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5) | |
ae2c6ca6 DM |
280 | add %o1, 0x20, %o1 |
281 | subcc %o5, 0x20, %o5 | |
1ab32693 BM |
282 | EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) |
283 | EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) | |
284 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) | |
285 | EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) | |
ae2c6ca6 DM |
286 | bne,pt %icc, 1b |
287 | add %o0, 0x20, %o0 | |
288 | 2: andcc %o2, 0x18, %o5 | |
289 | be,pt %icc, 3f | |
290 | sub %o2, %o5, %o2 | |
95707704 | 291 | |
1ab32693 | 292 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) |
ae2c6ca6 DM |
293 | add %o1, 0x08, %o1 |
294 | add %o0, 0x08, %o0 | |
295 | subcc %o5, 0x08, %o5 | |
296 | bne,pt %icc, 1b | |
1ab32693 | 297 | EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8) |
ae2c6ca6 DM |
298 | 3: brz,pt %o2, .Lexit |
299 | cmp %o2, 0x04 | |
300 | bl,pn %icc, .Ltiny | |
301 | nop | |
1ab32693 | 302 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2) |
ae2c6ca6 DM |
303 | add %o1, 0x04, %o1 |
304 | add %o0, 0x04, %o0 | |
305 | subcc %o2, 0x04, %o2 | |
306 | bne,pn %icc, .Ltiny | |
1ab32693 | 307 | EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4) |
ae2c6ca6 DM |
308 | ba,a,pt %icc, .Lexit |
309 | .Lmedium_unaligned: | |
310 | /* First get dest 8 byte aligned. */ | |
311 | sub %g0, %o0, %g1 | |
312 | and %g1, 0x7, %g1 | |
313 | brz,pt %g1, 2f | |
314 | sub %o2, %g1, %o2 | |
42a4172b | 315 | |
1ab32693 | 316 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) |
ae2c6ca6 DM |
317 | add %o1, 1, %o1 |
318 | subcc %g1, 1, %g1 | |
319 | add %o0, 1, %o0 | |
320 | bne,pt %icc, 1b | |
1ab32693 | 321 | EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) |
ae2c6ca6 DM |
322 | 2: |
323 | and %o1, 0x7, %g1 | |
324 | brz,pn %g1, .Lmedium_noprefetch | |
325 | sll %g1, 3, %g1 | |
326 | mov 64, %g2 | |
327 | sub %g2, %g1, %g2 | |
328 | andn %o1, 0x7, %o1 | |
1ab32693 | 329 | EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2) |
ae2c6ca6 DM |
330 | sllx %o4, %g1, %o4 |
331 | andn %o2, 0x08 - 1, %o5 | |
332 | sub %o2, %o5, %o2 | |
1ab32693 | 333 | 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5) |
ae2c6ca6 DM |
334 | add %o1, 0x08, %o1 |
335 | subcc %o5, 0x08, %o5 | |
336 | srlx %g3, %g2, GLOBAL_SPARE | |
337 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE | |
1ab32693 | 338 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8) |
ae2c6ca6 DM |
339 | add %o0, 0x08, %o0 |
340 | bne,pt %icc, 1b | |
341 | sllx %g3, %g1, %o4 | |
342 | srl %g1, 3, %g1 | |
343 | add %o1, %g1, %o1 | |
344 | brz,pn %o2, .Lexit | |
345 | nop | |
346 | ba,pt %icc, .Lsmall_unaligned | |
347 | ||
348 | .Ltiny: | |
1ab32693 | 349 | EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2) |
ae2c6ca6 DM |
350 | subcc %o2, 1, %o2 |
351 | be,pn %icc, .Lexit | |
1ab32693 BM |
352 | EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1) |
353 | EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2) | |
ae2c6ca6 DM |
354 | subcc %o2, 1, %o2 |
355 | be,pn %icc, .Lexit | |
1ab32693 BM |
356 | EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1) |
357 | EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2) | |
ae2c6ca6 | 358 | ba,pt %icc, .Lexit |
1ab32693 | 359 | EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2) |
ae2c6ca6 DM |
360 | |
361 | .Lsmall: | |
362 | andcc %g2, 0x3, %g0 | |
363 | bne,pn %icc, .Lsmall_unaligned | |
364 | andn %o2, 0x4 - 1, %o5 | |
365 | sub %o2, %o5, %o2 | |
366 | 1: | |
1ab32693 | 367 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) |
ae2c6ca6 DM |
368 | add %o1, 0x04, %o1 |
369 | subcc %o5, 0x04, %o5 | |
370 | add %o0, 0x04, %o0 | |
371 | bne,pt %icc, 1b | |
1ab32693 | 372 | EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4) |
ae2c6ca6 DM |
373 | brz,pt %o2, .Lexit |
374 | nop | |
375 | ba,a,pt %icc, .Ltiny | |
376 | ||
377 | .Lsmall_unaligned: | |
1ab32693 | 378 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2) |
ae2c6ca6 DM |
379 | add %o1, 1, %o1 |
380 | add %o0, 1, %o0 | |
381 | subcc %o2, 1, %o2 | |
382 | bne,pt %icc, 1b | |
1ab32693 | 383 | EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1) |
ae2c6ca6 | 384 | ba,a,pt %icc, .Lexit |
0ae2d26f | 385 | nop |
ae2c6ca6 | 386 | .size FUNC_NAME, .-FUNC_NAME |