]> git.proxmox.com Git - wasi-libc.git/blob - libc-top-half/musl/src/string/aarch64/memset.S
Update to musl 1.2.1. (#222)
[wasi-libc.git] / libc-top-half / musl / src / string / aarch64 / memset.S
1 /*
2 * memset - fill memory with a constant byte
3 *
4 * Copyright (c) 2012-2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8 /* Assumptions:
9 *
10 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
11 *
12 */
13
14 #define dstin x0 /* Destination base; used only as a store address, never written. */
15 #define val x1 /* 64-bit view of the fill-value register. */
16 #define valw w1 /* 32-bit view of the same register as val. */
17 #define count x2 /* Byte count; reused as the biased loop counter on the long path. */
18 #define dst x3 /* Aligned working pointer for the long path. */
19 #define dstend x4 /* dstin + count, i.e. one past the last byte to set. */
20 #define zva_val x5 /* Scratch for the value read from dczid_el0. */
21
/*
 * memset: store the low byte of valw into count bytes starting at dstin.
 * Presumably reached via the standard C memset(dest, c, n) contract -
 * the prototype is not visible in this file.
 *
 * In:       dstin (x0) = destination, valw (w1) = fill value, count (x2) = length.
 * Out:      x0 is never written, so it still holds the destination at ret.
 * Clobbers: x1-x5, v0 (q0), condition flags.  No stack is used.
 *
 * Dispatch on count:
 *   0..15   scalar stores, overlapping from both ends
 *   16..63  16-byte vector stores, overlapping from both ends
 *   64..96  64 bytes from the start plus 32 bytes from the end
 *   > 96    64-byte loop; DC ZVA is used when the value is zero,
 *           count >= 160 and the ZVA check passes
 *
 * Stores are issued relative to both dstin and dstend so that every size
 * within a class is covered without a byte loop; bytes in the overlap are
 * simply written twice with the same value.
 */
22 .global memset
23 .type memset,%function
24 memset:
25
26 dup v0.16B, valw /* Replicate the fill byte into all 16 lanes of v0. */
27 add dstend, dstin, count /* dstend = one past the last byte. */
28
29 cmp count, 96
30 b.hi .Lset_long /* count > 96 */
31 cmp count, 16
32 b.hs .Lset_medium /* 16 <= count <= 96 */
33 mov val, v0.D[0] /* val = fill byte repeated 8 times, for the scalar stores below. */
34
35 /* Set 0..15 bytes. */
36 tbz count, 3, 1f /* Bit 3 clear: count < 8. */
37 str val, [dstin] /* 8..15 bytes: two 8-byte stores that may overlap. */
38 str val, [dstend, -8]
39 ret
40 nop /* Presumably padding so the following code lands on an aligned address. */
41 1: tbz count, 2, 2f /* Bit 2 clear: count < 4. */
42 str valw, [dstin] /* 4..7 bytes: two 4-byte stores that may overlap. */
43 str valw, [dstend, -4]
44 ret
45 2: cbz count, 3f /* count == 0: nothing to store. */
46 strb valw, [dstin] /* 1..3 bytes: always write the first byte. */
47 tbz count, 1, 3f /* count == 1: done. */
48 strh valw, [dstend, -2] /* 2..3 bytes: last two bytes (overlaps byte 0 when count == 2). */
49 3: ret
50
51 /* Set 16..96 bytes. */
52 .Lset_medium:
53 str q0, [dstin] /* First 16 bytes; needed by every size in this class. */
54 tbnz count, 6, .Lset96 /* Bit 6 set: 64 <= count <= 96. */
55 str q0, [dstend, -16] /* Last 16 bytes; with the store above this covers 16..32. */
56 tbz count, 5, 1f /* Bit 5 clear: count < 32, already fully covered. */
57 str q0, [dstin, 16] /* 32..63 bytes: second 16 from the start ... */
58 str q0, [dstend, -32] /* ... and second 16 from the end. */
59 1: ret
60
61 .p2align 4
62 /* Set 64..96 bytes. Write 64 bytes from the start and
63 32 bytes from the end. */
64 .Lset96:
65 str q0, [dstin, 16] /* Bytes 0..15 were already stored at .Lset_medium. */
66 stp q0, q0, [dstin, 32]
67 stp q0, q0, [dstend, -32]
68 ret
69
70 .p2align 4
/* Set more than 96 bytes. */
71 .Lset_long:
72 and valw, valw, 255 /* Reduce to the fill byte so it can be tested against zero. */
73 bic dst, dstin, 15 /* dst = dstin rounded down to a 16-byte boundary. */
74 str q0, [dstin] /* Unaligned store of the first 16 bytes. */
75 cmp count, 160
76 ccmp valw, 0, 0, hs /* count >= 160: compare byte with 0; otherwise force flags to "ne". */
77 b.ne .Lno_zva /* Take the ZVA path only for a zero fill of at least 160 bytes. */
78
/* Unless SKIP_ZVA_CHECK is defined, read dczid_el0 and fall back to the
   plain store loop when its low five bits are anything other than 4. */
79 #ifndef SKIP_ZVA_CHECK
80 mrs zva_val, dczid_el0
81 and zva_val, zva_val, 31 /* Keep only the low five bits. */
82 cmp zva_val, 4 /* ZVA size is 64 bytes. */
83 b.ne .Lno_zva
84 #endif
85 str q0, [dst, 16] /* Fill [dst+16, dst+64) with ordinary stores; together with the */
86 stp q0, q0, [dst, 32] /* head store this reaches past the next 64-byte boundary. */
87 bic dst, dst, 63 /* Round dst down to a 64-byte boundary. */
88 sub count, dstend, dst /* Count is now 64 too large. */
89 sub count, count, 128 /* Adjust count and bias for loop. */
90
91 .p2align 4
92 .Lzva_loop:
93 add dst, dst, 64 /* Advance to the next 64-byte-aligned block. */
94 dc zva, dst /* Zero that block (this path is only taken for a zero fill). */
95 subs count, count, 64
96 b.hi .Lzva_loop /* Unsigned: continue while the biased count is still above zero. */
97 stp q0, q0, [dstend, -64] /* Finish with the last 64 bytes, addressed from the end. */
98 stp q0, q0, [dstend, -32]
99 ret
100
/* Plain store loop: 64 bytes per iteration from the 16-byte-aligned dst. */
101 .Lno_zva:
102 sub count, dstend, dst /* Count is 16 too large. */
103 sub dst, dst, 16 /* Dst is biased by -32. */
104 sub count, count, 64 + 16 /* Adjust count and bias for loop. */
105 .Lno_zva_loop:
106 stp q0, q0, [dst, 32]
107 stp q0, q0, [dst, 64]! /* Pre-index with writeback: dst advances by 64 here. */
108 subs count, count, 64
109 b.hi .Lno_zva_loop /* Unsigned: continue while the biased count is still above zero. */
110 stp q0, q0, [dstend, -64] /* Finish with the last 64 bytes, addressed from the end. */
111 stp q0, q0, [dstend, -32]
112 ret
113
114 .size memset,.-memset
115