]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/memset.S | |
3 | * | |
4 | * Copyright (C) 1995-2000 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | * | |
10 | * ASM optimised string functions | |
11 | */ | |
12 | #include <linux/linkage.h> | |
13 | #include <asm/assembler.h> | |
c2459d35 | 14 | #include <asm/unwind.h> |
1da177e4 LT |
15 | |
16 | .text | |
17 | .align 5 | |
1da177e4 | 18 | |
/*
 * memset / mmioset - fill a block of memory with one byte value.
 *
 * Both names label the same entry point.
 *
 * Register usage, as established by the code below:
 *   r0 = destination pointer; never written, so it is still in r0 on return
 *   r1 = fill value; reduced to its low 8 bits, then replicated to 32 bits
 *   r2 = number of bytes to fill
 *   r3 = scratch: misalignment of r0 on entry, later a copy of the fill word
 *   ip = running store pointer
 *
 * Overall flow:
 *   6: byte stores until ip is word aligned (only if r0 was unaligned)
 *   1: build the 32-bit fill pattern
 *   2:/3: bulk fill with multi-register stores, 64 bytes per iteration
 *   4:/5: tail of fewer than 16 bytes, selected by testing bits of r2
 */
ENTRY(mmioset)
ENTRY(memset)
UNWIND( .fnstart         )
	and	r1, r1, #255		@ fill value is a byte: drop bits 8-31 so
					@ the replication at 1: cannot be polluted
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ byte -> halfword
	orr	r1, r1, r1, lsl #16	@ halfword -> word
	mov	r3, r1			@ second copy for multi-register stores
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: tail code only

/*
 * NOTE(review): CALGN() comes from <asm/assembler.h>; which branch is built
 * depends on how that macro expands - confirm there.  The #else branch is
 * the variant that aligns the destination to 32 bytes first.
 */
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
UNWIND( .fnend              )
UNWIND( .fnstart            )
UNWIND( .save {r8, lr}      )
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
					@ (count hit exactly 0: restore and return)
/*
 * No need to correct the count; we're only testing bits from now on
 * (r2 is negative here, but subtracting multiples of 64 has left its
 * low six bits equal to the number of bytes still to store).
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}		@ restore, then fall through to 4:
UNWIND( .fnend              )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
UNWIND( .fnend               )
UNWIND( .fnstart             )
UNWIND( .save {r4-r8, lr}    )
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ skip the alignment step when there are
	tstgt	ip, #31			@ at most 96 bytes, or when ip is already
	ble	3f			@ 32-byte aligned

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the next 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ bit 4 -> C, bit 3 -> N, bit 2 -> bit 30
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes if bit 4 was set
	stmmiia	ip!, {r4, r5}		@ 8 bytes if bit 3 was set
	tst	r8, #(1 << 30)
	mov	r8, r1			@ r8 was scratch: reload the fill word
					@ (mov leaves the tst flags intact)
	strne	r1, [ip], #4		@ 4 bytes if bit 2 was set

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 2 x 32 bytes per iteration
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ count hit exactly 0: restore and return

	tst	r2, #32			@ as above, only the low bits of r2 matter
	stmneia	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmneia	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}	@ restore, then fall through to 4:
UNWIND( .fnend               )

#endif

UNWIND( .fnstart            )
4:	tst	r2, #8
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	ret	lr

/*
 * Unaligned start: r3 = r0 & 3 (1, 2 or 3).  Store 4 - r3 single bytes to
 * reach a word boundary, then rejoin the aligned path.  If fewer than 4
 * bytes were requested, branch to 5: instead; subtracting 4 does not change
 * bits 0-1 of r2, so the bit tests there still see the original count.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend   )
ENDPROC(memset)
ENDPROC(mmioset)