]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * arch/ia64/lib/xor.S | |
3 | * | |
4 | * Optimized RAID-5 checksumming functions for IA-64. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2, or (at your option) | |
9 | * any later version. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | |
13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14 | */ | |
15 | ||
16 | #include <asm/asmmacro.h> | |
17 | ||
/*
 * xor_ia64_2: XOR two buffers together, 8 bytes per loop trip, and store
 * the result over the first buffer.
 *
 *   in0 = length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, i.e. it is treated as a byte count
 *   in1 = first source buffer and also the destination (r8 = store
 *         pointer, r16 = load pointer)
 *   in2 = second source buffer (r17 = load pointer)
 *
 * The loop is a modulo-scheduled br.ctop loop with 6 + 2 stages: the loads
 * are issued in stage 0, the xor in stage 6 and the store in stage 7.
 *
 * The rotating register region starts at r32 and so overlaps in0..in2;
 * that is why the arguments are copied to static registers (r8, r16, r17,
 * ar.lc) before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8; in that case the value
 * written to ar.lc is -1.  Presumably callers always pass a non-zero
 * multiple of 8 -- confirm against the callers.
 */
18 | GLOBAL_ENTRY(xor_ia64_2) | |
19 | .prologue | |
20 | .fframe 0 | |
21 | .save ar.pfs, r31 | |
/* 3 inputs, 0 locals, 13 outputs; 16 rotating registers = 7 + 7 + 2 (.rotr below) */
22 | alloc r31 = ar.pfs, 3, 0, 13, 16 | |
23 | .save ar.lc, r30 | |
/* ar.lc and the predicates are overwritten below; keep copies in r30 / r29 */
24 | mov r30 = ar.lc | |
25 | .save pr, r29 | |
26 | mov r29 = pr | |
27 | ;; | |
28 | .body | |
/* r8 = store pointer: results are written back over the in1 buffer */
29 | mov r8 = in1 | |
/* epilogue count = number of pipeline stages (6 + 2) */
30 | mov ar.ec = 6 + 2 | |
/* in0 = number of 8-byte words */
31 | shr in0 = in0, 3 | |
32 | ;; | |
/* loop count register takes (word count - 1) */
33 | adds in0 = -1, in0 | |
34 | mov r16 = in1 | |
35 | mov r17 = in2 | |
36 | ;; | |
37 | mov ar.lc = in0 | |
/* set p16 (= p[0], the stage-0 predicate) and clear the other rotating predicates */
38 | mov pr.rot = 1 << 16 | |
39 | ;; | |
/* s1/s2: loaded words, 7 deep so a value loaded as [0] is read as [6] six rotations later */
40 | .rotr s1[6+1], s2[6+1], d[2] | |
41 | .rotp p[6+2] | |
42 | 0: | |
/* stage 0: fetch one word from each source, post-incrementing the pointers by 8 */
43 | (p[0]) ld8.nta s1[0] = [r16], 8 | |
44 | (p[0]) ld8.nta s2[0] = [r17], 8 | |
/* stage 6: combine the words loaded six trips earlier */
45 | (p[6]) xor d[0] = s1[6], s2[6] | |
/* stage 7: d[1] is the d[0] produced on the previous trip; store it and advance r8 */
46 | (p[6+1])st8.nta [r8] = d[1], 8 | |
/* NOTE(review): nop.f looks like a slot filler for bundling -- confirm */
47 | nop.f 0 | |
48 | br.ctop.dptk.few 0b | |
49 | ;; | |
/* restore the saved loop count and predicate registers, then return */
50 | mov ar.lc = r30 | |
51 | mov pr = r29, -1 | |
52 | br.ret.sptk.few rp | |
53 | END(xor_ia64_2) | |
54 | ||
/*
 * xor_ia64_3: XOR three buffers together, 8 bytes per loop trip, and store
 * the result over the first buffer.
 *
 *   in0 = length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, i.e. it is treated as a byte count
 *   in1 = first source buffer and also the destination (r8 = store
 *         pointer, r16 = load pointer)
 *   in2 = second source buffer (r17 = load pointer)
 *   in3 = third source buffer (r18 = load pointer)
 *
 * The loop is a modulo-scheduled br.ctop loop with 6 + 2 stages: the loads
 * are issued in stage 0, both xors in stage 6 and the store in stage 7.
 * The loop body is split into two instruction groups by the ";;" stop.
 *
 * The rotating register region starts at r32 and so overlaps in0..in3;
 * that is why the arguments are copied to static registers (r8, r16-r18,
 * ar.lc) before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8; in that case the value
 * written to ar.lc is -1.  Presumably callers always pass a non-zero
 * multiple of 8 -- confirm against the callers.
 */
55 | GLOBAL_ENTRY(xor_ia64_3) | |
56 | .prologue | |
57 | .fframe 0 | |
58 | .save ar.pfs, r31 | |
/* 4 inputs, 0 locals, 20 outputs; 24 rotating registers cover the 7 * 3 + 2 = 23 named by .rotr */
59 | alloc r31 = ar.pfs, 4, 0, 20, 24 | |
60 | .save ar.lc, r30 | |
/* ar.lc and the predicates are overwritten below; keep copies in r30 / r29 */
61 | mov r30 = ar.lc | |
62 | .save pr, r29 | |
63 | mov r29 = pr | |
64 | ;; | |
65 | .body | |
/* r8 = store pointer: results are written back over the in1 buffer */
66 | mov r8 = in1 | |
/* epilogue count = number of pipeline stages (6 + 2) */
67 | mov ar.ec = 6 + 2 | |
/* in0 = number of 8-byte words */
68 | shr in0 = in0, 3 | |
69 | ;; | |
/* loop count register takes (word count - 1) */
70 | adds in0 = -1, in0 | |
71 | mov r16 = in1 | |
72 | mov r17 = in2 | |
73 | ;; | |
74 | mov r18 = in3 | |
75 | mov ar.lc = in0 | |
/* set p16 (= p[0], the stage-0 predicate) and clear the other rotating predicates */
76 | mov pr.rot = 1 << 16 | |
77 | ;; | |
/* s1..s3: loaded words, 7 deep so a value loaded as [0] is read as [6] six rotations later */
78 | .rotr s1[6+1], s2[6+1], s3[6+1], d[2] | |
79 | .rotp p[6+2] | |
80 | 0: | |
/* group 1: stage-0 loads of sources 1 and 2; stage-6 xor of the first two words */
81 | (p[0]) ld8.nta s1[0] = [r16], 8 | |
82 | (p[0]) ld8.nta s2[0] = [r17], 8 | |
83 | (p[6]) xor d[0] = s1[6], s2[6] | |
84 | ;; | |
/* group 2: stage-0 load of source 3; the stop above lets d[0] be read here */
85 | (p[0]) ld8.nta s3[0] = [r18], 8 | |
/* stage 7: d[1] is the finished d[0] of the previous trip; store it and advance r8 */
86 | (p[6+1])st8.nta [r8] = d[1], 8 | |
/* fold the third word into the partial result */
87 | (p[6]) xor d[0] = d[0], s3[6] | |
88 | br.ctop.dptk.few 0b | |
89 | ;; | |
/* restore the saved loop count and predicate registers, then return */
90 | mov ar.lc = r30 | |
91 | mov pr = r29, -1 | |
92 | br.ret.sptk.few rp | |
93 | END(xor_ia64_3) | |
94 | ||
/*
 * xor_ia64_4: XOR four buffers together, 8 bytes per loop trip, and store
 * the result over the first buffer.
 *
 *   in0 = length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, i.e. it is treated as a byte count
 *   in1 = first source buffer and also the destination (r8 = store
 *         pointer, r16 = load pointer)
 *   in2 = second source buffer (r17 = load pointer)
 *   in3 = third source buffer (r18 = load pointer)
 *   in4 = fourth source buffer (r19 = load pointer)
 *
 * The loop is a modulo-scheduled br.ctop loop with 6 + 2 stages: the loads
 * are issued in stage 0, the xors in stage 6 and the store in stage 7.
 * r20 is a static (non-rotating) scratch register holding s3 ^ s4; it is
 * written in the first instruction group and consumed in the second group
 * of the same trip.
 *
 * The rotating register region starts at r32 and so overlaps in0..in4;
 * that is why the arguments are copied to static registers (r8, r16-r19,
 * ar.lc) before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8; in that case the value
 * written to ar.lc is -1.  Presumably callers always pass a non-zero
 * multiple of 8 -- confirm against the callers.
 */
95 | GLOBAL_ENTRY(xor_ia64_4) | |
96 | .prologue | |
97 | .fframe 0 | |
98 | .save ar.pfs, r31 | |
/* 5 inputs, 0 locals, 27 outputs; 32 rotating registers cover the 7 * 4 + 2 = 30 named by .rotr */
99 | alloc r31 = ar.pfs, 5, 0, 27, 32 | |
100 | .save ar.lc, r30 | |
/* ar.lc and the predicates are overwritten below; keep copies in r30 / r29 */
101 | mov r30 = ar.lc | |
102 | .save pr, r29 | |
103 | mov r29 = pr | |
104 | ;; | |
105 | .body | |
/* r8 = store pointer: results are written back over the in1 buffer */
106 | mov r8 = in1 | |
/* epilogue count = number of pipeline stages (6 + 2) */
107 | mov ar.ec = 6 + 2 | |
/* in0 = number of 8-byte words */
108 | shr in0 = in0, 3 | |
109 | ;; | |
/* loop count register takes (word count - 1) */
110 | adds in0 = -1, in0 | |
111 | mov r16 = in1 | |
112 | mov r17 = in2 | |
113 | ;; | |
114 | mov r18 = in3 | |
115 | mov ar.lc = in0 | |
/* set p16 (= p[0], the stage-0 predicate) and clear the other rotating predicates */
116 | mov pr.rot = 1 << 16 | |
117 | mov r19 = in4 | |
118 | ;; | |
/* s1..s4: loaded words, 7 deep so a value loaded as [0] is read as [6] six rotations later */
119 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] | |
120 | .rotp p[6+2] | |
121 | 0: | |
/* group 1: stage-0 loads of all four sources; stage-6 pairwise xors into d[0] and r20 */
122 | (p[0]) ld8.nta s1[0] = [r16], 8 | |
123 | (p[0]) ld8.nta s2[0] = [r17], 8 | |
124 | (p[6]) xor d[0] = s1[6], s2[6] | |
125 | (p[0]) ld8.nta s3[0] = [r18], 8 | |
126 | (p[0]) ld8.nta s4[0] = [r19], 8 | |
127 | (p[6]) xor r20 = s3[6], s4[6] | |
128 | ;; | |
/* group 2, stage 7: d[1] is the finished d[0] of the previous trip; store it and advance r8 */
129 | (p[6+1])st8.nta [r8] = d[1], 8 | |
/* merge the two partial results; the stop above makes d[0] and r20 readable here */
130 | (p[6]) xor d[0] = d[0], r20 | |
131 | br.ctop.dptk.few 0b | |
132 | ;; | |
/* restore the saved loop count and predicate registers, then return */
133 | mov ar.lc = r30 | |
134 | mov pr = r29, -1 | |
135 | br.ret.sptk.few rp | |
136 | END(xor_ia64_4) | |
137 | ||
/*
 * xor_ia64_5: XOR five buffers together, 8 bytes per loop trip, and store
 * the result over the first buffer.
 *
 *   in0 = length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, i.e. it is treated as a byte count
 *   in1 = first source buffer and also the destination (r8 = store
 *         pointer, r16 = load pointer)
 *   in2 = second source buffer (r17 = load pointer)
 *   in3 = third source buffer (r18 = load pointer)
 *   in4 = fourth source buffer (r19 = load pointer)
 *   in5 = fifth source buffer (r20 = load pointer)
 *
 * The loop is a modulo-scheduled br.ctop loop with 6 + 2 stages: the loads
 * are issued in stage 0, the xors in stage 6 and the store in stage 7.
 * The loop body is three instruction groups separated by ";;" stops.
 * r21 is a static (non-rotating) scratch register holding s3 ^ s4; it is
 * written in the first group and consumed in the second group of the same
 * trip.
 *
 * The rotating register region starts at r32 and so overlaps in0..in5;
 * that is why the arguments are copied to static registers (r8, r16-r20,
 * ar.lc) before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8; in that case the value
 * written to ar.lc is -1.  Presumably callers always pass a non-zero
 * multiple of 8 -- confirm against the callers.
 */
138 | GLOBAL_ENTRY(xor_ia64_5) | |
139 | .prologue | |
140 | .fframe 0 | |
141 | .save ar.pfs, r31 | |
/* 6 inputs, 0 locals, 34 outputs; 40 rotating registers cover the 7 * 5 + 2 = 37 named by .rotr */
142 | alloc r31 = ar.pfs, 6, 0, 34, 40 | |
143 | .save ar.lc, r30 | |
/* ar.lc and the predicates are overwritten below; keep copies in r30 / r29 */
144 | mov r30 = ar.lc | |
145 | .save pr, r29 | |
146 | mov r29 = pr | |
147 | ;; | |
148 | .body | |
/* r8 = store pointer: results are written back over the in1 buffer */
149 | mov r8 = in1 | |
/* epilogue count = number of pipeline stages (6 + 2) */
150 | mov ar.ec = 6 + 2 | |
/* in0 = number of 8-byte words */
151 | shr in0 = in0, 3 | |
152 | ;; | |
/* loop count register takes (word count - 1) */
153 | adds in0 = -1, in0 | |
154 | mov r16 = in1 | |
155 | mov r17 = in2 | |
156 | ;; | |
157 | mov r18 = in3 | |
158 | mov ar.lc = in0 | |
/* set p16 (= p[0], the stage-0 predicate) and clear the other rotating predicates */
159 | mov pr.rot = 1 << 16 | |
160 | mov r19 = in4 | |
161 | mov r20 = in5 | |
162 | ;; | |
/* s1..s5: loaded words, 7 deep so a value loaded as [0] is read as [6] six rotations later */
163 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] | |
164 | .rotp p[6+2] | |
165 | 0: | |
/* group 1: stage-0 loads of sources 1-4; stage-6 pairwise xors into d[0] and r21 */
166 | (p[0]) ld8.nta s1[0] = [r16], 8 | |
167 | (p[0]) ld8.nta s2[0] = [r17], 8 | |
168 | (p[6]) xor d[0] = s1[6], s2[6] | |
169 | (p[0]) ld8.nta s3[0] = [r18], 8 | |
170 | (p[0]) ld8.nta s4[0] = [r19], 8 | |
171 | (p[6]) xor r21 = s3[6], s4[6] | |
172 | ;; | |
/* group 2: stage-0 load of source 5 */
173 | (p[0]) ld8.nta s5[0] = [r20], 8 | |
/* stage 7: d[1] is the finished d[0] of the previous trip; store it and advance r8 */
174 | (p[6+1])st8.nta [r8] = d[1], 8 | |
/* merge the two pairwise results */
175 | (p[6]) xor d[0] = d[0], r21 | |
176 | ;; | |
/* group 3: fold in the fifth word; d[0] is now complete for this trip */
177 | (p[6]) xor d[0] = d[0], s5[6] | |
/* NOTE(review): nop.f looks like a slot filler for bundling -- confirm */
178 | nop.f 0 | |
179 | br.ctop.dptk.few 0b | |
180 | ;; | |
/* restore the saved loop count and predicate registers, then return */
181 | mov ar.lc = r30 | |
182 | mov pr = r29, -1 | |
183 | br.ret.sptk.few rp | |
184 | END(xor_ia64_5) | |