]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * This file is subject to the terms and conditions of the GNU General Public | |
3 | * License. See the file "COPYING" in the main directory of this archive | |
4 | * for more details. | |
5 | * | |
6 | * Copyright (C) 1998 Ralf Baechle | |
7 | */ | |
8 | #include <asm/asm.h> | |
9 | #include <asm/regdef.h> | |
10 | ||
/*
 * ADDC(acc, val): add val into acc with end-around carry.
 *
 * After the 32-bit add, the wrapped result is lower than the addend exactly
 * when the addition overflowed; sltu turns that into 0/1 in v1, which is
 * then added back into acc.
 *
 * Clobbers v1.  acc must not be v1 and must not be the same register as val.
 */
#define ADDC(acc, val)						\
	addu	acc, acc, val;					\
	sltu	v1, acc, val;					\
	addu	acc, acc, v1
15 | ||
/*
 * CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3)
 *
 * Add the eight consecutive 32-bit words at src + offset + 0x00 through
 * src + offset + 0x1c (32 bytes) into sum, four words at a time, using ADDC
 * for every addition.  src itself is not advanced.
 *
 * The words are fetched with lw, so src + offset is expected to be word
 * aligned.  Clobbers the four scratch registers passed as t0..t3 and, through
 * ADDC, v1.
 *
 * The last line deliberately carries no line continuation, so the definition
 * ends here and cannot absorb whatever follows it in the file.
 */
#define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3)		\
	lw	t0, (offset + 0x00)(src);			\
	lw	t1, (offset + 0x04)(src);			\
	lw	t2, (offset + 0x08)(src);			\
	lw	t3, (offset + 0x0c)(src);			\
	ADDC(sum, t0);						\
	ADDC(sum, t1);						\
	ADDC(sum, t2);						\
	ADDC(sum, t3);						\
	lw	t0, (offset + 0x10)(src);			\
	lw	t1, (offset + 0x14)(src);			\
	lw	t2, (offset + 0x18)(src);			\
	lw	t3, (offset + 0x1c)(src);			\
	ADDC(sum, t0);						\
	ADDC(sum, t1);						\
	ADDC(sum, t2);						\
	ADDC(sum, t3)
33 | ||
/*
 * Register usage on entry to csum_partial:
 *
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 *
 * The running sum is accumulated in v0, which the code below refers to
 * through the "sum" alias.
 */

/* Symbolic names for the registers used throughout this file. */
#define src a0
/*
 * NOTE(review): "dest" is not referenced by any code visible in this file,
 * and a1 holds the length here, not a destination - confirm before using it.
 */
#define dest a1
#define sum v0
43 | ||
44 | .text | |
45 | .set noreorder | |
46 | ||
/*
 * small_csumcpy - checksum the last few bytes and finish the computation.
 *
 * Reached only by branch/jump from csum_partial; it returns straight to
 * csum_partial's caller through ra.
 *
 * In:   src (a0)  address of the first remaining byte, any alignment
 *       t2        number of bytes left; only bits 2, 1 and 0 are examined
 *       sum (v0)  32-bit running sum
 *       t7        non-zero when the buffer started at an odd address
 *       a2        partial checksum supplied by the caller
 * Out:  v0        data sum folded to 16 bits (byte-swapped when t7 is
 *                 non-zero), then added to a2 with end-around carry
 * Uses: a0, a1, t0, t1, t2, v1.  ulw and ulhu are assembler macro
 *       instructions and may use the assembler temporary as well.
 *
 * Assembled under ".set noreorder" up to the final add: the indented
 * instruction after each branch is its delay slot and always executes.
 */
small_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, .Lsmall_no_word
	 andi	t0, a1, 2			/* next test: halfword left? */

	/* A whole word remains; it may be unaligned, hence ulw. */
	ulw	t1, (src)
	addiu	src, src, 4
	ADDC(sum, t1)

.Lsmall_no_word:
	move	t1, zero			/* t1 collects the last 0..3 bytes */
	beqz	t0, .Lsmall_no_half
	 andi	t0, a1, 1			/* next test: single byte left? */

	/* A halfword remains. */
	ulhu	t1, (src)
	addiu	src, src, 2

.Lsmall_no_half:
	beqz	t0, .Lsmall_no_byte
	 sll	t1, t1, 16			/* move the halfword (or zero) up */

	/* A single trailing byte remains. */
	lbu	t2, (src)
	 nop					/* keep the load result out of the next slot */

#ifdef __MIPSEB__
	/* Big-endian: the trailing byte is the high byte of its 16-bit word. */
	sll	t2, t2, 8
#endif
	or	t1, t1, t2

.Lsmall_no_byte:
	ADDC(sum, t1)

	/*
	 * Fold 32 bits into 16: adding sum << 16 puts (high + low) in the
	 * upper half, sltu captures the carry out of that add, and the
	 * carry is added back after shifting the result down.
	 */
	sll	v1, sum, 16
	addu	sum, sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, sum, v1

	/* Buffer started on an odd address: swap the two result bytes. */
	beqz	t7, .Lsmall_add_partial
	 nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, sum, v1
	andi	sum, sum, 0xffff

.Lsmall_add_partial:
	.set	reorder
	/* Merge the caller's partial checksum; the assembler fills jr's delay slot. */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder
101 | ||
102 | /* ------------------------------------------------------------------------- */ | |
103 | ||
104 | .align 5 | |
/*
 * csum_partial - checksum a memory area.
 *
 * In:   a0 (src)  source address, any alignment
 *       a1        length of the area in bytes
 *       a2        partial checksum to be added to the result
 * Out:  v0 (sum)  produced by small_csumcpy, which every path ends in
 * Uses: a0, a1, t0-t4, t7, t8, v1, plus whatever small_csumcpy uses
 *
 * Outline:
 *   - fewer than 8 bytes: hand everything to small_csumcpy;
 *   - otherwise consume a byte and/or a halfword until src is word aligned;
 *   - with fewer than 56 bytes left, sum the remaining whole words one by one;
 *   - otherwise keep aligning (4, 8, 16 bytes) and sum in unrolled blocks
 *     of 128, 64 and 32 bytes, then the leftover whole words;
 *   - the final 0..3 bytes, the fold, the odd-address byte swap and the
 *     addition of a2 are done by small_csumcpy.
 *
 * Assembled under ".set noreorder": the indented instruction after each
 * branch or jump is its delay slot and executes whether or not the branch
 * is taken.
 *
 * There is no separate zero-length exit: a length of 0 is below 8 and is
 * therefore handled by small_csumcpy like any other short buffer.
 */
LEAF(csum_partial)
	move	sum, zero
	move	t7, zero			/* "odd start" flag, clear by default */

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy		/* < 8 bytes to checksum */
	 move	t2, a1				/* small_csumcpy takes the length in t2 */

	/* a1 >= 8 from here on. */
	andi	t7, src, 0x1			/* odd buffer? */

hword_align:
	beqz	t7, word_align
	 andi	t8, src, 0x2

	/* Odd start address: take one byte to reach halfword alignment. */
	lbu	t0, (src)
	subu	a1, a1, 0x1
#ifdef __MIPSEL__
	/* Little-endian: a byte at an odd address is the high byte of its halfword. */
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	addu	src, src, 0x1
	andi	t8, src, 0x2			/* re-test with the advanced src */

word_align:
	beqz	t8, dword_align
	 sltiu	t8, a1, 56			/* t8 = fewer than 56 bytes left */

	/* Take one halfword to reach word alignment. */
	lhu	t0, (src)
	subu	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56			/* re-test with the reduced length */
	addu	src, src, 0x2

dword_align:
	/* Short remainder: skip the block code and sum a1 / 4 words directly. */
	bnez	t8, do_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, qword_align
	 andi	t8, src, 0x8

	/* Take one word to reach 8-byte alignment. */
	lw	t0, 0x00(src)
	subu	a1, a1, 0x4
	ADDC(sum, t0)
	addu	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	 andi	t8, src, 0x10

	/* Take two words to reach 16-byte alignment. */
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	subu	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
	addu	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	 srl	t8, a1, 0x7			/* t8 = number of 128-byte blocks */

	/* Take four words to reach 32-byte alignment. */
	lw	t3, 0x08(src)
	lw	t4, 0x0c(src)
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	ADDC(sum, t3)
	ADDC(sum, t4)
	ADDC(sum, t0)
	ADDC(sum, t1)
	subu	a1, a1, 0x10
	addu	src, src, 0x10
	srl	t8, a1, 0x7			/* recompute with the reduced length */

begin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40			/* t2 = a 64-byte block remains */

move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	subu	t8, t8, 0x01
	bnez	t8, move_128bytes
	 addu	src, src, 0x80

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20			/* t2 = a 32-byte block remains */

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	addu	src, src, 0x40

1:
	beqz	t2, do_end_words
	 andi	t8, a1, 0x1c			/* t8 = bytes in leftover whole words */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	addu	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	 srl	t8, t8, 0x2			/* byte count -> word count */

end_words:
	lw	t0, (src)
	subu	t8, t8, 0x1
	ADDC(sum, t0)
	bnez	t8, end_words
	 addu	src, src, 0x4

maybe_end_cruft:
	andi	t2, a1, 0x3			/* 0..3 trailing bytes */

	/*
	 * small_csumcpy sums the trailing bytes, folds the result, adds a2
	 * and returns to our caller, so nothing may follow this jump.
	 */
	j	small_csumcpy
	 move	a1, t2
	END(csum_partial)