]>
Commit | Line | Data |
---|---|---|
075a46a0 RK |
1 | /* |
2 | * Checksum functions for Hexagon | |
3 | * | |
e1858b2a | 4 | * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. |
075a46a0 RK |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 and | |
8 | * only version 2 as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * GNU General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License | |
16 | * along with this program; if not, write to the Free Software | |
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
18 | * 02110-1301, USA. | |
19 | */ | |
20 | ||
21 | /* This was derived from arch/alpha/lib/checksum.c */ | |
22 | ||
23 | ||
24 | #include <linux/module.h> | |
25 | #include <linux/string.h> | |
26 | ||
27 | #include <asm/byteorder.h> | |
28 | #include <net/checksum.h> | |
29 | #include <linux/uaccess.h> | |
30 | #include <asm/intrinsics.h> | |
31 | ||
32 | ||
/*
 * Vector value operations
 *
 * These helpers build 64-bit constants laid out as four 16-bit lanes.
 * SIGN, CARRY and SELECT multiply a per-lane flag x by 0x8000, 0x0002 or
 * 0x0001 respectively and shift the product left by y bits.  The VR_*
 * forms place their arguments a, b, c, d at bit offsets 48, 32, 16 and 0.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. SIGN(a + b, 0)) expand with the intended precedence.
 */
#define SIGN(x, y)	((0x8000ULL * (x)) << (y))
#define CARRY(x, y)	((0x0002ULL * (x)) << (y))
#define SELECT(x, y)	((0x0001ULL * (x)) << (y))

#define VR_NEGATE(a, b, c, d)	(SIGN(a, 48) + SIGN(b, 32) + SIGN(c, 16) \
	+ SIGN(d, 0))
#define VR_CARRY(a, b, c, d)	(CARRY(a, 48) + CARRY(b, 32) + CARRY(c, 16) \
	+ CARRY(d, 0))
#define VR_SELECT(a, b, c, d)	(SELECT(a, 48) + SELECT(b, 32) + SELECT(c, 16) \
	+ SELECT(d, 0))
44 | ||
45 | ||
46 | /* optimized HEXAGON V3 intrinsic version */ | |
47 | static inline unsigned short from64to16(u64 x) | |
48 | { | |
49 | u64 sum; | |
50 | ||
51 | sum = HEXAGON_P_vrmpyh_PP(x^VR_NEGATE(1, 1, 1, 1), | |
52 | VR_SELECT(1, 1, 1, 1)); | |
53 | sum += VR_CARRY(0, 0, 1, 0); | |
54 | sum = HEXAGON_P_vrmpyh_PP(sum, VR_SELECT(0, 0, 1, 1)); | |
55 | ||
56 | return 0xFFFF & sum; | |
57 | } | |
58 | ||
59 | /* | |
60 | * computes the checksum of the TCP/UDP pseudo-header | |
61 | * returns a 16-bit checksum, already complemented. | |
62 | */ | |
63 | __sum16 csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, | |
64 | unsigned short len, unsigned short proto, | |
65 | __wsum sum) | |
66 | { | |
67 | return (__force __sum16)~from64to16( | |
68 | (__force u64)saddr + (__force u64)daddr + | |
69 | (__force u64)sum + ((len + proto) << 8)); | |
70 | } | |
71 | ||
72 | __wsum csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, | |
73 | unsigned short len, unsigned short proto, | |
74 | __wsum sum) | |
75 | { | |
76 | u64 result; | |
77 | ||
78 | result = (__force u64)saddr + (__force u64)daddr + | |
79 | (__force u64)sum + ((len + proto) << 8); | |
80 | ||
81 | /* Fold down to 32-bits so we don't lose in the typedef-less | |
82 | network stack. */ | |
83 | /* 64 to 33 */ | |
84 | result = (result & 0xffffffffUL) + (result >> 32); | |
85 | /* 33 to 32 */ | |
86 | result = (result & 0xffffffffUL) + (result >> 32); | |
87 | return (__force __wsum)result; | |
88 | } | |
89 | EXPORT_SYMBOL(csum_tcpudp_nofold); | |
90 | ||
91 | /* | |
92 | * Do a 64-bit checksum on an arbitrary memory area.. | |
93 | * | |
94 | * This isn't a great routine, but it's not _horrible_ either. The | |
95 | * inner loop could be unrolled a bit further, and there are better | |
96 | * ways to do the carry, but this is reasonable. | |
97 | */ | |
98 | ||
99 | /* optimized HEXAGON intrinsic version, with over read fixed */ | |
100 | unsigned int do_csum(const void *voidptr, int len) | |
101 | { | |
102 | u64 sum0, sum1, x0, x1, *ptr8_o, *ptr8_e, *ptr8; | |
103 | int i, start, mid, end, mask; | |
104 | const char *ptr = voidptr; | |
105 | unsigned short *ptr2; | |
106 | unsigned int *ptr4; | |
107 | ||
108 | if (len <= 0) | |
109 | return 0; | |
110 | ||
111 | start = 0xF & (16-(((int) ptr) & 0xF)) ; | |
112 | mask = 0x7fffffffUL >> HEXAGON_R_cl0_R(len); | |
113 | start = start & mask ; | |
114 | ||
115 | mid = len - start; | |
116 | end = mid & 0xF; | |
117 | mid = mid>>4; | |
118 | sum0 = mid << 18; | |
119 | sum1 = 0; | |
120 | ||
121 | if (start & 1) | |
122 | sum0 += (u64) (ptr[0] << 8); | |
123 | ptr2 = (unsigned short *) &ptr[start & 1]; | |
124 | if (start & 2) | |
125 | sum1 += (u64) ptr2[0]; | |
126 | ptr4 = (unsigned int *) &ptr[start & 3]; | |
127 | if (start & 4) { | |
128 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, | |
129 | VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), | |
130 | VR_SELECT(0, 0, 1, 1)); | |
131 | sum0 += VR_SELECT(0, 0, 1, 0); | |
132 | } | |
133 | ptr8 = (u64 *) &ptr[start & 7]; | |
134 | if (start & 8) { | |
135 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, | |
136 | VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), | |
137 | VR_SELECT(1, 1, 1, 1)); | |
138 | sum1 += VR_CARRY(0, 0, 1, 0); | |
139 | } | |
140 | ptr8_o = (u64 *) (ptr + start); | |
141 | ptr8_e = (u64 *) (ptr + start + 8); | |
142 | ||
143 | if (mid) { | |
144 | x0 = *ptr8_e; ptr8_e += 2; | |
145 | x1 = *ptr8_o; ptr8_o += 2; | |
146 | if (mid > 1) | |
147 | for (i = 0; i < mid-1; i++) { | |
148 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, | |
149 | x0^VR_NEGATE(1, 1, 1, 1), | |
150 | VR_SELECT(1, 1, 1, 1)); | |
151 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, | |
152 | x1^VR_NEGATE(1, 1, 1, 1), | |
153 | VR_SELECT(1, 1, 1, 1)); | |
154 | x0 = *ptr8_e; ptr8_e += 2; | |
155 | x1 = *ptr8_o; ptr8_o += 2; | |
156 | } | |
157 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, x0^VR_NEGATE(1, 1, 1, 1), | |
158 | VR_SELECT(1, 1, 1, 1)); | |
159 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, x1^VR_NEGATE(1, 1, 1, 1), | |
160 | VR_SELECT(1, 1, 1, 1)); | |
161 | } | |
162 | ||
163 | ptr4 = (unsigned int *) &ptr[start + (mid * 16) + (end & 8)]; | |
164 | if (end & 4) { | |
165 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, | |
166 | VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), | |
167 | VR_SELECT(0, 0, 1, 1)); | |
168 | sum1 += VR_SELECT(0, 0, 1, 0); | |
169 | } | |
170 | ptr2 = (unsigned short *) &ptr[start + (mid * 16) + (end & 12)]; | |
171 | if (end & 2) | |
172 | sum0 += (u64) ptr2[0]; | |
173 | ||
174 | if (end & 1) | |
175 | sum1 += (u64) ptr[start + (mid * 16) + (end & 14)]; | |
176 | ||
177 | ptr8 = (u64 *) &ptr[start + (mid * 16)]; | |
178 | if (end & 8) { | |
179 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, | |
180 | VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), | |
181 | VR_SELECT(1, 1, 1, 1)); | |
182 | sum0 += VR_CARRY(0, 0, 1, 0); | |
183 | } | |
184 | sum0 = HEXAGON_P_vrmpyh_PP((sum0+sum1)^VR_NEGATE(0, 0, 0, 1), | |
185 | VR_SELECT(0, 0, 1, 1)); | |
186 | sum0 += VR_NEGATE(0, 0, 0, 1); | |
187 | sum0 = HEXAGON_P_vrmpyh_PP(sum0, VR_SELECT(0, 0, 1, 1)); | |
188 | ||
189 | if (start & 1) | |
190 | sum0 = (sum0 << 8) | (0xFF & (sum0 >> 8)); | |
191 | ||
192 | return 0xFFFF & sum0; | |
193 | } | |
194 | ||
195 | /* | |
196 | * copy from ds while checksumming, otherwise like csum_partial | |
197 | */ | |
198 | __wsum | |
199 | csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) | |
200 | { | |
201 | memcpy(dst, src, len); | |
202 | return csum_partial(dst, len, sum); | |
203 | } |