]>
Commit | Line | Data |
---|---|---|
dd8d6b8c | 1 | /* |
a9a29d22 | 2 | * Copyright (c) 2007-2011 Nicira Networks. |
dd8d6b8c | 3 | * |
a9a29d22 JG |
4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License | |
14 | * along with this program; if not, write to the Free Software | |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
16 | * 02110-1301, USA | |
dd8d6b8c JG |
17 | */ |
18 | ||
19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
20 | ||
21 | #include <linux/in.h> | |
22 | #include <linux/ip.h> | |
23 | #include <linux/tcp.h> | |
24 | #include <linux/udp.h> | |
25 | ||
26 | #include "checksum.h" | |
27 | #include "datapath.h" | |
28 | ||
c3729ee4 JG |
29 | #ifdef NEED_CSUM_NORMALIZE |
30 | ||
31 | #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) | |
32 | /* This code is based on skb_checksum_setup() from Xen's net/dev/core.c. We | |
33 | * can't call this function directly because it isn't exported in all | |
34 | * versions. */ | |
35 | static int vswitch_skb_checksum_setup(struct sk_buff *skb) | |
36 | { | |
37 | struct iphdr *iph; | |
38 | unsigned char *th; | |
39 | int err = -EPROTO; | |
40 | __u16 csum_start, csum_offset; | |
41 | ||
42 | if (!skb->proto_csum_blank) | |
43 | return 0; | |
44 | ||
45 | if (skb->protocol != htons(ETH_P_IP)) | |
46 | goto out; | |
47 | ||
48 | if (!pskb_may_pull(skb, skb_network_header(skb) + sizeof(struct iphdr) - skb->data)) | |
49 | goto out; | |
50 | ||
51 | iph = ip_hdr(skb); | |
52 | th = skb_network_header(skb) + 4 * iph->ihl; | |
53 | ||
54 | csum_start = th - skb->head; | |
55 | switch (iph->protocol) { | |
56 | case IPPROTO_TCP: | |
57 | csum_offset = offsetof(struct tcphdr, check); | |
58 | break; | |
59 | case IPPROTO_UDP: | |
60 | csum_offset = offsetof(struct udphdr, check); | |
61 | break; | |
62 | default: | |
63 | if (net_ratelimit()) | |
64 | pr_err("Attempting to checksum a non-TCP/UDP packet, " | |
65 | "dropping a protocol %d packet", | |
66 | iph->protocol); | |
67 | goto out; | |
68 | } | |
69 | ||
70 | if (!pskb_may_pull(skb, th + csum_offset + 2 - skb->data)) | |
71 | goto out; | |
72 | ||
73 | skb->proto_csum_blank = 0; | |
74 | set_ip_summed(skb, OVS_CSUM_PARTIAL); | |
75 | set_skb_csum_pointers(skb, csum_start, csum_offset); | |
76 | ||
77 | err = 0; | |
78 | ||
79 | out: | |
80 | return err; | |
81 | } | |
82 | #else | |
/*
 * Fallback used when CONFIG_XEN and HAVE_PROTO_DATA_VALID are not both
 * defined: there is nothing to convert, so report success unconditionally.
 */
static int vswitch_skb_checksum_setup(struct sk_buff *skb)
{
	(void)skb;	/* Intentionally unused in this configuration. */

	return 0;
}
87 | #endif /* not Xen old style checksums */ | |
88 | ||
89 | /* | |
90 | * compute_ip_summed - map external checksum state onto OVS representation | |
91 | * | |
92 | * @skb: Packet to manipulate. | |
93 | * @xmit: Whether we were on transmit path of network stack. For example, | |
94 | * this is true for the internal dev vport because it receives skbs | |
95 | * that passed through dev_queue_xmit() but false for the netdev vport | |
96 | * because its packets come from netif_receive_skb(). | |
97 | * | |
98 | * Older kernels (and various versions of Xen) were not explicit enough about | |
99 | * checksum offload parameters and rely on a combination of context and | |
100 | * non standard fields. This deals with all those variations so that we | |
101 | * can internally manipulate checksum offloads without worrying about kernel | |
102 | * version. | |
103 | * | |
104 | * Types of checksums that we can receive (these all refer to L4 checksums): | |
dd8d6b8c JG |
105 | * 1. CHECKSUM_NONE: Device that did not compute checksum, contains full |
106 | * (though not verified) checksum in packet but not in skb->csum. Packets | |
107 | * from the bridge local port will also have this type. | |
108 | * 2. CHECKSUM_COMPLETE (CHECKSUM_HW): Good device that computes checksums, | |
109 | * also the GRE module. This is the same as CHECKSUM_NONE, except it has | |
110 | * a valid skb->csum. Importantly, both contain a full checksum (not | |
111 | * verified) in the packet itself. The only difference is that if the | |
112 | * packet gets to L4 processing on this machine (not in DomU) we won't | |
113 | * have to recompute the checksum to verify. Most hardware devices do not | |
114 | * produce packets with this type, even if they support receive checksum | |
115 | * offloading (they produce type #5). | |
116 | * 3. CHECKSUM_PARTIAL (CHECKSUM_HW): Packet without full checksum and needs to | |
117 | * be computed if it is sent off box. Unfortunately on earlier kernels, | |
118 | * this case is impossible to distinguish from #2, despite having opposite | |
119 | * meanings. Xen adds an extra field on earlier kernels (see #4) in order | |
120 | * to distinguish the different states. | |
121 | * 4. CHECKSUM_UNNECESSARY (with proto_csum_blank true): This packet was | |
122 | * generated locally by a Xen DomU and has a partial checksum. If it is | |
123 | * handled on this machine (Dom0 or DomU), then the checksum will not be | |
124 | * computed. If it goes off box, the checksum in the packet needs to be | |
125 | * completed. Calling skb_checksum_setup converts this to CHECKSUM_HW | |
126 | * (CHECKSUM_PARTIAL) so that the checksum can be completed. In later | |
127 | * kernels, this combination is replaced with CHECKSUM_PARTIAL. | |
128 | * 5. CHECKSUM_UNNECESSARY (with proto_csum_blank false): Packet with a correct | |
129 | * full checksum or using a protocol without a checksum. skb->csum is | |
130 | * undefined. This is common from devices with receive checksum | |
131 | * offloading. This is somewhat similar to CHECKSUM_NONE, except that | |
132 | * nobody will try to verify the checksum with CHECKSUM_UNNECESSARY. | |
133 | * | |
134 | * Note that on earlier kernels, CHECKSUM_COMPLETE and CHECKSUM_PARTIAL are | |
135 | * both defined as CHECKSUM_HW. Normally the meaning of CHECKSUM_HW is clear | |
136 | * based on whether it is on the transmit or receive path. After the datapath | |
137 | * it will be interpreted as CHECKSUM_PARTIAL. If the packet already has a | |
138 | * checksum, we will panic. Since we can receive packets with checksums, we | |
139 | * assume that all CHECKSUM_HW packets have checksums and map them to | |
140 | * CHECKSUM_NONE, which has a similar meaning (it is only different if the | |
141 | * packet is processed by the local IP stack, in which case it will need to | |
142 | * be reverified). If we receive a packet with CHECKSUM_HW that really means | |
143 | * CHECKSUM_PARTIAL, it will be sent with the wrong checksum. However, there | |
144 | * shouldn't be any devices that do this with bridging. | |
145 | */ | |
c3729ee4 | 146 | int compute_ip_summed(struct sk_buff *skb, bool xmit) |
dd8d6b8c JG |
147 | { |
148 | /* For our convenience these defines change repeatedly between kernel | |
149 | * versions, so we can't just copy them over... | |
150 | */ | |
151 | switch (skb->ip_summed) { | |
152 | case CHECKSUM_NONE: | |
c3729ee4 | 153 | set_ip_summed(skb, OVS_CSUM_NONE); |
dd8d6b8c JG |
154 | break; |
155 | case CHECKSUM_UNNECESSARY: | |
c3729ee4 | 156 | set_ip_summed(skb, OVS_CSUM_UNNECESSARY); |
dd8d6b8c JG |
157 | break; |
158 | #ifdef CHECKSUM_HW | |
159 | /* In theory this could be either CHECKSUM_PARTIAL or CHECKSUM_COMPLETE. | |
160 | * However, on the receive side we should only get CHECKSUM_PARTIAL | |
161 | * packets from Xen, which uses some special fields to represent this | |
6455100f PS |
162 | * (see vswitch_skb_checksum_setup()). Since we can only make one type |
163 | * work, pick the one that actually happens in practice. | |
dd8d6b8c JG |
164 | * |
165 | * On the transmit side (basically after skb_checksum_setup() | |
166 | * has been run or on internal dev transmit), packets with | |
167 | * CHECKSUM_COMPLETE aren't generated, so assume CHECKSUM_PARTIAL. | |
168 | */ | |
169 | case CHECKSUM_HW: | |
170 | if (!xmit) | |
c3729ee4 | 171 | set_ip_summed(skb, OVS_CSUM_COMPLETE); |
dd8d6b8c | 172 | else |
c3729ee4 | 173 | set_ip_summed(skb, OVS_CSUM_PARTIAL); |
dd8d6b8c JG |
174 | break; |
175 | #else | |
176 | case CHECKSUM_COMPLETE: | |
c3729ee4 | 177 | set_ip_summed(skb, OVS_CSUM_COMPLETE); |
dd8d6b8c JG |
178 | break; |
179 | case CHECKSUM_PARTIAL: | |
c3729ee4 | 180 | set_ip_summed(skb, OVS_CSUM_PARTIAL); |
dd8d6b8c JG |
181 | break; |
182 | #endif | |
183 | } | |
184 | ||
c3729ee4 | 185 | OVS_CB(skb)->csum_start = skb_headroom(skb) + skb_transport_offset(skb); |
dd8d6b8c | 186 | |
c3729ee4 | 187 | return vswitch_skb_checksum_setup(skb); |
dd8d6b8c JG |
188 | } |
189 | ||
c3729ee4 | 190 | /* |
6455100f PS |
191 | * forward_ip_summed - map internal checksum state back onto native |
192 | * kernel fields. | |
c3729ee4 JG |
193 | * |
194 | * @skb: Packet to manipulate. | |
6455100f PS |
195 | * @xmit: Whether we are about to send on the transmit path of the network stack. |
196 | * This follows the same logic as the @xmit field in compute_ip_summed(). | |
197 | * Generally, a given vport will have opposite values for @xmit passed to | |
198 | * these two functions. | |
c3729ee4 JG |
199 | * |
200 | * When a packet is about to egress from OVS take our internal fields (including | |
201 | * any modifications we have made) and recreate the correct representation for | |
202 | * this kernel. This may do things like change the transport header offset. | |
203 | */ | |
204 | void forward_ip_summed(struct sk_buff *skb, bool xmit) | |
dd8d6b8c | 205 | { |
6455100f | 206 | switch (get_ip_summed(skb)) { |
c3729ee4 JG |
207 | case OVS_CSUM_NONE: |
208 | skb->ip_summed = CHECKSUM_NONE; | |
209 | break; | |
210 | case OVS_CSUM_UNNECESSARY: | |
211 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
dd8d6b8c | 212 | #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) |
c3729ee4 JG |
213 | skb->proto_data_valid = 1; |
214 | #endif | |
dd8d6b8c | 215 | break; |
c3729ee4 JG |
216 | #ifdef CHECKSUM_HW |
217 | case OVS_CSUM_COMPLETE: | |
218 | if (!xmit) | |
219 | skb->ip_summed = CHECKSUM_HW; | |
220 | else | |
221 | skb->ip_summed = CHECKSUM_NONE; | |
dd8d6b8c | 222 | break; |
c3729ee4 JG |
223 | case OVS_CSUM_PARTIAL: |
224 | if (!xmit) { | |
225 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
226 | #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) | |
227 | skb->proto_csum_blank = 1; | |
228 | #endif | |
229 | } else { | |
230 | skb->ip_summed = CHECKSUM_HW; | |
231 | } | |
232 | break; | |
233 | #else | |
234 | case OVS_CSUM_COMPLETE: | |
235 | skb->ip_summed = CHECKSUM_COMPLETE; | |
236 | break; | |
237 | case OVS_CSUM_PARTIAL: | |
238 | skb->ip_summed = CHECKSUM_PARTIAL; | |
239 | break; | |
240 | #endif | |
dd8d6b8c JG |
241 | } |
242 | ||
c3729ee4 | 243 | if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) |
6455100f PS |
244 | skb_set_transport_header(skb, OVS_CB(skb)->csum_start - |
245 | skb_headroom(skb)); | |
c3729ee4 | 246 | } |
dd8d6b8c | 247 | |
c3729ee4 JG |
248 | u8 get_ip_summed(struct sk_buff *skb) |
249 | { | |
250 | return OVS_CB(skb)->ip_summed; | |
251 | } | |
dd8d6b8c | 252 | |
c3729ee4 JG |
253 | void set_ip_summed(struct sk_buff *skb, u8 ip_summed) |
254 | { | |
255 | OVS_CB(skb)->ip_summed = ip_summed; | |
256 | } | |
dd8d6b8c | 257 | |
c3729ee4 JG |
258 | void get_skb_csum_pointers(const struct sk_buff *skb, u16 *csum_start, |
259 | u16 *csum_offset) | |
260 | { | |
261 | *csum_start = OVS_CB(skb)->csum_start; | |
262 | *csum_offset = skb->csum; | |
dd8d6b8c | 263 | } |
c3729ee4 | 264 | |
6455100f PS |
265 | void set_skb_csum_pointers(struct sk_buff *skb, u16 csum_start, |
266 | u16 csum_offset) | |
c3729ee4 JG |
267 | { |
268 | OVS_CB(skb)->csum_start = csum_start; | |
269 | skb->csum = csum_offset; | |
270 | } | |
271 | #endif /* NEED_CSUM_NORMALIZE */ |