]> git.proxmox.com Git - ovs.git/blame - datapath/checksum.c
util: New macro CONST_CAST.
[ovs.git] / datapath / checksum.c
CommitLineData
dd8d6b8c 1/*
e0edde6f 2 * Copyright (c) 2007-2011 Nicira, Inc.
dd8d6b8c 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
dd8d6b8c
JG
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/in.h>
22#include <linux/ip.h>
23#include <linux/tcp.h>
24#include <linux/udp.h>
25
26#include "checksum.h"
27#include "datapath.h"
28
c3729ee4
JG
29#ifdef NEED_CSUM_NORMALIZE
30
31#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
32/* This code is based on skb_checksum_setup() from Xen's net/dev/core.c. We
33 * can't call this function directly because it isn't exported in all
34 * versions. */
35static int vswitch_skb_checksum_setup(struct sk_buff *skb)
36{
37 struct iphdr *iph;
38 unsigned char *th;
39 int err = -EPROTO;
40 __u16 csum_start, csum_offset;
41
42 if (!skb->proto_csum_blank)
43 return 0;
44
45 if (skb->protocol != htons(ETH_P_IP))
46 goto out;
47
48 if (!pskb_may_pull(skb, skb_network_header(skb) + sizeof(struct iphdr) - skb->data))
49 goto out;
50
51 iph = ip_hdr(skb);
52 th = skb_network_header(skb) + 4 * iph->ihl;
53
54 csum_start = th - skb->head;
55 switch (iph->protocol) {
56 case IPPROTO_TCP:
57 csum_offset = offsetof(struct tcphdr, check);
58 break;
59 case IPPROTO_UDP:
60 csum_offset = offsetof(struct udphdr, check);
61 break;
62 default:
63 if (net_ratelimit())
64 pr_err("Attempting to checksum a non-TCP/UDP packet, "
65 "dropping a protocol %d packet",
66 iph->protocol);
67 goto out;
68 }
69
70 if (!pskb_may_pull(skb, th + csum_offset + 2 - skb->data))
71 goto out;
72
73 skb->proto_csum_blank = 0;
74 set_ip_summed(skb, OVS_CSUM_PARTIAL);
75 set_skb_csum_pointers(skb, csum_start, csum_offset);
76
77 err = 0;
78
79out:
80 return err;
81}
82#else
/* Stub used when Xen old-style checksum fields are not available: there is
 * nothing to convert, so always report success. */
static int vswitch_skb_checksum_setup(struct sk_buff *skb)
{
	return 0;
}
87#endif /* not Xen old style checksums */
88
89/*
90 * compute_ip_summed - map external checksum state onto OVS representation
91 *
92 * @skb: Packet to manipulate.
93 * @xmit: Whether we were on transmit path of network stack. For example,
94 * this is true for the internal dev vport because it receives skbs
95 * that passed through dev_queue_xmit() but false for the netdev vport
96 * because its packets come from netif_receive_skb().
97 *
98 * Older kernels (and various versions of Xen) were not explicit enough about
99 * checksum offload parameters and rely on a combination of context and
100 * non standard fields. This deals with all those variations so that we
101 * can internally manipulate checksum offloads without worrying about kernel
102 * version.
103 *
104 * Types of checksums that we can receive (these all refer to L4 checksums):
dd8d6b8c
JG
105 * 1. CHECKSUM_NONE: Device that did not compute checksum, contains full
106 * (though not verified) checksum in packet but not in skb->csum. Packets
107 * from the bridge local port will also have this type.
108 * 2. CHECKSUM_COMPLETE (CHECKSUM_HW): Good device that computes checksums,
109 * also the GRE module. This is the same as CHECKSUM_NONE, except it has
110 * a valid skb->csum. Importantly, both contain a full checksum (not
111 * verified) in the packet itself. The only difference is that if the
112 * packet gets to L4 processing on this machine (not in DomU) we won't
113 * have to recompute the checksum to verify. Most hardware devices do not
114 * produce packets with this type, even if they support receive checksum
115 * offloading (they produce type #5).
116 * 3. CHECKSUM_PARTIAL (CHECKSUM_HW): Packet without full checksum and needs to
117 * be computed if it is sent off box. Unfortunately on earlier kernels,
118 * this case is impossible to distinguish from #2, despite having opposite
119 * meanings. Xen adds an extra field on earlier kernels (see #4) in order
120 * to distinguish the different states.
121 * 4. CHECKSUM_UNNECESSARY (with proto_csum_blank true): This packet was
122 * generated locally by a Xen DomU and has a partial checksum. If it is
123 * handled on this machine (Dom0 or DomU), then the checksum will not be
124 * computed. If it goes off box, the checksum in the packet needs to be
125 * completed. Calling skb_checksum_setup converts this to CHECKSUM_HW
126 * (CHECKSUM_PARTIAL) so that the checksum can be completed. In later
127 * kernels, this combination is replaced with CHECKSUM_PARTIAL.
128 * 5. CHECKSUM_UNNECESSARY (with proto_csum_blank false): Packet with a correct
129 * full checksum or using a protocol without a checksum. skb->csum is
130 * undefined. This is common from devices with receive checksum
131 * offloading. This is somewhat similar to CHECKSUM_NONE, except that
132 * nobody will try to verify the checksum with CHECKSUM_UNNECESSARY.
133 *
134 * Note that on earlier kernels, CHECKSUM_COMPLETE and CHECKSUM_PARTIAL are
135 * both defined as CHECKSUM_HW. Normally the meaning of CHECKSUM_HW is clear
136 * based on whether it is on the transmit or receive path. After the datapath
137 * it will be interpreted as CHECKSUM_PARTIAL. If the packet already has a
138 * checksum, we will panic. Since we can receive packets with checksums, we
139 * assume that all CHECKSUM_HW packets have checksums and map them to
140 * CHECKSUM_NONE, which has a similar meaning (it is only different if the
141 * packet is processed by the local IP stack, in which case it will need to
142 * be reverified). If we receive a packet with CHECKSUM_HW that really means
143 * CHECKSUM_PARTIAL, it will be sent with the wrong checksum. However, there
144 * shouldn't be any devices that do this with bridging.
145 */
c3729ee4 146int compute_ip_summed(struct sk_buff *skb, bool xmit)
dd8d6b8c
JG
147{
148 /* For our convenience these defines change repeatedly between kernel
149 * versions, so we can't just copy them over...
150 */
151 switch (skb->ip_summed) {
152 case CHECKSUM_NONE:
c3729ee4 153 set_ip_summed(skb, OVS_CSUM_NONE);
dd8d6b8c
JG
154 break;
155 case CHECKSUM_UNNECESSARY:
c3729ee4 156 set_ip_summed(skb, OVS_CSUM_UNNECESSARY);
dd8d6b8c
JG
157 break;
158#ifdef CHECKSUM_HW
159 /* In theory this could be either CHECKSUM_PARTIAL or CHECKSUM_COMPLETE.
160 * However, on the receive side we should only get CHECKSUM_PARTIAL
161 * packets from Xen, which uses some special fields to represent this
6455100f
PS
162 * (see vswitch_skb_checksum_setup()). Since we can only make one type
163 * work, pick the one that actually happens in practice.
dd8d6b8c
JG
164 *
165 * On the transmit side (basically after skb_checksum_setup()
166 * has been run or on internal dev transmit), packets with
167 * CHECKSUM_COMPLETE aren't generated, so assume CHECKSUM_PARTIAL.
168 */
169 case CHECKSUM_HW:
170 if (!xmit)
c3729ee4 171 set_ip_summed(skb, OVS_CSUM_COMPLETE);
dd8d6b8c 172 else
c3729ee4 173 set_ip_summed(skb, OVS_CSUM_PARTIAL);
dd8d6b8c
JG
174 break;
175#else
176 case CHECKSUM_COMPLETE:
c3729ee4 177 set_ip_summed(skb, OVS_CSUM_COMPLETE);
dd8d6b8c
JG
178 break;
179 case CHECKSUM_PARTIAL:
c3729ee4 180 set_ip_summed(skb, OVS_CSUM_PARTIAL);
dd8d6b8c
JG
181 break;
182#endif
183 }
184
c3729ee4 185 OVS_CB(skb)->csum_start = skb_headroom(skb) + skb_transport_offset(skb);
dd8d6b8c 186
c3729ee4 187 return vswitch_skb_checksum_setup(skb);
dd8d6b8c
JG
188}
189
c3729ee4 190/*
6455100f
PS
191 * forward_ip_summed - map internal checksum state back onto native
192 * kernel fields.
c3729ee4
JG
193 *
194 * @skb: Packet to manipulate.
6455100f
PS
195 * @xmit: Whether we are about to send on the transmit path of the network stack.
196 * This follows the same logic as the @xmit field in compute_ip_summed().
197 * Generally, a given vport will have opposite values for @xmit passed to
198 * these two functions.
c3729ee4
JG
199 *
200 * When a packet is about to egress from OVS take our internal fields (including
201 * any modifications we have made) and recreate the correct representation for
202 * this kernel. This may do things like change the transport header offset.
203 */
204void forward_ip_summed(struct sk_buff *skb, bool xmit)
dd8d6b8c 205{
6455100f 206 switch (get_ip_summed(skb)) {
c3729ee4
JG
207 case OVS_CSUM_NONE:
208 skb->ip_summed = CHECKSUM_NONE;
209 break;
210 case OVS_CSUM_UNNECESSARY:
211 skb->ip_summed = CHECKSUM_UNNECESSARY;
dd8d6b8c 212#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
c3729ee4
JG
213 skb->proto_data_valid = 1;
214#endif
dd8d6b8c 215 break;
c3729ee4
JG
216#ifdef CHECKSUM_HW
217 case OVS_CSUM_COMPLETE:
218 if (!xmit)
219 skb->ip_summed = CHECKSUM_HW;
220 else
221 skb->ip_summed = CHECKSUM_NONE;
dd8d6b8c 222 break;
c3729ee4
JG
223 case OVS_CSUM_PARTIAL:
224 if (!xmit) {
225 skb->ip_summed = CHECKSUM_UNNECESSARY;
226#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
227 skb->proto_csum_blank = 1;
228#endif
229 } else {
230 skb->ip_summed = CHECKSUM_HW;
231 }
232 break;
233#else
234 case OVS_CSUM_COMPLETE:
235 skb->ip_summed = CHECKSUM_COMPLETE;
236 break;
237 case OVS_CSUM_PARTIAL:
238 skb->ip_summed = CHECKSUM_PARTIAL;
239 break;
240#endif
dd8d6b8c
JG
241 }
242
c3729ee4 243 if (get_ip_summed(skb) == OVS_CSUM_PARTIAL)
6455100f
PS
244 skb_set_transport_header(skb, OVS_CB(skb)->csum_start -
245 skb_headroom(skb));
c3729ee4 246}
dd8d6b8c 247
c3729ee4
JG
248u8 get_ip_summed(struct sk_buff *skb)
249{
250 return OVS_CB(skb)->ip_summed;
251}
dd8d6b8c 252
c3729ee4
JG
253void set_ip_summed(struct sk_buff *skb, u8 ip_summed)
254{
255 OVS_CB(skb)->ip_summed = ip_summed;
256}
dd8d6b8c 257
c3729ee4
JG
258void get_skb_csum_pointers(const struct sk_buff *skb, u16 *csum_start,
259 u16 *csum_offset)
260{
261 *csum_start = OVS_CB(skb)->csum_start;
262 *csum_offset = skb->csum;
dd8d6b8c 263}
c3729ee4 264
6455100f
PS
265void set_skb_csum_pointers(struct sk_buff *skb, u16 csum_start,
266 u16 csum_offset)
c3729ee4
JG
267{
268 OVS_CB(skb)->csum_start = csum_start;
269 skb->csum = csum_offset;
270}
271#endif /* NEED_CSUM_NORMALIZE */