]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Distributed under the terms of the GNU GPL version 2. | |
3 | * Copyright (c) 2007, 2008, 2009 Nicira Networks. | |
4 | */ | |
5 | ||
6 | /* Functions for executing flow actions. */ | |
7 | ||
8 | #include <linux/skbuff.h> | |
9 | #include <linux/in.h> | |
10 | #include <linux/ip.h> | |
11 | #include <linux/tcp.h> | |
12 | #include <linux/udp.h> | |
13 | #include <linux/in6.h> | |
14 | #include <linux/if_vlan.h> | |
15 | #include <net/ip.h> | |
16 | #include <net/checksum.h> | |
17 | #include "datapath.h" | |
18 | #include "dp_dev.h" | |
19 | #include "actions.h" | |
20 | #include "openvswitch/datapath-protocol.h" | |
21 | ||
22 | struct sk_buff * | |
23 | make_writable(struct sk_buff *skb, gfp_t gfp) | |
24 | { | |
25 | if (skb_shared(skb) || skb_cloned(skb)) { | |
26 | struct sk_buff *nskb = skb_copy(skb, gfp); | |
27 | if (nskb) { | |
28 | kfree_skb(skb); | |
29 | return nskb; | |
30 | } | |
31 | } else { | |
32 | unsigned int hdr_len = (skb_transport_offset(skb) | |
33 | + sizeof(struct tcphdr)); | |
34 | if (pskb_may_pull(skb, min(hdr_len, skb->len))) | |
35 | return skb; | |
36 | } | |
37 | kfree_skb(skb); | |
38 | return NULL; | |
39 | } | |
40 | ||
41 | ||
42 | static struct sk_buff * | |
43 | vlan_pull_tag(struct sk_buff *skb) | |
44 | { | |
45 | struct vlan_ethhdr *vh = vlan_eth_hdr(skb); | |
46 | struct ethhdr *eh; | |
47 | ||
48 | ||
49 | /* Verify we were given a vlan packet */ | |
50 | if (vh->h_vlan_proto != htons(ETH_P_8021Q)) | |
51 | return skb; | |
52 | ||
53 | memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN); | |
54 | ||
55 | eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); | |
56 | ||
57 | skb->protocol = eh->h_proto; | |
58 | skb->mac_header += VLAN_HLEN; | |
59 | ||
60 | return skb; | |
61 | } | |
62 | ||
63 | ||
64 | static struct sk_buff * | |
65 | modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, | |
66 | struct odp_flow_key *key, const union odp_action *a, | |
67 | int n_actions, gfp_t gfp) | |
68 | { | |
69 | u16 tci, mask; | |
70 | ||
71 | if (a->type == ODPAT_SET_VLAN_VID) { | |
72 | tci = ntohs(a->vlan_vid.vlan_vid); | |
73 | mask = VLAN_VID_MASK; | |
74 | key->dl_vlan = htons(tci & mask); | |
75 | } else { | |
76 | tci = a->vlan_pcp.vlan_pcp << 13; | |
77 | mask = VLAN_PCP_MASK; | |
78 | } | |
79 | ||
80 | skb = make_writable(skb, gfp); | |
81 | if (!skb) | |
82 | return ERR_PTR(-ENOMEM); | |
83 | ||
84 | if (skb->protocol == htons(ETH_P_8021Q)) { | |
85 | /* Modify vlan id, but maintain other TCI values */ | |
86 | struct vlan_ethhdr *vh = vlan_eth_hdr(skb); | |
87 | vh->h_vlan_TCI = htons((ntohs(vh->h_vlan_TCI) & ~mask) | tci); | |
88 | } else { | |
89 | /* Add vlan header */ | |
90 | ||
91 | /* Set up checksumming pointers for checksum-deferred packets | |
92 | * on Xen. Otherwise, dev_queue_xmit() will try to do this | |
93 | * when we send the packet out on the wire, and it will fail at | |
94 | * that point because skb_checksum_setup() will not look inside | |
95 | * an 802.1Q header. */ | |
96 | skb_checksum_setup(skb); | |
97 | ||
98 | /* GSO is not implemented for packets with an 802.1Q header, so | |
99 | * we have to do segmentation before we add that header. | |
100 | * | |
101 | * GSO does work with hardware-accelerated VLAN tagging, but we | |
102 | * can't use hardware-accelerated VLAN tagging since it | |
103 | * requires the device to have a VLAN group configured (with | |
104 | * e.g. vconfig(8)) and we don't do that. | |
105 | * | |
106 | * Having to do this here may be a performance loss, since we | |
107 | * can't take advantage of TSO hardware support, although it | |
108 | * does not make a measurable network performance difference | |
109 | * for 1G Ethernet. Fixing that would require patching the | |
110 | * kernel (either to add GSO support to the VLAN protocol or to | |
111 | * support hardware-accelerated VLAN tagging without VLAN | |
112 | * groups configured). */ | |
113 | if (skb_is_gso(skb)) { | |
114 | struct sk_buff *segs; | |
115 | ||
116 | segs = skb_gso_segment(skb, 0); | |
117 | kfree_skb(skb); | |
118 | if (unlikely(IS_ERR(segs))) | |
119 | return ERR_CAST(segs); | |
120 | ||
121 | do { | |
122 | struct sk_buff *nskb = segs->next; | |
123 | int err; | |
124 | ||
125 | segs->next = NULL; | |
126 | ||
127 | segs = __vlan_put_tag(segs, tci); | |
128 | err = -ENOMEM; | |
129 | if (segs) { | |
130 | struct odp_flow_key segkey = *key; | |
131 | err = execute_actions(dp, segs, | |
132 | &segkey, a + 1, | |
133 | n_actions - 1, | |
134 | gfp); | |
135 | } | |
136 | ||
137 | if (unlikely(err)) { | |
138 | while ((segs = nskb)) { | |
139 | nskb = segs->next; | |
140 | segs->next = NULL; | |
141 | kfree_skb(segs); | |
142 | } | |
143 | return ERR_PTR(err); | |
144 | } | |
145 | ||
146 | segs = nskb; | |
147 | } while (segs->next); | |
148 | ||
149 | skb = segs; | |
150 | } | |
151 | ||
152 | /* The hardware-accelerated version of vlan_put_tag() works | |
153 | * only for a device that has a VLAN group configured (with | |
154 | * e.g. vconfig(8)), so call the software-only version | |
155 | * __vlan_put_tag() directly instead. | |
156 | */ | |
157 | skb = __vlan_put_tag(skb, tci); | |
158 | if (!skb) | |
159 | return ERR_PTR(-ENOMEM); | |
160 | } | |
161 | ||
162 | return skb; | |
163 | } | |
164 | ||
165 | static struct sk_buff *strip_vlan(struct sk_buff *skb, | |
166 | struct odp_flow_key *key, gfp_t gfp) | |
167 | { | |
168 | skb = make_writable(skb, gfp); | |
169 | if (skb) { | |
170 | vlan_pull_tag(skb); | |
171 | key->dl_vlan = htons(ODP_VLAN_NONE); | |
172 | } | |
173 | return skb; | |
174 | } | |
175 | ||
176 | static struct sk_buff *set_dl_addr(struct sk_buff *skb, | |
177 | const struct odp_action_dl_addr *a, | |
178 | gfp_t gfp) | |
179 | { | |
180 | skb = make_writable(skb, gfp); | |
181 | if (skb) { | |
182 | struct ethhdr *eh = eth_hdr(skb); | |
183 | memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest, | |
184 | a->dl_addr, ETH_ALEN); | |
185 | } | |
186 | return skb; | |
187 | } | |
188 | ||
189 | /* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field | |
190 | * covered by the sum has been changed from 'from' to 'to'. If set, | |
191 | * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header. | |
192 | * Based on nf_proto_csum_replace4. */ | |
193 | static void update_csum(__sum16 *sum, struct sk_buff *skb, | |
194 | __be32 from, __be32 to, int pseudohdr) | |
195 | { | |
196 | __be32 diff[] = { ~from, to }; | |
197 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | |
198 | *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), | |
199 | ~csum_unfold(*sum))); | |
200 | if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) | |
201 | skb->csum = ~csum_partial((char *)diff, sizeof(diff), | |
202 | ~skb->csum); | |
203 | } else if (pseudohdr) | |
204 | *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), | |
205 | csum_unfold(*sum))); | |
206 | } | |
207 | ||
208 | static struct sk_buff *set_nw_addr(struct sk_buff *skb, | |
209 | struct odp_flow_key *key, | |
210 | const struct odp_action_nw_addr *a, | |
211 | gfp_t gfp) | |
212 | { | |
213 | if (key->dl_type != htons(ETH_P_IP)) | |
214 | return skb; | |
215 | ||
216 | skb = make_writable(skb, gfp); | |
217 | if (skb) { | |
218 | struct iphdr *nh = ip_hdr(skb); | |
219 | u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr; | |
220 | u32 old = *f; | |
221 | u32 new = a->nw_addr; | |
222 | ||
223 | if (key->nw_proto == IPPROTO_TCP) { | |
224 | struct tcphdr *th = tcp_hdr(skb); | |
225 | update_csum(&th->check, skb, old, new, 1); | |
226 | } else if (key->nw_proto == IPPROTO_UDP) { | |
227 | struct udphdr *th = udp_hdr(skb); | |
228 | update_csum(&th->check, skb, old, new, 1); | |
229 | } | |
230 | update_csum(&nh->check, skb, old, new, 0); | |
231 | *f = new; | |
232 | } | |
233 | return skb; | |
234 | } | |
235 | ||
236 | static struct sk_buff * | |
237 | set_tp_port(struct sk_buff *skb, struct odp_flow_key *key, | |
238 | const struct odp_action_tp_port *a, | |
239 | gfp_t gfp) | |
240 | { | |
241 | int check_ofs; | |
242 | ||
243 | if (key->dl_type != htons(ETH_P_IP)) | |
244 | return skb; | |
245 | ||
246 | if (key->nw_proto == IPPROTO_TCP) | |
247 | check_ofs = offsetof(struct tcphdr, check); | |
248 | else if (key->nw_proto == IPPROTO_UDP) | |
249 | check_ofs = offsetof(struct udphdr, check); | |
250 | else | |
251 | return skb; | |
252 | ||
253 | skb = make_writable(skb, gfp); | |
254 | if (skb) { | |
255 | struct udphdr *th = udp_hdr(skb); | |
256 | u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest; | |
257 | u16 old = *f; | |
258 | u16 new = a->tp_port; | |
259 | update_csum((u16*)((u8*)skb->data + check_ofs), | |
260 | skb, old, new, 1); | |
261 | *f = new; | |
262 | } | |
263 | return skb; | |
264 | } | |
265 | ||
266 | static inline unsigned packet_length(const struct sk_buff *skb) | |
267 | { | |
268 | unsigned length = skb->len - ETH_HLEN; | |
269 | if (skb->protocol == htons(ETH_P_8021Q)) | |
270 | length -= VLAN_HLEN; | |
271 | return length; | |
272 | } | |
273 | ||
274 | int dp_xmit_skb(struct sk_buff *skb) | |
275 | { | |
276 | struct datapath *dp = skb->dev->br_port->dp; | |
277 | int len = skb->len; | |
278 | ||
279 | if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { | |
280 | printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n", | |
281 | dp_name(dp), packet_length(skb), skb->dev->mtu); | |
282 | kfree_skb(skb); | |
283 | return -E2BIG; | |
284 | } | |
285 | ||
286 | dev_queue_xmit(skb); | |
287 | ||
288 | return len; | |
289 | } | |
290 | ||
291 | static void | |
292 | do_output(struct datapath *dp, struct sk_buff *skb, int out_port) | |
293 | { | |
294 | struct net_bridge_port *p; | |
295 | struct net_device *dev; | |
296 | ||
297 | if (!skb) | |
298 | goto error; | |
299 | ||
300 | p = dp->ports[out_port]; | |
301 | if (!p) | |
302 | goto error; | |
303 | ||
304 | dev = skb->dev = p->dev; | |
305 | if (is_dp_dev(dev)) | |
306 | dp_dev_recv(dev, skb); | |
307 | else | |
308 | dp_xmit_skb(skb); | |
309 | return; | |
310 | ||
311 | error: | |
312 | kfree_skb(skb); | |
313 | } | |
314 | ||
315 | /* Never consumes 'skb'. Returns a port that 'skb' should be sent to, -1 if | |
316 | * none. */ | |
317 | static int output_group(struct datapath *dp, __u16 group, | |
318 | struct sk_buff *skb, gfp_t gfp) | |
319 | { | |
320 | struct dp_port_group *g = rcu_dereference(dp->groups[group]); | |
321 | int prev_port = -1; | |
322 | int i; | |
323 | ||
324 | if (!g) | |
325 | return -1; | |
326 | for (i = 0; i < g->n_ports; i++) { | |
327 | struct net_bridge_port *p = dp->ports[g->ports[i]]; | |
328 | if (!p || skb->dev == p->dev) | |
329 | continue; | |
330 | if (prev_port != -1) { | |
331 | struct sk_buff *clone = skb_clone(skb, gfp); | |
332 | if (!clone) | |
333 | return -1; | |
334 | do_output(dp, clone, prev_port); | |
335 | } | |
336 | prev_port = p->port_no; | |
337 | } | |
338 | return prev_port; | |
339 | } | |
340 | ||
341 | static int | |
342 | output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp) | |
343 | { | |
344 | skb = skb_clone(skb, gfp); | |
345 | if (!skb) | |
346 | return -ENOMEM; | |
347 | return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg); | |
348 | } | |
349 | ||
350 | /* Execute a list of actions against 'skb'. */ | |
351 | int execute_actions(struct datapath *dp, struct sk_buff *skb, | |
352 | struct odp_flow_key *key, | |
353 | const union odp_action *a, int n_actions, | |
354 | gfp_t gfp) | |
355 | { | |
356 | /* Every output action needs a separate clone of 'skb', but the common | |
357 | * case is just a single output action, so that doing a clone and | |
358 | * then freeing the original skbuff is wasteful. So the following code | |
359 | * is slightly obscure just to avoid that. */ | |
360 | int prev_port = -1; | |
361 | int err = 0; | |
362 | for (; n_actions > 0; a++, n_actions--) { | |
363 | WARN_ON_ONCE(skb_shared(skb)); | |
364 | if (prev_port != -1) { | |
365 | do_output(dp, skb_clone(skb, gfp), prev_port); | |
366 | prev_port = -1; | |
367 | } | |
368 | ||
369 | switch (a->type) { | |
370 | case ODPAT_OUTPUT: | |
371 | prev_port = a->output.port; | |
372 | break; | |
373 | ||
374 | case ODPAT_OUTPUT_GROUP: | |
375 | prev_port = output_group(dp, a->output_group.group, | |
376 | skb, gfp); | |
377 | break; | |
378 | ||
379 | case ODPAT_CONTROLLER: | |
380 | err = output_control(dp, skb, a->controller.arg, gfp); | |
381 | if (err) { | |
382 | kfree_skb(skb); | |
383 | return err; | |
384 | } | |
385 | break; | |
386 | ||
387 | case ODPAT_SET_VLAN_VID: | |
388 | case ODPAT_SET_VLAN_PCP: | |
389 | skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp); | |
390 | if (IS_ERR(skb)) | |
391 | return PTR_ERR(skb); | |
392 | break; | |
393 | ||
394 | case ODPAT_STRIP_VLAN: | |
395 | skb = strip_vlan(skb, key, gfp); | |
396 | break; | |
397 | ||
398 | case ODPAT_SET_DL_SRC: | |
399 | case ODPAT_SET_DL_DST: | |
400 | skb = set_dl_addr(skb, &a->dl_addr, gfp); | |
401 | break; | |
402 | ||
403 | case ODPAT_SET_NW_SRC: | |
404 | case ODPAT_SET_NW_DST: | |
405 | skb = set_nw_addr(skb, key, &a->nw_addr, gfp); | |
406 | break; | |
407 | ||
408 | case ODPAT_SET_TP_SRC: | |
409 | case ODPAT_SET_TP_DST: | |
410 | skb = set_tp_port(skb, key, &a->tp_port, gfp); | |
411 | break; | |
412 | } | |
413 | if (!skb) | |
414 | return -ENOMEM; | |
415 | } | |
416 | if (prev_port != -1) | |
417 | do_output(dp, skb, prev_port); | |
418 | else | |
419 | kfree_skb(skb); | |
420 | return err; | |
421 | } |