]> git.proxmox.com Git - ovs.git/blame - datapath/actions.c
netdev: New functions for interpreting "enum ofp_port_features" bitmaps.
[ovs.git] / datapath / actions.c
CommitLineData
064af421
BP
1/*
2 * Distributed under the terms of the GNU GPL version 2.
3 * Copyright (c) 2007, 2008, 2009 Nicira Networks.
a14bc59f
BP
4 *
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
064af421
BP
7 */
8
9/* Functions for executing flow actions. */
10
11#include <linux/skbuff.h>
12#include <linux/in.h>
13#include <linux/ip.h>
14#include <linux/tcp.h>
15#include <linux/udp.h>
16#include <linux/in6.h>
17#include <linux/if_vlan.h>
18#include <net/ip.h>
19#include <net/checksum.h>
20#include "datapath.h"
21#include "dp_dev.h"
22#include "actions.h"
23#include "openvswitch/datapath-protocol.h"
24
0cd8a05e
JG
25static struct sk_buff *
26make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp)
064af421
BP
27{
28 if (skb_shared(skb) || skb_cloned(skb)) {
0cd8a05e
JG
29 struct sk_buff *nskb;
30 unsigned headroom = max(min_headroom, skb_headroom(skb));
31
32 nskb = skb_copy_expand(skb, headroom, skb_tailroom(skb), gfp);
064af421 33 if (nskb) {
0cd8a05e
JG
34#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
35 /* Before 2.6.24 these fields were not copied when
36 * doing an skb_copy_expand. */
37 nskb->ip_summed = skb->ip_summed;
38 nskb->csum = skb->csum;
39#endif
6a33828d 40#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
5ef800a6
JG
41 /* These fields are copied in skb_clone but not in
42 * skb_copy or related functions. We need to manually
43 * copy them over here. */
44 nskb->proto_data_valid = skb->proto_data_valid;
45 nskb->proto_csum_blank = skb->proto_csum_blank;
46#endif
064af421
BP
47 kfree_skb(skb);
48 return nskb;
49 }
50 } else {
51 unsigned int hdr_len = (skb_transport_offset(skb)
52 + sizeof(struct tcphdr));
53 if (pskb_may_pull(skb, min(hdr_len, skb->len)))
54 return skb;
55 }
56 kfree_skb(skb);
57 return NULL;
58}
59
60
61static struct sk_buff *
62vlan_pull_tag(struct sk_buff *skb)
63{
64 struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
65 struct ethhdr *eh;
66
67
68 /* Verify we were given a vlan packet */
69 if (vh->h_vlan_proto != htons(ETH_P_8021Q))
70 return skb;
71
72 memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
73
74 eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
75
76 skb->protocol = eh->h_proto;
77 skb->mac_header += VLAN_HLEN;
78
79 return skb;
80}
81
82
83static struct sk_buff *
84modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
85 struct odp_flow_key *key, const union odp_action *a,
86 int n_actions, gfp_t gfp)
87{
88 u16 tci, mask;
89
90 if (a->type == ODPAT_SET_VLAN_VID) {
91 tci = ntohs(a->vlan_vid.vlan_vid);
92 mask = VLAN_VID_MASK;
93 key->dl_vlan = htons(tci & mask);
94 } else {
95 tci = a->vlan_pcp.vlan_pcp << 13;
96 mask = VLAN_PCP_MASK;
97 }
98
0cd8a05e 99 skb = make_writable(skb, VLAN_HLEN, gfp);
064af421
BP
100 if (!skb)
101 return ERR_PTR(-ENOMEM);
102
103 if (skb->protocol == htons(ETH_P_8021Q)) {
104 /* Modify vlan id, but maintain other TCI values */
105 struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
106 vh->h_vlan_TCI = htons((ntohs(vh->h_vlan_TCI) & ~mask) | tci);
107 } else {
108 /* Add vlan header */
109
110 /* Set up checksumming pointers for checksum-deferred packets
111 * on Xen. Otherwise, dev_queue_xmit() will try to do this
112 * when we send the packet out on the wire, and it will fail at
113 * that point because skb_checksum_setup() will not look inside
114 * an 802.1Q header. */
b2f460c7 115 vswitch_skb_checksum_setup(skb);
064af421
BP
116
117 /* GSO is not implemented for packets with an 802.1Q header, so
118 * we have to do segmentation before we add that header.
119 *
120 * GSO does work with hardware-accelerated VLAN tagging, but we
121 * can't use hardware-accelerated VLAN tagging since it
122 * requires the device to have a VLAN group configured (with
123 * e.g. vconfig(8)) and we don't do that.
124 *
125 * Having to do this here may be a performance loss, since we
126 * can't take advantage of TSO hardware support, although it
127 * does not make a measurable network performance difference
128 * for 1G Ethernet. Fixing that would require patching the
129 * kernel (either to add GSO support to the VLAN protocol or to
130 * support hardware-accelerated VLAN tagging without VLAN
131 * groups configured). */
132 if (skb_is_gso(skb)) {
133 struct sk_buff *segs;
134
135 segs = skb_gso_segment(skb, 0);
136 kfree_skb(skb);
137 if (unlikely(IS_ERR(segs)))
138 return ERR_CAST(segs);
139
140 do {
141 struct sk_buff *nskb = segs->next;
142 int err;
143
144 segs->next = NULL;
145
146 segs = __vlan_put_tag(segs, tci);
147 err = -ENOMEM;
148 if (segs) {
149 struct odp_flow_key segkey = *key;
150 err = execute_actions(dp, segs,
151 &segkey, a + 1,
152 n_actions - 1,
153 gfp);
154 }
155
156 if (unlikely(err)) {
157 while ((segs = nskb)) {
158 nskb = segs->next;
159 segs->next = NULL;
160 kfree_skb(segs);
161 }
162 return ERR_PTR(err);
163 }
164
165 segs = nskb;
166 } while (segs->next);
167
168 skb = segs;
169 }
170
171 /* The hardware-accelerated version of vlan_put_tag() works
172 * only for a device that has a VLAN group configured (with
173 * e.g. vconfig(8)), so call the software-only version
174 * __vlan_put_tag() directly instead.
175 */
176 skb = __vlan_put_tag(skb, tci);
177 if (!skb)
178 return ERR_PTR(-ENOMEM);
179 }
180
181 return skb;
182}
183
184static struct sk_buff *strip_vlan(struct sk_buff *skb,
185 struct odp_flow_key *key, gfp_t gfp)
186{
0cd8a05e 187 skb = make_writable(skb, 0, gfp);
064af421
BP
188 if (skb) {
189 vlan_pull_tag(skb);
190 key->dl_vlan = htons(ODP_VLAN_NONE);
191 }
192 return skb;
193}
194
195static struct sk_buff *set_dl_addr(struct sk_buff *skb,
196 const struct odp_action_dl_addr *a,
197 gfp_t gfp)
198{
0cd8a05e 199 skb = make_writable(skb, 0, gfp);
064af421
BP
200 if (skb) {
201 struct ethhdr *eh = eth_hdr(skb);
202 memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest,
203 a->dl_addr, ETH_ALEN);
204 }
205 return skb;
206}
207
208/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
209 * covered by the sum has been changed from 'from' to 'to'. If set,
210 * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
211 * Based on nf_proto_csum_replace4. */
212static void update_csum(__sum16 *sum, struct sk_buff *skb,
213 __be32 from, __be32 to, int pseudohdr)
214{
215 __be32 diff[] = { ~from, to };
216 if (skb->ip_summed != CHECKSUM_PARTIAL) {
217 *sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
218 ~csum_unfold(*sum)));
219 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
220 skb->csum = ~csum_partial((char *)diff, sizeof(diff),
221 ~skb->csum);
222 } else if (pseudohdr)
223 *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
224 csum_unfold(*sum)));
225}
226
227static struct sk_buff *set_nw_addr(struct sk_buff *skb,
228 struct odp_flow_key *key,
229 const struct odp_action_nw_addr *a,
230 gfp_t gfp)
231{
232 if (key->dl_type != htons(ETH_P_IP))
233 return skb;
234
0cd8a05e 235 skb = make_writable(skb, 0, gfp);
064af421
BP
236 if (skb) {
237 struct iphdr *nh = ip_hdr(skb);
238 u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr;
239 u32 old = *f;
240 u32 new = a->nw_addr;
241
242 if (key->nw_proto == IPPROTO_TCP) {
243 struct tcphdr *th = tcp_hdr(skb);
244 update_csum(&th->check, skb, old, new, 1);
245 } else if (key->nw_proto == IPPROTO_UDP) {
246 struct udphdr *th = udp_hdr(skb);
247 update_csum(&th->check, skb, old, new, 1);
248 }
249 update_csum(&nh->check, skb, old, new, 0);
250 *f = new;
251 }
252 return skb;
253}
254
255static struct sk_buff *
256set_tp_port(struct sk_buff *skb, struct odp_flow_key *key,
257 const struct odp_action_tp_port *a,
258 gfp_t gfp)
259{
260 int check_ofs;
261
262 if (key->dl_type != htons(ETH_P_IP))
263 return skb;
264
265 if (key->nw_proto == IPPROTO_TCP)
266 check_ofs = offsetof(struct tcphdr, check);
267 else if (key->nw_proto == IPPROTO_UDP)
268 check_ofs = offsetof(struct udphdr, check);
269 else
270 return skb;
271
0cd8a05e 272 skb = make_writable(skb, 0, gfp);
064af421
BP
273 if (skb) {
274 struct udphdr *th = udp_hdr(skb);
275 u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest;
276 u16 old = *f;
277 u16 new = a->tp_port;
985224ac
JP
278 update_csum((u16*)(skb_transport_header(skb) + check_ofs),
279 skb, old, new, 1);
064af421
BP
280 *f = new;
281 }
282 return skb;
283}
284
285static inline unsigned packet_length(const struct sk_buff *skb)
286{
287 unsigned length = skb->len - ETH_HLEN;
288 if (skb->protocol == htons(ETH_P_8021Q))
289 length -= VLAN_HLEN;
290 return length;
291}
292
293int dp_xmit_skb(struct sk_buff *skb)
294{
295 struct datapath *dp = skb->dev->br_port->dp;
296 int len = skb->len;
297
298 if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
299 printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
300 dp_name(dp), packet_length(skb), skb->dev->mtu);
301 kfree_skb(skb);
302 return -E2BIG;
303 }
304
305 dev_queue_xmit(skb);
306
307 return len;
308}
309
310static void
311do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
312{
313 struct net_bridge_port *p;
314 struct net_device *dev;
315
316 if (!skb)
317 goto error;
318
319 p = dp->ports[out_port];
320 if (!p)
321 goto error;
322
323 dev = skb->dev = p->dev;
324 if (is_dp_dev(dev))
325 dp_dev_recv(dev, skb);
de3f65ea 326 else
064af421
BP
327 dp_xmit_skb(skb);
328 return;
329
330error:
331 kfree_skb(skb);
332}
333
334/* Never consumes 'skb'. Returns a port that 'skb' should be sent to, -1 if
335 * none. */
336static int output_group(struct datapath *dp, __u16 group,
337 struct sk_buff *skb, gfp_t gfp)
338{
339 struct dp_port_group *g = rcu_dereference(dp->groups[group]);
340 int prev_port = -1;
341 int i;
342
343 if (!g)
344 return -1;
345 for (i = 0; i < g->n_ports; i++) {
346 struct net_bridge_port *p = dp->ports[g->ports[i]];
347 if (!p || skb->dev == p->dev)
348 continue;
349 if (prev_port != -1) {
350 struct sk_buff *clone = skb_clone(skb, gfp);
351 if (!clone)
352 return -1;
353 do_output(dp, clone, prev_port);
354 }
355 prev_port = p->port_no;
356 }
357 return prev_port;
358}
359
360static int
361output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
362{
363 skb = skb_clone(skb, gfp);
364 if (!skb)
365 return -ENOMEM;
366 return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg);
367}
368
369/* Execute a list of actions against 'skb'. */
370int execute_actions(struct datapath *dp, struct sk_buff *skb,
371 struct odp_flow_key *key,
372 const union odp_action *a, int n_actions,
373 gfp_t gfp)
374{
375 /* Every output action needs a separate clone of 'skb', but the common
376 * case is just a single output action, so that doing a clone and
377 * then freeing the original skbuff is wasteful. So the following code
378 * is slightly obscure just to avoid that. */
379 int prev_port = -1;
a5225dd6 380 int err;
064af421
BP
381 for (; n_actions > 0; a++, n_actions--) {
382 WARN_ON_ONCE(skb_shared(skb));
383 if (prev_port != -1) {
384 do_output(dp, skb_clone(skb, gfp), prev_port);
385 prev_port = -1;
386 }
387
388 switch (a->type) {
389 case ODPAT_OUTPUT:
390 prev_port = a->output.port;
391 break;
392
393 case ODPAT_OUTPUT_GROUP:
394 prev_port = output_group(dp, a->output_group.group,
395 skb, gfp);
396 break;
397
398 case ODPAT_CONTROLLER:
399 err = output_control(dp, skb, a->controller.arg, gfp);
400 if (err) {
401 kfree_skb(skb);
402 return err;
403 }
404 break;
405
406 case ODPAT_SET_VLAN_VID:
407 case ODPAT_SET_VLAN_PCP:
408 skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp);
409 if (IS_ERR(skb))
410 return PTR_ERR(skb);
411 break;
412
413 case ODPAT_STRIP_VLAN:
414 skb = strip_vlan(skb, key, gfp);
415 break;
416
417 case ODPAT_SET_DL_SRC:
418 case ODPAT_SET_DL_DST:
419 skb = set_dl_addr(skb, &a->dl_addr, gfp);
420 break;
421
422 case ODPAT_SET_NW_SRC:
423 case ODPAT_SET_NW_DST:
424 skb = set_nw_addr(skb, key, &a->nw_addr, gfp);
425 break;
426
427 case ODPAT_SET_TP_SRC:
428 case ODPAT_SET_TP_DST:
429 skb = set_tp_port(skb, key, &a->tp_port, gfp);
430 break;
431 }
432 if (!skb)
433 return -ENOMEM;
434 }
435 if (prev_port != -1)
436 do_output(dp, skb, prev_port);
437 else
438 kfree_skb(skb);
a5225dd6 439 return 0;
064af421 440}