]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Distributed under the terms of the GNU GPL version 2. | |
982b8810 | 3 | * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks. |
a14bc59f BP |
4 | * |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
064af421 BP |
7 | */ |
8 | ||
9 | /* Functions for executing flow actions. */ | |
10 | ||
11 | #include <linux/skbuff.h> | |
12 | #include <linux/in.h> | |
13 | #include <linux/ip.h> | |
14 | #include <linux/tcp.h> | |
15 | #include <linux/udp.h> | |
16 | #include <linux/in6.h> | |
401eeb92 | 17 | #include <linux/if_arp.h> |
064af421 | 18 | #include <linux/if_vlan.h> |
f1193301 | 19 | #include <net/inet_ecn.h> |
064af421 BP |
20 | #include <net/ip.h> |
21 | #include <net/checksum.h> | |
f2459fe7 | 22 | |
064af421 | 23 | #include "actions.h" |
dd8d6b8c | 24 | #include "checksum.h" |
f2459fe7 | 25 | #include "datapath.h" |
064af421 | 26 | #include "openvswitch/datapath-protocol.h" |
6ce39213 | 27 | #include "vlan.h" |
f2459fe7 | 28 | #include "vport.h" |
064af421 | 29 | |
871dfe07 | 30 | static int do_execute_actions(struct datapath *, struct sk_buff *, |
36956a7d | 31 | const struct sw_flow_key *, |
871dfe07 BP |
32 | const struct nlattr *actions, u32 actions_len); |
33 | ||
7956695a | 34 | static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom) |
064af421 | 35 | { |
67c74f75 | 36 | if (skb_cloned(skb)) { |
0cd8a05e JG |
37 | struct sk_buff *nskb; |
38 | unsigned headroom = max(min_headroom, skb_headroom(skb)); | |
39 | ||
7956695a | 40 | nskb = skb_copy_expand(skb, headroom, skb_tailroom(skb), GFP_ATOMIC); |
064af421 | 41 | if (nskb) { |
ff6402a9 | 42 | set_skb_csum_bits(skb, nskb); |
064af421 BP |
43 | kfree_skb(skb); |
44 | return nskb; | |
45 | } | |
46 | } else { | |
47 | unsigned int hdr_len = (skb_transport_offset(skb) | |
48 | + sizeof(struct tcphdr)); | |
49 | if (pskb_may_pull(skb, min(hdr_len, skb->len))) | |
50 | return skb; | |
51 | } | |
52 | kfree_skb(skb); | |
53 | return NULL; | |
54 | } | |
55 | ||
6ce39213 | 56 | static struct sk_buff *strip_vlan(struct sk_buff *skb) |
064af421 | 57 | { |
064af421 BP |
58 | struct ethhdr *eh; |
59 | ||
6ce39213 JG |
60 | if (vlan_tx_tag_present(skb)) { |
61 | vlan_set_tci(skb, 0); | |
62 | return skb; | |
63 | } | |
64 | ||
65 | if (unlikely(vlan_eth_hdr(skb)->h_vlan_proto != htons(ETH_P_8021Q) || | |
66 | skb->len < VLAN_ETH_HLEN)) | |
064af421 BP |
67 | return skb; |
68 | ||
6ce39213 JG |
69 | skb = make_writable(skb, 0); |
70 | if (unlikely(!skb)) | |
71 | return NULL; | |
72 | ||
dd8d6b8c | 73 | if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) |
635c9298 JG |
74 | skb->csum = csum_sub(skb->csum, csum_partial(skb->data |
75 | + ETH_HLEN, VLAN_HLEN, 0)); | |
76 | ||
6ce39213 | 77 | memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); |
064af421 BP |
78 | |
79 | eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); | |
80 | ||
81 | skb->protocol = eh->h_proto; | |
82 | skb->mac_header += VLAN_HLEN; | |
83 | ||
84 | return skb; | |
85 | } | |
86 | ||
09842539 | 87 | static struct sk_buff *modify_vlan_tci(struct sk_buff *skb, __be16 tci) |
064af421 | 88 | { |
6ce39213 JG |
89 | struct vlan_ethhdr *vh; |
90 | __be16 old_tci; | |
064af421 | 91 | |
6ce39213 JG |
92 | if (vlan_tx_tag_present(skb) || skb->protocol != htons(ETH_P_8021Q)) |
93 | return __vlan_hwaccel_put_tag(skb, ntohs(tci)); | |
ca78c6b6 | 94 | |
6ce39213 JG |
95 | skb = make_writable(skb, 0); |
96 | if (unlikely(!skb)) | |
97 | return NULL; | |
b28c72ba | 98 | |
6ce39213 JG |
99 | if (unlikely(skb->len < VLAN_ETH_HLEN)) |
100 | return skb; | |
064af421 | 101 | |
6ce39213 | 102 | vh = vlan_eth_hdr(skb); |
064af421 | 103 | |
6ce39213 JG |
104 | old_tci = vh->h_vlan_TCI; |
105 | vh->h_vlan_TCI = tci; | |
064af421 | 106 | |
6ce39213 JG |
107 | if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) { |
108 | __be16 diff[] = { ~old_tci, vh->h_vlan_TCI }; | |
109 | skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); | |
064af421 BP |
110 | } |
111 | ||
112 | return skb; | |
113 | } | |
114 | ||
36956a7d | 115 | static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key) |
ca78c6b6 BP |
116 | { |
117 | return (key->dl_type == htons(ETH_P_IP) && | |
118 | skb->transport_header > skb->network_header); | |
119 | } | |
120 | ||
36956a7d | 121 | static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct sw_flow_key *key) |
ca78c6b6 BP |
122 | { |
123 | int transport_len = skb->len - skb_transport_offset(skb); | |
124 | if (key->nw_proto == IPPROTO_TCP) { | |
125 | if (likely(transport_len >= sizeof(struct tcphdr))) | |
126 | return &tcp_hdr(skb)->check; | |
127 | } else if (key->nw_proto == IPPROTO_UDP) { | |
128 | if (likely(transport_len >= sizeof(struct udphdr))) | |
129 | return &udp_hdr(skb)->check; | |
130 | } | |
131 | return NULL; | |
132 | } | |
133 | ||
064af421 | 134 | static struct sk_buff *set_nw_addr(struct sk_buff *skb, |
36956a7d | 135 | const struct sw_flow_key *key, |
cdee00fd | 136 | const struct nlattr *a) |
064af421 | 137 | { |
cdee00fd | 138 | __be32 new_nwaddr = nla_get_be32(a); |
ca78c6b6 BP |
139 | struct iphdr *nh; |
140 | __sum16 *check; | |
141 | __be32 *nwaddr; | |
142 | ||
143 | if (unlikely(!is_ip(skb, key))) | |
064af421 BP |
144 | return skb; |
145 | ||
7956695a | 146 | skb = make_writable(skb, 0); |
ca78c6b6 BP |
147 | if (unlikely(!skb)) |
148 | return NULL; | |
149 | ||
150 | nh = ip_hdr(skb); | |
7aec165d | 151 | nwaddr = nla_type(a) == ODP_ACTION_ATTR_SET_NW_SRC ? &nh->saddr : &nh->daddr; |
ca78c6b6 BP |
152 | |
153 | check = get_l4_checksum(skb, key); | |
154 | if (likely(check)) | |
cdee00fd BP |
155 | inet_proto_csum_replace4(check, skb, *nwaddr, new_nwaddr, 1); |
156 | csum_replace4(&nh->check, *nwaddr, new_nwaddr); | |
ca78c6b6 | 157 | |
a4a26436 SH |
158 | skb_clear_rxhash(skb); |
159 | ||
cdee00fd | 160 | *nwaddr = new_nwaddr; |
ca78c6b6 | 161 | |
064af421 BP |
162 | return skb; |
163 | } | |
164 | ||
959a2ecd | 165 | static struct sk_buff *set_nw_tos(struct sk_buff *skb, |
36956a7d | 166 | const struct sw_flow_key *key, |
cdee00fd | 167 | u8 nw_tos) |
959a2ecd | 168 | { |
ca78c6b6 | 169 | if (unlikely(!is_ip(skb, key))) |
959a2ecd JP |
170 | return skb; |
171 | ||
7956695a | 172 | skb = make_writable(skb, 0); |
959a2ecd JP |
173 | if (skb) { |
174 | struct iphdr *nh = ip_hdr(skb); | |
175 | u8 *f = &nh->tos; | |
176 | u8 old = *f; | |
f1193301 | 177 | u8 new; |
959a2ecd | 178 | |
f1193301 | 179 | /* Set the DSCP bits and preserve the ECN bits. */ |
cdee00fd | 180 | new = nw_tos | (nh->tos & INET_ECN_MASK); |
8b69563f JG |
181 | csum_replace4(&nh->check, (__force __be32)old, |
182 | (__force __be32)new); | |
959a2ecd JP |
183 | *f = new; |
184 | } | |
185 | return skb; | |
186 | } | |
187 | ||
aebdcb93 | 188 | static struct sk_buff *set_tp_port(struct sk_buff *skb, |
36956a7d | 189 | const struct sw_flow_key *key, |
cdee00fd | 190 | const struct nlattr *a) |
064af421 | 191 | { |
ca78c6b6 BP |
192 | struct udphdr *th; |
193 | __sum16 *check; | |
194 | __be16 *port; | |
064af421 | 195 | |
ca78c6b6 | 196 | if (unlikely(!is_ip(skb, key))) |
064af421 BP |
197 | return skb; |
198 | ||
7956695a | 199 | skb = make_writable(skb, 0); |
ca78c6b6 BP |
200 | if (unlikely(!skb)) |
201 | return NULL; | |
202 | ||
203 | /* Must follow make_writable() since that can move the skb data. */ | |
204 | check = get_l4_checksum(skb, key); | |
205 | if (unlikely(!check)) | |
064af421 BP |
206 | return skb; |
207 | ||
ca78c6b6 BP |
208 | /* |
209 | * Update port and checksum. | |
210 | * | |
211 | * This is OK because source and destination port numbers are at the | |
212 | * same offsets in both UDP and TCP headers, and get_l4_checksum() only | |
213 | * supports those protocols. | |
214 | */ | |
215 | th = udp_hdr(skb); | |
7aec165d | 216 | port = nla_type(a) == ODP_ACTION_ATTR_SET_TP_SRC ? &th->source : &th->dest; |
cdee00fd BP |
217 | inet_proto_csum_replace2(check, skb, *port, nla_get_be16(a), 0); |
218 | *port = nla_get_be16(a); | |
a4a26436 | 219 | skb_clear_rxhash(skb); |
ca78c6b6 | 220 | |
064af421 BP |
221 | return skb; |
222 | } | |
223 | ||
401eeb92 BP |
224 | /** |
225 | * is_spoofed_arp - check for invalid ARP packet | |
226 | * | |
227 | * @skb: skbuff containing an Ethernet packet, with network header pointing | |
228 | * just past the Ethernet and optional 802.1Q header. | |
229 | * @key: flow key extracted from @skb by flow_extract() | |
230 | * | |
231 | * Returns true if @skb is an invalid Ethernet+IPv4 ARP packet: one with screwy | |
232 | * or truncated header fields or one whose inner and outer Ethernet address | |
233 | * differ. | |
234 | */ | |
36956a7d | 235 | static bool is_spoofed_arp(struct sk_buff *skb, const struct sw_flow_key *key) |
401eeb92 BP |
236 | { |
237 | struct arp_eth_header *arp; | |
238 | ||
239 | if (key->dl_type != htons(ETH_P_ARP)) | |
240 | return false; | |
241 | ||
242 | if (skb_network_offset(skb) + sizeof(struct arp_eth_header) > skb->len) | |
243 | return true; | |
244 | ||
245 | arp = (struct arp_eth_header *)skb_network_header(skb); | |
246 | return (arp->ar_hrd != htons(ARPHRD_ETHER) || | |
247 | arp->ar_pro != htons(ETH_P_IP) || | |
248 | arp->ar_hln != ETH_ALEN || | |
249 | arp->ar_pln != 4 || | |
250 | compare_ether_addr(arp->ar_sha, eth_hdr(skb)->h_source)); | |
251 | } | |
252 | ||
fceb2a5b | 253 | static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) |
064af421 | 254 | { |
e779d8d9 | 255 | struct vport *p; |
064af421 BP |
256 | |
257 | if (!skb) | |
258 | goto error; | |
259 | ||
f2459fe7 | 260 | p = rcu_dereference(dp->ports[out_port]); |
064af421 BP |
261 | if (!p) |
262 | goto error; | |
263 | ||
e779d8d9 | 264 | vport_send(p, skb); |
064af421 BP |
265 | return; |
266 | ||
267 | error: | |
268 | kfree_skb(skb); | |
269 | } | |
270 | ||
856081f6 BP |
271 | static int output_control(struct datapath *dp, struct sk_buff *skb, u64 arg, |
272 | const struct sw_flow_key *key) | |
064af421 | 273 | { |
856081f6 BP |
274 | struct dp_upcall_info upcall; |
275 | ||
7956695a | 276 | skb = skb_clone(skb, GFP_ATOMIC); |
064af421 BP |
277 | if (!skb) |
278 | return -ENOMEM; | |
856081f6 | 279 | |
982b8810 | 280 | upcall.cmd = ODP_PACKET_CMD_ACTION; |
856081f6 BP |
281 | upcall.key = key; |
282 | upcall.userdata = arg; | |
283 | upcall.sample_pool = 0; | |
284 | upcall.actions = NULL; | |
285 | upcall.actions_len = 0; | |
286 | return dp_upcall(dp, skb, &upcall); | |
064af421 BP |
287 | } |
288 | ||
289 | /* Execute a list of actions against 'skb'. */ | |
871dfe07 | 290 | static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, |
36956a7d | 291 | const struct sw_flow_key *key, |
871dfe07 | 292 | const struct nlattr *actions, u32 actions_len) |
064af421 BP |
293 | { |
294 | /* Every output action needs a separate clone of 'skb', but the common | |
295 | * case is just a single output action, so that doing a clone and | |
296 | * then freeing the original skbuff is wasteful. So the following code | |
297 | * is slightly obscure just to avoid that. */ | |
298 | int prev_port = -1; | |
c1c9c9c4 | 299 | u32 priority = skb->priority; |
cdee00fd BP |
300 | const struct nlattr *a; |
301 | int rem, err; | |
72b06300 | 302 | |
cdee00fd | 303 | for (a = actions, rem = actions_len; rem > 0; a = nla_next(a, &rem)) { |
064af421 | 304 | if (prev_port != -1) { |
7956695a | 305 | do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); |
064af421 BP |
306 | prev_port = -1; |
307 | } | |
308 | ||
cdee00fd | 309 | switch (nla_type(a)) { |
7aec165d | 310 | case ODP_ACTION_ATTR_OUTPUT: |
cdee00fd | 311 | prev_port = nla_get_u32(a); |
064af421 BP |
312 | break; |
313 | ||
7aec165d | 314 | case ODP_ACTION_ATTR_CONTROLLER: |
856081f6 | 315 | err = output_control(dp, skb, nla_get_u64(a), key); |
064af421 BP |
316 | if (err) { |
317 | kfree_skb(skb); | |
318 | return err; | |
319 | } | |
320 | break; | |
321 | ||
7aec165d | 322 | case ODP_ACTION_ATTR_SET_TUNNEL: |
b9298d3f | 323 | OVS_CB(skb)->tun_id = nla_get_be64(a); |
659586ef JG |
324 | break; |
325 | ||
7aec165d | 326 | case ODP_ACTION_ATTR_SET_DL_TCI: |
09842539 | 327 | skb = modify_vlan_tci(skb, nla_get_be16(a)); |
064af421 BP |
328 | break; |
329 | ||
7aec165d | 330 | case ODP_ACTION_ATTR_STRIP_VLAN: |
7956695a | 331 | skb = strip_vlan(skb); |
064af421 BP |
332 | break; |
333 | ||
7aec165d | 334 | case ODP_ACTION_ATTR_SET_DL_SRC: |
cdee00fd BP |
335 | skb = make_writable(skb, 0); |
336 | if (!skb) | |
337 | return -ENOMEM; | |
338 | memcpy(eth_hdr(skb)->h_source, nla_data(a), ETH_ALEN); | |
339 | break; | |
340 | ||
7aec165d | 341 | case ODP_ACTION_ATTR_SET_DL_DST: |
cdee00fd BP |
342 | skb = make_writable(skb, 0); |
343 | if (!skb) | |
344 | return -ENOMEM; | |
345 | memcpy(eth_hdr(skb)->h_dest, nla_data(a), ETH_ALEN); | |
064af421 BP |
346 | break; |
347 | ||
7aec165d BP |
348 | case ODP_ACTION_ATTR_SET_NW_SRC: |
349 | case ODP_ACTION_ATTR_SET_NW_DST: | |
cdee00fd | 350 | skb = set_nw_addr(skb, key, a); |
064af421 BP |
351 | break; |
352 | ||
7aec165d | 353 | case ODP_ACTION_ATTR_SET_NW_TOS: |
cdee00fd | 354 | skb = set_nw_tos(skb, key, nla_get_u8(a)); |
959a2ecd JP |
355 | break; |
356 | ||
7aec165d BP |
357 | case ODP_ACTION_ATTR_SET_TP_SRC: |
358 | case ODP_ACTION_ATTR_SET_TP_DST: | |
cdee00fd | 359 | skb = set_tp_port(skb, key, a); |
064af421 | 360 | break; |
c1c9c9c4 | 361 | |
7aec165d | 362 | case ODP_ACTION_ATTR_SET_PRIORITY: |
cdee00fd | 363 | skb->priority = nla_get_u32(a); |
c1c9c9c4 BP |
364 | break; |
365 | ||
7aec165d | 366 | case ODP_ACTION_ATTR_POP_PRIORITY: |
c1c9c9c4 BP |
367 | skb->priority = priority; |
368 | break; | |
401eeb92 | 369 | |
7aec165d | 370 | case ODP_ACTION_ATTR_DROP_SPOOFED_ARP: |
401eeb92 BP |
371 | if (unlikely(is_spoofed_arp(skb, key))) |
372 | goto exit; | |
373 | break; | |
064af421 BP |
374 | } |
375 | if (!skb) | |
376 | return -ENOMEM; | |
377 | } | |
401eeb92 | 378 | exit: |
064af421 BP |
379 | if (prev_port != -1) |
380 | do_output(dp, skb, prev_port); | |
381 | else | |
382 | kfree_skb(skb); | |
a5225dd6 | 383 | return 0; |
064af421 | 384 | } |
871dfe07 | 385 | |
871dfe07 | 386 | static void sflow_sample(struct datapath *dp, struct sk_buff *skb, |
856081f6 BP |
387 | const struct sw_flow_key *key, |
388 | const struct nlattr *a, u32 actions_len) | |
871dfe07 | 389 | { |
871dfe07 | 390 | struct sk_buff *nskb; |
856081f6 BP |
391 | struct vport *p = OVS_CB(skb)->vport; |
392 | struct dp_upcall_info upcall; | |
393 | ||
394 | if (unlikely(!p)) | |
395 | return; | |
871dfe07 | 396 | |
856081f6 BP |
397 | atomic_inc(&p->sflow_pool); |
398 | if (net_random() >= dp->sflow_probability) | |
871dfe07 BP |
399 | return; |
400 | ||
856081f6 BP |
401 | nskb = skb_clone(skb, GFP_ATOMIC); |
402 | if (unlikely(!nskb)) | |
403 | return; | |
404 | ||
982b8810 | 405 | upcall.cmd = ODP_PACKET_CMD_SAMPLE; |
856081f6 BP |
406 | upcall.key = key; |
407 | upcall.userdata = 0; | |
408 | upcall.sample_pool = atomic_read(&p->sflow_pool); | |
409 | upcall.actions = a; | |
410 | upcall.actions_len = actions_len; | |
411 | dp_upcall(dp, nskb, &upcall); | |
871dfe07 BP |
412 | } |
413 | ||
414 | /* Execute a list of actions against 'skb'. */ | |
415 | int execute_actions(struct datapath *dp, struct sk_buff *skb, | |
36956a7d | 416 | const struct sw_flow_key *key, |
871dfe07 BP |
417 | const struct nlattr *actions, u32 actions_len) |
418 | { | |
856081f6 BP |
419 | if (dp->sflow_probability) |
420 | sflow_sample(dp, skb, key, actions, actions_len); | |
871dfe07 BP |
421 | |
422 | OVS_CB(skb)->tun_id = 0; | |
423 | ||
424 | return do_execute_actions(dp, skb, key, actions, actions_len); | |
425 | } |