]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Distributed under the terms of the GNU GPL version 2. | |
982b8810 | 3 | * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks. |
a14bc59f BP |
4 | * |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
064af421 BP |
7 | */ |
8 | ||
9 | /* Functions for executing flow actions. */ | |
10 | ||
11 | #include <linux/skbuff.h> | |
12 | #include <linux/in.h> | |
13 | #include <linux/ip.h> | |
14 | #include <linux/tcp.h> | |
15 | #include <linux/udp.h> | |
16 | #include <linux/in6.h> | |
401eeb92 | 17 | #include <linux/if_arp.h> |
064af421 | 18 | #include <linux/if_vlan.h> |
f1193301 | 19 | #include <net/inet_ecn.h> |
064af421 BP |
20 | #include <net/ip.h> |
21 | #include <net/checksum.h> | |
f2459fe7 | 22 | |
064af421 | 23 | #include "actions.h" |
dd8d6b8c | 24 | #include "checksum.h" |
f2459fe7 | 25 | #include "datapath.h" |
a4af2475 | 26 | #include "loop_counter.h" |
064af421 | 27 | #include "openvswitch/datapath-protocol.h" |
6ce39213 | 28 | #include "vlan.h" |
f2459fe7 | 29 | #include "vport.h" |
064af421 | 30 | |
/* Forward declaration: defined further down in this file. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_actions *acts);
871dfe07 | 33 | |
10db8b20 | 34 | static int make_writable(struct sk_buff *skb, int write_len) |
064af421 | 35 | { |
10db8b20 JG |
36 | if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) |
37 | return 0; | |
0cd8a05e | 38 | |
10db8b20 | 39 | return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
064af421 BP |
40 | } |
41 | ||
10db8b20 | 42 | static int strip_vlan(struct sk_buff *skb) |
064af421 | 43 | { |
064af421 | 44 | struct ethhdr *eh; |
10db8b20 | 45 | int err; |
064af421 | 46 | |
6ce39213 JG |
47 | if (vlan_tx_tag_present(skb)) { |
48 | vlan_set_tci(skb, 0); | |
10db8b20 | 49 | return 0; |
6ce39213 JG |
50 | } |
51 | ||
1d0f14d4 | 52 | if (unlikely(skb->protocol != htons(ETH_P_8021Q) || |
6ce39213 | 53 | skb->len < VLAN_ETH_HLEN)) |
10db8b20 | 54 | return 0; |
064af421 | 55 | |
10db8b20 JG |
56 | err = make_writable(skb, VLAN_ETH_HLEN); |
57 | if (unlikely(err)) | |
58 | return err; | |
6ce39213 | 59 | |
dd8d6b8c | 60 | if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) |
635c9298 JG |
61 | skb->csum = csum_sub(skb->csum, csum_partial(skb->data |
62 | + ETH_HLEN, VLAN_HLEN, 0)); | |
63 | ||
6ce39213 | 64 | memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); |
064af421 BP |
65 | |
66 | eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); | |
67 | ||
68 | skb->protocol = eh->h_proto; | |
69 | skb->mac_header += VLAN_HLEN; | |
70 | ||
10db8b20 | 71 | return 0; |
064af421 BP |
72 | } |
73 | ||
10db8b20 | 74 | static int modify_vlan_tci(struct sk_buff *skb, __be16 tci) |
064af421 | 75 | { |
1d0f14d4 | 76 | if (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_8021Q)) { |
10db8b20 JG |
77 | int err; |
78 | ||
1d0f14d4 | 79 | if (unlikely(skb->len < VLAN_ETH_HLEN)) |
10db8b20 | 80 | return 0; |
064af421 | 81 | |
10db8b20 JG |
82 | err = strip_vlan(skb); |
83 | if (unlikely(err)) | |
84 | return err; | |
064af421 BP |
85 | } |
86 | ||
10db8b20 JG |
87 | __vlan_hwaccel_put_tag(skb, ntohs(tci)); |
88 | ||
89 | return 0; | |
064af421 BP |
90 | } |
91 | ||
a4af2475 | 92 | static bool is_ip(struct sk_buff *skb) |
ca78c6b6 | 93 | { |
76abe283 | 94 | return (OVS_CB(skb)->flow->key.eth.type == htons(ETH_P_IP) && |
ca78c6b6 BP |
95 | skb->transport_header > skb->network_header); |
96 | } | |
97 | ||
a4af2475 | 98 | static __sum16 *get_l4_checksum(struct sk_buff *skb) |
ca78c6b6 | 99 | { |
28bad473 | 100 | u8 nw_proto = OVS_CB(skb)->flow->key.ip.proto; |
ca78c6b6 | 101 | int transport_len = skb->len - skb_transport_offset(skb); |
a4af2475 | 102 | if (nw_proto == IPPROTO_TCP) { |
ca78c6b6 BP |
103 | if (likely(transport_len >= sizeof(struct tcphdr))) |
104 | return &tcp_hdr(skb)->check; | |
a4af2475 | 105 | } else if (nw_proto == IPPROTO_UDP) { |
ca78c6b6 BP |
106 | if (likely(transport_len >= sizeof(struct udphdr))) |
107 | return &udp_hdr(skb)->check; | |
108 | } | |
109 | return NULL; | |
110 | } | |
111 | ||
10db8b20 | 112 | static int set_nw_addr(struct sk_buff *skb, const struct nlattr *a) |
064af421 | 113 | { |
cdee00fd | 114 | __be32 new_nwaddr = nla_get_be32(a); |
ca78c6b6 BP |
115 | struct iphdr *nh; |
116 | __sum16 *check; | |
117 | __be32 *nwaddr; | |
10db8b20 | 118 | int err; |
ca78c6b6 | 119 | |
a4af2475 | 120 | if (unlikely(!is_ip(skb))) |
10db8b20 | 121 | return 0; |
064af421 | 122 | |
10db8b20 JG |
123 | err = make_writable(skb, skb_network_offset(skb) + |
124 | sizeof(struct iphdr)); | |
125 | if (unlikely(err)) | |
126 | return err; | |
ca78c6b6 BP |
127 | |
128 | nh = ip_hdr(skb); | |
7aec165d | 129 | nwaddr = nla_type(a) == ODP_ACTION_ATTR_SET_NW_SRC ? &nh->saddr : &nh->daddr; |
ca78c6b6 | 130 | |
a4af2475 | 131 | check = get_l4_checksum(skb); |
ca78c6b6 | 132 | if (likely(check)) |
cdee00fd BP |
133 | inet_proto_csum_replace4(check, skb, *nwaddr, new_nwaddr, 1); |
134 | csum_replace4(&nh->check, *nwaddr, new_nwaddr); | |
ca78c6b6 | 135 | |
a4a26436 SH |
136 | skb_clear_rxhash(skb); |
137 | ||
cdee00fd | 138 | *nwaddr = new_nwaddr; |
ca78c6b6 | 139 | |
10db8b20 | 140 | return 0; |
064af421 BP |
141 | } |
142 | ||
10db8b20 | 143 | static int set_nw_tos(struct sk_buff *skb, u8 nw_tos) |
959a2ecd | 144 | { |
10db8b20 JG |
145 | struct iphdr *nh = ip_hdr(skb); |
146 | u8 old, new; | |
147 | int err; | |
148 | ||
a4af2475 | 149 | if (unlikely(!is_ip(skb))) |
10db8b20 JG |
150 | return 0; |
151 | ||
152 | err = make_writable(skb, skb_network_offset(skb) + | |
153 | sizeof(struct iphdr)); | |
154 | if (unlikely(err)) | |
155 | return err; | |
156 | ||
157 | /* Set the DSCP bits and preserve the ECN bits. */ | |
158 | old = nh->tos; | |
159 | new = nw_tos | (nh->tos & INET_ECN_MASK); | |
160 | csum_replace4(&nh->check, (__force __be32)old, | |
161 | (__force __be32)new); | |
162 | nh->tos = new; | |
163 | ||
164 | return 0; | |
959a2ecd JP |
165 | } |
166 | ||
10db8b20 | 167 | static int set_tp_port(struct sk_buff *skb, const struct nlattr *a) |
064af421 | 168 | { |
ca78c6b6 BP |
169 | struct udphdr *th; |
170 | __sum16 *check; | |
171 | __be16 *port; | |
10db8b20 | 172 | int err; |
064af421 | 173 | |
a4af2475 | 174 | if (unlikely(!is_ip(skb))) |
10db8b20 | 175 | return 0; |
064af421 | 176 | |
10db8b20 JG |
177 | err = make_writable(skb, skb_transport_offset(skb) + |
178 | sizeof(struct tcphdr)); | |
179 | if (unlikely(err)) | |
180 | return err; | |
ca78c6b6 BP |
181 | |
182 | /* Must follow make_writable() since that can move the skb data. */ | |
a4af2475 | 183 | check = get_l4_checksum(skb); |
ca78c6b6 | 184 | if (unlikely(!check)) |
10db8b20 | 185 | return 0; |
064af421 | 186 | |
ca78c6b6 BP |
187 | /* |
188 | * Update port and checksum. | |
189 | * | |
190 | * This is OK because source and destination port numbers are at the | |
191 | * same offsets in both UDP and TCP headers, and get_l4_checksum() only | |
192 | * supports those protocols. | |
193 | */ | |
194 | th = udp_hdr(skb); | |
7aec165d | 195 | port = nla_type(a) == ODP_ACTION_ATTR_SET_TP_SRC ? &th->source : &th->dest; |
cdee00fd BP |
196 | inet_proto_csum_replace2(check, skb, *port, nla_get_be16(a), 0); |
197 | *port = nla_get_be16(a); | |
a4a26436 | 198 | skb_clear_rxhash(skb); |
ca78c6b6 | 199 | |
10db8b20 | 200 | return 0; |
064af421 BP |
201 | } |
202 | ||
fceb2a5b | 203 | static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) |
064af421 | 204 | { |
e779d8d9 | 205 | struct vport *p; |
064af421 BP |
206 | |
207 | if (!skb) | |
208 | goto error; | |
209 | ||
f2459fe7 | 210 | p = rcu_dereference(dp->ports[out_port]); |
064af421 BP |
211 | if (!p) |
212 | goto error; | |
213 | ||
e779d8d9 | 214 | vport_send(p, skb); |
064af421 BP |
215 | return; |
216 | ||
217 | error: | |
218 | kfree_skb(skb); | |
219 | } | |
220 | ||
b85d8d61 | 221 | static int output_userspace(struct datapath *dp, struct sk_buff *skb, u64 arg) |
064af421 | 222 | { |
856081f6 BP |
223 | struct dp_upcall_info upcall; |
224 | ||
7956695a | 225 | skb = skb_clone(skb, GFP_ATOMIC); |
064af421 BP |
226 | if (!skb) |
227 | return -ENOMEM; | |
856081f6 | 228 | |
982b8810 | 229 | upcall.cmd = ODP_PACKET_CMD_ACTION; |
a4af2475 | 230 | upcall.key = &OVS_CB(skb)->flow->key; |
856081f6 BP |
231 | upcall.userdata = arg; |
232 | upcall.sample_pool = 0; | |
233 | upcall.actions = NULL; | |
234 | upcall.actions_len = 0; | |
235 | return dp_upcall(dp, skb, &upcall); | |
064af421 BP |
236 | } |
237 | ||
238 | /* Execute a list of actions against 'skb'. */ | |
871dfe07 | 239 | static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, |
a4af2475 | 240 | struct sw_flow_actions *acts) |
064af421 BP |
241 | { |
242 | /* Every output action needs a separate clone of 'skb', but the common | |
243 | * case is just a single output action, so that doing a clone and | |
244 | * then freeing the original skbuff is wasteful. So the following code | |
245 | * is slightly obscure just to avoid that. */ | |
246 | int prev_port = -1; | |
c1c9c9c4 | 247 | u32 priority = skb->priority; |
cdee00fd | 248 | const struct nlattr *a; |
10db8b20 | 249 | int rem; |
72b06300 | 250 | |
a4af2475 BP |
251 | for (a = acts->actions, rem = acts->actions_len; rem > 0; |
252 | a = nla_next(a, &rem)) { | |
10db8b20 JG |
253 | int err = 0; |
254 | ||
064af421 | 255 | if (prev_port != -1) { |
7956695a | 256 | do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); |
064af421 BP |
257 | prev_port = -1; |
258 | } | |
259 | ||
cdee00fd | 260 | switch (nla_type(a)) { |
7aec165d | 261 | case ODP_ACTION_ATTR_OUTPUT: |
cdee00fd | 262 | prev_port = nla_get_u32(a); |
064af421 BP |
263 | break; |
264 | ||
b85d8d61 PS |
265 | case ODP_ACTION_ATTR_USERSPACE: |
266 | err = output_userspace(dp, skb, nla_get_u64(a)); | |
064af421 BP |
267 | break; |
268 | ||
7aec165d | 269 | case ODP_ACTION_ATTR_SET_TUNNEL: |
b9298d3f | 270 | OVS_CB(skb)->tun_id = nla_get_be64(a); |
659586ef JG |
271 | break; |
272 | ||
7aec165d | 273 | case ODP_ACTION_ATTR_SET_DL_TCI: |
10db8b20 | 274 | err = modify_vlan_tci(skb, nla_get_be16(a)); |
064af421 BP |
275 | break; |
276 | ||
7aec165d | 277 | case ODP_ACTION_ATTR_STRIP_VLAN: |
10db8b20 | 278 | err = strip_vlan(skb); |
064af421 BP |
279 | break; |
280 | ||
7aec165d | 281 | case ODP_ACTION_ATTR_SET_DL_SRC: |
10db8b20 JG |
282 | err = make_writable(skb, ETH_HLEN); |
283 | if (likely(!err)) | |
284 | memcpy(eth_hdr(skb)->h_source, nla_data(a), ETH_ALEN); | |
cdee00fd BP |
285 | break; |
286 | ||
7aec165d | 287 | case ODP_ACTION_ATTR_SET_DL_DST: |
10db8b20 JG |
288 | err = make_writable(skb, ETH_HLEN); |
289 | if (likely(!err)) | |
290 | memcpy(eth_hdr(skb)->h_dest, nla_data(a), ETH_ALEN); | |
064af421 BP |
291 | break; |
292 | ||
7aec165d BP |
293 | case ODP_ACTION_ATTR_SET_NW_SRC: |
294 | case ODP_ACTION_ATTR_SET_NW_DST: | |
10db8b20 | 295 | err = set_nw_addr(skb, a); |
064af421 BP |
296 | break; |
297 | ||
7aec165d | 298 | case ODP_ACTION_ATTR_SET_NW_TOS: |
10db8b20 | 299 | err = set_nw_tos(skb, nla_get_u8(a)); |
959a2ecd JP |
300 | break; |
301 | ||
7aec165d BP |
302 | case ODP_ACTION_ATTR_SET_TP_SRC: |
303 | case ODP_ACTION_ATTR_SET_TP_DST: | |
10db8b20 | 304 | err = set_tp_port(skb, a); |
064af421 | 305 | break; |
c1c9c9c4 | 306 | |
7aec165d | 307 | case ODP_ACTION_ATTR_SET_PRIORITY: |
cdee00fd | 308 | skb->priority = nla_get_u32(a); |
c1c9c9c4 BP |
309 | break; |
310 | ||
7aec165d | 311 | case ODP_ACTION_ATTR_POP_PRIORITY: |
c1c9c9c4 BP |
312 | skb->priority = priority; |
313 | break; | |
064af421 | 314 | } |
10db8b20 JG |
315 | |
316 | if (unlikely(err)) { | |
317 | kfree_skb(skb); | |
318 | return err; | |
319 | } | |
064af421 | 320 | } |
6c222e55 | 321 | |
064af421 BP |
322 | if (prev_port != -1) |
323 | do_output(dp, skb, prev_port); | |
324 | else | |
5b95ab0e | 325 | consume_skb(skb); |
10db8b20 | 326 | |
a5225dd6 | 327 | return 0; |
064af421 | 328 | } |
871dfe07 | 329 | |
871dfe07 | 330 | static void sflow_sample(struct datapath *dp, struct sk_buff *skb, |
a4af2475 | 331 | struct sw_flow_actions *acts) |
871dfe07 | 332 | { |
871dfe07 | 333 | struct sk_buff *nskb; |
856081f6 BP |
334 | struct vport *p = OVS_CB(skb)->vport; |
335 | struct dp_upcall_info upcall; | |
336 | ||
337 | if (unlikely(!p)) | |
338 | return; | |
871dfe07 | 339 | |
856081f6 BP |
340 | atomic_inc(&p->sflow_pool); |
341 | if (net_random() >= dp->sflow_probability) | |
871dfe07 BP |
342 | return; |
343 | ||
856081f6 BP |
344 | nskb = skb_clone(skb, GFP_ATOMIC); |
345 | if (unlikely(!nskb)) | |
346 | return; | |
347 | ||
982b8810 | 348 | upcall.cmd = ODP_PACKET_CMD_SAMPLE; |
a4af2475 | 349 | upcall.key = &OVS_CB(skb)->flow->key; |
856081f6 BP |
350 | upcall.userdata = 0; |
351 | upcall.sample_pool = atomic_read(&p->sflow_pool); | |
a4af2475 BP |
352 | upcall.actions = acts->actions; |
353 | upcall.actions_len = acts->actions_len; | |
856081f6 | 354 | dp_upcall(dp, nskb, &upcall); |
871dfe07 BP |
355 | } |
356 | ||
357 | /* Execute a list of actions against 'skb'. */ | |
a4af2475 | 358 | int execute_actions(struct datapath *dp, struct sk_buff *skb) |
871dfe07 | 359 | { |
a4af2475 BP |
360 | struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); |
361 | struct loop_counter *loop; | |
362 | int error; | |
363 | ||
364 | /* Check whether we've looped too much. */ | |
365 | loop = loop_get_counter(); | |
366 | if (unlikely(++loop->count > MAX_LOOPS)) | |
367 | loop->looping = true; | |
368 | if (unlikely(loop->looping)) { | |
369 | error = loop_suppress(dp, acts); | |
370 | kfree_skb(skb); | |
371 | goto out_loop; | |
372 | } | |
871dfe07 | 373 | |
a4af2475 BP |
374 | /* Really execute actions. */ |
375 | if (dp->sflow_probability) | |
376 | sflow_sample(dp, skb, acts); | |
871dfe07 | 377 | OVS_CB(skb)->tun_id = 0; |
a4af2475 BP |
378 | error = do_execute_actions(dp, skb, acts); |
379 | ||
380 | /* Check whether sub-actions looped too much. */ | |
381 | if (unlikely(loop->looping)) | |
382 | error = loop_suppress(dp, acts); | |
383 | ||
384 | out_loop: | |
385 | /* Decrement loop counter. */ | |
386 | if (!--loop->count) | |
387 | loop->looping = false; | |
388 | loop_put_counter(); | |
871dfe07 | 389 | |
a4af2475 | 390 | return error; |
871dfe07 | 391 | } |