datapath/actions: Mark recalculate_csum as likely in set_ipv6_addr().
[mirror_ovs.git] / datapath / actions.c
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "gso.h"
#include "mpls.h"
#include "vlan.h"
#include "vport.h"

static void flow_key_set_priority(struct sk_buff *skb, u32 priority)
{
        OVS_CB(skb)->pkt_key->phy.priority = priority;
}

static void flow_key_set_skb_mark(struct sk_buff *skb, u32 skb_mark)
{
        OVS_CB(skb)->pkt_key->phy.skb_mark = skb_mark;
}

static void flow_key_set_eth_src(struct sk_buff *skb, const u8 addr[])
{
        ether_addr_copy(OVS_CB(skb)->pkt_key->eth.src, addr);
}

static void flow_key_set_eth_dst(struct sk_buff *skb, const u8 addr[])
{
        ether_addr_copy(OVS_CB(skb)->pkt_key->eth.dst, addr);
}

static void flow_key_set_vlan_tci(struct sk_buff *skb, __be16 tci)
{
        OVS_CB(skb)->pkt_key->eth.tci = tci;
}

static void flow_key_set_mpls_top_lse(struct sk_buff *skb, __be32 top_lse)
{
        OVS_CB(skb)->pkt_key->mpls.top_lse = top_lse;
}

static void flow_key_set_ipv4_src(struct sk_buff *skb, __be32 addr)
{
        OVS_CB(skb)->pkt_key->ipv4.addr.src = addr;
}

static void flow_key_set_ipv4_dst(struct sk_buff *skb, __be32 addr)
{
        OVS_CB(skb)->pkt_key->ipv4.addr.dst = addr;
}

static void flow_key_set_ip_tos(struct sk_buff *skb, u8 tos)
{
        OVS_CB(skb)->pkt_key->ip.tos = tos;
}

static void flow_key_set_ip_ttl(struct sk_buff *skb, u8 ttl)
{
        OVS_CB(skb)->pkt_key->ip.ttl = ttl;
}

static void flow_key_set_ipv6_src(struct sk_buff *skb,
                                  const __be32 addr[4])
{
        memcpy(&OVS_CB(skb)->pkt_key->ipv6.addr.src, addr, sizeof(__be32[4]));
}

static void flow_key_set_ipv6_dst(struct sk_buff *skb,
                                  const __be32 addr[4])
{
        memcpy(&OVS_CB(skb)->pkt_key->ipv6.addr.dst, addr, sizeof(__be32[4]));
}

static void flow_key_set_ipv6_fl(struct sk_buff *skb,
                                 const struct ipv6hdr *nh)
{
        OVS_CB(skb)->pkt_key->ipv6.label = *(__be32 *)nh &
                                           htonl(IPV6_FLOWINFO_FLOWLABEL);
}

static void flow_key_set_tp_src(struct sk_buff *skb, __be16 port)
{
        OVS_CB(skb)->pkt_key->tp.src = port;
}

static void flow_key_set_tp_dst(struct sk_buff *skb, __be16 port)
{
        OVS_CB(skb)->pkt_key->tp.dst = port;
}

static void invalidate_skb_flow_key(struct sk_buff *skb)
{
        OVS_CB(skb)->pkt_key->eth.type = htons(0);
}

static bool is_skb_flow_key_valid(struct sk_buff *skb)
{
        return !!OVS_CB(skb)->pkt_key->eth.type;
}

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                              const struct nlattr *attr, int len);

static int make_writable(struct sk_buff *skb, int write_len)
{
        if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
                return 0;

        return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
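
/* Typical usage (illustrative): a caller that wants to rewrite the first
 * N bytes of the packet does
 *
 *      err = make_writable(skb, N);
 *      if (unlikely(err))
 *              return err;
 *      ...rewrite headers...
 *
 * If the skb is cloned and those bytes are not privately writable,
 * pskb_expand_head() copies the header data into a fresh buffer, so the
 * write cannot be observed through the clone's reference.
 */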

/* The end of the mac header.
 *
 * For non-MPLS skbs this will correspond to the network header.
 * For MPLS skbs it will be before the network_header as the MPLS
 * label stack lies between the end of the mac header and the network
 * header. That is, for MPLS skbs the end of the mac header
 * is the top of the MPLS label stack.
 */
static unsigned char *mac_header_end(const struct sk_buff *skb)
{
        return skb_mac_header(skb) + skb->mac_len;
}
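
/* Illustrative layout of an MPLS skb (explanatory sketch):
 *
 *      +--------------+------------------+----------------+
 *      | Ethernet hdr | MPLS label stack | network header |
 *      +--------------+------------------+----------------+
 *      ^              ^                  ^
 *      mac header     mac_header_end()   network header
 *
 * skb->mac_len spans only the Ethernet header (plus any VLAN tags), so
 * mac_header_end() lands on the top MPLS label-stack entry rather than
 * on the network header.
 */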

static int push_mpls(struct sk_buff *skb,
                     const struct ovs_action_push_mpls *mpls)
{
        __be32 *new_mpls_lse;
        struct ethhdr *hdr;

        if (skb_cow_head(skb, MPLS_HLEN) < 0)
                return -ENOMEM;

        skb_push(skb, MPLS_HLEN);
        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);
        skb_reset_mac_header(skb);

        new_mpls_lse = (__be32 *)mac_header_end(skb);
        *new_mpls_lse = mpls->mpls_lse;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
                                                             MPLS_HLEN, 0));

        hdr = eth_hdr(skb);
        hdr->h_proto = mpls->mpls_ethertype;
        if (!ovs_skb_get_inner_protocol(skb))
                ovs_skb_set_inner_protocol(skb, skb->protocol);
        skb->protocol = mpls->mpls_ethertype;
        invalidate_skb_flow_key(skb);
        return 0;
}

static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
{
        struct ethhdr *hdr;
        int err;

        err = make_writable(skb, skb->mac_len + MPLS_HLEN);
        if (unlikely(err))
                return err;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_sub(skb->csum,
                                     csum_partial(mac_header_end(skb),
                                                  MPLS_HLEN, 0));

        memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);

        __skb_pull(skb, MPLS_HLEN);
        skb_reset_mac_header(skb);

        /* mac_header_end() is used to locate the ethertype
         * field correctly in the presence of VLAN tags.
         */
        hdr = (struct ethhdr *)(mac_header_end(skb) - ETH_HLEN);
        hdr->h_proto = ethertype;
        if (eth_p_mpls(skb->protocol))
                skb->protocol = ethertype;
        invalidate_skb_flow_key(skb);
        return 0;
}
static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
{
        __be32 *stack;
        int err;

        err = make_writable(skb, skb->mac_len + MPLS_HLEN);
        if (unlikely(err))
                return err;

        /* make_writable() can move the skb data, so locate the label
         * stack only after it has succeeded. */
        stack = (__be32 *)mac_header_end(skb);

        if (skb->ip_summed == CHECKSUM_COMPLETE) {
                __be32 diff[] = { ~(*stack), *mpls_lse };
                skb->csum = ~csum_partial((char *)diff, sizeof(diff),
                                          ~skb->csum);
        }

        *stack = *mpls_lse;
        flow_key_set_mpls_top_lse(skb, *stack);
        return 0;
}
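
/* The diff[] trick above is the incremental-checksum identity from
 * RFC 1624: for a one's-complement sum HC over data containing word m,
 * replacing m with m' yields
 *
 *      HC' = ~(~HC + ~m + m')
 *
 * csum_partial() over diff[] = { ~m, m' } seeded with ~skb->csum computes
 * exactly ~HC + ~m + m', and the final complement produces HC'.
 */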

/* Remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
        struct vlan_hdr *vhdr;
        int err;

        err = make_writable(skb, VLAN_ETH_HLEN);
        if (unlikely(err))
                return err;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_sub(skb->csum, csum_partial(skb->data
                                        + (2 * ETH_ALEN), VLAN_HLEN, 0));

        vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
        *current_tci = vhdr->h_vlan_TCI;

        memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
        __skb_pull(skb, VLAN_HLEN);

        vlan_set_encap_proto(skb, vhdr);
        skb->mac_header += VLAN_HLEN;
        /* Update mac_len for subsequent MPLS actions */
        skb->mac_len -= VLAN_HLEN;

        return 0;
}

static int pop_vlan(struct sk_buff *skb)
{
        __be16 tci;
        int err;

        if (likely(vlan_tx_tag_present(skb))) {
                vlan_set_tci(skb, 0);
        } else {
                if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
                             skb->len < VLAN_ETH_HLEN))
                        return 0;

                err = __pop_vlan_tci(skb, &tci);
                if (err)
                        return err;
        }
        /* Move next vlan tag to hw accel tag. */
        if (likely(skb->protocol != htons(ETH_P_8021Q) ||
                   skb->len < VLAN_ETH_HLEN)) {
                flow_key_set_vlan_tci(skb, 0);
                return 0;
        }

        invalidate_skb_flow_key(skb);
        err = __pop_vlan_tci(skb, &tci);
        if (unlikely(err))
                return err;

        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
        return 0;
}
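
/* Worked example (illustrative): for a QinQ packet arriving with the
 * outer tag already in the hardware-accelerated slot (skb->vlan_tci)
 * and the inner 802.1Q tag still in the payload, pop_vlan() first
 * clears the accel tag, then sees skb->protocol == htons(ETH_P_8021Q)
 * and promotes the inner tag into the accel slot via
 * __vlan_hwaccel_put_tag(), so a subsequent pop treats it as the new
 * outermost tag.
 */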

static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
        if (unlikely(vlan_tx_tag_present(skb))) {
                u16 current_tag;

                /* push down current VLAN tag */
                current_tag = vlan_tx_tag_get(skb);

                if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
                        return -ENOMEM;

                /* Update mac_len for subsequent MPLS actions */
                skb->mac_len += VLAN_HLEN;

                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->csum = csum_add(skb->csum, csum_partial(skb->data
                                        + (2 * ETH_ALEN), VLAN_HLEN, 0));

                invalidate_skb_flow_key(skb);
        } else {
                flow_key_set_vlan_tci(skb, vlan->vlan_tci);
        }
        __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
        return 0;
}

static int set_eth_addr(struct sk_buff *skb,
                        const struct ovs_key_ethernet *eth_key)
{
        int err;

        err = make_writable(skb, ETH_HLEN);
        if (unlikely(err))
                return err;

        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

        ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
        ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);

        ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

        flow_key_set_eth_src(skb, eth_key->eth_src);
        flow_key_set_eth_dst(skb, eth_key->eth_dst);
        return 0;
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
                        __be32 *addr, __be32 new_addr)
{
        int transport_len = skb->len - skb_transport_offset(skb);

        if (nh->protocol == IPPROTO_TCP) {
                if (likely(transport_len >= sizeof(struct tcphdr)))
                        inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
                                                 *addr, new_addr, 1);
        } else if (nh->protocol == IPPROTO_UDP) {
                if (likely(transport_len >= sizeof(struct udphdr))) {
                        struct udphdr *uh = udp_hdr(skb);

                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                                inet_proto_csum_replace4(&uh->check, skb,
                                                         *addr, new_addr, 1);
                                if (!uh->check)
                                        uh->check = CSUM_MANGLED_0;
                        }
                }
        }

        csum_replace4(&nh->check, *addr, new_addr);
        skb_clear_hash(skb);
        *addr = new_addr;
}
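
/* The trailing '1' passed to inet_proto_csum_replace4() above marks the
 * rewritten field as part of the TCP/UDP pseudo-header: the L4 checksum
 * covers the IP addresses even though they live in the network header,
 * and the flag also lets the helper keep CHECKSUM_PARTIAL and
 * CHECKSUM_COMPLETE state consistent for offloaded skbs.
 */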

static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
                                 __be32 addr[4], const __be32 new_addr[4])
{
        int transport_len = skb->len - skb_transport_offset(skb);

        if (l4_proto == IPPROTO_TCP) {
                if (likely(transport_len >= sizeof(struct tcphdr)))
                        inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
                                                  addr, new_addr, 1);
        } else if (l4_proto == IPPROTO_UDP) {
                if (likely(transport_len >= sizeof(struct udphdr))) {
                        struct udphdr *uh = udp_hdr(skb);

                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                                inet_proto_csum_replace16(&uh->check, skb,
                                                          addr, new_addr, 1);
                                if (!uh->check)
                                        uh->check = CSUM_MANGLED_0;
                        }
                }
        }
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
                          __be32 addr[4], const __be32 new_addr[4],
                          bool recalculate_csum)
{
        if (likely(recalculate_csum))
                update_ipv6_checksum(skb, l4_proto, addr, new_addr);

        skb_clear_hash(skb);
        memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
{
        nh->priority = tc >> 4;
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
{
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | ((fl & 0x000F0000) >> 16);
        nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
        nh->flow_lbl[2] = fl & 0x000000FF;
}
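
/* First 32 bits of the IPv6 header, as edited by the two helpers above:
 *
 *      | 4 bits  | 8 bits        | 20 bits    |
 *      | version | traffic class | flow label |
 *
 * nh->priority holds the high nibble of the traffic class; the low
 * nibble sits in the top half of nh->flow_lbl[0], and the 20-bit flow
 * label fills the remaining two and a half bytes - hence the 0x0F/0xF0
 * nibble masks in set_ipv6_tc() and set_ipv6_fl().
 */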

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
{
        csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
        nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
{
        struct iphdr *nh;
        int err;

        err = make_writable(skb, skb_network_offset(skb) +
                                 sizeof(struct iphdr));
        if (unlikely(err))
                return err;

        nh = ip_hdr(skb);

        if (ipv4_key->ipv4_src != nh->saddr) {
                set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
                flow_key_set_ipv4_src(skb, ipv4_key->ipv4_src);
        }

        if (ipv4_key->ipv4_dst != nh->daddr) {
                set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
                flow_key_set_ipv4_dst(skb, ipv4_key->ipv4_dst);
        }

        if (ipv4_key->ipv4_tos != nh->tos) {
                ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
                flow_key_set_ip_tos(skb, nh->tos);
        }

        if (ipv4_key->ipv4_ttl != nh->ttl) {
                set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
                flow_key_set_ip_ttl(skb, ipv4_key->ipv4_ttl);
        }

        return 0;
}

static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
{
        struct ipv6hdr *nh;
        int err;
        __be32 *saddr;
        __be32 *daddr;

        err = make_writable(skb, skb_network_offset(skb) +
                                 sizeof(struct ipv6hdr));
        if (unlikely(err))
                return err;

        nh = ipv6_hdr(skb);
        saddr = (__be32 *)&nh->saddr;
        daddr = (__be32 *)&nh->daddr;

        if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
                set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
                              ipv6_key->ipv6_src, true);
                flow_key_set_ipv6_src(skb, ipv6_key->ipv6_src);
        }

        if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
                unsigned int offset = 0;
                int flags = OVS_IP6T_FH_F_SKIP_RH;
                bool recalc_csum = true;

                if (ipv6_ext_hdr(nh->nexthdr))
                        recalc_csum = ipv6_find_hdr(skb, &offset,
                                                    NEXTHDR_ROUTING, NULL,
                                                    &flags) != NEXTHDR_ROUTING;

                set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
                              ipv6_key->ipv6_dst, recalc_csum);
                flow_key_set_ipv6_dst(skb, ipv6_key->ipv6_dst);
        }

        set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
        flow_key_set_ip_tos(skb, ipv6_get_dsfield(nh));

        set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
        flow_key_set_ipv6_fl(skb, nh);

        nh->hop_limit = ipv6_key->ipv6_hlimit;
        flow_key_set_ip_ttl(skb, ipv6_key->ipv6_hlimit);
        return 0;
}

/* Must follow make_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
                        __be16 new_port, __sum16 *check)
{
        inet_proto_csum_replace2(check, skb, *port, new_port, 0);
        *port = new_port;
        skb_clear_hash(skb);
}

static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
{
        struct udphdr *uh = udp_hdr(skb);

        if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
                set_tp_port(skb, port, new_port, &uh->check);

                if (!uh->check)
                        uh->check = CSUM_MANGLED_0;
        } else {
                *port = new_port;
                skb_clear_hash(skb);
        }
}
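
/* A transmitted UDP checksum of zero means "no checksum" (RFC 768), so
 * two special cases apply above: when uh->check is already zero the
 * datagram stays checksum-less and only the port is rewritten, and when
 * an updated checksum folds to zero it is stored as CSUM_MANGLED_0
 * (0xffff), the equivalent value in one's-complement arithmetic.
 */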

static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
{
        struct udphdr *uh;
        int err;

        err = make_writable(skb, skb_transport_offset(skb) +
                                 sizeof(struct udphdr));
        if (unlikely(err))
                return err;

        uh = udp_hdr(skb);
        if (udp_port_key->udp_src != uh->source) {
                set_udp_port(skb, &uh->source, udp_port_key->udp_src);
                flow_key_set_tp_src(skb, udp_port_key->udp_src);
        }

        if (udp_port_key->udp_dst != uh->dest) {
                set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
                flow_key_set_tp_dst(skb, udp_port_key->udp_dst);
        }

        return 0;
}

static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
{
        struct tcphdr *th;
        int err;

        err = make_writable(skb, skb_transport_offset(skb) +
                                 sizeof(struct tcphdr));
        if (unlikely(err))
                return err;

        th = tcp_hdr(skb);
        if (tcp_port_key->tcp_src != th->source) {
                set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
                flow_key_set_tp_src(skb, tcp_port_key->tcp_src);
        }

        if (tcp_port_key->tcp_dst != th->dest) {
                set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
                flow_key_set_tp_dst(skb, tcp_port_key->tcp_dst);
        }

        return 0;
}

static int set_sctp(struct sk_buff *skb,
                    const struct ovs_key_sctp *sctp_port_key)
{
        struct sctphdr *sh;
        int err;
        unsigned int sctphoff = skb_transport_offset(skb);

        err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
        if (unlikely(err))
                return err;

        sh = sctp_hdr(skb);
        if (sctp_port_key->sctp_src != sh->source ||
            sctp_port_key->sctp_dst != sh->dest) {
                __le32 old_correct_csum, new_csum, old_csum;

                old_csum = sh->checksum;
                old_correct_csum = sctp_compute_cksum(skb, sctphoff);

                sh->source = sctp_port_key->sctp_src;
                sh->dest = sctp_port_key->sctp_dst;

                new_csum = sctp_compute_cksum(skb, sctphoff);

                /* Carry any checksum errors through. */
                sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

                skb_clear_hash(skb);
                flow_key_set_tp_src(skb, sctp_port_key->sctp_src);
                flow_key_set_tp_dst(skb, sctp_port_key->sctp_dst);
        }

        return 0;
}
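
/* Worked algebra for the "carry errors through" line in set_sctp():
 * with E = old_csum ^ old_correct_csum as the packet's pre-existing
 * CRC32c error term,
 *
 *      old_csum ^ old_correct_csum ^ new_csum = new_csum ^ E
 *
 * so a correct packet (E == 0) gets exactly new_csum, while a corrupt
 * one stays exactly as wrong as it was instead of being silently
 * repaired by the rewrite.
 */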

static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
        struct vport *vport = ovs_vport_rcu(dp, out_port);

        if (likely(vport))
                ovs_vport_send(vport, skb);
        else
                kfree_skb(skb);
}

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                            const struct nlattr *attr)
{
        struct dp_upcall_info upcall;
        const struct nlattr *a;
        int rem;

        upcall.cmd = OVS_PACKET_CMD_ACTION;
        upcall.userdata = NULL;
        upcall.portid = 0;

        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
             a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
                case OVS_USERSPACE_ATTR_USERDATA:
                        upcall.userdata = a;
                        break;

                case OVS_USERSPACE_ATTR_PID:
                        upcall.portid = nla_get_u32(a);
                        break;
                }
        }

        return ovs_dp_upcall(dp, skb, &upcall);
}

static bool last_action(const struct nlattr *a, int rem)
{
        return a->nla_len == rem;
}

static int sample(struct datapath *dp, struct sk_buff *skb,
                  const struct nlattr *attr)
{
        struct sw_flow_key sample_key;
        const struct nlattr *acts_list = NULL;
        const struct nlattr *a;
        struct sk_buff *sample_skb;
        int rem;

        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
             a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
                case OVS_SAMPLE_ATTR_PROBABILITY:
                        if (prandom_u32() >= nla_get_u32(a))
                                return 0;
                        break;

                case OVS_SAMPLE_ATTR_ACTIONS:
                        acts_list = a;
                        break;
                }
        }

        rem = nla_len(acts_list);
        a = nla_data(acts_list);

        /* The actions list is either empty or only contains a single
         * user-space action, the latter being a special case as it is
         * the only known usage of the sample action.
         * In these special cases don't clone the skb as there are no
         * side-effects in the nested actions.
         * Otherwise, clone in case the nested actions have side effects. */
        if (likely(rem == 0 ||
                   (nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
                    last_action(a, rem)))) {
                sample_skb = skb;
                skb_get(skb);
        } else {
                sample_skb = skb_clone(skb, GFP_ATOMIC);
                if (!sample_skb)
                        /* Skip the sample action when out of memory. */
                        return 0;

                sample_key = *OVS_CB(skb)->pkt_key;
                OVS_CB(sample_skb)->pkt_key = &sample_key;
        }

        /* Note that do_execute_actions() never consumes skb.
         * In the case where skb has been cloned above it is the clone that
         * is consumed. Otherwise the skb_get(skb) call prevents
         * consumption by do_execute_actions(). Thus, it is safe to simply
         * return the error code and let the caller (also
         * do_execute_actions()) free skb on error. */
        return do_execute_actions(dp, sample_skb, a, rem);
}

static void execute_hash(struct sk_buff *skb, const struct nlattr *attr)
{
        struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
        struct ovs_action_hash *hash_act = nla_data(attr);
        u32 hash = 0;

        /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
        hash = skb_get_hash(skb);
        hash = jhash_1word(hash, hash_act->hash_basis);
        if (!hash)
                hash = 0x1;

        key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
                              const struct nlattr *nested_attr)
{
        int err = 0;

        switch (nla_type(nested_attr)) {
        case OVS_KEY_ATTR_PRIORITY:
                skb->priority = nla_get_u32(nested_attr);
                flow_key_set_priority(skb, skb->priority);
                break;

        case OVS_KEY_ATTR_SKB_MARK:
                skb->mark = nla_get_u32(nested_attr);
                flow_key_set_skb_mark(skb, skb->mark);
                break;

        case OVS_KEY_ATTR_TUNNEL_INFO:
                OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
                break;

        case OVS_KEY_ATTR_ETHERNET:
                err = set_eth_addr(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_IPV4:
                err = set_ipv4(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_IPV6:
                err = set_ipv6(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_TCP:
                err = set_tcp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_UDP:
                err = set_udp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_SCTP:
                err = set_sctp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_MPLS:
                err = set_mpls(skb, nla_data(nested_attr));
                break;
        }

        return err;
}

static void flow_key_clone_recirc(struct sk_buff *skb, u32 recirc_id,
                                  struct sw_flow_key *recirc_key)
{
        *recirc_key = *OVS_CB(skb)->pkt_key;
        recirc_key->recirc_id = recirc_id;
        OVS_CB(skb)->pkt_key = recirc_key;
}

static void flow_key_set_recirc_id(struct sk_buff *skb, u32 recirc_id)
{
        OVS_CB(skb)->pkt_key->recirc_id = recirc_id;
}

static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
                          const struct nlattr *a, int rem)
{
        struct sw_flow_key recirc_key;
        int err;

        if (!last_action(a, rem)) {
                /* Recirc action is not the last action
                 * of the action list. */
                skb = skb_clone(skb, GFP_ATOMIC);

                /* Skip the recirc action when out of memory, but
                 * continue on with the rest of the action list. */
                if (!skb)
                        return 0;
        }

        if (is_skb_flow_key_valid(skb)) {
                if (!last_action(a, rem))
                        flow_key_clone_recirc(skb, nla_get_u32(a), &recirc_key);
                else
                        flow_key_set_recirc_id(skb, nla_get_u32(a));
        } else {
                struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key;

                err = ovs_flow_key_extract_recirc(nla_get_u32(a), pkt_key,
                                                  skb, &recirc_key);
                if (err) {
                        kfree_skb(skb);
                        return err;
                }
        }

        ovs_dp_process_packet(skb, true);
        return 0;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                              const struct nlattr *attr, int len)
{
        /* Every output action needs a separate clone of 'skb', but the common
         * case is just a single output action, so that doing a clone and
         * then freeing the original skbuff is wasteful. So the following code
         * is slightly obscure just to avoid that. */
        int prev_port = -1;
        const struct nlattr *a;
        int rem;

        for (a = attr, rem = len; rem > 0;
             a = nla_next(a, &rem)) {
                int err = 0;

                if (unlikely(prev_port != -1)) {
                        struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);

                        if (out_skb)
                                do_output(dp, out_skb, prev_port);

                        prev_port = -1;
                }

                switch (nla_type(a)) {
                case OVS_ACTION_ATTR_OUTPUT:
                        prev_port = nla_get_u32(a);
                        break;

                case OVS_ACTION_ATTR_USERSPACE:
                        output_userspace(dp, skb, a);
                        break;

                case OVS_ACTION_ATTR_HASH:
                        execute_hash(skb, a);
                        break;

                case OVS_ACTION_ATTR_PUSH_MPLS:
                        err = push_mpls(skb, nla_data(a));
                        break;

                case OVS_ACTION_ATTR_POP_MPLS:
                        err = pop_mpls(skb, nla_get_be16(a));
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        err = push_vlan(skb, nla_data(a));
                        if (unlikely(err)) /* skb already freed. */
                                return err;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        err = pop_vlan(skb);
                        break;

                case OVS_ACTION_ATTR_RECIRC:
                        err = execute_recirc(dp, skb, a, rem);
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = execute_set_action(skb, nla_data(a));
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = sample(dp, skb, a);
                        break;
                }

                if (unlikely(err)) {
                        kfree_skb(skb);
                        return err;
                }
        }

        if (prev_port != -1)
                do_output(dp, skb, prev_port);
        else
                consume_skb(skb);

        return 0;
}

/* We limit the number of times that we pass into execute_actions()
 * to avoid blowing out the stack in the event that we have a loop.
 *
 * Each loop adds some (estimated) cost to the kernel stack.
 * The loop terminates when the max cost is exceeded.
 */
#define RECIRC_STACK_COST 1
#define DEFAULT_STACK_COST 4
/* Allow up to 4 regular services, and up to 3 recirculations */
#define MAX_STACK_COST (DEFAULT_STACK_COST * 4 + RECIRC_STACK_COST * 3)
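
/* With these values MAX_STACK_COST = 4 * 4 + 1 * 3 = 19, so four
 * top-level passes (cost 4 each) plus three recirculations (cost 1
 * each) fit exactly; one more pass pushes the per-CPU stack_cost past
 * the limit and ovs_execute_actions() flags the flow as looping.
 */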

struct loop_counter {
        u8 stack_cost;          /* loop stack cost. */
        bool looping;           /* Loop detected? */
};

static DEFINE_PER_CPU(struct loop_counter, loop_counters);

static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
{
        if (net_ratelimit())
                pr_warn("%s: flow loop detected, dropping\n",
                        ovs_dp_name(dp));
        actions->actions_len = 0;
        return -ELOOP;
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, bool recirc)
{
        struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
        const u8 stack_cost = recirc ? RECIRC_STACK_COST : DEFAULT_STACK_COST;
        struct loop_counter *loop;
        int error;

        /* Check whether we've looped too much. */
        loop = &__get_cpu_var(loop_counters);
        loop->stack_cost += stack_cost;
        if (unlikely(loop->stack_cost > MAX_STACK_COST))
                loop->looping = true;
        if (unlikely(loop->looping)) {
                error = loop_suppress(dp, acts);
                kfree_skb(skb);
                goto out_loop;
        }

        error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);

        /* Check whether sub-actions looped too much. */
        if (unlikely(loop->looping))
                error = loop_suppress(dp, acts);

out_loop:
        /* Decrement loop stack cost. */
        loop->stack_cost -= stack_cost;
        if (!loop->stack_cost)
                loop->looping = false;

        return error;
}