/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "gso.h"
#include "vlan.h"
#include "vport.h"

static void flow_key_clone(struct sk_buff *skb, struct sw_flow_key *new_key)
{
        *new_key = *OVS_CB(skb)->pkt_key;
        OVS_CB(skb)->pkt_key = new_key;
}

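/* Note: flow_key_clone() does not clone the skb itself; it snapshots
 * the current flow key into caller-provided (typically on-stack)
 * storage and repoints OVS_CB(skb)->pkt_key at it, so that nested
 * action lists (sample, recirc) can rewrite the key without
 * disturbing the copy used by the outer action list.
 */
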
static void flow_key_set_recirc_id(struct sk_buff *skb, u32 recirc_id)
{
        OVS_CB(skb)->pkt_key->recirc_id = recirc_id;
}

static void flow_key_set_priority(struct sk_buff *skb, u32 priority)
{
        OVS_CB(skb)->pkt_key->phy.priority = priority;
}

static void flow_key_set_skb_mark(struct sk_buff *skb, u32 skb_mark)
{
        OVS_CB(skb)->pkt_key->phy.skb_mark = skb_mark;
}

static void flow_key_set_eth_src(struct sk_buff *skb, const u8 addr[])
{
        ether_addr_copy(OVS_CB(skb)->pkt_key->eth.src, addr);
}

static void flow_key_set_eth_dst(struct sk_buff *skb, const u8 addr[])
{
        ether_addr_copy(OVS_CB(skb)->pkt_key->eth.dst, addr);
}

static void flow_key_set_vlan_tci(struct sk_buff *skb, __be16 tci)
{
        OVS_CB(skb)->pkt_key->eth.tci = tci;
}

static void flow_key_set_mpls_top_lse(struct sk_buff *skb, __be32 top_lse)
{
        OVS_CB(skb)->pkt_key->mpls.top_lse = top_lse;
}

static void flow_key_set_ipv4_src(struct sk_buff *skb, __be32 addr)
{
        OVS_CB(skb)->pkt_key->ipv4.addr.src = addr;
}

static void flow_key_set_ipv4_dst(struct sk_buff *skb, __be32 addr)
{
        /* Writes the destination slot; the original mistakenly wrote .src. */
        OVS_CB(skb)->pkt_key->ipv4.addr.dst = addr;
}

static void flow_key_set_ip_tos(struct sk_buff *skb, u8 tos)
{
        OVS_CB(skb)->pkt_key->ip.tos = tos;
}

static void flow_key_set_ip_ttl(struct sk_buff *skb, u8 ttl)
{
        OVS_CB(skb)->pkt_key->ip.ttl = ttl;
}

static void flow_key_set_ipv6_src(struct sk_buff *skb,
                                  const __be32 addr[4])
{
        memcpy(&OVS_CB(skb)->pkt_key->ipv6.addr.src, addr, sizeof(__be32[4]));
}

static void flow_key_set_ipv6_dst(struct sk_buff *skb,
                                  const __be32 addr[4])
{
        memcpy(&OVS_CB(skb)->pkt_key->ipv6.addr.dst, addr, sizeof(__be32[4]));
}

static void flow_key_set_ipv6_fl(struct sk_buff *skb,
                                 const struct ipv6hdr *nh)
{
        OVS_CB(skb)->pkt_key->ipv6.label = *(__be32 *)nh &
                                           htonl(IPV6_FLOWINFO_FLOWLABEL);
}

static void flow_key_set_tp_src(struct sk_buff *skb, __be16 port)
{
        OVS_CB(skb)->pkt_key->tp.src = port;
}

static void flow_key_set_tp_dst(struct sk_buff *skb, __be16 port)
{
        OVS_CB(skb)->pkt_key->tp.dst = port;
}

static void invalidate_skb_flow_key(struct sk_buff *skb)
{
        OVS_CB(skb)->pkt_key->eth.type = htons(0);
}

static bool is_skb_flow_key_valid(struct sk_buff *skb)
{
        return !!OVS_CB(skb)->pkt_key->eth.type;
}

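/* A zeroed Ethertype doubles as the "key needs re-extraction" flag:
 * every successfully extracted key carries a non-zero eth.type, so
 * actions that edit headers only have to clear this one field and
 * execute_recirc() re-derives the rest on demand.
 */
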
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                              const struct nlattr *attr, int len);

static int make_writable(struct sk_buff *skb, int write_len)
{
        if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
                return 0;

        return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}

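/* make_writable() returns 0 without copying when the skb is unshared
 * or the first write_len bytes are already private. When it does fall
 * back to pskb_expand_head(), the packet data may move, so callers
 * must re-derive any cached header pointers afterwards.
 */
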
/* The end of the mac header.
 *
 * For non-MPLS skbs this will correspond to the network header.
 * For MPLS skbs it will be before the network_header as the MPLS
 * label stack lies between the end of the mac header and the network
 * header. That is, for MPLS skbs the end of the mac header
 * is the top of the MPLS label stack.
 */
static unsigned char *mac_header_end(const struct sk_buff *skb)
{
        return skb_mac_header(skb) + skb->mac_len;
}

static int push_mpls(struct sk_buff *skb,
                     const struct ovs_action_push_mpls *mpls)
{
        __be32 *new_mpls_lse;
        struct ethhdr *hdr;

        if (skb_cow_head(skb, MPLS_HLEN) < 0)
                return -ENOMEM;

        skb_push(skb, MPLS_HLEN);
        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);
        skb_reset_mac_header(skb);

        new_mpls_lse = (__be32 *)mac_header_end(skb);
        *new_mpls_lse = mpls->mpls_lse;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
                                                             MPLS_HLEN, 0));

        hdr = eth_hdr(skb);
        hdr->h_proto = mpls->mpls_ethertype;
        if (!ovs_skb_get_inner_protocol(skb))
                ovs_skb_set_inner_protocol(skb, skb->protocol);
        skb->protocol = mpls->mpls_ethertype;
        invalidate_skb_flow_key(skb);
        return 0;
}

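/* Worked example: pushing one label onto an untagged IPv4 frame turns
 *      | ETH (h_proto=0x0800) | IPv4 ... |
 * into
 *      | ETH (h_proto=mpls_ethertype) | LSE | IPv4 ... |
 * The mac header slides back by MPLS_HLEN (4 bytes) while the network
 * header stays put, so mac_header_end() now points at the new LSE.
 */
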
static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
{
        struct ethhdr *hdr;
        int err;

        err = make_writable(skb, skb->mac_len + MPLS_HLEN);
        if (unlikely(err))
                return err;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_sub(skb->csum,
                                     csum_partial(mac_header_end(skb),
                                                  MPLS_HLEN, 0));

        memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);

        __skb_pull(skb, MPLS_HLEN);
        skb_reset_mac_header(skb);

        /* mac_header_end() is used to locate the ethertype
         * field correctly in the presence of VLAN tags.
         */
        hdr = (struct ethhdr *)(mac_header_end(skb) - ETH_HLEN);
        hdr->h_proto = ethertype;
        if (eth_p_mpls(skb->protocol))
                skb->protocol = ethertype;
        invalidate_skb_flow_key(skb);
        return 0;
}

static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
{
        __be32 *stack;
        int err;

        err = make_writable(skb, skb->mac_len + MPLS_HLEN);
        if (unlikely(err))
                return err;

        /* Compute the stack pointer only after make_writable(), which
         * may reallocate and move the packet data. */
        stack = (__be32 *)mac_header_end(skb);
        if (skb->ip_summed == CHECKSUM_COMPLETE) {
                __be32 diff[] = { ~(*stack), *mpls_lse };

                skb->csum = ~csum_partial((char *)diff, sizeof(diff),
                                          ~skb->csum);
        }

        *stack = *mpls_lse;
        flow_key_set_mpls_top_lse(skb, *stack);
        return 0;
}

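/* The CHECKSUM_COMPLETE adjustment above is the classic incremental
 * update (cf. RFC 1624): folding { ~old_lse, new_lse } into the
 * inverted running sum yields csum' = csum - old + new in one's
 * complement arithmetic, without re-summing the packet.
 */
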
/* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
        struct vlan_hdr *vhdr;
        int err;

        err = make_writable(skb, VLAN_ETH_HLEN);
        if (unlikely(err))
                return err;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_sub(skb->csum, csum_partial(skb->data
                                        + (2 * ETH_ALEN), VLAN_HLEN, 0));

        vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
        *current_tci = vhdr->h_vlan_TCI;

        memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
        __skb_pull(skb, VLAN_HLEN);

        vlan_set_encap_proto(skb, vhdr);
        skb->mac_header += VLAN_HLEN;
        /* Update mac_len for subsequent MPLS actions */
        skb->mac_len -= VLAN_HLEN;

        return 0;
}

static int pop_vlan(struct sk_buff *skb)
{
        __be16 tci;
        int err;

        if (likely(vlan_tx_tag_present(skb))) {
                vlan_set_tci(skb, 0);
        } else {
                if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
                             skb->len < VLAN_ETH_HLEN))
                        return 0;

                err = __pop_vlan_tci(skb, &tci);
                if (err)
                        return err;
        }
        /* move next vlan tag to hw accel tag */
        if (likely(skb->protocol != htons(ETH_P_8021Q) ||
                   skb->len < VLAN_ETH_HLEN)) {
                flow_key_set_vlan_tci(skb, 0);
                return 0;
        }

        invalidate_skb_flow_key(skb);
        err = __pop_vlan_tci(skb, &tci);
        if (unlikely(err))
                return err;

        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
        return 0;
}

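/* Note: for a double-tagged (QinQ) frame this strips only the outer
 * tag; the closing __vlan_hwaccel_put_tag() then promotes the newly
 * exposed inner 802.1Q tag into the hardware-accelerated tag so that
 * a later pop_vlan() sees the same fast path.
 */
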
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
        if (unlikely(vlan_tx_tag_present(skb))) {
                u16 current_tag;

                /* push down current VLAN tag */
                current_tag = vlan_tx_tag_get(skb);

                if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
                        return -ENOMEM;

                /* Update mac_len for subsequent MPLS actions */
                skb->mac_len += VLAN_HLEN;

                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->csum = csum_add(skb->csum, csum_partial(skb->data
                                        + (2 * ETH_ALEN), VLAN_HLEN, 0));

                invalidate_skb_flow_key(skb);
        } else {
                flow_key_set_vlan_tci(skb, vlan->vlan_tci);
        }
        __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid,
                               ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
        return 0;
}

static int set_eth_addr(struct sk_buff *skb,
                        const struct ovs_key_ethernet *eth_key)
{
        int err;

        err = make_writable(skb, ETH_HLEN);
        if (unlikely(err))
                return err;

        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

        ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
        ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);

        ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

        flow_key_set_eth_src(skb, eth_key->eth_src);
        flow_key_set_eth_dst(skb, eth_key->eth_dst);
        return 0;
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
                        __be32 *addr, __be32 new_addr)
{
        int transport_len = skb->len - skb_transport_offset(skb);

        if (nh->protocol == IPPROTO_TCP) {
                if (likely(transport_len >= sizeof(struct tcphdr)))
                        inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
                                                 *addr, new_addr, 1);
        } else if (nh->protocol == IPPROTO_UDP) {
                if (likely(transport_len >= sizeof(struct udphdr))) {
                        struct udphdr *uh = udp_hdr(skb);

                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                                inet_proto_csum_replace4(&uh->check, skb,
                                                         *addr, new_addr, 1);
                                if (!uh->check)
                                        uh->check = CSUM_MANGLED_0;
                        }
                }
        }

        csum_replace4(&nh->check, *addr, new_addr);
        skb_clear_hash(skb);
        *addr = new_addr;
}

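/* TCP and UDP checksums cover an IPv4 pseudo-header that includes the
 * addresses, so rewriting an address must patch the transport
 * checksum as well. For UDP over IPv4 a checksum of zero means
 * "none", which is why a recomputed value of zero is stored as
 * CSUM_MANGLED_0 (0xffff) instead.
 */
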
static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
                                 __be32 addr[4], const __be32 new_addr[4])
{
        int transport_len = skb->len - skb_transport_offset(skb);

        if (l4_proto == NEXTHDR_TCP) {
                if (likely(transport_len >= sizeof(struct tcphdr)))
                        inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
                                                  addr, new_addr, 1);
        } else if (l4_proto == NEXTHDR_UDP) {
                if (likely(transport_len >= sizeof(struct udphdr))) {
                        struct udphdr *uh = udp_hdr(skb);

                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                                inet_proto_csum_replace16(&uh->check, skb,
                                                          addr, new_addr, 1);
                                if (!uh->check)
                                        uh->check = CSUM_MANGLED_0;
                        }
                }
        } else if (l4_proto == NEXTHDR_ICMP) {
                if (likely(transport_len >= sizeof(struct icmp6hdr)))
                        inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
                                                  skb, addr, new_addr, 1);
        }
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
                          __be32 addr[4], const __be32 new_addr[4],
                          bool recalculate_csum)
{
        if (likely(recalculate_csum))
                update_ipv6_checksum(skb, l4_proto, addr, new_addr);

        skb_clear_hash(skb);
        memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
{
        nh->priority = tc >> 4;
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
{
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
        nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
        nh->flow_lbl[2] = fl & 0x000000FF;
}

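/* The 20-bit flow label straddles three header bytes: the low nibble
 * of flow_lbl[0] carries bits 19:16 (its high nibble is the lower
 * half of the traffic class), flow_lbl[1] carries bits 15:8 and
 * flow_lbl[2] bits 7:0. E.g. fl = 0x12345 stores nibble 0x1, then
 * bytes 0x23 and 0x45.
 */
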
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
{
        csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
        nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
{
        struct iphdr *nh;
        int err;

        err = make_writable(skb, skb_network_offset(skb) +
                            sizeof(struct iphdr));
        if (unlikely(err))
                return err;

        nh = ip_hdr(skb);

        if (ipv4_key->ipv4_src != nh->saddr) {
                set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
                flow_key_set_ipv4_src(skb, ipv4_key->ipv4_src);
        }

        if (ipv4_key->ipv4_dst != nh->daddr) {
                set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
                flow_key_set_ipv4_dst(skb, ipv4_key->ipv4_dst);
        }

        if (ipv4_key->ipv4_tos != nh->tos) {
                ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
                flow_key_set_ip_tos(skb, nh->tos);
        }

        if (ipv4_key->ipv4_ttl != nh->ttl) {
                set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
                flow_key_set_ip_ttl(skb, ipv4_key->ipv4_ttl);
        }

        return 0;
}

static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
{
        struct ipv6hdr *nh;
        __be32 *saddr;
        __be32 *daddr;
        int err;

        err = make_writable(skb, skb_network_offset(skb) +
                            sizeof(struct ipv6hdr));
        if (unlikely(err))
                return err;

        nh = ipv6_hdr(skb);
        saddr = (__be32 *)&nh->saddr;
        daddr = (__be32 *)&nh->daddr;

        if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
                set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
                              ipv6_key->ipv6_src, true);
                flow_key_set_ipv6_src(skb, ipv6_key->ipv6_src);
        }

        if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
                unsigned int offset = 0;
                int flags = OVS_IP6T_FH_F_SKIP_RH;
                bool recalc_csum = true;

                if (ipv6_ext_hdr(nh->nexthdr))
                        recalc_csum = ipv6_find_hdr(skb, &offset,
                                                    NEXTHDR_ROUTING, NULL,
                                                    &flags) != NEXTHDR_ROUTING;

                set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
                              ipv6_key->ipv6_dst, recalc_csum);
                flow_key_set_ipv6_dst(skb, ipv6_key->ipv6_dst);
        }

        set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
        flow_key_set_ip_tos(skb, ipv6_get_dsfield(nh));

        set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
        flow_key_set_ipv6_fl(skb, nh);

        nh->hop_limit = ipv6_key->ipv6_hlimit;
        flow_key_set_ip_ttl(skb, ipv6_key->ipv6_hlimit);
        return 0;
}

/* Must follow make_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
                        __be16 new_port, __sum16 *check)
{
        inet_proto_csum_replace2(check, skb, *port, new_port, 0);
        *port = new_port;
        skb_clear_hash(skb);
}

static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
{
        struct udphdr *uh = udp_hdr(skb);

        if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
                set_tp_port(skb, port, new_port, &uh->check);

                if (!uh->check)
                        uh->check = CSUM_MANGLED_0;
        } else {
                *port = new_port;
                skb_clear_hash(skb);
        }
}

static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
{
        struct udphdr *uh;
        int err;

        err = make_writable(skb, skb_transport_offset(skb) +
                            sizeof(struct udphdr));
        if (unlikely(err))
                return err;

        uh = udp_hdr(skb);

        if (udp_port_key->udp_src != uh->source) {
                set_udp_port(skb, &uh->source, udp_port_key->udp_src);
                flow_key_set_tp_src(skb, udp_port_key->udp_src);
        }

        if (udp_port_key->udp_dst != uh->dest) {
                set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
                flow_key_set_tp_dst(skb, udp_port_key->udp_dst);
        }

        return 0;
}

static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
{
        struct tcphdr *th;
        int err;

        err = make_writable(skb, skb_transport_offset(skb) +
                            sizeof(struct tcphdr));
        if (unlikely(err))
                return err;

        th = tcp_hdr(skb);

        if (tcp_port_key->tcp_src != th->source) {
                set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
                flow_key_set_tp_src(skb, tcp_port_key->tcp_src);
        }

        if (tcp_port_key->tcp_dst != th->dest) {
                set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
                flow_key_set_tp_dst(skb, tcp_port_key->tcp_dst);
        }

        return 0;
}

static int set_sctp(struct sk_buff *skb,
                    const struct ovs_key_sctp *sctp_port_key)
{
        struct sctphdr *sh;
        unsigned int sctphoff = skb_transport_offset(skb);
        int err;

        err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
        if (unlikely(err))
                return err;

        sh = sctp_hdr(skb);

        if (sctp_port_key->sctp_src != sh->source ||
            sctp_port_key->sctp_dst != sh->dest) {
                __le32 old_correct_csum, new_csum, old_csum;

                old_csum = sh->checksum;
                old_correct_csum = sctp_compute_cksum(skb, sctphoff);

                sh->source = sctp_port_key->sctp_src;
                sh->dest = sctp_port_key->sctp_dst;

                new_csum = sctp_compute_cksum(skb, sctphoff);

                /* Carry any checksum errors through. */
                sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

                skb_clear_hash(skb);
                flow_key_set_tp_src(skb, sctp_port_key->sctp_src);
                flow_key_set_tp_dst(skb, sctp_port_key->sctp_dst);
        }

        return 0;
}

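/* The XOR above deliberately carries any pre-existing CRC32c error
 * through the rewrite: old_csum ^ old_correct_csum isolates the error
 * term (zero for a valid packet), and XORing it onto new_csum applies
 * the same error to the updated packet instead of silently repairing
 * a corrupt frame.
 */
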
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
        struct vport *vport = ovs_vport_rcu(dp, out_port);

        if (likely(vport))
                ovs_vport_send(vport, skb);
        else
                kfree_skb(skb);
}

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                            const struct nlattr *attr)
{
        struct dp_upcall_info upcall;
        const struct nlattr *a;
        struct ovs_tunnel_info info;
        int rem;

        upcall.cmd = OVS_PACKET_CMD_ACTION;
        upcall.userdata = NULL;
        upcall.portid = 0;
        upcall.egress_tun_info = NULL;

        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
             a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
                case OVS_USERSPACE_ATTR_USERDATA:
                        upcall.userdata = a;
                        break;

                case OVS_USERSPACE_ATTR_PID:
                        upcall.portid = nla_get_u32(a);
                        break;

                case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
                        /* Get out tunnel info. */
                        struct vport *vport;

                        vport = ovs_vport_rcu(dp, nla_get_u32(a));
                        if (vport) {
                                int err;

                                err = ovs_vport_get_egress_tun_info(vport, skb,
                                                                    &info);
                                if (!err)
                                        upcall.egress_tun_info = &info;
                        }
                        break;
                }

                } /* End of switch. */
        }

        return ovs_dp_upcall(dp, skb, &upcall);
}

static bool last_action(const struct nlattr *a, int rem)
{
        return a->nla_len == rem;
}

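/* 'rem' still counts the current attribute's bytes, so an attribute
 * whose own nla_len equals 'rem' must be the final one in the stream
 * (nla_next() would leave nothing to iterate).
 */
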
static int sample(struct datapath *dp, struct sk_buff *skb,
                  const struct nlattr *attr)
{
        struct sw_flow_key sample_key;
        const struct nlattr *acts_list = NULL;
        const struct nlattr *a;
        int rem;

        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
             a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
                case OVS_SAMPLE_ATTR_PROBABILITY:
                        if (prandom_u32() >= nla_get_u32(a))
                                return 0;
                        break;

                case OVS_SAMPLE_ATTR_ACTIONS:
                        acts_list = a;
                        break;
                }
        }

        rem = nla_len(acts_list);
        a = nla_data(acts_list);

        /* Actions list is empty, do nothing */
        if (unlikely(rem == 0))
                return 0;

        /* The only known usage of sample action is having a single user-space
         * action. Treat this usage as a special case.
         * The output_userspace() should clone the skb to be sent to the
         * user space. This skb will be consumed by its caller. */
        if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
                   last_action(a, rem)))
                return output_userspace(dp, skb, a);

        skb = skb_clone(skb, GFP_ATOMIC);
        if (!skb)
                /* Skip the sample action when out of memory. */
                return 0;

        flow_key_clone(skb, &sample_key);

        /* do_execute_actions() will consume the cloned skb. */
        return do_execute_actions(dp, skb, a, rem);
}

static void execute_hash(struct sk_buff *skb, const struct nlattr *attr)
{
        struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
        struct ovs_action_hash *hash_act = nla_data(attr);
        u32 hash = 0;

        /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
        hash = skb_get_hash(skb);
        hash = jhash_1word(hash, hash_act->hash_basis);
        if (!hash)
                hash = 0x1;

        key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
                              const struct nlattr *nested_attr)
{
        int err = 0;

        switch (nla_type(nested_attr)) {
        case OVS_KEY_ATTR_PRIORITY:
                skb->priority = nla_get_u32(nested_attr);
                flow_key_set_priority(skb, skb->priority);
                break;

        case OVS_KEY_ATTR_SKB_MARK:
                skb->mark = nla_get_u32(nested_attr);
                flow_key_set_skb_mark(skb, skb->mark);
                break;

        case OVS_KEY_ATTR_TUNNEL_INFO:
                OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
                break;

        case OVS_KEY_ATTR_ETHERNET:
                err = set_eth_addr(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_IPV4:
                err = set_ipv4(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_IPV6:
                err = set_ipv6(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_TCP:
                err = set_tcp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_UDP:
                err = set_udp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_SCTP:
                err = set_sctp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_MPLS:
                err = set_mpls(skb, nla_data(nested_attr));
                break;
        }

        return err;
}

static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
                          const struct nlattr *a, int rem)
{
        struct sw_flow_key recirc_key;
        int err;

        if (!is_skb_flow_key_valid(skb)) {
                /* A header rewrite invalidated the key; re-extract it
                 * before recirculating. */
                err = ovs_flow_key_update(skb, OVS_CB(skb)->pkt_key);
                if (err)
                        return err;
        }
        BUG_ON(!is_skb_flow_key_valid(skb));

        if (!last_action(a, rem)) {
                /* Recirc action is not the last action
                 * of the action list. */
                skb = skb_clone(skb, GFP_ATOMIC);

                /* Skip the recirc action when out of memory, but
                 * continue on with the rest of the action list. */
                if (!skb)
                        return 0;

                flow_key_clone(skb, &recirc_key);
        }

        flow_key_set_recirc_id(skb, nla_get_u32(a));
        ovs_dp_process_packet(skb, true);
        return 0;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                              const struct nlattr *attr, int len)
{
        /* Every output action needs a separate clone of 'skb', but the common
         * case is just a single output action, so that doing a clone and
         * then freeing the original skbuff is wasteful. So the following code
         * is slightly obscure just to avoid that. */
        int prev_port = -1;
        const struct nlattr *a;
        int rem;

        for (a = attr, rem = len; rem > 0;
             a = nla_next(a, &rem)) {
                int err = 0;

                if (unlikely(prev_port != -1)) {
                        struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);

                        if (out_skb)
                                do_output(dp, out_skb, prev_port);

                        prev_port = -1;
                }

                switch (nla_type(a)) {
                case OVS_ACTION_ATTR_OUTPUT:
                        prev_port = nla_get_u32(a);
                        break;

                case OVS_ACTION_ATTR_USERSPACE:
                        output_userspace(dp, skb, a);
                        break;

                case OVS_ACTION_ATTR_HASH:
                        execute_hash(skb, a);
                        break;

                case OVS_ACTION_ATTR_PUSH_MPLS:
                        err = push_mpls(skb, nla_data(a));
                        break;

                case OVS_ACTION_ATTR_POP_MPLS:
                        err = pop_mpls(skb, nla_get_be16(a));
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        err = push_vlan(skb, nla_data(a));
                        if (unlikely(err)) /* skb already freed. */
                                return err;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        err = pop_vlan(skb);
                        break;

                case OVS_ACTION_ATTR_RECIRC:
                        err = execute_recirc(dp, skb, a, rem);
                        if (last_action(a, rem)) {
                                /* If this is the last action, the skb has
                                 * been consumed or freed.
                                 * Return immediately. */
                                return err;
                        }
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = execute_set_action(skb, nla_data(a));
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = sample(dp, skb, a);
                        break;
                }

                if (unlikely(err)) {
                        kfree_skb(skb);
                        return err;
                }
        }

        if (prev_port != -1)
                do_output(dp, skb, prev_port);
        else
                consume_skb(skb);

        return 0;
}

/* We limit the number of times that we pass into execute_actions()
 * to avoid blowing out the stack in the event that we have a loop.
 *
 * Each loop adds some (estimated) cost to the kernel stack.
 * The loop terminates when the max cost is exceeded.
 */
#define RECIRC_STACK_COST 1
#define DEFAULT_STACK_COST 4
/* Allow up to 4 regular action-list executions, and up to 3 recirculations */
#define MAX_STACK_COST (DEFAULT_STACK_COST * 4 + RECIRC_STACK_COST * 3)
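/* With the weights above the budget works out to
 * 4 * DEFAULT_STACK_COST + 3 * RECIRC_STACK_COST = 19 cost units
 * outstanding per CPU before loop suppression kicks in.
 */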

struct loop_counter {
        u8 stack_cost;          /* loop stack cost. */
        bool looping;           /* Loop detected? */
};

static DEFINE_PER_CPU(struct loop_counter, loop_counters);

static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
{
        if (net_ratelimit())
                pr_warn("%s: flow loop detected, dropping\n",
                        ovs_dp_name(dp));
        actions->actions_len = 0;
        return -ELOOP;
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        struct sw_flow_actions *acts, bool recirc)
{
        const u8 stack_cost = recirc ? RECIRC_STACK_COST : DEFAULT_STACK_COST;
        struct loop_counter *loop;
        int error;

        /* Check whether we've looped too much. */
        loop = &__get_cpu_var(loop_counters);
        loop->stack_cost += stack_cost;
        if (unlikely(loop->stack_cost > MAX_STACK_COST))
                loop->looping = true;
        if (unlikely(loop->looping)) {
                error = loop_suppress(dp, acts);
                kfree_skb(skb);
                goto out_loop;
        }

        error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);

        /* Check whether sub-actions looped too much. */
        if (unlikely(loop->looping))
                error = loop_suppress(dp, acts);

out_loop:
        /* Decrement loop stack cost. */
        loop->stack_cost -= stack_cost;
        if (!loop->stack_cost)
                loop->looping = false;

        return error;
}