2 * Distributed under the terms of the GNU GPL version 2.
3 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 /* Functions for executing flow actions. */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/skbuff.h>
16 #include <linux/openvswitch.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/in6.h>
20 #include <linux/if_arp.h>
21 #include <linux/if_vlan.h>
23 #include <net/checksum.h>
24 #include <net/dsfield.h>
31 static int do_execute_actions(struct datapath
*dp
, struct sk_buff
*skb
,
32 const struct nlattr
*attr
, int len
, bool keep_skb
);
34 static int make_writable(struct sk_buff
*skb
, int write_len
)
36 if (!skb_cloned(skb
) || skb_clone_writable(skb
, write_len
))
39 return pskb_expand_head(skb
, 0, 0, GFP_ATOMIC
);
42 /* Remove VLAN header from packet and update csum accordingly. */
43 static int __pop_vlan_tci(struct sk_buff
*skb
, __be16
*current_tci
)
46 struct vlan_ethhdr
*veth
;
49 err
= make_writable(skb
, VLAN_ETH_HLEN
);
53 if (get_ip_summed(skb
) == OVS_CSUM_COMPLETE
)
54 skb
->csum
= csum_sub(skb
->csum
, csum_partial(skb
->data
55 + ETH_HLEN
, VLAN_HLEN
, 0));
57 veth
= (struct vlan_ethhdr
*) skb
->data
;
58 *current_tci
= veth
->h_vlan_TCI
;
60 memmove(skb
->data
+ VLAN_HLEN
, skb
->data
, 2 * ETH_ALEN
);
62 eh
= (struct ethhdr
*)__skb_pull(skb
, VLAN_HLEN
);
64 skb
->protocol
= eh
->h_proto
;
65 skb
->mac_header
+= VLAN_HLEN
;
70 static int pop_vlan(struct sk_buff
*skb
)
75 if (likely(vlan_tx_tag_present(skb
))) {
78 if (unlikely(skb
->protocol
!= htons(ETH_P_8021Q
) ||
79 skb
->len
< VLAN_ETH_HLEN
))
82 err
= __pop_vlan_tci(skb
, &tci
);
86 /* move next vlan tag to hw accel tag */
87 if (likely(skb
->protocol
!= htons(ETH_P_8021Q
) ||
88 skb
->len
< VLAN_ETH_HLEN
))
91 err
= __pop_vlan_tci(skb
, &tci
);
95 __vlan_hwaccel_put_tag(skb
, ntohs(tci
));
99 static int push_vlan(struct sk_buff
*skb
, const struct ovs_key_8021q
*q_key
)
101 if (unlikely(vlan_tx_tag_present(skb
))) {
104 /* push down current VLAN tag */
105 current_tag
= vlan_tx_tag_get(skb
);
107 if (!__vlan_put_tag(skb
, current_tag
))
110 if (get_ip_summed(skb
) == OVS_CSUM_COMPLETE
)
111 skb
->csum
= csum_add(skb
->csum
, csum_partial(skb
->data
112 + ETH_HLEN
, VLAN_HLEN
, 0));
115 __vlan_hwaccel_put_tag(skb
, ntohs(q_key
->q_tci
));
119 static int set_eth_addr(struct sk_buff
*skb
,
120 const struct ovs_key_ethernet
*eth_key
)
123 err
= make_writable(skb
, ETH_HLEN
);
127 memcpy(eth_hdr(skb
)->h_source
, eth_key
->eth_src
, ETH_HLEN
);
128 memcpy(eth_hdr(skb
)->h_dest
, eth_key
->eth_dst
, ETH_HLEN
);
133 static void set_ip_addr(struct sk_buff
*skb
, struct iphdr
*nh
,
134 __be32
*addr
, __be32 new_addr
)
136 int transport_len
= skb
->len
- skb_transport_offset(skb
);
138 if (nh
->protocol
== IPPROTO_TCP
) {
139 if (likely(transport_len
>= sizeof(struct tcphdr
)))
140 inet_proto_csum_replace4(&tcp_hdr(skb
)->check
, skb
,
142 } else if (nh
->protocol
== IPPROTO_UDP
) {
143 if (likely(transport_len
>= sizeof(struct udphdr
)))
144 inet_proto_csum_replace4(&udp_hdr(skb
)->check
, skb
,
148 csum_replace4(&nh
->check
, *addr
, new_addr
);
149 skb_clear_rxhash(skb
);
153 static void set_ip_ttl(struct sk_buff
*skb
, struct iphdr
*nh
, u8 new_ttl
)
155 csum_replace2(&nh
->check
, htons(nh
->ttl
<< 8), htons(new_ttl
<< 8));
159 static int set_ipv4(struct sk_buff
*skb
, const struct ovs_key_ipv4
*ipv4_key
)
164 err
= make_writable(skb
, skb_network_offset(skb
) +
165 sizeof(struct iphdr
));
171 if (ipv4_key
->ipv4_src
!= nh
->saddr
)
172 set_ip_addr(skb
, nh
, &nh
->saddr
, ipv4_key
->ipv4_src
);
174 if (ipv4_key
->ipv4_dst
!= nh
->daddr
)
175 set_ip_addr(skb
, nh
, &nh
->daddr
, ipv4_key
->ipv4_dst
);
177 if (ipv4_key
->ipv4_tos
!= nh
->tos
)
178 ipv4_change_dsfield(nh
, 0, ipv4_key
->ipv4_tos
);
180 if (ipv4_key
->ipv4_ttl
!= nh
->ttl
)
181 set_ip_ttl(skb
, nh
, ipv4_key
->ipv4_ttl
);
186 /* Must follow make_writable() since that can move the skb data. */
187 static void set_tp_port(struct sk_buff
*skb
, __be16
*port
,
188 __be16 new_port
, __sum16
*check
)
190 inet_proto_csum_replace2(check
, skb
, *port
, new_port
, 0);
192 skb_clear_rxhash(skb
);
195 static int set_udp_port(struct sk_buff
*skb
,
196 const struct ovs_key_udp
*udp_port_key
)
201 err
= make_writable(skb
, skb_transport_offset(skb
) +
202 sizeof(struct udphdr
));
207 if (udp_port_key
->udp_src
!= uh
->source
)
208 set_tp_port(skb
, &uh
->source
, udp_port_key
->udp_src
, &uh
->check
);
210 if (udp_port_key
->udp_dst
!= uh
->dest
)
211 set_tp_port(skb
, &uh
->dest
, udp_port_key
->udp_dst
, &uh
->check
);
216 static int set_tcp_port(struct sk_buff
*skb
,
217 const struct ovs_key_tcp
*tcp_port_key
)
222 err
= make_writable(skb
, skb_transport_offset(skb
) +
223 sizeof(struct tcphdr
));
228 if (tcp_port_key
->tcp_src
!= th
->source
)
229 set_tp_port(skb
, &th
->source
, tcp_port_key
->tcp_src
, &th
->check
);
231 if (tcp_port_key
->tcp_dst
!= th
->dest
)
232 set_tp_port(skb
, &th
->dest
, tcp_port_key
->tcp_dst
, &th
->check
);
237 static int do_output(struct datapath
*dp
, struct sk_buff
*skb
, int out_port
)
244 vport
= rcu_dereference(dp
->ports
[out_port
]);
245 if (unlikely(!vport
)) {
250 vport_send(vport
, skb
);
254 static int output_userspace(struct datapath
*dp
, struct sk_buff
*skb
,
255 const struct nlattr
*attr
)
257 struct dp_upcall_info upcall
;
258 const struct nlattr
*a
;
261 upcall
.cmd
= OVS_PACKET_CMD_ACTION
;
262 upcall
.key
= &OVS_CB(skb
)->flow
->key
;
263 upcall
.userdata
= NULL
;
266 for (a
= nla_data(attr
), rem
= nla_len(attr
); rem
> 0;
267 a
= nla_next(a
, &rem
)) {
268 switch (nla_type(a
)) {
269 case OVS_USERSPACE_ATTR_USERDATA
:
273 case OVS_USERSPACE_ATTR_PID
:
274 upcall
.pid
= nla_get_u32(a
);
279 return dp_upcall(dp
, skb
, &upcall
);
282 static int sample(struct datapath
*dp
, struct sk_buff
*skb
,
283 const struct nlattr
*attr
)
285 const struct nlattr
*acts_list
= NULL
;
286 const struct nlattr
*a
;
289 for (a
= nla_data(attr
), rem
= nla_len(attr
); rem
> 0;
290 a
= nla_next(a
, &rem
)) {
291 switch (nla_type(a
)) {
292 case OVS_SAMPLE_ATTR_PROBABILITY
:
293 if (net_random() >= nla_get_u32(a
))
297 case OVS_SAMPLE_ATTR_ACTIONS
:
303 return do_execute_actions(dp
, skb
, nla_data(acts_list
),
304 nla_len(acts_list
), true);
307 static int execute_set_action(struct sk_buff
*skb
,
308 const struct nlattr
*nested_attr
)
312 switch (nla_type(nested_attr
)) {
313 case OVS_KEY_ATTR_PRIORITY
:
314 skb
->priority
= nla_get_u32(nested_attr
);
317 case OVS_KEY_ATTR_TUN_ID
:
318 OVS_CB(skb
)->tun_id
= nla_get_be64(nested_attr
);
321 case OVS_KEY_ATTR_ETHERNET
:
322 err
= set_eth_addr(skb
, nla_data(nested_attr
));
325 case OVS_KEY_ATTR_IPV4
:
326 err
= set_ipv4(skb
, nla_data(nested_attr
));
329 case OVS_KEY_ATTR_TCP
:
330 err
= set_tcp_port(skb
, nla_data(nested_attr
));
333 case OVS_KEY_ATTR_UDP
:
334 err
= set_udp_port(skb
, nla_data(nested_attr
));
341 /* Execute a list of actions against 'skb'. */
342 static int do_execute_actions(struct datapath
*dp
, struct sk_buff
*skb
,
343 const struct nlattr
*attr
, int len
, bool keep_skb
)
345 /* Every output action needs a separate clone of 'skb', but the common
346 * case is just a single output action, so that doing a clone and
347 * then freeing the original skbuff is wasteful. So the following code
348 * is slightly obscure just to avoid that. */
350 const struct nlattr
*a
;
353 for (a
= attr
, rem
= len
; rem
> 0;
354 a
= nla_next(a
, &rem
)) {
357 if (prev_port
!= -1) {
358 do_output(dp
, skb_clone(skb
, GFP_ATOMIC
), prev_port
);
362 switch (nla_type(a
)) {
363 case OVS_ACTION_ATTR_OUTPUT
:
364 prev_port
= nla_get_u32(a
);
367 case OVS_ACTION_ATTR_USERSPACE
:
368 output_userspace(dp
, skb
, a
);
371 case OVS_ACTION_ATTR_PUSH
:
372 /* Only supported push action is on vlan tag. */
373 err
= push_vlan(skb
, nla_data(nla_data(a
)));
374 if (unlikely(err
)) /* skb already freed. */
378 case OVS_ACTION_ATTR_POP
:
379 /* Only supported pop action is on vlan tag. */
383 case OVS_ACTION_ATTR_SET
:
384 err
= execute_set_action(skb
, nla_data(a
));
387 case OVS_ACTION_ATTR_SAMPLE
:
388 err
= sample(dp
, skb
, a
);
398 if (prev_port
!= -1) {
400 skb
= skb_clone(skb
, GFP_ATOMIC
);
402 do_output(dp
, skb
, prev_port
);
403 } else if (!keep_skb
)
409 /* We limit the number of times that we pass into execute_actions()
410 * to avoid blowing out the stack in the event that we have a loop. */
413 struct loop_counter
{
414 u8 count
; /* Count. */
415 bool looping
; /* Loop detected? */
418 static DEFINE_PER_CPU(struct loop_counter
, loop_counters
);
420 static int loop_suppress(struct datapath
*dp
, struct sw_flow_actions
*actions
)
423 pr_warn("%s: flow looped %d times, dropping\n",
424 dp_name(dp
), MAX_LOOPS
);
425 actions
->actions_len
= 0;
429 /* Execute the actions of the flow attached to 'skb' (OVS_CB(skb)->flow), suppressing them once the per-CPU loop counter exceeds MAX_LOOPS. */
430 int execute_actions(struct datapath
*dp
, struct sk_buff
*skb
)
432 struct sw_flow_actions
*acts
= rcu_dereference(OVS_CB(skb
)->flow
->sf_acts
);
433 struct loop_counter
*loop
;
436 /* Check whether we've looped too much. */
437 loop
= &__get_cpu_var(loop_counters
);
438 if (unlikely(++loop
->count
> MAX_LOOPS
))
439 loop
->looping
= true;
440 if (unlikely(loop
->looping
)) {
441 error
= loop_suppress(dp
, acts
);
446 OVS_CB(skb
)->tun_id
= 0;
447 error
= do_execute_actions(dp
, skb
, acts
->actions
,
448 acts
->actions_len
, false);
450 /* Check whether sub-actions looped too much. */
451 if (unlikely(loop
->looping
))
452 error
= loop_suppress(dp
, acts
);
455 /* Decrement loop counter. */
457 loop
->looping
= false;