 * Distributed under the terms of the GNU GPL version 2.
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for executing flow actions. */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/skbuff.h>
16 #include <linux/tcp.h>
17 #include <linux/udp.h>
18 #include <linux/in6.h>
19 #include <linux/if_arp.h>
20 #include <linux/if_vlan.h>
21 #include <net/inet_ecn.h>
23 #include <net/checksum.h>
28 #include "openvswitch/datapath-protocol.h"
32 static int do_execute_actions(struct datapath
*dp
, struct sk_buff
*skb
,
33 const struct nlattr
*attr
, int len
, bool keep_skb
);
35 static int make_writable(struct sk_buff
*skb
, int write_len
)
37 if (!skb_cloned(skb
) || skb_clone_writable(skb
, write_len
))
40 return pskb_expand_head(skb
, 0, 0, GFP_ATOMIC
);
/* Remove VLAN header from packet and update csum accordingly. */
44 static int __pop_vlan_tci(struct sk_buff
*skb
, __be16
*current_tci
)
47 struct vlan_ethhdr
*veth
;
50 err
= make_writable(skb
, VLAN_ETH_HLEN
);
54 if (get_ip_summed(skb
) == OVS_CSUM_COMPLETE
)
55 skb
->csum
= csum_sub(skb
->csum
, csum_partial(skb
->data
56 + ETH_HLEN
, VLAN_HLEN
, 0));
58 veth
= (struct vlan_ethhdr
*) skb
->data
;
59 *current_tci
= veth
->h_vlan_TCI
;
61 memmove(skb
->data
+ VLAN_HLEN
, skb
->data
, 2 * ETH_ALEN
);
63 eh
= (struct ethhdr
*)__skb_pull(skb
, VLAN_HLEN
);
65 skb
->protocol
= eh
->h_proto
;
66 skb
->mac_header
+= VLAN_HLEN
;
71 static int pop_vlan(struct sk_buff
*skb
)
76 if (likely(vlan_tx_tag_present(skb
))) {
79 if (unlikely(skb
->protocol
!= htons(ETH_P_8021Q
) ||
80 skb
->len
< VLAN_ETH_HLEN
))
83 err
= __pop_vlan_tci(skb
, &tci
);
87 /* move next vlan tag to hw accel tag */
88 if (likely(skb
->protocol
!= htons(ETH_P_8021Q
) ||
89 skb
->len
< VLAN_ETH_HLEN
))
92 err
= __pop_vlan_tci(skb
, &tci
);
96 __vlan_hwaccel_put_tag(skb
, ntohs(tci
));
100 static int push_vlan(struct sk_buff
*skb
, __be16 new_tci
)
102 if (unlikely(vlan_tx_tag_present(skb
))) {
105 /* push down current VLAN tag */
106 current_tag
= vlan_tx_tag_get(skb
);
108 if (!__vlan_put_tag(skb
, current_tag
))
111 if (get_ip_summed(skb
) == OVS_CSUM_COMPLETE
)
112 skb
->csum
= csum_add(skb
->csum
, csum_partial(skb
->data
113 + ETH_HLEN
, VLAN_HLEN
, 0));
116 __vlan_hwaccel_put_tag(skb
, ntohs(new_tci
));
120 static bool is_ip(struct sk_buff
*skb
)
122 return (OVS_CB(skb
)->flow
->key
.eth
.type
== htons(ETH_P_IP
) &&
123 skb
->transport_header
> skb
->network_header
);
126 static __sum16
*get_l4_checksum(struct sk_buff
*skb
)
128 u8 nw_proto
= OVS_CB(skb
)->flow
->key
.ip
.proto
;
129 int transport_len
= skb
->len
- skb_transport_offset(skb
);
130 if (nw_proto
== IPPROTO_TCP
) {
131 if (likely(transport_len
>= sizeof(struct tcphdr
)))
132 return &tcp_hdr(skb
)->check
;
133 } else if (nw_proto
== IPPROTO_UDP
) {
134 if (likely(transport_len
>= sizeof(struct udphdr
)))
135 return &udp_hdr(skb
)->check
;
140 static int set_nw_addr(struct sk_buff
*skb
, const struct nlattr
*a
)
142 __be32 new_nwaddr
= nla_get_be32(a
);
148 if (unlikely(!is_ip(skb
)))
151 err
= make_writable(skb
, skb_network_offset(skb
) +
152 sizeof(struct iphdr
));
157 nwaddr
= nla_type(a
) == OVS_ACTION_ATTR_SET_NW_SRC
? &nh
->saddr
: &nh
->daddr
;
159 check
= get_l4_checksum(skb
);
161 inet_proto_csum_replace4(check
, skb
, *nwaddr
, new_nwaddr
, 1);
162 csum_replace4(&nh
->check
, *nwaddr
, new_nwaddr
);
164 skb_clear_rxhash(skb
);
166 *nwaddr
= new_nwaddr
;
171 static int set_nw_tos(struct sk_buff
*skb
, u8 nw_tos
)
173 struct iphdr
*nh
= ip_hdr(skb
);
177 if (unlikely(!is_ip(skb
)))
180 err
= make_writable(skb
, skb_network_offset(skb
) +
181 sizeof(struct iphdr
));
185 /* Set the DSCP bits and preserve the ECN bits. */
187 new = nw_tos
| (nh
->tos
& INET_ECN_MASK
);
188 csum_replace4(&nh
->check
, (__force __be32
)old
,
189 (__force __be32
)new);
195 static int set_tp_port(struct sk_buff
*skb
, const struct nlattr
*a
)
202 if (unlikely(!is_ip(skb
)))
205 err
= make_writable(skb
, skb_transport_offset(skb
) +
206 sizeof(struct tcphdr
));
210 /* Must follow make_writable() since that can move the skb data. */
211 check
= get_l4_checksum(skb
);
212 if (unlikely(!check
))
216 * Update port and checksum.
218 * This is OK because source and destination port numbers are at the
219 * same offsets in both UDP and TCP headers, and get_l4_checksum() only
220 * supports those protocols.
223 port
= nla_type(a
) == OVS_ACTION_ATTR_SET_TP_SRC
? &th
->source
: &th
->dest
;
224 inet_proto_csum_replace2(check
, skb
, *port
, nla_get_be16(a
), 0);
225 *port
= nla_get_be16(a
);
226 skb_clear_rxhash(skb
);
231 static int do_output(struct datapath
*dp
, struct sk_buff
*skb
, int out_port
)
238 vport
= rcu_dereference(dp
->ports
[out_port
]);
239 if (unlikely(!vport
)) {
244 vport_send(vport
, skb
);
248 static int output_userspace(struct datapath
*dp
, struct sk_buff
*skb
,
249 const struct nlattr
*attr
)
251 struct dp_upcall_info upcall
;
252 const struct nlattr
*a
;
255 upcall
.cmd
= OVS_PACKET_CMD_ACTION
;
256 upcall
.key
= &OVS_CB(skb
)->flow
->key
;
257 upcall
.userdata
= NULL
;
260 for (a
= nla_data(attr
), rem
= nla_len(attr
); rem
> 0;
261 a
= nla_next(a
, &rem
)) {
262 switch (nla_type(a
)) {
263 case OVS_USERSPACE_ATTR_USERDATA
:
267 case OVS_USERSPACE_ATTR_PID
:
268 upcall
.pid
= nla_get_u32(a
);
273 return dp_upcall(dp
, skb
, &upcall
);
276 static int sample(struct datapath
*dp
, struct sk_buff
*skb
,
277 const struct nlattr
*attr
)
279 const struct nlattr
*acts_list
= NULL
;
280 const struct nlattr
*a
;
283 for (a
= nla_data(attr
), rem
= nla_len(attr
); rem
> 0;
284 a
= nla_next(a
, &rem
)) {
285 switch (nla_type(a
)) {
286 case OVS_SAMPLE_ATTR_PROBABILITY
:
287 if (net_random() >= nla_get_u32(a
))
291 case OVS_SAMPLE_ATTR_ACTIONS
:
297 return do_execute_actions(dp
, skb
, nla_data(acts_list
),
298 nla_len(acts_list
), true);
301 /* Execute a list of actions against 'skb'. */
302 static int do_execute_actions(struct datapath
*dp
, struct sk_buff
*skb
,
303 const struct nlattr
*attr
, int len
, bool keep_skb
)
305 /* Every output action needs a separate clone of 'skb', but the common
306 * case is just a single output action, so that doing a clone and
307 * then freeing the original skbuff is wasteful. So the following code
308 * is slightly obscure just to avoid that. */
310 u32 priority
= skb
->priority
;
311 const struct nlattr
*a
;
314 for (a
= attr
, rem
= len
; rem
> 0;
315 a
= nla_next(a
, &rem
)) {
318 if (prev_port
!= -1) {
319 do_output(dp
, skb_clone(skb
, GFP_ATOMIC
), prev_port
);
323 switch (nla_type(a
)) {
324 case OVS_ACTION_ATTR_OUTPUT
:
325 prev_port
= nla_get_u32(a
);
328 case OVS_ACTION_ATTR_USERSPACE
:
329 output_userspace(dp
, skb
, a
);
332 case OVS_ACTION_ATTR_SET_TUNNEL
:
333 OVS_CB(skb
)->tun_id
= nla_get_be64(a
);
336 case OVS_ACTION_ATTR_PUSH_VLAN
:
337 err
= push_vlan(skb
, nla_get_be16(a
));
338 if (unlikely(err
)) /* skb already freed */
342 case OVS_ACTION_ATTR_POP_VLAN
:
346 case OVS_ACTION_ATTR_SET_DL_SRC
:
347 err
= make_writable(skb
, ETH_HLEN
);
349 memcpy(eth_hdr(skb
)->h_source
, nla_data(a
), ETH_ALEN
);
352 case OVS_ACTION_ATTR_SET_DL_DST
:
353 err
= make_writable(skb
, ETH_HLEN
);
355 memcpy(eth_hdr(skb
)->h_dest
, nla_data(a
), ETH_ALEN
);
358 case OVS_ACTION_ATTR_SET_NW_SRC
:
359 case OVS_ACTION_ATTR_SET_NW_DST
:
360 err
= set_nw_addr(skb
, a
);
363 case OVS_ACTION_ATTR_SET_NW_TOS
:
364 err
= set_nw_tos(skb
, nla_get_u8(a
));
367 case OVS_ACTION_ATTR_SET_TP_SRC
:
368 case OVS_ACTION_ATTR_SET_TP_DST
:
369 err
= set_tp_port(skb
, a
);
372 case OVS_ACTION_ATTR_SET_PRIORITY
:
373 skb
->priority
= nla_get_u32(a
);
376 case OVS_ACTION_ATTR_POP_PRIORITY
:
377 skb
->priority
= priority
;
380 case OVS_ACTION_ATTR_SAMPLE
:
381 err
= sample(dp
, skb
, a
);
391 if (prev_port
!= -1) {
393 skb
= skb_clone(skb
, GFP_ATOMIC
);
395 do_output(dp
, skb
, prev_port
);
396 } else if (!keep_skb
)
402 /* We limit the number of times that we pass into execute_actions()
403 * to avoid blowing out the stack in the event that we have a loop. */
406 struct loop_counter
{
407 u8 count
; /* Count. */
408 bool looping
; /* Loop detected? */
411 static DEFINE_PER_CPU(struct loop_counter
, loop_counters
);
413 static int loop_suppress(struct datapath
*dp
, struct sw_flow_actions
*actions
)
416 pr_warn("%s: flow looped %d times, dropping\n",
417 dp_name(dp
), MAX_LOOPS
);
418 actions
->actions_len
= 0;
422 /* Execute a list of actions against 'skb'. */
423 int execute_actions(struct datapath
*dp
, struct sk_buff
*skb
)
425 struct sw_flow_actions
*acts
= rcu_dereference(OVS_CB(skb
)->flow
->sf_acts
);
426 struct loop_counter
*loop
;
429 /* Check whether we've looped too much. */
430 loop
= &__get_cpu_var(loop_counters
);
431 if (unlikely(++loop
->count
> MAX_LOOPS
))
432 loop
->looping
= true;
433 if (unlikely(loop
->looping
)) {
434 error
= loop_suppress(dp
, acts
);
439 OVS_CB(skb
)->tun_id
= 0;
440 error
= do_execute_actions(dp
, skb
, acts
->actions
,
441 acts
->actions_len
, false);
443 /* Check whether sub-actions looped too much. */
444 if (unlikely(loop
->looping
))
445 error
= loop_suppress(dp
, acts
);
448 /* Decrement loop counter. */
450 loop
->looping
= false;