]>
git.proxmox.com Git - mirror_ovs.git/blob - datapath/actions.c
2 * Distributed under the terms of the GNU GPL version 2.
3 * Copyright (c) 2007, 2008, 2009 Nicira Networks.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 /* Functions for executing flow actions. */
11 #include <linux/skbuff.h>
14 #include <linux/tcp.h>
15 #include <linux/udp.h>
16 #include <linux/in6.h>
17 #include <linux/if_vlan.h>
19 #include <net/checksum.h>
23 #include "openvswitch/datapath-protocol.h"
25 static struct sk_buff
*
26 make_writable(struct sk_buff
*skb
, unsigned min_headroom
, gfp_t gfp
)
28 if (skb_shared(skb
) || skb_cloned(skb
)) {
30 unsigned headroom
= max(min_headroom
, skb_headroom(skb
));
32 nskb
= skb_copy_expand(skb
, headroom
, skb_tailroom(skb
), gfp
);
34 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
35 /* Before 2.6.24 these fields were not copied when
36 * doing an skb_copy_expand. */
37 nskb
->ip_summed
= skb
->ip_summed
;
38 nskb
->csum
= skb
->csum
;
40 #if defined(CONFIG_XEN) && LINUX_VERSION_CODE == KERNEL_VERSION(2,6,18)
41 /* These fields are copied in skb_clone but not in
42 * skb_copy or related functions. We need to manually
43 * copy them over here. */
44 nskb
->proto_data_valid
= skb
->proto_data_valid
;
45 nskb
->proto_csum_blank
= skb
->proto_csum_blank
;
51 unsigned int hdr_len
= (skb_transport_offset(skb
)
52 + sizeof(struct tcphdr
));
53 if (pskb_may_pull(skb
, min(hdr_len
, skb
->len
)))
61 static struct sk_buff
*
62 vlan_pull_tag(struct sk_buff
*skb
)
64 struct vlan_ethhdr
*vh
= vlan_eth_hdr(skb
);
68 /* Verify we were given a vlan packet */
69 if (vh
->h_vlan_proto
!= htons(ETH_P_8021Q
))
72 memmove(skb
->data
+ VLAN_HLEN
, skb
->data
, 2 * VLAN_ETH_ALEN
);
74 eh
= (struct ethhdr
*)skb_pull(skb
, VLAN_HLEN
);
76 skb
->protocol
= eh
->h_proto
;
77 skb
->mac_header
+= VLAN_HLEN
;
83 static struct sk_buff
*
84 modify_vlan_tci(struct datapath
*dp
, struct sk_buff
*skb
,
85 struct odp_flow_key
*key
, const union odp_action
*a
,
86 int n_actions
, gfp_t gfp
)
90 if (a
->type
== ODPAT_SET_VLAN_VID
) {
91 tci
= ntohs(a
->vlan_vid
.vlan_vid
);
93 key
->dl_vlan
= htons(tci
& mask
);
95 tci
= a
->vlan_pcp
.vlan_pcp
<< 13;
99 skb
= make_writable(skb
, VLAN_HLEN
, gfp
);
101 return ERR_PTR(-ENOMEM
);
103 if (skb
->protocol
== htons(ETH_P_8021Q
)) {
104 /* Modify vlan id, but maintain other TCI values */
105 struct vlan_ethhdr
*vh
= vlan_eth_hdr(skb
);
106 vh
->h_vlan_TCI
= htons((ntohs(vh
->h_vlan_TCI
) & ~mask
) | tci
);
108 /* Add vlan header */
110 /* Set up checksumming pointers for checksum-deferred packets
111 * on Xen. Otherwise, dev_queue_xmit() will try to do this
112 * when we send the packet out on the wire, and it will fail at
113 * that point because skb_checksum_setup() will not look inside
114 * an 802.1Q header. */
115 vswitch_skb_checksum_setup(skb
);
117 /* GSO is not implemented for packets with an 802.1Q header, so
118 * we have to do segmentation before we add that header.
120 * GSO does work with hardware-accelerated VLAN tagging, but we
121 * can't use hardware-accelerated VLAN tagging since it
122 * requires the device to have a VLAN group configured (with
123 * e.g. vconfig(8)) and we don't do that.
125 * Having to do this here may be a performance loss, since we
126 * can't take advantage of TSO hardware support, although it
127 * does not make a measurable network performance difference
128 * for 1G Ethernet. Fixing that would require patching the
129 * kernel (either to add GSO support to the VLAN protocol or to
130 * support hardware-accelerated VLAN tagging without VLAN
131 * groups configured). */
132 if (skb_is_gso(skb
)) {
133 struct sk_buff
*segs
;
135 segs
= skb_gso_segment(skb
, 0);
137 if (unlikely(IS_ERR(segs
)))
138 return ERR_CAST(segs
);
141 struct sk_buff
*nskb
= segs
->next
;
146 segs
= __vlan_put_tag(segs
, tci
);
149 struct odp_flow_key segkey
= *key
;
150 err
= execute_actions(dp
, segs
,
157 while ((segs
= nskb
)) {
166 } while (segs
->next
);
171 /* The hardware-accelerated version of vlan_put_tag() works
172 * only for a device that has a VLAN group configured (with
173 * e.g. vconfig(8)), so call the software-only version
174 * __vlan_put_tag() directly instead.
176 skb
= __vlan_put_tag(skb
, tci
);
178 return ERR_PTR(-ENOMEM
);
184 static struct sk_buff
*strip_vlan(struct sk_buff
*skb
,
185 struct odp_flow_key
*key
, gfp_t gfp
)
187 skb
= make_writable(skb
, 0, gfp
);
190 key
->dl_vlan
= htons(ODP_VLAN_NONE
);
195 static struct sk_buff
*set_dl_addr(struct sk_buff
*skb
,
196 const struct odp_action_dl_addr
*a
,
199 skb
= make_writable(skb
, 0, gfp
);
201 struct ethhdr
*eh
= eth_hdr(skb
);
202 memcpy(a
->type
== ODPAT_SET_DL_SRC
? eh
->h_source
: eh
->h_dest
,
203 a
->dl_addr
, ETH_ALEN
);
208 /* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
209 * covered by the sum has been changed from 'from' to 'to'. If set,
210 * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
211 * Based on nf_proto_csum_replace4. */
212 static void update_csum(__sum16
*sum
, struct sk_buff
*skb
,
213 __be32 from
, __be32 to
, int pseudohdr
)
215 __be32 diff
[] = { ~from
, to
};
216 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
217 *sum
= csum_fold(csum_partial((char *)diff
, sizeof(diff
),
218 ~csum_unfold(*sum
)));
219 if (skb
->ip_summed
== CHECKSUM_COMPLETE
&& pseudohdr
)
220 skb
->csum
= ~csum_partial((char *)diff
, sizeof(diff
),
222 } else if (pseudohdr
)
223 *sum
= ~csum_fold(csum_partial((char *)diff
, sizeof(diff
),
227 static struct sk_buff
*set_nw_addr(struct sk_buff
*skb
,
228 struct odp_flow_key
*key
,
229 const struct odp_action_nw_addr
*a
,
232 if (key
->dl_type
!= htons(ETH_P_IP
))
235 skb
= make_writable(skb
, 0, gfp
);
237 struct iphdr
*nh
= ip_hdr(skb
);
238 u32
*f
= a
->type
== ODPAT_SET_NW_SRC
? &nh
->saddr
: &nh
->daddr
;
240 u32
new = a
->nw_addr
;
242 if (key
->nw_proto
== IPPROTO_TCP
) {
243 struct tcphdr
*th
= tcp_hdr(skb
);
244 update_csum(&th
->check
, skb
, old
, new, 1);
245 } else if (key
->nw_proto
== IPPROTO_UDP
) {
246 struct udphdr
*th
= udp_hdr(skb
);
247 update_csum(&th
->check
, skb
, old
, new, 1);
249 update_csum(&nh
->check
, skb
, old
, new, 0);
255 static struct sk_buff
*
256 set_tp_port(struct sk_buff
*skb
, struct odp_flow_key
*key
,
257 const struct odp_action_tp_port
*a
,
262 if (key
->dl_type
!= htons(ETH_P_IP
))
265 if (key
->nw_proto
== IPPROTO_TCP
)
266 check_ofs
= offsetof(struct tcphdr
, check
);
267 else if (key
->nw_proto
== IPPROTO_UDP
)
268 check_ofs
= offsetof(struct udphdr
, check
);
272 skb
= make_writable(skb
, 0, gfp
);
274 struct udphdr
*th
= udp_hdr(skb
);
275 u16
*f
= a
->type
== ODPAT_SET_TP_SRC
? &th
->source
: &th
->dest
;
277 u16
new = a
->tp_port
;
278 update_csum((u16
*)(skb_transport_header(skb
) + check_ofs
),
285 static inline unsigned packet_length(const struct sk_buff
*skb
)
287 unsigned length
= skb
->len
- ETH_HLEN
;
288 if (skb
->protocol
== htons(ETH_P_8021Q
))
293 int dp_xmit_skb(struct sk_buff
*skb
)
295 struct datapath
*dp
= skb
->dev
->br_port
->dp
;
298 if (packet_length(skb
) > skb
->dev
->mtu
&& !skb_is_gso(skb
)) {
299 printk(KERN_WARNING
"%s: dropped over-mtu packet: %d > %d\n",
300 dp_name(dp
), packet_length(skb
), skb
->dev
->mtu
);
311 do_output(struct datapath
*dp
, struct sk_buff
*skb
, int out_port
)
313 struct net_bridge_port
*p
;
314 struct net_device
*dev
;
319 p
= dp
->ports
[out_port
];
323 dev
= skb
->dev
= p
->dev
;
325 dp_dev_recv(dev
, skb
);
334 /* Never consumes 'skb'. Returns a port that 'skb' should be sent to, -1 if
336 static int output_group(struct datapath
*dp
, __u16 group
,
337 struct sk_buff
*skb
, gfp_t gfp
)
339 struct dp_port_group
*g
= rcu_dereference(dp
->groups
[group
]);
345 for (i
= 0; i
< g
->n_ports
; i
++) {
346 struct net_bridge_port
*p
= dp
->ports
[g
->ports
[i
]];
347 if (!p
|| skb
->dev
== p
->dev
)
349 if (prev_port
!= -1) {
350 struct sk_buff
*clone
= skb_clone(skb
, gfp
);
353 do_output(dp
, clone
, prev_port
);
355 prev_port
= p
->port_no
;
361 output_control(struct datapath
*dp
, struct sk_buff
*skb
, u32 arg
, gfp_t gfp
)
363 skb
= skb_clone(skb
, gfp
);
366 return dp_output_control(dp
, skb
, _ODPL_ACTION_NR
, arg
);
369 /* Execute a list of actions against 'skb'. */
370 int execute_actions(struct datapath
*dp
, struct sk_buff
*skb
,
371 struct odp_flow_key
*key
,
372 const union odp_action
*a
, int n_actions
,
375 /* Every output action needs a separate clone of 'skb', but the common
376 * case is just a single output action, so that doing a clone and
377 * then freeing the original skbuff is wasteful. So the following code
378 * is slightly obscure just to avoid that. */
381 for (; n_actions
> 0; a
++, n_actions
--) {
382 WARN_ON_ONCE(skb_shared(skb
));
383 if (prev_port
!= -1) {
384 do_output(dp
, skb_clone(skb
, gfp
), prev_port
);
390 prev_port
= a
->output
.port
;
393 case ODPAT_OUTPUT_GROUP
:
394 prev_port
= output_group(dp
, a
->output_group
.group
,
398 case ODPAT_CONTROLLER
:
399 err
= output_control(dp
, skb
, a
->controller
.arg
, gfp
);
406 case ODPAT_SET_VLAN_VID
:
407 case ODPAT_SET_VLAN_PCP
:
408 skb
= modify_vlan_tci(dp
, skb
, key
, a
, n_actions
, gfp
);
413 case ODPAT_STRIP_VLAN
:
414 skb
= strip_vlan(skb
, key
, gfp
);
417 case ODPAT_SET_DL_SRC
:
418 case ODPAT_SET_DL_DST
:
419 skb
= set_dl_addr(skb
, &a
->dl_addr
, gfp
);
422 case ODPAT_SET_NW_SRC
:
423 case ODPAT_SET_NW_DST
:
424 skb
= set_nw_addr(skb
, key
, &a
->nw_addr
, gfp
);
427 case ODPAT_SET_TP_SRC
:
428 case ODPAT_SET_TP_DST
:
429 skb
= set_tp_port(skb
, key
, &a
->tp_port
, gfp
);
436 do_output(dp
, skb
, prev_port
);