2 * Copyright (c) 2010 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 #include <linux/if_arp.h>
10 #include <linux/if_ether.h>
12 #include <linux/if_tunnel.h>
13 #include <linux/if_vlan.h>
15 #include <linux/in_route.h>
16 #include <linux/jhash.h>
17 #include <linux/kernel.h>
18 #include <linux/version.h>
20 #include <net/dsfield.h>
23 #include <net/inet_ecn.h>
25 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
28 #include <net/protocol.h>
29 #include <net/route.h>
34 #include "openvswitch/gre.h"
37 #include "vport-generic.h"
39 /* The absolute minimum fragment size. Note that there are many other
40 * definitions of the minimum MTU. */
43 /* The GRE header is composed of a series of sections: a base and then a variable
44 * number of options. */
45 #define GRE_HEADER_SECTION 4
/* Per-port configuration snapshot.  A published copy is read through
 * rcu_dereference() and replaced as a whole by assign_config_rcu() (see
 * gre_set_mtu() and gre_set_addr(), which duplicate it before changing it).
 * NOTE(review): this listing omits some members; other code in this file also
 * refers to 'rcu' and 'mtu' members of this struct. */
47 struct mutable_config
{
/* Ethernet address returned by gre_get_addr() and used as the source MAC of
 * the packets built in send_frag_needed(). */
50 unsigned char eth_addr
[ETH_ALEN
];
/* Tunnel parameters, copied from userspace by set_config(). */
52 struct gre_port_config port_config
;
/* Computed by set_config(): IP header plus the GRE sections in use. */
54 int tunnel_hlen
; /* Tunnel header length. */
/* Members of the per-port private data (struct gre_vport, as used by
 * gre_vport_table_cast() and gre_vport_priv()).
 * NOTE(review): the struct header and at least the 'name' member used by
 * gre_get_name() are not present in this listing. */
/* Links the port into port_table. */
58 struct tbl_node tbl_node
;
62 /* Protected by RCU. */
63 struct mutable_config
*mutable;
/* Table of all GRE ports; entries are inserted with the hash computed by
 * port_hash() (see add_port()). */
66 /* Protected by RCU. */
67 static struct tbl
*port_table
;
69 /* These are just used as an optimization: they don't require any kind of
70 * synchronization because we could have just as easily read the value before
71 * the port change happened. */
/* The four counters below are selected by find_port_pool() and let
 * find_port() skip lookups for classes of ports that do not exist. */
/* Ports without GRE_F_IN_KEY_MATCH and with a nonzero saddr. */
72 static unsigned int key_local_remote_ports
;
/* Ports without GRE_F_IN_KEY_MATCH and with a zero saddr. */
73 static unsigned int key_remote_ports
;
/* Ports with GRE_F_IN_KEY_MATCH and with a nonzero saddr. */
74 static unsigned int local_remote_ports
;
/* Ports with GRE_F_IN_KEY_MATCH and with a zero saddr. */
75 static unsigned int remote_ports
;
/* Returns the GRE-specific private area that belongs to 'vport'. */
static inline struct gre_vport *
gre_vport_priv(const struct vport *vport)
{
	struct gre_vport *priv = vport_priv(vport);

	return priv;
}
/* Inverse of gre_vport_priv(): returns the generic vport that owns the
 * private area 'gre_vport'. */
static inline struct vport *
gre_vport_to_vport(const struct gre_vport *gre_vport)
{
	struct vport *owner = vport_from_priv(gre_vport);

	return owner;
}
89 static inline struct gre_vport
*
90 gre_vport_table_cast(const struct tbl_node
*node
)
92 return container_of(node
, struct gre_vport
, tbl_node
);
/* RCU callback registered by assign_config_rcu() through call_rcu().
 * Recovers the struct mutable_config that embeds 'rcu'.
 * NOTE(review): the statement that releases the configuration is not present
 * in this listing. */
97 free_config(struct rcu_head
*rcu
)
99 struct mutable_config
*c
= container_of(rcu
, struct mutable_config
, rcu
);
104 assign_config_rcu(struct vport
*vport
, struct mutable_config
*new_config
)
106 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
107 struct mutable_config
*old_config
;
109 old_config
= rcu_dereference(gre_vport
->mutable);
110 rcu_assign_pointer(gre_vport
->mutable, new_config
);
111 call_rcu(&old_config
->rcu
, free_config
);
114 static unsigned int *
115 find_port_pool(const struct mutable_config
*mutable)
117 if (mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
) {
118 if (mutable->port_config
.saddr
)
119 return &local_remote_ports
;
121 return &remote_ports
;
123 if (mutable->port_config
.saddr
)
124 return &key_local_remote_ports
;
126 return &key_remote_ports
;
137 struct port_lookup_key
{
138 u32 vals
[4]; /* Contains enum lookup_key keys. */
139 const struct mutable_config
*mutable;
142 /* Modifies 'target' to store the rcu_dereferenced pointer that was used to do
143 * the comparision. */
145 port_cmp(const struct tbl_node
*node
, void *target
)
147 const struct gre_vport
*gre_vport
= gre_vport_table_cast(node
);
148 struct port_lookup_key
*lookup
= target
;
150 lookup
->mutable = rcu_dereference(gre_vport
->mutable);
152 return ((lookup
->mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
) ==
153 lookup
->vals
[LOOKUP_KEY_MATCH
]) &&
154 lookup
->mutable->port_config
.daddr
== lookup
->vals
[LOOKUP_DADDR
] &&
155 lookup
->mutable->port_config
.in_key
== lookup
->vals
[LOOKUP_KEY
] &&
156 lookup
->mutable->port_config
.saddr
== lookup
->vals
[LOOKUP_SADDR
];
160 port_hash(struct port_lookup_key
*lookup
)
162 return jhash2(lookup
->vals
, ARRAY_SIZE(lookup
->vals
), 0);
/* Inserts 'vport' into port_table and increments the counter for its class
 * of port (see find_port_pool()).
 *
 * The visible code creates the table with tbl_create(0), or replaces it with
 * the result of tbl_expand() once it holds more entries than buckets; the
 * replaced table is released with tbl_deferred_destroy().  A failed
 * expansion is reported with PTR_ERR().
 * NOTE(review): the condition guarding table creation and the handling of
 * 'err' after tbl_insert() are not present in this listing. */
166 add_port(struct vport
*vport
)
168 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
169 struct port_lookup_key lookup
;
173 struct tbl
*new_table
;
175 new_table
= tbl_create(0);
179 rcu_assign_pointer(port_table
, new_table
);
181 } else if (tbl_count(port_table
) > tbl_n_buckets(port_table
)) {
182 struct tbl
*old_table
= port_table
;
183 struct tbl
*new_table
;
185 new_table
= tbl_expand(old_table
);
186 if (IS_ERR(new_table
))
187 return PTR_ERR(new_table
);
189 rcu_assign_pointer(port_table
, new_table
);
190 tbl_deferred_destroy(old_table
, NULL
);
/* Build the same key that find_port() builds, so that the hash used for
 * insertion matches the hash used for lookup. */
193 lookup
.vals
[LOOKUP_SADDR
] = gre_vport
->mutable->port_config
.saddr
;
194 lookup
.vals
[LOOKUP_DADDR
] = gre_vport
->mutable->port_config
.daddr
;
195 lookup
.vals
[LOOKUP_KEY
] = gre_vport
->mutable->port_config
.in_key
;
196 lookup
.vals
[LOOKUP_KEY_MATCH
] = gre_vport
->mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
;
198 err
= tbl_insert(port_table
, &gre_vport
->tbl_node
, port_hash(&lookup
));
202 (*find_port_pool(gre_vport
->mutable))++;
/* Removes 'vport' from port_table and decrements the counter for its class
 * of port (see find_port_pool()).
 * NOTE(review): the handling of 'err' after tbl_remove() is not present in
 * this listing. */
208 del_port(struct vport
*vport
)
210 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
213 err
= tbl_remove(port_table
, &gre_vport
->tbl_node
);
217 (*find_port_pool(gre_vport
->mutable))--;
222 #define FIND_PORT_KEY (1 << 0)
223 #define FIND_PORT_MATCH (1 << 1)
224 #define FIND_PORT_ANY (FIND_PORT_KEY | FIND_PORT_MATCH)
/* Looks up the port configured with source address 'saddr', destination
 * address 'daddr' and input key 'key'.
 *
 * 'port_type' selects the classes of port to search:
 *   - FIND_PORT_KEY: ports with a fixed input key; looked up with 'key' and
 *     a key-match value of 0.
 *   - FIND_PORT_MATCH: ports flagged GRE_F_IN_KEY_MATCH; looked up with a
 *     key of 0.
 * Within a class the lookup is tried with 'saddr' and then with a source
 * address of 0.  The port counters are consulted to skip lookups for
 * classes that have no ports.
 *
 * On the visible success path, stores the configuration that port_cmp()
 * compared against in '*mutable' and returns the matching vport.
 * NOTE(review): the checks of each tbl_lookup() result and the not-found
 * return are not present in this listing. */
226 static struct vport
*
227 find_port(__be32 saddr
, __be32 daddr
, __be32 key
, int port_type
,
228 const struct mutable_config
**mutable)
230 struct port_lookup_key lookup
;
231 struct tbl
*table
= rcu_dereference(port_table
);
232 struct tbl_node
*tbl_node
;
237 lookup
.vals
[LOOKUP_SADDR
] = saddr
;
238 lookup
.vals
[LOOKUP_DADDR
] = daddr
;
240 if (port_type
& FIND_PORT_KEY
) {
241 lookup
.vals
[LOOKUP_KEY
] = key
;
242 lookup
.vals
[LOOKUP_KEY_MATCH
] = 0;
244 if (key_local_remote_ports
) {
245 tbl_node
= tbl_lookup(table
, &lookup
, port_hash(&lookup
), port_cmp
);
250 if (key_remote_ports
) {
251 lookup
.vals
[LOOKUP_SADDR
] = 0;
253 tbl_node
= tbl_lookup(table
, &lookup
, port_hash(&lookup
), port_cmp
);
/* Restore the source address for the FIND_PORT_MATCH lookups below. */
257 lookup
.vals
[LOOKUP_SADDR
] = saddr
;
261 if (port_type
& FIND_PORT_MATCH
) {
262 lookup
.vals
[LOOKUP_KEY
] = 0;
263 lookup
.vals
[LOOKUP_KEY_MATCH
] = GRE_F_IN_KEY_MATCH
;
265 if (local_remote_ports
) {
266 tbl_node
= tbl_lookup(table
, &lookup
, port_hash(&lookup
), port_cmp
);
272 lookup
.vals
[LOOKUP_SADDR
] = 0;
274 tbl_node
= tbl_lookup(table
, &lookup
, port_hash(&lookup
), port_cmp
);
283 *mutable = lookup
.mutable;
284 return gre_vport_to_vport(gre_vport_table_cast(tbl_node
));
/* Tests whether 'addr' is a multicast, limited broadcast, loopback or
 * zero-network IPv4 address.
 * NOTE(review): the return statements are not present in this listing;
 * ipv4_should_icmp() treats a false result as "do not respond". */
288 check_ipv4_address(__be32 addr
)
290 if (ipv4_is_multicast(addr
) || ipv4_is_lbcast(addr
)
291 || ipv4_is_loopback(addr
) || ipv4_is_zeronet(addr
))
/* Decides whether an ICMP error may be generated in response to the IPv4
 * packet in 'skb'.  The visible checks look at the Ethernet destination,
 * both IP addresses, the fragment offset and, for ICMP packets, the ICMP
 * type.
 * NOTE(review): the return statements of the individual checks are not
 * present in this listing. */
298 ipv4_should_icmp(struct sk_buff
*skb
)
300 struct iphdr
*old_iph
= ip_hdr(skb
);
302 /* Don't respond to L2 broadcast. */
303 if (is_multicast_ether_addr(eth_hdr(skb
)->h_dest
))
306 /* Don't respond to L3 broadcast or invalid addresses. */
307 if (!check_ipv4_address(old_iph
->daddr
) ||
308 !check_ipv4_address(old_iph
->saddr
))
311 /* Only respond to the first fragment. */
312 if (old_iph
->frag_off
& htons(IP_OFFSET
))
315 /* Don't respond to ICMP error messages. */
316 if (old_iph
->protocol
== IPPROTO_ICMP
) {
317 u8 icmp_type
, *icmp_typep
;
/* The offset of the ICMP type byte is computed relative to skb->data, which
 * is what skb_header_pointer() expects. */
319 icmp_typep
= skb_header_pointer(skb
, (u8
*)old_iph
+
320 (old_iph
->ihl
<< 2) +
321 offsetof(struct icmphdr
, type
) -
322 skb
->data
, sizeof(icmp_type
),
/* Matches types above NR_ICMP_TYPES and types up to ICMP_PARAMETERPROB other
 * than echo request and echo reply. */
328 if (*icmp_typep
> NR_ICMP_TYPES
329 || (*icmp_typep
<= ICMP_PARAMETERPROB
330 && *icmp_typep
!= ICMP_ECHOREPLY
331 && *icmp_typep
!= ICMP_ECHO
))
/* Appends an IPv4 header, an ICMP header and 'payload_length' bytes copied
 * from the original packet 'skb' (starting at its IP header) to 'nskb'.
 *
 * The result is an ICMP "destination unreachable / fragmentation needed"
 * message that advertises 'mtu', addressed from the original destination to
 * the original source.  The ICMP checksum covers the ICMP header and the
 * copied payload.
 * NOTE(review): some header field assignments are not present in this
 * listing. */
339 ipv4_build_icmp(struct sk_buff
*skb
, struct sk_buff
*nskb
,
340 unsigned int mtu
, unsigned int payload_length
)
342 struct iphdr
*iph
, *old_iph
= ip_hdr(skb
);
343 struct icmphdr
*icmph
;
346 iph
= (struct iphdr
*)skb_put(nskb
, sizeof(struct iphdr
));
347 icmph
= (struct icmphdr
*)skb_put(nskb
, sizeof(struct icmphdr
));
348 payload
= skb_put(nskb
, payload_length
);
352 iph
->ihl
= sizeof(struct iphdr
) >> 2;
353 iph
->tos
= (old_iph
->tos
& IPTOS_TOS_MASK
) |
354 IPTOS_PREC_INTERNETCONTROL
;
355 iph
->tot_len
= htons(sizeof(struct iphdr
)
356 + sizeof(struct icmphdr
)
358 get_random_bytes(&iph
->id
, sizeof(iph
->id
));
361 iph
->protocol
= IPPROTO_ICMP
;
/* The reply travels in the opposite direction of the original packet. */
362 iph
->daddr
= old_iph
->saddr
;
363 iph
->saddr
= old_iph
->daddr
;
368 icmph
->type
= ICMP_DEST_UNREACH
;
369 icmph
->code
= ICMP_FRAG_NEEDED
;
370 icmph
->un
.gateway
= htonl(mtu
);
373 nskb
->csum
= csum_partial((u8
*)icmph
, sizeof(struct icmphdr
), 0);
374 nskb
->csum
= skb_copy_and_csum_bits(skb
, (u8
*)old_iph
- skb
->data
,
375 payload
, payload_length
,
377 icmph
->checksum
= csum_fold(nskb
->csum
);
380 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* Decides whether an ICMPv6 error may be generated in response to the IPv6
 * packet in 'skb'.  The visible checks look at the source and destination
 * address types and, after skipping extension headers, at the ICMPv6 type.
 * NOTE(review): the return statements of the individual checks are not
 * present in this listing. */
382 ipv6_should_icmp(struct sk_buff
*skb
)
384 struct ipv6hdr
*old_ipv6h
= ipv6_hdr(skb
);
386 int payload_off
= (u8
*)(old_ipv6h
+ 1) - skb
->data
;
387 u8 nexthdr
= ipv6_hdr(skb
)->nexthdr
;
389 /* Check source address is valid. */
390 addr_type
= ipv6_addr_type(&old_ipv6h
->saddr
);
391 if (addr_type
& IPV6_ADDR_MULTICAST
|| addr_type
== IPV6_ADDR_ANY
)
394 /* Don't reply to unspecified addresses. */
395 if (ipv6_addr_type(&old_ipv6h
->daddr
) == IPV6_ADDR_ANY
)
398 /* Don't respond to ICMP error messages. */
399 payload_off
= ipv6_skip_exthdr(skb
, payload_off
, &nexthdr
);
403 if (nexthdr
== NEXTHDR_ICMP
) {
404 u8 icmp_type
, *icmp_typep
;
406 icmp_typep
= skb_header_pointer(skb
, payload_off
+
407 offsetof(struct icmp6hdr
,
409 sizeof(icmp_type
), &icmp_type
);
/* Matches an unreadable type byte or a type without the informational
 * message bit set. */
411 if (!icmp_typep
|| !(*icmp_typep
& ICMPV6_INFOMSG_MASK
))
/* Appends an IPv6 header, an ICMPv6 header and 'payload_length' bytes copied
 * from the original packet 'skb' (starting at its IPv6 header) to 'nskb'.
 *
 * The result is an ICMPv6 "packet too big" message that advertises 'mtu',
 * addressed from the original destination to the original source.  The
 * checksum is computed over the ICMPv6 header and copied payload and then
 * finished with csum_ipv6_magic().
 * NOTE(review): some header field assignments are not present in this
 * listing. */
419 ipv6_build_icmp(struct sk_buff
*skb
, struct sk_buff
*nskb
, unsigned int mtu
,
420 unsigned int payload_length
)
422 struct ipv6hdr
*ipv6h
, *old_ipv6h
= ipv6_hdr(skb
);
423 struct icmp6hdr
*icmp6h
;
426 ipv6h
= (struct ipv6hdr
*)skb_put(nskb
, sizeof(struct ipv6hdr
));
427 icmp6h
= (struct icmp6hdr
*)skb_put(nskb
, sizeof(struct icmp6hdr
));
428 payload
= skb_put(nskb
, payload_length
);
433 memset(&ipv6h
->flow_lbl
, 0, sizeof(ipv6h
->flow_lbl
));
434 ipv6h
->payload_len
= htons(sizeof(struct icmp6hdr
)
436 ipv6h
->nexthdr
= NEXTHDR_ICMP
;
437 ipv6h
->hop_limit
= IPV6_DEFAULT_HOPLIMIT
;
/* The reply travels in the opposite direction of the original packet. */
438 ipv6_addr_copy(&ipv6h
->daddr
, &old_ipv6h
->saddr
);
439 ipv6_addr_copy(&ipv6h
->saddr
, &old_ipv6h
->daddr
);
442 icmp6h
->icmp6_type
= ICMPV6_PKT_TOOBIG
;
443 icmp6h
->icmp6_code
= 0;
/* The checksum field is zeroed before the checksum is computed over it. */
444 icmp6h
->icmp6_cksum
= 0;
445 icmp6h
->icmp6_mtu
= htonl(mtu
);
447 nskb
->csum
= csum_partial((u8
*)icmp6h
, sizeof(struct icmp6hdr
), 0);
448 nskb
->csum
= skb_copy_and_csum_bits(skb
, (u8
*)old_ipv6h
- skb
->data
,
449 payload
, payload_length
,
451 icmp6h
->icmp6_cksum
= csum_ipv6_magic(&ipv6h
->saddr
, &ipv6h
->daddr
,
452 sizeof(struct icmp6hdr
)
454 ipv6h
->nexthdr
, nskb
->csum
);
/* Builds a "fragmentation needed" (IPv4) or "packet too big" (IPv6) message
 * in reply to 'skb' and delivers it with vport_receive() on 'vport', i.e. as
 * though it had been received on the tunnel.
 *
 * 'mtu' is the value advertised in the message.  'mutable' supplies the
 * source Ethernet address and an upper bound on the message length.
 * 'flow_key' becomes the tunnel ID of the generated packet when the port
 * uses flow-based keys in both directions.
 *
 * The generated packet copies the Ethernet (and VLAN) header of 'skb' with
 * the addresses reversed, and is limited to 576 bytes for IPv4 or
 * IPV6_MIN_MTU bytes for IPv6.
 * NOTE(review): the return statements are not present in this listing;
 * build_packet() tests the result as a truth value. */
459 send_frag_needed(struct vport
*vport
, const struct mutable_config
*mutable,
460 struct sk_buff
*skb
, unsigned int mtu
, __be32 flow_key
)
462 unsigned int eth_hdr_len
= ETH_HLEN
;
463 unsigned int total_length
= 0, header_length
= 0, payload_length
;
464 struct ethhdr
*eh
, *old_eh
= eth_hdr(skb
);
465 struct sk_buff
*nskb
;
468 if (skb
->protocol
== htons(ETH_P_IP
)) {
469 if (mtu
< IP_MIN_MTU
)
472 if (!ipv4_should_icmp(skb
))
475 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
476 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
477 if (mtu
< IPV6_MIN_MTU
)
480 /* In theory we should do PMTUD on IPv6 multicast messages but
481 * we don't have an address to send from so just fragment. */
482 if (ipv6_addr_type(&ipv6_hdr(skb
)->daddr
) & IPV6_ADDR_MULTICAST
)
485 if (!ipv6_should_icmp(skb
))
493 if (old_eh
->h_proto
== htons(ETH_P_8021Q
))
494 eth_hdr_len
= VLAN_ETH_HLEN
;
496 payload_length
= skb
->len
- eth_hdr_len
;
497 if (skb
->protocol
== htons(ETH_P_IP
)) {
498 header_length
= sizeof(struct iphdr
) + sizeof(struct icmphdr
);
499 total_length
= min_t(unsigned int, header_length
+
500 payload_length
, 576);
502 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
504 header_length
= sizeof(struct ipv6hdr
) +
505 sizeof(struct icmp6hdr
);
506 total_length
= min_t(unsigned int, header_length
+
507 payload_length
, IPV6_MIN_MTU
);
/* Also bound the message by the port's own MTU, then derive how much of the
 * original packet fits after the new headers. */
511 total_length
= min(total_length
, mutable->mtu
);
512 payload_length
= total_length
- header_length
;
514 nskb
= dev_alloc_skb(NET_IP_ALIGN
+ eth_hdr_len
+ header_length
+
519 skb_reserve(nskb
, NET_IP_ALIGN
);
521 /* Ethernet / VLAN */
522 eh
= (struct ethhdr
*)skb_put(nskb
, eth_hdr_len
);
523 memcpy(eh
->h_dest
, old_eh
->h_source
, ETH_ALEN
);
524 memcpy(eh
->h_source
, mutable->eth_addr
, ETH_ALEN
);
525 nskb
->protocol
= eh
->h_proto
= old_eh
->h_proto
;
526 if (old_eh
->h_proto
== htons(ETH_P_8021Q
)) {
527 struct vlan_ethhdr
*vh
= (struct vlan_ethhdr
*)eh
;
529 vh
->h_vlan_TCI
= vlan_eth_hdr(skb
)->h_vlan_TCI
;
530 vh
->h_vlan_encapsulated_proto
= skb
->protocol
;
532 skb_reset_mac_header(nskb
);
535 if (skb
->protocol
== htons(ETH_P_IP
))
536 ipv4_build_icmp(skb
, nskb
, mtu
, payload_length
);
537 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
539 ipv6_build_icmp(skb
, nskb
, mtu
, payload_length
);
542 /* Assume that flow based keys are symmetric with respect to input
543 * and output and use the key that we were going to put on the
544 * outgoing packet for the fake received packet. If the keys are
545 * not symmetric then PMTUD needs to be disabled since we won't have
546 * any way of synthesizing packets. */
547 if (mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
&&
548 mutable->port_config
.flags
& GRE_F_OUT_KEY_ACTION
)
549 OVS_CB(nskb
)->tun_id
= flow_key
;
551 compute_ip_summed(nskb
, false);
552 vport_receive(vport
, nskb
);
/* Makes sure that 'skb' has at least 'headroom' bytes of headroom and that
 * its header is not cloned.  When either condition fails, a replacement is
 * obtained from skb_realloc_headroom() with at least 64 bytes of headroom;
 * checksum state and socket ownership are carried over to it.
 *
 * Returns ERR_PTR(-ENOMEM) on the visible failure path.
 * NOTE(review): the failure check, the disposal of the original skb and the
 * success returns are not present in this listing. */
557 static struct sk_buff
*
558 check_headroom(struct sk_buff
*skb
, int headroom
)
560 if (skb_headroom(skb
) < headroom
|| skb_header_cloned(skb
)) {
561 struct sk_buff
*nskb
= skb_realloc_headroom(skb
, max(headroom
, 64));
564 return ERR_PTR(-ENOMEM
);
567 set_skb_csum_bits(skb
, nskb
);
570 skb_set_owner_w(nskb
, skb
->sk
);
/* Writes the GRE header that follows the outer IP header of 'skb'.
 *
 * The protocol field is set to ETH_P_TEB.  'options' starts at the last
 * 4-byte section of the tunnel header (mutable->tunnel_hlen) and the
 * optional fields are filled in from the end towards the start: first the
 * key (either the per-packet tunnel ID or the configured out_key), then the
 * checksum computed over everything after the IP header.
 * NOTE(review): the flag assignments and the pointer decrements between the
 * options are not present in this listing. */
580 create_gre_header(struct sk_buff
*skb
, const struct mutable_config
*mutable)
582 struct iphdr
*iph
= ip_hdr(skb
);
583 __be16
*flags
= (__be16
*)(iph
+ 1);
584 __be16
*protocol
= flags
+ 1;
585 __be32
*options
= (__be32
*)((u8
*)iph
+ mutable->tunnel_hlen
586 - GRE_HEADER_SECTION
);
588 *protocol
= htons(ETH_P_TEB
);
591 /* Work backwards over the options so the checksum is last. */
592 if (mutable->port_config
.out_key
||
593 mutable->port_config
.flags
& GRE_F_OUT_KEY_ACTION
) {
596 if (mutable->port_config
.flags
& GRE_F_OUT_KEY_ACTION
)
597 *options
= OVS_CB(skb
)->tun_id
;
599 *options
= mutable->port_config
.out_key
;
604 if (mutable->port_config
.flags
& GRE_F_OUT_CSUM
) {
608 *(__sum16
*)options
= csum_fold(skb_checksum(skb
,
609 sizeof(struct iphdr
),
610 skb
->len
- sizeof(struct iphdr
),
/* Verifies the GRE checksum of 'skb' when the GRE_CSUM flag is set in the
 * header that follows the IP header.  With CHECKSUM_COMPLETE the value in
 * skb->csum is folded; the other visible path computes it with
 * __skb_checksum_complete() and marks the skb CHECKSUM_COMPLETE.
 * NOTE(review): the remaining switch cases and the return statement are not
 * present in this listing; gre_rcv() treats a false result as failure. */
616 check_checksum(struct sk_buff
*skb
)
618 struct iphdr
*iph
= ip_hdr(skb
);
619 __be16 flags
= *(__be16
*)(iph
+ 1);
622 if (flags
& GRE_CSUM
) {
623 switch (skb
->ip_summed
) {
624 case CHECKSUM_COMPLETE
:
625 csum
= csum_fold(skb
->csum
);
633 csum
= __skb_checksum_complete(skb
);
634 skb
->ip_summed
= CHECKSUM_COMPLETE
;
/* Parses the GRE header that follows the IP header 'iph'.
 *
 * The header is located using iph->ihl.  The visible checks test for the
 * version and routing flags and for a protocol other than ETH_P_TEB.  The
 * header length starts at one section (GRE_HEADER_SECTION bytes) and grows
 * by one section for each of the checksum, key and sequence number flags.
 *
 * gre_err() treats a negative result as failure and otherwise uses the
 * result as the GRE header length.
 * NOTE(review): the stores to '*flags' and '*key', the error returns and the
 * final return are not present in this listing. */
643 parse_gre_header(struct iphdr
*iph
, __be16
*flags
, __be32
*key
)
645 /* IP and ICMP protocol handlers check that the IHL is valid. */
646 __be16
*flagsp
= (__be16
*)((u8
*)iph
+ (iph
->ihl
<< 2));
647 __be16
*protocol
= flagsp
+ 1;
648 __be32
*options
= (__be32
*)(protocol
+ 1);
653 if (*flags
& (GRE_VERSION
| GRE_ROUTING
))
656 if (*protocol
!= htons(ETH_P_TEB
))
659 hdr_len
= GRE_HEADER_SECTION
;
661 if (*flags
& GRE_CSUM
) {
662 hdr_len
+= GRE_HEADER_SECTION
;
666 if (*flags
& GRE_KEY
) {
667 hdr_len
+= GRE_HEADER_SECTION
;
674 if (*flags
& GRE_SEQ
)
675 hdr_len
+= GRE_HEADER_SECTION
;
/* Returns the TOS value for the outer header: 'tos' combined with the ECN
 * bits of the inner packet by INET_ECN_encapsulate().  The inner value is
 * read from the IPv4 TOS field or the IPv6 traffic class of 'skb'.
 * NOTE(review): the declaration of 'inner' and its value for other protocols
 * are not present in this listing. */
681 ecn_encapsulate(u8 tos
, struct sk_buff
*skb
)
685 if (skb
->protocol
== htons(ETH_P_IP
))
686 inner
= ((struct iphdr
*)skb_network_header(skb
))->tos
;
687 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
688 else if (skb
->protocol
== htons(ETH_P_IPV6
))
689 inner
= ipv6_get_dsfield((struct ipv6hdr
*)skb_network_header(skb
));
694 return INET_ECN_encapsulate(tos
, inner
);
/* Propagates an ECN "congestion experienced" mark from the outer header
 * ('tos') to the inner IPv4 or IPv6 header of 'skb'.  A VLAN tag in front of
 * the inner header is skipped.  Each header is made accessible with
 * pskb_may_pull() before it is touched.
 * NOTE(review): the statements executed when pskb_may_pull() fails are not
 * present in this listing. */
698 ecn_decapsulate(u8 tos
, struct sk_buff
*skb
)
700 if (INET_ECN_is_ce(tos
)) {
701 __be16 protocol
= skb
->protocol
;
702 unsigned int nw_header
= skb_network_header(skb
) - skb
->data
;
704 if (skb
->protocol
== htons(ETH_P_8021Q
)) {
705 if (unlikely(!pskb_may_pull(skb
, VLAN_ETH_HLEN
)))
708 protocol
= vlan_eth_hdr(skb
)->h_vlan_encapsulated_proto
;
709 nw_header
+= VLAN_HLEN
;
712 if (protocol
== htons(ETH_P_IP
)) {
713 if (unlikely(!pskb_may_pull(skb
, nw_header
714 + sizeof(struct iphdr
))))
717 IP_ECN_set_ce((struct iphdr
*)(nw_header
+ skb
->data
));
719 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
720 else if (protocol
== htons(ETH_P_IPV6
)) {
721 if (unlikely(!pskb_may_pull(skb
, nw_header
722 + sizeof(struct ipv6hdr
))))
725 IP6_ECN_set_ce((struct ipv6hdr
*)(nw_header
/* Segments 'skb' in software with skb_gso_segment() (feature set
 * NETIF_F_SG) when it is a GSO packet.
 * NOTE(review): the handling of the segment list and the return statements
 * are not present in this listing; gre_send() checks the result with
 * IS_ERR(). */
732 static struct sk_buff
*
733 handle_gso(struct sk_buff
*skb
)
735 if (skb_is_gso(skb
)) {
736 struct sk_buff
*nskb
= skb_gso_segment(skb
, NETIF_F_SG
);
/* Resolves checksum offload before encapsulation: a CHECKSUM_PARTIAL packet
 * has its checksum completed by skb_checksum_help(), whose result is
 * returned; the other visible path marks the packet CHECKSUM_NONE.
 * NOTE(review): the return statement of that second path is not present in
 * this listing. */
746 handle_csum_offload(struct sk_buff
*skb
)
748 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
749 return skb_checksum_help(skb
);
751 skb
->ip_summed
= CHECKSUM_NONE
;
/* ICMP error handler for GRE (registered as .err_handler of
 * gre_protocol_handlers).
 *
 * Only "destination unreachable / fragmentation needed" is examined.  The
 * quoted packet in 'skb' is one this module sent, so its GRE header is
 * parsed, the sending port is looked up, and the port's output settings are
 * compared with the quoted header.  The skb headers are then pointed at the
 * encapsulated frame, the reported MTU is checked against the protocol
 * minimum, and send_frag_needed() relays the error to the original sender.
 * Afterwards the skb header offsets and protocol are restored.
 * NOTE(review): the early-exit statements and several declarations are not
 * present in this listing. */
756 /* Called with rcu_read_lock. */
758 gre_err(struct sk_buff
*skb
, u32 info
)
761 const struct mutable_config
*mutable;
762 const int type
= icmp_hdr(skb
)->type
;
763 const int code
= icmp_hdr(skb
)->code
;
764 int mtu
= ntohs(icmp_hdr(skb
)->un
.frag
.mtu
);
769 int tunnel_hdr_len
, tot_hdr_len
;
770 unsigned int orig_mac_header
;
771 unsigned int orig_nw_header
;
773 if (type
!= ICMP_DEST_UNREACH
|| code
!= ICMP_FRAG_NEEDED
)
776 /* The minimum size packet that we would actually be able to process:
777 * encapsulating IP header, minimum GRE header, Ethernet header,
778 * inner IPv4 header. */
779 if (!pskb_may_pull(skb
, sizeof(struct iphdr
) + GRE_HEADER_SECTION
+
780 ETH_HLEN
+ sizeof(struct iphdr
)))
783 iph
= (struct iphdr
*)skb
->data
;
785 tunnel_hdr_len
= parse_gre_header(iph
, &flags
, &key
);
786 if (tunnel_hdr_len
< 0)
/* The quoted packet was sent by us, so its source is our local address;
 * compare with the argument order used in gre_rcv(). */
789 vport
= find_port(iph
->saddr
, iph
->daddr
, key
, FIND_PORT_ANY
, &mutable);
793 /* Packets received by this function were previously sent by us, so
794 * any comparisons should be to the output values, not the input.
795 * However, it's not really worth it to have a hash table based on
796 * output keys (especially since ICMP error handling of tunneled packets
797 * isn't that reliable anyways). Therefore, we do a lookup based on the
798 * out key as if it were the in key and then check to see if the input
799 * and output keys are the same. */
800 if (mutable->port_config
.in_key
!= mutable->port_config
.out_key
)
803 if (!!(mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
) !=
804 !!(mutable->port_config
.flags
& GRE_F_OUT_KEY_ACTION
))
807 if ((mutable->port_config
.flags
& GRE_F_OUT_CSUM
) && !(flags
& GRE_CSUM
))
/* From here on tunnel_hdr_len covers the outer IP header as well. */
810 tunnel_hdr_len
+= iph
->ihl
<< 2;
/* Remember the header offsets so they can be restored before returning. */
812 orig_mac_header
= skb_mac_header(skb
) - skb
->data
;
813 orig_nw_header
= skb_network_header(skb
) - skb
->data
;
814 skb_set_mac_header(skb
, tunnel_hdr_len
);
816 tot_hdr_len
= tunnel_hdr_len
+ ETH_HLEN
;
818 skb
->protocol
= eth_hdr(skb
)->h_proto
;
819 if (skb
->protocol
== htons(ETH_P_8021Q
)) {
820 tot_hdr_len
+= VLAN_HLEN
;
821 skb
->protocol
= vlan_eth_hdr(skb
)->h_vlan_encapsulated_proto
;
824 skb_set_network_header(skb
, tot_hdr_len
);
827 if (skb
->protocol
== htons(ETH_P_IP
))
828 tot_hdr_len
+= sizeof(struct iphdr
);
829 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
830 else if (skb
->protocol
== htons(ETH_P_IPV6
))
831 tot_hdr_len
+= sizeof(struct ipv6hdr
);
836 if (!pskb_may_pull(skb
, tot_hdr_len
))
839 if (skb
->protocol
== htons(ETH_P_IP
)) {
840 if (mtu
< IP_MIN_MTU
) {
841 if (ntohs(ip_hdr(skb
)->tot_len
) >= IP_MIN_MTU
)
848 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
849 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
850 if (mtu
< IPV6_MIN_MTU
) {
851 unsigned int packet_length
= sizeof(struct ipv6hdr
) +
852 ntohs(ipv6_hdr(skb
)->payload_len
);
854 if (packet_length
>= IPV6_MIN_MTU
855 || ntohs(ipv6_hdr(skb
)->payload_len
) == 0)
/* Temporarily strip the tunnel headers so that send_frag_needed() sees the
 * encapsulated frame, then put them back. */
863 __pskb_pull(skb
, tunnel_hdr_len
);
864 send_frag_needed(vport
, mutable, skb
, mtu
, key
);
865 skb_push(skb
, tunnel_hdr_len
);
868 skb_set_mac_header(skb
, orig_mac_header
);
869 skb_set_network_header(skb
, orig_nw_header
);
870 skb
->protocol
= htons(ETH_P_IP
);
/* Receive path for GRE packets.
 *
 * Verifies the GRE checksum, parses the GRE header and looks up the port by
 * the packet's destination (our local address), source (the remote address)
 * and key.  An ICMP "port unreachable" is sent on the visible path that
 * follows the lookup.  A packet without a checksum on a port that requires
 * one (GRE_F_IN_CSUM) is recorded as VPORT_E_RX_CRC.  The tunnel header is
 * then pulled, ECN is propagated to the inner packet, the tunnel ID is set
 * from the key (or to 0) and the frame is passed to vport_receive().
 * NOTE(review): the error exits, several declarations and the return
 * statements are not present in this listing. */
873 /* Called with rcu_read_lock. */
875 gre_rcv(struct sk_buff
*skb
)
878 const struct mutable_config
*mutable;
884 if (!pskb_may_pull(skb
, GRE_HEADER_SECTION
+ ETH_HLEN
))
887 if (!check_checksum(skb
))
892 hdr_len
= parse_gre_header(iph
, &flags
, &key
);
896 vport
= find_port(iph
->daddr
, iph
->saddr
, key
, FIND_PORT_ANY
, &mutable);
898 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PORT_UNREACH
, 0);
902 if ((mutable->port_config
.flags
& GRE_F_IN_CSUM
) && !(flags
& GRE_CSUM
)) {
903 vport_record_error(vport
, VPORT_E_RX_CRC
);
907 if (!pskb_pull(skb
, hdr_len
) || !pskb_may_pull(skb
, ETH_HLEN
)) {
908 vport_record_error(vport
, VPORT_E_RX_ERROR
);
912 skb
->pkt_type
= PACKET_HOST
;
913 skb
->protocol
= eth_type_trans(skb
, skb
->dev
);
/* Keep a CHECKSUM_COMPLETE value consistent after removing the GRE and
 * Ethernet headers. */
914 skb_postpull_rcsum(skb
, skb_transport_header(skb
), hdr_len
+ ETH_HLEN
);
919 skb_reset_network_header(skb
);
921 ecn_decapsulate(iph
->tos
, skb
);
923 if (mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
)
924 OVS_CB(skb
)->tun_id
= key
;
926 OVS_CB(skb
)->tun_id
= 0;
/* Put the Ethernet header back in front of the data before handing the frame
 * to the datapath. */
928 skb_push(skb
, ETH_HLEN
);
929 compute_ip_summed(skb
, false);
931 vport_receive(vport
, skb
);
/* Encapsulates one packet (or GSO segment) and transmits it with
 * ip_local_out().
 *
 * 'iph' is the outer IP header template prepared by gre_send(), 'rt' the
 * route, 'max_headroom' the headroom needed in front of the packet and 'mtu'
 * the largest inner packet that fits in the tunnel.
 *
 * An inner packet that exceeds 'mtu' triggers send_frag_needed(): for IPv4
 * when its DF bit is set, for IPv6 always (IPv6 packets above IPV6_MIN_MTU
 * also get DF set on the outer header).  Otherwise the tunnel header is
 * pushed, the template copied in, an IP ID chosen and the GRE header
 * written.  Failures are recorded as VPORT_E_TX_ERROR or
 * VPORT_E_TX_DROPPED.
 * NOTE(review): the final parameter, the error exits and the return
 * statements are not present in this listing; gre_send() adds the return
 * value to 'orig_len'. */
941 build_packet(struct vport
*vport
, const struct mutable_config
*mutable,
942 struct iphdr
*iph
, struct rtable
*rt
, int max_headroom
, int mtu
,
946 struct iphdr
*new_iph
;
947 int orig_len
= skb
->len
;
948 __be16 frag_off
= iph
->frag_off
;
950 skb
= check_headroom(skb
, max_headroom
);
951 if (unlikely(IS_ERR(skb
)))
954 err
= handle_csum_offload(skb
);
958 if (skb
->protocol
== htons(ETH_P_IP
)) {
959 struct iphdr
*old_iph
= ip_hdr(skb
);
961 if ((old_iph
->frag_off
& htons(IP_DF
)) &&
962 mtu
< ntohs(old_iph
->tot_len
)) {
963 if (send_frag_needed(vport
, mutable, skb
, mtu
, OVS_CB(skb
)->tun_id
))
968 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
969 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
970 unsigned int packet_length
= skb
->len
- ETH_HLEN
971 - (eth_hdr(skb
)->h_proto
== htons(ETH_P_8021Q
) ? VLAN_HLEN
: 0);
973 /* IPv6 requires PMTUD if the packet is above the minimum MTU. */
974 if (packet_length
> IPV6_MIN_MTU
)
975 frag_off
= htons(IP_DF
);
977 if (mtu
< packet_length
) {
978 if (send_frag_needed(vport
, mutable, skb
, mtu
, OVS_CB(skb
)->tun_id
))
/* The inner frame becomes the transport payload; the pushed tunnel header
 * starts with the outer IP header. */
984 skb_reset_transport_header(skb
);
985 new_iph
= (struct iphdr
*)skb_push(skb
, mutable->tunnel_hlen
);
986 skb_reset_network_header(skb
);
988 memcpy(new_iph
, iph
, sizeof(struct iphdr
));
989 new_iph
->frag_off
= frag_off
;
990 ip_select_ident(new_iph
, &rt
->u
.dst
, NULL
);
992 create_gre_header(skb
, mutable);
994 /* Allow our local IP stack to fragment the outer packet even if the
995 * DF bit is set as a last resort. */
998 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
999 IPCB(skb
)->flags
= 0;
1001 err
= ip_local_out(skb
);
1002 if (likely(net_xmit_eval(err
) == 0))
1005 vport_record_error(vport
, VPORT_E_TX_ERROR
);
1012 vport_record_error(vport
, VPORT_E_TX_DROPPED
);
/* Transmit path: encapsulates 'skb' in GRE and sends it through the tunnel
 * configured on 'vport'.
 *
 * Steps visible below:
 *   1. Validate the inner VLAN / IPv4 / IPv6 headers with pskb_may_pull().
 *   2. Choose the outer TOS (configured or inherited, then ECN-adjusted).
 *   3. Route the outer packet with ip_route_output_key().
 *   4. Choose the outer TTL (configured, inherited, or the route's hop
 *      limit metric).
 *   5. Compute the inner MTU from the route MTU minus the tunnel header and
 *      any VLAN header, clamped to the protocol minimum, and the DF bit.
 *   6. Fill in the outer IP header template and attach the route to the
 *      skb.
 *   7. Fix up headroom, checksum state and GSO, then call build_packet()
 *      for each resulting segment.
 * NOTE(review): the error exits, several declarations, the segment loop
 * header and the return statements are not present in this listing. */
1018 gre_send(struct vport
*vport
, struct sk_buff
*skb
)
1020 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1021 const struct mutable_config
*mutable = rcu_dereference(gre_vport
->mutable);
1023 struct iphdr
*old_iph
;
1030 /* Validate the protocol headers before we try to use them. */
1031 if (skb
->protocol
== htons(ETH_P_8021Q
)) {
1032 if (unlikely(!pskb_may_pull(skb
, VLAN_ETH_HLEN
)))
1035 skb
->protocol
= vlan_eth_hdr(skb
)->h_vlan_encapsulated_proto
;
1036 skb_set_network_header(skb
, VLAN_ETH_HLEN
);
1039 if (skb
->protocol
== htons(ETH_P_IP
)) {
1040 if (unlikely(!pskb_may_pull(skb
, skb_network_header(skb
)
1041 + sizeof(struct iphdr
) - skb
->data
)))
1044 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1045 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
1046 if (unlikely(!pskb_may_pull(skb
, skb_network_header(skb
)
1047 + sizeof(struct ipv6hdr
) - skb
->data
)))
1051 old_iph
= ip_hdr(skb
);
1053 iph
.tos
= mutable->port_config
.tos
;
1054 if (mutable->port_config
.flags
& GRE_F_TOS_INHERIT
) {
1055 if (skb
->protocol
== htons(ETH_P_IP
))
1056 iph
.tos
= old_iph
->tos
;
1057 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1058 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1059 iph
.tos
= ipv6_get_dsfield(ipv6_hdr(skb
));
1062 iph
.tos
= ecn_encapsulate(iph
.tos
, skb
);
1065 struct flowi fl
= { .nl_u
= { .ip4_u
=
1066 { .daddr
= mutable->port_config
.daddr
,
1067 .saddr
= mutable->port_config
.saddr
,
1068 .tos
= RT_TOS(iph
.tos
) } },
1069 .proto
= IPPROTO_GRE
};
1071 if (ip_route_output_key(&init_net
, &rt
, &fl
))
1075 iph
.ttl
= mutable->port_config
.ttl
;
1076 if (mutable->port_config
.flags
& GRE_F_TTL_INHERIT
) {
1077 if (skb
->protocol
== htons(ETH_P_IP
))
1078 iph
.ttl
= old_iph
->ttl
;
1079 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1080 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1081 iph
.ttl
= ipv6_hdr(skb
)->hop_limit
;
1085 iph
.ttl
= dst_metric(&rt
->u
.dst
, RTAX_HOPLIMIT
);
1087 iph
.frag_off
= (mutable->port_config
.flags
& GRE_F_PMTUD
) ? htons(IP_DF
) : 0;
1089 mtu
= dst_mtu(&rt
->u
.dst
)
1091 - mutable->tunnel_hlen
1092 - (eth_hdr(skb
)->h_proto
== htons(ETH_P_8021Q
) ? VLAN_HLEN
: 0);
1096 if (skb
->protocol
== htons(ETH_P_IP
)) {
/* An inner packet that forbids fragmentation also forbids it on the outer
 * header. */
1097 iph
.frag_off
|= old_iph
->frag_off
& htons(IP_DF
);
1098 mtu
= max(mtu
, IP_MIN_MTU
);
1100 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1101 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1102 mtu
= max(mtu
, IPV6_MIN_MTU
);
1106 iph
.ihl
= sizeof(struct iphdr
) >> 2;
1107 iph
.protocol
= IPPROTO_GRE
;
1108 iph
.daddr
= rt
->rt_dst
;
1109 iph
.saddr
= rt
->rt_src
;
1114 skb_dst_set(skb
, &rt
->u
.dst
);
1116 /* If we are doing GSO on a pskb it is better to make sure that the
1117 * headroom is correct now. We will only have to copy the portion in
1118 * the linear data area and GSO will preserve headroom when it creates
1119 * the segments. This is particularly beneficial on Xen where we get
1120 * lots of GSO pskbs. Conversely, we delay copying if it is just to
1121 * get our own writable clone because GSO may do the copy for us. */
1122 max_headroom
= LL_RESERVED_SPACE(rt
->u
.dst
.dev
) + rt
->u
.dst
.header_len
1123 + mutable->tunnel_hlen
;
1125 if (skb_headroom(skb
) < max_headroom
) {
1126 skb
= check_headroom(skb
, max_headroom
);
1127 if (unlikely(IS_ERR(skb
))) {
1128 vport_record_error(vport
, VPORT_E_TX_DROPPED
);
1133 forward_ip_summed(skb
);
1135 if (unlikely(vswitch_skb_checksum_setup(skb
)))
1138 skb
= handle_gso(skb
);
1139 if (unlikely(IS_ERR(skb
))) {
1140 vport_record_error(vport
, VPORT_E_TX_DROPPED
);
1144 /* Process GSO segments. Try to do any work for the entire packet that
1145 * doesn't involve actually writing to it before this point. */
/* The next pointer is saved before build_packet() is given the current
 * segment. */
1148 struct sk_buff
*next_skb
= skb
->next
;
1151 orig_len
+= build_packet(vport
, mutable, &iph
, rt
, max_headroom
, mtu
, skb
);
1160 vport_record_error(vport
, VPORT_E_TX_ERROR
);
/* Handlers registered for IPPROTO_GRE with inet_add_protocol().
 * NOTE(review): only the .err_handler initializer is present in this
 * listing. */
1165 static struct net_protocol gre_protocol_handlers
= {
1167 .err_handler
= gre_err
,
1175 err
= inet_add_protocol(&gre_protocol_handlers
, IPPROTO_GRE
);
1177 printk(KERN_WARNING
"openvswitch: cannot register gre protocol handler\n");
1185 tbl_destroy(port_table
, NULL
);
1186 inet_del_protocol(&gre_protocol_handlers
, IPPROTO_GRE
);
/* Copies a struct gre_port_config from userspace ('uconfig') into 'mutable'
 * and derives the dependent fields.
 *
 * Visible processing:
 *   - the destination address is tested against 0;
 *   - with GRE_F_IN_KEY_MATCH the input key is forced to 0;
 *   - find_port() is used to detect another port ('old_vport' different
 *     from 'cur_vport') that already has the same lookup key;
 *   - with GRE_F_OUT_KEY_ACTION the output key is forced to 0;
 *   - tunnel_hlen is the IP header plus the base GRE section, plus one
 *     section each for an output checksum and an output key.
 * 'cur_vport' is the port being reconfigured, or NULL when creating a port
 * (see gre_create()).
 * NOTE(review): the error returns are not present in this listing. */
1190 set_config(const struct vport
*cur_vport
, struct mutable_config
*mutable,
1191 const void __user
*uconfig
)
1193 const struct vport
*old_vport
;
1194 const struct mutable_config
*old_mutable
;
1197 if (copy_from_user(&mutable->port_config
, uconfig
, sizeof(struct gre_port_config
)))
1200 if (mutable->port_config
.daddr
== 0)
1203 if (mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
) {
1204 port_type
= FIND_PORT_MATCH
;
1205 mutable->port_config
.in_key
= 0;
1207 port_type
= FIND_PORT_KEY
;
1209 old_vport
= find_port(mutable->port_config
.saddr
,
1210 mutable->port_config
.daddr
,
1211 mutable->port_config
.in_key
, port_type
,
1214 if (old_vport
&& old_vport
!= cur_vport
)
1217 if (mutable->port_config
.flags
& GRE_F_OUT_KEY_ACTION
)
1218 mutable->port_config
.out_key
= 0;
1220 mutable->tunnel_hlen
= sizeof(struct iphdr
) + GRE_HEADER_SECTION
;
1222 if (mutable->port_config
.flags
& GRE_F_OUT_CSUM
)
1223 mutable->tunnel_hlen
+= GRE_HEADER_SECTION
;
1225 if (mutable->port_config
.out_key
||
1226 mutable->port_config
.flags
& GRE_F_OUT_KEY_ACTION
)
1227 mutable->tunnel_hlen
+= GRE_HEADER_SECTION
;
1232 static struct vport
*
1233 gre_create(const char *name
, const void __user
*config
)
1235 struct vport
*vport
;
1236 struct gre_vport
*gre_vport
;
1239 vport
= vport_alloc(sizeof(struct gre_vport
), &gre_vport_ops
);
1240 if (IS_ERR(vport
)) {
1241 err
= PTR_ERR(vport
);
1245 gre_vport
= gre_vport_priv(vport
);
1247 strcpy(gre_vport
->name
, name
);
1249 gre_vport
->mutable = kmalloc(sizeof(struct mutable_config
), GFP_KERNEL
);
1250 if (!gre_vport
->mutable) {
1252 goto error_free_vport
;
1255 vport_gen_rand_ether_addr(gre_vport
->mutable->eth_addr
);
1256 gre_vport
->mutable->mtu
= ETH_DATA_LEN
;
1258 err
= set_config(NULL
, gre_vport
->mutable, config
);
1260 goto error_free_mutable
;
1262 err
= add_port(vport
);
1264 goto error_free_mutable
;
1269 kfree(gre_vport
->mutable);
1273 return ERR_PTR(err
);
/* Reconfigures 'vport' from the userspace struct gre_port_config at
 * 'config'.
 *
 * Works on a copy of the current configuration: the copy is updated by
 * set_config() and then published with assign_config_rcu().  When the source
 * address, destination address, input key or key-match flag changed, the
 * port is removed from the port table before the new configuration is
 * published and re-inserted afterwards, because those fields determine its
 * position in the table.
 * NOTE(review): the error handling is not present in this listing.  The
 * comment in gre_destroy() says a port might be missing from the table "if a
 * modify failed earlier", which suggests that a failure between del_port()
 * and add_port() leaves the port out of the table -- confirm. */
1277 gre_modify(struct vport
*vport
, const void __user
*config
)
1279 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1280 struct mutable_config
*mutable;
1282 int update_hash
= 0;
1284 mutable = kmemdup(gre_vport
->mutable, sizeof(struct mutable_config
), GFP_KERNEL
);
1290 err
= set_config(vport
, mutable, config
);
1294 /* Only remove the port from the hash table if something that would
1295 * affect the lookup has changed. */
1296 if (gre_vport
->mutable->port_config
.saddr
!= mutable->port_config
.saddr
||
1297 gre_vport
->mutable->port_config
.daddr
!= mutable->port_config
.daddr
||
1298 gre_vport
->mutable->port_config
.in_key
!= mutable->port_config
.in_key
||
1299 (gre_vport
->mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
) !=
1300 (mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
))
1304 /* This update is not atomic but the lookup uses the config, which
1305 * serves as an inherent double check. */
1307 err
= del_port(vport
);
1312 assign_config_rcu(vport
, mutable);
1315 err
= add_port(vport
);
/* Destroys 'vport'.  The port is first looked up in the port table with its
 * own configuration, to find out whether it is still present there, and its
 * configuration is then freed with kfree().
 * NOTE(review): the statement executed when the lookup finds the port, the
 * release of the vport itself and the return statement are not present in
 * this listing. */
1329 gre_destroy(struct vport
*vport
)
1331 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1333 const struct mutable_config
*old_mutable
;
1335 /* Do a hash table lookup to make sure that the port exists. It should
1336 * exist but might not if a modify failed earlier. */
1337 if (gre_vport
->mutable->port_config
.flags
& GRE_F_IN_KEY_MATCH
)
1338 port_type
= FIND_PORT_MATCH
;
1340 port_type
= FIND_PORT_KEY
;
1342 if (vport
== find_port(gre_vport
->mutable->port_config
.saddr
,
1343 gre_vport
->mutable->port_config
.daddr
,
1344 gre_vport
->mutable->port_config
.in_key
, port_type
, &old_mutable
))
1347 kfree(gre_vport
->mutable);
/* Changes the MTU of 'vport' by duplicating the current configuration and
 * publishing the copy with assign_config_rcu().
 * NOTE(review): the allocation failure check, the assignment of 'mtu' to the
 * copy and the return statement are not present in this listing. */
1354 gre_set_mtu(struct vport
*vport
, int mtu
)
1356 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1357 struct mutable_config
*mutable;
1359 mutable = kmemdup(gre_vport
->mutable, sizeof(struct mutable_config
), GFP_KERNEL
);
1364 assign_config_rcu(vport
, mutable);
/* Changes the Ethernet address of 'vport' to the ETH_ALEN bytes at 'addr'.
 * The current configuration is duplicated, the address is written to the
 * copy and the copy is published with assign_config_rcu().
 * NOTE(review): the allocation failure check and the return statement are
 * not present in this listing. */
1370 gre_set_addr(struct vport
*vport
, const unsigned char *addr
)
1372 struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1373 struct mutable_config
*mutable;
1375 mutable = kmemdup(gre_vport
->mutable, sizeof(struct mutable_config
), GFP_KERNEL
);
1379 memcpy(mutable->eth_addr
, addr
, ETH_ALEN
);
1380 assign_config_rcu(vport
, mutable);
1387 gre_get_name(const struct vport
*vport
)
1389 const struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1390 return gre_vport
->name
;
1393 static const unsigned char *
1394 gre_get_addr(const struct vport
*vport
)
1396 const struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1397 return rcu_dereference(gre_vport
->mutable)->eth_addr
;
1401 gre_get_mtu(const struct vport
*vport
)
1403 const struct gre_vport
*gre_vport
= gre_vport_priv(vport
);
1404 return rcu_dereference(gre_vport
->mutable)->mtu
;
1407 struct vport_ops gre_vport_ops
= {
1409 .flags
= VPORT_F_GEN_STATS
| VPORT_F_TUN_ID
,
1412 .create
= gre_create
,
1413 .modify
= gre_modify
,
1414 .destroy
= gre_destroy
,
1415 .set_mtu
= gre_set_mtu
,
1416 .set_addr
= gre_set_addr
,
1417 .get_name
= gre_get_name
,
1418 .get_addr
= gre_get_addr
,
1419 .get_dev_flags
= vport_gen_get_dev_flags
,
1420 .is_running
= vport_gen_is_running
,
1421 .get_operstate
= vport_gen_get_operstate
,
1422 .get_mtu
= gre_get_mtu
,