/*
 * Copyright (c) 2010 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/in_route.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/version.h>

#include <net/dsfield.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
#endif
#include <net/protocol.h>
#include <net/route.h>
#include <net/xfrm.h>

#include "actions.h"
#include "datapath.h"
#include "openvswitch/gre.h"
#include "table.h"
#include "vport.h"
#include "vport-generic.h"

/* The absolute minimum fragment size. Note that there are many other
 * definitions of the minimum MTU. */
#define IP_MIN_MTU 68

/* The GRE header is composed of a series of sections: a base and then a
 * variable number of options. */
#define GRE_HEADER_SECTION 4

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#define rt_dst(rt) (rt->dst)
#else
#define rt_dst(rt) (rt->u.dst)
#endif

struct gre_base_hdr {
	__be16 flags;
	__be16 protocol;
};

struct mutable_config {
	struct rcu_head rcu;

	unsigned char eth_addr[ETH_ALEN];
	unsigned int mtu;
	struct gre_port_config port_config;

	int tunnel_hlen; /* Tunnel header length. */
};

struct gre_vport {
	struct rcu_head rcu;
	struct tbl_node tbl_node;

	char name[IFNAMSIZ];

	/* Protected by RCU. */
	struct mutable_config *mutable;
};

/* Protected by RCU. */
static struct tbl *port_table;

/* These are just used as an optimization: they don't require any kind of
 * synchronization because we could have just as easily read the value before
 * the port change happened. */
static unsigned int key_local_remote_ports;
static unsigned int key_remote_ports;
static unsigned int local_remote_ports;
static unsigned int remote_ports;

static inline struct gre_vport *gre_vport_priv(const struct vport *vport)
{
	return vport_priv(vport);
}

static inline struct vport *gre_vport_to_vport(const struct gre_vport *gre_vport)
{
	return vport_from_priv(gre_vport);
}

static inline struct gre_vport *gre_vport_table_cast(const struct tbl_node *node)
{
	return container_of(node, struct gre_vport, tbl_node);
}

/* RCU callback. */
static void free_config(struct rcu_head *rcu)
{
	struct mutable_config *c = container_of(rcu, struct mutable_config, rcu);
	kfree(c);
}

static void assign_config_rcu(struct vport *vport,
			      struct mutable_config *new_config)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct mutable_config *old_config;

	old_config = rcu_dereference(gre_vport->mutable);
	rcu_assign_pointer(gre_vport->mutable, new_config);
	call_rcu(&old_config->rcu, free_config);
}
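
/*
 * Note added for clarity: every writable field of a port lives in one
 * mutable_config, and updates go through the copy/publish pattern above.
 * gre_set_mtu() below is the minimal example: kmemdup() the current config,
 * change one field, then assign_config_rcu(), so readers under
 * rcu_read_lock() always see a complete old or new snapshot and the old
 * copy is freed only after a grace period.
 */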

static unsigned int *find_port_pool(const struct mutable_config *mutable)
{
	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) {
		if (mutable->port_config.saddr)
			return &local_remote_ports;
		else
			return &remote_ports;
	} else {
		if (mutable->port_config.saddr)
			return &key_local_remote_ports;
		else
			return &key_remote_ports;
	}
}

enum lookup_key {
	LOOKUP_SADDR = 0,
	LOOKUP_DADDR = 1,
	LOOKUP_KEY = 2,
	LOOKUP_KEY_MATCH = 3
};

struct port_lookup_key {
	u32 vals[4]; /* Contains enum lookup_key keys. */
	const struct mutable_config *mutable;
};

/* Modifies 'target' to store the rcu_dereferenced pointer that was used to do
 * the comparison. */
static int port_cmp(const struct tbl_node *node, void *target)
{
	const struct gre_vport *gre_vport = gre_vport_table_cast(node);
	struct port_lookup_key *lookup = target;

	lookup->mutable = rcu_dereference(gre_vport->mutable);

	return ((lookup->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) ==
			lookup->vals[LOOKUP_KEY_MATCH]) &&
	       lookup->mutable->port_config.daddr == lookup->vals[LOOKUP_DADDR] &&
	       lookup->mutable->port_config.in_key == lookup->vals[LOOKUP_KEY] &&
	       lookup->mutable->port_config.saddr == lookup->vals[LOOKUP_SADDR];
}

static u32 port_hash(struct port_lookup_key *lookup)
{
	return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0);
}
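
/*
 * Sketch (added; mirrors add_port() and find_port() below): the hash covers
 * the four 32-bit words of vals[] in enum lookup_key order, so every caller
 * must fill the array the same way before calling port_hash():
 *
 *	lookup.vals[LOOKUP_SADDR]     = saddr;
 *	lookup.vals[LOOKUP_DADDR]     = daddr;
 *	lookup.vals[LOOKUP_KEY]       = key;
 *	lookup.vals[LOOKUP_KEY_MATCH] = 0 or GRE_F_IN_KEY_MATCH;
 *	hash = jhash2(lookup.vals, ARRAY_SIZE(lookup.vals), 0);
 */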
static int add_port(struct vport *vport)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct port_lookup_key lookup;
	int err;

	if (!port_table) {
		struct tbl *new_table;

		new_table = tbl_create(0);
		if (!new_table)
			return -ENOMEM;

		rcu_assign_pointer(port_table, new_table);

	} else if (tbl_count(port_table) > tbl_n_buckets(port_table)) {
		struct tbl *old_table = port_table;
		struct tbl *new_table;

		new_table = tbl_expand(old_table);
		if (IS_ERR(new_table))
			return PTR_ERR(new_table);

		rcu_assign_pointer(port_table, new_table);
		tbl_deferred_destroy(old_table, NULL);
	}

	lookup.vals[LOOKUP_SADDR] = gre_vport->mutable->port_config.saddr;
	lookup.vals[LOOKUP_DADDR] = gre_vport->mutable->port_config.daddr;
	lookup.vals[LOOKUP_KEY] = gre_vport->mutable->port_config.in_key;
	lookup.vals[LOOKUP_KEY_MATCH] = gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH;

	err = tbl_insert(port_table, &gre_vport->tbl_node, port_hash(&lookup));
	if (err)
		return err;

	(*find_port_pool(gre_vport->mutable))++;

	return 0;
}

static int del_port(struct vport *vport)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	int err;

	err = tbl_remove(port_table, &gre_vport->tbl_node);
	if (err)
		return err;

	(*find_port_pool(gre_vport->mutable))--;

	return 0;
}

#define FIND_PORT_KEY	(1 << 0)
#define FIND_PORT_MATCH	(1 << 1)
#define FIND_PORT_ANY	(FIND_PORT_KEY | FIND_PORT_MATCH)

static struct vport *find_port(__be32 saddr, __be32 daddr, __be32 key,
			       int port_type,
			       const struct mutable_config **mutable)
{
	struct port_lookup_key lookup;
	struct tbl *table = rcu_dereference(port_table);
	struct tbl_node *tbl_node;

	if (!table)
		return NULL;

	lookup.vals[LOOKUP_SADDR] = saddr;
	lookup.vals[LOOKUP_DADDR] = daddr;

	if (port_type & FIND_PORT_KEY) {
		lookup.vals[LOOKUP_KEY] = key;
		lookup.vals[LOOKUP_KEY_MATCH] = 0;

		if (key_local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (key_remote_ports) {
			lookup.vals[LOOKUP_SADDR] = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;

			lookup.vals[LOOKUP_SADDR] = saddr;
		}
	}

	if (port_type & FIND_PORT_MATCH) {
		lookup.vals[LOOKUP_KEY] = 0;
		lookup.vals[LOOKUP_KEY_MATCH] = GRE_F_IN_KEY_MATCH;

		if (local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (remote_ports) {
			lookup.vals[LOOKUP_SADDR] = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}
	}

	return NULL;

found:
	*mutable = lookup.mutable;
	return gre_vport_to_vport(gre_vport_table_cast(tbl_node));
}
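
/*
 * Lookup order note (added): find_port() prefers the most specific match,
 * and consults a class of ports only when its counter shows at least one
 * member: keyed ports bound to (saddr, daddr, key) first, then keyed ports
 * matching any local address, then the same two steps again for flow-keyed
 * (GRE_F_IN_KEY_MATCH) ports with the key zeroed out.
 */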
static bool check_ipv4_address(__be32 addr)
{
	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
	    || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
		return false;

	return true;
}

static bool ipv4_should_icmp(struct sk_buff *skb)
{
	struct iphdr *old_iph = ip_hdr(skb);

	/* Don't respond to L2 broadcast. */
	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	/* Don't respond to L3 broadcast or invalid addresses. */
	if (!check_ipv4_address(old_iph->daddr) ||
	    !check_ipv4_address(old_iph->saddr))
		return false;

	/* Only respond to the first fragment. */
	if (old_iph->frag_off & htons(IP_OFFSET))
		return false;

	/* Don't respond to ICMP error messages. */
	if (old_iph->protocol == IPPROTO_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
						(old_iph->ihl << 2) +
						offsetof(struct icmphdr, type) -
						skb->data, sizeof(icmp_type),
						&icmp_type);

		if (!icmp_typep)
			return false;

		if (*icmp_typep > NR_ICMP_TYPES
		    || (*icmp_typep <= ICMP_PARAMETERPROB
			&& *icmp_typep != ICMP_ECHOREPLY
			&& *icmp_typep != ICMP_ECHO))
			return false;
	}

	return true;
}

static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct iphdr *iph, *old_iph = ip_hdr(skb);
	struct icmphdr *icmph;
	u8 *payload;

	iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	payload = skb_put(nskb, payload_length);

	/* IP */
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
		   IPTOS_PREC_INTERNETCONTROL;
	iph->tot_len = htons(sizeof(struct iphdr)
			     + sizeof(struct icmphdr)
			     + payload_length);
	get_random_bytes(&iph->id, sizeof(iph->id));
	iph->frag_off = 0;
	iph->ttl = IPDEFTTL;
	iph->protocol = IPPROTO_ICMP;
	iph->daddr = old_iph->saddr;
	iph->saddr = old_iph->daddr;

	ip_send_check(iph);

	/* ICMP */
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = ICMP_FRAG_NEEDED;
	icmph->un.gateway = htonl(mtu);
	icmph->checksum = 0;

	nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmph->checksum = csum_fold(nskb->csum);
}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static bool ipv6_should_icmp(struct sk_buff *skb)
{
	struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
	int addr_type;
	int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Check source address is valid. */
	addr_type = ipv6_addr_type(&old_ipv6h->saddr);
	if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
		return false;

	/* Don't reply to unspecified addresses. */
	if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
		return false;

	/* Don't respond to ICMP error messages. */
	payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
	if (payload_off < 0)
		return false;

	if (nexthdr == NEXTHDR_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, payload_off +
						offsetof(struct icmp6hdr,
							 icmp6_type),
						sizeof(icmp_type), &icmp_type);

		if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
			return false;
	}

	return true;
}

static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
	struct icmp6hdr *icmp6h;
	u8 *payload;

	ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
	icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
	payload = skb_put(nskb, payload_length);

	/* IPv6 */
	ipv6h->version = 6;
	ipv6h->priority = 0;
	memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
	ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
				   + payload_length);
	ipv6h->nexthdr = NEXTHDR_ICMP;
	ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
	ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);

	/* ICMPv6 */
	icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
	icmp6h->icmp6_code = 0;
	icmp6h->icmp6_cksum = 0;
	icmp6h->icmp6_mtu = htonl(mtu);

	nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					      sizeof(struct icmp6hdr)
					      + payload_length,
					      ipv6h->nexthdr, nskb->csum);
}
#endif /* IPv6 */

static bool send_frag_needed(struct vport *vport,
			     const struct mutable_config *mutable,
			     struct sk_buff *skb, unsigned int mtu,
			     __be32 flow_key)
{
	unsigned int eth_hdr_len = ETH_HLEN;
	unsigned int total_length = 0, header_length = 0, payload_length;
	struct ethhdr *eh, *old_eh = eth_hdr(skb);
	struct sk_buff *nskb;

	/* Sanity check */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU)
			return false;

		if (!ipv4_should_icmp(skb))
			return true;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			return false;

		/* In theory we should do PMTUD on IPv6 multicast messages but
		 * we don't have an address to send from so just fragment. */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
			return false;

		if (!ipv6_should_icmp(skb))
			return true;
	}
#endif
	else
		return false;

	/* Allocate */
	if (old_eh->h_proto == htons(ETH_P_8021Q))
		eth_hdr_len = VLAN_ETH_HLEN;

	payload_length = skb->len - eth_hdr_len;
	if (skb->protocol == htons(ETH_P_IP)) {
		header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
		total_length = min_t(unsigned int, header_length +
						   payload_length, 576);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else {
		header_length = sizeof(struct ipv6hdr) +
				sizeof(struct icmp6hdr);
		total_length = min_t(unsigned int, header_length +
						   payload_length, IPV6_MIN_MTU);
	}
#endif

	total_length = min(total_length, mutable->mtu);
	payload_length = total_length - header_length;

	nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
			     payload_length);
	if (!nskb)
		return false;

	skb_reserve(nskb, NET_IP_ALIGN);

	/* Ethernet / VLAN */
	eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
	memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
	memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
	nskb->protocol = eh->h_proto = old_eh->h_proto;
	if (old_eh->h_proto == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;

		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
		vh->h_vlan_encapsulated_proto = skb->protocol;
	}
	skb_reset_mac_header(nskb);

	/* Protocol */
	if (skb->protocol == htons(ETH_P_IP))
		ipv4_build_icmp(skb, nskb, mtu, payload_length);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else
		ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif

	/* Assume that flow based keys are symmetric with respect to input
	 * and output and use the key that we were going to put on the
	 * outgoing packet for the fake received packet. If the keys are
	 * not symmetric then PMTUD needs to be disabled since we won't have
	 * any way of synthesizing packets. */
	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH &&
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
		OVS_CB(nskb)->tun_id = flow_key;

	compute_ip_summed(nskb, false);
	vport_receive(vport, nskb);

	return true;
}
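
/*
 * Summary (added): send_frag_needed() emulates path MTU discovery for
 * tunneled traffic. It synthesizes an ICMP fragmentation-needed (IPv4) or
 * packet-too-big (IPv6) error that appears to come from the original
 * packet's destination and injects it back into the datapath with
 * vport_receive(), so the sender behind the switch lowers its path MTU
 * without the error ever crossing the wire.
 */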
static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
{
	if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
		if (!nskb) {
			kfree_skb(skb);
			return ERR_PTR(-ENOMEM);
		}

		set_skb_csum_bits(skb, nskb);

		if (skb->sk)
			skb_set_owner_w(nskb, skb->sk);

		dev_kfree_skb(skb);
		return nskb;
	}

	return skb;
}

static void create_gre_header(struct sk_buff *skb,
			      const struct mutable_config *mutable)
{
	struct iphdr *iph = ip_hdr(skb);
	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
	__be32 *options = (__be32 *)((u8 *)iph + mutable->tunnel_hlen
					       - GRE_HEADER_SECTION);

	greh->protocol = htons(ETH_P_TEB);
	greh->flags = 0;

	/* Work backwards over the options so the checksum is last. */
	if (mutable->port_config.out_key ||
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) {
		greh->flags |= GRE_KEY;

		if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
			*options = OVS_CB(skb)->tun_id;
		else
			*options = mutable->port_config.out_key;

		options--;
	}

	if (mutable->port_config.flags & GRE_F_CSUM) {
		greh->flags |= GRE_CSUM;

		*options = 0;
		*(__sum16 *)options = csum_fold(skb_checksum(skb,
						sizeof(struct iphdr),
						skb->len - sizeof(struct iphdr),
						0));
	}
}
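
/*
 * Resulting layout (illustrative): with GRE_F_CSUM and an output key both
 * in effect, tunnel_hlen covers the base header plus two option sections,
 * and create_gre_header() fills them in RFC 2890 order:
 *
 *	iph + 1:	flags = GRE_CSUM | GRE_KEY, protocol = ETH_P_TEB
 *	+4 bytes:	folded checksum over the GRE header and payload
 *	+8 bytes:	key (out_key, or OVS_CB(skb)->tun_id when
 *			GRE_F_OUT_KEY_ACTION is set)
 */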
static int check_checksum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	__be16 flags = *(__be16 *)(iph + 1);
	__sum16 csum = 0;

	if (flags & GRE_CSUM) {
		switch (skb->ip_summed) {
		case CHECKSUM_COMPLETE:
			csum = csum_fold(skb->csum);

			if (!csum)
				break;
			/* Fall through. */

		case CHECKSUM_NONE:
			skb->csum = 0;
			csum = __skb_checksum_complete(skb);
			skb->ip_summed = CHECKSUM_COMPLETE;
			break;
		}
	}

	return (csum == 0);
}

static int parse_gre_header(struct iphdr *iph, __be16 *flags, __be32 *key)
{
	/* IP and ICMP protocol handlers check that the IHL is valid. */
	struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
	__be32 *options = (__be32 *)(greh + 1);
	int hdr_len;

	*flags = greh->flags;

	if (greh->flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (greh->protocol != htons(ETH_P_TEB))
		return -EINVAL;

	hdr_len = GRE_HEADER_SECTION;

	if (greh->flags & GRE_CSUM) {
		hdr_len += GRE_HEADER_SECTION;
		options++;
	}

	if (greh->flags & GRE_KEY) {
		hdr_len += GRE_HEADER_SECTION;

		*key = *options;
		options++;
	} else
		*key = 0;

	if (greh->flags & GRE_SEQ)
		hdr_len += GRE_HEADER_SECTION;

	return hdr_len;
}
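
/*
 * On-the-wire header handled above (diagram added for reference; RFC 2784
 * base header with the RFC 2890 key and sequence extensions, each option
 * occupying one GRE_HEADER_SECTION of 4 bytes):
 *
 *	+----------------------+-------------------------------+
 *	| flags | version = 0  | protocol (must be ETH_P_TEB)  |
 *	+----------------------+-------------------------------+
 *	| checksum + reserved   (present if GRE_CSUM)          |
 *	| key                   (present if GRE_KEY)           |
 *	| sequence number       (present if GRE_SEQ)           |
 *	+------------------------------------------------------+
 *
 * Headers with GRE_VERSION or GRE_ROUTING set are rejected as -EINVAL.
 */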
static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb)
{
	u8 inner;

	if (skb->protocol == htons(ETH_P_IP))
		inner = ((struct iphdr *)skb_network_header(skb))->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb));
#endif
	else
		inner = 0;

	return INET_ECN_encapsulate(tos, inner);
}

static inline void ecn_decapsulate(u8 tos, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(tos)) {
		__be16 protocol = skb->protocol;
		unsigned int nw_header = skb_network_header(skb) - skb->data;

		if (skb->protocol == htons(ETH_P_8021Q)) {
			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
				return;

			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
			nw_header += VLAN_HLEN;
		}

		if (protocol == htons(ETH_P_IP)) {
			if (unlikely(!pskb_may_pull(skb, nw_header
						    + sizeof(struct iphdr))))
				return;

			IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
		}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (protocol == htons(ETH_P_IPV6)) {
			if (unlikely(!pskb_may_pull(skb, nw_header
						    + sizeof(struct ipv6hdr))))
				return;

			IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
							  + skb->data));
		}
#endif
	}
}
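
/*
 * ECN note (added): ecn_encapsulate() folds the inner packet's ECN bits
 * into the outer TOS via INET_ECN_encapsulate() on the way out, and
 * ecn_decapsulate() copies a Congestion Experienced mark from the outer
 * header back onto the inner IPv4/IPv6 header on the way in, so congestion
 * signals survive the tunnel as described in RFC 3168.
 */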
static struct sk_buff *handle_gso(struct sk_buff *skb)
{
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, 0);

		dev_kfree_skb(skb);
		return nskb;
	}

	return skb;
}

static int handle_csum_offload(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		return skb_checksum_help(skb);
	else {
		skb->ip_summed = CHECKSUM_NONE;
		return 0;
	}
}

/* Called with rcu_read_lock. */
static void gre_err(struct sk_buff *skb, u32 info)
{
	struct vport *vport;
	const struct mutable_config *mutable;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);

	struct iphdr *iph;
	__be16 flags;
	__be32 key;
	int tunnel_hdr_len, tot_hdr_len;
	unsigned int orig_mac_header;
	unsigned int orig_nw_header;

	if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED)
		return;

	/* The minimum size packet that we would actually be able to process:
	 * encapsulating IP header, minimum GRE header, Ethernet header,
	 * inner IPv4 header. */
	if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION +
			   ETH_HLEN + sizeof(struct iphdr)))
		return;

	iph = (struct iphdr *)skb->data;

	tunnel_hdr_len = parse_gre_header(iph, &flags, &key);
	if (tunnel_hdr_len < 0)
		return;

	vport = find_port(iph->saddr, iph->daddr, key, FIND_PORT_ANY, &mutable);
	if (!vport)
		return;

	/* Packets received by this function were previously sent by us, so
	 * any comparisons should be to the output values, not the input.
	 * However, it's not really worth it to have a hash table based on
	 * output keys (especially since ICMP error handling of tunneled packets
	 * isn't that reliable anyway). Therefore, we do a lookup based on the
	 * out key as if it were the in key and then check to see if the input
	 * and output keys are the same. */
	if (mutable->port_config.in_key != mutable->port_config.out_key)
		return;

	if (!!(mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
	    !!(mutable->port_config.flags & GRE_F_OUT_KEY_ACTION))
		return;

	if ((mutable->port_config.flags & GRE_F_CSUM) && !(flags & GRE_CSUM))
		return;

	tunnel_hdr_len += iph->ihl << 2;

	orig_mac_header = skb_mac_header(skb) - skb->data;
	orig_nw_header = skb_network_header(skb) - skb->data;
	skb_set_mac_header(skb, tunnel_hdr_len);

	tot_hdr_len = tunnel_hdr_len + ETH_HLEN;

	skb->protocol = eth_hdr(skb)->h_proto;
	if (skb->protocol == htons(ETH_P_8021Q)) {
		tot_hdr_len += VLAN_HLEN;
		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
	}

	skb_set_network_header(skb, tot_hdr_len);
	mtu -= tot_hdr_len;

	if (skb->protocol == htons(ETH_P_IP))
		tot_hdr_len += sizeof(struct iphdr);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		tot_hdr_len += sizeof(struct ipv6hdr);
#endif
	else
		goto out;

	if (!pskb_may_pull(skb, tot_hdr_len))
		goto out;

	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU) {
			if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU)
				mtu = IP_MIN_MTU;
			else
				goto out;
		}
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU) {
			unsigned int packet_length = sizeof(struct ipv6hdr) +
						     ntohs(ipv6_hdr(skb)->payload_len);

			if (packet_length >= IPV6_MIN_MTU
			    || ntohs(ipv6_hdr(skb)->payload_len) == 0)
				mtu = IPV6_MIN_MTU;
			else
				goto out;
		}
	}
#endif

	__pskb_pull(skb, tunnel_hdr_len);
	send_frag_needed(vport, mutable, skb, mtu, key);
	skb_push(skb, tunnel_hdr_len);

out:
	skb_set_mac_header(skb, orig_mac_header);
	skb_set_network_header(skb, orig_nw_header);
	skb->protocol = htons(ETH_P_IP);
}
/* Called with rcu_read_lock. */
static int gre_rcv(struct sk_buff *skb)
{
	struct vport *vport;
	const struct mutable_config *mutable;
	int hdr_len;
	struct iphdr *iph;
	__be16 flags;
	__be32 key;

	if (!pskb_may_pull(skb, GRE_HEADER_SECTION + ETH_HLEN))
		goto error;

	if (!check_checksum(skb))
		goto error;

	iph = ip_hdr(skb);

	hdr_len = parse_gre_header(iph, &flags, &key);
	if (hdr_len < 0)
		goto error;

	vport = find_port(iph->daddr, iph->saddr, key, FIND_PORT_ANY, &mutable);
	if (!vport) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		goto error;
	}

	if (!pskb_pull(skb, hdr_len) || !pskb_may_pull(skb, ETH_HLEN)) {
		vport_record_error(vport, VPORT_E_RX_ERROR);
		goto error;
	}

	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, skb->dev);
	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);

	skb_dst_drop(skb);
	nf_reset(skb);
	secpath_reset(skb);
	skb_reset_network_header(skb);

	ecn_decapsulate(iph->tos, skb);

	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH)
		OVS_CB(skb)->tun_id = key;
	else
		OVS_CB(skb)->tun_id = 0;

	skb_push(skb, ETH_HLEN);
	compute_ip_summed(skb, false);

	vport_receive(vport, skb);

	return 0;

error:
	kfree_skb(skb);
	return 0;
}
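
/*
 * Receive path recap (added): gre_rcv() verifies the optional checksum,
 * parses the header, and resolves the port from the outer (daddr, saddr,
 * key) tuple; unmatched packets draw an ICMP port-unreachable. It then
 * strips the outer headers, propagates ECN, records the key as tun_id for
 * flow-keyed ports, and hands the inner Ethernet frame to the datapath
 * through vport_receive().
 */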
static int build_packet(struct vport *vport, const struct mutable_config *mutable,
			struct iphdr *iph, struct rtable *rt, int max_headroom,
			int mtu, struct sk_buff *skb)
{
	int err;
	struct iphdr *new_iph;
	int orig_len = skb->len;
	__be16 frag_off = iph->frag_off;

	skb = check_headroom(skb, max_headroom);
	if (unlikely(IS_ERR(skb)))
		goto error;

	err = handle_csum_offload(skb);
	if (err)
		goto error_free;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *old_iph = ip_hdr(skb);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto error_free;
		}
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		unsigned int packet_length = skb->len - ETH_HLEN
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);

		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
		if (packet_length > IPV6_MIN_MTU)
			frag_off = htons(IP_DF);

		if (mtu < packet_length) {
			if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto error_free;
		}
	}
#endif

	skb_reset_transport_header(skb);
	new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
	skb_reset_network_header(skb);

	memcpy(new_iph, iph, sizeof(struct iphdr));
	new_iph->frag_off = frag_off;
	ip_select_ident(new_iph, &rt_dst(rt), NULL);

	create_gre_header(skb, mutable);

	/* Allow our local IP stack to fragment the outer packet even if the
	 * DF bit is set as a last resort. */
	skb->local_df = 1;

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags = 0;

	err = ip_local_out(skb);
	if (likely(net_xmit_eval(err) == 0))
		return orig_len;
	else {
		vport_record_error(vport, VPORT_E_TX_ERROR);
		return 0;
	}

error_free:
	kfree_skb(skb);
error:
	vport_record_error(vport, VPORT_E_TX_DROPPED);

	return 0;
}
static int gre_send(struct vport *vport, struct sk_buff *skb)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	const struct mutable_config *mutable = rcu_dereference(gre_vport->mutable);

	struct iphdr *old_iph;
	int orig_len;
	struct iphdr iph;
	struct rtable *rt;
	int max_headroom;
	int mtu;

	/* Validate the protocol headers before we try to use them. */
	if (skb->protocol == htons(ETH_P_8021Q)) {
		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
			goto error_free;

		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, VLAN_ETH_HLEN);
	}

	if (skb->protocol == htons(ETH_P_IP)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
					    + sizeof(struct iphdr) - skb->data)))
			skb->protocol = 0;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
					    + sizeof(struct ipv6hdr) - skb->data)))
			skb->protocol = 0;
	}
#endif
	old_iph = ip_hdr(skb);

	iph.tos = mutable->port_config.tos;
	if (mutable->port_config.flags & GRE_F_TOS_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			iph.tos = old_iph->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
	}
	iph.tos = ecn_encapsulate(iph.tos, skb);

	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = mutable->port_config.daddr,
						.saddr = mutable->port_config.saddr,
						.tos = RT_TOS(iph.tos) } },
				    .proto = IPPROTO_GRE };

		if (ip_route_output_key(&init_net, &rt, &fl))
			goto error_free;
	}

	iph.ttl = mutable->port_config.ttl;
	if (mutable->port_config.flags & GRE_F_TTL_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			iph.ttl = old_iph->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph.ttl = ipv6_hdr(skb)->hop_limit;
#endif
	}
	if (!iph.ttl)
		iph.ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);

	iph.frag_off = (mutable->port_config.flags & GRE_F_PMTUD) ? htons(IP_DF) : 0;
	if (iph.frag_off)
		mtu = dst_mtu(&rt_dst(rt))
		      - ETH_HLEN
		      - mutable->tunnel_hlen
		      - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
	else
		mtu = mutable->mtu;

	if (skb->protocol == htons(ETH_P_IP)) {
		iph.frag_off |= old_iph->frag_off & htons(IP_DF);
		mtu = max(mtu, IP_MIN_MTU);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		mtu = max(mtu, IPV6_MIN_MTU);
#endif

	iph.version = 4;
	iph.ihl = sizeof(struct iphdr) >> 2;
	iph.protocol = IPPROTO_GRE;
	iph.daddr = rt->rt_dst;
	iph.saddr = rt->rt_src;

	nf_reset(skb);
	secpath_reset(skb);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt_dst(rt));

	/* If we are doing GSO on a pskb it is better to make sure that the
	 * headroom is correct now. We will only have to copy the portion in
	 * the linear data area and GSO will preserve headroom when it creates
	 * the segments. This is particularly beneficial on Xen where we get
	 * lots of GSO pskbs. Conversely, we delay copying if it is just to
	 * get our own writable clone because GSO may do the copy for us. */
	max_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
		       + mutable->tunnel_hlen;

	if (skb_headroom(skb) < max_headroom) {
		skb = check_headroom(skb, max_headroom);
		if (unlikely(IS_ERR(skb))) {
			vport_record_error(vport, VPORT_E_TX_DROPPED);
			goto error;
		}
	}

	forward_ip_summed(skb);

	if (unlikely(vswitch_skb_checksum_setup(skb)))
		goto error_free;

	skb = handle_gso(skb);
	if (unlikely(IS_ERR(skb))) {
		vport_record_error(vport, VPORT_E_TX_DROPPED);
		goto error;
	}

	/* Process GSO segments. Try to do any work for the entire packet that
	 * doesn't involve actually writing to it before this point. */
	orig_len = 0;
	do {
		struct sk_buff *next_skb = skb->next;
		skb->next = NULL;

		orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb);

		skb = next_skb;
	} while (skb);

	return orig_len;

error_free:
	kfree_skb(skb);
	vport_record_error(vport, VPORT_E_TX_ERROR);
error:
	return 0;
}
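
/*
 * Worked example (added): with GRE_F_PMTUD, GRE_F_CSUM, and an output key
 * all set, tunnel_hlen is 20 (outer IP) + 4 (base) + 4 (checksum) +
 * 4 (key) = 32 bytes. A route MTU of 1500 then gives gre_send() an inner
 * budget of 1500 - 14 (ETH_HLEN) - 32 = 1454 bytes, 4 fewer for a 802.1Q
 * tagged frame, before build_packet() falls back to send_frag_needed().
 */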
static struct net_protocol gre_protocol_handlers = {
	.handler = gre_rcv,
	.err_handler = gre_err,
};

static int gre_init(void)
{
	int err;

	err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
	if (err)
		printk(KERN_WARNING "openvswitch: cannot register gre protocol handler\n");

	return err;
}

static void gre_exit(void)
{
	tbl_destroy(port_table, NULL);
	inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
}

static int set_config(const struct vport *cur_vport,
		      struct mutable_config *mutable, const void __user *uconfig)
{
	const struct vport *old_vport;
	const struct mutable_config *old_mutable;
	int port_type;

	if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct gre_port_config)))
		return -EFAULT;

	if (mutable->port_config.daddr == 0)
		return -EINVAL;

	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) {
		port_type = FIND_PORT_MATCH;
		mutable->port_config.in_key = 0;
	} else
		port_type = FIND_PORT_KEY;

	old_vport = find_port(mutable->port_config.saddr,
			      mutable->port_config.daddr,
			      mutable->port_config.in_key, port_type,
			      &old_mutable);

	if (old_vport && old_vport != cur_vport)
		return -EEXIST;

	if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
		mutable->port_config.out_key = 0;

	mutable->tunnel_hlen = sizeof(struct iphdr) + GRE_HEADER_SECTION;

	if (mutable->port_config.flags & GRE_F_CSUM)
		mutable->tunnel_hlen += GRE_HEADER_SECTION;

	if (mutable->port_config.out_key ||
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
		mutable->tunnel_hlen += GRE_HEADER_SECTION;

	return 0;
}
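
/*
 * Configuration sketch (illustrative values; field names from struct
 * gre_port_config as used in this file): userspace passes something like
 *
 *	.daddr  = remote endpoint (required; rejected above if zero),
 *	.saddr  = local endpoint (optional; zero matches any),
 *	.in_key / .out_key = RFC 2890 keys, cleared here when
 *		GRE_F_IN_KEY_MATCH / GRE_F_OUT_KEY_ACTION hand the key
 *		over to the flow's tun_id,
 *
 * and set_config() sizes tunnel_hlen from the option sections implied by
 * the flags.
 */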
static struct vport *gre_create(const char *name, const void __user *config)
{
	struct vport *vport;
	struct gre_vport *gre_vport;
	int err;

	vport = vport_alloc(sizeof(struct gre_vport), &gre_vport_ops);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		goto error;
	}

	gre_vport = gre_vport_priv(vport);

	strcpy(gre_vport->name, name);

	gre_vport->mutable = kmalloc(sizeof(struct mutable_config), GFP_KERNEL);
	if (!gre_vport->mutable) {
		err = -ENOMEM;
		goto error_free_vport;
	}

	vport_gen_rand_ether_addr(gre_vport->mutable->eth_addr);
	gre_vport->mutable->mtu = ETH_DATA_LEN;

	err = set_config(NULL, gre_vport->mutable, config);
	if (err)
		goto error_free_mutable;

	err = add_port(vport);
	if (err)
		goto error_free_mutable;

	return vport;

error_free_mutable:
	kfree(gre_vport->mutable);
error_free_vport:
	vport_free(vport);
error:
	return ERR_PTR(err);
}

static int gre_modify(struct vport *vport, const void __user *config)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct mutable_config *mutable;
	int err;
	int update_hash = 0;

	mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
	if (!mutable) {
		err = -ENOMEM;
		goto error;
	}

	err = set_config(vport, mutable, config);
	if (err)
		goto error_free;

	/* Only remove the port from the hash table if something that would
	 * affect the lookup has changed. */
	if (gre_vport->mutable->port_config.saddr != mutable->port_config.saddr ||
	    gre_vport->mutable->port_config.daddr != mutable->port_config.daddr ||
	    gre_vport->mutable->port_config.in_key != mutable->port_config.in_key ||
	    (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
	    (mutable->port_config.flags & GRE_F_IN_KEY_MATCH))
		update_hash = 1;

	/* This update is not atomic but the lookup uses the config, which
	 * serves as an inherent double check. */
	if (update_hash) {
		err = del_port(vport);
		if (err)
			goto error_free;
	}

	assign_config_rcu(vport, mutable);

	if (update_hash) {
		err = add_port(vport);
		if (err)
			goto error_free;
	}

	return 0;

error_free:
	kfree(mutable);
error:
	return err;
}

static void free_port(struct rcu_head *rcu)
{
	struct gre_vport *gre_vport = container_of(rcu, struct gre_vport, rcu);

	kfree(gre_vport->mutable);
	vport_free(gre_vport_to_vport(gre_vport));
}

static int gre_destroy(struct vport *vport)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	int port_type;
	const struct mutable_config *old_mutable;

	/* Do a hash table lookup to make sure that the port exists. It should
	 * exist but might not if a modify failed earlier. */
	if (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH)
		port_type = FIND_PORT_MATCH;
	else
		port_type = FIND_PORT_KEY;

	if (vport == find_port(gre_vport->mutable->port_config.saddr,
			       gre_vport->mutable->port_config.daddr,
			       gre_vport->mutable->port_config.in_key,
			       port_type, &old_mutable))
		del_port(vport);

	call_rcu(&gre_vport->rcu, free_port);

	return 0;
}

static int gre_set_mtu(struct vport *vport, int mtu)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct mutable_config *mutable;

	mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
	if (!mutable)
		return -ENOMEM;

	mutable->mtu = mtu;
	assign_config_rcu(vport, mutable);

	return 0;
}

static int gre_set_addr(struct vport *vport, const unsigned char *addr)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct mutable_config *mutable;

	mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
	if (!mutable)
		return -ENOMEM;

	memcpy(mutable->eth_addr, addr, ETH_ALEN);
	assign_config_rcu(vport, mutable);

	return 0;
}

static const char *gre_get_name(const struct vport *vport)
{
	const struct gre_vport *gre_vport = gre_vport_priv(vport);
	return gre_vport->name;
}

static const unsigned char *gre_get_addr(const struct vport *vport)
{
	const struct gre_vport *gre_vport = gre_vport_priv(vport);
	return rcu_dereference(gre_vport->mutable)->eth_addr;
}

static int gre_get_mtu(const struct vport *vport)
{
	const struct gre_vport *gre_vport = gre_vport_priv(vport);
	return rcu_dereference(gre_vport->mutable)->mtu;
}

struct vport_ops gre_vport_ops = {
	.type		= "gre",
	.flags		= VPORT_F_GEN_STATS | VPORT_F_TUN_ID,
	.init		= gre_init,
	.exit		= gre_exit,
	.create		= gre_create,
	.modify		= gre_modify,
	.destroy	= gre_destroy,
	.set_mtu	= gre_set_mtu,
	.set_addr	= gre_set_addr,
	.get_name	= gre_get_name,
	.get_addr	= gre_get_addr,
	.get_dev_flags	= vport_gen_get_dev_flags,
	.is_running	= vport_gen_is_running,
	.get_operstate	= vport_gen_get_operstate,
	.get_mtu	= gre_get_mtu,
	.send		= gre_send,
};