]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv4/ipip.c
ipip: always notify change when params are updated
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4 3 *
1da177e4
LT
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (it's so tiny it's silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27/* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 36
1da177e4
LT
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
e905a9ed 43
113aa838 44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
1da177e4
LT
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 53
1da177e4
LT
54*/
55
56/* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
e905a9ed 76 find out how much more space you can allocate by calling
1da177e4
LT
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87/*
88 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
e905a9ed 93
4fc268d2 94#include <linux/capability.h>
1da177e4
LT
95#include <linux/module.h>
96#include <linux/types.h>
1da177e4 97#include <linux/kernel.h>
5a0e3ad6 98#include <linux/slab.h>
1da177e4
LT
99#include <asm/uaccess.h>
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
106#include <linux/mroute.h>
107#include <linux/init.h>
108#include <linux/netfilter_ipv4.h>
46f25dff 109#include <linux/if_ether.h>
1da177e4
LT
110
111#include <net/sock.h>
112#include <net/ip.h>
113#include <net/icmp.h>
1da177e4
LT
114#include <net/ipip.h>
115#include <net/inet_ecn.h>
116#include <net/xfrm.h>
10dc4c7b
PE
117#include <net/net_namespace.h>
118#include <net/netns/generic.h>
1da177e4
LT
119
/* Number of buckets in each of the hashed tunnel tables. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address (__be32) into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that expression arguments
 * (e.g. HASH(a ^ b)) hash the whole expression; note that it is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1da177e4 122
eccc1bb8 123static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
f99189b1 127static int ipip_net_id __read_mostly;
10dc4c7b 128struct ipip_net {
b7285b79
ED
129 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
130 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
131 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
132 struct ip_tunnel __rcu *tunnels_wc[1];
133 struct ip_tunnel __rcu **tunnels[4];
44d3c299 134
b9855c54 135 struct net_device *fb_tunnel_dev;
10dc4c7b
PE
136};
137
3c97af99 138static int ipip_tunnel_init(struct net_device *dev);
1da177e4 139static void ipip_tunnel_setup(struct net_device *dev);
3c97af99 140static void ipip_dev_free(struct net_device *dev);
0974658d 141static struct rtnl_link_ops ipip_link_ops __read_mostly;
1da177e4 142
87b6d218 143static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144 struct rtnl_link_stats64 *tot)
3c97af99 145{
3c97af99
ED
146 int i;
147
148 for_each_possible_cpu(i) {
149 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
87b6d218 150 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151 unsigned int start;
152
153 do {
154 start = u64_stats_fetch_begin_bh(&tstats->syncp);
155 rx_packets = tstats->rx_packets;
156 tx_packets = tstats->tx_packets;
157 rx_bytes = tstats->rx_bytes;
158 tx_bytes = tstats->tx_bytes;
159 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161 tot->rx_packets += rx_packets;
162 tot->tx_packets += tx_packets;
163 tot->rx_bytes += rx_bytes;
164 tot->tx_bytes += tx_bytes;
3c97af99 165 }
87b6d218 166
167 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 tot->tx_dropped = dev->stats.tx_dropped;
170 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171 tot->tx_errors = dev->stats.tx_errors;
172 tot->collisions = dev->stats.collisions;
173
174 return tot;
3c97af99
ED
175}
176
5e73ea1a 177static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
b9fae5c9 178 __be32 remote, __be32 local)
1da177e4 179{
b7285b79
ED
180 unsigned int h0 = HASH(remote);
181 unsigned int h1 = HASH(local);
1da177e4 182 struct ip_tunnel *t;
44d3c299 183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
1da177e4 184
e086cadc 185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
1da177e4
LT
186 if (local == t->parms.iph.saddr &&
187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188 return t;
8f95dd63 189
e086cadc 190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
1da177e4
LT
191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192 return t;
8f95dd63 193
e086cadc 194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
1da177e4
LT
195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196 return t;
8f95dd63
ED
197
198 t = rcu_dereference(ipn->tunnels_wc[0]);
199 if (t && (t->dev->flags&IFF_UP))
1da177e4
LT
200 return t;
201 return NULL;
202}
203
b7285b79 204static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
b9fae5c9 205 struct ip_tunnel_parm *parms)
1da177e4 206{
87d1a164
YH
207 __be32 remote = parms->iph.daddr;
208 __be32 local = parms->iph.saddr;
b7285b79 209 unsigned int h = 0;
1da177e4
LT
210 int prio = 0;
211
212 if (remote) {
213 prio |= 2;
214 h ^= HASH(remote);
215 }
216 if (local) {
217 prio |= 1;
218 h ^= HASH(local);
219 }
44d3c299 220 return &ipn->tunnels[prio][h];
1da177e4
LT
221}
222
b7285b79 223static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
b9fae5c9 224 struct ip_tunnel *t)
87d1a164 225{
b9fae5c9 226 return __ipip_bucket(ipn, &t->parms);
87d1a164 227}
1da177e4 228
b9fae5c9 229static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
1da177e4 230{
b7285b79
ED
231 struct ip_tunnel __rcu **tp;
232 struct ip_tunnel *iter;
233
234 for (tp = ipip_bucket(ipn, t);
235 (iter = rtnl_dereference(*tp)) != NULL;
236 tp = &iter->next) {
237 if (t == iter) {
cf778b00 238 rcu_assign_pointer(*tp, t->next);
1da177e4
LT
239 break;
240 }
241 }
242}
243
b9fae5c9 244static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
1da177e4 245{
b7285b79 246 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
1da177e4 247
cf778b00
ED
248 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249 rcu_assign_pointer(*tp, t);
1da177e4
LT
250}
251
5e73ea1a 252static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
b9fae5c9 253 struct ip_tunnel_parm *parms, int create)
1da177e4 254{
d5a0a1e3
AV
255 __be32 remote = parms->iph.daddr;
256 __be32 local = parms->iph.saddr;
b7285b79
ED
257 struct ip_tunnel *t, *nt;
258 struct ip_tunnel __rcu **tp;
1da177e4 259 struct net_device *dev;
1da177e4 260 char name[IFNAMSIZ];
b9fae5c9 261 struct ipip_net *ipn = net_generic(net, ipip_net_id);
1da177e4 262
b7285b79
ED
263 for (tp = __ipip_bucket(ipn, parms);
264 (t = rtnl_dereference(*tp)) != NULL;
265 tp = &t->next) {
1da177e4
LT
266 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
267 return t;
268 }
269 if (!create)
270 return NULL;
271
272 if (parms->name[0])
273 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6 274 else
3c97af99 275 strcpy(name, "tunl%d");
1da177e4
LT
276
277 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
278 if (dev == NULL)
279 return NULL;
280
0a826406
PE
281 dev_net_set(dev, net);
282
2941a486 283 nt = netdev_priv(dev);
1da177e4
LT
284 nt->parms = *parms;
285
3c97af99
ED
286 if (ipip_tunnel_init(dev) < 0)
287 goto failed_free;
23a12b14 288
b37d428b
PE
289 if (register_netdevice(dev) < 0)
290 goto failed_free;
1da177e4 291
72b36015 292 strcpy(nt->parms.name, dev->name);
0974658d 293 dev->rtnl_link_ops = &ipip_link_ops;
72b36015 294
1da177e4 295 dev_hold(dev);
b9fae5c9 296 ipip_tunnel_link(ipn, nt);
1da177e4
LT
297 return nt;
298
b37d428b 299failed_free:
3c97af99 300 ipip_dev_free(dev);
1da177e4
LT
301 return NULL;
302}
303
b7285b79 304/* called with RTNL */
1da177e4
LT
305static void ipip_tunnel_uninit(struct net_device *dev)
306{
b9855c54
PE
307 struct net *net = dev_net(dev);
308 struct ipip_net *ipn = net_generic(net, ipip_net_id);
309
b7285b79 310 if (dev == ipn->fb_tunnel_dev)
a9b3cd7f 311 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
b7285b79 312 else
b9fae5c9 313 ipip_tunnel_unlink(ipn, netdev_priv(dev));
1da177e4
LT
314 dev_put(dev);
315}
316
d2acc347 317static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4 318{
1da177e4 319
071f92d0 320/* All the routers (except for Linux) return only
1da177e4
LT
321 8 bytes of packet payload. It means, that precise relaying of
322 ICMP in the real Internet is absolutely infeasible.
323 */
b71d1d42 324 const struct iphdr *iph = (const struct iphdr *)skb->data;
88c7664f
ACM
325 const int type = icmp_hdr(skb)->type;
326 const int code = icmp_hdr(skb)->code;
1da177e4 327 struct ip_tunnel *t;
d2acc347 328 int err;
1da177e4
LT
329
330 switch (type) {
331 default:
332 case ICMP_PARAMETERPROB:
d2acc347 333 return 0;
1da177e4
LT
334
335 case ICMP_DEST_UNREACH:
336 switch (code) {
337 case ICMP_SR_FAILED:
338 case ICMP_PORT_UNREACH:
339 /* Impossible event. */
d2acc347 340 return 0;
1da177e4
LT
341 default:
342 /* All others are translated to HOST_UNREACH.
343 rfc2003 contains "deep thoughts" about NET_UNREACH,
344 I believe they are just ether pollution. --ANK
345 */
346 break;
347 }
348 break;
349 case ICMP_TIME_EXCEEDED:
350 if (code != ICMP_EXC_TTL)
d2acc347 351 return 0;
1da177e4 352 break;
55be7a9c
DM
353 case ICMP_REDIRECT:
354 break;
1da177e4
LT
355 }
356
d2acc347 357 err = -ENOENT;
cec3ffae 358 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
36393395
DM
359 if (t == NULL)
360 goto out;
361
362 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
363 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
364 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
365 err = 0;
366 goto out;
367 }
368
55be7a9c
DM
369 if (type == ICMP_REDIRECT) {
370 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
371 IPPROTO_IPIP, 0);
372 err = 0;
373 goto out;
374 }
375
36393395 376 if (t->parms.iph.daddr == 0)
1da177e4 377 goto out;
d2acc347
HX
378
379 err = 0;
1da177e4
LT
380 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
381 goto out;
382
26d94b46 383 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
384 t->err_count++;
385 else
386 t->err_count = 1;
387 t->err_time = jiffies;
388out:
b0558ef2 389
d2acc347 390 return err;
1da177e4
LT
391}
392
1da177e4
LT
393static int ipip_rcv(struct sk_buff *skb)
394{
1da177e4 395 struct ip_tunnel *tunnel;
eddc9ec5 396 const struct iphdr *iph = ip_hdr(skb);
eccc1bb8 397 int err;
1da177e4 398
3c97af99
ED
399 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
400 if (tunnel != NULL) {
401 struct pcpu_tstats *tstats;
402
eccc1bb8 403 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
404 goto drop;
1da177e4
LT
405
406 secpath_reset(skb);
407
b0e380b1 408 skb->mac_header = skb->network_header;
c1d2bbe1 409 skb_reset_network_header(skb);
1da177e4
LT
410 skb->protocol = htons(ETH_P_IP);
411 skb->pkt_type = PACKET_HOST;
412
eccc1bb8 413 __skb_tunnel_rx(skb, tunnel->dev);
414
415 err = IP_ECN_decapsulate(iph, skb);
416 if (unlikely(err)) {
417 if (log_ecn_error)
418 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
419 &iph->saddr, iph->tos);
420 if (err > 1) {
421 ++tunnel->dev->stats.rx_frame_errors;
422 ++tunnel->dev->stats.rx_errors;
423 goto drop;
424 }
425 }
426
3c97af99 427 tstats = this_cpu_ptr(tunnel->dev->tstats);
87b6d218 428 u64_stats_update_begin(&tstats->syncp);
3c97af99
ED
429 tstats->rx_packets++;
430 tstats->rx_bytes += skb->len;
87b6d218 431 u64_stats_update_end(&tstats->syncp);
3c97af99 432
caf586e5 433 netif_rx(skb);
1da177e4
LT
434 return 0;
435 }
1da177e4 436
1da177e4 437 return -1;
eccc1bb8 438
439drop:
440 kfree_skb(skb);
441 return 0;
1da177e4
LT
442}
443
444/*
445 * This function assumes it is being called from dev_queue_xmit()
446 * and that skb is filled properly by that function.
447 */
448
6fef4c0c 449static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 450{
2941a486 451 struct ip_tunnel *tunnel = netdev_priv(dev);
b71d1d42 452 const struct iphdr *tiph = &tunnel->parms.iph;
1da177e4 453 u8 tos = tunnel->parms.iph.tos;
d5a0a1e3 454 __be16 df = tiph->frag_off;
1da177e4 455 struct rtable *rt; /* Route to the other host */
3c97af99 456 struct net_device *tdev; /* Device to other host */
b71d1d42 457 const struct iphdr *old_iph = ip_hdr(skb);
1da177e4 458 struct iphdr *iph; /* Our new IP header */
c2636b4d 459 unsigned int max_headroom; /* The extra header space needed */
d5a0a1e3 460 __be32 dst = tiph->daddr;
31e4543d 461 struct flowi4 fl4;
1da177e4
LT
462 int mtu;
463
1da177e4
LT
464 if (skb->protocol != htons(ETH_P_IP))
465 goto tx_error;
466
c3b89fbb
ED
467 if (skb->ip_summed == CHECKSUM_PARTIAL &&
468 skb_checksum_help(skb))
469 goto tx_error;
470
3c97af99 471 if (tos & 1)
1da177e4
LT
472 tos = old_iph->tos;
473
474 if (!dst) {
475 /* NBMA tunnel */
511c3f92 476 if ((rt = skb_rtable(skb)) == NULL) {
3c97af99 477 dev->stats.tx_fifo_errors++;
1da177e4
LT
478 goto tx_error;
479 }
f8126f1d 480 dst = rt_nexthop(rt, old_iph->daddr);
1da177e4
LT
481 }
482
31e4543d 483 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
78fbfd8a
DM
484 dst, tiph->saddr,
485 0, 0,
486 IPPROTO_IPIP, RT_TOS(tos),
487 tunnel->parms.link);
488 if (IS_ERR(rt)) {
489 dev->stats.tx_carrier_errors++;
490 goto tx_error_icmp;
1da177e4 491 }
d8d1f30b 492 tdev = rt->dst.dev;
1da177e4
LT
493
494 if (tdev == dev) {
495 ip_rt_put(rt);
3c97af99 496 dev->stats.collisions++;
1da177e4
LT
497 goto tx_error;
498 }
499
23ca0c98
HX
500 df |= old_iph->frag_off & htons(IP_DF);
501
502 if (df) {
d8d1f30b 503 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
1da177e4 504
23ca0c98 505 if (mtu < 68) {
3c97af99 506 dev->stats.collisions++;
23ca0c98
HX
507 ip_rt_put(rt);
508 goto tx_error;
509 }
1da177e4 510
23ca0c98 511 if (skb_dst(skb))
6700c270 512 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1da177e4 513
23ca0c98
HX
514 if ((old_iph->frag_off & htons(IP_DF)) &&
515 mtu < ntohs(old_iph->tot_len)) {
516 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
517 htonl(mtu));
518 ip_rt_put(rt);
519 goto tx_error;
520 }
1da177e4
LT
521 }
522
523 if (tunnel->err_count > 0) {
26d94b46
WY
524 if (time_before(jiffies,
525 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
526 tunnel->err_count--;
527 dst_link_failure(skb);
528 } else
529 tunnel->err_count = 0;
530 }
531
532 /*
533 * Okay, now see if we can stuff it in the buffer as-is.
534 */
535 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
536
cfbba49d
PM
537 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
538 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
539 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
540 if (!new_skb) {
541 ip_rt_put(rt);
3c97af99 542 dev->stats.tx_dropped++;
1da177e4 543 dev_kfree_skb(skb);
6ed10654 544 return NETDEV_TX_OK;
1da177e4
LT
545 }
546 if (skb->sk)
547 skb_set_owner_w(new_skb, skb->sk);
548 dev_kfree_skb(skb);
549 skb = new_skb;
eddc9ec5 550 old_iph = ip_hdr(skb);
1da177e4
LT
551 }
552
b0e380b1 553 skb->transport_header = skb->network_header;
e2d1bca7
ACM
554 skb_push(skb, sizeof(struct iphdr));
555 skb_reset_network_header(skb);
1da177e4 556 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
557 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
558 IPSKB_REROUTED);
adf30907 559 skb_dst_drop(skb);
d8d1f30b 560 skb_dst_set(skb, &rt->dst);
1da177e4
LT
561
562 /*
563 * Push down and install the IPIP header.
564 */
565
eddc9ec5 566 iph = ip_hdr(skb);
1da177e4
LT
567 iph->version = 4;
568 iph->ihl = sizeof(struct iphdr)>>2;
569 iph->frag_off = df;
570 iph->protocol = IPPROTO_IPIP;
571 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
69458cb1
DM
572 iph->daddr = fl4.daddr;
573 iph->saddr = fl4.saddr;
1da177e4
LT
574
575 if ((iph->ttl = tiph->ttl) == 0)
576 iph->ttl = old_iph->ttl;
577
aa0010f8 578 iptunnel_xmit(skb, dev);
6ed10654 579 return NETDEV_TX_OK;
1da177e4
LT
580
581tx_error_icmp:
582 dst_link_failure(skb);
583tx_error:
3c97af99 584 dev->stats.tx_errors++;
1da177e4 585 dev_kfree_skb(skb);
6ed10654 586 return NETDEV_TX_OK;
1da177e4
LT
587}
588
5533995b
MS
589static void ipip_tunnel_bind_dev(struct net_device *dev)
590{
591 struct net_device *tdev = NULL;
592 struct ip_tunnel *tunnel;
b71d1d42 593 const struct iphdr *iph;
5533995b
MS
594
595 tunnel = netdev_priv(dev);
596 iph = &tunnel->parms.iph;
597
598 if (iph->daddr) {
31e4543d
DM
599 struct rtable *rt;
600 struct flowi4 fl4;
601
602 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
603 iph->daddr, iph->saddr,
604 0, 0,
605 IPPROTO_IPIP,
606 RT_TOS(iph->tos),
607 tunnel->parms.link);
b23dd4fe 608 if (!IS_ERR(rt)) {
d8d1f30b 609 tdev = rt->dst.dev;
5533995b
MS
610 ip_rt_put(rt);
611 }
612 dev->flags |= IFF_POINTOPOINT;
613 }
614
615 if (!tdev && tunnel->parms.link)
b99f0152 616 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
5533995b
MS
617
618 if (tdev) {
619 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
620 dev->mtu = tdev->mtu - sizeof(struct iphdr);
621 }
622 dev->iflink = tunnel->parms.link;
623}
624
1da177e4
LT
625static int
626ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
627{
628 int err = 0;
629 struct ip_tunnel_parm p;
630 struct ip_tunnel *t;
b9855c54
PE
631 struct net *net = dev_net(dev);
632 struct ipip_net *ipn = net_generic(net, ipip_net_id);
1da177e4
LT
633
634 switch (cmd) {
635 case SIOCGETTUNNEL:
636 t = NULL;
b9855c54 637 if (dev == ipn->fb_tunnel_dev) {
1da177e4
LT
638 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
639 err = -EFAULT;
640 break;
641 }
b9fae5c9 642 t = ipip_tunnel_locate(net, &p, 0);
1da177e4
LT
643 }
644 if (t == NULL)
2941a486 645 t = netdev_priv(dev);
1da177e4
LT
646 memcpy(&p, &t->parms, sizeof(p));
647 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
648 err = -EFAULT;
649 break;
650
651 case SIOCADDTUNNEL:
652 case SIOCCHGTUNNEL:
653 err = -EPERM;
654 if (!capable(CAP_NET_ADMIN))
655 goto done;
656
657 err = -EFAULT;
658 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
659 goto done;
660
661 err = -EINVAL;
662 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
663 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
664 goto done;
665 if (p.iph.ttl)
666 p.iph.frag_off |= htons(IP_DF);
667
b9fae5c9 668 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 669
b9855c54 670 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
671 if (t != NULL) {
672 if (t->dev != dev) {
673 err = -EEXIST;
674 break;
675 }
676 } else {
677 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
678 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
679 err = -EINVAL;
680 break;
681 }
2941a486 682 t = netdev_priv(dev);
1da177e4 683 }
c38cc4b5
ND
684
685 ipip_tunnel_unlink(ipn, t);
686 synchronize_net();
687 t->parms.iph.saddr = p.iph.saddr;
688 t->parms.iph.daddr = p.iph.daddr;
689 memcpy(dev->dev_addr, &p.iph.saddr, 4);
690 memcpy(dev->broadcast, &p.iph.daddr, 4);
691 ipip_tunnel_link(ipn, t);
692 t->parms.iph.ttl = p.iph.ttl;
693 t->parms.iph.tos = p.iph.tos;
694 t->parms.iph.frag_off = p.iph.frag_off;
695 if (t->parms.link != p.link) {
696 t->parms.link = p.link;
697 ipip_tunnel_bind_dev(dev);
698 }
699 netdev_state_change(dev);
1da177e4
LT
700 }
701
702 if (t) {
703 err = 0;
1da177e4
LT
704 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
705 err = -EFAULT;
706 } else
707 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
708 break;
709
710 case SIOCDELTUNNEL:
711 err = -EPERM;
712 if (!capable(CAP_NET_ADMIN))
713 goto done;
714
b9855c54 715 if (dev == ipn->fb_tunnel_dev) {
1da177e4
LT
716 err = -EFAULT;
717 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
718 goto done;
719 err = -ENOENT;
b9fae5c9 720 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
721 goto done;
722 err = -EPERM;
b9855c54 723 if (t->dev == ipn->fb_tunnel_dev)
1da177e4
LT
724 goto done;
725 dev = t->dev;
726 }
22f8cde5
SH
727 unregister_netdevice(dev);
728 err = 0;
1da177e4
LT
729 break;
730
731 default:
732 err = -EINVAL;
733 }
734
735done:
736 return err;
737}
738
1da177e4
LT
739static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
740{
741 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
742 return -EINVAL;
743 dev->mtu = new_mtu;
744 return 0;
745}
746
23a12b14
SH
747static const struct net_device_ops ipip_netdev_ops = {
748 .ndo_uninit = ipip_tunnel_uninit,
749 .ndo_start_xmit = ipip_tunnel_xmit,
750 .ndo_do_ioctl = ipip_tunnel_ioctl,
751 .ndo_change_mtu = ipip_tunnel_change_mtu,
87b6d218 752 .ndo_get_stats64 = ipip_get_stats64,
23a12b14
SH
753};
754
3c97af99
ED
755static void ipip_dev_free(struct net_device *dev)
756{
757 free_percpu(dev->tstats);
758 free_netdev(dev);
759}
760
c3b89fbb
ED
761#define IPIP_FEATURES (NETIF_F_SG | \
762 NETIF_F_FRAGLIST | \
763 NETIF_F_HIGHDMA | \
764 NETIF_F_HW_CSUM)
765
1da177e4
LT
766static void ipip_tunnel_setup(struct net_device *dev)
767{
23a12b14 768 dev->netdev_ops = &ipip_netdev_ops;
3c97af99 769 dev->destructor = ipip_dev_free;
1da177e4
LT
770
771 dev->type = ARPHRD_TUNNEL;
772 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
46f25dff 773 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
1da177e4
LT
774 dev->flags = IFF_NOARP;
775 dev->iflink = 0;
776 dev->addr_len = 4;
0a826406 777 dev->features |= NETIF_F_NETNS_LOCAL;
153f0943 778 dev->features |= NETIF_F_LLTX;
28e72216 779 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
c3b89fbb
ED
780
781 dev->features |= IPIP_FEATURES;
782 dev->hw_features |= IPIP_FEATURES;
1da177e4
LT
783}
784
3c97af99 785static int ipip_tunnel_init(struct net_device *dev)
1da177e4 786{
23a12b14 787 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
788
789 tunnel->dev = dev;
1da177e4
LT
790
791 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
792 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
793
5533995b 794 ipip_tunnel_bind_dev(dev);
3c97af99
ED
795
796 dev->tstats = alloc_percpu(struct pcpu_tstats);
797 if (!dev->tstats)
798 return -ENOMEM;
799
800 return 0;
1da177e4
LT
801}
802
fada5636 803static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
1da177e4 804{
2941a486 805 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 806 struct iphdr *iph = &tunnel->parms.iph;
44d3c299 807 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
1da177e4
LT
808
809 tunnel->dev = dev;
810 strcpy(tunnel->parms.name, dev->name);
811
812 iph->version = 4;
813 iph->protocol = IPPROTO_IPIP;
814 iph->ihl = 5;
815
fada5636
ED
816 dev->tstats = alloc_percpu(struct pcpu_tstats);
817 if (!dev->tstats)
818 return -ENOMEM;
819
1da177e4 820 dev_hold(dev);
cf778b00 821 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
fada5636 822 return 0;
1da177e4
LT
823}
824
0974658d
ND
825static size_t ipip_get_size(const struct net_device *dev)
826{
827 return
828 /* IFLA_IPTUN_LINK */
829 nla_total_size(4) +
830 /* IFLA_IPTUN_LOCAL */
831 nla_total_size(4) +
832 /* IFLA_IPTUN_REMOTE */
833 nla_total_size(4) +
834 /* IFLA_IPTUN_TTL */
835 nla_total_size(1) +
836 /* IFLA_IPTUN_TOS */
837 nla_total_size(1) +
838 0;
839}
840
841static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
842{
843 struct ip_tunnel *tunnel = netdev_priv(dev);
844 struct ip_tunnel_parm *parm = &tunnel->parms;
845
846 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
847 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
848 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
849 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
850 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos))
851 goto nla_put_failure;
852 return 0;
853
854nla_put_failure:
855 return -EMSGSIZE;
856}
857
858static struct rtnl_link_ops ipip_link_ops __read_mostly = {
859 .kind = "ipip",
860 .maxtype = IFLA_IPTUN_MAX,
861 .priv_size = sizeof(struct ip_tunnel),
862 .get_size = ipip_get_size,
863 .fill_info = ipip_fill_info,
864};
865
6dcd814b 866static struct xfrm_tunnel ipip_handler __read_mostly = {
1da177e4
LT
867 .handler = ipip_rcv,
868 .err_handler = ipip_err,
d2acc347 869 .priority = 1,
1da177e4
LT
870};
871
5747a1aa 872static const char banner[] __initconst =
1da177e4
LT
873 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
874
0694c4c0 875static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
44d3c299
PE
876{
877 int prio;
878
879 for (prio = 1; prio < 4; prio++) {
880 int h;
881 for (h = 0; h < HASH_SIZE; h++) {
b7285b79 882 struct ip_tunnel *t;
0694c4c0 883
b7285b79 884 t = rtnl_dereference(ipn->tunnels[prio][h]);
0694c4c0
ED
885 while (t != NULL) {
886 unregister_netdevice_queue(t->dev, head);
b7285b79 887 t = rtnl_dereference(t->next);
0694c4c0 888 }
44d3c299
PE
889 }
890 }
891}
892
2c8c1e72 893static int __net_init ipip_init_net(struct net *net)
10dc4c7b 894{
86de8a63 895 struct ipip_net *ipn = net_generic(net, ipip_net_id);
72b36015 896 struct ip_tunnel *t;
10dc4c7b 897 int err;
10dc4c7b 898
44d3c299
PE
899 ipn->tunnels[0] = ipn->tunnels_wc;
900 ipn->tunnels[1] = ipn->tunnels_l;
901 ipn->tunnels[2] = ipn->tunnels_r;
902 ipn->tunnels[3] = ipn->tunnels_r_l;
903
b9855c54
PE
904 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
905 "tunl0",
906 ipip_tunnel_setup);
907 if (!ipn->fb_tunnel_dev) {
908 err = -ENOMEM;
909 goto err_alloc_dev;
910 }
be77e593 911 dev_net_set(ipn->fb_tunnel_dev, net);
b9855c54 912
fada5636
ED
913 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
914 if (err)
915 goto err_reg_dev;
b9855c54
PE
916
917 if ((err = register_netdev(ipn->fb_tunnel_dev)))
918 goto err_reg_dev;
919
72b36015
TF
920 t = netdev_priv(ipn->fb_tunnel_dev);
921
922 strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
10dc4c7b
PE
923 return 0;
924
b9855c54 925err_reg_dev:
fada5636 926 ipip_dev_free(ipn->fb_tunnel_dev);
b9855c54
PE
927err_alloc_dev:
928 /* nothing */
10dc4c7b
PE
929 return err;
930}
931
2c8c1e72 932static void __net_exit ipip_exit_net(struct net *net)
10dc4c7b 933{
86de8a63 934 struct ipip_net *ipn = net_generic(net, ipip_net_id);
0694c4c0 935 LIST_HEAD(list);
10dc4c7b 936
b9855c54 937 rtnl_lock();
0694c4c0
ED
938 ipip_destroy_tunnels(ipn, &list);
939 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
940 unregister_netdevice_many(&list);
b9855c54 941 rtnl_unlock();
10dc4c7b
PE
942}
943
944static struct pernet_operations ipip_net_ops = {
945 .init = ipip_init_net,
946 .exit = ipip_exit_net,
86de8a63
EB
947 .id = &ipip_net_id,
948 .size = sizeof(struct ipip_net),
10dc4c7b
PE
949};
950
1da177e4
LT
951static int __init ipip_init(void)
952{
953 int err;
954
955 printk(banner);
956
d5aa407f
AD
957 err = register_pernet_device(&ipip_net_ops);
958 if (err < 0)
959 return err;
960 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
961 if (err < 0) {
058bd4d2 962 pr_info("%s: can't register tunnel\n", __func__);
0974658d 963 goto xfrm_tunnel_failed;
1da177e4 964 }
0974658d
ND
965 err = rtnl_link_register(&ipip_link_ops);
966 if (err < 0)
967 goto rtnl_link_failed;
968
969out:
1da177e4 970 return err;
0974658d
ND
971
972rtnl_link_failed:
973 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
974xfrm_tunnel_failed:
975 unregister_pernet_device(&ipip_net_ops);
976 goto out;
1da177e4
LT
977}
978
979static void __exit ipip_fini(void)
980{
0974658d 981 rtnl_link_unregister(&ipip_link_ops);
c0d56408 982 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
058bd4d2 983 pr_info("%s: can't deregister tunnel\n", __func__);
1da177e4 984
86de8a63 985 unregister_pernet_device(&ipip_net_ops);
1da177e4
LT
986}
987
988module_init(ipip_init);
989module_exit(ipip_fini);
990MODULE_LICENSE("GPL");
8909c9ad 991MODULE_ALIAS_NETDEV("tunl0");