/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU	0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

/*
 *	Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb);
static void		 ipv4_dst_destroy(struct dst_entry *dst);

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
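
/* Usage note (illustrative, not part of this file): the table above is
 * indexed by the upper TOS bits of a packet to pick a queueing priority;
 * rt_tos2priority() in include/net/route.h does, roughly:
 *
 *	return ip_tos2prio[IPTOS_TOS(tos) >> 1];
 */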

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

1da177e4 201#ifdef CONFIG_PROC_FS
1da177e4
LT
202static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
203{
29e75252 204 if (*pos)
89aef892 205 return NULL;
29e75252 206 return SEQ_START_TOKEN;
1da177e4
LT
207}
208
209static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
210{
1da177e4 211 ++*pos;
89aef892 212 return NULL;
1da177e4
LT
213}
214
215static void rt_cache_seq_stop(struct seq_file *seq, void *v)
216{
1da177e4
LT
217}
218
219static int rt_cache_seq_show(struct seq_file *seq, void *v)
220{
221 if (v == SEQ_START_TOKEN)
222 seq_printf(seq, "%-127s\n",
223 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
224 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
225 "HHUptod\tSpecDst");
e905a9ed 226 return 0;
1da177e4
LT
227}
228
f690808e 229static const struct seq_operations rt_cache_seq_ops = {
1da177e4
LT
230 .start = rt_cache_seq_start,
231 .next = rt_cache_seq_next,
232 .stop = rt_cache_seq_stop,
233 .show = rt_cache_seq_show,
234};
235
236static int rt_cache_seq_open(struct inode *inode, struct file *file)
237{
89aef892 238 return seq_open(file, &rt_cache_seq_ops);
1da177e4
LT
239}
240
9a32144e 241static const struct file_operations rt_cache_seq_fops = {
1da177e4
LT
242 .owner = THIS_MODULE,
243 .open = rt_cache_seq_open,
244 .read = seq_read,
245 .llseek = seq_lseek,
89aef892 246 .release = seq_release,
1da177e4
LT
247};
248
249
250static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
251{
252 int cpu;
253
254 if (*pos == 0)
255 return SEQ_START_TOKEN;
256
0f23174a 257 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
1da177e4
LT
258 if (!cpu_possible(cpu))
259 continue;
260 *pos = cpu+1;
2f970d83 261 return &per_cpu(rt_cache_stat, cpu);
1da177e4
LT
262 }
263 return NULL;
264}
265
266static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
267{
268 int cpu;
269
0f23174a 270 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
1da177e4
LT
271 if (!cpu_possible(cpu))
272 continue;
273 *pos = cpu+1;
2f970d83 274 return &per_cpu(rt_cache_stat, cpu);
1da177e4
LT
275 }
276 return NULL;
e905a9ed 277
1da177e4
LT
278}
279
280static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
281{
282
283}
284
285static int rt_cpu_seq_show(struct seq_file *seq, void *v)
286{
287 struct rt_cache_stat *st = v;
288
289 if (v == SEQ_START_TOKEN) {
5bec0039 290 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
1da177e4
LT
291 return 0;
292 }
e905a9ed 293
1da177e4
LT
294 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
295 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
fc66f95c 296 dst_entries_get_slow(&ipv4_dst_ops),
1da177e4
LT
297 st->in_hit,
298 st->in_slow_tot,
299 st->in_slow_mc,
300 st->in_no_route,
301 st->in_brd,
302 st->in_martian_dst,
303 st->in_martian_src,
304
305 st->out_hit,
306 st->out_slow_tot,
e905a9ed 307 st->out_slow_mc,
1da177e4
LT
308
309 st->gc_total,
310 st->gc_ignored,
311 st->gc_goal_miss,
312 st->gc_dst_overflow,
313 st->in_hlist_search,
314 st->out_hlist_search
315 );
316 return 0;
317}
318
f690808e 319static const struct seq_operations rt_cpu_seq_ops = {
1da177e4
LT
320 .start = rt_cpu_seq_start,
321 .next = rt_cpu_seq_next,
322 .stop = rt_cpu_seq_stop,
323 .show = rt_cpu_seq_show,
324};
325
326
327static int rt_cpu_seq_open(struct inode *inode, struct file *file)
328{
329 return seq_open(file, &rt_cpu_seq_ops);
330}
331
9a32144e 332static const struct file_operations rt_cpu_seq_fops = {
1da177e4
LT
333 .owner = THIS_MODULE,
334 .open = rt_cpu_seq_open,
335 .read = seq_read,
336 .llseek = seq_lseek,
337 .release = seq_release,
338};
339
c7066f70 340#ifdef CONFIG_IP_ROUTE_CLASSID
a661c419 341static int rt_acct_proc_show(struct seq_file *m, void *v)
78c686e9 342{
a661c419
AD
343 struct ip_rt_acct *dst, *src;
344 unsigned int i, j;
345
346 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
347 if (!dst)
348 return -ENOMEM;
349
350 for_each_possible_cpu(i) {
351 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
352 for (j = 0; j < 256; j++) {
353 dst[j].o_bytes += src[j].o_bytes;
354 dst[j].o_packets += src[j].o_packets;
355 dst[j].i_bytes += src[j].i_bytes;
356 dst[j].i_packets += src[j].i_packets;
357 }
78c686e9
PE
358 }
359
a661c419
AD
360 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
361 kfree(dst);
362 return 0;
363}
78c686e9 364
a661c419
AD
365static int rt_acct_proc_open(struct inode *inode, struct file *file)
366{
367 return single_open(file, rt_acct_proc_show, NULL);
78c686e9 368}
a661c419
AD
369
370static const struct file_operations rt_acct_proc_fops = {
371 .owner = THIS_MODULE,
372 .open = rt_acct_proc_open,
373 .read = seq_read,
374 .llseek = seq_lseek,
375 .release = single_release,
376};
78c686e9 377#endif
107f1634 378
73b38711 379static int __net_init ip_rt_do_proc_init(struct net *net)
107f1634
PE
380{
381 struct proc_dir_entry *pde;
382
d4beaa66
G
383 pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
384 &rt_cache_seq_fops);
107f1634
PE
385 if (!pde)
386 goto err1;
387
77020720
WC
388 pde = proc_create("rt_cache", S_IRUGO,
389 net->proc_net_stat, &rt_cpu_seq_fops);
107f1634
PE
390 if (!pde)
391 goto err2;
392
c7066f70 393#ifdef CONFIG_IP_ROUTE_CLASSID
a661c419 394 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
107f1634
PE
395 if (!pde)
396 goto err3;
397#endif
398 return 0;
399
c7066f70 400#ifdef CONFIG_IP_ROUTE_CLASSID
107f1634
PE
401err3:
402 remove_proc_entry("rt_cache", net->proc_net_stat);
403#endif
404err2:
405 remove_proc_entry("rt_cache", net->proc_net);
406err1:
407 return -ENOMEM;
408}
73b38711
DL
409
410static void __net_exit ip_rt_do_proc_exit(struct net *net)
411{
412 remove_proc_entry("rt_cache", net->proc_net_stat);
413 remove_proc_entry("rt_cache", net->proc_net);
c7066f70 414#ifdef CONFIG_IP_ROUTE_CLASSID
73b38711 415 remove_proc_entry("rt_acct", net->proc_net);
0a931acf 416#endif
73b38711
DL
417}
418
419static struct pernet_operations ip_rt_proc_ops __net_initdata = {
420 .init = ip_rt_do_proc_init,
421 .exit = ip_rt_do_proc_exit,
422};
423
424static int __init ip_rt_proc_init(void)
425{
426 return register_pernet_subsys(&ip_rt_proc_ops);
427}
428
107f1634 429#else
73b38711 430static inline int ip_rt_proc_init(void)
107f1634
PE
431{
432 return 0;
433}
1da177e4 434#endif /* CONFIG_PROC_FS */
e905a9ed 435
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

/*
 * Peer allocation may fail only in serious out-of-memory conditions.  However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}

void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);

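/* The helpers below rebuild the flowi4 key describing a flow - from a
 * received skb on the input path, or from socket state on the output path -
 * so that the PMTU and redirect handlers further down can re-run the FIB
 * lookup for the affected destination.
 */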
5abf7f7e 503static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
4895c771
DM
504 const struct iphdr *iph,
505 int oif, u8 tos,
506 u8 prot, u32 mark, int flow_flags)
507{
508 if (sk) {
509 const struct inet_sock *inet = inet_sk(sk);
510
511 oif = sk->sk_bound_dev_if;
512 mark = sk->sk_mark;
513 tos = RT_CONN_FLAGS(sk);
514 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
515 }
516 flowi4_init_output(fl4, oif, mark, tos,
517 RT_SCOPE_UNIVERSE, prot,
518 flow_flags,
519 iph->daddr, iph->saddr, 0, 0);
520}
521
5abf7f7e
ED
522static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
523 const struct sock *sk)
4895c771
DM
524{
525 const struct iphdr *iph = ip_hdr(skb);
526 int oif = skb->dev->ifindex;
527 u8 tos = RT_TOS(iph->tos);
528 u8 prot = iph->protocol;
529 u32 mark = skb->mark;
530
531 __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
532}
533
5abf7f7e 534static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
4895c771
DM
535{
536 const struct inet_sock *inet = inet_sk(sk);
5abf7f7e 537 const struct ip_options_rcu *inet_opt;
4895c771
DM
538 __be32 daddr = inet->inet_daddr;
539
540 rcu_read_lock();
541 inet_opt = rcu_dereference(inet->inet_opt);
542 if (inet_opt && inet_opt->opt.srr)
543 daddr = inet_opt->opt.faddr;
544 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
545 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
546 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
547 inet_sk_flowi_flags(sk),
548 daddr, inet->inet_saddr, 0, 0);
549 rcu_read_unlock();
550}
551
5abf7f7e
ED
552static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
553 const struct sk_buff *skb)
4895c771
DM
554{
555 if (skb)
556 build_skb_flow_key(fl4, skb, sk);
557 else
558 build_sk_flow_key(fl4, sk);
559}
560
c5038a83
DM
561static inline void rt_free(struct rtable *rt)
562{
563 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
564}
565
566static DEFINE_SPINLOCK(fnhe_lock);
4895c771 567
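/* Return the entry with the oldest fnhe_stamp in this bucket's chain,
 * dropping any route cached on it, so the caller can recycle the slot
 * instead of growing the chain further.
 */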
aee06da6 568static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
4895c771
DM
569{
570 struct fib_nh_exception *fnhe, *oldest;
c5038a83 571 struct rtable *orig;
4895c771
DM
572
573 oldest = rcu_dereference(hash->chain);
574 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
575 fnhe = rcu_dereference(fnhe->fnhe_next)) {
576 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
577 oldest = fnhe;
578 }
c5038a83
DM
579 orig = rcu_dereference(oldest->fnhe_rth);
580 if (orig) {
581 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
582 rt_free(orig);
583 }
4895c771
DM
584 return oldest;
585}
586
d3a25c98
DM
587static inline u32 fnhe_hashfun(__be32 daddr)
588{
589 u32 hval;
590
591 hval = (__force u32) daddr;
592 hval ^= (hval >> 11) ^ (hval >> 22);
593
594 return hval & (FNHE_HASH_SIZE - 1);
595}
596
387aa65a
TT
597static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
598{
599 rt->rt_pmtu = fnhe->fnhe_pmtu;
600 rt->dst.expires = fnhe->fnhe_expires;
601
602 if (fnhe->fnhe_gw) {
603 rt->rt_flags |= RTCF_REDIRECTED;
604 rt->rt_gateway = fnhe->fnhe_gw;
605 rt->rt_uses_gateway = 1;
606 }
607}
608
aee06da6
JA
609static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
610 u32 pmtu, unsigned long expires)
4895c771 611{
aee06da6 612 struct fnhe_hash_bucket *hash;
4895c771 613 struct fib_nh_exception *fnhe;
387aa65a
TT
614 struct rtable *rt;
615 unsigned int i;
4895c771 616 int depth;
aee06da6
JA
617 u32 hval = fnhe_hashfun(daddr);
618
c5038a83 619 spin_lock_bh(&fnhe_lock);
4895c771 620
aee06da6 621 hash = nh->nh_exceptions;
4895c771 622 if (!hash) {
aee06da6 623 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
4895c771 624 if (!hash)
aee06da6
JA
625 goto out_unlock;
626 nh->nh_exceptions = hash;
4895c771
DM
627 }
628
4895c771
DM
629 hash += hval;
630
631 depth = 0;
632 for (fnhe = rcu_dereference(hash->chain); fnhe;
633 fnhe = rcu_dereference(fnhe->fnhe_next)) {
634 if (fnhe->fnhe_daddr == daddr)
aee06da6 635 break;
4895c771
DM
636 depth++;
637 }
638
aee06da6
JA
639 if (fnhe) {
640 if (gw)
641 fnhe->fnhe_gw = gw;
642 if (pmtu) {
643 fnhe->fnhe_pmtu = pmtu;
387aa65a 644 fnhe->fnhe_expires = max(1UL, expires);
aee06da6 645 }
387aa65a
TT
646 /* Update all cached dsts too */
647 rt = rcu_dereference(fnhe->fnhe_rth);
648 if (rt)
649 fill_route_from_fnhe(rt, fnhe);
aee06da6
JA
650 } else {
651 if (depth > FNHE_RECLAIM_DEPTH)
652 fnhe = fnhe_oldest(hash);
653 else {
654 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
655 if (!fnhe)
656 goto out_unlock;
657
658 fnhe->fnhe_next = hash->chain;
659 rcu_assign_pointer(hash->chain, fnhe);
660 }
5aad1de5 661 fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
aee06da6
JA
662 fnhe->fnhe_daddr = daddr;
663 fnhe->fnhe_gw = gw;
664 fnhe->fnhe_pmtu = pmtu;
665 fnhe->fnhe_expires = expires;
387aa65a
TT
666
667 /* Exception created; mark the cached routes for the nexthop
668 * stale, so anyone caching it rechecks if this exception
669 * applies to them.
670 */
671 for_each_possible_cpu(i) {
672 struct rtable __rcu **prt;
673 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
674 rt = rcu_dereference(*prt);
675 if (rt)
676 rt->dst.obsolete = DST_OBSOLETE_KILL;
677 }
4895c771 678 }
4895c771 679
4895c771 680 fnhe->fnhe_stamp = jiffies;
aee06da6
JA
681
682out_unlock:
c5038a83 683 spin_unlock_bh(&fnhe_lock);
aee06da6 684 return;
4895c771
DM
685}
686
ceb33206
DM
687static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
688 bool kill_route)
1da177e4 689{
e47a185b 690 __be32 new_gw = icmp_hdr(skb)->un.gateway;
94206125 691 __be32 old_gw = ip_hdr(skb)->saddr;
e47a185b 692 struct net_device *dev = skb->dev;
e47a185b 693 struct in_device *in_dev;
4895c771 694 struct fib_result res;
e47a185b 695 struct neighbour *n;
317805b8 696 struct net *net;
1da177e4 697
94206125
DM
698 switch (icmp_hdr(skb)->code & 7) {
699 case ICMP_REDIR_NET:
700 case ICMP_REDIR_NETTOS:
701 case ICMP_REDIR_HOST:
702 case ICMP_REDIR_HOSTTOS:
703 break;
704
705 default:
706 return;
707 }
708
e47a185b
DM
709 if (rt->rt_gateway != old_gw)
710 return;
711
712 in_dev = __in_dev_get_rcu(dev);
713 if (!in_dev)
714 return;
715
c346dca1 716 net = dev_net(dev);
9d4fb27d
JP
717 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
718 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
719 ipv4_is_zeronet(new_gw))
1da177e4
LT
720 goto reject_redirect;
721
722 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
723 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
724 goto reject_redirect;
725 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
726 goto reject_redirect;
727 } else {
317805b8 728 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
1da177e4
LT
729 goto reject_redirect;
730 }
731
4895c771 732 n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
e47a185b
DM
733 if (n) {
734 if (!(n->nud_state & NUD_VALID)) {
735 neigh_event_send(n, NULL);
736 } else {
4895c771
DM
737 if (fib_lookup(net, fl4, &res) == 0) {
738 struct fib_nh *nh = &FIB_RES_NH(res);
4895c771 739
aee06da6
JA
740 update_or_create_fnhe(nh, fl4->daddr, new_gw,
741 0, 0);
4895c771 742 }
ceb33206
DM
743 if (kill_route)
744 rt->dst.obsolete = DST_OBSOLETE_KILL;
e47a185b
DM
745 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
746 }
747 neigh_release(n);
748 }
749 return;
750
751reject_redirect:
752#ifdef CONFIG_IP_ROUTE_VERBOSE
99ee038d
DM
753 if (IN_DEV_LOG_MARTIANS(in_dev)) {
754 const struct iphdr *iph = (const struct iphdr *) skb->data;
755 __be32 daddr = iph->daddr;
756 __be32 saddr = iph->saddr;
757
e47a185b
DM
758 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
759 " Advised path = %pI4 -> %pI4\n",
760 &old_gw, dev->name, &new_gw,
761 &saddr, &daddr);
99ee038d 762 }
e47a185b
DM
763#endif
764 ;
765}
766
4895c771
DM
767static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
768{
769 struct rtable *rt;
770 struct flowi4 fl4;
771
772 rt = (struct rtable *) dst;
773
774 ip_rt_build_flow_key(&fl4, sk, skb);
ceb33206 775 __ip_do_redirect(rt, skb, &fl4, true);
4895c771
DM
776}
777
1da177e4
LT
778static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
779{
ee6b9673 780 struct rtable *rt = (struct rtable *)dst;
1da177e4
LT
781 struct dst_entry *ret = dst;
782
783 if (rt) {
d11a4dc1 784 if (dst->obsolete > 0) {
1da177e4
LT
785 ip_rt_put(rt);
786 ret = NULL;
5943634f
DM
787 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
788 rt->dst.expires) {
89aef892 789 ip_rt_put(rt);
1da177e4
LT
790 ret = NULL;
791 }
792 }
793 return ret;
794}
795
/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

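/* Illustrative timing with the defaults above: the k-th consecutive redirect
 * to a peer is only sent once
 *
 *	time_after(jiffies, rate_last + (ip_rt_redirect_load << k))
 *
 * holds, i.e. after 20ms, 40ms, 80ms, ...; once ip_rt_redirect_number (9)
 * redirects have been ignored we stay silent until ip_rt_redirect_silence
 * (roughly 20 seconds) has elapsed.
 */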
812void ip_rt_send_redirect(struct sk_buff *skb)
813{
511c3f92 814 struct rtable *rt = skb_rtable(skb);
30038fc6 815 struct in_device *in_dev;
92d86829 816 struct inet_peer *peer;
1d861aa4 817 struct net *net;
30038fc6 818 int log_martians;
1da177e4 819
30038fc6 820 rcu_read_lock();
d8d1f30b 821 in_dev = __in_dev_get_rcu(rt->dst.dev);
30038fc6
ED
822 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
823 rcu_read_unlock();
1da177e4 824 return;
30038fc6
ED
825 }
826 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
827 rcu_read_unlock();
1da177e4 828
1d861aa4
DM
829 net = dev_net(rt->dst.dev);
830 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
92d86829 831 if (!peer) {
e81da0e1
JA
832 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
833 rt_nexthop(rt, ip_hdr(skb)->daddr));
92d86829
DM
834 return;
835 }
836
1da177e4
LT
837 /* No redirected packets during ip_rt_redirect_silence;
838 * reset the algorithm.
839 */
92d86829
DM
840 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
841 peer->rate_tokens = 0;
1da177e4
LT
842
843 /* Too many ignored redirects; do not send anything
d8d1f30b 844 * set dst.rate_last to the last seen redirected packet.
1da177e4 845 */
92d86829
DM
846 if (peer->rate_tokens >= ip_rt_redirect_number) {
847 peer->rate_last = jiffies;
1d861aa4 848 goto out_put_peer;
1da177e4
LT
849 }
850
851 /* Check for load limit; set rate_last to the latest sent
852 * redirect.
853 */
92d86829 854 if (peer->rate_tokens == 0 ||
14fb8a76 855 time_after(jiffies,
92d86829
DM
856 (peer->rate_last +
857 (ip_rt_redirect_load << peer->rate_tokens)))) {
e81da0e1
JA
858 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
859
860 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
92d86829
DM
861 peer->rate_last = jiffies;
862 ++peer->rate_tokens;
1da177e4 863#ifdef CONFIG_IP_ROUTE_VERBOSE
30038fc6 864 if (log_martians &&
e87cc472
JP
865 peer->rate_tokens == ip_rt_redirect_number)
866 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
92101b3b 867 &ip_hdr(skb)->saddr, inet_iif(skb),
e81da0e1 868 &ip_hdr(skb)->daddr, &gw);
1da177e4
LT
869#endif
870 }
1d861aa4
DM
871out_put_peer:
872 inet_putpeer(peer);
1da177e4
LT
873}
874
875static int ip_error(struct sk_buff *skb)
876{
251da413 877 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
511c3f92 878 struct rtable *rt = skb_rtable(skb);
92d86829 879 struct inet_peer *peer;
1da177e4 880 unsigned long now;
251da413 881 struct net *net;
92d86829 882 bool send;
1da177e4
LT
883 int code;
884
251da413
DM
885 net = dev_net(rt->dst.dev);
886 if (!IN_DEV_FORWARD(in_dev)) {
887 switch (rt->dst.error) {
888 case EHOSTUNREACH:
889 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
890 break;
891
892 case ENETUNREACH:
893 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
894 break;
895 }
896 goto out;
897 }
898
d8d1f30b 899 switch (rt->dst.error) {
4500ebf8
JP
900 case EINVAL:
901 default:
902 goto out;
903 case EHOSTUNREACH:
904 code = ICMP_HOST_UNREACH;
905 break;
906 case ENETUNREACH:
907 code = ICMP_NET_UNREACH;
251da413 908 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
4500ebf8
JP
909 break;
910 case EACCES:
911 code = ICMP_PKT_FILTERED;
912 break;
1da177e4
LT
913 }
914
1d861aa4 915 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
92d86829
DM
916
917 send = true;
918 if (peer) {
919 now = jiffies;
920 peer->rate_tokens += now - peer->rate_last;
921 if (peer->rate_tokens > ip_rt_error_burst)
922 peer->rate_tokens = ip_rt_error_burst;
923 peer->rate_last = now;
924 if (peer->rate_tokens >= ip_rt_error_cost)
925 peer->rate_tokens -= ip_rt_error_cost;
926 else
927 send = false;
1d861aa4 928 inet_putpeer(peer);
1da177e4 929 }
92d86829
DM
930 if (send)
931 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1da177e4
LT
932
933out: kfree_skb(skb);
934 return 0;
e905a9ed 935}
1da177e4 936
d851c12b 937static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
1da177e4 938{
d851c12b 939 struct dst_entry *dst = &rt->dst;
4895c771 940 struct fib_result res;
2c8cec5c 941
fa1e492a
SK
942 if (dst_metric_locked(dst, RTAX_MTU))
943 return;
944
7f92d334
SK
945 if (dst->dev->mtu < mtu)
946 return;
947
5943634f
DM
948 if (mtu < ip_rt_min_pmtu)
949 mtu = ip_rt_min_pmtu;
2c8cec5c 950
f016229e
TT
951 if (rt->rt_pmtu == mtu &&
952 time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
953 return;
954
c5ae7d41 955 rcu_read_lock();
d851c12b 956 if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
4895c771 957 struct fib_nh *nh = &FIB_RES_NH(res);
4895c771 958
aee06da6
JA
959 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
960 jiffies + ip_rt_mtu_expires);
4895c771 961 }
c5ae7d41 962 rcu_read_unlock();
1da177e4
LT
963}
964
4895c771
DM
965static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
966 struct sk_buff *skb, u32 mtu)
967{
968 struct rtable *rt = (struct rtable *) dst;
969 struct flowi4 fl4;
970
971 ip_rt_build_flow_key(&fl4, sk, skb);
d851c12b 972 __ip_rt_update_pmtu(rt, &fl4, mtu);
4895c771
DM
973}
974
36393395
DM
975void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
976 int oif, u32 mark, u8 protocol, int flow_flags)
977{
4895c771 978 const struct iphdr *iph = (const struct iphdr *) skb->data;
36393395
DM
979 struct flowi4 fl4;
980 struct rtable *rt;
981
4895c771
DM
982 __build_flow_key(&fl4, NULL, iph, oif,
983 RT_TOS(iph->tos), protocol, mark, flow_flags);
36393395
DM
984 rt = __ip_route_output_key(net, &fl4);
985 if (!IS_ERR(rt)) {
4895c771 986 __ip_rt_update_pmtu(rt, &fl4, mtu);
36393395
DM
987 ip_rt_put(rt);
988 }
989}
990EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
991
9cb3a50c 992static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
36393395 993{
4895c771
DM
994 const struct iphdr *iph = (const struct iphdr *) skb->data;
995 struct flowi4 fl4;
996 struct rtable *rt;
36393395 997
4895c771
DM
998 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
999 rt = __ip_route_output_key(sock_net(sk), &fl4);
1000 if (!IS_ERR(rt)) {
1001 __ip_rt_update_pmtu(rt, &fl4, mtu);
1002 ip_rt_put(rt);
1003 }
36393395 1004}
9cb3a50c
SK
1005
1006void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1007{
1008 const struct iphdr *iph = (const struct iphdr *) skb->data;
1009 struct flowi4 fl4;
1010 struct rtable *rt;
1011 struct dst_entry *dst;
b44108db 1012 bool new = false;
9cb3a50c
SK
1013
1014 bh_lock_sock(sk);
1015 rt = (struct rtable *) __sk_dst_get(sk);
1016
1017 if (sock_owned_by_user(sk) || !rt) {
1018 __ipv4_sk_update_pmtu(skb, sk, mtu);
1019 goto out;
1020 }
1021
1022 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1023
1024 if (!__sk_dst_check(sk, 0)) {
1025 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1026 if (IS_ERR(rt))
1027 goto out;
b44108db
SK
1028
1029 new = true;
9cb3a50c
SK
1030 }
1031
1032 __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
1033
1034 dst = dst_check(&rt->dst, 0);
1035 if (!dst) {
b44108db
SK
1036 if (new)
1037 dst_release(&rt->dst);
1038
9cb3a50c
SK
1039 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1040 if (IS_ERR(rt))
1041 goto out;
1042
b44108db 1043 new = true;
9cb3a50c
SK
1044 }
1045
b44108db
SK
1046 if (new)
1047 __sk_dst_set(sk, &rt->dst);
9cb3a50c
SK
1048
1049out:
1050 bh_unlock_sock(sk);
1051}
36393395 1052EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
f39925db 1053
b42597e2
DM
1054void ipv4_redirect(struct sk_buff *skb, struct net *net,
1055 int oif, u32 mark, u8 protocol, int flow_flags)
1056{
4895c771 1057 const struct iphdr *iph = (const struct iphdr *) skb->data;
b42597e2
DM
1058 struct flowi4 fl4;
1059 struct rtable *rt;
1060
4895c771
DM
1061 __build_flow_key(&fl4, NULL, iph, oif,
1062 RT_TOS(iph->tos), protocol, mark, flow_flags);
b42597e2
DM
1063 rt = __ip_route_output_key(net, &fl4);
1064 if (!IS_ERR(rt)) {
ceb33206 1065 __ip_do_redirect(rt, skb, &fl4, false);
b42597e2
DM
1066 ip_rt_put(rt);
1067 }
1068}
1069EXPORT_SYMBOL_GPL(ipv4_redirect);
1070
1071void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1072{
4895c771
DM
1073 const struct iphdr *iph = (const struct iphdr *) skb->data;
1074 struct flowi4 fl4;
1075 struct rtable *rt;
b42597e2 1076
4895c771
DM
1077 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1078 rt = __ip_route_output_key(sock_net(sk), &fl4);
1079 if (!IS_ERR(rt)) {
ceb33206 1080 __ip_do_redirect(rt, skb, &fl4, false);
4895c771
DM
1081 ip_rt_put(rt);
1082 }
b42597e2
DM
1083}
1084EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1085
efbc368d
DM
1086static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1087{
1088 struct rtable *rt = (struct rtable *) dst;
1089
ceb33206
DM
1090 /* All IPV4 dsts are created with ->obsolete set to the value
1091 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1092 * into this function always.
1093 *
387aa65a
TT
1094 * When a PMTU/redirect information update invalidates a route,
1095 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1096 * DST_OBSOLETE_DEAD by dst_free().
ceb33206 1097 */
387aa65a 1098 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
efbc368d 1099 return NULL;
d11a4dc1 1100 return dst;
1da177e4
LT
1101}
1102
1da177e4
LT
1103static void ipv4_link_failure(struct sk_buff *skb)
1104{
1105 struct rtable *rt;
1106
1107 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1108
511c3f92 1109 rt = skb_rtable(skb);
5943634f
DM
1110 if (rt)
1111 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1112}
1113
1114static int ip_rt_bug(struct sk_buff *skb)
1115{
91df42be
JP
1116 pr_debug("%s: %pI4 -> %pI4, %s\n",
1117 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1118 skb->dev ? skb->dev->name : "?");
1da177e4 1119 kfree_skb(skb);
c378a9c0 1120 WARN_ON(1);
1da177e4
LT
1121 return 0;
1122}
1123
/*
 * We do not cache the source address of the outgoing interface,
 * because it is used only by IP RR, TS and SRR options,
 * so it is out of the fast path.
 *
 * BTW remember: "addr" is allowed to be unaligned in IP options!
 */

8e36360a 1133void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1da177e4 1134{
a61ced5d 1135 __be32 src;
1da177e4 1136
c7537967 1137 if (rt_is_output_route(rt))
c5be24ff 1138 src = ip_hdr(skb)->saddr;
ebc0ffae 1139 else {
8e36360a
DM
1140 struct fib_result res;
1141 struct flowi4 fl4;
1142 struct iphdr *iph;
1143
1144 iph = ip_hdr(skb);
1145
1146 memset(&fl4, 0, sizeof(fl4));
1147 fl4.daddr = iph->daddr;
1148 fl4.saddr = iph->saddr;
b0fe4a31 1149 fl4.flowi4_tos = RT_TOS(iph->tos);
8e36360a
DM
1150 fl4.flowi4_oif = rt->dst.dev->ifindex;
1151 fl4.flowi4_iif = skb->dev->ifindex;
1152 fl4.flowi4_mark = skb->mark;
5e2b61f7 1153
ebc0ffae 1154 rcu_read_lock();
68a5e3dd 1155 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
436c3b66 1156 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
ebc0ffae 1157 else
f8126f1d
DM
1158 src = inet_select_addr(rt->dst.dev,
1159 rt_nexthop(rt, iph->daddr),
1160 RT_SCOPE_UNIVERSE);
ebc0ffae
ED
1161 rcu_read_unlock();
1162 }
1da177e4
LT
1163 memcpy(addr, &src, 4);
1164}
1165
c7066f70 1166#ifdef CONFIG_IP_ROUTE_CLASSID
1da177e4
LT
1167static void set_class_tag(struct rtable *rt, u32 tag)
1168{
d8d1f30b
CG
1169 if (!(rt->dst.tclassid & 0xFFFF))
1170 rt->dst.tclassid |= tag & 0xFFFF;
1171 if (!(rt->dst.tclassid & 0xFFFF0000))
1172 rt->dst.tclassid |= tag & 0xFFFF0000;
1da177e4
LT
1173}
1174#endif
1175
0dbaee3b
DM
1176static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1177{
1178 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1179
1180 if (advmss == 0) {
1181 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1182 ip_rt_min_advmss);
1183 if (advmss > 65535 - 40)
1184 advmss = 65535 - 40;
1185 }
1186 return advmss;
1187}
1188
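/* Effective MTU of a route: prefer a still-valid learned PMTU, then an
 * explicit RTAX_MTU metric, otherwise fall back to the device MTU (clamped
 * to 576 when the metric is locked and the route uses a gateway, and never
 * above IP_MAX_MTU).
 */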
ebb762f2 1189static unsigned int ipv4_mtu(const struct dst_entry *dst)
d33e4553 1190{
261663b0 1191 const struct rtable *rt = (const struct rtable *) dst;
5943634f
DM
1192 unsigned int mtu = rt->rt_pmtu;
1193
98d75c37 1194 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
5943634f 1195 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1196
38d523e2 1197 if (mtu)
618f9bc7
SK
1198 return mtu;
1199
1200 mtu = dst->dev->mtu;
d33e4553
DM
1201
1202 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
155e8336 1203 if (rt->rt_uses_gateway && mtu > 576)
d33e4553
DM
1204 mtu = 576;
1205 }
1206
1207 if (mtu > IP_MAX_MTU)
1208 mtu = IP_MAX_MTU;
1209
1210 return mtu;
1211}
1212
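/* Look up a cached exception (redirected gateway and/or learned PMTU) for
 * @daddr on this nexthop; returns NULL when the nexthop has no exception
 * table or no matching entry.
 */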
f2bb4bed 1213static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
4895c771
DM
1214{
1215 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1216 struct fib_nh_exception *fnhe;
1217 u32 hval;
1218
f2bb4bed
DM
1219 if (!hash)
1220 return NULL;
1221
d3a25c98 1222 hval = fnhe_hashfun(daddr);
4895c771
DM
1223
1224 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1225 fnhe = rcu_dereference(fnhe->fnhe_next)) {
f2bb4bed
DM
1226 if (fnhe->fnhe_daddr == daddr)
1227 return fnhe;
1228 }
1229 return NULL;
1230}
aee06da6 1231
caacf05e 1232static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
f2bb4bed
DM
1233 __be32 daddr)
1234{
caacf05e
DM
1235 bool ret = false;
1236
c5038a83 1237 spin_lock_bh(&fnhe_lock);
f2bb4bed 1238
c5038a83 1239 if (daddr == fnhe->fnhe_daddr) {
5aad1de5 1240 int genid = fnhe_genid(dev_net(rt->dst.dev));
13d82bf5 1241 struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
5aad1de5
TT
1242
1243 if (fnhe->fnhe_genid != genid) {
1244 fnhe->fnhe_genid = genid;
13d82bf5
SK
1245 fnhe->fnhe_gw = 0;
1246 fnhe->fnhe_pmtu = 0;
1247 fnhe->fnhe_expires = 0;
1248 }
387aa65a
TT
1249 fill_route_from_fnhe(rt, fnhe);
1250 if (!rt->rt_gateway)
155e8336 1251 rt->rt_gateway = daddr;
f2bb4bed 1252
c5038a83
DM
1253 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1254 if (orig)
1255 rt_free(orig);
1256
1257 fnhe->fnhe_stamp = jiffies;
caacf05e 1258 ret = true;
c5038a83
DM
1259 }
1260 spin_unlock_bh(&fnhe_lock);
caacf05e
DM
1261
1262 return ret;
54764bb6
ED
1263}
1264
caacf05e 1265static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
f2bb4bed 1266{
d26b3a7c 1267 struct rtable *orig, *prev, **p;
caacf05e 1268 bool ret = true;
f2bb4bed 1269
d26b3a7c 1270 if (rt_is_input_route(rt)) {
54764bb6 1271 p = (struct rtable **)&nh->nh_rth_input;
d26b3a7c 1272 } else {
d26b3a7c
ED
1273 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1274 }
f2bb4bed
DM
1275 orig = *p;
1276
1277 prev = cmpxchg(p, orig, rt);
1278 if (prev == orig) {
f2bb4bed 1279 if (orig)
54764bb6 1280 rt_free(orig);
155e8336 1281 } else
caacf05e 1282 ret = false;
caacf05e
DM
1283
1284 return ret;
1285}
1286
1287static DEFINE_SPINLOCK(rt_uncached_lock);
1288static LIST_HEAD(rt_uncached_list);
1289
1290static void rt_add_uncached_list(struct rtable *rt)
1291{
1292 spin_lock_bh(&rt_uncached_lock);
1293 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1294 spin_unlock_bh(&rt_uncached_lock);
1295}
1296
1297static void ipv4_dst_destroy(struct dst_entry *dst)
1298{
1299 struct rtable *rt = (struct rtable *) dst;
1300
78df76a0 1301 if (!list_empty(&rt->rt_uncached)) {
caacf05e
DM
1302 spin_lock_bh(&rt_uncached_lock);
1303 list_del(&rt->rt_uncached);
1304 spin_unlock_bh(&rt_uncached_lock);
1305 }
1306}
1307
1308void rt_flush_dev(struct net_device *dev)
1309{
1310 if (!list_empty(&rt_uncached_list)) {
1311 struct net *net = dev_net(dev);
1312 struct rtable *rt;
1313
1314 spin_lock_bh(&rt_uncached_lock);
1315 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1316 if (rt->dst.dev != dev)
1317 continue;
1318 rt->dst.dev = net->loopback_dev;
1319 dev_hold(rt->dst.dev);
1320 dev_put(dev);
1321 }
1322 spin_unlock_bh(&rt_uncached_lock);
4895c771
DM
1323 }
1324}
1325
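
/* A cached route may be reused only while its obsolete field is still
 * DST_OBSOLETE_FORCE_CHK (PMTU/redirect updates flip it to
 * DST_OBSOLETE_KILL) and its rt_genid still matches the namespace's
 * current generation.
 */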
4331debc 1326static bool rt_cache_valid(const struct rtable *rt)
d2d68ba9 1327{
4331debc
ED
1328 return rt &&
1329 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1330 !rt_is_expired(rt);
d2d68ba9
DM
1331}
1332
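/* Fill in nexthop-derived state on a freshly built rtable: gateway, metrics
 * and classid, and - when caching is allowed - store the route either in the
 * matching nexthop exception or in the nexthop's per-CPU output cache.
 * Routes that cannot be cached are flagged DST_NOCACHE and added to
 * rt_uncached_list so rt_flush_dev() can still reach them.
 */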
f2bb4bed 1333static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
5e2b61f7 1334 const struct fib_result *res,
f2bb4bed 1335 struct fib_nh_exception *fnhe,
982721f3 1336 struct fib_info *fi, u16 type, u32 itag)
1da177e4 1337{
caacf05e
DM
1338 bool cached = false;
1339
1da177e4 1340 if (fi) {
4895c771
DM
1341 struct fib_nh *nh = &FIB_RES_NH(*res);
1342
155e8336 1343 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
4895c771 1344 rt->rt_gateway = nh->nh_gw;
155e8336
JA
1345 rt->rt_uses_gateway = 1;
1346 }
2860583f 1347 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
c7066f70 1348#ifdef CONFIG_IP_ROUTE_CLASSID
f2bb4bed 1349 rt->dst.tclassid = nh->nh_tclassid;
1da177e4 1350#endif
c5038a83 1351 if (unlikely(fnhe))
caacf05e 1352 cached = rt_bind_exception(rt, fnhe, daddr);
c5038a83 1353 else if (!(rt->dst.flags & DST_NOCACHE))
caacf05e 1354 cached = rt_cache_route(nh, rt);
155e8336
JA
1355 if (unlikely(!cached)) {
1356 /* Routes we intend to cache in nexthop exception or
1357 * FIB nexthop have the DST_NOCACHE bit clear.
1358 * However, if we are unsuccessful at storing this
1359 * route into the cache we really need to set it.
1360 */
1361 rt->dst.flags |= DST_NOCACHE;
1362 if (!rt->rt_gateway)
1363 rt->rt_gateway = daddr;
1364 rt_add_uncached_list(rt);
1365 }
1366 } else
caacf05e 1367 rt_add_uncached_list(rt);
defb3519 1368
c7066f70 1369#ifdef CONFIG_IP_ROUTE_CLASSID
1da177e4 1370#ifdef CONFIG_IP_MULTIPLE_TABLES
85b91b03 1371 set_class_tag(rt, res->tclassid);
1da177e4
LT
1372#endif
1373 set_class_tag(rt, itag);
1374#endif
1da177e4
LT
1375}
1376
5c1e6aa3 1377static struct rtable *rt_dst_alloc(struct net_device *dev,
f2bb4bed 1378 bool nopolicy, bool noxfrm, bool will_cache)
0c4dcd58 1379{
f5b0a874 1380 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
c6cffba4 1381 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
5c1e6aa3
DM
1382 (nopolicy ? DST_NOPOLICY : 0) |
1383 (noxfrm ? DST_NOXFRM : 0));
0c4dcd58
DM
1384}
1385
96d36220 1386/* called in rcu_read_lock() section */
9e12bb22 1387static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1da177e4
LT
1388 u8 tos, struct net_device *dev, int our)
1389{
1da177e4 1390 struct rtable *rth;
96d36220 1391 struct in_device *in_dev = __in_dev_get_rcu(dev);
1da177e4 1392 u32 itag = 0;
b5f7e755 1393 int err;
1da177e4
LT
1394
1395 /* Primary sanity checks. */
1396
1397 if (in_dev == NULL)
1398 return -EINVAL;
1399
1e637c74 1400 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
d0daebc3 1401 skb->protocol != htons(ETH_P_IP))
1da177e4
LT
1402 goto e_inval;
1403
d0daebc3
TG
1404 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1405 if (ipv4_is_loopback(saddr))
1406 goto e_inval;
1407
f97c1e0c
JP
1408 if (ipv4_is_zeronet(saddr)) {
1409 if (!ipv4_is_local_multicast(daddr))
1da177e4 1410 goto e_inval;
b5f7e755 1411 } else {
9e56e380
DM
1412 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1413 in_dev, &itag);
b5f7e755
ED
1414 if (err < 0)
1415 goto e_err;
1416 }
4e7b2f14 1417 rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
f2bb4bed 1418 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1da177e4
LT
1419 if (!rth)
1420 goto e_nobufs;
1421
cf911662
DM
1422#ifdef CONFIG_IP_ROUTE_CLASSID
1423 rth->dst.tclassid = itag;
1424#endif
d8d1f30b 1425 rth->dst.output = ip_rt_bug;
1da177e4 1426
cf911662
DM
1427 rth->rt_genid = rt_genid(dev_net(dev));
1428 rth->rt_flags = RTCF_MULTICAST;
1429 rth->rt_type = RTN_MULTICAST;
9917e1e8 1430 rth->rt_is_input= 1;
13378cad 1431 rth->rt_iif = 0;
5943634f 1432 rth->rt_pmtu = 0;
f8126f1d 1433 rth->rt_gateway = 0;
155e8336 1434 rth->rt_uses_gateway = 0;
caacf05e 1435 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4 1436 if (our) {
d8d1f30b 1437 rth->dst.input= ip_local_deliver;
1da177e4
LT
1438 rth->rt_flags |= RTCF_LOCAL;
1439 }
1440
1441#ifdef CONFIG_IP_MROUTE
f97c1e0c 1442 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
d8d1f30b 1443 rth->dst.input = ip_mr_input;
1da177e4
LT
1444#endif
1445 RT_CACHE_STAT_INC(in_slow_mc);
1446
89aef892
DM
1447 skb_dst_set(skb, &rth->dst);
1448 return 0;
1da177e4
LT
1449
1450e_nobufs:
1da177e4 1451 return -ENOBUFS;
1da177e4 1452e_inval:
96d36220 1453 return -EINVAL;
b5f7e755 1454e_err:
b5f7e755 1455 return err;
1da177e4
LT
1456}
1457
1458
1459static void ip_handle_martian_source(struct net_device *dev,
1460 struct in_device *in_dev,
1461 struct sk_buff *skb,
9e12bb22
AV
1462 __be32 daddr,
1463 __be32 saddr)
1da177e4
LT
1464{
1465 RT_CACHE_STAT_INC(in_martian_src);
1466#ifdef CONFIG_IP_ROUTE_VERBOSE
1467 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1468 /*
1469 * RFC1812 recommendation, if source is martian,
1470 * the only hint is MAC header.
1471 */
058bd4d2 1472 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
673d57e7 1473 &daddr, &saddr, dev->name);
98e399f8 1474 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
058bd4d2
JP
1475 print_hex_dump(KERN_WARNING, "ll header: ",
1476 DUMP_PREFIX_OFFSET, 16, 1,
1477 skb_mac_header(skb),
1478 dev->hard_header_len, true);
1da177e4
LT
1479 }
1480 }
1481#endif
1482}
1483
47360228 1484/* called in rcu_read_lock() section */
5969f71d 1485static int __mkroute_input(struct sk_buff *skb,
982721f3 1486 const struct fib_result *res,
5969f71d 1487 struct in_device *in_dev,
c6cffba4 1488 __be32 daddr, __be32 saddr, u32 tos)
1da177e4 1489{
1da177e4
LT
1490 struct rtable *rth;
1491 int err;
1492 struct in_device *out_dev;
47360228 1493 unsigned int flags = 0;
d2d68ba9 1494 bool do_cache;
d9c9df8c 1495 u32 itag;
1da177e4
LT
1496
1497 /* get a working reference to the output device */
47360228 1498 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1da177e4 1499 if (out_dev == NULL) {
e87cc472 1500 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
1da177e4
LT
1501 return -EINVAL;
1502 }
1503
5c04c819 1504 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
9e56e380 1505 in_dev->dev, in_dev, &itag);
1da177e4 1506 if (err < 0) {
e905a9ed 1507 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1da177e4 1508 saddr);
e905a9ed 1509
1da177e4
LT
1510 goto cleanup;
1511 }
1512
e81da0e1
JA
1513 do_cache = res->fi && !itag;
1514 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1da177e4 1515 (IN_DEV_SHARED_MEDIA(out_dev) ||
e81da0e1 1516 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
1da177e4 1517 flags |= RTCF_DOREDIRECT;
e81da0e1
JA
1518 do_cache = false;
1519 }
1da177e4
LT
1520
1521 if (skb->protocol != htons(ETH_P_IP)) {
1522 /* Not IP (i.e. ARP). Do not create route, if it is
1523 * invalid for proxy arp. DNAT routes are always valid.
65324144
JDB
1524 *
1525 * Proxy arp feature have been extended to allow, ARP
1526 * replies back to the same interface, to support
1527 * Private VLAN switch technologies. See arp.c.
1da177e4 1528 */
65324144
JDB
1529 if (out_dev == in_dev &&
1530 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
1da177e4
LT
1531 err = -EINVAL;
1532 goto cleanup;
1533 }
1534 }
1535
e81da0e1
JA
1536 if (do_cache) {
1537 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1538 if (rt_cache_valid(rth)) {
1539 skb_dst_set_noref(skb, &rth->dst);
1540 goto out;
d2d68ba9
DM
1541 }
1542 }
f2bb4bed 1543
5c1e6aa3
DM
1544 rth = rt_dst_alloc(out_dev->dev,
1545 IN_DEV_CONF_GET(in_dev, NOPOLICY),
d2d68ba9 1546 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
1da177e4
LT
1547 if (!rth) {
1548 err = -ENOBUFS;
1549 goto cleanup;
1550 }
1551
cf911662
DM
1552 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
1553 rth->rt_flags = flags;
1554 rth->rt_type = res->type;
9917e1e8 1555 rth->rt_is_input = 1;
13378cad 1556 rth->rt_iif = 0;
5943634f 1557 rth->rt_pmtu = 0;
f8126f1d 1558 rth->rt_gateway = 0;
155e8336 1559 rth->rt_uses_gateway = 0;
caacf05e 1560 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4 1561
d8d1f30b
CG
1562 rth->dst.input = ip_forward;
1563 rth->dst.output = ip_output;
1da177e4 1564
d2d68ba9 1565 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
c6cffba4 1566 skb_dst_set(skb, &rth->dst);
d2d68ba9 1567out:
1da177e4
LT
1568 err = 0;
1569 cleanup:
1da177e4 1570 return err;
e905a9ed 1571}
1da177e4 1572
5969f71d
SH
1573static int ip_mkroute_input(struct sk_buff *skb,
1574 struct fib_result *res,
68a5e3dd 1575 const struct flowi4 *fl4,
5969f71d
SH
1576 struct in_device *in_dev,
1577 __be32 daddr, __be32 saddr, u32 tos)
1da177e4 1578{
1da177e4 1579#ifdef CONFIG_IP_ROUTE_MULTIPATH
ff3fccb3 1580 if (res->fi && res->fi->fib_nhs > 1)
1b7fe593 1581 fib_select_multipath(res);
1da177e4
LT
1582#endif
1583
1584 /* create a routing cache entry */
c6cffba4 1585 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
1da177e4
LT
1586}
1587
1da177e4
LT
/*
 * NOTE. We drop all packets that have a local source address, because
 * every properly looped-back packet must already have the correct
 * destination attached by the output routine.
 *
 * This approach solves two big problems:
 * 1. Non-simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with a 100% guarantee.
 *
 * Called with rcu_read_lock().
 */

9e12bb22 1599static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
c10237e0 1600 u8 tos, struct net_device *dev)
1da177e4
LT
1601{
1602 struct fib_result res;
96d36220 1603 struct in_device *in_dev = __in_dev_get_rcu(dev);
68a5e3dd 1604 struct flowi4 fl4;
95c96174 1605 unsigned int flags = 0;
1da177e4 1606 u32 itag = 0;
95c96174 1607 struct rtable *rth;
1da177e4 1608 int err = -EINVAL;
5e73ea1a 1609 struct net *net = dev_net(dev);
d2d68ba9 1610 bool do_cache;
1da177e4
LT
1611
1612 /* IP on this device is disabled. */
1613
1614 if (!in_dev)
1615 goto out;
1616
1617 /* Check for the most weird martians, which can be not detected
1618 by fib_lookup.
1619 */
1620
d0daebc3 1621 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
1da177e4
LT
1622 goto martian_source;
1623
d2d68ba9 1624 res.fi = NULL;
27a954bd 1625 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
1da177e4
LT
1626 goto brd_input;
1627
1628 /* Accept zero addresses only to limited broadcast;
1629 * I even do not know to fix it or not. Waiting for complains :-)
1630 */
f97c1e0c 1631 if (ipv4_is_zeronet(saddr))
1da177e4
LT
1632 goto martian_source;
1633
d0daebc3 1634 if (ipv4_is_zeronet(daddr))
1da177e4
LT
1635 goto martian_destination;
1636
9eb43e76
ED
1637 /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
1638 * and call it once if daddr or/and saddr are loopback addresses
1639 */
1640 if (ipv4_is_loopback(daddr)) {
1641 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
d0daebc3 1642 goto martian_destination;
9eb43e76
ED
1643 } else if (ipv4_is_loopback(saddr)) {
1644 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
d0daebc3
TG
1645 goto martian_source;
1646 }
1647
1da177e4
LT
1648 /*
1649 * Now we are ready to route packet.
1650 */
68a5e3dd
DM
1651 fl4.flowi4_oif = 0;
1652 fl4.flowi4_iif = dev->ifindex;
1653 fl4.flowi4_mark = skb->mark;
1654 fl4.flowi4_tos = tos;
1655 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1656 fl4.daddr = daddr;
1657 fl4.saddr = saddr;
1658 err = fib_lookup(net, &fl4, &res);
251da413 1659 if (err != 0)
1da177e4 1660 goto no_route;
1da177e4
LT
1661
1662 RT_CACHE_STAT_INC(in_slow_tot);
1663
1664 if (res.type == RTN_BROADCAST)
1665 goto brd_input;
1666
1667 if (res.type == RTN_LOCAL) {
5c04c819 1668 err = fib_validate_source(skb, saddr, daddr, tos,
1fb9489b 1669 LOOPBACK_IFINDEX,
9e56e380 1670 dev, in_dev, &itag);
b5f7e755
ED
1671 if (err < 0)
1672 goto martian_source_keep_err;
1da177e4
LT
1673 goto local_input;
1674 }
1675
1676 if (!IN_DEV_FORWARD(in_dev))
251da413 1677 goto no_route;
1da177e4
LT
1678 if (res.type != RTN_UNICAST)
1679 goto martian_destination;
1680
68a5e3dd 1681 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
1da177e4
LT
1682out: return err;
1683
1684brd_input:
1685 if (skb->protocol != htons(ETH_P_IP))
1686 goto e_inval;
1687
41347dcd 1688 if (!ipv4_is_zeronet(saddr)) {
9e56e380
DM
1689 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1690 in_dev, &itag);
1da177e4 1691 if (err < 0)
b5f7e755 1692 goto martian_source_keep_err;
1da177e4
LT
1693 }
1694 flags |= RTCF_BROADCAST;
1695 res.type = RTN_BROADCAST;
1696 RT_CACHE_STAT_INC(in_brd);
1697
1698local_input:
d2d68ba9
DM
1699 do_cache = false;
1700 if (res.fi) {
fe3edf45 1701 if (!itag) {
54764bb6 1702 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
d2d68ba9 1703 if (rt_cache_valid(rth)) {
c6cffba4
DM
1704 skb_dst_set_noref(skb, &rth->dst);
1705 err = 0;
1706 goto out;
d2d68ba9
DM
1707 }
1708 do_cache = true;
1709 }
1710 }
1711
5c1e6aa3 1712 rth = rt_dst_alloc(net->loopback_dev,
d2d68ba9 1713 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
1da177e4
LT
1714 if (!rth)
1715 goto e_nobufs;
1716
cf911662 1717 rth->dst.input= ip_local_deliver;
d8d1f30b 1718 rth->dst.output= ip_rt_bug;
cf911662
DM
1719#ifdef CONFIG_IP_ROUTE_CLASSID
1720 rth->dst.tclassid = itag;
1721#endif
1da177e4 1722
cf911662
DM
1723 rth->rt_genid = rt_genid(net);
1724 rth->rt_flags = flags|RTCF_LOCAL;
1725 rth->rt_type = res.type;
9917e1e8 1726 rth->rt_is_input = 1;
13378cad 1727 rth->rt_iif = 0;
5943634f 1728 rth->rt_pmtu = 0;
f8126f1d 1729 rth->rt_gateway = 0;
155e8336 1730 rth->rt_uses_gateway = 0;
caacf05e 1731 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4 1732 if (res.type == RTN_UNREACHABLE) {
d8d1f30b
CG
1733 rth->dst.input= ip_error;
1734 rth->dst.error= -err;
1da177e4
LT
1735 rth->rt_flags &= ~RTCF_LOCAL;
1736 }
d2d68ba9
DM
1737 if (do_cache)
1738 rt_cache_route(&FIB_RES_NH(res), rth);
89aef892 1739 skb_dst_set(skb, &rth->dst);
b23dd4fe 1740 err = 0;
ebc0ffae 1741 goto out;
1da177e4
LT
1742
1743no_route:
1744 RT_CACHE_STAT_INC(in_no_route);
1da177e4 1745 res.type = RTN_UNREACHABLE;
7f53878d
MC
1746 if (err == -ESRCH)
1747 err = -ENETUNREACH;
1da177e4
LT
1748 goto local_input;
1749
1750 /*
1751 * Do not cache martian addresses: they should be logged (RFC1812)
1752 */
1753martian_destination:
1754 RT_CACHE_STAT_INC(in_martian_dst);
1755#ifdef CONFIG_IP_ROUTE_VERBOSE
e87cc472
JP
1756 if (IN_DEV_LOG_MARTIANS(in_dev))
1757 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
1758 &daddr, &saddr, dev->name);
1da177e4 1759#endif
2c2910a4 1760
1da177e4
LT
1761e_inval:
1762 err = -EINVAL;
ebc0ffae 1763 goto out;
1da177e4
LT
1764
1765e_nobufs:
1766 err = -ENOBUFS;
ebc0ffae 1767 goto out;
1da177e4
LT
1768
1769martian_source:
b5f7e755
ED
1770 err = -EINVAL;
1771martian_source_keep_err:
1da177e4 1772 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
ebc0ffae 1773 goto out;
1da177e4
LT
1774}
1775
c6cffba4
DM
1776int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1777 u8 tos, struct net_device *dev)
1da177e4 1778{
96d36220 1779 int res;
1da177e4 1780
96d36220
ED
1781 rcu_read_lock();
1782
1da177e4
LT
	/* Multicast recognition logic was moved from the route cache to here.
	 * The problem was that too many Ethernet cards have broken/missing
	 * hardware multicast filters :-( As a result, a host on a multicast
	 * network acquires a lot of useless route cache entries, sort of
	 * SDR messages from all over the world.  Now we try to get rid of
	 * them.  Provided the software IP multicast filter is organized
	 * reasonably (at least, hashed), it does not result in a slowdown
	 * compared with route cache reject entries.
	 * Note that multicast routers are not affected, because a route
	 * cache entry is created eventually.
	 */
f97c1e0c 1794 if (ipv4_is_multicast(daddr)) {
96d36220 1795 struct in_device *in_dev = __in_dev_get_rcu(dev);
1da177e4 1796
96d36220 1797 if (in_dev) {
dbdd9a52
DM
1798 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1799 ip_hdr(skb)->protocol);
1da177e4
LT
1800 if (our
1801#ifdef CONFIG_IP_MROUTE
9d4fb27d
JP
1802 ||
1803 (!ipv4_is_local_multicast(daddr) &&
1804 IN_DEV_MFORWARD(in_dev))
1da177e4 1805#endif
9d4fb27d 1806 ) {
96d36220
ED
1807 int res = ip_route_input_mc(skb, daddr, saddr,
1808 tos, dev, our);
1da177e4 1809 rcu_read_unlock();
96d36220 1810 return res;
1da177e4
LT
1811 }
1812 }
1813 rcu_read_unlock();
1814 return -EINVAL;
1815 }
c10237e0 1816 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
96d36220
ED
1817 rcu_read_unlock();
1818 return res;
1da177e4 1819}
c6cffba4 1820EXPORT_SYMBOL(ip_route_input_noref);
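/*
 * Editor's note: a minimal, hypothetical caller sketch (not from this
 * file) showing the usual pattern around ip_route_input_noref(): route
 * the received skb by its addresses/TOS/device, then pick up the result
 * from the attached dst, much as inet_rtm_getroute() below does.
 */
static int example_route_received_skb(struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;
	int err;

	err = ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, dev);
	if (err)
		return err;

	rt = skb_rtable(skb);		/* dst attached by the routing code */
	if (rt->dst.error)
		return -rt->dst.error;

	/* rt->rt_type now says LOCAL/UNICAST/BROADCAST/MULTICAST. */
	return 0;
}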
1da177e4 1821
ebc0ffae 1822/* called with rcu_read_lock() */
982721f3 1823static struct rtable *__mkroute_output(const struct fib_result *res,
1a00fee4 1824 const struct flowi4 *fl4, int orig_oif,
f61759e6 1825 struct net_device *dev_out,
5ada5527 1826 unsigned int flags)
1da177e4 1827{
982721f3 1828 struct fib_info *fi = res->fi;
f2bb4bed 1829 struct fib_nh_exception *fnhe;
5ada5527 1830 struct in_device *in_dev;
982721f3 1831 u16 type = res->type;
5ada5527 1832 struct rtable *rth;
c92b9655 1833 bool do_cache;
1da177e4 1834
d0daebc3
TG
1835 in_dev = __in_dev_get_rcu(dev_out);
1836 if (!in_dev)
5ada5527 1837 return ERR_PTR(-EINVAL);
1da177e4 1838
d0daebc3
TG
1839 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1840 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1841 return ERR_PTR(-EINVAL);
1842
68a5e3dd 1843 if (ipv4_is_lbcast(fl4->daddr))
982721f3 1844 type = RTN_BROADCAST;
68a5e3dd 1845 else if (ipv4_is_multicast(fl4->daddr))
982721f3 1846 type = RTN_MULTICAST;
68a5e3dd 1847 else if (ipv4_is_zeronet(fl4->daddr))
5ada5527 1848 return ERR_PTR(-EINVAL);
1da177e4
LT
1849
1850 if (dev_out->flags & IFF_LOOPBACK)
1851 flags |= RTCF_LOCAL;
1852
63617421 1853 do_cache = true;
982721f3 1854 if (type == RTN_BROADCAST) {
1da177e4 1855 flags |= RTCF_BROADCAST | RTCF_LOCAL;
982721f3
DM
1856 fi = NULL;
1857 } else if (type == RTN_MULTICAST) {
dd28d1a0 1858 flags |= RTCF_MULTICAST | RTCF_LOCAL;
813b3b5d
DM
1859 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1860 fl4->flowi4_proto))
1da177e4 1861 flags &= ~RTCF_LOCAL;
63617421
JA
1862 else
1863 do_cache = false;
1da177e4 1864 /* If a multicast route does not exist, use
dd28d1a0
ED
1865 * the default one, but do not gateway in this case.
1866 * Yes, it is a hack.
1da177e4 1867 */
982721f3
DM
1868 if (fi && res->prefixlen < 4)
1869 fi = NULL;
1da177e4
LT
1870 }
1871
f2bb4bed 1872 fnhe = NULL;
63617421
JA
1873 do_cache &= fi != NULL;
1874 if (do_cache) {
c5038a83 1875 struct rtable __rcu **prth;
c92b9655 1876 struct fib_nh *nh = &FIB_RES_NH(*res);
d26b3a7c 1877
c92b9655 1878 fnhe = find_exception(nh, fl4->daddr);
c5038a83
DM
1879 if (fnhe)
1880 prth = &fnhe->fnhe_rth;
c92b9655
JA
1881 else {
1882 if (unlikely(fl4->flowi4_flags &
1883 FLOWI_FLAG_KNOWN_NH &&
1884 !(nh->nh_gw &&
1885 nh->nh_scope == RT_SCOPE_LINK))) {
1886 do_cache = false;
1887 goto add;
1888 }
1889 prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
1890 }
c5038a83
DM
1891 rth = rcu_dereference(*prth);
1892 if (rt_cache_valid(rth)) {
1893 dst_hold(&rth->dst);
1894 return rth;
f2bb4bed
DM
1895 }
1896 }
c92b9655
JA
1897
1898add:
5c1e6aa3
DM
1899 rth = rt_dst_alloc(dev_out,
1900 IN_DEV_CONF_GET(in_dev, NOPOLICY),
f2bb4bed 1901 IN_DEV_CONF_GET(in_dev, NOXFRM),
c92b9655 1902 do_cache);
8391d07b 1903 if (!rth)
5ada5527 1904 return ERR_PTR(-ENOBUFS);
8391d07b 1905
cf911662
DM
1906 rth->dst.output = ip_output;
1907
cf911662
DM
1908 rth->rt_genid = rt_genid(dev_net(dev_out));
1909 rth->rt_flags = flags;
1910 rth->rt_type = type;
9917e1e8 1911 rth->rt_is_input = 0;
13378cad 1912 rth->rt_iif = orig_oif ? : 0;
5943634f 1913 rth->rt_pmtu = 0;
f8126f1d 1914 rth->rt_gateway = 0;
155e8336 1915 rth->rt_uses_gateway = 0;
caacf05e 1916 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4
LT
1917
1918 RT_CACHE_STAT_INC(out_slow_tot);
1919
41347dcd 1920 if (flags & RTCF_LOCAL)
d8d1f30b 1921 rth->dst.input = ip_local_deliver;
1da177e4 1922 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
e905a9ed 1923 if (flags & RTCF_LOCAL &&
1da177e4 1924 !(dev_out->flags & IFF_LOOPBACK)) {
d8d1f30b 1925 rth->dst.output = ip_mc_output;
1da177e4
LT
1926 RT_CACHE_STAT_INC(out_slow_mc);
1927 }
1928#ifdef CONFIG_IP_MROUTE
982721f3 1929 if (type == RTN_MULTICAST) {
1da177e4 1930 if (IN_DEV_MFORWARD(in_dev) &&
813b3b5d 1931 !ipv4_is_local_multicast(fl4->daddr)) {
d8d1f30b
CG
1932 rth->dst.input = ip_mr_input;
1933 rth->dst.output = ip_mc_output;
1da177e4
LT
1934 }
1935 }
1936#endif
1937 }
1938
f2bb4bed 1939 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
1da177e4 1940
5ada5527 1941 return rth;
1da177e4
LT
1942}
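/*
 * Editor's note: an illustrative, out-of-tree condensation of the caching
 * choice made in __mkroute_output() above, not code from this file; it
 * uses the helpers defined elsewhere in route.c and the helper name is
 * hypothetical.  A next-hop exception (fnhe) carries its own cached
 * route; otherwise the per-cpu per-nexthop output route slot is used,
 * unless FLOWI_FLAG_KNOWN_NH is set and the nexthop has no on-link
 * gateway, in which case caching is skipped entirely.
 */
static struct rtable __rcu **example_pick_output_cache(struct fib_nh *nh,
							const struct flowi4 *fl4,
							bool *do_cache)
{
	struct fib_nh_exception *fnhe = find_exception(nh, fl4->daddr);

	if (fnhe)
		return &fnhe->fnhe_rth;		/* exception has its own slot */

	if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH &&
		     !(nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK))) {
		*do_cache = false;		/* do not share this route */
		return NULL;
	}

	return __this_cpu_ptr(nh->nh_pcpu_rth_output);	/* per-cpu slot */
}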
1943
1da177e4
LT
1944/*
1945 * Major route resolver routine.
1946 */
1947
89aef892 1948struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
1da177e4 1949{
1da177e4 1950 struct net_device *dev_out = NULL;
f61759e6 1951 __u8 tos = RT_FL_TOS(fl4);
813b3b5d
DM
1952 unsigned int flags = 0;
1953 struct fib_result res;
5ada5527 1954 struct rtable *rth;
813b3b5d 1955 int orig_oif;
1da177e4 1956
85b91b03 1957 res.tclassid = 0;
1da177e4 1958 res.fi = NULL;
8b96d22d 1959 res.table = NULL;
1da177e4 1960
813b3b5d
DM
1961 orig_oif = fl4->flowi4_oif;
1962
1fb9489b 1963 fl4->flowi4_iif = LOOPBACK_IFINDEX;
813b3b5d
DM
1964 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1965 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1966 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
44713b67 1967
010c2708 1968 rcu_read_lock();
813b3b5d 1969 if (fl4->saddr) {
b23dd4fe 1970 rth = ERR_PTR(-EINVAL);
813b3b5d
DM
1971 if (ipv4_is_multicast(fl4->saddr) ||
1972 ipv4_is_lbcast(fl4->saddr) ||
1973 ipv4_is_zeronet(fl4->saddr))
1da177e4
LT
1974 goto out;
1975
1da177e4
LT
1976 /* I removed the check for oif == dev_out->oif here.
1977 It was wrong for two reasons:
1ab35276
DL
1978 1. ip_dev_find(net, saddr) can return the wrong iface if saddr
1979 is assigned to multiple interfaces.
1da177e4
LT
1980 2. Moreover, we are allowed to send packets with a saddr
1981 of another iface. --ANK
1982 */
1983
813b3b5d
DM
1984 if (fl4->flowi4_oif == 0 &&
1985 (ipv4_is_multicast(fl4->daddr) ||
1986 ipv4_is_lbcast(fl4->daddr))) {
a210d01a 1987 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
813b3b5d 1988 dev_out = __ip_dev_find(net, fl4->saddr, false);
a210d01a
JA
1989 if (dev_out == NULL)
1990 goto out;
1991
1da177e4
LT
1992 /* Special hack: the user can direct multicasts
1993 and limited broadcasts via the necessary interface
1994 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1995 This hack is not just for fun, it allows
1996 vic, vat and friends to work.
1997 They bind the socket to loopback, set the ttl to zero
1998 and expect that it will work.
1999 From the viewpoint of the routing cache they are broken,
2000 because we are not allowed to build a multicast path
2001 with a loopback source addr (look, the routing cache
2002 cannot know that the ttl is zero, so the packet
2003 will not leave this host and the route is valid).
2004 Luckily, this hack is a good workaround.
2005 */
2006
813b3b5d 2007 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2008 goto make_route;
2009 }
a210d01a 2010
813b3b5d 2011 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
a210d01a 2012 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
813b3b5d 2013 if (!__ip_dev_find(net, fl4->saddr, false))
a210d01a 2014 goto out;
a210d01a 2015 }
1da177e4
LT
2016 }
2017
2018
813b3b5d
DM
2019 if (fl4->flowi4_oif) {
2020 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
b23dd4fe 2021 rth = ERR_PTR(-ENODEV);
1da177e4
LT
2022 if (dev_out == NULL)
2023 goto out;
e5ed6399
HX
2024
2025 /* RACE: Check return value of inet_select_addr instead. */
fc75fc83 2026 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
b23dd4fe 2027 rth = ERR_PTR(-ENETUNREACH);
fc75fc83
ED
2028 goto out;
2029 }
813b3b5d
DM
2030 if (ipv4_is_local_multicast(fl4->daddr) ||
2031 ipv4_is_lbcast(fl4->daddr)) {
2032 if (!fl4->saddr)
2033 fl4->saddr = inet_select_addr(dev_out, 0,
2034 RT_SCOPE_LINK);
1da177e4
LT
2035 goto make_route;
2036 }
813b3b5d
DM
2037 if (fl4->saddr) {
2038 if (ipv4_is_multicast(fl4->daddr))
2039 fl4->saddr = inet_select_addr(dev_out, 0,
2040 fl4->flowi4_scope);
2041 else if (!fl4->daddr)
2042 fl4->saddr = inet_select_addr(dev_out, 0,
2043 RT_SCOPE_HOST);
1da177e4
LT
2044 }
2045 }
2046
813b3b5d
DM
2047 if (!fl4->daddr) {
2048 fl4->daddr = fl4->saddr;
2049 if (!fl4->daddr)
2050 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
b40afd0e 2051 dev_out = net->loopback_dev;
1fb9489b 2052 fl4->flowi4_oif = LOOPBACK_IFINDEX;
1da177e4
LT
2053 res.type = RTN_LOCAL;
2054 flags |= RTCF_LOCAL;
2055 goto make_route;
2056 }
2057
813b3b5d 2058 if (fib_lookup(net, fl4, &res)) {
1da177e4 2059 res.fi = NULL;
8b96d22d 2060 res.table = NULL;
813b3b5d 2061 if (fl4->flowi4_oif) {
1da177e4
LT
2062 /* Apparently, the routing tables are wrong. Assume
2063 that the destination is on-link.
2064
2065 WHY? DW.
2066 Because we are allowed to send to an iface
2067 even if it has NO routes and NO assigned
2068 addresses. When oif is specified, the routing
2069 tables are looked up with only one purpose:
2070 to catch whether the destination is gatewayed, rather than
2071 directly reachable. Moreover, if MSG_DONTROUTE is set,
2072 we send the packet, ignoring both the routing tables
2073 and the ifaddr state. --ANK
2074
2075
2076 We could do this even if oif is unknown,
2077 as IPv6 likely does, but we do not.
2078 */
2079
813b3b5d
DM
2080 if (fl4->saddr == 0)
2081 fl4->saddr = inet_select_addr(dev_out, 0,
2082 RT_SCOPE_LINK);
1da177e4
LT
2083 res.type = RTN_UNICAST;
2084 goto make_route;
2085 }
b23dd4fe 2086 rth = ERR_PTR(-ENETUNREACH);
1da177e4
LT
2087 goto out;
2088 }
1da177e4
LT
2089
2090 if (res.type == RTN_LOCAL) {
813b3b5d 2091 if (!fl4->saddr) {
9fc3bbb4 2092 if (res.fi->fib_prefsrc)
813b3b5d 2093 fl4->saddr = res.fi->fib_prefsrc;
9fc3bbb4 2094 else
813b3b5d 2095 fl4->saddr = fl4->daddr;
9fc3bbb4 2096 }
b40afd0e 2097 dev_out = net->loopback_dev;
813b3b5d 2098 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2099 flags |= RTCF_LOCAL;
2100 goto make_route;
2101 }
2102
2103#ifdef CONFIG_IP_ROUTE_MULTIPATH
813b3b5d 2104 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
1b7fe593 2105 fib_select_multipath(&res);
1da177e4
LT
2106 else
2107#endif
21d8c49e
DM
2108 if (!res.prefixlen &&
2109 res.table->tb_num_default > 1 &&
813b3b5d 2110 res.type == RTN_UNICAST && !fl4->flowi4_oif)
0c838ff1 2111 fib_select_default(&res);
1da177e4 2112
813b3b5d
DM
2113 if (!fl4->saddr)
2114 fl4->saddr = FIB_RES_PREFSRC(net, res);
1da177e4 2115
1da177e4 2116 dev_out = FIB_RES_DEV(res);
813b3b5d 2117 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2118
2119
2120make_route:
1a00fee4 2121 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
1da177e4 2122
010c2708
DM
2123out:
2124 rcu_read_unlock();
b23dd4fe 2125 return rth;
1da177e4 2126}
d8c97a94
ACM
2127EXPORT_SYMBOL_GPL(__ip_route_output_key);
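/*
 * Editor's note: a minimal, hypothetical caller sketch (not from this
 * file) of an output lookup.  It fills a flowi4 the same way
 * inet_rtm_getroute() below does and resolves it with
 * ip_route_output_key(), the wrapper around __ip_route_output_key().
 */
static int example_output_lookup(struct net *net, __be32 daddr, __be32 saddr,
				 int oif, u8 tos)
{
	struct flowi4 fl4;
	struct rtable *rt;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = saddr;		/* 0 lets the lookup pick a source */
	fl4.flowi4_oif = oif;		/* 0 means "any interface" */
	fl4.flowi4_tos = tos;

	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	/* fl4.saddr and fl4.flowi4_oif now hold the chosen values. */
	ip_rt_put(rt);
	return 0;
}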
2128
ae2688d5
JW
2129static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2130{
2131 return NULL;
2132}
2133
ebb762f2 2134static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 2135{
618f9bc7
SK
2136 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2137
2138 return mtu ? : dst->dev->mtu;
ec831ea7
RD
2139}
2140
6700c270
DM
2141static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2142 struct sk_buff *skb, u32 mtu)
14e50e57
DM
2143{
2144}
2145
6700c270
DM
2146static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2147 struct sk_buff *skb)
b587ee3b
DM
2148{
2149}
2150
0972ddb2
HB
2151static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2152 unsigned long old)
2153{
2154 return NULL;
2155}
2156
14e50e57
DM
2157static struct dst_ops ipv4_dst_blackhole_ops = {
2158 .family = AF_INET,
09640e63 2159 .protocol = cpu_to_be16(ETH_P_IP),
ae2688d5 2160 .check = ipv4_blackhole_dst_check,
ebb762f2 2161 .mtu = ipv4_blackhole_mtu,
214f45c9 2162 .default_advmss = ipv4_default_advmss,
14e50e57 2163 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
b587ee3b 2164 .redirect = ipv4_rt_blackhole_redirect,
0972ddb2 2165 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
d3aaeb38 2166 .neigh_lookup = ipv4_neigh_lookup,
14e50e57
DM
2167};
2168
2774c131 2169struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2170{
2774c131 2171 struct rtable *ort = (struct rtable *) dst_orig;
f5b0a874 2172 struct rtable *rt;
14e50e57 2173
f5b0a874 2174 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
14e50e57 2175 if (rt) {
d8d1f30b 2176 struct dst_entry *new = &rt->dst;
14e50e57 2177
14e50e57 2178 new->__use = 1;
352e512c
HX
2179 new->input = dst_discard;
2180 new->output = dst_discard;
14e50e57 2181
d8d1f30b 2182 new->dev = ort->dst.dev;
14e50e57
DM
2183 if (new->dev)
2184 dev_hold(new->dev);
2185
9917e1e8 2186 rt->rt_is_input = ort->rt_is_input;
5e2b61f7 2187 rt->rt_iif = ort->rt_iif;
5943634f 2188 rt->rt_pmtu = ort->rt_pmtu;
14e50e57 2189
e84f84f2 2190 rt->rt_genid = rt_genid(net);
14e50e57
DM
2191 rt->rt_flags = ort->rt_flags;
2192 rt->rt_type = ort->rt_type;
14e50e57 2193 rt->rt_gateway = ort->rt_gateway;
155e8336 2194 rt->rt_uses_gateway = ort->rt_uses_gateway;
14e50e57 2195
caacf05e
DM
2196 INIT_LIST_HEAD(&rt->rt_uncached);
2197
14e50e57
DM
2198 dst_free(new);
2199 }
2200
2774c131
DM
2201 dst_release(dst_orig);
2202
2203 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
14e50e57
DM
2204}
2205
9d6ec938 2206struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
b23dd4fe 2207 struct sock *sk)
1da177e4 2208{
9d6ec938 2209 struct rtable *rt = __ip_route_output_key(net, flp4);
1da177e4 2210
b23dd4fe
DM
2211 if (IS_ERR(rt))
2212 return rt;
1da177e4 2213
56157872 2214 if (flp4->flowi4_proto)
9d6ec938
DM
2215 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2216 flowi4_to_flowi(flp4),
2217 sk, 0);
1da177e4 2218
b23dd4fe 2219 return rt;
1da177e4 2220}
d8c97a94
ACM
2221EXPORT_SYMBOL_GPL(ip_route_output_flow);
2222
f1ce3062 2223static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
15e47304 2224 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
f1ce3062 2225 u32 seq, int event, int nowait, unsigned int flags)
1da177e4 2226{
511c3f92 2227 struct rtable *rt = skb_rtable(skb);
1da177e4 2228 struct rtmsg *r;
be403ea1 2229 struct nlmsghdr *nlh;
2bc8ca40 2230 unsigned long expires = 0;
f185071d 2231 u32 error;
521f5490 2232 u32 metrics[RTAX_MAX];
be403ea1 2233
15e47304 2234 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
be403ea1 2235 if (nlh == NULL)
26932566 2236 return -EMSGSIZE;
be403ea1
TG
2237
2238 r = nlmsg_data(nlh);
1da177e4
LT
2239 r->rtm_family = AF_INET;
2240 r->rtm_dst_len = 32;
2241 r->rtm_src_len = 0;
d6c0a4f6 2242 r->rtm_tos = fl4->flowi4_tos;
1da177e4 2243 r->rtm_table = RT_TABLE_MAIN;
f3756b79
DM
2244 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2245 goto nla_put_failure;
1da177e4
LT
2246 r->rtm_type = rt->rt_type;
2247 r->rtm_scope = RT_SCOPE_UNIVERSE;
2248 r->rtm_protocol = RTPROT_UNSPEC;
2249 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2250 if (rt->rt_flags & RTCF_NOTIFY)
2251 r->rtm_flags |= RTM_F_NOTIFY;
be403ea1 2252
f1ce3062 2253 if (nla_put_be32(skb, RTA_DST, dst))
f3756b79 2254 goto nla_put_failure;
1a00fee4 2255 if (src) {
1da177e4 2256 r->rtm_src_len = 32;
1a00fee4 2257 if (nla_put_be32(skb, RTA_SRC, src))
f3756b79 2258 goto nla_put_failure;
1da177e4 2259 }
f3756b79
DM
2260 if (rt->dst.dev &&
2261 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2262 goto nla_put_failure;
c7066f70 2263#ifdef CONFIG_IP_ROUTE_CLASSID
f3756b79
DM
2264 if (rt->dst.tclassid &&
2265 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2266 goto nla_put_failure;
1da177e4 2267#endif
41347dcd 2268 if (!rt_is_input_route(rt) &&
d6c0a4f6
DM
2269 fl4->saddr != src) {
2270 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
f3756b79
DM
2271 goto nla_put_failure;
2272 }
155e8336 2273 if (rt->rt_uses_gateway &&
f3756b79
DM
2274 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2275 goto nla_put_failure;
be403ea1 2276
ee9a8f7a
SK
2277 expires = rt->dst.expires;
2278 if (expires) {
2279 unsigned long now = jiffies;
2280
2281 if (time_before(now, expires))
2282 expires -= now;
2283 else
2284 expires = 0;
2285 }
2286
521f5490 2287 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
ee9a8f7a 2288 if (rt->rt_pmtu && expires)
521f5490
JA
2289 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2290 if (rtnetlink_put_metrics(skb, metrics) < 0)
be403ea1
TG
2291 goto nla_put_failure;
2292
b4869889 2293 if (fl4->flowi4_mark &&
68aaed54 2294 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
f3756b79 2295 goto nla_put_failure;
963bfeee 2296
d8d1f30b 2297 error = rt->dst.error;
be403ea1 2298
c7537967 2299 if (rt_is_input_route(rt)) {
8caaf7b6
ND
2300#ifdef CONFIG_IP_MROUTE
2301 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2302 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2303 int err = ipmr_get_route(net, skb,
2304 fl4->saddr, fl4->daddr,
2305 r, nowait);
2306 if (err <= 0) {
2307 if (!nowait) {
2308 if (err == 0)
2309 return 0;
2310 goto nla_put_failure;
2311 } else {
2312 if (err == -EMSGSIZE)
2313 goto nla_put_failure;
2314 error = err;
2315 }
2316 }
2317 } else
2318#endif
2319 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2320 goto nla_put_failure;
1da177e4
LT
2321 }
2322
f185071d 2323 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
e3703b3d 2324 goto nla_put_failure;
be403ea1
TG
2325
2326 return nlmsg_end(skb, nlh);
1da177e4 2327
be403ea1 2328nla_put_failure:
26932566
PM
2329 nlmsg_cancel(skb, nlh);
2330 return -EMSGSIZE;
1da177e4
LT
2331}
2332
661d2967 2333static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 2334{
3b1e0a65 2335 struct net *net = sock_net(in_skb->sk);
d889ce3b
TG
2336 struct rtmsg *rtm;
2337 struct nlattr *tb[RTA_MAX+1];
1da177e4 2338 struct rtable *rt = NULL;
d6c0a4f6 2339 struct flowi4 fl4;
9e12bb22
AV
2340 __be32 dst = 0;
2341 __be32 src = 0;
2342 u32 iif;
d889ce3b 2343 int err;
963bfeee 2344 int mark;
1da177e4
LT
2345 struct sk_buff *skb;
2346
d889ce3b
TG
2347 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2348 if (err < 0)
2349 goto errout;
2350
2351 rtm = nlmsg_data(nlh);
2352
1da177e4 2353 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
d889ce3b
TG
2354 if (skb == NULL) {
2355 err = -ENOBUFS;
2356 goto errout;
2357 }
1da177e4
LT
2358
2359 /* Reserve room for dummy headers; this skb can pass
2360 through a good chunk of the routing engine.
2361 */
459a98ed 2362 skb_reset_mac_header(skb);
c1d2bbe1 2363 skb_reset_network_header(skb);
d2c962b8
SH
2364
2365 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
eddc9ec5 2366 ip_hdr(skb)->protocol = IPPROTO_ICMP;
1da177e4
LT
2367 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2368
17fb2c64
AV
2369 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2370 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
d889ce3b 2371 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
963bfeee 2372 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
1da177e4 2373
d6c0a4f6
DM
2374 memset(&fl4, 0, sizeof(fl4));
2375 fl4.daddr = dst;
2376 fl4.saddr = src;
2377 fl4.flowi4_tos = rtm->rtm_tos;
2378 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2379 fl4.flowi4_mark = mark;
2380
1da177e4 2381 if (iif) {
d889ce3b
TG
2382 struct net_device *dev;
2383
1937504d 2384 dev = __dev_get_by_index(net, iif);
d889ce3b
TG
2385 if (dev == NULL) {
2386 err = -ENODEV;
2387 goto errout_free;
2388 }
2389
1da177e4
LT
2390 skb->protocol = htons(ETH_P_IP);
2391 skb->dev = dev;
963bfeee 2392 skb->mark = mark;
1da177e4
LT
2393 local_bh_disable();
2394 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2395 local_bh_enable();
d889ce3b 2396
511c3f92 2397 rt = skb_rtable(skb);
d8d1f30b
CG
2398 if (err == 0 && rt->dst.error)
2399 err = -rt->dst.error;
1da177e4 2400 } else {
9d6ec938 2401 rt = ip_route_output_key(net, &fl4);
b23dd4fe
DM
2402
2403 err = 0;
2404 if (IS_ERR(rt))
2405 err = PTR_ERR(rt);
1da177e4 2406 }
d889ce3b 2407
1da177e4 2408 if (err)
d889ce3b 2409 goto errout_free;
1da177e4 2410
d8d1f30b 2411 skb_dst_set(skb, &rt->dst);
1da177e4
LT
2412 if (rtm->rtm_flags & RTM_F_NOTIFY)
2413 rt->rt_flags |= RTCF_NOTIFY;
2414
f1ce3062 2415 err = rt_fill_info(net, dst, src, &fl4, skb,
15e47304 2416 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
1937504d 2417 RTM_NEWROUTE, 0, 0);
d889ce3b
TG
2418 if (err <= 0)
2419 goto errout_free;
1da177e4 2420
15e47304 2421 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
d889ce3b 2422errout:
2942e900 2423 return err;
1da177e4 2424
d889ce3b 2425errout_free:
1da177e4 2426 kfree_skb(skb);
d889ce3b 2427 goto errout;
1da177e4
LT
2428}
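/*
 * Editor's note: a hypothetical userspace sketch (not part of this file)
 * of the request that inet_rtm_getroute() above answers -- essentially
 * what "ip route get <addr>" sends: an RTM_GETROUTE message carrying an
 * RTA_DST attribute over a NETLINK_ROUTE socket.
 */
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int example_route_get(const char *dst)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		struct rtattr rta;
		unsigned char addr[4];
	} req;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = sizeof(req);
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtm.rtm_family = AF_INET;
	req.rtm.rtm_dst_len = 32;
	req.rta.rta_type = RTA_DST;
	req.rta.rta_len = RTA_LENGTH(4);
	inet_pton(AF_INET, dst, req.addr);

	if (sendto(fd, &req, sizeof(req), 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		close(fd);
		return -1;
	}
	/* The reply is the RTM_NEWROUTE message built by rt_fill_info(). */
	return fd;	/* caller recv()s the reply on this socket */
}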
2429
2430int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2431{
1da177e4
LT
2432 return skb->len;
2433}
2434
2435void ip_rt_multicast_event(struct in_device *in_dev)
2436{
4ccfe6d4 2437 rt_cache_flush(dev_net(in_dev->dev));
1da177e4
LT
2438}
2439
2440#ifdef CONFIG_SYSCTL
082c7ca4
G
2441static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
2442static int ip_rt_gc_interval __read_mostly = 60 * HZ;
2443static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
2444static int ip_rt_gc_elasticity __read_mostly = 8;
2445
81c684d1 2446static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
8d65af78 2447 void __user *buffer,
1da177e4
LT
2448 size_t *lenp, loff_t *ppos)
2449{
5aad1de5
TT
2450 struct net *net = (struct net *)__ctl->extra1;
2451
1da177e4 2452 if (write) {
5aad1de5
TT
2453 rt_cache_flush(net);
2454 fnhe_genid_bump(net);
1da177e4 2455 return 0;
e905a9ed 2456 }
1da177e4
LT
2457
2458 return -EINVAL;
2459}
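/*
 * Editor's note: a hypothetical userspace sketch (not part of this file).
 * Writing anything to the root-only (mode 0200) "flush" sysctl registered
 * below triggers the handler above: the routing cache is flushed and the
 * per-netns next-hop-exception genid is bumped.
 */
#include <fcntl.h>
#include <unistd.h>

static int example_flush_route_cache(void)
{
	int fd = open("/proc/sys/net/ipv4/route/flush", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, "1\n", 2) != 2) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}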
2460
eeb61f71 2461static ctl_table ipv4_route_table[] = {
1da177e4 2462 {
1da177e4
LT
2463 .procname = "gc_thresh",
2464 .data = &ipv4_dst_ops.gc_thresh,
2465 .maxlen = sizeof(int),
2466 .mode = 0644,
6d9f239a 2467 .proc_handler = proc_dointvec,
1da177e4
LT
2468 },
2469 {
1da177e4
LT
2470 .procname = "max_size",
2471 .data = &ip_rt_max_size,
2472 .maxlen = sizeof(int),
2473 .mode = 0644,
6d9f239a 2474 .proc_handler = proc_dointvec,
1da177e4
LT
2475 },
2476 {
2477 /* Deprecated. Use gc_min_interval_ms */
e905a9ed 2478
1da177e4
LT
2479 .procname = "gc_min_interval",
2480 .data = &ip_rt_gc_min_interval,
2481 .maxlen = sizeof(int),
2482 .mode = 0644,
6d9f239a 2483 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2484 },
2485 {
1da177e4
LT
2486 .procname = "gc_min_interval_ms",
2487 .data = &ip_rt_gc_min_interval,
2488 .maxlen = sizeof(int),
2489 .mode = 0644,
6d9f239a 2490 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4
LT
2491 },
2492 {
1da177e4
LT
2493 .procname = "gc_timeout",
2494 .data = &ip_rt_gc_timeout,
2495 .maxlen = sizeof(int),
2496 .mode = 0644,
6d9f239a 2497 .proc_handler = proc_dointvec_jiffies,
1da177e4 2498 },
9f28a2fc
ED
2499 {
2500 .procname = "gc_interval",
2501 .data = &ip_rt_gc_interval,
2502 .maxlen = sizeof(int),
2503 .mode = 0644,
2504 .proc_handler = proc_dointvec_jiffies,
2505 },
1da177e4 2506 {
1da177e4
LT
2507 .procname = "redirect_load",
2508 .data = &ip_rt_redirect_load,
2509 .maxlen = sizeof(int),
2510 .mode = 0644,
6d9f239a 2511 .proc_handler = proc_dointvec,
1da177e4
LT
2512 },
2513 {
1da177e4
LT
2514 .procname = "redirect_number",
2515 .data = &ip_rt_redirect_number,
2516 .maxlen = sizeof(int),
2517 .mode = 0644,
6d9f239a 2518 .proc_handler = proc_dointvec,
1da177e4
LT
2519 },
2520 {
1da177e4
LT
2521 .procname = "redirect_silence",
2522 .data = &ip_rt_redirect_silence,
2523 .maxlen = sizeof(int),
2524 .mode = 0644,
6d9f239a 2525 .proc_handler = proc_dointvec,
1da177e4
LT
2526 },
2527 {
1da177e4
LT
2528 .procname = "error_cost",
2529 .data = &ip_rt_error_cost,
2530 .maxlen = sizeof(int),
2531 .mode = 0644,
6d9f239a 2532 .proc_handler = proc_dointvec,
1da177e4
LT
2533 },
2534 {
1da177e4
LT
2535 .procname = "error_burst",
2536 .data = &ip_rt_error_burst,
2537 .maxlen = sizeof(int),
2538 .mode = 0644,
6d9f239a 2539 .proc_handler = proc_dointvec,
1da177e4
LT
2540 },
2541 {
1da177e4
LT
2542 .procname = "gc_elasticity",
2543 .data = &ip_rt_gc_elasticity,
2544 .maxlen = sizeof(int),
2545 .mode = 0644,
6d9f239a 2546 .proc_handler = proc_dointvec,
1da177e4
LT
2547 },
2548 {
1da177e4
LT
2549 .procname = "mtu_expires",
2550 .data = &ip_rt_mtu_expires,
2551 .maxlen = sizeof(int),
2552 .mode = 0644,
6d9f239a 2553 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2554 },
2555 {
1da177e4
LT
2556 .procname = "min_pmtu",
2557 .data = &ip_rt_min_pmtu,
2558 .maxlen = sizeof(int),
2559 .mode = 0644,
6d9f239a 2560 .proc_handler = proc_dointvec,
1da177e4
LT
2561 },
2562 {
1da177e4
LT
2563 .procname = "min_adv_mss",
2564 .data = &ip_rt_min_advmss,
2565 .maxlen = sizeof(int),
2566 .mode = 0644,
6d9f239a 2567 .proc_handler = proc_dointvec,
1da177e4 2568 },
f8572d8f 2569 { }
1da177e4 2570};
39a23e75 2571
39a23e75
DL
2572static struct ctl_table ipv4_route_flush_table[] = {
2573 {
39a23e75
DL
2574 .procname = "flush",
2575 .maxlen = sizeof(int),
2576 .mode = 0200,
6d9f239a 2577 .proc_handler = ipv4_sysctl_rtcache_flush,
39a23e75 2578 },
f8572d8f 2579 { },
39a23e75
DL
2580};
2581
2582static __net_init int sysctl_route_net_init(struct net *net)
2583{
2584 struct ctl_table *tbl;
2585
2586 tbl = ipv4_route_flush_table;
09ad9bc7 2587 if (!net_eq(net, &init_net)) {
39a23e75
DL
2588 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2589 if (tbl == NULL)
2590 goto err_dup;
464dc801
EB
2591
2592 /* Don't export sysctls to unprivileged users */
2593 if (net->user_ns != &init_user_ns)
2594 tbl[0].procname = NULL;
39a23e75
DL
2595 }
2596 tbl[0].extra1 = net;
2597
ec8f23ce 2598 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
39a23e75
DL
2599 if (net->ipv4.route_hdr == NULL)
2600 goto err_reg;
2601 return 0;
2602
2603err_reg:
2604 if (tbl != ipv4_route_flush_table)
2605 kfree(tbl);
2606err_dup:
2607 return -ENOMEM;
2608}
2609
2610static __net_exit void sysctl_route_net_exit(struct net *net)
2611{
2612 struct ctl_table *tbl;
2613
2614 tbl = net->ipv4.route_hdr->ctl_table_arg;
2615 unregister_net_sysctl_table(net->ipv4.route_hdr);
2616 BUG_ON(tbl == ipv4_route_flush_table);
2617 kfree(tbl);
2618}
2619
2620static __net_initdata struct pernet_operations sysctl_route_ops = {
2621 .init = sysctl_route_net_init,
2622 .exit = sysctl_route_net_exit,
2623};
1da177e4
LT
2624#endif
2625
3ee94372 2626static __net_init int rt_genid_init(struct net *net)
9f5e97e5 2627{
b42664f8 2628 atomic_set(&net->rt_genid, 0);
5aad1de5 2629 atomic_set(&net->fnhe_genid, 0);
436c3b66
DM
2630 get_random_bytes(&net->ipv4.dev_addr_genid,
2631 sizeof(net->ipv4.dev_addr_genid));
9f5e97e5
DL
2632 return 0;
2633}
2634
3ee94372
NH
2635static __net_initdata struct pernet_operations rt_genid_ops = {
2636 .init = rt_genid_init,
9f5e97e5
DL
2637};
2638
c3426b47
DM
2639static int __net_init ipv4_inetpeer_init(struct net *net)
2640{
2641 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2642
2643 if (!bp)
2644 return -ENOMEM;
2645 inet_peer_base_init(bp);
2646 net->ipv4.peers = bp;
2647 return 0;
2648}
2649
2650static void __net_exit ipv4_inetpeer_exit(struct net *net)
2651{
2652 struct inet_peer_base *bp = net->ipv4.peers;
2653
2654 net->ipv4.peers = NULL;
56a6b248 2655 inetpeer_invalidate_tree(bp);
c3426b47
DM
2656 kfree(bp);
2657}
2658
2659static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
2660 .init = ipv4_inetpeer_init,
2661 .exit = ipv4_inetpeer_exit,
2662};
9f5e97e5 2663
c7066f70 2664#ifdef CONFIG_IP_ROUTE_CLASSID
7d720c3e 2665struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
c7066f70 2666#endif /* CONFIG_IP_ROUTE_CLASSID */
1da177e4 2667
1da177e4
LT
2668int __init ip_rt_init(void)
2669{
424c4b70 2670 int rc = 0;
1da177e4 2671
c7066f70 2672#ifdef CONFIG_IP_ROUTE_CLASSID
0dcec8c2 2673 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
1da177e4
LT
2674 if (!ip_rt_acct)
2675 panic("IP: failed to allocate ip_rt_acct\n");
1da177e4
LT
2676#endif
2677
e5d679f3
AD
2678 ipv4_dst_ops.kmem_cachep =
2679 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
20c2df83 2680 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1da177e4 2681
14e50e57
DM
2682 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2683
fc66f95c
ED
2684 if (dst_entries_init(&ipv4_dst_ops) < 0)
2685 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2686
2687 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2688 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2689
89aef892
DM
2690 ipv4_dst_ops.gc_thresh = ~0;
2691 ip_rt_max_size = INT_MAX;
1da177e4 2692
1da177e4
LT
2693 devinet_init();
2694 ip_fib_init();
2695
73b38711 2696 if (ip_rt_proc_init())
058bd4d2 2697 pr_err("Unable to create route proc files\n");
1da177e4
LT
2698#ifdef CONFIG_XFRM
2699 xfrm_init();
703fb94e 2700 xfrm4_init();
1da177e4 2701#endif
c7ac8679 2702 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
63f3444f 2703
39a23e75
DL
2704#ifdef CONFIG_SYSCTL
2705 register_pernet_subsys(&sysctl_route_ops);
2706#endif
3ee94372 2707 register_pernet_subsys(&rt_genid_ops);
c3426b47 2708 register_pernet_subsys(&ipv4_inetpeer_ops);
1da177e4
LT
2709 return rc;
2710}
2711
a1bc6eb4 2712#ifdef CONFIG_SYSCTL
eeb61f71
AV
2713/*
2714 * We really need to sanitize the damn ipv4 init order, then all
2715 * this nonsense will go away.
2716 */
2717void __init ip_static_sysctl_init(void)
2718{
4e5ca785 2719 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
eeb61f71 2720}
a1bc6eb4 2721#endif