/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU 0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

/*
 * Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family = AF_INET,
	.protocol = cpu_to_be16(ETH_P_IP),
	.check = ipv4_dst_check,
	.default_advmss = ipv4_default_advmss,
	.mtu = ipv4_mtu,
	.cow_metrics = ipv4_cow_metrics,
	.destroy = ipv4_dst_destroy,
	.ifdown = ipv4_dst_ifdown,
	.negative_advice = ipv4_negative_advice,
	.link_failure = ipv4_link_failure,
	.update_pmtu = ip_rt_update_pmtu,
	.redirect = ip_do_redirect,
	.local_out = __ip_local_out,
	.neigh_lookup = ipv4_neigh_lookup,
};

#define ECN_OR_COST(class) TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start = rt_cache_seq_start,
	.next = rt_cache_seq_next,
	.stop = rt_cache_seq_stop,
	.show = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt_cache_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start = rt_cpu_seq_start,
	.next = rt_cpu_seq_next,
	.stop = rt_cpu_seq_stop,
	.show = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt_cpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner = THIS_MODULE,
	.open = rt_acct_proc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}

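/* Resolve the L2 neighbour for a route: use the route's gateway when one is
 * set, otherwise fall back to the destination address of the packet (or the
 * daddr the caller passed in).
 */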
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

/*
 * Peer allocation may fail only in serious out-of-memory conditions. However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}

void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);

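/* Helpers that build the flowi4 lookup key used by the PMTU and redirect
 * handlers below: from an skb's IP header, from a connected socket, or from
 * explicit header fields.  Socket state, when available, takes precedence.
 */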
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

static DEFINE_SPINLOCK(fnhe_lock);

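/* Per-nexthop exception cache (learned redirect gateways and PMTU values).
 * Entries hang off fib_nh::nh_exceptions in a small hash table protected by
 * fnhe_lock; fnhe_oldest() recycles the stalest entry of a bucket when its
 * chain grows too deep.
 */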
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	u32 hval;

	hval = (__force u32) daddr;
	hval ^= (hval >> 11) ^ (hval >> 22);

	return hval & (FNHE_HASH_SIZE - 1);
}

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}

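/* Record a learned redirect gateway and/or PMTU for daddr on this nexthop,
 * creating a new exception entry if none exists yet and invalidating the
 * per-cpu cached output routes so they pick up the new data.
 */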
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = max(1UL, expires);
		}
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}

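/* Validate an ICMP redirect and, if it is acceptable, remember the new
 * gateway as a nexthop exception and kick the neighbour subsystem.  Bogus
 * redirects (wrong old gateway, martian new gateway, redirects disabled on
 * the interface) are dropped and optionally logged.
 */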
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     " Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 * 1. The first ip_rt_redirect_number redirects are sent
 *    with exponential backoff, then we stop sending them at all,
 *    assuming that the host ignores our redirects.
 * 2. If we did not see packets requiring redirects
 *    during ip_rt_redirect_silence, we assume that the host
 *    forgot redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}

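/* Lower the PMTU recorded for this flow, subject to ip_rt_min_pmtu, by
 * storing it as a nexthop exception with an ip_rt_mtu_expires lifetime.
 */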
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (dst->dev->mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

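/* Socket variant of the PMTU update: reuse the socket's cached route when it
 * is safe to do so, otherwise do a fresh route lookup and install the result
 * as the socket's dst.
 */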
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *dst;
	bool new = false;

	bh_lock_sock(sk);
	rt = (struct rtable *) __sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !rt) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!__sk_dst_check(sk, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	dst = dst_check(&rt->dst, 0);
	if (!dst) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		__sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

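/* dst_ops->check for IPv4: routes are created with DST_OBSOLETE_FORCE_CHK,
 * so this runs on every use; a route is only reported valid while its
 * obsolete value is unchanged and its rt_genid matches the namespace.
 */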
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a route,
	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
	 * DST_OBSOLETE_DEAD by dst_free().
	 */
	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
		return NULL;
	return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}

/*
   We do not cache source address of outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so that it out of fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (advmss == 0) {
		advmss = max_t(unsigned int, dst->dev->mtu - 40,
			       ip_rt_min_advmss);
		if (advmss > 65535 - 40)
			advmss = 65535 - 40;
	}
	return advmss;
}

static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}

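/* Look up the nexthop exception entry for daddr, if one has been recorded by
 * a redirect or PMTU update.
 */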
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	}
	return NULL;
}

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		int genid = fnhe_genid(dev_net(rt->dst.dev));
		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);

		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}

static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}

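/* Routes that cannot be cached on the nexthop (DST_NOCACHE) are kept on
 * rt_uncached_list so rt_flush_dev() can retarget them at the loopback
 * device when their output device goes away.
 */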
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		spin_lock_bh(&rt_uncached_lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&rt_uncached_lock);
	}
}

void rt_flush_dev(struct net_device *dev)
{
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}

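/* A cached route may be reused only while it has not been invalidated by a
 * PMTU/redirect update and its generation id still matches the namespace.
 */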
static bool rt_cache_valid(const struct rtable *rt)
{
	return rt &&
	       rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	       !rt_is_expired(rt);
}

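/* Fill in the nexthop-derived fields of a new route (gateway, metrics,
 * classid) and try to cache it, either in the matching exception entry or in
 * the nexthop's per-cpu/input cache; otherwise put it on the uncached list.
 */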
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}

static struct rtable *rt_dst_alloc(struct net_device *dev,
				   bool nopolicy, bool noxfrm, bool will_cache)
{
	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
			 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
			 (nopolicy ? DST_NOPOLICY : 0) |
			 (noxfrm ? DST_NOXFRM : 0));
}

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}


static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 * RFC1812 recommendation, if source is martian,
		 * the only hint is MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, true);
		}
	}
#endif
}

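/* Build and cache the forwarding route for a packet received on in_dev and
 * forwarded towards the fib_result's nexthop, after validating the source
 * address.
 */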
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
		flags |= RTCF_DOREDIRECT;
		do_cache = false;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	if (do_cache) {
		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}

1da177e4
LT
1593/*
1594 * NOTE. We drop all the packets that has local source
1595 * addresses, because every properly looped back packet
1596 * must have correct destination already attached by output routine.
1597 *
1598 * Such approach solves two big problems:
1599 * 1. Not simplex devices are handled properly.
1600 * 2. IP spoofing attempts are filtered with 100% of guarantee.
ebc0ffae 1601 * called with rcu_read_lock()
1da177e4
LT
1602 */
1603
9e12bb22 1604static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
c10237e0 1605 u8 tos, struct net_device *dev)
1da177e4
LT
1606{
1607 struct fib_result res;
96d36220 1608 struct in_device *in_dev = __in_dev_get_rcu(dev);
68a5e3dd 1609 struct flowi4 fl4;
95c96174 1610 unsigned int flags = 0;
1da177e4 1611 u32 itag = 0;
95c96174 1612 struct rtable *rth;
1da177e4 1613 int err = -EINVAL;
5e73ea1a 1614 struct net *net = dev_net(dev);
d2d68ba9 1615 bool do_cache;
1da177e4
LT
1616
1617 /* IP on this device is disabled. */
1618
1619 if (!in_dev)
1620 goto out;
1621
1622 /* Check for the most weird martians, which can be not detected
1623 by fib_lookup.
1624 */
1625
d0daebc3 1626 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
1da177e4
LT
1627 goto martian_source;
1628
d2d68ba9 1629 res.fi = NULL;
27a954bd 1630 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
1da177e4
LT
1631 goto brd_input;
1632
1633 /* Accept zero addresses only to limited broadcast;
1634 * I even do not know to fix it or not. Waiting for complains :-)
1635 */
f97c1e0c 1636 if (ipv4_is_zeronet(saddr))
1da177e4
LT
1637 goto martian_source;
1638
d0daebc3 1639 if (ipv4_is_zeronet(daddr))
1da177e4
LT
1640 goto martian_destination;
1641
9eb43e76
ED
1642 /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
1643 * and call it once if daddr or/and saddr are loopback addresses
1644 */
1645 if (ipv4_is_loopback(daddr)) {
1646 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
d0daebc3 1647 goto martian_destination;
9eb43e76
ED
1648 } else if (ipv4_is_loopback(saddr)) {
1649 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
d0daebc3
TG
1650 goto martian_source;
1651 }
1652
1da177e4
LT
1653 /*
1654 * Now we are ready to route packet.
1655 */
68a5e3dd
DM
1656 fl4.flowi4_oif = 0;
1657 fl4.flowi4_iif = dev->ifindex;
1658 fl4.flowi4_mark = skb->mark;
1659 fl4.flowi4_tos = tos;
1660 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1661 fl4.daddr = daddr;
1662 fl4.saddr = saddr;
1663 err = fib_lookup(net, &fl4, &res);
251da413 1664 if (err != 0)
1da177e4 1665 goto no_route;
1da177e4
LT
1666
1667 RT_CACHE_STAT_INC(in_slow_tot);
1668
1669 if (res.type == RTN_BROADCAST)
1670 goto brd_input;
1671
1672 if (res.type == RTN_LOCAL) {
5c04c819 1673 err = fib_validate_source(skb, saddr, daddr, tos,
1fb9489b 1674 LOOPBACK_IFINDEX,
9e56e380 1675 dev, in_dev, &itag);
b5f7e755
ED
1676 if (err < 0)
1677 goto martian_source_keep_err;
1da177e4
LT
1678 goto local_input;
1679 }
1680
1681 if (!IN_DEV_FORWARD(in_dev))
251da413 1682 goto no_route;
1da177e4
LT
1683 if (res.type != RTN_UNICAST)
1684 goto martian_destination;
1685
68a5e3dd 1686 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
1da177e4
LT
1687out: return err;
1688
1689brd_input:
1690 if (skb->protocol != htons(ETH_P_IP))
1691 goto e_inval;
1692
41347dcd 1693 if (!ipv4_is_zeronet(saddr)) {
9e56e380
DM
1694 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1695 in_dev, &itag);
1da177e4 1696 if (err < 0)
b5f7e755 1697 goto martian_source_keep_err;
1da177e4
LT
1698 }
1699 flags |= RTCF_BROADCAST;
1700 res.type = RTN_BROADCAST;
1701 RT_CACHE_STAT_INC(in_brd);
1702
1703local_input:
d2d68ba9
DM
1704 do_cache = false;
1705 if (res.fi) {
fe3edf45 1706 if (!itag) {
54764bb6 1707 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
d2d68ba9 1708 if (rt_cache_valid(rth)) {
c6cffba4
DM
1709 skb_dst_set_noref(skb, &rth->dst);
1710 err = 0;
1711 goto out;
d2d68ba9
DM
1712 }
1713 do_cache = true;
1714 }
1715 }
1716
5c1e6aa3 1717 rth = rt_dst_alloc(net->loopback_dev,
d2d68ba9 1718 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
1da177e4
LT
1719 if (!rth)
1720 goto e_nobufs;
1721
cf911662 1722 rth->dst.input= ip_local_deliver;
d8d1f30b 1723 rth->dst.output= ip_rt_bug;
cf911662
DM
1724#ifdef CONFIG_IP_ROUTE_CLASSID
1725 rth->dst.tclassid = itag;
1726#endif
1da177e4 1727
cf911662
DM
1728 rth->rt_genid = rt_genid(net);
1729 rth->rt_flags = flags|RTCF_LOCAL;
1730 rth->rt_type = res.type;
9917e1e8 1731 rth->rt_is_input = 1;
13378cad 1732 rth->rt_iif = 0;
5943634f 1733 rth->rt_pmtu = 0;
f8126f1d 1734 rth->rt_gateway = 0;
155e8336 1735 rth->rt_uses_gateway = 0;
caacf05e 1736 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4 1737 if (res.type == RTN_UNREACHABLE) {
d8d1f30b
CG
1738 rth->dst.input= ip_error;
1739 rth->dst.error= -err;
1da177e4
LT
1740 rth->rt_flags &= ~RTCF_LOCAL;
1741 }
d2d68ba9
DM
1742 if (do_cache)
1743 rt_cache_route(&FIB_RES_NH(res), rth);
89aef892 1744 skb_dst_set(skb, &rth->dst);
b23dd4fe 1745 err = 0;
ebc0ffae 1746 goto out;
1da177e4
LT
1747
1748no_route:
1749 RT_CACHE_STAT_INC(in_no_route);
1da177e4 1750 res.type = RTN_UNREACHABLE;
7f53878d
MC
1751 if (err == -ESRCH)
1752 err = -ENETUNREACH;
1da177e4
LT
1753 goto local_input;
1754
1755 /*
1756 * Do not cache martian addresses: they should be logged (RFC1812)
1757 */
1758martian_destination:
1759 RT_CACHE_STAT_INC(in_martian_dst);
1760#ifdef CONFIG_IP_ROUTE_VERBOSE
e87cc472
JP
1761 if (IN_DEV_LOG_MARTIANS(in_dev))
1762 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
1763 &daddr, &saddr, dev->name);
1da177e4 1764#endif
2c2910a4 1765
1da177e4
LT
1766e_inval:
1767 err = -EINVAL;
ebc0ffae 1768 goto out;
1da177e4
LT
1769
1770e_nobufs:
1771 err = -ENOBUFS;
ebc0ffae 1772 goto out;
1da177e4
LT
1773
1774martian_source:
b5f7e755
ED
1775 err = -EINVAL;
1776martian_source_keep_err:
1da177e4 1777 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
ebc0ffae 1778 goto out;
1da177e4
LT
1779}
1780
c6cffba4
DM
1781int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1782 u8 tos, struct net_device *dev)
1da177e4 1783{
96d36220 1784 int res;
1da177e4 1785
96d36220
ED
1786 rcu_read_lock();
1787
1da177e4
LT
1788 /* Multicast recognition logic was moved from the route cache to here.
1789 The problem was that too many Ethernet cards have broken/missing
1790 hardware multicast filters :-( As a result, a host on a multicast
1791 network acquires a lot of useless route cache entries, e.g. for
1792 SDR messages from all over the world. Now we try to get rid of them.
1793 Really, provided the software IP multicast filter is organized
1794 reasonably (at least, hashed), this does not cause a slowdown
1795 compared with route cache reject entries.
1796 Note that multicast routers are not affected, because a
1797 route cache entry is created for them eventually.
1798 */
f97c1e0c 1799 if (ipv4_is_multicast(daddr)) {
96d36220 1800 struct in_device *in_dev = __in_dev_get_rcu(dev);
1da177e4 1801
96d36220 1802 if (in_dev) {
dbdd9a52
DM
1803 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1804 ip_hdr(skb)->protocol);
1da177e4
LT
1805 if (our
1806#ifdef CONFIG_IP_MROUTE
9d4fb27d
JP
1807 ||
1808 (!ipv4_is_local_multicast(daddr) &&
1809 IN_DEV_MFORWARD(in_dev))
1da177e4 1810#endif
9d4fb27d 1811 ) {
96d36220
ED
1812 int res = ip_route_input_mc(skb, daddr, saddr,
1813 tos, dev, our);
1da177e4 1814 rcu_read_unlock();
96d36220 1815 return res;
1da177e4
LT
1816 }
1817 }
1818 rcu_read_unlock();
1819 return -EINVAL;
1820 }
c10237e0 1821 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
96d36220
ED
1822 rcu_read_unlock();
1823 return res;
1da177e4 1824}
c6cffba4 1825EXPORT_SYMBOL(ip_route_input_noref);
1da177e4 1826
ebc0ffae 1827/* called with rcu_read_lock() */
982721f3 1828static struct rtable *__mkroute_output(const struct fib_result *res,
1a00fee4 1829 const struct flowi4 *fl4, int orig_oif,
f61759e6 1830 struct net_device *dev_out,
5ada5527 1831 unsigned int flags)
1da177e4 1832{
982721f3 1833 struct fib_info *fi = res->fi;
f2bb4bed 1834 struct fib_nh_exception *fnhe;
5ada5527 1835 struct in_device *in_dev;
982721f3 1836 u16 type = res->type;
5ada5527 1837 struct rtable *rth;
c92b9655 1838 bool do_cache;
1da177e4 1839
d0daebc3
TG
1840 in_dev = __in_dev_get_rcu(dev_out);
1841 if (!in_dev)
5ada5527 1842 return ERR_PTR(-EINVAL);
1da177e4 1843
d0daebc3
TG
1844 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1845 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1846 return ERR_PTR(-EINVAL);
1847
68a5e3dd 1848 if (ipv4_is_lbcast(fl4->daddr))
982721f3 1849 type = RTN_BROADCAST;
68a5e3dd 1850 else if (ipv4_is_multicast(fl4->daddr))
982721f3 1851 type = RTN_MULTICAST;
68a5e3dd 1852 else if (ipv4_is_zeronet(fl4->daddr))
5ada5527 1853 return ERR_PTR(-EINVAL);
1da177e4
LT
1854
1855 if (dev_out->flags & IFF_LOOPBACK)
1856 flags |= RTCF_LOCAL;
1857
63617421 1858 do_cache = true;
982721f3 1859 if (type == RTN_BROADCAST) {
1da177e4 1860 flags |= RTCF_BROADCAST | RTCF_LOCAL;
982721f3
DM
1861 fi = NULL;
1862 } else if (type == RTN_MULTICAST) {
dd28d1a0 1863 flags |= RTCF_MULTICAST | RTCF_LOCAL;
813b3b5d
DM
1864 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1865 fl4->flowi4_proto))
1da177e4 1866 flags &= ~RTCF_LOCAL;
63617421
JA
1867 else
1868 do_cache = false;
1da177e4 1869 /* If a multicast route does not exist, use the
dd28d1a0
ED
1870 * default one, but do not gateway in this case.
1871 * Yes, it is a hack.
1da177e4 1872 */
982721f3
DM
1873 if (fi && res->prefixlen < 4)
1874 fi = NULL;
1da177e4
LT
1875 }
1876
f2bb4bed 1877 fnhe = NULL;
63617421
JA
1878 do_cache &= fi != NULL;
1879 if (do_cache) {
c5038a83 1880 struct rtable __rcu **prth;
c92b9655 1881 struct fib_nh *nh = &FIB_RES_NH(*res);
d26b3a7c 1882
c92b9655 1883 fnhe = find_exception(nh, fl4->daddr);
c5038a83
DM
1884 if (fnhe)
1885 prth = &fnhe->fnhe_rth;
c92b9655
JA
1886 else {
1887 if (unlikely(fl4->flowi4_flags &
1888 FLOWI_FLAG_KNOWN_NH &&
1889 !(nh->nh_gw &&
1890 nh->nh_scope == RT_SCOPE_LINK))) {
1891 do_cache = false;
1892 goto add;
1893 }
1894 prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
1895 }
c5038a83
DM
1896 rth = rcu_dereference(*prth);
1897 if (rt_cache_valid(rth)) {
1898 dst_hold(&rth->dst);
1899 return rth;
f2bb4bed
DM
1900 }
1901 }
c92b9655
JA
1902
1903add:
5c1e6aa3
DM
1904 rth = rt_dst_alloc(dev_out,
1905 IN_DEV_CONF_GET(in_dev, NOPOLICY),
f2bb4bed 1906 IN_DEV_CONF_GET(in_dev, NOXFRM),
c92b9655 1907 do_cache);
8391d07b 1908 if (!rth)
5ada5527 1909 return ERR_PTR(-ENOBUFS);
8391d07b 1910
cf911662
DM
1911 rth->dst.output = ip_output;
1912
cf911662
DM
1913 rth->rt_genid = rt_genid(dev_net(dev_out));
1914 rth->rt_flags = flags;
1915 rth->rt_type = type;
9917e1e8 1916 rth->rt_is_input = 0;
13378cad 1917 rth->rt_iif = orig_oif ? : 0;
5943634f 1918 rth->rt_pmtu = 0;
f8126f1d 1919 rth->rt_gateway = 0;
155e8336 1920 rth->rt_uses_gateway = 0;
caacf05e 1921 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4
LT
1922
1923 RT_CACHE_STAT_INC(out_slow_tot);
1924
41347dcd 1925 if (flags & RTCF_LOCAL)
d8d1f30b 1926 rth->dst.input = ip_local_deliver;
1da177e4 1927 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
e905a9ed 1928 if (flags & RTCF_LOCAL &&
1da177e4 1929 !(dev_out->flags & IFF_LOOPBACK)) {
d8d1f30b 1930 rth->dst.output = ip_mc_output;
1da177e4
LT
1931 RT_CACHE_STAT_INC(out_slow_mc);
1932 }
1933#ifdef CONFIG_IP_MROUTE
982721f3 1934 if (type == RTN_MULTICAST) {
1da177e4 1935 if (IN_DEV_MFORWARD(in_dev) &&
813b3b5d 1936 !ipv4_is_local_multicast(fl4->daddr)) {
d8d1f30b
CG
1937 rth->dst.input = ip_mr_input;
1938 rth->dst.output = ip_mc_output;
1da177e4
LT
1939 }
1940 }
1941#endif
1942 }
1943
f2bb4bed 1944 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
1da177e4 1945
5ada5527 1946 return rth;
1da177e4
LT
1947}
1948
1da177e4
LT
1949/*
1950 * Major route resolver routine.
1951 */
1952
89aef892 1953struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
1da177e4 1954{
1da177e4 1955 struct net_device *dev_out = NULL;
f61759e6 1956 __u8 tos = RT_FL_TOS(fl4);
813b3b5d
DM
1957 unsigned int flags = 0;
1958 struct fib_result res;
5ada5527 1959 struct rtable *rth;
813b3b5d 1960 int orig_oif;
1da177e4 1961
85b91b03 1962 res.tclassid = 0;
1da177e4 1963 res.fi = NULL;
8b96d22d 1964 res.table = NULL;
1da177e4 1965
813b3b5d
DM
1966 orig_oif = fl4->flowi4_oif;
1967
1fb9489b 1968 fl4->flowi4_iif = LOOPBACK_IFINDEX;
813b3b5d
DM
1969 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1970 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1971 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
44713b67 1972
010c2708 1973 rcu_read_lock();
813b3b5d 1974 if (fl4->saddr) {
b23dd4fe 1975 rth = ERR_PTR(-EINVAL);
813b3b5d
DM
1976 if (ipv4_is_multicast(fl4->saddr) ||
1977 ipv4_is_lbcast(fl4->saddr) ||
1978 ipv4_is_zeronet(fl4->saddr))
1da177e4
LT
1979 goto out;
1980
1da177e4
LT
1981 /* I removed the check for oif == dev_out->oif here.
1982 It was wrong for two reasons:
1ab35276
DL
1983 1. ip_dev_find(net, saddr) can return the wrong iface if saddr
1984 is assigned to multiple interfaces.
1da177e4
LT
1985 2. Moreover, we are allowed to send packets with the saddr
1986 of another iface. --ANK
1987 */
1988
813b3b5d
DM
1989 if (fl4->flowi4_oif == 0 &&
1990 (ipv4_is_multicast(fl4->daddr) ||
1991 ipv4_is_lbcast(fl4->daddr))) {
a210d01a 1992 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
813b3b5d 1993 dev_out = __ip_dev_find(net, fl4->saddr, false);
a210d01a
JA
1994 if (dev_out == NULL)
1995 goto out;
1996
1da177e4
LT
1997 /* Special hack: the user can direct multicasts
1998 and limited broadcasts via the necessary interface
1999 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2000 This hack is not just for fun, it allows
2001 vic, vat and friends to work.
2002 They bind the socket to loopback, set ttl to zero
2003 and expect that it will work.
2004 From the viewpoint of the routing cache they are broken,
2005 because we are not allowed to build a multicast path
2006 with a loopback source addr (the routing cache
2007 cannot know that ttl is zero, so that the packet
2008 will not leave this host and the route is valid).
2009 Luckily, this hack is a good workaround.
2010 */
2011
813b3b5d 2012 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2013 goto make_route;
2014 }
a210d01a 2015
813b3b5d 2016 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
a210d01a 2017 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
813b3b5d 2018 if (!__ip_dev_find(net, fl4->saddr, false))
a210d01a 2019 goto out;
a210d01a 2020 }
1da177e4
LT
2021 }
2022
2023
813b3b5d
DM
2024 if (fl4->flowi4_oif) {
2025 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
b23dd4fe 2026 rth = ERR_PTR(-ENODEV);
1da177e4
LT
2027 if (dev_out == NULL)
2028 goto out;
e5ed6399
HX
2029
2030 /* RACE: Check return value of inet_select_addr instead. */
fc75fc83 2031 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
b23dd4fe 2032 rth = ERR_PTR(-ENETUNREACH);
fc75fc83
ED
2033 goto out;
2034 }
813b3b5d
DM
2035 if (ipv4_is_local_multicast(fl4->daddr) ||
2036 ipv4_is_lbcast(fl4->daddr)) {
2037 if (!fl4->saddr)
2038 fl4->saddr = inet_select_addr(dev_out, 0,
2039 RT_SCOPE_LINK);
1da177e4
LT
2040 goto make_route;
2041 }
813b3b5d
DM
2042 if (fl4->saddr) {
2043 if (ipv4_is_multicast(fl4->daddr))
2044 fl4->saddr = inet_select_addr(dev_out, 0,
2045 fl4->flowi4_scope);
2046 else if (!fl4->daddr)
2047 fl4->saddr = inet_select_addr(dev_out, 0,
2048 RT_SCOPE_HOST);
1da177e4
LT
2049 }
2050 }
2051
813b3b5d
DM
2052 if (!fl4->daddr) {
2053 fl4->daddr = fl4->saddr;
2054 if (!fl4->daddr)
2055 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
b40afd0e 2056 dev_out = net->loopback_dev;
1fb9489b 2057 fl4->flowi4_oif = LOOPBACK_IFINDEX;
1da177e4
LT
2058 res.type = RTN_LOCAL;
2059 flags |= RTCF_LOCAL;
2060 goto make_route;
2061 }
2062
813b3b5d 2063 if (fib_lookup(net, fl4, &res)) {
1da177e4 2064 res.fi = NULL;
8b96d22d 2065 res.table = NULL;
813b3b5d 2066 if (fl4->flowi4_oif) {
1da177e4
LT
2067 /* Apparently, the routing tables are wrong. Assume
2068 that the destination is on-link.
2069
2070 WHY? DW.
2071 Because we are allowed to send to an iface
2072 even if it has NO routes and NO assigned
2073 addresses. When oif is specified, the routing
2074 tables are looked up with only one purpose:
2075 to catch whether the destination is gatewayed, rather than
2076 direct. Moreover, if MSG_DONTROUTE is set,
2077 we send the packet, ignoring both the routing tables
2078 and ifaddr state. --ANK
2079
2080
2081 We could do this even if oif is unknown,
2082 as IPv6 likely does, but we do not.
2083 */
2084
813b3b5d
DM
2085 if (fl4->saddr == 0)
2086 fl4->saddr = inet_select_addr(dev_out, 0,
2087 RT_SCOPE_LINK);
1da177e4
LT
2088 res.type = RTN_UNICAST;
2089 goto make_route;
2090 }
b23dd4fe 2091 rth = ERR_PTR(-ENETUNREACH);
1da177e4
LT
2092 goto out;
2093 }
1da177e4
LT
2094
2095 if (res.type == RTN_LOCAL) {
813b3b5d 2096 if (!fl4->saddr) {
9fc3bbb4 2097 if (res.fi->fib_prefsrc)
813b3b5d 2098 fl4->saddr = res.fi->fib_prefsrc;
9fc3bbb4 2099 else
813b3b5d 2100 fl4->saddr = fl4->daddr;
9fc3bbb4 2101 }
b40afd0e 2102 dev_out = net->loopback_dev;
813b3b5d 2103 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2104 flags |= RTCF_LOCAL;
2105 goto make_route;
2106 }
2107
2108#ifdef CONFIG_IP_ROUTE_MULTIPATH
813b3b5d 2109 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
1b7fe593 2110 fib_select_multipath(&res);
1da177e4
LT
2111 else
2112#endif
21d8c49e
DM
2113 if (!res.prefixlen &&
2114 res.table->tb_num_default > 1 &&
813b3b5d 2115 res.type == RTN_UNICAST && !fl4->flowi4_oif)
0c838ff1 2116 fib_select_default(&res);
1da177e4 2117
813b3b5d
DM
2118 if (!fl4->saddr)
2119 fl4->saddr = FIB_RES_PREFSRC(net, res);
1da177e4 2120
1da177e4 2121 dev_out = FIB_RES_DEV(res);
813b3b5d 2122 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2123
2124
2125make_route:
1a00fee4 2126 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
1da177e4 2127
010c2708
DM
2128out:
2129 rcu_read_unlock();
b23dd4fe 2130 return rth;
1da177e4 2131}
d8c97a94
ACM
2132EXPORT_SYMBOL_GPL(__ip_route_output_key);
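As an illustration of the resolver's contract (a sketch, not code from this file): the caller fills a struct flowi4 key, __ip_route_output_key() returns a struct rtable or an ERR_PTR, and the reference must be dropped with ip_rt_put() when the caller is done. The wrapper name example_output_lookup() is made up.

/* Sketch only: resolve an output route for daddr/saddr within @net. */
#include <linux/err.h>
#include <linux/string.h>
#include <net/route.h>

static int example_output_lookup(struct net *net, __be32 daddr, __be32 saddr)
{
        struct flowi4 fl4;
        struct rtable *rt;

        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = daddr;
        fl4.saddr = saddr;              /* 0 lets the resolver pick a source */

        rt = __ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        /* ... transmit via rt->dst; fl4.saddr now holds the chosen source ... */

        ip_rt_put(rt);                  /* drop the reference taken by the lookup */
        return 0;
}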
2133
ae2688d5
JW
2134static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2135{
2136 return NULL;
2137}
2138
ebb762f2 2139static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 2140{
618f9bc7
SK
2141 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2142
2143 return mtu ? : dst->dev->mtu;
ec831ea7
RD
2144}
2145
6700c270
DM
2146static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2147 struct sk_buff *skb, u32 mtu)
14e50e57
DM
2148{
2149}
2150
6700c270
DM
2151static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2152 struct sk_buff *skb)
b587ee3b
DM
2153{
2154}
2155
0972ddb2
HB
2156static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2157 unsigned long old)
2158{
2159 return NULL;
2160}
2161
14e50e57
DM
2162static struct dst_ops ipv4_dst_blackhole_ops = {
2163 .family = AF_INET,
09640e63 2164 .protocol = cpu_to_be16(ETH_P_IP),
ae2688d5 2165 .check = ipv4_blackhole_dst_check,
ebb762f2 2166 .mtu = ipv4_blackhole_mtu,
214f45c9 2167 .default_advmss = ipv4_default_advmss,
14e50e57 2168 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
b587ee3b 2169 .redirect = ipv4_rt_blackhole_redirect,
0972ddb2 2170 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
d3aaeb38 2171 .neigh_lookup = ipv4_neigh_lookup,
14e50e57
DM
2172};
2173
2774c131 2174struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2175{
2774c131 2176 struct rtable *ort = (struct rtable *) dst_orig;
f5b0a874 2177 struct rtable *rt;
14e50e57 2178
f5b0a874 2179 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
14e50e57 2180 if (rt) {
d8d1f30b 2181 struct dst_entry *new = &rt->dst;
14e50e57 2182
14e50e57 2183 new->__use = 1;
352e512c
HX
2184 new->input = dst_discard;
2185 new->output = dst_discard;
14e50e57 2186
d8d1f30b 2187 new->dev = ort->dst.dev;
14e50e57
DM
2188 if (new->dev)
2189 dev_hold(new->dev);
2190
9917e1e8 2191 rt->rt_is_input = ort->rt_is_input;
5e2b61f7 2192 rt->rt_iif = ort->rt_iif;
5943634f 2193 rt->rt_pmtu = ort->rt_pmtu;
14e50e57 2194
e84f84f2 2195 rt->rt_genid = rt_genid(net);
14e50e57
DM
2196 rt->rt_flags = ort->rt_flags;
2197 rt->rt_type = ort->rt_type;
14e50e57 2198 rt->rt_gateway = ort->rt_gateway;
155e8336 2199 rt->rt_uses_gateway = ort->rt_uses_gateway;
14e50e57 2200
caacf05e
DM
2201 INIT_LIST_HEAD(&rt->rt_uncached);
2202
14e50e57
DM
2203 dst_free(new);
2204 }
2205
2774c131
DM
2206 dst_release(dst_orig);
2207
2208 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
14e50e57
DM
2209}
2210
9d6ec938 2211struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
b23dd4fe 2212 struct sock *sk)
1da177e4 2213{
9d6ec938 2214 struct rtable *rt = __ip_route_output_key(net, flp4);
1da177e4 2215
b23dd4fe
DM
2216 if (IS_ERR(rt))
2217 return rt;
1da177e4 2218
56157872 2219 if (flp4->flowi4_proto)
9d6ec938
DM
2220 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2221 flowi4_to_flowi(flp4),
2222 sk, 0);
1da177e4 2223
b23dd4fe 2224 return rt;
1da177e4 2225}
d8c97a94
ACM
2226EXPORT_SYMBOL_GPL(ip_route_output_flow);
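A hedged sketch of the socket-level variant: connection-oriented callers go through ip_route_output_flow() because a non-zero flowi4_proto makes the result pass through xfrm_lookup() and therefore respect IPsec policy. The helper name example_route_udp_flow() and the particular fields filled in are illustrative, not taken from this file.

/* Sketch only: route a UDP flow on behalf of a socket. */
#include <linux/in.h>
#include <linux/string.h>
#include <net/route.h>
#include <net/sock.h>

static struct rtable *example_route_udp_flow(struct net *net, struct sock *sk,
                                             __be32 daddr, __be16 dport,
                                             __be16 sport, int oif)
{
        struct flowi4 fl4;

        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = daddr;
        fl4.flowi4_oif = oif;
        fl4.flowi4_proto = IPPROTO_UDP; /* non-zero => xfrm policy lookup */
        fl4.fl4_dport = dport;
        fl4.fl4_sport = sport;

        return ip_route_output_flow(net, &fl4, sk);     /* rtable or ERR_PTR */
}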
2227
f1ce3062 2228static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
15e47304 2229 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
f1ce3062 2230 u32 seq, int event, int nowait, unsigned int flags)
1da177e4 2231{
511c3f92 2232 struct rtable *rt = skb_rtable(skb);
1da177e4 2233 struct rtmsg *r;
be403ea1 2234 struct nlmsghdr *nlh;
2bc8ca40 2235 unsigned long expires = 0;
f185071d 2236 u32 error;
521f5490 2237 u32 metrics[RTAX_MAX];
be403ea1 2238
15e47304 2239 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
be403ea1 2240 if (nlh == NULL)
26932566 2241 return -EMSGSIZE;
be403ea1
TG
2242
2243 r = nlmsg_data(nlh);
1da177e4
LT
2244 r->rtm_family = AF_INET;
2245 r->rtm_dst_len = 32;
2246 r->rtm_src_len = 0;
d6c0a4f6 2247 r->rtm_tos = fl4->flowi4_tos;
1da177e4 2248 r->rtm_table = RT_TABLE_MAIN;
f3756b79
DM
2249 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2250 goto nla_put_failure;
1da177e4
LT
2251 r->rtm_type = rt->rt_type;
2252 r->rtm_scope = RT_SCOPE_UNIVERSE;
2253 r->rtm_protocol = RTPROT_UNSPEC;
2254 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2255 if (rt->rt_flags & RTCF_NOTIFY)
2256 r->rtm_flags |= RTM_F_NOTIFY;
be403ea1 2257
f1ce3062 2258 if (nla_put_be32(skb, RTA_DST, dst))
f3756b79 2259 goto nla_put_failure;
1a00fee4 2260 if (src) {
1da177e4 2261 r->rtm_src_len = 32;
1a00fee4 2262 if (nla_put_be32(skb, RTA_SRC, src))
f3756b79 2263 goto nla_put_failure;
1da177e4 2264 }
f3756b79
DM
2265 if (rt->dst.dev &&
2266 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2267 goto nla_put_failure;
c7066f70 2268#ifdef CONFIG_IP_ROUTE_CLASSID
f3756b79
DM
2269 if (rt->dst.tclassid &&
2270 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2271 goto nla_put_failure;
1da177e4 2272#endif
41347dcd 2273 if (!rt_is_input_route(rt) &&
d6c0a4f6
DM
2274 fl4->saddr != src) {
2275 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
f3756b79
DM
2276 goto nla_put_failure;
2277 }
155e8336 2278 if (rt->rt_uses_gateway &&
f3756b79
DM
2279 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2280 goto nla_put_failure;
be403ea1 2281
ee9a8f7a
SK
2282 expires = rt->dst.expires;
2283 if (expires) {
2284 unsigned long now = jiffies;
2285
2286 if (time_before(now, expires))
2287 expires -= now;
2288 else
2289 expires = 0;
2290 }
2291
521f5490 2292 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
ee9a8f7a 2293 if (rt->rt_pmtu && expires)
521f5490
JA
2294 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2295 if (rtnetlink_put_metrics(skb, metrics) < 0)
be403ea1
TG
2296 goto nla_put_failure;
2297
b4869889 2298 if (fl4->flowi4_mark &&
68aaed54 2299 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
f3756b79 2300 goto nla_put_failure;
963bfeee 2301
d8d1f30b 2302 error = rt->dst.error;
be403ea1 2303
c7537967 2304 if (rt_is_input_route(rt)) {
8caaf7b6
ND
2305#ifdef CONFIG_IP_MROUTE
2306 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2307 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2308 int err = ipmr_get_route(net, skb,
2309 fl4->saddr, fl4->daddr,
2310 r, nowait);
2311 if (err <= 0) {
2312 if (!nowait) {
2313 if (err == 0)
2314 return 0;
2315 goto nla_put_failure;
2316 } else {
2317 if (err == -EMSGSIZE)
2318 goto nla_put_failure;
2319 error = err;
2320 }
2321 }
2322 } else
2323#endif
2324 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2325 goto nla_put_failure;
1da177e4
LT
2326 }
2327
f185071d 2328 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
e3703b3d 2329 goto nla_put_failure;
be403ea1
TG
2330
2331 return nlmsg_end(skb, nlh);
1da177e4 2332
be403ea1 2333nla_put_failure:
26932566
PM
2334 nlmsg_cancel(skb, nlh);
2335 return -EMSGSIZE;
1da177e4
LT
2336}
2337
661d2967 2338static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 2339{
3b1e0a65 2340 struct net *net = sock_net(in_skb->sk);
d889ce3b
TG
2341 struct rtmsg *rtm;
2342 struct nlattr *tb[RTA_MAX+1];
1da177e4 2343 struct rtable *rt = NULL;
d6c0a4f6 2344 struct flowi4 fl4;
9e12bb22
AV
2345 __be32 dst = 0;
2346 __be32 src = 0;
2347 u32 iif;
d889ce3b 2348 int err;
963bfeee 2349 int mark;
1da177e4
LT
2350 struct sk_buff *skb;
2351
d889ce3b
TG
2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2353 if (err < 0)
2354 goto errout;
2355
2356 rtm = nlmsg_data(nlh);
2357
1da177e4 2358 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
d889ce3b
TG
2359 if (skb == NULL) {
2360 err = -ENOBUFS;
2361 goto errout;
2362 }
1da177e4
LT
2363
2364 /* Reserve room for dummy headers; this skb can pass
2365 through a good chunk of the routing engine.
2366 */
459a98ed 2367 skb_reset_mac_header(skb);
c1d2bbe1 2368 skb_reset_network_header(skb);
d2c962b8
SH
2369
2370 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
eddc9ec5 2371 ip_hdr(skb)->protocol = IPPROTO_ICMP;
1da177e4
LT
2372 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2373
17fb2c64
AV
2374 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2375 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
d889ce3b 2376 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
963bfeee 2377 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
1da177e4 2378
d6c0a4f6
DM
2379 memset(&fl4, 0, sizeof(fl4));
2380 fl4.daddr = dst;
2381 fl4.saddr = src;
2382 fl4.flowi4_tos = rtm->rtm_tos;
2383 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2384 fl4.flowi4_mark = mark;
2385
1da177e4 2386 if (iif) {
d889ce3b
TG
2387 struct net_device *dev;
2388
1937504d 2389 dev = __dev_get_by_index(net, iif);
d889ce3b
TG
2390 if (dev == NULL) {
2391 err = -ENODEV;
2392 goto errout_free;
2393 }
2394
1da177e4
LT
2395 skb->protocol = htons(ETH_P_IP);
2396 skb->dev = dev;
963bfeee 2397 skb->mark = mark;
1da177e4
LT
2398 local_bh_disable();
2399 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2400 local_bh_enable();
d889ce3b 2401
511c3f92 2402 rt = skb_rtable(skb);
d8d1f30b
CG
2403 if (err == 0 && rt->dst.error)
2404 err = -rt->dst.error;
1da177e4 2405 } else {
9d6ec938 2406 rt = ip_route_output_key(net, &fl4);
b23dd4fe
DM
2407
2408 err = 0;
2409 if (IS_ERR(rt))
2410 err = PTR_ERR(rt);
1da177e4 2411 }
d889ce3b 2412
1da177e4 2413 if (err)
d889ce3b 2414 goto errout_free;
1da177e4 2415
d8d1f30b 2416 skb_dst_set(skb, &rt->dst);
1da177e4
LT
2417 if (rtm->rtm_flags & RTM_F_NOTIFY)
2418 rt->rt_flags |= RTCF_NOTIFY;
2419
f1ce3062 2420 err = rt_fill_info(net, dst, src, &fl4, skb,
15e47304 2421 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
1937504d 2422 RTM_NEWROUTE, 0, 0);
d889ce3b
TG
2423 if (err <= 0)
2424 goto errout_free;
1da177e4 2425
15e47304 2426 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
d889ce3b 2427errout:
2942e900 2428 return err;
1da177e4 2429
d889ce3b 2430errout_free:
1da177e4 2431 kfree_skb(skb);
d889ce3b 2432 goto errout;
1da177e4
LT
2433}
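For reference, a user-space sketch of the RTM_GETROUTE request that lands in inet_rtm_getroute() above (this is essentially what `ip route get` does): a minimal rtmsg plus an RTA_DST attribute is sent over a NETLINK_ROUTE socket, and the kernel answers with the RTM_NEWROUTE message built by rt_fill_info(). Error handling is trimmed and the destination address is arbitrary.

/* Sketch only: ask the kernel for the route to 192.0.2.1 (user space). */
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct {
                struct nlmsghdr nlh;
                struct rtmsg rtm;
                char attrbuf[64];
        } req;
        struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
        char reply[4096];
        struct rtattr *rta;
        struct in_addr dst;
        int fd;

        inet_pton(AF_INET, "192.0.2.1", &dst);

        memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
        req.nlh.nlmsg_type = RTM_GETROUTE;
        req.nlh.nlmsg_flags = NLM_F_REQUEST;
        req.rtm.rtm_family = AF_INET;
        req.rtm.rtm_dst_len = 32;

        /* Append RTA_DST carrying the destination address. */
        rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
        rta->rta_type = RTA_DST;
        rta->rta_len = RTA_LENGTH(sizeof(dst));
        memcpy(RTA_DATA(rta), &dst, sizeof(dst));
        req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

        fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        sendto(fd, &req, req.nlh.nlmsg_len, 0,
               (struct sockaddr *)&kernel, sizeof(kernel));
        recv(fd, reply, sizeof(reply), 0);      /* RTM_NEWROUTE from rt_fill_info() */
        close(fd);
        return 0;
}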
2434
2435int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2436{
1da177e4
LT
2437 return skb->len;
2438}
2439
2440void ip_rt_multicast_event(struct in_device *in_dev)
2441{
4ccfe6d4 2442 rt_cache_flush(dev_net(in_dev->dev));
1da177e4
LT
2443}
2444
2445#ifdef CONFIG_SYSCTL
082c7ca4
G
2446static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
2447static int ip_rt_gc_interval __read_mostly = 60 * HZ;
2448static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
2449static int ip_rt_gc_elasticity __read_mostly = 8;
2450
fe2c6338 2451static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
8d65af78 2452 void __user *buffer,
1da177e4
LT
2453 size_t *lenp, loff_t *ppos)
2454{
5aad1de5
TT
2455 struct net *net = (struct net *)__ctl->extra1;
2456
1da177e4 2457 if (write) {
5aad1de5
TT
2458 rt_cache_flush(net);
2459 fnhe_genid_bump(net);
1da177e4 2460 return 0;
e905a9ed 2461 }
1da177e4
LT
2462
2463 return -EINVAL;
2464}
2465
fe2c6338 2466static struct ctl_table ipv4_route_table[] = {
1da177e4 2467 {
1da177e4
LT
2468 .procname = "gc_thresh",
2469 .data = &ipv4_dst_ops.gc_thresh,
2470 .maxlen = sizeof(int),
2471 .mode = 0644,
6d9f239a 2472 .proc_handler = proc_dointvec,
1da177e4
LT
2473 },
2474 {
1da177e4
LT
2475 .procname = "max_size",
2476 .data = &ip_rt_max_size,
2477 .maxlen = sizeof(int),
2478 .mode = 0644,
6d9f239a 2479 .proc_handler = proc_dointvec,
1da177e4
LT
2480 },
2481 {
2482 /* Deprecated. Use gc_min_interval_ms */
e905a9ed 2483
1da177e4
LT
2484 .procname = "gc_min_interval",
2485 .data = &ip_rt_gc_min_interval,
2486 .maxlen = sizeof(int),
2487 .mode = 0644,
6d9f239a 2488 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2489 },
2490 {
1da177e4
LT
2491 .procname = "gc_min_interval_ms",
2492 .data = &ip_rt_gc_min_interval,
2493 .maxlen = sizeof(int),
2494 .mode = 0644,
6d9f239a 2495 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4
LT
2496 },
2497 {
1da177e4
LT
2498 .procname = "gc_timeout",
2499 .data = &ip_rt_gc_timeout,
2500 .maxlen = sizeof(int),
2501 .mode = 0644,
6d9f239a 2502 .proc_handler = proc_dointvec_jiffies,
1da177e4 2503 },
9f28a2fc
ED
2504 {
2505 .procname = "gc_interval",
2506 .data = &ip_rt_gc_interval,
2507 .maxlen = sizeof(int),
2508 .mode = 0644,
2509 .proc_handler = proc_dointvec_jiffies,
2510 },
1da177e4 2511 {
1da177e4
LT
2512 .procname = "redirect_load",
2513 .data = &ip_rt_redirect_load,
2514 .maxlen = sizeof(int),
2515 .mode = 0644,
6d9f239a 2516 .proc_handler = proc_dointvec,
1da177e4
LT
2517 },
2518 {
1da177e4
LT
2519 .procname = "redirect_number",
2520 .data = &ip_rt_redirect_number,
2521 .maxlen = sizeof(int),
2522 .mode = 0644,
6d9f239a 2523 .proc_handler = proc_dointvec,
1da177e4
LT
2524 },
2525 {
1da177e4
LT
2526 .procname = "redirect_silence",
2527 .data = &ip_rt_redirect_silence,
2528 .maxlen = sizeof(int),
2529 .mode = 0644,
6d9f239a 2530 .proc_handler = proc_dointvec,
1da177e4
LT
2531 },
2532 {
1da177e4
LT
2533 .procname = "error_cost",
2534 .data = &ip_rt_error_cost,
2535 .maxlen = sizeof(int),
2536 .mode = 0644,
6d9f239a 2537 .proc_handler = proc_dointvec,
1da177e4
LT
2538 },
2539 {
1da177e4
LT
2540 .procname = "error_burst",
2541 .data = &ip_rt_error_burst,
2542 .maxlen = sizeof(int),
2543 .mode = 0644,
6d9f239a 2544 .proc_handler = proc_dointvec,
1da177e4
LT
2545 },
2546 {
1da177e4
LT
2547 .procname = "gc_elasticity",
2548 .data = &ip_rt_gc_elasticity,
2549 .maxlen = sizeof(int),
2550 .mode = 0644,
6d9f239a 2551 .proc_handler = proc_dointvec,
1da177e4
LT
2552 },
2553 {
1da177e4
LT
2554 .procname = "mtu_expires",
2555 .data = &ip_rt_mtu_expires,
2556 .maxlen = sizeof(int),
2557 .mode = 0644,
6d9f239a 2558 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2559 },
2560 {
1da177e4
LT
2561 .procname = "min_pmtu",
2562 .data = &ip_rt_min_pmtu,
2563 .maxlen = sizeof(int),
2564 .mode = 0644,
6d9f239a 2565 .proc_handler = proc_dointvec,
1da177e4
LT
2566 },
2567 {
1da177e4
LT
2568 .procname = "min_adv_mss",
2569 .data = &ip_rt_min_advmss,
2570 .maxlen = sizeof(int),
2571 .mode = 0644,
6d9f239a 2572 .proc_handler = proc_dointvec,
1da177e4 2573 },
f8572d8f 2574 { }
1da177e4 2575};
39a23e75 2576
39a23e75
DL
2577static struct ctl_table ipv4_route_flush_table[] = {
2578 {
39a23e75
DL
2579 .procname = "flush",
2580 .maxlen = sizeof(int),
2581 .mode = 0200,
6d9f239a 2582 .proc_handler = ipv4_sysctl_rtcache_flush,
39a23e75 2583 },
f8572d8f 2584 { },
39a23e75
DL
2585};
2586
2587static __net_init int sysctl_route_net_init(struct net *net)
2588{
2589 struct ctl_table *tbl;
2590
2591 tbl = ipv4_route_flush_table;
09ad9bc7 2592 if (!net_eq(net, &init_net)) {
39a23e75
DL
2593 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2594 if (tbl == NULL)
2595 goto err_dup;
464dc801
EB
2596
2597 /* Don't export sysctls to unprivileged users */
2598 if (net->user_ns != &init_user_ns)
2599 tbl[0].procname = NULL;
39a23e75
DL
2600 }
2601 tbl[0].extra1 = net;
2602
ec8f23ce 2603 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
39a23e75
DL
2604 if (net->ipv4.route_hdr == NULL)
2605 goto err_reg;
2606 return 0;
2607
2608err_reg:
2609 if (tbl != ipv4_route_flush_table)
2610 kfree(tbl);
2611err_dup:
2612 return -ENOMEM;
2613}
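These per-net entries appear under /proc/sys/net/ipv4/route/ next to the global tunables registered from ip_static_sysctl_init() below; writing anything to the write-only "flush" file invokes ipv4_sysctl_rtcache_flush(), which flushes the cache and bumps the fnhe generation. A trivial user-space sketch, assuming root and the usual procfs mount:

/* Sketch only: trigger a route cache / exception flush from user space. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/proc/sys/net/ipv4/route/flush", O_WRONLY);

        if (fd < 0)
                return 1;
        write(fd, "1\n", 2);    /* any write fires the flush handler */
        close(fd);
        return 0;
}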
2614
2615static __net_exit void sysctl_route_net_exit(struct net *net)
2616{
2617 struct ctl_table *tbl;
2618
2619 tbl = net->ipv4.route_hdr->ctl_table_arg;
2620 unregister_net_sysctl_table(net->ipv4.route_hdr);
2621 BUG_ON(tbl == ipv4_route_flush_table);
2622 kfree(tbl);
2623}
2624
2625static __net_initdata struct pernet_operations sysctl_route_ops = {
2626 .init = sysctl_route_net_init,
2627 .exit = sysctl_route_net_exit,
2628};
1da177e4
LT
2629#endif
2630
3ee94372 2631static __net_init int rt_genid_init(struct net *net)
9f5e97e5 2632{
b42664f8 2633 atomic_set(&net->rt_genid, 0);
5aad1de5 2634 atomic_set(&net->fnhe_genid, 0);
436c3b66
DM
2635 get_random_bytes(&net->ipv4.dev_addr_genid,
2636 sizeof(net->ipv4.dev_addr_genid));
9f5e97e5
DL
2637 return 0;
2638}
2639
3ee94372
NH
2640static __net_initdata struct pernet_operations rt_genid_ops = {
2641 .init = rt_genid_init,
9f5e97e5
DL
2642};
2643
c3426b47
DM
2644static int __net_init ipv4_inetpeer_init(struct net *net)
2645{
2646 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2647
2648 if (!bp)
2649 return -ENOMEM;
2650 inet_peer_base_init(bp);
2651 net->ipv4.peers = bp;
2652 return 0;
2653}
2654
2655static void __net_exit ipv4_inetpeer_exit(struct net *net)
2656{
2657 struct inet_peer_base *bp = net->ipv4.peers;
2658
2659 net->ipv4.peers = NULL;
56a6b248 2660 inetpeer_invalidate_tree(bp);
c3426b47
DM
2661 kfree(bp);
2662}
2663
2664static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
2665 .init = ipv4_inetpeer_init,
2666 .exit = ipv4_inetpeer_exit,
2667};
9f5e97e5 2668
c7066f70 2669#ifdef CONFIG_IP_ROUTE_CLASSID
7d720c3e 2670struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
c7066f70 2671#endif /* CONFIG_IP_ROUTE_CLASSID */
1da177e4 2672
1da177e4
LT
2673int __init ip_rt_init(void)
2674{
424c4b70 2675 int rc = 0;
1da177e4 2676
c7066f70 2677#ifdef CONFIG_IP_ROUTE_CLASSID
0dcec8c2 2678 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
1da177e4
LT
2679 if (!ip_rt_acct)
2680 panic("IP: failed to allocate ip_rt_acct\n");
1da177e4
LT
2681#endif
2682
e5d679f3
AD
2683 ipv4_dst_ops.kmem_cachep =
2684 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
20c2df83 2685 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1da177e4 2686
14e50e57
DM
2687 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2688
fc66f95c
ED
2689 if (dst_entries_init(&ipv4_dst_ops) < 0)
2690 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2691
2692 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2693 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2694
89aef892
DM
2695 ipv4_dst_ops.gc_thresh = ~0;
2696 ip_rt_max_size = INT_MAX;
1da177e4 2697
1da177e4
LT
2698 devinet_init();
2699 ip_fib_init();
2700
73b38711 2701 if (ip_rt_proc_init())
058bd4d2 2702 pr_err("Unable to create route proc files\n");
1da177e4
LT
2703#ifdef CONFIG_XFRM
2704 xfrm_init();
703fb94e 2705 xfrm4_init();
1da177e4 2706#endif
c7ac8679 2707 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
63f3444f 2708
39a23e75
DL
2709#ifdef CONFIG_SYSCTL
2710 register_pernet_subsys(&sysctl_route_ops);
2711#endif
3ee94372 2712 register_pernet_subsys(&rt_genid_ops);
c3426b47 2713 register_pernet_subsys(&ipv4_inetpeer_ops);
1da177e4
LT
2714 return rc;
2715}
2716
a1bc6eb4 2717#ifdef CONFIG_SYSCTL
eeb61f71
AV
2718/*
2719 * We really need to sanitize the damn ipv4 init order, then all
2720 * this nonsense will go away.
2721 */
2722void __init ip_static_sysctl_init(void)
2723{
4e5ca785 2724 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
eeb61f71 2725}
a1bc6eb4 2726#endif