1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
8 * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
14 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
15 *
16 * Fixes:
17 * Alan Cox : Verify area fixes.
18 * Alan Cox : cli() protects routing changes
19 * Rui Oliveira : ICMP routing table updates
20 * (rco@di.uminho.pt) Routing table insertion and update
21 * Linus Torvalds : Rewrote bits to be sensible
22 * Alan Cox : Added BSD route gw semantics
23 * Alan Cox : Super /proc >4K
24 * Alan Cox : MTU in route table
25 * Alan Cox : MSS actually. Also added the window
26 * clamper.
27 * Sam Lantinga : Fixed route matching in rt_del()
28 * Alan Cox : Routing cache support.
29 * Alan Cox : Removed compatibility cruft.
30 * Alan Cox : RTF_REJECT support.
31 * Alan Cox : TCP irtt support.
32 * Jonathan Naylor : Added Metric support.
33 * Miquel van Smoorenburg : BSD API fixes.
34 * Miquel van Smoorenburg : Metrics.
35 * Alan Cox : Use __u32 properly
36 * Alan Cox : Aligned routing errors more closely with BSD
37 * our system is still very different.
38 * Alan Cox : Faster /proc handling
39 * Alexey Kuznetsov : Massive rework to support tree based routing,
40 * routing caches and better behaviour.
41 *
42 * Olaf Erb : irtt wasn't being copied right.
43 * Bjorn Ekwall : Kerneld route support.
44 * Alan Cox : Multicast fixed (I hope)
45 * Pavel Krauz : Limited broadcast fixed
46 * Mike McLagan : Routing by source
47 * Alexey Kuznetsov : End of old history. Split to fib.c and
48 * route.c and rewritten from scratch.
49 * Andi Kleen : Load-limit warning messages.
50 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
51 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
52 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
53 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
54 * Marc Boucher : routing by fwmark
55 * Robert Olsson : Added rt_cache statistics
56 * Arnaldo C. Melo : Convert proc stuff to seq_file
57 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
58 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
65#include <linux/config.h>
66#include <linux/module.h>
67#include <asm/uaccess.h>
68#include <asm/system.h>
69#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
72#include <linux/sched.h>
73#include <linux/mm.h>
74#include <linux/bootmem.h>
75#include <linux/string.h>
76#include <linux/socket.h>
77#include <linux/sockios.h>
78#include <linux/errno.h>
79#include <linux/in.h>
80#include <linux/inet.h>
81#include <linux/netdevice.h>
82#include <linux/proc_fs.h>
83#include <linux/init.h>
84#include <linux/skbuff.h>
85#include <linux/rtnetlink.h>
86#include <linux/inetdevice.h>
87#include <linux/igmp.h>
88#include <linux/pkt_sched.h>
89#include <linux/mroute.h>
90#include <linux/netfilter_ipv4.h>
91#include <linux/random.h>
92#include <linux/jhash.h>
93#include <linux/rcupdate.h>
94#include <linux/times.h>
95#include <net/protocol.h>
96#include <net/ip.h>
97#include <net/route.h>
98#include <net/inetpeer.h>
99#include <net/sock.h>
100#include <net/ip_fib.h>
101#include <net/arp.h>
102#include <net/tcp.h>
103#include <net/icmp.h>
104#include <net/xfrm.h>
105#include <net/ip_mp_alg.h>
106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
108#endif
109
110#define RT_FL_TOS(oldflp) \
111 ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
112
113#define IP_MAX_MTU 0xFFF0
114
115#define RT_GC_TIMEOUT (300*HZ)
116
117static int ip_rt_min_delay = 2 * HZ;
118static int ip_rt_max_delay = 10 * HZ;
119static int ip_rt_max_size;
120static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
121static int ip_rt_gc_interval = 60 * HZ;
122static int ip_rt_gc_min_interval = HZ / 2;
123static int ip_rt_redirect_number = 9;
124static int ip_rt_redirect_load = HZ / 50;
125static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1));
126static int ip_rt_error_cost = HZ;
127static int ip_rt_error_burst = 5 * HZ;
128static int ip_rt_gc_elasticity = 8;
129static int ip_rt_mtu_expires = 10 * 60 * HZ;
130static int ip_rt_min_pmtu = 512 + 20 + 20;
131static int ip_rt_min_advmss = 256;
132static int ip_rt_secret_interval = 10 * 60 * HZ;
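/* All of the intervals above are expressed in jiffies.  With the defaults
 * this means, for example, ip_rt_redirect_silence = (HZ/50) << 10, roughly
 * 20 seconds, and ip_rt_min_pmtu = 552 bytes (512 + 20 IP + 20 TCP).  In
 * kernels of this vintage most of these knobs are also exported as sysctls
 * under /proc/sys/net/ipv4/route/ (see the CONFIG_SYSCTL table further down
 * in this file), so the values here are only the boot-time defaults.
 */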
133static unsigned long rt_deadline;
134
135#define RTprint(a...) printk(KERN_DEBUG a)
136
137static struct timer_list rt_flush_timer;
138static struct timer_list rt_periodic_timer;
139static struct timer_list rt_secret_timer;
140
141/*
142 * Interface to generic destination cache.
143 */
144
145static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
146static void ipv4_dst_destroy(struct dst_entry *dst);
147static void ipv4_dst_ifdown(struct dst_entry *dst,
148 struct net_device *dev, int how);
149static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
150static void ipv4_link_failure(struct sk_buff *skb);
151static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
152static int rt_garbage_collect(void);
153
154
155static struct dst_ops ipv4_dst_ops = {
156 .family = AF_INET,
157 .protocol = __constant_htons(ETH_P_IP),
158 .gc = rt_garbage_collect,
159 .check = ipv4_dst_check,
160 .destroy = ipv4_dst_destroy,
161 .ifdown = ipv4_dst_ifdown,
162 .negative_advice = ipv4_negative_advice,
163 .link_failure = ipv4_link_failure,
164 .update_pmtu = ip_rt_update_pmtu,
165 .entry_size = sizeof(struct rtable),
166};
167
168#define ECN_OR_COST(class) TC_PRIO_##class
169
170__u8 ip_tos2prio[16] = {
171 TC_PRIO_BESTEFFORT,
172 ECN_OR_COST(FILLER),
173 TC_PRIO_BESTEFFORT,
174 ECN_OR_COST(BESTEFFORT),
175 TC_PRIO_BULK,
176 ECN_OR_COST(BULK),
177 TC_PRIO_BULK,
178 ECN_OR_COST(BULK),
179 TC_PRIO_INTERACTIVE,
180 ECN_OR_COST(INTERACTIVE),
181 TC_PRIO_INTERACTIVE,
182 ECN_OR_COST(INTERACTIVE),
183 TC_PRIO_INTERACTIVE_BULK,
184 ECN_OR_COST(INTERACTIVE_BULK),
185 TC_PRIO_INTERACTIVE_BULK,
186 ECN_OR_COST(INTERACTIVE_BULK)
187};
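/* The table above is indexed with the four historical TOS bits shifted down
 * by one; callers such as rt_tos2priority() in include/net/route.h use
 * ip_tos2prio[IPTOS_TOS(tos) >> 1].  For example IPTOS_LOWDELAY (0x10) lands
 * on index 8 = TC_PRIO_INTERACTIVE and IPTOS_THROUGHPUT (0x08) on index 4 =
 * TC_PRIO_BULK; the odd entries cover the "minimise monetary cost" bit.
 */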
188
189
190/*
191 * Route cache.
192 */
193
194/* The locking scheme is rather straightforward:
195 *
196 * 1) Read-Copy Update protects the buckets of the central route hash.
197 * 2) Only writers remove entries, and they hold the lock
198 * as they look at rtable reference counts.
199 * 3) Only readers acquire references to rtable entries,
200 * they do so with atomic increments and with the
201 * lock held.
202 */
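/* In practice that means: readers walk a chain under rcu_read_lock() with
 * rcu_dereference() and take their reference with dst_hold(); writers take
 * the per-bucket spinlock (rt_hash_lock_addr() below), publish new chain
 * heads with rcu_assign_pointer(), and hand removed entries to call_rcu_bh()
 * via rt_free() so that readers still traversing them remain safe.
 */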
203
204struct rt_hash_bucket {
205 struct rtable *chain;
206};
207#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
208/*
209 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
210 * The size of this table is a power of two and depends on the number of CPUS.
211 */
212#if NR_CPUS >= 32
213#define RT_HASH_LOCK_SZ 4096
214#elif NR_CPUS >= 16
215#define RT_HASH_LOCK_SZ 2048
216#elif NR_CPUS >= 8
217#define RT_HASH_LOCK_SZ 1024
218#elif NR_CPUS >= 4
219#define RT_HASH_LOCK_SZ 512
220#else
221#define RT_HASH_LOCK_SZ 256
222#endif
223
224static spinlock_t *rt_hash_locks;
225# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
226# define rt_hash_lock_init() { \
227 int i; \
228 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
229 if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
230 for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
231 spin_lock_init(&rt_hash_locks[i]); \
232 }
233#else
234# define rt_hash_lock_addr(slot) NULL
235# define rt_hash_lock_init()
236#endif
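/* Sketch of how the striping above is used: bucket i of the hash table is
 * protected by rt_hash_locks[i & (RT_HASH_LOCK_SZ - 1)], so several buckets
 * share one spinlock and the lock table stays small (at most 4096 locks)
 * no matter how large the route cache grows.  A writer touching bucket
 * "hash" does roughly:
 *
 *	spin_lock_bh(rt_hash_lock_addr(hash));
 *	... unlink/insert entries in rt_hash_table[hash].chain ...
 *	spin_unlock_bh(rt_hash_lock_addr(hash));
 *
 * On UP builds rt_hash_lock_addr() is NULL, which is harmless because the
 * spinlock part of spin_lock()/spin_unlock() compiles away there.
 */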
237
238static struct rt_hash_bucket *rt_hash_table;
239static unsigned rt_hash_mask;
240static int rt_hash_log;
241static unsigned int rt_hash_rnd;
242
243static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
244#define RT_CACHE_STAT_INC(field) (__get_cpu_var(rt_cache_stat).field++)
245
246static int rt_intern_hash(unsigned hash, struct rtable *rth,
247 struct rtable **res);
248
249static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos)
250{
251 return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd)
252 & rt_hash_mask);
253}
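/* Note that the hash is keyed with rt_hash_rnd, which is re-seeded with
 * get_random_bytes() on every flush in rt_run_flush() and forced to change
 * at least every ip_rt_secret_interval by rt_secret_rebuild(), so remote
 * hosts cannot easily construct worst-case hash chains.  Callers also fold
 * the interface index into the key, e.g.
 * rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos).
 */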
254
255#ifdef CONFIG_PROC_FS
256struct rt_cache_iter_state {
257 int bucket;
258};
259
260static struct rtable *rt_cache_get_first(struct seq_file *seq)
261{
262 struct rtable *r = NULL;
263 struct rt_cache_iter_state *st = seq->private;
264
265 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
266 rcu_read_lock_bh();
267 r = rt_hash_table[st->bucket].chain;
268 if (r)
269 break;
270 rcu_read_unlock_bh();
271 }
272 return r;
273}
274
275static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
276{
277 struct rt_cache_iter_state *st = rcu_dereference(seq->private);
278
279 r = r->u.rt_next;
280 while (!r) {
281 rcu_read_unlock_bh();
282 if (--st->bucket < 0)
283 break;
284 rcu_read_lock_bh();
285 r = rt_hash_table[st->bucket].chain;
286 }
287 return r;
288}
289
290static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
291{
292 struct rtable *r = rt_cache_get_first(seq);
293
294 if (r)
295 while (pos && (r = rt_cache_get_next(seq, r)))
296 --pos;
297 return pos ? NULL : r;
298}
299
300static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
301{
302 return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
303}
304
305static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
306{
307 struct rtable *r = NULL;
308
309 if (v == SEQ_START_TOKEN)
310 r = rt_cache_get_first(seq);
311 else
312 r = rt_cache_get_next(seq, v);
313 ++*pos;
314 return r;
315}
316
317static void rt_cache_seq_stop(struct seq_file *seq, void *v)
318{
319 if (v && v != SEQ_START_TOKEN)
320 rcu_read_unlock_bh();
321}
322
323static int rt_cache_seq_show(struct seq_file *seq, void *v)
324{
325 if (v == SEQ_START_TOKEN)
326 seq_printf(seq, "%-127s\n",
327 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
328 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
329 "HHUptod\tSpecDst");
330 else {
331 struct rtable *r = v;
332 char temp[256];
333
334 sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
335 "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
336 r->u.dst.dev ? r->u.dst.dev->name : "*",
337 (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
338 r->rt_flags, atomic_read(&r->u.dst.__refcnt),
339 r->u.dst.__use, 0, (unsigned long)r->rt_src,
340 (dst_metric(&r->u.dst, RTAX_ADVMSS) ?
341 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
342 dst_metric(&r->u.dst, RTAX_WINDOW),
343 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
344 dst_metric(&r->u.dst, RTAX_RTTVAR)),
345 r->fl.fl4_tos,
346 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
347 r->u.dst.hh ? (r->u.dst.hh->hh_output ==
348 dev_queue_xmit) : 0,
349 r->rt_spec_dst);
350 seq_printf(seq, "%-127s\n", temp);
351 }
352 return 0;
353}
354
355static struct seq_operations rt_cache_seq_ops = {
356 .start = rt_cache_seq_start,
357 .next = rt_cache_seq_next,
358 .stop = rt_cache_seq_stop,
359 .show = rt_cache_seq_show,
360};
361
362static int rt_cache_seq_open(struct inode *inode, struct file *file)
363{
364 struct seq_file *seq;
365 int rc = -ENOMEM;
366 struct rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
367
368 if (!s)
369 goto out;
370 rc = seq_open(file, &rt_cache_seq_ops);
371 if (rc)
372 goto out_kfree;
373 seq = file->private_data;
374 seq->private = s;
375 memset(s, 0, sizeof(*s));
376out:
377 return rc;
378out_kfree:
379 kfree(s);
380 goto out;
381}
382
383static struct file_operations rt_cache_seq_fops = {
384 .owner = THIS_MODULE,
385 .open = rt_cache_seq_open,
386 .read = seq_read,
387 .llseek = seq_lseek,
388 .release = seq_release_private,
389};
390
391
392static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
393{
394 int cpu;
395
396 if (*pos == 0)
397 return SEQ_START_TOKEN;
398
399 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
400 if (!cpu_possible(cpu))
401 continue;
402 *pos = cpu+1;
403 return &per_cpu(rt_cache_stat, cpu);
404 }
405 return NULL;
406}
407
408static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
409{
410 int cpu;
411
412 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
413 if (!cpu_possible(cpu))
414 continue;
415 *pos = cpu+1;
416 return &per_cpu(rt_cache_stat, cpu);
417 }
418 return NULL;
419
420}
421
422static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
423{
424
425}
426
427static int rt_cpu_seq_show(struct seq_file *seq, void *v)
428{
429 struct rt_cache_stat *st = v;
430
431 if (v == SEQ_START_TOKEN) {
432 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
433 return 0;
434 }
435
436 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
437 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
438 atomic_read(&ipv4_dst_ops.entries),
439 st->in_hit,
440 st->in_slow_tot,
441 st->in_slow_mc,
442 st->in_no_route,
443 st->in_brd,
444 st->in_martian_dst,
445 st->in_martian_src,
446
447 st->out_hit,
448 st->out_slow_tot,
449 st->out_slow_mc,
450
451 st->gc_total,
452 st->gc_ignored,
453 st->gc_goal_miss,
454 st->gc_dst_overflow,
455 st->in_hlist_search,
456 st->out_hlist_search
457 );
458 return 0;
459}
460
461static struct seq_operations rt_cpu_seq_ops = {
462 .start = rt_cpu_seq_start,
463 .next = rt_cpu_seq_next,
464 .stop = rt_cpu_seq_stop,
465 .show = rt_cpu_seq_show,
466};
467
468
469static int rt_cpu_seq_open(struct inode *inode, struct file *file)
470{
471 return seq_open(file, &rt_cpu_seq_ops);
472}
473
474static struct file_operations rt_cpu_seq_fops = {
475 .owner = THIS_MODULE,
476 .open = rt_cpu_seq_open,
477 .read = seq_read,
478 .llseek = seq_lseek,
479 .release = seq_release,
480};
481
482#endif /* CONFIG_PROC_FS */
483
484static __inline__ void rt_free(struct rtable *rt)
485{
486 multipath_remove(rt);
487 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
488}
489
490static __inline__ void rt_drop(struct rtable *rt)
491{
492 multipath_remove(rt);
493 ip_rt_put(rt);
494 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
495}
496
497static __inline__ int rt_fast_clean(struct rtable *rth)
498{
499 /* Kill broadcast/multicast entries very aggressively if they
500 collide in the hash table with more useful entries */
501 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
502 rth->fl.iif && rth->u.rt_next;
503}
504
505static __inline__ int rt_valuable(struct rtable *rth)
506{
507 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
508 rth->u.dst.expires;
509}
510
511static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
512{
513 unsigned long age;
514 int ret = 0;
515
516 if (atomic_read(&rth->u.dst.__refcnt))
517 goto out;
518
519 ret = 1;
520 if (rth->u.dst.expires &&
521 time_after_eq(jiffies, rth->u.dst.expires))
522 goto out;
523
524 age = jiffies - rth->u.dst.lastuse;
525 ret = 0;
526 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
527 (age <= tmo2 && rt_valuable(rth)))
528 goto out;
529 ret = 1;
530out: return ret;
531}
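/* In words: an entry can never expire while something still holds a
 * reference to it.  Otherwise it expires once its hard expiry has passed,
 * or once it has been idle longer than tmo1; colliding broadcast/multicast
 * entries (rt_fast_clean) get no tmo1 grace at all, and only "valuable"
 * entries are given the longer tmo2.  The callers halve tmo as they walk a
 * chain, so entries deeper in a bucket must be ever younger to survive.
 */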
532
533/* Bits of score are:
534 * 31: very valuable
535 * 30: not quite useless
536 * 29..0: usage counter
537 */
538static inline u32 rt_score(struct rtable *rt)
539{
540 u32 score = jiffies - rt->u.dst.lastuse;
541
542 score = ~score & ~(3<<30);
543
544 if (rt_valuable(rt))
545 score |= (1<<31);
546
547 if (!rt->fl.iif ||
548 !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
549 score |= (1<<30);
550
551 return score;
552}
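/* A higher score therefore means "more worth keeping": the top bit marks
 * redirected/notify/expiring entries, bit 30 marks output and non-broadcast
 * routes, and the low bits favour recently used entries (the score is built
 * from the bitwise NOT of the idle time).  rt_intern_hash() below remembers
 * the lowest-scoring unreferenced entry in a chain and frees it when the
 * chain grows past ip_rt_gc_elasticity.
 */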
553
554static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
555{
556 return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
557 fl1->oif == fl2->oif &&
558 fl1->iif == fl2->iif;
559}
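/* The memcmp covers the whole IPv4 part of the flow key (destination,
 * source, TOS and whatever else struct flowi keeps in ip4_u, such as the
 * firewall mark), so two cached routes are considered identical only when
 * the full key plus the input and output interfaces all match.
 */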
560
561#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
562static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
563 struct rtable *expentry,
564 int *removed_count)
565{
566 int passedexpired = 0;
567 struct rtable **nextstep = NULL;
568 struct rtable **rthp = chain_head;
569 struct rtable *rth;
570
571 if (removed_count)
572 *removed_count = 0;
573
574 while ((rth = *rthp) != NULL) {
575 if (rth == expentry)
576 passedexpired = 1;
577
578 if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
579 compare_keys(&(*rthp)->fl, &expentry->fl)) {
580 if (*rthp == expentry) {
581 *rthp = rth->u.rt_next;
582 continue;
583 } else {
584 *rthp = rth->u.rt_next;
585 rt_free(rth);
586 if (removed_count)
587 ++(*removed_count);
588 }
589 } else {
590 if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
591 passedexpired && !nextstep)
592 nextstep = &rth->u.rt_next;
593
594 rthp = &rth->u.rt_next;
595 }
596 }
597
598 rt_free(expentry);
599 if (removed_count)
600 ++(*removed_count);
601
602 return nextstep;
603}
604#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
605
606
607/* This runs via a timer and thus is always in BH context. */
608static void rt_check_expire(unsigned long dummy)
609{
610 static unsigned int rover;
611 unsigned int i = rover, goal;
612 struct rtable *rth, **rthp;
613 unsigned long now = jiffies;
614 u64 mult;
615
616 mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
617 if (ip_rt_gc_timeout > 1)
618 do_div(mult, ip_rt_gc_timeout);
619 goal = (unsigned int)mult;
620 if (goal > rt_hash_mask) goal = rt_hash_mask + 1;
621 for (; goal > 0; goal--) {
622 unsigned long tmo = ip_rt_gc_timeout;
623
624 i = (i + 1) & rt_hash_mask;
625 rthp = &rt_hash_table[i].chain;
626
627 if (*rthp == 0)
628 continue;
629 spin_lock(rt_hash_lock_addr(i));
630 while ((rth = *rthp) != NULL) {
631 if (rth->u.dst.expires) {
632 /* Entry is expired even if it is in use */
633 if (time_before_eq(now, rth->u.dst.expires)) {
634 tmo >>= 1;
635 rthp = &rth->u.rt_next;
636 continue;
637 }
638 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
639 tmo >>= 1;
640 rthp = &rth->u.rt_next;
641 continue;
642 }
643
644 /* Cleanup aged off entries. */
645#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
646 /* remove all related balanced entries if necessary */
647 if (rth->u.dst.flags & DST_BALANCED) {
648 rthp = rt_remove_balanced_route(
649 &rt_hash_table[i].chain,
650 rth, NULL);
651 if (!rthp)
652 break;
653 } else {
654 *rthp = rth->u.rt_next;
655 rt_free(rth);
656 }
657#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
658 *rthp = rth->u.rt_next;
659 rt_free(rth);
660#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
661 }
662 spin_unlock(rt_hash_lock_addr(i));
663
664 /* Fallback loop breaker. */
665 if (time_after(jiffies, now))
666 break;
667 }
668 rover = i;
669 mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
670}
671
672/* This can run from both BH and non-BH contexts, the latter
673 * in the case of a forced flush event.
674 */
675static void rt_run_flush(unsigned long dummy)
676{
677 int i;
678 struct rtable *rth, *next;
679
680 rt_deadline = 0;
681
682 get_random_bytes(&rt_hash_rnd, 4);
683
684 for (i = rt_hash_mask; i >= 0; i--) {
685 spin_lock_bh(rt_hash_lock_addr(i));
686 rth = rt_hash_table[i].chain;
687 if (rth)
688 rt_hash_table[i].chain = NULL;
689 spin_unlock_bh(rt_hash_lock_addr(i));
690
691 for (; rth; rth = next) {
692 next = rth->u.rt_next;
693 rt_free(rth);
694 }
695 }
696}
697
698static DEFINE_SPINLOCK(rt_flush_lock);
699
700void rt_cache_flush(int delay)
701{
702 unsigned long now = jiffies;
703 int user_mode = !in_softirq();
704
705 if (delay < 0)
706 delay = ip_rt_min_delay;
707
708 /* flush existing multipath state*/
709 multipath_flush();
710
711 spin_lock_bh(&rt_flush_lock);
712
713 if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
714 long tmo = (long)(rt_deadline - now);
715
716 /* If flush timer is already running
717 and flush request is not immediate (delay > 0):
718
719 if the deadline has not been reached yet, extend the timer to "delay",
720 otherwise fire it at deadline time.
721 */
722
723 if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
724 tmo = 0;
725
726 if (delay > tmo)
727 delay = tmo;
728 }
729
730 if (delay <= 0) {
731 spin_unlock_bh(&rt_flush_lock);
732 rt_run_flush(0);
733 return;
734 }
735
736 if (rt_deadline == 0)
737 rt_deadline = now + ip_rt_max_delay;
738
739 mod_timer(&rt_flush_timer, now+delay);
740 spin_unlock_bh(&rt_flush_lock);
741}
742
743static void rt_secret_rebuild(unsigned long dummy)
744{
745 unsigned long now = jiffies;
746
747 rt_cache_flush(0);
748 mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
749}
750
751/*
752 Short description of GC goals.
753
754 We want an algorithm that keeps the routing cache
755 at an equilibrium point, where the number of aged-off entries
756 stays approximately equal to the number of newly generated ones.
757
758 The current expiration strength is the variable "expire".
759 We try to adjust it dynamically, so that when the network is idle
760 "expire" is large enough to keep plenty of warm entries,
761 and when load increases it shrinks to limit the cache size.
762 */
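/* As a concrete example of the math below: the initial goal is
 * entries - (ip_rt_gc_elasticity << rt_hash_log), i.e. the collector only
 * starts expiring entries once the cache holds more than ip_rt_gc_elasticity
 * (8 by default) entries per hash bucket on average; below that it merely
 * adjusts the equilibrium target.
 */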
763
764static int rt_garbage_collect(void)
765{
766 static unsigned long expire = RT_GC_TIMEOUT;
767 static unsigned long last_gc;
768 static int rover;
769 static int equilibrium;
770 struct rtable *rth, **rthp;
771 unsigned long now = jiffies;
772 int goal;
773
774 /*
775 * Garbage collection is pretty expensive,
776 * do not make it too frequently.
777 */
778
779 RT_CACHE_STAT_INC(gc_total);
780
781 if (now - last_gc < ip_rt_gc_min_interval &&
782 atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
783 RT_CACHE_STAT_INC(gc_ignored);
784 goto out;
785 }
786
787 /* Calculate number of entries, which we want to expire now. */
788 goal = atomic_read(&ipv4_dst_ops.entries) -
789 (ip_rt_gc_elasticity << rt_hash_log);
790 if (goal <= 0) {
791 if (equilibrium < ipv4_dst_ops.gc_thresh)
792 equilibrium = ipv4_dst_ops.gc_thresh;
793 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
794 if (goal > 0) {
795 equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
796 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
797 }
798 } else {
799 /* We are in dangerous area. Try to reduce cache really
800 * aggressively.
801 */
802 goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
803 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
804 }
805
806 if (now - last_gc >= ip_rt_gc_min_interval)
807 last_gc = now;
808
809 if (goal <= 0) {
810 equilibrium += goal;
811 goto work_done;
812 }
813
814 do {
815 int i, k;
816
817 for (i = rt_hash_mask, k = rover; i >= 0; i--) {
818 unsigned long tmo = expire;
819
820 k = (k + 1) & rt_hash_mask;
821 rthp = &rt_hash_table[k].chain;
822 spin_lock_bh(rt_hash_lock_addr(k));
823 while ((rth = *rthp) != NULL) {
824 if (!rt_may_expire(rth, tmo, expire)) {
825 tmo >>= 1;
826 rthp = &rth->u.rt_next;
827 continue;
828 }
829#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
830 /* remove all related balanced entries
831 * if necessary
832 */
833 if (rth->u.dst.flags & DST_BALANCED) {
834 int r;
835
836 rthp = rt_remove_balanced_route(
837 &rt_hash_table[i].chain,
838 rth,
839 &r);
840 goal -= r;
841 if (!rthp)
842 break;
843 } else {
844 *rthp = rth->u.rt_next;
845 rt_free(rth);
846 goal--;
847 }
848#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
849 *rthp = rth->u.rt_next;
850 rt_free(rth);
851 goal--;
852#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
853 }
854 spin_unlock_bh(rt_hash_lock_addr(k));
855 if (goal <= 0)
856 break;
857 }
858 rover = k;
859
860 if (goal <= 0)
861 goto work_done;
862
863 /* The goal was not achieved. We stop the process if:
864
865 - expire has been reduced to zero (otherwise expire is halved);
866 - the table is not full;
867 - we were called from interrupt context;
868 - the jiffies check is just a fallback/debug loop breaker.
869 We will not spin here for a long time in any case.
870 */
871
872 RT_CACHE_STAT_INC(gc_goal_miss);
873
874 if (expire == 0)
875 break;
876
877 expire >>= 1;
878#if RT_CACHE_DEBUG >= 2
879 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
880 atomic_read(&ipv4_dst_ops.entries), goal, i);
881#endif
882
883 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
884 goto out;
885 } while (!in_softirq() && time_before_eq(jiffies, now));
886
887 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
888 goto out;
889 if (net_ratelimit())
890 printk(KERN_WARNING "dst cache overflow\n");
891 RT_CACHE_STAT_INC(gc_dst_overflow);
892 return 1;
893
894work_done:
895 expire += ip_rt_gc_min_interval;
896 if (expire > ip_rt_gc_timeout ||
897 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
898 expire = ip_rt_gc_timeout;
899#if RT_CACHE_DEBUG >= 2
900 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
901 atomic_read(&ipv4_dst_ops.entries), goal, rover);
902#endif
903out: return 0;
904}
905
906static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
907{
908 struct rtable *rth, **rthp;
909 unsigned long now;
910 struct rtable *cand, **candp;
911 u32 min_score;
912 int chain_length;
913 int attempts = !in_softirq();
914
915restart:
916 chain_length = 0;
917 min_score = ~(u32)0;
918 cand = NULL;
919 candp = NULL;
920 now = jiffies;
921
922 rthp = &rt_hash_table[hash].chain;
923
924 spin_lock_bh(rt_hash_lock_addr(hash));
925 while ((rth = *rthp) != NULL) {
926#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
927 if (!(rth->u.dst.flags & DST_BALANCED) &&
928 compare_keys(&rth->fl, &rt->fl)) {
929#else
930 if (compare_keys(&rth->fl, &rt->fl)) {
931#endif
932 /* Put it first */
933 *rthp = rth->u.rt_next;
934 /*
935 * Since lookup is lockfree, the deletion
936 * must be visible to another weakly ordered CPU before
937 * the insertion at the start of the hash chain.
938 */
939 rcu_assign_pointer(rth->u.rt_next,
940 rt_hash_table[hash].chain);
941 /*
942 * Since lookup is lockfree, the update writes
943 * must be ordered for consistency on SMP.
944 */
945 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
946
947 rth->u.dst.__use++;
948 dst_hold(&rth->u.dst);
949 rth->u.dst.lastuse = now;
950 spin_unlock_bh(rt_hash_lock_addr(hash));
951
952 rt_drop(rt);
953 *rp = rth;
954 return 0;
955 }
956
957 if (!atomic_read(&rth->u.dst.__refcnt)) {
958 u32 score = rt_score(rth);
959
960 if (score <= min_score) {
961 cand = rth;
962 candp = rthp;
963 min_score = score;
964 }
965 }
966
967 chain_length++;
968
969 rthp = &rth->u.rt_next;
970 }
971
972 if (cand) {
973 /* ip_rt_gc_elasticity used to be the average chain length;
974 * when exceeded, gc becomes really aggressive.
975 *
976 * The second limit is less certain. At the moment it allows
977 * only 2 entries per bucket. We will see.
978 */
979 if (chain_length > ip_rt_gc_elasticity) {
980 *candp = cand->u.rt_next;
981 rt_free(cand);
982 }
983 }
984
985 /* Try to bind route to arp only if it is output
986 route or unicast forwarding path.
987 */
988 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
989 int err = arp_bind_neighbour(&rt->u.dst);
990 if (err) {
991 spin_unlock_bh(rt_hash_lock_addr(hash));
992
993 if (err != -ENOBUFS) {
994 rt_drop(rt);
995 return err;
996 }
997
998 /* Neighbour tables are full and nothing
999 can be released. Try to shrink the route cache;
1000 it most likely holds some neighbour records.
1001 */
1002 if (attempts-- > 0) {
1003 int saved_elasticity = ip_rt_gc_elasticity;
1004 int saved_int = ip_rt_gc_min_interval;
1005 ip_rt_gc_elasticity = 1;
1006 ip_rt_gc_min_interval = 0;
1007 rt_garbage_collect();
1008 ip_rt_gc_min_interval = saved_int;
1009 ip_rt_gc_elasticity = saved_elasticity;
1010 goto restart;
1011 }
1012
1013 if (net_ratelimit())
1014 printk(KERN_WARNING "Neighbour table overflow.\n");
1015 rt_drop(rt);
1016 return -ENOBUFS;
1017 }
1018 }
1019
1020 rt->u.rt_next = rt_hash_table[hash].chain;
1021#if RT_CACHE_DEBUG >= 2
1022 if (rt->u.rt_next) {
1023 struct rtable *trt;
1024 printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash,
1025 NIPQUAD(rt->rt_dst));
1026 for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next)
1027 printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
1028 printk("\n");
1029 }
1030#endif
1031 rt_hash_table[hash].chain = rt;
1032 spin_unlock_bh(rt_hash_lock_addr(hash));
1033 *rp = rt;
1034 return 0;
1035}
1036
1037void rt_bind_peer(struct rtable *rt, int create)
1038{
1039 static DEFINE_SPINLOCK(rt_peer_lock);
1040 struct inet_peer *peer;
1041
1042 peer = inet_getpeer(rt->rt_dst, create);
1043
1044 spin_lock_bh(&rt_peer_lock);
1045 if (rt->peer == NULL) {
1046 rt->peer = peer;
1047 peer = NULL;
1048 }
1049 spin_unlock_bh(&rt_peer_lock);
1050 if (peer)
1051 inet_putpeer(peer);
1052}
1053
1054/*
1055 * Peer allocation may fail only under serious out-of-memory conditions; however,
1056 * we can still generate some output.
1057 * Random ID selection looks a bit dangerous because we have no way to
1058 * guarantee that the selected ID is unique within a reasonable period of time.
1059 * But a broken packet identifier may be better than no packet at all.
1060 */
1061static void ip_select_fb_ident(struct iphdr *iph)
1062{
1063 static DEFINE_SPINLOCK(ip_fb_id_lock);
1064 static u32 ip_fallback_id;
1065 u32 salt;
1066
1067 spin_lock_bh(&ip_fb_id_lock);
1068 salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
1069 iph->id = htons(salt & 0xFFFF);
1070 ip_fallback_id = salt;
1071 spin_unlock_bh(&ip_fb_id_lock);
1072}
1073
1074void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1075{
1076 struct rtable *rt = (struct rtable *) dst;
1077
1078 if (rt) {
1079 if (rt->peer == NULL)
1080 rt_bind_peer(rt, 1);
1081
1082 /* If peer is attached to destination, it is never detached,
1083 so we do not need to grab a lock to dereference it.
1084 */
1085 if (rt->peer) {
1086 iph->id = htons(inet_getid(rt->peer, more));
1087 return;
1088 }
1089 } else
1090 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
1091 __builtin_return_address(0));
1092
1093 ip_select_fb_ident(iph);
1094}
1095
1096static void rt_del(unsigned hash, struct rtable *rt)
1097{
1098 struct rtable **rthp;
1099
1100 spin_lock_bh(rt_hash_lock_addr(hash));
1101 ip_rt_put(rt);
1102 for (rthp = &rt_hash_table[hash].chain; *rthp;
1103 rthp = &(*rthp)->u.rt_next)
1104 if (*rthp == rt) {
1105 *rthp = rt->u.rt_next;
1106 rt_free(rt);
1107 break;
1108 }
1109 spin_unlock_bh(rt_hash_lock_addr(hash));
1110}
1111
1112void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
1113 u32 saddr, u8 tos, struct net_device *dev)
1114{
1115 int i, k;
1116 struct in_device *in_dev = in_dev_get(dev);
1117 struct rtable *rth, **rthp;
1118 u32 skeys[2] = { saddr, 0 };
1119 int ikeys[2] = { dev->ifindex, 0 };
1120
1121 tos &= IPTOS_RT_MASK;
1122
1123 if (!in_dev)
1124 return;
1125
1126 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1127 || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
1128 goto reject_redirect;
1129
1130 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1131 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1132 goto reject_redirect;
1133 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1134 goto reject_redirect;
1135 } else {
1136 if (inet_addr_type(new_gw) != RTN_UNICAST)
1137 goto reject_redirect;
1138 }
1139
1140 for (i = 0; i < 2; i++) {
1141 for (k = 0; k < 2; k++) {
1142 unsigned hash = rt_hash_code(daddr,
1143 skeys[i] ^ (ikeys[k] << 5),
1144 tos);
1145
1146 rthp=&rt_hash_table[hash].chain;
1147
1148 rcu_read_lock();
1149 while ((rth = rcu_dereference(*rthp)) != NULL) {
1150 struct rtable *rt;
1151
1152 if (rth->fl.fl4_dst != daddr ||
1153 rth->fl.fl4_src != skeys[i] ||
1154 rth->fl.fl4_tos != tos ||
1155 rth->fl.oif != ikeys[k] ||
1156 rth->fl.iif != 0) {
1157 rthp = &rth->u.rt_next;
1158 continue;
1159 }
1160
1161 if (rth->rt_dst != daddr ||
1162 rth->rt_src != saddr ||
1163 rth->u.dst.error ||
1164 rth->rt_gateway != old_gw ||
1165 rth->u.dst.dev != dev)
1166 break;
1167
1168 dst_hold(&rth->u.dst);
1169 rcu_read_unlock();
1170
1171 rt = dst_alloc(&ipv4_dst_ops);
1172 if (rt == NULL) {
1173 ip_rt_put(rth);
1174 in_dev_put(in_dev);
1175 return;
1176 }
1177
1178 /* Copy all the information. */
1179 *rt = *rth;
1180 INIT_RCU_HEAD(&rt->u.dst.rcu_head);
1181 rt->u.dst.__use = 1;
1182 atomic_set(&rt->u.dst.__refcnt, 1);
1183 rt->u.dst.child = NULL;
1184 if (rt->u.dst.dev)
1185 dev_hold(rt->u.dst.dev);
1186 if (rt->idev)
1187 in_dev_hold(rt->idev);
1188 rt->u.dst.obsolete = 0;
1189 rt->u.dst.lastuse = jiffies;
1190 rt->u.dst.path = &rt->u.dst;
1191 rt->u.dst.neighbour = NULL;
1192 rt->u.dst.hh = NULL;
1193 rt->u.dst.xfrm = NULL;
1194
1195 rt->rt_flags |= RTCF_REDIRECTED;
1196
1197 /* Gateway is different ... */
1198 rt->rt_gateway = new_gw;
1199
1200 /* Redirect received -> path was valid */
1201 dst_confirm(&rth->u.dst);
1202
1203 if (rt->peer)
1204 atomic_inc(&rt->peer->refcnt);
1205
1206 if (arp_bind_neighbour(&rt->u.dst) ||
1207 !(rt->u.dst.neighbour->nud_state &
1208 NUD_VALID)) {
1209 if (rt->u.dst.neighbour)
1210 neigh_event_send(rt->u.dst.neighbour, NULL);
1211 ip_rt_put(rth);
1212 rt_drop(rt);
1213 goto do_next;
1214 }
1215
1216 rt_del(hash, rth);
1217 if (!rt_intern_hash(hash, rt, &rt))
1218 ip_rt_put(rt);
1219 goto do_next;
1220 }
1221 rcu_read_unlock();
1222 do_next:
1223 ;
1224 }
1225 }
1226 in_dev_put(in_dev);
1227 return;
1228
1229reject_redirect:
1230#ifdef CONFIG_IP_ROUTE_VERBOSE
1231 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1232 printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
1233 "%u.%u.%u.%u ignored.\n"
1234 " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
1235 "tos %02x\n",
1236 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
1237 NIPQUAD(saddr), NIPQUAD(daddr), tos);
1238#endif
1239 in_dev_put(in_dev);
1240}
1241
1242static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1243{
1244 struct rtable *rt = (struct rtable*)dst;
1245 struct dst_entry *ret = dst;
1246
1247 if (rt) {
1248 if (dst->obsolete) {
1249 ip_rt_put(rt);
1250 ret = NULL;
1251 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1252 rt->u.dst.expires) {
1253 unsigned hash = rt_hash_code(rt->fl.fl4_dst,
1254 rt->fl.fl4_src ^
1255 (rt->fl.oif << 5),
1256 rt->fl.fl4_tos);
1257#if RT_CACHE_DEBUG >= 1
1258 printk(KERN_DEBUG "ip_rt_advice: redirect to "
1259 "%u.%u.%u.%u/%02x dropped\n",
1260 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1261#endif
1262 rt_del(hash, rt);
1263 ret = NULL;
1264 }
1265 }
1266 return ret;
1267}
1268
1269/*
1270 * Algorithm:
1271 * 1. The first ip_rt_redirect_number redirects are sent
1272 * with exponential backoff, then we stop sending them at all,
1273 * assuming that the host ignores our redirects.
1274 * 2. If we did not see packets requiring redirects
1275 * during ip_rt_redirect_silence, we assume that the host
1276 * forgot redirected route and start to send redirects again.
1277 *
1278 * This algorithm is much cheaper and more intelligent than dumb load limiting
1279 * in icmp.c.
1280 *
1281 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
1282 * and "frag. need" (breaks PMTU discovery) in icmp.c.
1283 */
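/* For a feel of the timing, assume HZ=100: ip_rt_redirect_load is then
 * 2 jiffies (20 ms), and each redirect sent doubles the required gap
 * (load << rate_tokens), so the 9 allowed redirects go out roughly 20 ms,
 * 40 ms, 80 ms, ... apart.  After that we stay silent until
 * ip_rt_redirect_silence (about 2048 jiffies, i.e. ~20 s) passes without a
 * packet that would have triggered a redirect.
 */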
1284
1285void ip_rt_send_redirect(struct sk_buff *skb)
1286{
1287 struct rtable *rt = (struct rtable*)skb->dst;
1288 struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
1289
1290 if (!in_dev)
1291 return;
1292
1293 if (!IN_DEV_TX_REDIRECTS(in_dev))
1294 goto out;
1295
1296 /* No redirected packets during ip_rt_redirect_silence;
1297 * reset the algorithm.
1298 */
1299 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
1300 rt->u.dst.rate_tokens = 0;
1301
1302 /* Too many ignored redirects; do not send anything.
1303 * Set u.dst.rate_last to the last seen redirected packet.
1304 */
1305 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
1306 rt->u.dst.rate_last = jiffies;
1307 goto out;
1308 }
1309
1310 /* Check for load limit; set rate_last to the latest sent
1311 * redirect.
1312 */
1313 if (time_after(jiffies,
1314 (rt->u.dst.rate_last +
1315 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
1316 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1317 rt->u.dst.rate_last = jiffies;
1318 ++rt->u.dst.rate_tokens;
1319#ifdef CONFIG_IP_ROUTE_VERBOSE
1320 if (IN_DEV_LOG_MARTIANS(in_dev) &&
1321 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1322 net_ratelimit())
1323 printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
1324 "redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
1325 NIPQUAD(rt->rt_src), rt->rt_iif,
1326 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
1327#endif
1328 }
1329out:
1330 in_dev_put(in_dev);
1331}
1332
1333static int ip_error(struct sk_buff *skb)
1334{
1335 struct rtable *rt = (struct rtable*)skb->dst;
1336 unsigned long now;
1337 int code;
1338
1339 switch (rt->u.dst.error) {
1340 case EINVAL:
1341 default:
1342 goto out;
1343 case EHOSTUNREACH:
1344 code = ICMP_HOST_UNREACH;
1345 break;
1346 case ENETUNREACH:
1347 code = ICMP_NET_UNREACH;
1348 break;
1349 case EACCES:
1350 code = ICMP_PKT_FILTERED;
1351 break;
1352 }
1353
1354 now = jiffies;
1355 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
1356 if (rt->u.dst.rate_tokens > ip_rt_error_burst)
1357 rt->u.dst.rate_tokens = ip_rt_error_burst;
1358 rt->u.dst.rate_last = now;
1359 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
1360 rt->u.dst.rate_tokens -= ip_rt_error_cost;
1361 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1362 }
1363
1364out: kfree_skb(skb);
1365 return 0;
1366}
1367
1368/*
1369 * The last two values are not from the RFC but
1370 * are needed for AMPRnet AX.25 paths.
1371 */
1372
1373static const unsigned short mtu_plateau[] =
1374{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1375
1376static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
1377{
1378 int i;
1379
1380 for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
1381 if (old_mtu > mtu_plateau[i])
1382 return mtu_plateau[i];
1383 return 68;
1384}
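/* The plateau table is the classical RFC 1191 list of common link MTUs; the
 * search returns the largest plateau strictly below the old MTU, so e.g.
 * guess_mtu(1500) yields 1492, and anything of 128 or less bottoms out at
 * the IPv4 minimum of 68 bytes.
 */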
1385
1386unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
1387{
1388 int i;
1389 unsigned short old_mtu = ntohs(iph->tot_len);
1390 struct rtable *rth;
1391 u32 skeys[2] = { iph->saddr, 0, };
1392 u32 daddr = iph->daddr;
1393 u8 tos = iph->tos & IPTOS_RT_MASK;
1394 unsigned short est_mtu = 0;
1395
1396 if (ipv4_config.no_pmtu_disc)
1397 return 0;
1398
1399 for (i = 0; i < 2; i++) {
1400 unsigned hash = rt_hash_code(daddr, skeys[i], tos);
1401
1402 rcu_read_lock();
1403 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1404 rth = rcu_dereference(rth->u.rt_next)) {
1405 if (rth->fl.fl4_dst == daddr &&
1406 rth->fl.fl4_src == skeys[i] &&
1407 rth->rt_dst == daddr &&
1408 rth->rt_src == iph->saddr &&
1409 rth->fl.fl4_tos == tos &&
1410 rth->fl.iif == 0 &&
1411 !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
1412 unsigned short mtu = new_mtu;
1413
1414 if (new_mtu < 68 || new_mtu >= old_mtu) {
1415
1416 /* BSD 4.2 compatibility hack :-( */
1417 if (mtu == 0 &&
1418 old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
1419 old_mtu >= 68 + (iph->ihl << 2))
1420 old_mtu -= iph->ihl << 2;
1421
1422 mtu = guess_mtu(old_mtu);
1423 }
1424 if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
1425 if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) {
1426 dst_confirm(&rth->u.dst);
1427 if (mtu < ip_rt_min_pmtu) {
1428 mtu = ip_rt_min_pmtu;
1429 rth->u.dst.metrics[RTAX_LOCK-1] |=
1430 (1 << RTAX_MTU);
1431 }
1432 rth->u.dst.metrics[RTAX_MTU-1] = mtu;
1433 dst_set_expires(&rth->u.dst,
1434 ip_rt_mtu_expires);
1435 }
1436 est_mtu = mtu;
1437 }
1438 }
1439 }
1440 rcu_read_unlock();
1441 }
1442 return est_mtu ? : new_mtu;
1443}
1444
1445static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1446{
1447 if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
1448 !(dst_metric_locked(dst, RTAX_MTU))) {
1449 if (mtu < ip_rt_min_pmtu) {
1450 mtu = ip_rt_min_pmtu;
1451 dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
1452 }
1453 dst->metrics[RTAX_MTU-1] = mtu;
1454 dst_set_expires(dst, ip_rt_mtu_expires);
1455 }
1456}
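/* Learned path MTUs are clamped at ip_rt_min_pmtu; when that clamp kicks in
 * the MTU metric is also locked (RTAX_LOCK), so neither this function nor
 * ip_rt_frag_needed() will lower it any further, which protects against
 * hosts (or forged ICMPs) advertising absurdly small MTUs.  Whatever is
 * learned expires again after ip_rt_mtu_expires.
 */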
1457
1458static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1459{
1460 return NULL;
1461}
1462
1463static void ipv4_dst_destroy(struct dst_entry *dst)
1464{
1465 struct rtable *rt = (struct rtable *) dst;
1466 struct inet_peer *peer = rt->peer;
1467 struct in_device *idev = rt->idev;
1468
1469 if (peer) {
1470 rt->peer = NULL;
1471 inet_putpeer(peer);
1472 }
1473
1474 if (idev) {
1475 rt->idev = NULL;
1476 in_dev_put(idev);
1477 }
1478}
1479
1480static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1481 int how)
1482{
1483 struct rtable *rt = (struct rtable *) dst;
1484 struct in_device *idev = rt->idev;
1485 if (dev != &loopback_dev && idev && idev->dev == dev) {
1486 struct in_device *loopback_idev = in_dev_get(&loopback_dev);
1487 if (loopback_idev) {
1488 rt->idev = loopback_idev;
1489 in_dev_put(idev);
1490 }
1491 }
1492}
1493
1494static void ipv4_link_failure(struct sk_buff *skb)
1495{
1496 struct rtable *rt;
1497
1498 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1499
1500 rt = (struct rtable *) skb->dst;
1501 if (rt)
1502 dst_set_expires(&rt->u.dst, 0);
1503}
1504
1505static int ip_rt_bug(struct sk_buff *skb)
1506{
1507 printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
1508 NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
1509 skb->dev ? skb->dev->name : "?");
1510 kfree_skb(skb);
1511 return 0;
1512}
1513
1514/*
1515 We do not cache the source address of the outgoing interface,
1516 because it is used only by the IP RR, TS and SRR options,
1517 so it is out of the fast path.
1518
1519 BTW remember: "addr" is allowed to be unaligned
1520 in IP options!
1521 */
1522
1523void ip_rt_get_source(u8 *addr, struct rtable *rt)
1524{
1525 u32 src;
1526 struct fib_result res;
1527
1528 if (rt->fl.iif == 0)
1529 src = rt->rt_src;
1530 else if (fib_lookup(&rt->fl, &res) == 0) {
1531 src = FIB_RES_PREFSRC(res);
1532 fib_res_put(&res);
1533 } else
1534 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
1535 RT_SCOPE_UNIVERSE);
1536 memcpy(addr, &src, 4);
1537}
1538
1539#ifdef CONFIG_NET_CLS_ROUTE
1540static void set_class_tag(struct rtable *rt, u32 tag)
1541{
1542 if (!(rt->u.dst.tclassid & 0xFFFF))
1543 rt->u.dst.tclassid |= tag & 0xFFFF;
1544 if (!(rt->u.dst.tclassid & 0xFFFF0000))
1545 rt->u.dst.tclassid |= tag & 0xFFFF0000;
1546}
1547#endif
1548
1549static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1550{
1551 struct fib_info *fi = res->fi;
1552
1553 if (fi) {
1554 if (FIB_RES_GW(*res) &&
1555 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1556 rt->rt_gateway = FIB_RES_GW(*res);
1557 memcpy(rt->u.dst.metrics, fi->fib_metrics,
1558 sizeof(rt->u.dst.metrics));
1559 if (fi->fib_mtu == 0) {
1560 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
1561 if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
1562 rt->rt_gateway != rt->rt_dst &&
1563 rt->u.dst.dev->mtu > 576)
1564 rt->u.dst.metrics[RTAX_MTU-1] = 576;
1565 }
1566#ifdef CONFIG_NET_CLS_ROUTE
1567 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1568#endif
1569 } else
1570 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
1571
1572 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1573 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1574 if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
1575 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1576 if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
1577 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
1578 ip_rt_min_advmss);
1579 if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
1580 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1581
1582#ifdef CONFIG_NET_CLS_ROUTE
1583#ifdef CONFIG_IP_MULTIPLE_TABLES
1584 set_class_tag(rt, fib_rules_tclass(res));
1585#endif
1586 set_class_tag(rt, itag);
1587#endif
1588 rt->rt_type = res->type;
1589}
1590
1591static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
1592 u8 tos, struct net_device *dev, int our)
1593{
1594 unsigned hash;
1595 struct rtable *rth;
1596 u32 spec_dst;
1597 struct in_device *in_dev = in_dev_get(dev);
1598 u32 itag = 0;
1599
1600 /* Primary sanity checks. */
1601
1602 if (in_dev == NULL)
1603 return -EINVAL;
1604
1605 if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
1606 skb->protocol != htons(ETH_P_IP))
1607 goto e_inval;
1608
1609 if (ZERONET(saddr)) {
1610 if (!LOCAL_MCAST(daddr))
1611 goto e_inval;
1612 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1613 } else if (fib_validate_source(saddr, 0, tos, 0,
1614 dev, &spec_dst, &itag) < 0)
1615 goto e_inval;
1616
1617 rth = dst_alloc(&ipv4_dst_ops);
1618 if (!rth)
1619 goto e_nobufs;
1620
1621 rth->u.dst.output= ip_rt_bug;
1622
1623 atomic_set(&rth->u.dst.__refcnt, 1);
1624 rth->u.dst.flags= DST_HOST;
1625 if (in_dev->cnf.no_policy)
1626 rth->u.dst.flags |= DST_NOPOLICY;
1627 rth->fl.fl4_dst = daddr;
1628 rth->rt_dst = daddr;
1629 rth->fl.fl4_tos = tos;
1630#ifdef CONFIG_IP_ROUTE_FWMARK
1631 rth->fl.fl4_fwmark= skb->nfmark;
1632#endif
1633 rth->fl.fl4_src = saddr;
1634 rth->rt_src = saddr;
1635#ifdef CONFIG_NET_CLS_ROUTE
1636 rth->u.dst.tclassid = itag;
1637#endif
1638 rth->rt_iif =
1639 rth->fl.iif = dev->ifindex;
1640 rth->u.dst.dev = &loopback_dev;
1641 dev_hold(rth->u.dst.dev);
1642 rth->idev = in_dev_get(rth->u.dst.dev);
1643 rth->fl.oif = 0;
1644 rth->rt_gateway = daddr;
1645 rth->rt_spec_dst= spec_dst;
1646 rth->rt_type = RTN_MULTICAST;
1647 rth->rt_flags = RTCF_MULTICAST;
1648 if (our) {
1649 rth->u.dst.input= ip_local_deliver;
1650 rth->rt_flags |= RTCF_LOCAL;
1651 }
1652
1653#ifdef CONFIG_IP_MROUTE
1654 if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
1655 rth->u.dst.input = ip_mr_input;
1656#endif
1657 RT_CACHE_STAT_INC(in_slow_mc);
1658
1659 in_dev_put(in_dev);
1660 hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
1661 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
1662
1663e_nobufs:
1664 in_dev_put(in_dev);
1665 return -ENOBUFS;
1666
1667e_inval:
1668 in_dev_put(in_dev);
1669 return -EINVAL;
1670}
1671
1672
1673static void ip_handle_martian_source(struct net_device *dev,
1674 struct in_device *in_dev,
1675 struct sk_buff *skb,
1676 u32 daddr,
1677 u32 saddr)
1678{
1679 RT_CACHE_STAT_INC(in_martian_src);
1680#ifdef CONFIG_IP_ROUTE_VERBOSE
1681 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1682 /*
1683 * RFC1812 recommendation, if source is martian,
1684 * the only hint is MAC header.
1685 */
1686 printk(KERN_WARNING "martian source %u.%u.%u.%u from "
1687 "%u.%u.%u.%u, on dev %s\n",
1688 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1689 if (dev->hard_header_len && skb->mac.raw) {
1690 int i;
1691 unsigned char *p = skb->mac.raw;
1692 printk(KERN_WARNING "ll header: ");
1693 for (i = 0; i < dev->hard_header_len; i++, p++) {
1694 printk("%02x", *p);
1695 if (i < (dev->hard_header_len - 1))
1696 printk(":");
1697 }
1698 printk("\n");
1699 }
1700 }
1701#endif
1702}
1703
1704static inline int __mkroute_input(struct sk_buff *skb,
1705 struct fib_result* res,
1706 struct in_device *in_dev,
1707 u32 daddr, u32 saddr, u32 tos,
1708 struct rtable **result)
1709{
1710
1711 struct rtable *rth;
1712 int err;
1713 struct in_device *out_dev;
1714 unsigned flags = 0;
1715 u32 spec_dst, itag;
1716
1717 /* get a working reference to the output device */
1718 out_dev = in_dev_get(FIB_RES_DEV(*res));
1719 if (out_dev == NULL) {
1720 if (net_ratelimit())
1721 printk(KERN_CRIT "Bug in ip_route_input" \
1722 "_slow(). Please, report\n");
1723 return -EINVAL;
1724 }
1725
1726
1727 err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
1728 in_dev->dev, &spec_dst, &itag);
1729 if (err < 0) {
1730 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1731 saddr);
1732
1733 err = -EINVAL;
1734 goto cleanup;
1735 }
1736
1737 if (err)
1738 flags |= RTCF_DIRECTSRC;
1739
1740 if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
1741 (IN_DEV_SHARED_MEDIA(out_dev) ||
1742 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1743 flags |= RTCF_DOREDIRECT;
1744
1745 if (skb->protocol != htons(ETH_P_IP)) {
1746 /* Not IP (i.e. ARP). Do not create route, if it is
1747 * invalid for proxy arp. DNAT routes are always valid.
1748 */
1749 if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
1750 err = -EINVAL;
1751 goto cleanup;
1752 }
1753 }
1754
1755
1756 rth = dst_alloc(&ipv4_dst_ops);
1757 if (!rth) {
1758 err = -ENOBUFS;
1759 goto cleanup;
1760 }
1761
1762 atomic_set(&rth->u.dst.__refcnt, 1);
1763 rth->u.dst.flags= DST_HOST;
1764#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
1765 if (res->fi->fib_nhs > 1)
1766 rth->u.dst.flags |= DST_BALANCED;
1767#endif
1768 if (in_dev->cnf.no_policy)
1769 rth->u.dst.flags |= DST_NOPOLICY;
1770 if (in_dev->cnf.no_xfrm)
1771 rth->u.dst.flags |= DST_NOXFRM;
1772 rth->fl.fl4_dst = daddr;
1773 rth->rt_dst = daddr;
1774 rth->fl.fl4_tos = tos;
1775#ifdef CONFIG_IP_ROUTE_FWMARK
1776 rth->fl.fl4_fwmark= skb->nfmark;
1777#endif
1778 rth->fl.fl4_src = saddr;
1779 rth->rt_src = saddr;
1780 rth->rt_gateway = daddr;
1781 rth->rt_iif =
1782 rth->fl.iif = in_dev->dev->ifindex;
1783 rth->u.dst.dev = (out_dev)->dev;
1784 dev_hold(rth->u.dst.dev);
1785 rth->idev = in_dev_get(rth->u.dst.dev);
1786 rth->fl.oif = 0;
1787 rth->rt_spec_dst= spec_dst;
1788
1789 rth->u.dst.input = ip_forward;
1790 rth->u.dst.output = ip_output;
1791
1792 rt_set_nexthop(rth, res, itag);
1793
1794 rth->rt_flags = flags;
1795
1796 *result = rth;
1797 err = 0;
1798 cleanup:
1799 /* release the working reference to the output device */
1800 in_dev_put(out_dev);
1801 return err;
1802}
1803
1804static inline int ip_mkroute_input_def(struct sk_buff *skb,
1805 struct fib_result* res,
1806 const struct flowi *fl,
1807 struct in_device *in_dev,
1808 u32 daddr, u32 saddr, u32 tos)
1809{
1810 struct rtable* rth = NULL;
1811 int err;
1812 unsigned hash;
1813
1814#ifdef CONFIG_IP_ROUTE_MULTIPATH
1815 if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1816 fib_select_multipath(fl, res);
1817#endif
1818
1819 /* create a routing cache entry */
1820 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1821 if (err)
1822 return err;
1823
1824 /* put it into the cache */
1825 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
1826 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
1827}
1828
1829static inline int ip_mkroute_input(struct sk_buff *skb,
1830 struct fib_result* res,
1831 const struct flowi *fl,
1832 struct in_device *in_dev,
1833 u32 daddr, u32 saddr, u32 tos)
1834{
1835#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
1836 struct rtable* rth = NULL, *rtres;
1837 unsigned char hop, hopcount;
1838 int err = -EINVAL;
1839 unsigned int hash;
1840
1841 if (res->fi)
1842 hopcount = res->fi->fib_nhs;
1843 else
1844 hopcount = 1;
1845
1846 /* distinguish between multipath and singlepath */
1847 if (hopcount < 2)
1848 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
1849 saddr, tos);
1850
1851 /* add all alternatives to the routing cache */
1852 for (hop = 0; hop < hopcount; hop++) {
1853 res->nh_sel = hop;
1854
1855 /* put reference to previous result */
1856 if (hop)
1857 ip_rt_put(rtres);
1858
1859 /* create a routing cache entry */
1860 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
1861 &rth);
1862 if (err)
1863 return err;
1864
1865 /* put it into the cache */
1866 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
1867 err = rt_intern_hash(hash, rth, &rtres);
1868 if (err)
1869 return err;
1870
1871 /* forward hop information to multipath impl. */
1872 multipath_set_nhinfo(rth,
1873 FIB_RES_NETWORK(*res),
1874 FIB_RES_NETMASK(*res),
1875 res->prefixlen,
1876 &FIB_RES_NH(*res));
1877 }
1878 skb->dst = &rtres->u.dst;
1879 return err;
1880#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1881 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
1882#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1883}
1884
1885
1886/*
1887 * NOTE. We drop all packets that have a local source
1888 * address, because every properly looped-back packet
1889 * must already have the correct destination attached by the output routine.
1890 *
1891 * This approach solves two big problems:
1892 * 1. Non-simplex devices are handled properly.
1893 * 2. IP spoofing attempts are filtered with a 100% guarantee.
1894 */
1895
1896static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
1897 u8 tos, struct net_device *dev)
1898{
1899 struct fib_result res;
1900 struct in_device *in_dev = in_dev_get(dev);
1901 struct flowi fl = { .nl_u = { .ip4_u =
1902 { .daddr = daddr,
1903 .saddr = saddr,
1904 .tos = tos,
1905 .scope = RT_SCOPE_UNIVERSE,
1906#ifdef CONFIG_IP_ROUTE_FWMARK
1907 .fwmark = skb->nfmark
1908#endif
1909 } },
1910 .iif = dev->ifindex };
1911 unsigned flags = 0;
1912 u32 itag = 0;
1913 struct rtable * rth;
1914 unsigned hash;
1915 u32 spec_dst;
1916 int err = -EINVAL;
1917 int free_res = 0;
1918
1919 /* IP on this device is disabled. */
1920
1921 if (!in_dev)
1922 goto out;
1923
1924 /* Check for the most weird martians, which can be not detected
1925 by fib_lookup.
1926 */
1927
1928 if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
1929 goto martian_source;
1930
1931 if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
1932 goto brd_input;
1933
1934 /* Accept zero addresses only to limited broadcast;
1935 * I do not even know whether to fix it or not. Waiting for complaints :-)
1936 */
1937 if (ZERONET(saddr))
1938 goto martian_source;
1939
1940 if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
1941 goto martian_destination;
1942
1943 /*
1944 * Now we are ready to route packet.
1945 */
1946 if ((err = fib_lookup(&fl, &res)) != 0) {
1947 if (!IN_DEV_FORWARD(in_dev))
1948 goto e_hostunreach;
1949 goto no_route;
1950 }
1951 free_res = 1;
1952
1953 RT_CACHE_STAT_INC(in_slow_tot);
1954
1955 if (res.type == RTN_BROADCAST)
1956 goto brd_input;
1957
1958 if (res.type == RTN_LOCAL) {
1959 int result;
1960 result = fib_validate_source(saddr, daddr, tos,
1961 loopback_dev.ifindex,
1962 dev, &spec_dst, &itag);
1963 if (result < 0)
1964 goto martian_source;
1965 if (result)
1966 flags |= RTCF_DIRECTSRC;
1967 spec_dst = daddr;
1968 goto local_input;
1969 }
1970
1971 if (!IN_DEV_FORWARD(in_dev))
1972 goto e_hostunreach;
1973 if (res.type != RTN_UNICAST)
1974 goto martian_destination;
1975
1976 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1977 if (err == -ENOBUFS)
1978 goto e_nobufs;
1979 if (err == -EINVAL)
1980 goto e_inval;
1981
1982done:
1983 in_dev_put(in_dev);
1984 if (free_res)
1985 fib_res_put(&res);
1986out: return err;
1987
1988brd_input:
1989 if (skb->protocol != htons(ETH_P_IP))
1990 goto e_inval;
1991
1992 if (ZERONET(saddr))
1993 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1994 else {
1995 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1996 &itag);
1997 if (err < 0)
1998 goto martian_source;
1999 if (err)
2000 flags |= RTCF_DIRECTSRC;
2001 }
2002 flags |= RTCF_BROADCAST;
2003 res.type = RTN_BROADCAST;
2004 RT_CACHE_STAT_INC(in_brd);
2005
2006local_input:
2007 rth = dst_alloc(&ipv4_dst_ops);
2008 if (!rth)
2009 goto e_nobufs;
2010
2011 rth->u.dst.output= ip_rt_bug;
2012
2013 atomic_set(&rth->u.dst.__refcnt, 1);
2014 rth->u.dst.flags= DST_HOST;
2015 if (in_dev->cnf.no_policy)
2016 rth->u.dst.flags |= DST_NOPOLICY;
2017 rth->fl.fl4_dst = daddr;
2018 rth->rt_dst = daddr;
2019 rth->fl.fl4_tos = tos;
2020#ifdef CONFIG_IP_ROUTE_FWMARK
2021 rth->fl.fl4_fwmark= skb->nfmark;
2022#endif
2023 rth->fl.fl4_src = saddr;
2024 rth->rt_src = saddr;
2025#ifdef CONFIG_NET_CLS_ROUTE
2026 rth->u.dst.tclassid = itag;
2027#endif
2028 rth->rt_iif =
2029 rth->fl.iif = dev->ifindex;
2030 rth->u.dst.dev = &loopback_dev;
2031 dev_hold(rth->u.dst.dev);
2032 rth->idev = in_dev_get(rth->u.dst.dev);
2033 rth->rt_gateway = daddr;
2034 rth->rt_spec_dst= spec_dst;
2035 rth->u.dst.input= ip_local_deliver;
2036 rth->rt_flags = flags|RTCF_LOCAL;
2037 if (res.type == RTN_UNREACHABLE) {
2038 rth->u.dst.input= ip_error;
2039 rth->u.dst.error= -err;
2040 rth->rt_flags &= ~RTCF_LOCAL;
2041 }
2042 rth->rt_type = res.type;
2043 hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
2044 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
2045 goto done;
2046
2047no_route:
2048 RT_CACHE_STAT_INC(in_no_route);
2049 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
2050 res.type = RTN_UNREACHABLE;
2051 goto local_input;
2052
2053 /*
2054 * Do not cache martian addresses: they should be logged (RFC1812)
2055 */
2056martian_destination:
2057 RT_CACHE_STAT_INC(in_martian_dst);
2058#ifdef CONFIG_IP_ROUTE_VERBOSE
2059 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
2060 printk(KERN_WARNING "martian destination %u.%u.%u.%u from "
2061 "%u.%u.%u.%u, dev %s\n",
2062 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
2063#endif
2064
2065e_hostunreach:
2066	err = -EHOSTUNREACH;
2067	goto done;
2068
2069e_inval:
2070 err = -EINVAL;
2071 goto done;
2072
2073e_nobufs:
2074 err = -ENOBUFS;
2075 goto done;
2076
2077martian_source:
2078 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2079 goto e_inval;
2080}
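/*
 * Illustrative note: the slow input path above interns its result under
 * the same key that the fast path in ip_route_input() below computes,
 * i.e. for a packet arriving on ifindex "iif" (tos already masked with
 * IPTOS_RT_MASK by the caller):
 *
 *	hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
 *
 * If the two key computations ever diverged, slow-path results would be
 * cached but never found again, and every packet of that flow would keep
 * taking the slow path.
 */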
2081
2082int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
2083 u8 tos, struct net_device *dev)
2084{
2085 struct rtable * rth;
2086 unsigned hash;
2087 int iif = dev->ifindex;
2088
2089 tos &= IPTOS_RT_MASK;
2090 hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
2091
2092 rcu_read_lock();
2093 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2094 rth = rcu_dereference(rth->u.rt_next)) {
2095 if (rth->fl.fl4_dst == daddr &&
2096 rth->fl.fl4_src == saddr &&
2097 rth->fl.iif == iif &&
2098 rth->fl.oif == 0 &&
2099#ifdef CONFIG_IP_ROUTE_FWMARK
2100 rth->fl.fl4_fwmark == skb->nfmark &&
2101#endif
2102 rth->fl.fl4_tos == tos) {
2103 rth->u.dst.lastuse = jiffies;
2104 dst_hold(&rth->u.dst);
2105 rth->u.dst.__use++;
2106 RT_CACHE_STAT_INC(in_hit);
2107 rcu_read_unlock();
2108 skb->dst = (struct dst_entry*)rth;
2109 return 0;
2110 }
2111 RT_CACHE_STAT_INC(in_hlist_search);
2112 }
2113 rcu_read_unlock();
2114
2115	/* Multicast recognition logic was moved from the route cache to here.
2116	   The problem was that too many Ethernet cards have broken/missing
2117	   hardware multicast filters :-( As a result, a host on a multicast
2118	   network acquires a lot of useless route cache entries, e.g. from
2119	   SDR messages arriving from all over the world. Now we try to get
2120	   rid of them. Really, provided the software IP multicast filter is
2121	   organized reasonably (at least hashed), this is no slower than
2122	   keeping reject entries in the route cache.
2123	   Note that multicast routers are not affected, because a route
2124	   cache entry is created for them eventually.
2125	 */
2126 if (MULTICAST(daddr)) {
2127 struct in_device *in_dev;
2128
2129 rcu_read_lock();
2130		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2131 int our = ip_check_mc(in_dev, daddr, saddr,
2132 skb->nh.iph->protocol);
2133 if (our
2134#ifdef CONFIG_IP_MROUTE
2135 || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
2136#endif
2137 ) {
2138 rcu_read_unlock();
2139 return ip_route_input_mc(skb, daddr, saddr,
2140 tos, dev, our);
2141 }
2142 }
2143 rcu_read_unlock();
2144 return -EINVAL;
2145 }
2146 return ip_route_input_slow(skb, daddr, saddr, tos, dev);
2147}
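/*
 * A minimal caller sketch (illustrative, loosely modelled on
 * ip_rcv_finish() in net/ipv4/ip_input.c; the error handling is
 * simplified) showing how the receive path is expected to drive
 * ip_route_input():
 *
 *	struct iphdr *iph = skb->nh.iph;
 *
 *	if (skb->dst == NULL &&
 *	    ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev))
 *		goto drop;
 *	return dst_input(skb);	(dispatches via skb->dst->input, i.e.
 *				 ip_local_deliver, ip_forward or ip_error)
 *
 * A cache hit fills skb->dst straight from rt_hash_table; a miss falls
 * through to ip_route_input_mc() or ip_route_input_slow().
 */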
2148
2149static inline int __mkroute_output(struct rtable **result,
2150 struct fib_result* res,
2151 const struct flowi *fl,
2152 const struct flowi *oldflp,
2153 struct net_device *dev_out,
2154 unsigned flags)
2155{
2156 struct rtable *rth;
2157 struct in_device *in_dev;
2158 u32 tos = RT_FL_TOS(oldflp);
2159 int err = 0;
2160
2161 if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
2162 return -EINVAL;
2163
2164 if (fl->fl4_dst == 0xFFFFFFFF)
2165 res->type = RTN_BROADCAST;
2166 else if (MULTICAST(fl->fl4_dst))
2167 res->type = RTN_MULTICAST;
2168 else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst))
2169 return -EINVAL;
2170
2171 if (dev_out->flags & IFF_LOOPBACK)
2172 flags |= RTCF_LOCAL;
2173
2174 /* get work reference to inet device */
2175 in_dev = in_dev_get(dev_out);
2176 if (!in_dev)
2177 return -EINVAL;
2178
2179 if (res->type == RTN_BROADCAST) {
2180 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2181 if (res->fi) {
2182 fib_info_put(res->fi);
2183 res->fi = NULL;
2184 }
2185 } else if (res->type == RTN_MULTICAST) {
2186 flags |= RTCF_MULTICAST|RTCF_LOCAL;
2187 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
2188 oldflp->proto))
2189 flags &= ~RTCF_LOCAL;
2190		/* If no multicast route exists, use the default one,
2191		   but do not gateway in this case.
2192		   Yes, it is a hack.
2193		 */
2194 if (res->fi && res->prefixlen < 4) {
2195 fib_info_put(res->fi);
2196 res->fi = NULL;
2197 }
2198 }
2199
2200
2201 rth = dst_alloc(&ipv4_dst_ops);
2202 if (!rth) {
2203 err = -ENOBUFS;
2204 goto cleanup;
2205 }
2206
2207	atomic_set(&rth->u.dst.__refcnt, 1);
2208 rth->u.dst.flags= DST_HOST;
2209#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2210 if (res->fi) {
2211 rth->rt_multipath_alg = res->fi->fib_mp_alg;
2212 if (res->fi->fib_nhs > 1)
2213 rth->u.dst.flags |= DST_BALANCED;
2214 }
2215#endif
2216 if (in_dev->cnf.no_xfrm)
2217 rth->u.dst.flags |= DST_NOXFRM;
2218 if (in_dev->cnf.no_policy)
2219 rth->u.dst.flags |= DST_NOPOLICY;
2220
2221 rth->fl.fl4_dst = oldflp->fl4_dst;
2222 rth->fl.fl4_tos = tos;
2223 rth->fl.fl4_src = oldflp->fl4_src;
2224 rth->fl.oif = oldflp->oif;
2225#ifdef CONFIG_IP_ROUTE_FWMARK
2226 rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
2227#endif
2228 rth->rt_dst = fl->fl4_dst;
2229 rth->rt_src = fl->fl4_src;
2230 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2231	/* get references to the devices that are to be held by the routing
2232	   cache entry */
2233 rth->u.dst.dev = dev_out;
2234 dev_hold(dev_out);
2235 rth->idev = in_dev_get(dev_out);
2236 rth->rt_gateway = fl->fl4_dst;
2237 rth->rt_spec_dst= fl->fl4_src;
2238
2239 rth->u.dst.output=ip_output;
2240
2241 RT_CACHE_STAT_INC(out_slow_tot);
2242
2243 if (flags & RTCF_LOCAL) {
2244 rth->u.dst.input = ip_local_deliver;
2245 rth->rt_spec_dst = fl->fl4_dst;
2246 }
2247 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2248 rth->rt_spec_dst = fl->fl4_src;
2249 if (flags & RTCF_LOCAL &&
2250 !(dev_out->flags & IFF_LOOPBACK)) {
2251 rth->u.dst.output = ip_mc_output;
2252 RT_CACHE_STAT_INC(out_slow_mc);
2253 }
2254#ifdef CONFIG_IP_MROUTE
2255 if (res->type == RTN_MULTICAST) {
2256 if (IN_DEV_MFORWARD(in_dev) &&
2257 !LOCAL_MCAST(oldflp->fl4_dst)) {
2258 rth->u.dst.input = ip_mr_input;
2259 rth->u.dst.output = ip_mc_output;
2260 }
2261 }
2262#endif
2263 }
2264
2265 rt_set_nexthop(rth, res, 0);
2266
2267 rth->rt_flags = flags;
2268
2269 *result = rth;
2270 cleanup:
2271 /* release work reference to inet device */
2272 in_dev_put(in_dev);
2273
2274 return err;
2275}
2276
2277static inline int ip_mkroute_output_def(struct rtable **rp,
2278 struct fib_result* res,
2279 const struct flowi *fl,
2280 const struct flowi *oldflp,
2281 struct net_device *dev_out,
2282 unsigned flags)
2283{
2284	struct rtable *rth = NULL;
2285	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2286	unsigned hash;
2287	if (err == 0) {
2288		u32 tos = RT_FL_TOS(oldflp);
2289
2290 hash = rt_hash_code(oldflp->fl4_dst,
2291 oldflp->fl4_src ^ (oldflp->oif << 5), tos);
2292 err = rt_intern_hash(hash, rth, rp);
2293 }
2294
2295 return err;
2296}
2297
2298static inline int ip_mkroute_output(struct rtable** rp,
2299 struct fib_result* res,
2300 const struct flowi *fl,
2301 const struct flowi *oldflp,
2302 struct net_device *dev_out,
2303 unsigned flags)
2304{
2305#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2306 u32 tos = RT_FL_TOS(oldflp);
2307 unsigned char hop;
2308 unsigned hash;
2309 int err = -EINVAL;
2310	struct rtable *rth = NULL;
2311
2312 if (res->fi && res->fi->fib_nhs > 1) {
2313 unsigned char hopcount = res->fi->fib_nhs;
2314
2315 for (hop = 0; hop < hopcount; hop++) {
2316 struct net_device *dev2nexthop;
2317
2318 res->nh_sel = hop;
2319
2320 /* hold a work reference to the output device */
2321 dev2nexthop = FIB_RES_DEV(*res);
2322 dev_hold(dev2nexthop);
2323
2324			/* put reference to previous result */
2325			if (hop)
2326				ip_rt_put(*rp);
2327
2328 err = __mkroute_output(&rth, res, fl, oldflp,
2329 dev2nexthop, flags);
2330
2331 if (err != 0)
2332 goto cleanup;
2333
2334 hash = rt_hash_code(oldflp->fl4_dst,
2335 oldflp->fl4_src ^
2336 (oldflp->oif << 5), tos);
2337 err = rt_intern_hash(hash, rth, rp);
2338
2339 /* forward hop information to multipath impl. */
2340 multipath_set_nhinfo(rth,
2341 FIB_RES_NETWORK(*res),
2342 FIB_RES_NETMASK(*res),
2343 res->prefixlen,
2344 &FIB_RES_NH(*res));
2345 cleanup:
2346 /* release work reference to output device */
2347 dev_put(dev2nexthop);
2348
2349 if (err != 0)
2350 return err;
2351 }
1da177e4
LT
2352 return err;
2353 } else {
2354 return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
2355 flags);
2356 }
2357#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
2358 return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
2359#endif
2360}
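/*
 * Note: with CONFIG_IP_ROUTE_MULTIPATH_CACHED the loop above builds and
 * interns one cache entry per next hop of the fib_info, tagging each via
 * multipath_set_nhinfo(); *rp is left pointing at the entry interned for
 * the last hop processed successfully.  Without that option a single
 * entry is created through ip_mkroute_output_def().
 */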
2361
2362/*
2363 * Major route resolver routine.
2364 */
2365
2366static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2367{
2368 u32 tos = RT_FL_TOS(oldflp);
2369 struct flowi fl = { .nl_u = { .ip4_u =
2370 { .daddr = oldflp->fl4_dst,
2371 .saddr = oldflp->fl4_src,
2372 .tos = tos & IPTOS_RT_MASK,
2373 .scope = ((tos & RTO_ONLINK) ?
2374 RT_SCOPE_LINK :
2375 RT_SCOPE_UNIVERSE),
2376#ifdef CONFIG_IP_ROUTE_FWMARK
2377 .fwmark = oldflp->fl4_fwmark
2378#endif
2379 } },
2380 .iif = loopback_dev.ifindex,
2381 .oif = oldflp->oif };
2382 struct fib_result res;
2383 unsigned flags = 0;
2384 struct net_device *dev_out = NULL;
2385 int free_res = 0;
2386 int err;
2387
2388
2389 res.fi = NULL;
2390#ifdef CONFIG_IP_MULTIPLE_TABLES
2391 res.r = NULL;
2392#endif
2393
2394 if (oldflp->fl4_src) {
2395 err = -EINVAL;
2396 if (MULTICAST(oldflp->fl4_src) ||
2397 BADCLASS(oldflp->fl4_src) ||
2398 ZERONET(oldflp->fl4_src))
2399 goto out;
2400
2401 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2402 dev_out = ip_dev_find(oldflp->fl4_src);
2403 if (dev_out == NULL)
2404 goto out;
2405
2406		/* I removed the check for oif == dev_out->oif here.
2407		   It was wrong for two reasons:
2408		   1. ip_dev_find(saddr) can return the wrong iface if saddr
2409		      is assigned to multiple interfaces.
2410		   2. Moreover, we are allowed to send packets with the saddr
2411		      of another iface. --ANK
2412		 */
2413
2414 if (oldflp->oif == 0
2415 && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) {
2416			/* Special hack: the user can direct multicasts
2417			   and limited broadcast via the necessary interface
2418			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2419			   This hack is not just for fun, it allows
2420			   vic, vat and friends to work.
2421			   They bind a socket to loopback, set ttl to zero
2422			   and expect that it will work.
2423			   From the viewpoint of the routing cache they are broken,
2424			   because we are not allowed to build a multicast path
2425			   with a loopback source addr (look, the routing cache
2426			   cannot know that ttl is zero, so the packet will not
2427			   leave this host and the route is valid).
2428			   Luckily, this hack is a good workaround.
2429			 */
2430
2431 fl.oif = dev_out->ifindex;
2432 goto make_route;
2433 }
2434 if (dev_out)
2435 dev_put(dev_out);
2436 dev_out = NULL;
2437 }
2438
2439
2440 if (oldflp->oif) {
2441 dev_out = dev_get_by_index(oldflp->oif);
2442 err = -ENODEV;
2443 if (dev_out == NULL)
2444 goto out;
2445
2446		/* RACE: Check return value of inet_select_addr instead. */
2447		if (__in_dev_get_rtnl(dev_out) == NULL) {
2448 dev_put(dev_out);
2449 goto out; /* Wrong error code */
2450 }
2451
2452 if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) {
2453 if (!fl.fl4_src)
2454 fl.fl4_src = inet_select_addr(dev_out, 0,
2455 RT_SCOPE_LINK);
2456 goto make_route;
2457 }
2458 if (!fl.fl4_src) {
2459 if (MULTICAST(oldflp->fl4_dst))
2460 fl.fl4_src = inet_select_addr(dev_out, 0,
2461 fl.fl4_scope);
2462 else if (!oldflp->fl4_dst)
2463 fl.fl4_src = inet_select_addr(dev_out, 0,
2464 RT_SCOPE_HOST);
2465 }
2466 }
2467
2468 if (!fl.fl4_dst) {
2469 fl.fl4_dst = fl.fl4_src;
2470 if (!fl.fl4_dst)
2471 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
2472 if (dev_out)
2473 dev_put(dev_out);
2474 dev_out = &loopback_dev;
2475 dev_hold(dev_out);
2476 fl.oif = loopback_dev.ifindex;
2477 res.type = RTN_LOCAL;
2478 flags |= RTCF_LOCAL;
2479 goto make_route;
2480 }
2481
2482 if (fib_lookup(&fl, &res)) {
2483 res.fi = NULL;
2484 if (oldflp->oif) {
2485		/* Apparently, the routing tables are wrong. Assume
2486		   that the destination is on-link.
2487
2488		   WHY? DW.
2489		   Because we are allowed to send to an iface
2490		   even if it has NO routes and NO assigned
2491		   addresses. When oif is specified, the routing
2492		   tables are looked up with only one purpose:
2493		   to catch whether the destination is gatewayed rather
2494		   than direct. Moreover, if MSG_DONTROUTE is set,
2495		   we send the packet, ignoring both the routing tables
2496		   and the ifaddr state. --ANK
2497
2498
2499		   We could do this even when oif is unknown
2500		   (as IPv6 likely does), but we do not.
2501		 */
2502
2503 if (fl.fl4_src == 0)
2504 fl.fl4_src = inet_select_addr(dev_out, 0,
2505 RT_SCOPE_LINK);
2506 res.type = RTN_UNICAST;
2507 goto make_route;
2508 }
2509 if (dev_out)
2510 dev_put(dev_out);
2511 err = -ENETUNREACH;
2512 goto out;
2513 }
2514 free_res = 1;
2515
2516 if (res.type == RTN_LOCAL) {
2517 if (!fl.fl4_src)
2518 fl.fl4_src = fl.fl4_dst;
2519 if (dev_out)
2520 dev_put(dev_out);
2521 dev_out = &loopback_dev;
2522 dev_hold(dev_out);
2523 fl.oif = dev_out->ifindex;
2524 if (res.fi)
2525 fib_info_put(res.fi);
2526 res.fi = NULL;
2527 flags |= RTCF_LOCAL;
2528 goto make_route;
2529 }
2530
2531#ifdef CONFIG_IP_ROUTE_MULTIPATH
2532 if (res.fi->fib_nhs > 1 && fl.oif == 0)
2533 fib_select_multipath(&fl, &res);
2534 else
2535#endif
2536 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
2537 fib_select_default(&fl, &res);
2538
2539 if (!fl.fl4_src)
2540 fl.fl4_src = FIB_RES_PREFSRC(res);
2541
2542 if (dev_out)
2543 dev_put(dev_out);
2544 dev_out = FIB_RES_DEV(res);
2545 dev_hold(dev_out);
2546 fl.oif = dev_out->ifindex;
2547
2548
2549make_route:
2550 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2551
2552
2553 if (free_res)
2554 fib_res_put(&res);
2555 if (dev_out)
2556 dev_put(dev_out);
2557out: return err;
2558}
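/*
 * A compressed, descriptive view of the resolver above:
 *
 *	1. validate an explicit source address and map it to a device;
 *	2. honour an explicit oif, picking a source address if needed;
 *	3. with no destination at all, short-circuit to a loopback route;
 *	4. fib_lookup(); on failure with an oif set, assume on-link;
 *	5. RTN_LOCAL results are pinned to loopback_dev;
 *	6. otherwise select a nexthop (multipath/default route), fill in
 *	   the preferred source and hand off to ip_mkroute_output().
 */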
2559
2560int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2561{
2562 unsigned hash;
2563 struct rtable *rth;
2564
2565 hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
2566
2567 rcu_read_lock_bh();
2568 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2569 rth = rcu_dereference(rth->u.rt_next)) {
2570 if (rth->fl.fl4_dst == flp->fl4_dst &&
2571 rth->fl.fl4_src == flp->fl4_src &&
2572 rth->fl.iif == 0 &&
2573 rth->fl.oif == flp->oif &&
2574#ifdef CONFIG_IP_ROUTE_FWMARK
2575 rth->fl.fl4_fwmark == flp->fl4_fwmark &&
2576#endif
2577 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2578 (IPTOS_RT_MASK | RTO_ONLINK))) {
2579
2580 /* check for multipath routes and choose one if
2581 * necessary
2582 */
2583 if (multipath_select_route(flp, rth, rp)) {
2584 dst_hold(&(*rp)->u.dst);
2585 RT_CACHE_STAT_INC(out_hit);
2586 rcu_read_unlock_bh();
2587 return 0;
2588 }
2589
2590 rth->u.dst.lastuse = jiffies;
2591 dst_hold(&rth->u.dst);
2592 rth->u.dst.__use++;
2593 RT_CACHE_STAT_INC(out_hit);
2594 rcu_read_unlock_bh();
2595 *rp = rth;
2596 return 0;
2597 }
2598 RT_CACHE_STAT_INC(out_hlist_search);
2599 }
2600 rcu_read_unlock_bh();
2601
2602 return ip_route_output_slow(rp, flp);
2603}
2604
2605EXPORT_SYMBOL_GPL(__ip_route_output_key);
2606
2607int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
2608{
2609 int err;
2610
2611 if ((err = __ip_route_output_key(rp, flp)) != 0)
2612 return err;
2613
2614 if (flp->proto) {
2615 if (!flp->fl4_src)
2616 flp->fl4_src = (*rp)->rt_src;
2617 if (!flp->fl4_dst)
2618 flp->fl4_dst = (*rp)->rt_dst;
2619 return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
2620 }
2621
2622 return 0;
2623}
2624
2625EXPORT_SYMBOL_GPL(ip_route_output_flow);
2626
2627int ip_route_output_key(struct rtable **rp, struct flowi *flp)
2628{
2629 return ip_route_output_flow(rp, flp, NULL, 0);
2630}
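/*
 * A minimal, hypothetical caller sketch (variable names are illustrative
 * only) showing the usual pattern for the output key API in this kernel
 * generation:
 *
 *	struct flowi fl = { .oif = 0,
 *			    .nl_u = { .ip4_u = { .daddr = dst_ip,
 *						 .saddr = 0,
 *						 .tos   = RT_TOS(tos) } },
 *			    .proto = IPPROTO_UDP };
 *	struct rtable *rt;
 *
 *	if (ip_route_output_key(&rt, &fl))
 *		return -ENETUNREACH;
 *	... use rt->rt_src, rt->rt_gateway, rt->u.dst.dev ...
 *	ip_rt_put(rt);
 *
 * ip_route_output_flow() is the variant to use when fl.proto is set and
 * the result should additionally pass through xfrm_lookup() for IPsec
 * policy resolution.
 */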
2631
2632static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2633			int nowait, unsigned int flags)
2634{
2635 struct rtable *rt = (struct rtable*)skb->dst;
2636 struct rtmsg *r;
2637 struct nlmsghdr *nlh;
2638 unsigned char *b = skb->tail;
2639 struct rta_cacheinfo ci;
2640#ifdef CONFIG_IP_MROUTE
2641 struct rtattr *eptr;
2642#endif
2643	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
2644	r = NLMSG_DATA(nlh);
2645 r->rtm_family = AF_INET;
2646 r->rtm_dst_len = 32;
2647 r->rtm_src_len = 0;
2648 r->rtm_tos = rt->fl.fl4_tos;
2649 r->rtm_table = RT_TABLE_MAIN;
2650 r->rtm_type = rt->rt_type;
2651 r->rtm_scope = RT_SCOPE_UNIVERSE;
2652 r->rtm_protocol = RTPROT_UNSPEC;
2653 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2654 if (rt->rt_flags & RTCF_NOTIFY)
2655 r->rtm_flags |= RTM_F_NOTIFY;
2656 RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
2657 if (rt->fl.fl4_src) {
2658 r->rtm_src_len = 32;
2659 RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
2660 }
2661 if (rt->u.dst.dev)
2662 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
2663#ifdef CONFIG_NET_CLS_ROUTE
2664 if (rt->u.dst.tclassid)
2665 RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
2666#endif
2667#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2668 if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
2669 __u32 alg = rt->rt_multipath_alg;
2670
2671 RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
2672 }
2673#endif
2674 if (rt->fl.iif)
2675 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
2676 else if (rt->rt_src != rt->fl.fl4_src)
2677 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
2678 if (rt->rt_dst != rt->rt_gateway)
2679 RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
2680 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2681 goto rtattr_failure;
2682 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2683 ci.rta_used = rt->u.dst.__use;
2684 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2685 if (rt->u.dst.expires)
2686 ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies);
2687 else
2688 ci.rta_expires = 0;
2689 ci.rta_error = rt->u.dst.error;
2690 ci.rta_id = ci.rta_ts = ci.rta_tsage = 0;
2691 if (rt->peer) {
2692 ci.rta_id = rt->peer->ip_id_count;
2693 if (rt->peer->tcp_ts_stamp) {
2694 ci.rta_ts = rt->peer->tcp_ts;
2695 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
2696 }
2697 }
2698#ifdef CONFIG_IP_MROUTE
2699 eptr = (struct rtattr*)skb->tail;
2700#endif
2701 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2702 if (rt->fl.iif) {
2703#ifdef CONFIG_IP_MROUTE
2704 u32 dst = rt->rt_dst;
2705
2706 if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
2707 ipv4_devconf.mc_forwarding) {
2708 int err = ipmr_get_route(skb, r, nowait);
2709 if (err <= 0) {
2710 if (!nowait) {
2711 if (err == 0)
2712 return 0;
2713 goto nlmsg_failure;
2714 } else {
2715 if (err == -EMSGSIZE)
2716 goto nlmsg_failure;
2717 ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
2718 }
2719 }
2720 } else
2721#endif
2722 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
2723 }
2724
2725 nlh->nlmsg_len = skb->tail - b;
2726 return skb->len;
2727
2728nlmsg_failure:
2729rtattr_failure:
2730 skb_trim(skb, b - skb->data);
2731 return -1;
2732}
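/*
 * rt_fill_info() above composes a single RTM_NEWROUTE message: an rtmsg
 * header followed by RTA_DST, optional RTA_SRC/RTA_OIF/RTA_FLOW, either
 * RTA_PREFSRC or RTA_GATEWAY as appropriate, the route metrics, and an
 * RTA_CACHEINFO blob whose lastuse/expires fields are exported in
 * clock_t units via jiffies_to_clock_t().  Input routes additionally
 * carry RTA_IIF (or, for forwarded multicast, whatever ipmr_get_route()
 * fills in).
 */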
2733
2734int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2735{
2736 struct rtattr **rta = arg;
2737 struct rtmsg *rtm = NLMSG_DATA(nlh);
2738 struct rtable *rt = NULL;
2739 u32 dst = 0;
2740 u32 src = 0;
2741 int iif = 0;
2742 int err = -ENOBUFS;
2743 struct sk_buff *skb;
2744
2745 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2746 if (!skb)
2747 goto out;
2748
2749	/* Reserve room for dummy headers; this skb can pass
2750	   through a good chunk of the routing engine.
2751	 */
2752 skb->mac.raw = skb->data;
2753 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2754
2755 if (rta[RTA_SRC - 1])
2756 memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
2757 if (rta[RTA_DST - 1])
2758 memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
2759 if (rta[RTA_IIF - 1])
2760 memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
2761
2762 if (iif) {
2763 struct net_device *dev = __dev_get_by_index(iif);
2764 err = -ENODEV;
2765 if (!dev)
2766 goto out_free;
2767 skb->protocol = htons(ETH_P_IP);
2768 skb->dev = dev;
2769 local_bh_disable();
2770 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2771 local_bh_enable();
2772 rt = (struct rtable*)skb->dst;
2773 if (!err && rt->u.dst.error)
2774 err = -rt->u.dst.error;
2775 } else {
2776 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
2777 .saddr = src,
2778 .tos = rtm->rtm_tos } } };
2779 int oif = 0;
2780 if (rta[RTA_OIF - 1])
2781 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
2782 fl.oif = oif;
2783 err = ip_route_output_key(&rt, &fl);
2784 }
2785 if (err)
2786 goto out_free;
2787
2788 skb->dst = &rt->u.dst;
2789 if (rtm->rtm_flags & RTM_F_NOTIFY)
2790 rt->rt_flags |= RTCF_NOTIFY;
2791
2792 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2793
2794 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2795				RTM_NEWROUTE, 0, 0);
2796 if (!err)
2797 goto out_free;
2798 if (err < 0) {
2799 err = -EMSGSIZE;
2800 goto out_free;
2801 }
2802
2803 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
2804 if (err > 0)
2805 err = 0;
2806out: return err;
2807
2808out_free:
2809 kfree_skb(skb);
2810 goto out;
2811}
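/*
 * This is the handler behind "ip route get".  A request such as
 * "ip route get 10.1.2.3" (the address is only an example) arrives as
 * RTM_GETROUTE with an RTA_DST attribute and optional RTA_SRC, RTA_IIF
 * and RTA_OIF; the kernel resolves it through ip_route_input() or
 * ip_route_output_key() above and replies with a single RTM_NEWROUTE
 * built by rt_fill_info().
 */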
2812
2813int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2814{
2815 struct rtable *rt;
2816 int h, s_h;
2817 int idx, s_idx;
2818
2819 s_h = cb->args[0];
2820 s_idx = idx = cb->args[1];
2821 for (h = 0; h <= rt_hash_mask; h++) {
2822 if (h < s_h) continue;
2823 if (h > s_h)
2824 s_idx = 0;
2825 rcu_read_lock_bh();
2826 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2827 rt = rcu_dereference(rt->u.rt_next), idx++) {
2828 if (idx < s_idx)
2829 continue;
2830 skb->dst = dst_clone(&rt->u.dst);
2831 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2832				 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2833				 1, NLM_F_MULTI) <= 0) {
2834 dst_release(xchg(&skb->dst, NULL));
2835 rcu_read_unlock_bh();
2836 goto done;
2837 }
2838 dst_release(xchg(&skb->dst, NULL));
2839 }
2840 rcu_read_unlock_bh();
2841 }
2842
2843done:
2844 cb->args[0] = h;
2845 cb->args[1] = idx;
2846 return skb->len;
2847}
2848
2849void ip_rt_multicast_event(struct in_device *in_dev)
2850{
2851 rt_cache_flush(0);
2852}
2853
2854#ifdef CONFIG_SYSCTL
2855static int flush_delay;
2856
2857static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
2858 struct file *filp, void __user *buffer,
2859 size_t *lenp, loff_t *ppos)
2860{
2861 if (write) {
2862 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2863 rt_cache_flush(flush_delay);
2864 return 0;
2865 }
2866
2867 return -EINVAL;
2868}
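/*
 * In practice the handler above means that, for example,
 *
 *	echo 0 > /proc/sys/net/ipv4/route/flush
 *
 * requests a routing cache flush: the written integer is handed to
 * rt_cache_flush() as the delay, so 0 asks for an immediate flush (a
 * negative value presumably falls back to the min_delay setting).
 * Reads of the file are rejected with -EINVAL.
 */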
2869
2870static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
2871 int __user *name,
2872 int nlen,
2873 void __user *oldval,
2874 size_t __user *oldlenp,
2875 void __user *newval,
2876 size_t newlen,
2877 void **context)
2878{
2879 int delay;
2880 if (newlen != sizeof(int))
2881 return -EINVAL;
2882 if (get_user(delay, (int __user *)newval))
2883 return -EFAULT;
2884 rt_cache_flush(delay);
2885 return 0;
2886}
2887
2888ctl_table ipv4_route_table[] = {
2889 {
2890 .ctl_name = NET_IPV4_ROUTE_FLUSH,
2891 .procname = "flush",
2892 .data = &flush_delay,
2893 .maxlen = sizeof(int),
2894		.mode		= 0200,
2895 .proc_handler = &ipv4_sysctl_rtcache_flush,
2896 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
2897 },
2898 {
2899 .ctl_name = NET_IPV4_ROUTE_MIN_DELAY,
2900 .procname = "min_delay",
2901 .data = &ip_rt_min_delay,
2902 .maxlen = sizeof(int),
2903 .mode = 0644,
2904 .proc_handler = &proc_dointvec_jiffies,
2905 .strategy = &sysctl_jiffies,
2906 },
2907 {
2908 .ctl_name = NET_IPV4_ROUTE_MAX_DELAY,
2909 .procname = "max_delay",
2910 .data = &ip_rt_max_delay,
2911 .maxlen = sizeof(int),
2912 .mode = 0644,
2913 .proc_handler = &proc_dointvec_jiffies,
2914 .strategy = &sysctl_jiffies,
2915 },
2916 {
2917 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
2918 .procname = "gc_thresh",
2919 .data = &ipv4_dst_ops.gc_thresh,
2920 .maxlen = sizeof(int),
2921 .mode = 0644,
2922 .proc_handler = &proc_dointvec,
2923 },
2924 {
2925 .ctl_name = NET_IPV4_ROUTE_MAX_SIZE,
2926 .procname = "max_size",
2927 .data = &ip_rt_max_size,
2928 .maxlen = sizeof(int),
2929 .mode = 0644,
2930 .proc_handler = &proc_dointvec,
2931 },
2932 {
2933 /* Deprecated. Use gc_min_interval_ms */
2934
2935 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL,
2936 .procname = "gc_min_interval",
2937 .data = &ip_rt_gc_min_interval,
2938 .maxlen = sizeof(int),
2939 .mode = 0644,
2940 .proc_handler = &proc_dointvec_jiffies,
2941 .strategy = &sysctl_jiffies,
2942 },
2943 {
2944 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
2945 .procname = "gc_min_interval_ms",
2946 .data = &ip_rt_gc_min_interval,
2947 .maxlen = sizeof(int),
2948 .mode = 0644,
2949 .proc_handler = &proc_dointvec_ms_jiffies,
2950 .strategy = &sysctl_ms_jiffies,
2951 },
2952 {
2953 .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT,
2954 .procname = "gc_timeout",
2955 .data = &ip_rt_gc_timeout,
2956 .maxlen = sizeof(int),
2957 .mode = 0644,
2958 .proc_handler = &proc_dointvec_jiffies,
2959 .strategy = &sysctl_jiffies,
2960 },
2961 {
2962 .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL,
2963 .procname = "gc_interval",
2964 .data = &ip_rt_gc_interval,
2965 .maxlen = sizeof(int),
2966 .mode = 0644,
2967 .proc_handler = &proc_dointvec_jiffies,
2968 .strategy = &sysctl_jiffies,
2969 },
2970 {
2971 .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD,
2972 .procname = "redirect_load",
2973 .data = &ip_rt_redirect_load,
2974 .maxlen = sizeof(int),
2975 .mode = 0644,
2976 .proc_handler = &proc_dointvec,
2977 },
2978 {
2979 .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER,
2980 .procname = "redirect_number",
2981 .data = &ip_rt_redirect_number,
2982 .maxlen = sizeof(int),
2983 .mode = 0644,
2984 .proc_handler = &proc_dointvec,
2985 },
2986 {
2987 .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE,
2988 .procname = "redirect_silence",
2989 .data = &ip_rt_redirect_silence,
2990 .maxlen = sizeof(int),
2991 .mode = 0644,
2992 .proc_handler = &proc_dointvec,
2993 },
2994 {
2995 .ctl_name = NET_IPV4_ROUTE_ERROR_COST,
2996 .procname = "error_cost",
2997 .data = &ip_rt_error_cost,
2998 .maxlen = sizeof(int),
2999 .mode = 0644,
3000 .proc_handler = &proc_dointvec,
3001 },
3002 {
3003 .ctl_name = NET_IPV4_ROUTE_ERROR_BURST,
3004 .procname = "error_burst",
3005 .data = &ip_rt_error_burst,
3006 .maxlen = sizeof(int),
3007 .mode = 0644,
3008 .proc_handler = &proc_dointvec,
3009 },
3010 {
3011 .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY,
3012 .procname = "gc_elasticity",
3013 .data = &ip_rt_gc_elasticity,
3014 .maxlen = sizeof(int),
3015 .mode = 0644,
3016 .proc_handler = &proc_dointvec,
3017 },
3018 {
3019 .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES,
3020 .procname = "mtu_expires",
3021 .data = &ip_rt_mtu_expires,
3022 .maxlen = sizeof(int),
3023 .mode = 0644,
3024 .proc_handler = &proc_dointvec_jiffies,
3025 .strategy = &sysctl_jiffies,
3026 },
3027 {
3028 .ctl_name = NET_IPV4_ROUTE_MIN_PMTU,
3029 .procname = "min_pmtu",
3030 .data = &ip_rt_min_pmtu,
3031 .maxlen = sizeof(int),
3032 .mode = 0644,
3033 .proc_handler = &proc_dointvec,
3034 },
3035 {
3036 .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS,
3037 .procname = "min_adv_mss",
3038 .data = &ip_rt_min_advmss,
3039 .maxlen = sizeof(int),
3040 .mode = 0644,
3041 .proc_handler = &proc_dointvec,
3042 },
3043 {
3044 .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL,
3045 .procname = "secret_interval",
3046 .data = &ip_rt_secret_interval,
3047 .maxlen = sizeof(int),
3048 .mode = 0644,
3049 .proc_handler = &proc_dointvec_jiffies,
3050 .strategy = &sysctl_jiffies,
3051 },
3052 { .ctl_name = 0 }
3053};
3054#endif
3055
3056#ifdef CONFIG_NET_CLS_ROUTE
3057struct ip_rt_acct *ip_rt_acct;
3058
3059/* This code sucks. But you should have seen it before! --RR */
3060
3061/* IP route accounting ptr for this logical cpu number. */
3062#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
3063
3064#ifdef CONFIG_PROC_FS
3065static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
3066 int length, int *eof, void *data)
3067{
3068 unsigned int i;
3069
3070 if ((offset & 3) || (length & 3))
3071 return -EIO;
3072
3073 if (offset >= sizeof(struct ip_rt_acct) * 256) {
3074 *eof = 1;
3075 return 0;
3076 }
3077
3078 if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
3079 length = sizeof(struct ip_rt_acct) * 256 - offset;
3080 *eof = 1;
3081 }
3082
3083 offset /= sizeof(u32);
3084
3085 if (length > 0) {
3086 u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset;
3087 u32 *dst = (u32 *) buffer;
3088
3089 /* Copy first cpu. */
3090 *start = buffer;
3091 memcpy(dst, src, length);
3092
3093 /* Add the other cpus in, one int at a time */
3094 for_each_cpu(i) {
3095 unsigned int j;
3096
3097 src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
3098
3099 for (j = 0; j < length/4; j++)
3100 dst[j] += src[j];
3101 }
3102 }
3103 return length;
3104}
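/*
 * The read handler above sums the per-CPU accounting tables word by
 * word, so every 32-bit counter in the /proc/net/rt_acct output is a
 * total across CPUs: if CPU0 accounted 3 packets and CPU1 accounted 5
 * for the same realm slot, the exported counter reads 8.  The per-CPU
 * figures themselves are never exposed.
 */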
3105#endif /* CONFIG_PROC_FS */
3106#endif /* CONFIG_NET_CLS_ROUTE */
3107
3108static __initdata unsigned long rhash_entries;
3109static int __init set_rhash_entries(char *str)
3110{
3111 if (!str)
3112 return 0;
3113 rhash_entries = simple_strtoul(str, &str, 0);
3114 return 1;
3115}
3116__setup("rhash_entries=", set_rhash_entries);
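/*
 * Booting with e.g. "rhash_entries=32768" on the kernel command line
 * (the value is only an example) feeds that number to
 * alloc_large_system_hash() in ip_rt_init() below, overriding the
 * memory-scaled default size of the route cache hash table.
 */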
3117
3118int __init ip_rt_init(void)
3119{
3120	int rc = 0;
3121
3122 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
3123 (jiffies ^ (jiffies >> 7)));
3124
3125#ifdef CONFIG_NET_CLS_ROUTE
3126	{
3127	int order;
3128 for (order = 0;
3129 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
3130 /* NOTHING */;
3131 ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
3132 if (!ip_rt_acct)
3133 panic("IP: failed to allocate ip_rt_acct\n");
3134 memset(ip_rt_acct, 0, PAGE_SIZE << order);
3135	}
3136#endif
3137
3138 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
3139 sizeof(struct rtable),
3140 0, SLAB_HWCACHE_ALIGN,
3141 NULL, NULL);
3142
3143 if (!ipv4_dst_ops.kmem_cachep)
3144 panic("IP: failed to allocate ip_dst_cache\n");
3145
3146 rt_hash_table = (struct rt_hash_bucket *)
3147 alloc_large_system_hash("IP route cache",
3148 sizeof(struct rt_hash_bucket),
3149 rhash_entries,
3150 (num_physpages >= 128 * 1024) ?
3151					15 : 17,
3152					HASH_HIGHMEM,
3153					&rt_hash_log,
3154					&rt_hash_mask,
3155					0);
3156	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
3157	rt_hash_lock_init();
3158
3159 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
3160 ip_rt_max_size = (rt_hash_mask + 1) * 16;
3161
3162 devinet_init();
3163 ip_fib_init();
3164
3165 init_timer(&rt_flush_timer);
3166 rt_flush_timer.function = rt_run_flush;
3167 init_timer(&rt_periodic_timer);
3168 rt_periodic_timer.function = rt_check_expire;
3169 init_timer(&rt_secret_timer);
3170 rt_secret_timer.function = rt_secret_rebuild;
3171
3172	/* All the timers started at system startup tend
3173	   to synchronize. Perturb them a bit.
3174	 */
3175 rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval +
3176 ip_rt_gc_interval;
3177 add_timer(&rt_periodic_timer);
3178
3179 rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
3180 ip_rt_secret_interval;
3181 add_timer(&rt_secret_timer);
3182
3183#ifdef CONFIG_PROC_FS
3184 {
3185 struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
3186 if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
3187 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
3188 proc_net_stat))) {
3189 return -ENOMEM;
3190 }
3191 rtstat_pde->proc_fops = &rt_cpu_seq_fops;
3192 }
3193#ifdef CONFIG_NET_CLS_ROUTE
3194 create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
3195#endif
3196#endif
3197#ifdef CONFIG_XFRM
3198 xfrm_init();
3199 xfrm4_init();
3200#endif
3201 return rc;
3202}
3203
3204EXPORT_SYMBOL(__ip_select_ident);
3205EXPORT_SYMBOL(ip_route_input);
3206EXPORT_SYMBOL(ip_route_output_key);