net/ipv4/devinet.c
1 /*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 static struct ipv4_devconf ipv4_devconf = {
69 .data = {
70 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
75 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
76 },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 .data = {
81 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
87 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
88 },
89 };
90
91 #define IPV4_DEVCONF_DFLT(net, attr) \
92 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
93
94 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
95 [IFA_LOCAL] = { .type = NLA_U32 },
96 [IFA_ADDRESS] = { .type = NLA_U32 },
97 [IFA_BROADCAST] = { .type = NLA_U32 },
98 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
99 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
100 [IFA_FLAGS] = { .type = NLA_U32 },
101 };
102
103 #define IN4_ADDR_HSIZE_SHIFT 8
104 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
105
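/*
 * Hash table of all configured IPv4 addresses, keyed by ifa_local.
 * Writers hold the RTNL (see the ASSERT_RTNL() calls below); readers
 * such as __ip_dev_find() walk it under RCU.
 */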
106 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
107
108 static u32 inet_addr_hash(const struct net *net, __be32 addr)
109 {
110 u32 val = (__force u32) addr ^ net_hash_mix(net);
111
112 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
113 }
114
115 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
116 {
117 u32 hash = inet_addr_hash(net, ifa->ifa_local);
118
119 ASSERT_RTNL();
120 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
121 }
122
123 static void inet_hash_remove(struct in_ifaddr *ifa)
124 {
125 ASSERT_RTNL();
126 hlist_del_init_rcu(&ifa->hash);
127 }
128
129 /**
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
134 *
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
136 */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 u32 hash = inet_addr_hash(net, addr);
140 struct net_device *result = NULL;
141 struct in_ifaddr *ifa;
142
143 rcu_read_lock();
144 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 if (ifa->ifa_local == addr) {
146 struct net_device *dev = ifa->ifa_dev->dev;
147
148 if (!net_eq(dev_net(dev), net))
149 continue;
150 result = dev;
151 break;
152 }
153 }
154 if (!result) {
155 struct flowi4 fl4 = { .daddr = addr };
156 struct fib_result res = { 0 };
157 struct fib_table *local;
158
159 /* Fall back to the FIB local table so that communication
160 * over loopback subnets works.
161 */
162 local = fib_get_table(net, RT_TABLE_LOCAL);
163 if (local &&
164 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 res.type == RTN_LOCAL)
166 result = FIB_RES_DEV(res);
167 }
168 if (result && devref)
169 dev_hold(result);
170 rcu_read_unlock();
171 return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179 int destroy);
180 #ifdef CONFIG_SYSCTL
181 static int devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static int devinet_sysctl_register(struct in_device *idev)
185 {
186 return 0;
187 }
188 static void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192
193 /* Locks all the inet devices. */
194
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 if (ifa->ifa_dev)
204 in_dev_put(ifa->ifa_dev);
205 kfree(ifa);
206 }
207
208 static void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215 struct net_device *dev = idev->dev;
216
217 WARN_ON(idev->ifa_list);
218 WARN_ON(idev->mc_list);
219 kfree(rcu_dereference_protected(idev->mc_hash, 1));
220 #ifdef NET_REFCNT_DEBUG
221 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
222 #endif
223 dev_put(dev);
224 if (!idev->dead)
225 pr_err("Freeing alive in_device %p\n", idev);
226 else
227 kfree(idev);
228 }
229 EXPORT_SYMBOL(in_dev_finish_destroy);
230
231 static struct in_device *inetdev_init(struct net_device *dev)
232 {
233 struct in_device *in_dev;
234 int err = -ENOMEM;
235
236 ASSERT_RTNL();
237
238 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 if (!in_dev)
240 goto out;
241 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242 sizeof(in_dev->cnf));
243 in_dev->cnf.sysctl = NULL;
244 in_dev->dev = dev;
245 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246 if (!in_dev->arp_parms)
247 goto out_kfree;
248 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249 dev_disable_lro(dev);
250 /* Reference in_dev->dev */
251 dev_hold(dev);
252 /* Account for reference dev->ip_ptr (below) */
253 in_dev_hold(in_dev);
254
255 err = devinet_sysctl_register(in_dev);
256 if (err) {
257 in_dev->dead = 1;
258 in_dev_put(in_dev);
259 in_dev = NULL;
260 goto out;
261 }
262 ip_mc_init_dev(in_dev);
263 if (dev->flags & IFF_UP)
264 ip_mc_up(in_dev);
265
266 /* we can receive as soon as ip_ptr is set -- do this last */
267 rcu_assign_pointer(dev->ip_ptr, in_dev);
268 out:
269 return in_dev ?: ERR_PTR(err);
270 out_kfree:
271 kfree(in_dev);
272 in_dev = NULL;
273 goto out;
274 }
275
276 static void in_dev_rcu_put(struct rcu_head *head)
277 {
278 struct in_device *idev = container_of(head, struct in_device, rcu_head);
279 in_dev_put(idev);
280 }
281
282 static void inetdev_destroy(struct in_device *in_dev)
283 {
284 struct in_ifaddr *ifa;
285 struct net_device *dev;
286
287 ASSERT_RTNL();
288
289 dev = in_dev->dev;
290
291 in_dev->dead = 1;
292
293 ip_mc_destroy_dev(in_dev);
294
295 while ((ifa = in_dev->ifa_list) != NULL) {
296 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
297 inet_free_ifa(ifa);
298 }
299
300 RCU_INIT_POINTER(dev->ip_ptr, NULL);
301
302 devinet_sysctl_unregister(in_dev);
303 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
304 arp_ifdown(dev);
305
306 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
307 }
308
309 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
310 {
311 rcu_read_lock();
312 for_primary_ifa(in_dev) {
313 if (inet_ifa_match(a, ifa)) {
314 if (!b || inet_ifa_match(b, ifa)) {
315 rcu_read_unlock();
316 return 1;
317 }
318 }
319 } endfor_ifa(in_dev);
320 rcu_read_unlock();
321 return 0;
322 }
323
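/*
 * Unlink the address *ifap from its device.  Removing a primary address
 * also removes its secondaries, unless promote_secondaries is enabled,
 * in which case the first matching secondary is promoted to primary and
 * the remaining secondaries are re-linked behind it.
 */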
324 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
325 int destroy, struct nlmsghdr *nlh, u32 portid)
326 {
327 struct in_ifaddr *promote = NULL;
328 struct in_ifaddr *ifa, *ifa1 = *ifap;
329 struct in_ifaddr *last_prim = in_dev->ifa_list;
330 struct in_ifaddr *prev_prom = NULL;
331 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
332
333 ASSERT_RTNL();
334
335 if (in_dev->dead)
336 goto no_promotions;
337
338 /* 1. Deleting a primary ifaddr forces deletion of all secondaries
339 * unless alias promotion is set.
340 */
341
342 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
343 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
344
345 while ((ifa = *ifap1) != NULL) {
346 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
347 ifa1->ifa_scope <= ifa->ifa_scope)
348 last_prim = ifa;
349
350 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
351 ifa1->ifa_mask != ifa->ifa_mask ||
352 !inet_ifa_match(ifa1->ifa_address, ifa)) {
353 ifap1 = &ifa->ifa_next;
354 prev_prom = ifa;
355 continue;
356 }
357
358 if (!do_promote) {
359 inet_hash_remove(ifa);
360 *ifap1 = ifa->ifa_next;
361
362 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
363 blocking_notifier_call_chain(&inetaddr_chain,
364 NETDEV_DOWN, ifa);
365 inet_free_ifa(ifa);
366 } else {
367 promote = ifa;
368 break;
369 }
370 }
371 }
372
373 /* On promotion, all secondaries from the subnet change
374 * their primary IP; we must remove all their routes silently
375 * and later add them back with the new prefsrc. Do this
376 * while all addresses are still on the device list.
377 */
378 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
379 if (ifa1->ifa_mask == ifa->ifa_mask &&
380 inet_ifa_match(ifa1->ifa_address, ifa))
381 fib_del_ifaddr(ifa, ifa1);
382 }
383
384 no_promotions:
385 /* 2. Unlink it */
386
387 *ifap = ifa1->ifa_next;
388 inet_hash_remove(ifa1);
389
390 /* 3. Announce address deletion */
391
392 /* Send the message first, then call the notifier.
393 At first sight, the FIB update triggered by the notifier
394 will refer to an already deleted ifaddr, which could confuse
395 netlink listeners. That is not true: gated sees
396 that the route was deleted and, if it still thinks the ifaddr
397 is valid, it will try to restore the deleted routes... Grr.
398 So this order is correct.
399 */
400 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
401 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
402
403 if (promote) {
404 struct in_ifaddr *next_sec = promote->ifa_next;
405
406 if (prev_prom) {
407 prev_prom->ifa_next = promote->ifa_next;
408 promote->ifa_next = last_prim->ifa_next;
409 last_prim->ifa_next = promote;
410 }
411
412 promote->ifa_flags &= ~IFA_F_SECONDARY;
413 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
414 blocking_notifier_call_chain(&inetaddr_chain,
415 NETDEV_UP, promote);
416 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
417 if (ifa1->ifa_mask != ifa->ifa_mask ||
418 !inet_ifa_match(ifa1->ifa_address, ifa))
419 continue;
420 fib_add_ifaddr(ifa);
421 }
422
423 }
424 if (destroy)
425 inet_free_ifa(ifa1);
426 }
427
428 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
429 int destroy)
430 {
431 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
432 }
433
434 static void check_lifetime(struct work_struct *work);
435
436 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
437
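/*
 * Link a new address into its device's list: an address covering the same
 * subnet as an existing primary is flagged IFA_F_SECONDARY and appended,
 * a new primary is inserted among the existing primaries.  The address is
 * then hashed for __ip_dev_find(), the lifetime checker is re-armed and
 * RTM_NEWADDR is announced via netlink and the inetaddr notifier chain.
 */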
438 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
439 u32 portid)
440 {
441 struct in_device *in_dev = ifa->ifa_dev;
442 struct in_ifaddr *ifa1, **ifap, **last_primary;
443
444 ASSERT_RTNL();
445
446 if (!ifa->ifa_local) {
447 inet_free_ifa(ifa);
448 return 0;
449 }
450
451 ifa->ifa_flags &= ~IFA_F_SECONDARY;
452 last_primary = &in_dev->ifa_list;
453
454 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
455 ifap = &ifa1->ifa_next) {
456 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
457 ifa->ifa_scope <= ifa1->ifa_scope)
458 last_primary = &ifa1->ifa_next;
459 if (ifa1->ifa_mask == ifa->ifa_mask &&
460 inet_ifa_match(ifa1->ifa_address, ifa)) {
461 if (ifa1->ifa_local == ifa->ifa_local) {
462 inet_free_ifa(ifa);
463 return -EEXIST;
464 }
465 if (ifa1->ifa_scope != ifa->ifa_scope) {
466 inet_free_ifa(ifa);
467 return -EINVAL;
468 }
469 ifa->ifa_flags |= IFA_F_SECONDARY;
470 }
471 }
472
473 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
474 prandom_seed((__force u32) ifa->ifa_local);
475 ifap = last_primary;
476 }
477
478 ifa->ifa_next = *ifap;
479 *ifap = ifa;
480
481 inet_hash_insert(dev_net(in_dev->dev), ifa);
482
483 cancel_delayed_work(&check_lifetime_work);
484 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
485
486 /* Send the message first, then call the notifier.
487 The notifier will trigger a FIB update, so that
488 netlink listeners will know about the new ifaddr. */
489 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
490 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
491
492 return 0;
493 }
494
495 static int inet_insert_ifa(struct in_ifaddr *ifa)
496 {
497 return __inet_insert_ifa(ifa, NULL, 0);
498 }
499
500 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
501 {
502 struct in_device *in_dev = __in_dev_get_rtnl(dev);
503
504 ASSERT_RTNL();
505
506 if (!in_dev) {
507 inet_free_ifa(ifa);
508 return -ENOBUFS;
509 }
510 ipv4_devconf_setall(in_dev);
511 neigh_parms_data_state_setall(in_dev->arp_parms);
512 if (ifa->ifa_dev != in_dev) {
513 WARN_ON(ifa->ifa_dev);
514 in_dev_hold(in_dev);
515 ifa->ifa_dev = in_dev;
516 }
517 if (ipv4_is_loopback(ifa->ifa_local))
518 ifa->ifa_scope = RT_SCOPE_HOST;
519 return inet_insert_ifa(ifa);
520 }
521
522 /* Caller must hold RCU or RTNL:
523 * we don't take a reference on the found in_device.
524 */
525 struct in_device *inetdev_by_index(struct net *net, int ifindex)
526 {
527 struct net_device *dev;
528 struct in_device *in_dev = NULL;
529
530 rcu_read_lock();
531 dev = dev_get_by_index_rcu(net, ifindex);
532 if (dev)
533 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
534 rcu_read_unlock();
535 return in_dev;
536 }
537 EXPORT_SYMBOL(inetdev_by_index);
538
539 /* Called only from RTNL semaphored context. No locks. */
540
541 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
542 __be32 mask)
543 {
544 ASSERT_RTNL();
545
546 for_primary_ifa(in_dev) {
547 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
548 return ifa;
549 } endfor_ifa(in_dev);
550 return NULL;
551 }
552
553 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
554 {
555 struct ip_mreqn mreq = {
556 .imr_multiaddr.s_addr = ifa->ifa_address,
557 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
558 };
559 int ret;
560
561 ASSERT_RTNL();
562
563 lock_sock(sk);
564 if (join)
565 ret = ip_mc_join_group(sk, &mreq);
566 else
567 ret = ip_mc_leave_group(sk, &mreq);
568 release_sock(sk);
569
570 return ret;
571 }
572
573 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
574 {
575 struct net *net = sock_net(skb->sk);
576 struct nlattr *tb[IFA_MAX+1];
577 struct in_device *in_dev;
578 struct ifaddrmsg *ifm;
579 struct in_ifaddr *ifa, **ifap;
580 int err = -EINVAL;
581
582 ASSERT_RTNL();
583
584 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585 if (err < 0)
586 goto errout;
587
588 ifm = nlmsg_data(nlh);
589 in_dev = inetdev_by_index(net, ifm->ifa_index);
590 if (!in_dev) {
591 err = -ENODEV;
592 goto errout;
593 }
594
595 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
596 ifap = &ifa->ifa_next) {
597 if (tb[IFA_LOCAL] &&
598 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
599 continue;
600
601 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
602 continue;
603
604 if (tb[IFA_ADDRESS] &&
605 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
606 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
607 continue;
608
609 if (ipv4_is_multicast(ifa->ifa_address))
610 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
611 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
612 return 0;
613 }
614
615 err = -EADDRNOTAVAIL;
616 errout:
617 return err;
618 }
619
620 #define INFINITY_LIFE_TIME 0xFFFFFFFF
621
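/*
 * Delayed work that ages non-permanent addresses: once the valid lifetime
 * expires the address is deleted, once the preferred lifetime expires it is
 * marked IFA_F_DEPRECATED, and the work is re-queued for the next expiry.
 */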
622 static void check_lifetime(struct work_struct *work)
623 {
624 unsigned long now, next, next_sec, next_sched;
625 struct in_ifaddr *ifa;
626 struct hlist_node *n;
627 int i;
628
629 now = jiffies;
630 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
631
632 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
633 bool change_needed = false;
634
635 rcu_read_lock();
636 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
637 unsigned long age;
638
639 if (ifa->ifa_flags & IFA_F_PERMANENT)
640 continue;
641
642 /* We try to batch several events at once. */
643 age = (now - ifa->ifa_tstamp +
644 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645
646 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647 age >= ifa->ifa_valid_lft) {
648 change_needed = true;
649 } else if (ifa->ifa_preferred_lft ==
650 INFINITY_LIFE_TIME) {
651 continue;
652 } else if (age >= ifa->ifa_preferred_lft) {
653 if (time_before(ifa->ifa_tstamp +
654 ifa->ifa_valid_lft * HZ, next))
655 next = ifa->ifa_tstamp +
656 ifa->ifa_valid_lft * HZ;
657
658 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
659 change_needed = true;
660 } else if (time_before(ifa->ifa_tstamp +
661 ifa->ifa_preferred_lft * HZ,
662 next)) {
663 next = ifa->ifa_tstamp +
664 ifa->ifa_preferred_lft * HZ;
665 }
666 }
667 rcu_read_unlock();
668 if (!change_needed)
669 continue;
670 rtnl_lock();
671 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
672 unsigned long age;
673
674 if (ifa->ifa_flags & IFA_F_PERMANENT)
675 continue;
676
677 /* We try to batch several events at once. */
678 age = (now - ifa->ifa_tstamp +
679 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
680
681 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
682 age >= ifa->ifa_valid_lft) {
683 struct in_ifaddr **ifap;
684
685 for (ifap = &ifa->ifa_dev->ifa_list;
686 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
687 if (*ifap == ifa) {
688 inet_del_ifa(ifa->ifa_dev,
689 ifap, 1);
690 break;
691 }
692 }
693 } else if (ifa->ifa_preferred_lft !=
694 INFINITY_LIFE_TIME &&
695 age >= ifa->ifa_preferred_lft &&
696 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
697 ifa->ifa_flags |= IFA_F_DEPRECATED;
698 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
699 }
700 }
701 rtnl_unlock();
702 }
703
704 next_sec = round_jiffies_up(next);
705 next_sched = next;
706
707 /* If rounded timeout is accurate enough, accept it. */
708 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
709 next_sched = next_sec;
710
711 now = jiffies;
712 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
713 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
714 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
715
716 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
717 next_sched - now);
718 }
719
720 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
721 __u32 prefered_lft)
722 {
723 unsigned long timeout;
724
725 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
726
727 timeout = addrconf_timeout_fixup(valid_lft, HZ);
728 if (addrconf_finite_timeout(timeout))
729 ifa->ifa_valid_lft = timeout;
730 else
731 ifa->ifa_flags |= IFA_F_PERMANENT;
732
733 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
734 if (addrconf_finite_timeout(timeout)) {
735 if (timeout == 0)
736 ifa->ifa_flags |= IFA_F_DEPRECATED;
737 ifa->ifa_preferred_lft = timeout;
738 }
739 ifa->ifa_tstamp = jiffies;
740 if (!ifa->ifa_cstamp)
741 ifa->ifa_cstamp = ifa->ifa_tstamp;
742 }
743
744 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
745 __u32 *pvalid_lft, __u32 *pprefered_lft)
746 {
747 struct nlattr *tb[IFA_MAX+1];
748 struct in_ifaddr *ifa;
749 struct ifaddrmsg *ifm;
750 struct net_device *dev;
751 struct in_device *in_dev;
752 int err;
753
754 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
755 if (err < 0)
756 goto errout;
757
758 ifm = nlmsg_data(nlh);
759 err = -EINVAL;
760 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
761 goto errout;
762
763 dev = __dev_get_by_index(net, ifm->ifa_index);
764 err = -ENODEV;
765 if (!dev)
766 goto errout;
767
768 in_dev = __in_dev_get_rtnl(dev);
769 err = -ENOBUFS;
770 if (!in_dev)
771 goto errout;
772
773 ifa = inet_alloc_ifa();
774 if (!ifa)
775 /*
776 * A potential in_dev allocation can be left alive; it stays
777 * assigned to its device and is destroyed with it.
778 */
779 goto errout;
780
781 ipv4_devconf_setall(in_dev);
782 neigh_parms_data_state_setall(in_dev->arp_parms);
783 in_dev_hold(in_dev);
784
785 if (!tb[IFA_ADDRESS])
786 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
787
788 INIT_HLIST_NODE(&ifa->hash);
789 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
790 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
791 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
792 ifm->ifa_flags;
793 ifa->ifa_scope = ifm->ifa_scope;
794 ifa->ifa_dev = in_dev;
795
796 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
797 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
798
799 if (tb[IFA_BROADCAST])
800 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
801
802 if (tb[IFA_LABEL])
803 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
804 else
805 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
806
807 if (tb[IFA_CACHEINFO]) {
808 struct ifa_cacheinfo *ci;
809
810 ci = nla_data(tb[IFA_CACHEINFO]);
811 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
812 err = -EINVAL;
813 goto errout_free;
814 }
815 *pvalid_lft = ci->ifa_valid;
816 *pprefered_lft = ci->ifa_prefered;
817 }
818
819 return ifa;
820
821 errout_free:
822 inet_free_ifa(ifa);
823 errout:
824 return ERR_PTR(err);
825 }
826
827 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
828 {
829 struct in_device *in_dev = ifa->ifa_dev;
830 struct in_ifaddr *ifa1, **ifap;
831
832 if (!ifa->ifa_local)
833 return NULL;
834
835 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
836 ifap = &ifa1->ifa_next) {
837 if (ifa1->ifa_mask == ifa->ifa_mask &&
838 inet_ifa_match(ifa1->ifa_address, ifa) &&
839 ifa1->ifa_local == ifa->ifa_local)
840 return ifa1;
841 }
842 return NULL;
843 }
844
845 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
846 {
847 struct net *net = sock_net(skb->sk);
848 struct in_ifaddr *ifa;
849 struct in_ifaddr *ifa_existing;
850 __u32 valid_lft = INFINITY_LIFE_TIME;
851 __u32 prefered_lft = INFINITY_LIFE_TIME;
852
853 ASSERT_RTNL();
854
855 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
856 if (IS_ERR(ifa))
857 return PTR_ERR(ifa);
858
859 ifa_existing = find_matching_ifa(ifa);
860 if (!ifa_existing) {
861 /* It would be best to check for !NLM_F_CREATE here but
862 * userspace already relies on not having to provide this.
863 */
864 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
865 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
866 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
867 true, ifa);
868
869 if (ret < 0) {
870 inet_free_ifa(ifa);
871 return ret;
872 }
873 }
874 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
875 } else {
876 inet_free_ifa(ifa);
877
878 if (nlh->nlmsg_flags & NLM_F_EXCL ||
879 !(nlh->nlmsg_flags & NLM_F_REPLACE))
880 return -EEXIST;
881 ifa = ifa_existing;
882 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
883 cancel_delayed_work(&check_lifetime_work);
884 queue_delayed_work(system_power_efficient_wq,
885 &check_lifetime_work, 0);
886 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
887 }
888 return 0;
889 }
890
891 /*
892 * Determine a default network mask, based on the IP address.
893 */
894
895 static int inet_abc_len(__be32 addr)
896 {
897 int rc = -1; /* Something else, probably a multicast. */
898
899 if (ipv4_is_zeronet(addr))
900 rc = 0;
901 else {
902 __u32 haddr = ntohl(addr);
903
904 if (IN_CLASSA(haddr))
905 rc = 8;
906 else if (IN_CLASSB(haddr))
907 rc = 16;
908 else if (IN_CLASSC(haddr))
909 rc = 24;
910 }
911
912 return rc;
913 }
914
915
916 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
917 {
918 struct ifreq ifr;
919 struct sockaddr_in sin_orig;
920 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
921 struct in_device *in_dev;
922 struct in_ifaddr **ifap = NULL;
923 struct in_ifaddr *ifa = NULL;
924 struct net_device *dev;
925 char *colon;
926 int ret = -EFAULT;
927 int tryaddrmatch = 0;
928
929 /*
930 * Fetch the caller's info block into kernel space
931 */
932
933 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
934 goto out;
935 ifr.ifr_name[IFNAMSIZ - 1] = 0;
936
937 /* save original address for comparison */
938 memcpy(&sin_orig, sin, sizeof(*sin));
939
940 colon = strchr(ifr.ifr_name, ':');
941 if (colon)
942 *colon = 0;
943
944 dev_load(net, ifr.ifr_name);
945
946 switch (cmd) {
947 case SIOCGIFADDR: /* Get interface address */
948 case SIOCGIFBRDADDR: /* Get the broadcast address */
949 case SIOCGIFDSTADDR: /* Get the destination address */
950 case SIOCGIFNETMASK: /* Get the netmask for the interface */
951 /* Note that these ioctls will not sleep,
952 so that we do not impose a lock.
953 One day we will be forced to put shlock here (I mean SMP)
954 */
955 tryaddrmatch = (sin_orig.sin_family == AF_INET);
956 memset(sin, 0, sizeof(*sin));
957 sin->sin_family = AF_INET;
958 break;
959
960 case SIOCSIFFLAGS:
961 ret = -EPERM;
962 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
963 goto out;
964 break;
965 case SIOCSIFADDR: /* Set interface address (and family) */
966 case SIOCSIFBRDADDR: /* Set the broadcast address */
967 case SIOCSIFDSTADDR: /* Set the destination address */
968 case SIOCSIFNETMASK: /* Set the netmask for the interface */
969 ret = -EPERM;
970 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
971 goto out;
972 ret = -EINVAL;
973 if (sin->sin_family != AF_INET)
974 goto out;
975 break;
976 default:
977 ret = -EINVAL;
978 goto out;
979 }
980
981 rtnl_lock();
982
983 ret = -ENODEV;
984 dev = __dev_get_by_name(net, ifr.ifr_name);
985 if (!dev)
986 goto done;
987
988 if (colon)
989 *colon = ':';
990
991 in_dev = __in_dev_get_rtnl(dev);
992 if (in_dev) {
993 if (tryaddrmatch) {
994 /* Matthias Andree */
995 /* compare label and address (4.4BSD style) */
996 /* note: we only do this for a limited set of ioctls
997 and only if the original address family was AF_INET.
998 This is checked above. */
999 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1000 ifap = &ifa->ifa_next) {
1001 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1002 sin_orig.sin_addr.s_addr ==
1003 ifa->ifa_local) {
1004 break; /* found */
1005 }
1006 }
1007 }
1008 /* If we didn't get a match, maybe the application is
1009 4.3BSD-style and passed in junk, so we fall back to
1010 comparing just the label */
1011 if (!ifa) {
1012 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1013 ifap = &ifa->ifa_next)
1014 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1015 break;
1016 }
1017 }
1018
1019 ret = -EADDRNOTAVAIL;
1020 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1021 goto done;
1022
1023 switch (cmd) {
1024 case SIOCGIFADDR: /* Get interface address */
1025 sin->sin_addr.s_addr = ifa->ifa_local;
1026 goto rarok;
1027
1028 case SIOCGIFBRDADDR: /* Get the broadcast address */
1029 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1030 goto rarok;
1031
1032 case SIOCGIFDSTADDR: /* Get the destination address */
1033 sin->sin_addr.s_addr = ifa->ifa_address;
1034 goto rarok;
1035
1036 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1037 sin->sin_addr.s_addr = ifa->ifa_mask;
1038 goto rarok;
1039
1040 case SIOCSIFFLAGS:
1041 if (colon) {
1042 ret = -EADDRNOTAVAIL;
1043 if (!ifa)
1044 break;
1045 ret = 0;
1046 if (!(ifr.ifr_flags & IFF_UP))
1047 inet_del_ifa(in_dev, ifap, 1);
1048 break;
1049 }
1050 ret = dev_change_flags(dev, ifr.ifr_flags);
1051 break;
1052
1053 case SIOCSIFADDR: /* Set interface address (and family) */
1054 ret = -EINVAL;
1055 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1056 break;
1057
1058 if (!ifa) {
1059 ret = -ENOBUFS;
1060 ifa = inet_alloc_ifa();
1061 if (!ifa)
1062 break;
1063 INIT_HLIST_NODE(&ifa->hash);
1064 if (colon)
1065 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1066 else
1067 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1068 } else {
1069 ret = 0;
1070 if (ifa->ifa_local == sin->sin_addr.s_addr)
1071 break;
1072 inet_del_ifa(in_dev, ifap, 0);
1073 ifa->ifa_broadcast = 0;
1074 ifa->ifa_scope = 0;
1075 }
1076
1077 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1078
1079 if (!(dev->flags & IFF_POINTOPOINT)) {
1080 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1081 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1082 if ((dev->flags & IFF_BROADCAST) &&
1083 ifa->ifa_prefixlen < 31)
1084 ifa->ifa_broadcast = ifa->ifa_address |
1085 ~ifa->ifa_mask;
1086 } else {
1087 ifa->ifa_prefixlen = 32;
1088 ifa->ifa_mask = inet_make_mask(32);
1089 }
1090 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1091 ret = inet_set_ifa(dev, ifa);
1092 break;
1093
1094 case SIOCSIFBRDADDR: /* Set the broadcast address */
1095 ret = 0;
1096 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1097 inet_del_ifa(in_dev, ifap, 0);
1098 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1099 inet_insert_ifa(ifa);
1100 }
1101 break;
1102
1103 case SIOCSIFDSTADDR: /* Set the destination address */
1104 ret = 0;
1105 if (ifa->ifa_address == sin->sin_addr.s_addr)
1106 break;
1107 ret = -EINVAL;
1108 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1109 break;
1110 ret = 0;
1111 inet_del_ifa(in_dev, ifap, 0);
1112 ifa->ifa_address = sin->sin_addr.s_addr;
1113 inet_insert_ifa(ifa);
1114 break;
1115
1116 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1117
1118 /*
1119 * The mask we set must be legal.
1120 */
1121 ret = -EINVAL;
1122 if (bad_mask(sin->sin_addr.s_addr, 0))
1123 break;
1124 ret = 0;
1125 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1126 __be32 old_mask = ifa->ifa_mask;
1127 inet_del_ifa(in_dev, ifap, 0);
1128 ifa->ifa_mask = sin->sin_addr.s_addr;
1129 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1130
1131 /* If the current broadcast address was derived
1132 * from the old netmask, recalculate it with the
1133 * new netmask. Otherwise it's a funny address,
1134 * so don't touch it since the user seems to
1135 * know what (s)he's doing...
1136 */
1137 if ((dev->flags & IFF_BROADCAST) &&
1138 (ifa->ifa_prefixlen < 31) &&
1139 (ifa->ifa_broadcast ==
1140 (ifa->ifa_local|~old_mask))) {
1141 ifa->ifa_broadcast = (ifa->ifa_local |
1142 ~sin->sin_addr.s_addr);
1143 }
1144 inet_insert_ifa(ifa);
1145 }
1146 break;
1147 }
1148 done:
1149 rtnl_unlock();
1150 out:
1151 return ret;
1152 rarok:
1153 rtnl_unlock();
1154 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1155 goto out;
1156 }
1157
1158 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1159 {
1160 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1161 struct in_ifaddr *ifa;
1162 struct ifreq ifr;
1163 int done = 0;
1164
1165 if (!in_dev)
1166 goto out;
1167
1168 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1169 if (!buf) {
1170 done += sizeof(ifr);
1171 continue;
1172 }
1173 if (len < (int) sizeof(ifr))
1174 break;
1175 memset(&ifr, 0, sizeof(struct ifreq));
1176 strcpy(ifr.ifr_name, ifa->ifa_label);
1177
1178 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1179 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1180 ifa->ifa_local;
1181
1182 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1183 done = -EFAULT;
1184 break;
1185 }
1186 buf += sizeof(struct ifreq);
1187 len -= sizeof(struct ifreq);
1188 done += sizeof(struct ifreq);
1189 }
1190 out:
1191 return done;
1192 }
1193
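/*
 * Pick a source address on @dev with scope <= @scope, preferring one in the
 * same subnet as @dst.  If the device has none, fall back to its L3 master
 * (VRF) device and then to any other device in the same L3 domain.
 */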
1194 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1195 {
1196 __be32 addr = 0;
1197 struct in_device *in_dev;
1198 struct net *net = dev_net(dev);
1199 int master_idx;
1200
1201 rcu_read_lock();
1202 in_dev = __in_dev_get_rcu(dev);
1203 if (!in_dev)
1204 goto no_in_dev;
1205
1206 for_primary_ifa(in_dev) {
1207 if (ifa->ifa_scope > scope)
1208 continue;
1209 if (!dst || inet_ifa_match(dst, ifa)) {
1210 addr = ifa->ifa_local;
1211 break;
1212 }
1213 if (!addr)
1214 addr = ifa->ifa_local;
1215 } endfor_ifa(in_dev);
1216
1217 if (addr)
1218 goto out_unlock;
1219 no_in_dev:
1220 master_idx = l3mdev_master_ifindex_rcu(dev);
1221
1222 /* For VRFs, the VRF device takes the place of the loopback device,
1223 * with addresses on it being preferred. Note in such cases the
1224 * loopback device will be among the devices that fail the master_idx
1225 * equality check in the loop below.
1226 */
1227 if (master_idx &&
1228 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1229 (in_dev = __in_dev_get_rcu(dev))) {
1230 for_primary_ifa(in_dev) {
1231 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1232 ifa->ifa_scope <= scope) {
1233 addr = ifa->ifa_local;
1234 goto out_unlock;
1235 }
1236 } endfor_ifa(in_dev);
1237 }
1238
1239 /* Non-loopback addresses, even on the loopback device, should be
1240 preferred in this case. It is important that lo is the first
1241 interface in the dev_base list.
1242 */
1243 for_each_netdev_rcu(net, dev) {
1244 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1245 continue;
1246
1247 in_dev = __in_dev_get_rcu(dev);
1248 if (!in_dev)
1249 continue;
1250
1251 for_primary_ifa(in_dev) {
1252 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1253 ifa->ifa_scope <= scope) {
1254 addr = ifa->ifa_local;
1255 goto out_unlock;
1256 }
1257 } endfor_ifa(in_dev);
1258 }
1259 out_unlock:
1260 rcu_read_unlock();
1261 return addr;
1262 }
1263 EXPORT_SYMBOL(inet_select_addr);
1264
1265 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1266 __be32 local, int scope)
1267 {
1268 int same = 0;
1269 __be32 addr = 0;
1270
1271 for_ifa(in_dev) {
1272 if (!addr &&
1273 (local == ifa->ifa_local || !local) &&
1274 ifa->ifa_scope <= scope) {
1275 addr = ifa->ifa_local;
1276 if (same)
1277 break;
1278 }
1279 if (!same) {
1280 same = (!local || inet_ifa_match(local, ifa)) &&
1281 (!dst || inet_ifa_match(dst, ifa));
1282 if (same && addr) {
1283 if (local || !dst)
1284 break;
1285 /* Is the selected addr into dst subnet? */
1286 if (inet_ifa_match(addr, ifa))
1287 break;
1288 /* No, then can we use new local src? */
1289 if (ifa->ifa_scope <= scope) {
1290 addr = ifa->ifa_local;
1291 break;
1292 }
1293 /* search for large dst subnet for addr */
1294 same = 0;
1295 }
1296 }
1297 } endfor_ifa(in_dev);
1298
1299 return same ? addr : 0;
1300 }
1301
1302 /*
1303 * Confirm that local IP address exists using wildcards:
1304 * - net: netns to check, cannot be NULL
1305 * - in_dev: only on this interface, NULL=any interface
1306 * - dst: only in the same subnet as dst, 0=any dst
1307 * - local: address, 0=autoselect the local address
1308 * - scope: maximum allowed scope value for the local address
1309 */
1310 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1311 __be32 dst, __be32 local, int scope)
1312 {
1313 __be32 addr = 0;
1314 struct net_device *dev;
1315
1316 if (in_dev)
1317 return confirm_addr_indev(in_dev, dst, local, scope);
1318
1319 rcu_read_lock();
1320 for_each_netdev_rcu(net, dev) {
1321 in_dev = __in_dev_get_rcu(dev);
1322 if (in_dev) {
1323 addr = confirm_addr_indev(in_dev, dst, local, scope);
1324 if (addr)
1325 break;
1326 }
1327 }
1328 rcu_read_unlock();
1329
1330 return addr;
1331 }
1332 EXPORT_SYMBOL(inet_confirm_addr);
1333
1334 /*
1335 * Device notifier
1336 */
1337
1338 int register_inetaddr_notifier(struct notifier_block *nb)
1339 {
1340 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1341 }
1342 EXPORT_SYMBOL(register_inetaddr_notifier);
1343
1344 int unregister_inetaddr_notifier(struct notifier_block *nb)
1345 {
1346 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1347 }
1348 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1349
1350 /* Rename ifa_labels for a device name change. Make some effort to preserve
1351 * existing alias numbering and to create unique labels if possible.
1352 */
1353 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1354 {
1355 struct in_ifaddr *ifa;
1356 int named = 0;
1357
1358 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1359 char old[IFNAMSIZ], *dot;
1360
1361 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1362 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1363 if (named++ == 0)
1364 goto skip;
1365 dot = strchr(old, ':');
1366 if (!dot) {
1367 sprintf(old, ":%d", named);
1368 dot = old;
1369 }
1370 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1371 strcat(ifa->ifa_label, dot);
1372 else
1373 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1374 skip:
1375 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1376 }
1377 }
1378
1379 static bool inetdev_valid_mtu(unsigned int mtu)
1380 {
1381 return mtu >= 68;
1382 }
1383
1384 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1385 struct in_device *in_dev)
1386
1387 {
1388 struct in_ifaddr *ifa;
1389
1390 for (ifa = in_dev->ifa_list; ifa;
1391 ifa = ifa->ifa_next) {
1392 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1393 ifa->ifa_local, dev,
1394 ifa->ifa_local, NULL,
1395 dev->dev_addr, NULL);
1396 }
1397 }
1398
1399 /* Called only under RTNL semaphore */
1400
1401 static int inetdev_event(struct notifier_block *this, unsigned long event,
1402 void *ptr)
1403 {
1404 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1405 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1406
1407 ASSERT_RTNL();
1408
1409 if (!in_dev) {
1410 if (event == NETDEV_REGISTER) {
1411 in_dev = inetdev_init(dev);
1412 if (IS_ERR(in_dev))
1413 return notifier_from_errno(PTR_ERR(in_dev));
1414 if (dev->flags & IFF_LOOPBACK) {
1415 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1416 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1417 }
1418 } else if (event == NETDEV_CHANGEMTU) {
1419 /* Re-enabling IP */
1420 if (inetdev_valid_mtu(dev->mtu))
1421 in_dev = inetdev_init(dev);
1422 }
1423 goto out;
1424 }
1425
1426 switch (event) {
1427 case NETDEV_REGISTER:
1428 pr_debug("%s: bug\n", __func__);
1429 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1430 break;
1431 case NETDEV_UP:
1432 if (!inetdev_valid_mtu(dev->mtu))
1433 break;
1434 if (dev->flags & IFF_LOOPBACK) {
1435 struct in_ifaddr *ifa = inet_alloc_ifa();
1436
1437 if (ifa) {
1438 INIT_HLIST_NODE(&ifa->hash);
1439 ifa->ifa_local =
1440 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1441 ifa->ifa_prefixlen = 8;
1442 ifa->ifa_mask = inet_make_mask(8);
1443 in_dev_hold(in_dev);
1444 ifa->ifa_dev = in_dev;
1445 ifa->ifa_scope = RT_SCOPE_HOST;
1446 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1447 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1448 INFINITY_LIFE_TIME);
1449 ipv4_devconf_setall(in_dev);
1450 neigh_parms_data_state_setall(in_dev->arp_parms);
1451 inet_insert_ifa(ifa);
1452 }
1453 }
1454 ip_mc_up(in_dev);
1455 /* fall through */
1456 case NETDEV_CHANGEADDR:
1457 if (!IN_DEV_ARP_NOTIFY(in_dev))
1458 break;
1459 /* fall through */
1460 case NETDEV_NOTIFY_PEERS:
1461 /* Send gratuitous ARP to notify of link change */
1462 inetdev_send_gratuitous_arp(dev, in_dev);
1463 break;
1464 case NETDEV_DOWN:
1465 ip_mc_down(in_dev);
1466 break;
1467 case NETDEV_PRE_TYPE_CHANGE:
1468 ip_mc_unmap(in_dev);
1469 break;
1470 case NETDEV_POST_TYPE_CHANGE:
1471 ip_mc_remap(in_dev);
1472 break;
1473 case NETDEV_CHANGEMTU:
1474 if (inetdev_valid_mtu(dev->mtu))
1475 break;
1476 /* disable IP when MTU is not enough */
1477 case NETDEV_UNREGISTER:
1478 inetdev_destroy(in_dev);
1479 break;
1480 case NETDEV_CHANGENAME:
1481 /* Do not notify about label change, this event is
1482 * not interesting to applications using netlink.
1483 */
1484 inetdev_changename(dev, in_dev);
1485
1486 devinet_sysctl_unregister(in_dev);
1487 devinet_sysctl_register(in_dev);
1488 break;
1489 }
1490 out:
1491 return NOTIFY_DONE;
1492 }
1493
1494 static struct notifier_block ip_netdev_notifier = {
1495 .notifier_call = inetdev_event,
1496 };
1497
1498 static size_t inet_nlmsg_size(void)
1499 {
1500 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1501 + nla_total_size(4) /* IFA_ADDRESS */
1502 + nla_total_size(4) /* IFA_LOCAL */
1503 + nla_total_size(4) /* IFA_BROADCAST */
1504 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1505 + nla_total_size(4) /* IFA_FLAGS */
1506 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1507 }
1508
1509 static inline u32 cstamp_delta(unsigned long cstamp)
1510 {
1511 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1512 }
1513
1514 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1515 unsigned long tstamp, u32 preferred, u32 valid)
1516 {
1517 struct ifa_cacheinfo ci;
1518
1519 ci.cstamp = cstamp_delta(cstamp);
1520 ci.tstamp = cstamp_delta(tstamp);
1521 ci.ifa_prefered = preferred;
1522 ci.ifa_valid = valid;
1523
1524 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1525 }
1526
1527 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1528 u32 portid, u32 seq, int event, unsigned int flags)
1529 {
1530 struct ifaddrmsg *ifm;
1531 struct nlmsghdr *nlh;
1532 u32 preferred, valid;
1533
1534 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1535 if (!nlh)
1536 return -EMSGSIZE;
1537
1538 ifm = nlmsg_data(nlh);
1539 ifm->ifa_family = AF_INET;
1540 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1541 ifm->ifa_flags = ifa->ifa_flags;
1542 ifm->ifa_scope = ifa->ifa_scope;
1543 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1544
1545 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1546 preferred = ifa->ifa_preferred_lft;
1547 valid = ifa->ifa_valid_lft;
1548 if (preferred != INFINITY_LIFE_TIME) {
1549 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1550
1551 if (preferred > tval)
1552 preferred -= tval;
1553 else
1554 preferred = 0;
1555 if (valid != INFINITY_LIFE_TIME) {
1556 if (valid > tval)
1557 valid -= tval;
1558 else
1559 valid = 0;
1560 }
1561 }
1562 } else {
1563 preferred = INFINITY_LIFE_TIME;
1564 valid = INFINITY_LIFE_TIME;
1565 }
1566 if ((ifa->ifa_address &&
1567 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1568 (ifa->ifa_local &&
1569 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1570 (ifa->ifa_broadcast &&
1571 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1572 (ifa->ifa_label[0] &&
1573 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1574 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1575 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1576 preferred, valid))
1577 goto nla_put_failure;
1578
1579 nlmsg_end(skb, nlh);
1580 return 0;
1581
1582 nla_put_failure:
1583 nlmsg_cancel(skb, nlh);
1584 return -EMSGSIZE;
1585 }
1586
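/*
 * RTM_GETADDR dump callback: walks the namespace's device hash under RCU and
 * fills one RTM_NEWADDR message per address, saving its position in
 * cb->args[] so a dump that fills the skb can be resumed.
 */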
1587 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1588 {
1589 struct net *net = sock_net(skb->sk);
1590 int h, s_h;
1591 int idx, s_idx;
1592 int ip_idx, s_ip_idx;
1593 struct net_device *dev;
1594 struct in_device *in_dev;
1595 struct in_ifaddr *ifa;
1596 struct hlist_head *head;
1597
1598 s_h = cb->args[0];
1599 s_idx = idx = cb->args[1];
1600 s_ip_idx = ip_idx = cb->args[2];
1601
1602 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1603 idx = 0;
1604 head = &net->dev_index_head[h];
1605 rcu_read_lock();
1606 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1607 net->dev_base_seq;
1608 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1609 if (idx < s_idx)
1610 goto cont;
1611 if (h > s_h || idx > s_idx)
1612 s_ip_idx = 0;
1613 in_dev = __in_dev_get_rcu(dev);
1614 if (!in_dev)
1615 goto cont;
1616
1617 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1618 ifa = ifa->ifa_next, ip_idx++) {
1619 if (ip_idx < s_ip_idx)
1620 continue;
1621 if (inet_fill_ifaddr(skb, ifa,
1622 NETLINK_CB(cb->skb).portid,
1623 cb->nlh->nlmsg_seq,
1624 RTM_NEWADDR, NLM_F_MULTI) < 0) {
1625 rcu_read_unlock();
1626 goto done;
1627 }
1628 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1629 }
1630 cont:
1631 idx++;
1632 }
1633 rcu_read_unlock();
1634 }
1635
1636 done:
1637 cb->args[0] = h;
1638 cb->args[1] = idx;
1639 cb->args[2] = ip_idx;
1640
1641 return skb->len;
1642 }
1643
1644 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1645 u32 portid)
1646 {
1647 struct sk_buff *skb;
1648 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1649 int err = -ENOBUFS;
1650 struct net *net;
1651
1652 net = dev_net(ifa->ifa_dev->dev);
1653 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1654 if (!skb)
1655 goto errout;
1656
1657 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1658 if (err < 0) {
1659 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1660 WARN_ON(err == -EMSGSIZE);
1661 kfree_skb(skb);
1662 goto errout;
1663 }
1664 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1665 return;
1666 errout:
1667 if (err < 0)
1668 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1669 }
1670
1671 static size_t inet_get_link_af_size(const struct net_device *dev,
1672 u32 ext_filter_mask)
1673 {
1674 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1675
1676 if (!in_dev)
1677 return 0;
1678
1679 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1680 }
1681
1682 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1683 u32 ext_filter_mask)
1684 {
1685 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1686 struct nlattr *nla;
1687 int i;
1688
1689 if (!in_dev)
1690 return -ENODATA;
1691
1692 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1693 if (!nla)
1694 return -EMSGSIZE;
1695
1696 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1697 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1698
1699 return 0;
1700 }
1701
1702 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1703 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1704 };
1705
1706 static int inet_validate_link_af(const struct net_device *dev,
1707 const struct nlattr *nla)
1708 {
1709 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1710 int err, rem;
1711
1712 if (dev && !__in_dev_get_rtnl(dev))
1713 return -EAFNOSUPPORT;
1714
1715 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1716 if (err < 0)
1717 return err;
1718
1719 if (tb[IFLA_INET_CONF]) {
1720 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1721 int cfgid = nla_type(a);
1722
1723 if (nla_len(a) < 4)
1724 return -EINVAL;
1725
1726 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1727 return -EINVAL;
1728 }
1729 }
1730
1731 return 0;
1732 }
1733
1734 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1735 {
1736 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1737 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1738 int rem;
1739
1740 if (!in_dev)
1741 return -EAFNOSUPPORT;
1742
1743 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1744 BUG();
1745
1746 if (tb[IFLA_INET_CONF]) {
1747 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1748 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1749 }
1750
1751 return 0;
1752 }
1753
1754 static int inet_netconf_msgsize_devconf(int type)
1755 {
1756 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1757 + nla_total_size(4); /* NETCONFA_IFINDEX */
1758 bool all = false;
1759
1760 if (type == NETCONFA_ALL)
1761 all = true;
1762
1763 if (all || type == NETCONFA_FORWARDING)
1764 size += nla_total_size(4);
1765 if (all || type == NETCONFA_RP_FILTER)
1766 size += nla_total_size(4);
1767 if (all || type == NETCONFA_MC_FORWARDING)
1768 size += nla_total_size(4);
1769 if (all || type == NETCONFA_PROXY_NEIGH)
1770 size += nla_total_size(4);
1771 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1772 size += nla_total_size(4);
1773
1774 return size;
1775 }
1776
1777 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1778 struct ipv4_devconf *devconf, u32 portid,
1779 u32 seq, int event, unsigned int flags,
1780 int type)
1781 {
1782 struct nlmsghdr *nlh;
1783 struct netconfmsg *ncm;
1784 bool all = false;
1785
1786 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1787 flags);
1788 if (!nlh)
1789 return -EMSGSIZE;
1790
1791 if (type == NETCONFA_ALL)
1792 all = true;
1793
1794 ncm = nlmsg_data(nlh);
1795 ncm->ncm_family = AF_INET;
1796
1797 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1798 goto nla_put_failure;
1799
1800 if ((all || type == NETCONFA_FORWARDING) &&
1801 nla_put_s32(skb, NETCONFA_FORWARDING,
1802 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1803 goto nla_put_failure;
1804 if ((all || type == NETCONFA_RP_FILTER) &&
1805 nla_put_s32(skb, NETCONFA_RP_FILTER,
1806 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1807 goto nla_put_failure;
1808 if ((all || type == NETCONFA_MC_FORWARDING) &&
1809 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1810 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1811 goto nla_put_failure;
1812 if ((all || type == NETCONFA_PROXY_NEIGH) &&
1813 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1814 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1815 goto nla_put_failure;
1816 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1817 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1818 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1819 goto nla_put_failure;
1820
1821 nlmsg_end(skb, nlh);
1822 return 0;
1823
1824 nla_put_failure:
1825 nlmsg_cancel(skb, nlh);
1826 return -EMSGSIZE;
1827 }
1828
1829 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1830 struct ipv4_devconf *devconf)
1831 {
1832 struct sk_buff *skb;
1833 int err = -ENOBUFS;
1834
1835 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1836 if (!skb)
1837 goto errout;
1838
1839 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1840 RTM_NEWNETCONF, 0, type);
1841 if (err < 0) {
1842 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1843 WARN_ON(err == -EMSGSIZE);
1844 kfree_skb(skb);
1845 goto errout;
1846 }
1847 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1848 return;
1849 errout:
1850 if (err < 0)
1851 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1852 }
1853
1854 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1855 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1856 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1857 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1858 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
1859 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
1860 };
1861
1862 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1863 struct nlmsghdr *nlh)
1864 {
1865 struct net *net = sock_net(in_skb->sk);
1866 struct nlattr *tb[NETCONFA_MAX+1];
1867 struct netconfmsg *ncm;
1868 struct sk_buff *skb;
1869 struct ipv4_devconf *devconf;
1870 struct in_device *in_dev;
1871 struct net_device *dev;
1872 int ifindex;
1873 int err;
1874
1875 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1876 devconf_ipv4_policy);
1877 if (err < 0)
1878 goto errout;
1879
1880 err = -EINVAL;
1881 if (!tb[NETCONFA_IFINDEX])
1882 goto errout;
1883
1884 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1885 switch (ifindex) {
1886 case NETCONFA_IFINDEX_ALL:
1887 devconf = net->ipv4.devconf_all;
1888 break;
1889 case NETCONFA_IFINDEX_DEFAULT:
1890 devconf = net->ipv4.devconf_dflt;
1891 break;
1892 default:
1893 dev = __dev_get_by_index(net, ifindex);
1894 if (!dev)
1895 goto errout;
1896 in_dev = __in_dev_get_rtnl(dev);
1897 if (!in_dev)
1898 goto errout;
1899 devconf = &in_dev->cnf;
1900 break;
1901 }
1902
1903 err = -ENOBUFS;
1904 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1905 if (!skb)
1906 goto errout;
1907
1908 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1909 NETLINK_CB(in_skb).portid,
1910 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1911 NETCONFA_ALL);
1912 if (err < 0) {
1913 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1914 WARN_ON(err == -EMSGSIZE);
1915 kfree_skb(skb);
1916 goto errout;
1917 }
1918 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1919 errout:
1920 return err;
1921 }
1922
1923 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1924 struct netlink_callback *cb)
1925 {
1926 struct net *net = sock_net(skb->sk);
1927 int h, s_h;
1928 int idx, s_idx;
1929 struct net_device *dev;
1930 struct in_device *in_dev;
1931 struct hlist_head *head;
1932
1933 s_h = cb->args[0];
1934 s_idx = idx = cb->args[1];
1935
1936 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1937 idx = 0;
1938 head = &net->dev_index_head[h];
1939 rcu_read_lock();
1940 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1941 net->dev_base_seq;
1942 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1943 if (idx < s_idx)
1944 goto cont;
1945 in_dev = __in_dev_get_rcu(dev);
1946 if (!in_dev)
1947 goto cont;
1948
1949 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1950 &in_dev->cnf,
1951 NETLINK_CB(cb->skb).portid,
1952 cb->nlh->nlmsg_seq,
1953 RTM_NEWNETCONF,
1954 NLM_F_MULTI,
1955 NETCONFA_ALL) < 0) {
1956 rcu_read_unlock();
1957 goto done;
1958 }
1959 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1960 cont:
1961 idx++;
1962 }
1963 rcu_read_unlock();
1964 }
1965 if (h == NETDEV_HASHENTRIES) {
1966 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1967 net->ipv4.devconf_all,
1968 NETLINK_CB(cb->skb).portid,
1969 cb->nlh->nlmsg_seq,
1970 RTM_NEWNETCONF, NLM_F_MULTI,
1971 NETCONFA_ALL) < 0)
1972 goto done;
1973 else
1974 h++;
1975 }
1976 if (h == NETDEV_HASHENTRIES + 1) {
1977 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1978 net->ipv4.devconf_dflt,
1979 NETLINK_CB(cb->skb).portid,
1980 cb->nlh->nlmsg_seq,
1981 RTM_NEWNETCONF, NLM_F_MULTI,
1982 NETCONFA_ALL) < 0)
1983 goto done;
1984 else
1985 h++;
1986 }
1987 done:
1988 cb->args[0] = h;
1989 cb->args[1] = idx;
1990
1991 return skb->len;
1992 }
1993
1994 #ifdef CONFIG_SYSCTL
1995
1996 static void devinet_copy_dflt_conf(struct net *net, int i)
1997 {
1998 struct net_device *dev;
1999
2000 rcu_read_lock();
2001 for_each_netdev_rcu(net, dev) {
2002 struct in_device *in_dev;
2003
2004 in_dev = __in_dev_get_rcu(dev);
2005 if (in_dev && !test_bit(i, in_dev->cnf.state))
2006 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2007 }
2008 rcu_read_unlock();
2009 }
2010
2011 /* called with RTNL locked */
2012 static void inet_forward_change(struct net *net)
2013 {
2014 struct net_device *dev;
2015 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2016
2017 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2018 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2019 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2020 NETCONFA_IFINDEX_ALL,
2021 net->ipv4.devconf_all);
2022 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2023 NETCONFA_IFINDEX_DEFAULT,
2024 net->ipv4.devconf_dflt);
2025
2026 for_each_netdev(net, dev) {
2027 struct in_device *in_dev;
2028
2029 if (on)
2030 dev_disable_lro(dev);
2031
2032 in_dev = __in_dev_get_rtnl(dev);
2033 if (in_dev) {
2034 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2035 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2036 dev->ifindex, &in_dev->cnf);
2037 }
2038 }
2039 }
2040
2041 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2042 {
2043 if (cnf == net->ipv4.devconf_dflt)
2044 return NETCONFA_IFINDEX_DEFAULT;
2045 else if (cnf == net->ipv4.devconf_all)
2046 return NETCONFA_IFINDEX_ALL;
2047 else {
2048 struct in_device *idev
2049 = container_of(cnf, struct in_device, cnf);
2050 return idev->dev->ifindex;
2051 }
2052 }
2053
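/*
 * Generic proc handler for devconf entries: marks the value as explicitly set
 * (so later changes of the namespace default no longer overwrite it), copies
 * changes of the default into unset per-device values, and emits netconf
 * notifications / route cache flushes for the entries that require them.
 */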
2054 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2055 void __user *buffer,
2056 size_t *lenp, loff_t *ppos)
2057 {
2058 int old_value = *(int *)ctl->data;
2059 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2060 int new_value = *(int *)ctl->data;
2061
2062 if (write) {
2063 struct ipv4_devconf *cnf = ctl->extra1;
2064 struct net *net = ctl->extra2;
2065 int i = (int *)ctl->data - cnf->data;
2066 int ifindex;
2067
2068 set_bit(i, cnf->state);
2069
2070 if (cnf == net->ipv4.devconf_dflt)
2071 devinet_copy_dflt_conf(net, i);
2072 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2073 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2074 if ((new_value == 0) && (old_value != 0))
2075 rt_cache_flush(net);
2076
2077 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2078 new_value != old_value) {
2079 ifindex = devinet_conf_ifindex(net, cnf);
2080 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2081 ifindex, cnf);
2082 }
2083 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2084 new_value != old_value) {
2085 ifindex = devinet_conf_ifindex(net, cnf);
2086 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2087 ifindex, cnf);
2088 }
2089 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2090 new_value != old_value) {
2091 ifindex = devinet_conf_ifindex(net, cnf);
2092 inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2093 ifindex, cnf);
2094 }
2095 }
2096
2097 return ret;
2098 }
2099
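/* The forwarding sysctl needs the RTNL to walk all devices, but sysctl
 * writes arrive without it.  rtnl_trylock() plus restart_syscall()
 * avoids a lock-order deadlock: if the lock is contended, the old value
 * and file position are restored and the write is retried.  A write to
 * "all" goes through inet_forward_change(); a write to a single device
 * updates only that device; both flush the route cache afterwards,
 * while a write to "default" only triggers a notification.
 */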
2100 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2101 void __user *buffer,
2102 size_t *lenp, loff_t *ppos)
2103 {
2104 int *valp = ctl->data;
2105 int val = *valp;
2106 loff_t pos = *ppos;
2107 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2108
2109 if (write && *valp != val) {
2110 struct net *net = ctl->extra2;
2111
2112 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2113 if (!rtnl_trylock()) {
2114 /* Restore the original values before restarting */
2115 *valp = val;
2116 *ppos = pos;
2117 return restart_syscall();
2118 }
2119 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2120 inet_forward_change(net);
2121 } else {
2122 struct ipv4_devconf *cnf = ctl->extra1;
2123 struct in_device *idev =
2124 container_of(cnf, struct in_device, cnf);
2125 if (*valp)
2126 dev_disable_lro(idev->dev);
2127 inet_netconf_notify_devconf(net,
2128 NETCONFA_FORWARDING,
2129 idev->dev->ifindex,
2130 cnf);
2131 }
2132 rtnl_unlock();
2133 rt_cache_flush(net);
2134 } else
2135 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2136 NETCONFA_IFINDEX_DEFAULT,
2137 net->ipv4.devconf_dflt);
2138 }
2139
2140 return ret;
2141 }
2142
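/* Plain integer handler that additionally flushes the IPv4 route cache
 * whenever the written value differs from the previous one; used for
 * the disable_xfrm/disable_policy style entries below.
 */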
2143 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2144 void __user *buffer,
2145 size_t *lenp, loff_t *ppos)
2146 {
2147 int *valp = ctl->data;
2148 int val = *valp;
2149 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2150 struct net *net = ctl->extra2;
2151
2152 if (write && *valp != val)
2153 rt_cache_flush(net);
2154
2155 return ret;
2156 }
2157
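/* Template helpers for the conf/ ctl_table below.  Each entry's .data
 * initially points into the static ipv4_devconf; __devinet_sysctl_register()
 * clones the whole table and rebases .data/.extra1/.extra2 onto the
 * devconf block and namespace it is registered for.
 */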
2158 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2159 { \
2160 .procname = name, \
2161 .data = ipv4_devconf.data + \
2162 IPV4_DEVCONF_ ## attr - 1, \
2163 .maxlen = sizeof(int), \
2164 .mode = mval, \
2165 .proc_handler = proc, \
2166 .extra1 = &ipv4_devconf, \
2167 }
2168
2169 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2170 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2171
2172 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2173 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2174
2175 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2176 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2177
2178 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2179 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2180
2181 static struct devinet_sysctl_table {
2182 struct ctl_table_header *sysctl_header;
2183 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2184 } devinet_sysctl = {
2185 .devinet_vars = {
2186 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2187 devinet_sysctl_forward),
2188 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2189
2190 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2191 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2192 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2193 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2194 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2195 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2196 "accept_source_route"),
2197 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2198 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2199 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2200 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2201 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2202 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2203 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2204 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2205 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2206 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2207 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2208 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2209 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2210 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2211 "force_igmp_version"),
2212 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2213 "igmpv2_unsolicited_report_interval"),
2214 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2215 "igmpv3_unsolicited_report_interval"),
2216 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2217 "ignore_routes_with_linkdown"),
2218 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2219 "drop_gratuitous_arp"),
2220
2221 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2222 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2223 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2224 "promote_secondaries"),
2225 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2226 "route_localnet"),
2227 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2228 "drop_unicast_in_l2_multicast"),
2229 },
2230 };
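/* Each entry above appears as a file under
 * /proc/sys/net/ipv4/conf/{all,default,<ifname>}/, for example (with a
 * hypothetical interface name):
 *
 *	echo 1 > /proc/sys/net/ipv4/conf/eth0/rp_filter
 *	sysctl net.ipv4.conf.all.forwarding=1
 */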
2231
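/* Clone the template table for one devconf block, register it as
 * net/ipv4/conf/<dev_name> in the given namespace, and announce the
 * full configuration with a NETCONFA_ALL netconf notification.
 */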
2232 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2233 int ifindex, struct ipv4_devconf *p)
2234 {
2235 int i;
2236 struct devinet_sysctl_table *t;
2237 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2238
2239 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2240 if (!t)
2241 goto out;
2242
2243 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2244 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2245 t->devinet_vars[i].extra1 = p;
2246 t->devinet_vars[i].extra2 = net;
2247 }
2248
2249 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2250
2251 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2252 if (!t->sysctl_header)
2253 goto free;
2254
2255 p->sysctl = t;
2256
2257 inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
2258 return 0;
2259
2260 free:
2261 kfree(t);
2262 out:
2263 return -ENOBUFS;
2264 }
2265
2266 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2267 {
2268 struct devinet_sysctl_table *t = cnf->sysctl;
2269
2270 if (!t)
2271 return;
2272
2273 cnf->sysctl = NULL;
2274 unregister_net_sysctl_table(t->sysctl_header);
2275 kfree(t);
2276 }
2277
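/* Per-device registration: the neighbour (ARP) sysctls and the
 * conf/<ifname> directory are created together and torn down together,
 * so a failure of the second step unwinds the first.
 */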
2278 static int devinet_sysctl_register(struct in_device *idev)
2279 {
2280 int err;
2281
2282 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2283 return -EINVAL;
2284
2285 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2286 if (err)
2287 return err;
2288 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2289 idev->dev->ifindex, &idev->cnf);
2290 if (err)
2291 neigh_sysctl_unregister(idev->arp_parms);
2292 return err;
2293 }
2294
2295 static void devinet_sysctl_unregister(struct in_device *idev)
2296 {
2297 __devinet_sysctl_unregister(&idev->cnf);
2298 neigh_sysctl_unregister(idev->arp_parms);
2299 }
2300
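/* The legacy net.ipv4.ip_forward knob.  It aliases the FORWARDING slot
 * of the "all" devconf block and is duplicated per namespace in
 * devinet_init_net() so that each netns gets its own copy.
 */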
2301 static struct ctl_table ctl_forward_entry[] = {
2302 {
2303 .procname = "ip_forward",
2304 .data = &ipv4_devconf.data[
2305 IPV4_DEVCONF_FORWARDING - 1],
2306 .maxlen = sizeof(int),
2307 .mode = 0644,
2308 .proc_handler = devinet_sysctl_forward,
2309 .extra1 = &ipv4_devconf,
2310 .extra2 = &init_net,
2311 },
2312 { },
2313 };
2314 #endif
2315
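/* Per-namespace setup.  The initial namespace keeps the static
 * ipv4_devconf/ipv4_devconf_dflt structures; every other namespace gets
 * kmemdup()ed copies plus its own ip_forward ctl_table.  The error path
 * below frees only what was actually allocated.
 */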
2316 static __net_init int devinet_init_net(struct net *net)
2317 {
2318 int err;
2319 struct ipv4_devconf *all, *dflt;
2320 #ifdef CONFIG_SYSCTL
2321 struct ctl_table *tbl = ctl_forward_entry;
2322 struct ctl_table_header *forw_hdr;
2323 #endif
2324
2325 err = -ENOMEM;
2326 all = &ipv4_devconf;
2327 dflt = &ipv4_devconf_dflt;
2328
2329 if (!net_eq(net, &init_net)) {
2330 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2331 if (!all)
2332 goto err_alloc_all;
2333
2334 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2335 if (!dflt)
2336 goto err_alloc_dflt;
2337
2338 #ifdef CONFIG_SYSCTL
2339 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2340 if (!tbl)
2341 goto err_alloc_ctl;
2342
2343 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2344 tbl[0].extra1 = all;
2345 tbl[0].extra2 = net;
2346 #endif
2347 }
2348
2349 #ifdef CONFIG_SYSCTL
2350 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2351 if (err < 0)
2352 goto err_reg_all;
2353
2354 err = __devinet_sysctl_register(net, "default",
2355 NETCONFA_IFINDEX_DEFAULT, dflt);
2356 if (err < 0)
2357 goto err_reg_dflt;
2358
2359 err = -ENOMEM;
2360 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2361 if (!forw_hdr)
2362 goto err_reg_ctl;
2363 net->ipv4.forw_hdr = forw_hdr;
2364 #endif
2365
2366 net->ipv4.devconf_all = all;
2367 net->ipv4.devconf_dflt = dflt;
2368 return 0;
2369
2370 #ifdef CONFIG_SYSCTL
2371 err_reg_ctl:
2372 __devinet_sysctl_unregister(dflt);
2373 err_reg_dflt:
2374 __devinet_sysctl_unregister(all);
2375 err_reg_all:
2376 if (tbl != ctl_forward_entry)
2377 kfree(tbl);
2378 err_alloc_ctl:
2379 #endif
2380 if (dflt != &ipv4_devconf_dflt)
2381 kfree(dflt);
2382 err_alloc_dflt:
2383 if (all != &ipv4_devconf)
2384 kfree(all);
2385 err_alloc_all:
2386 return err;
2387 }
2388
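/* Per-namespace teardown: unregister the ip_forward table and the
 * "default"/"all" conf directories in reverse order of registration,
 * then free the per-net devconf copies.
 */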
2389 static __net_exit void devinet_exit_net(struct net *net)
2390 {
2391 #ifdef CONFIG_SYSCTL
2392 struct ctl_table *tbl;
2393
2394 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2395 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2396 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2397 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2398 kfree(tbl);
2399 #endif
2400 kfree(net->ipv4.devconf_dflt);
2401 kfree(net->ipv4.devconf_all);
2402 }
2403
2404 static __net_initdata struct pernet_operations devinet_ops = {
2405 .init = devinet_init_net,
2406 .exit = devinet_exit_net,
2407 };
2408
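/* Address-family hooks used by the rtnetlink link code to build and
 * parse the IPv4 portion of IFLA_AF_SPEC in link messages.
 */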
2409 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2410 .family = AF_INET,
2411 .fill_link_af = inet_fill_link_af,
2412 .get_link_af_size = inet_get_link_af_size,
2413 .validate_link_af = inet_validate_link_af,
2414 .set_link_af = inet_set_link_af,
2415 };
2416
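/* Boot-time initialisation: set up the address hash table, register the
 * per-namespace operations, the SIOCGIFCONF helper and the netdevice
 * notifier, kick off the address-lifetime worker, and hook up the
 * AF_INET rtnetlink address and netconf message handlers.
 */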
2417 void __init devinet_init(void)
2418 {
2419 int i;
2420
2421 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2422 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2423
2424 register_pernet_subsys(&devinet_ops);
2425
2426 register_gifconf(PF_INET, inet_gifconf);
2427 register_netdevice_notifier(&ip_netdev_notifier);
2428
2429 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2430
2431 rtnl_af_register(&inet_af_ops);
2432
2433 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2434 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2435 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2436 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2437 inet_netconf_dump_devconf, NULL);
2438 }