]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - net/ipv4/devinet.c
Merge tag 'linux-can-next-for-4.6-20160226' of git://git.kernel.org/pub/scm/linux...
[mirror_ubuntu-jammy-kernel.git] / net / ipv4 / devinet.c
1 /*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/*
 * Statically initialised ipv4_devconf instance.  The .data array is indexed
 * by (IPV4_DEVCONF_* - 1); every entry not listed here starts out as zero.
 * NOTE(review): presumably this backs the "all" configuration of the initial
 * namespace -- its users are outside this chunk, confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
	},
};
80
/*
 * Second statically initialised ipv4_devconf instance.  It carries the same
 * settings as ipv4_devconf and additionally enables ACCEPT_SOURCE_ROUTE.
 * The .data array is indexed by (IPV4_DEVCONF_* - 1).
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt, which
 * inetdev_init() copies into every new in_device -- the assignment is outside
 * this chunk, confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
	},
};
92
/*
 * Read devconf attribute @attr from the per-namespace default configuration
 * (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised in the expansion so that any pointer-valued
 * expression (not just a plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr() when decoding IFA_* attributes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]		= { .type = NLA_U32 },
	[IFA_ADDRESS]		= { .type = NLA_U32 },
	[IFA_BROADCAST]		= { .type = NLA_U32 },
	/* Label length is capped at IFNAMSIZ - 1. */
	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	/* Only a length is specified for the cacheinfo attribute. */
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};
104
/* Width in bits of a bucket index, and the resulting number of buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries, chained through in_ifaddr::hash.
 * Bucket indices are produced by inet_addr_hash().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112 u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121 ASSERT_RTNL();
122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
/*
 * Unlink @ifa from the global address hash using the RCU-aware
 * delete-and-reinitialise primitive.  Asserts that RTNL is held.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
130
131 /**
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
136 *
137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
138 */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 u32 hash = inet_addr_hash(net, addr);
142 struct net_device *result = NULL;
143 struct in_ifaddr *ifa;
144
145 rcu_read_lock();
146 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 if (ifa->ifa_local == addr) {
148 struct net_device *dev = ifa->ifa_dev->dev;
149
150 if (!net_eq(dev_net(dev), net))
151 continue;
152 result = dev;
153 break;
154 }
155 }
156 if (!result) {
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
160
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets work.
163 */
164 local = fib_get_table(net, RT_TABLE_LOCAL);
165 if (local &&
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
169 }
170 if (result && devref)
171 dev_hold(result);
172 rcu_read_unlock();
173 return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
/* Defined later in the file; sends an address netlink notification. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN on address changes. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL registration is a no-op that always succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
194
195 /* Locks all the inet devices. */
196
/*
 * Allocate a zero-filled in_ifaddr with GFP_KERNEL.
 * Returns NULL when the allocation fails.
 */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205 if (ifa->ifa_dev)
206 in_dev_put(ifa->ifa_dev);
207 kfree(ifa);
208 }
209
/*
 * Schedule @ifa for release through call_rcu(); the actual freeing is done
 * by inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217 struct net_device *dev = idev->dev;
218
219 WARN_ON(idev->ifa_list);
220 WARN_ON(idev->mc_list);
221 kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225 dev_put(dev);
226 if (!idev->dead)
227 pr_err("Freeing alive in_device %p\n", idev);
228 else
229 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235 struct in_device *in_dev;
236 int err = -ENOMEM;
237
238 ASSERT_RTNL();
239
240 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 if (!in_dev)
242 goto out;
243 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 sizeof(in_dev->cnf));
245 in_dev->cnf.sysctl = NULL;
246 in_dev->dev = dev;
247 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 if (!in_dev->arp_parms)
249 goto out_kfree;
250 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 dev_disable_lro(dev);
252 /* Reference in_dev->dev */
253 dev_hold(dev);
254 /* Account for reference dev->ip_ptr (below) */
255 in_dev_hold(in_dev);
256
257 err = devinet_sysctl_register(in_dev);
258 if (err) {
259 in_dev->dead = 1;
260 in_dev_put(in_dev);
261 in_dev = NULL;
262 goto out;
263 }
264 ip_mc_init_dev(in_dev);
265 if (dev->flags & IFF_UP)
266 ip_mc_up(in_dev);
267
268 /* we can receive as soon as ip_ptr is set -- do this last */
269 rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271 return in_dev ?: ERR_PTR(err);
272 out_kfree:
273 kfree(in_dev);
274 in_dev = NULL;
275 goto out;
276 }
277
/*
 * RCU callback queued by inetdev_destroy(): drop one reference on the
 * in_device that embeds @head.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
283
/*
 * Detach and tear down the IPv4 state of a device.
 *
 * Marks @in_dev dead, destroys its multicast state, deletes and frees every
 * address, clears dev->ip_ptr, unregisters sysctls, releases the ARP
 * neighbour parameters and finally drops a reference via call_rcu().
 * Must be called with RTNL held (asserted).
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	/*
	 * Always delete the list head.  inet_del_ifa() is called with
	 * destroy == 0, so the unlinked entry is freed explicitly here.
	 */
	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	/* The reference is dropped from an RCU callback, not directly. */
	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313 rcu_read_lock();
314 for_primary_ifa(in_dev) {
315 if (inet_ifa_match(a, ifa)) {
316 if (!b || inet_ifa_match(b, ifa)) {
317 rcu_read_unlock();
318 return 1;
319 }
320 }
321 } endfor_ifa(in_dev);
322 rcu_read_unlock();
323 return 0;
324 }
325
/*
 * Remove one address from an in_device's address list.
 *
 * @in_dev:  device state that owns the list
 * @ifap:    the link (list head or a predecessor's ifa_next) that currently
 *           points at the address to remove
 * @destroy: when non-zero the removed address is also freed; otherwise the
 *           caller keeps ownership of it
 * @nlh, @portid: forwarded to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address is removed, its secondaries in the same subnet are
 * either deleted too or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first
 * one found is promoted to primary.  Must be called with RTNL held
 * (asserted).
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope value is not
			 * below ifa1's; a promoted address is re-linked
			 * right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet, remembering the predecessor for a possible
			 * later unlink of the promoted address.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: delete this secondary now.
				 * ifap1 is not advanced because *ifap1 now
				 * points at the next entry.
				 */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Saved before promote is possibly re-linked below. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted entry to just after last_prim.  With
		 * prev_prom == NULL no skipped entry preceded it and it is
		 * left where it is.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
425
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request (nlh == NULL, portid == 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
431
/* Work handler that processes address lifetimes; defined further below. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437 u32 portid)
438 {
439 struct in_device *in_dev = ifa->ifa_dev;
440 struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442 ASSERT_RTNL();
443
444 if (!ifa->ifa_local) {
445 inet_free_ifa(ifa);
446 return 0;
447 }
448
449 ifa->ifa_flags &= ~IFA_F_SECONDARY;
450 last_primary = &in_dev->ifa_list;
451
452 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453 ifap = &ifa1->ifa_next) {
454 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455 ifa->ifa_scope <= ifa1->ifa_scope)
456 last_primary = &ifa1->ifa_next;
457 if (ifa1->ifa_mask == ifa->ifa_mask &&
458 inet_ifa_match(ifa1->ifa_address, ifa)) {
459 if (ifa1->ifa_local == ifa->ifa_local) {
460 inet_free_ifa(ifa);
461 return -EEXIST;
462 }
463 if (ifa1->ifa_scope != ifa->ifa_scope) {
464 inet_free_ifa(ifa);
465 return -EINVAL;
466 }
467 ifa->ifa_flags |= IFA_F_SECONDARY;
468 }
469 }
470
471 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472 prandom_seed((__force u32) ifa->ifa_local);
473 ifap = last_primary;
474 }
475
476 ifa->ifa_next = *ifap;
477 *ifap = ifa;
478
479 inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481 cancel_delayed_work(&check_lifetime_work);
482 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484 /* Send message first, then call notifier.
485 Notifier will trigger FIB update, so that
486 listeners of netlink will know about new ifaddr */
487 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490 return 0;
491 }
492
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * originating netlink request (nlh == NULL, portid == 0).
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
497
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500 struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502 ASSERT_RTNL();
503
504 if (!in_dev) {
505 inet_free_ifa(ifa);
506 return -ENOBUFS;
507 }
508 ipv4_devconf_setall(in_dev);
509 neigh_parms_data_state_setall(in_dev->arp_parms);
510 if (ifa->ifa_dev != in_dev) {
511 WARN_ON(ifa->ifa_dev);
512 in_dev_hold(in_dev);
513 ifa->ifa_dev = in_dev;
514 }
515 if (ipv4_is_loopback(ifa->ifa_local))
516 ifa->ifa_scope = RT_SCOPE_HOST;
517 return inet_insert_ifa(ifa);
518 }
519
520 /* Caller must hold RCU or RTNL :
521 * We dont take a reference on found in_device
522 */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525 struct net_device *dev;
526 struct in_device *in_dev = NULL;
527
528 rcu_read_lock();
529 dev = dev_get_by_index_rcu(net, ifindex);
530 if (dev)
531 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532 rcu_read_unlock();
533 return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536
537 /* Called only from RTNL semaphored context. No locks. */
538
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540 __be32 mask)
541 {
542 ASSERT_RTNL();
543
544 for_primary_ifa(in_dev) {
545 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546 return ifa;
547 } endfor_ifa(in_dev);
548 return NULL;
549 }
550
551 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
552 {
553 struct ip_mreqn mreq = {
554 .imr_multiaddr.s_addr = ifa->ifa_address,
555 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
556 };
557 int ret;
558
559 ASSERT_RTNL();
560
561 lock_sock(sk);
562 if (join)
563 ret = ip_mc_join_group(sk, &mreq);
564 else
565 ret = ip_mc_leave_group(sk, &mreq);
566 release_sock(sk);
567
568 return ret;
569 }
570
571 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
572 {
573 struct net *net = sock_net(skb->sk);
574 struct nlattr *tb[IFA_MAX+1];
575 struct in_device *in_dev;
576 struct ifaddrmsg *ifm;
577 struct in_ifaddr *ifa, **ifap;
578 int err = -EINVAL;
579
580 ASSERT_RTNL();
581
582 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
583 if (err < 0)
584 goto errout;
585
586 ifm = nlmsg_data(nlh);
587 in_dev = inetdev_by_index(net, ifm->ifa_index);
588 if (!in_dev) {
589 err = -ENODEV;
590 goto errout;
591 }
592
593 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
594 ifap = &ifa->ifa_next) {
595 if (tb[IFA_LOCAL] &&
596 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
597 continue;
598
599 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
600 continue;
601
602 if (tb[IFA_ADDRESS] &&
603 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
604 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
605 continue;
606
607 if (ipv4_is_multicast(ifa->ifa_address))
608 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
609 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
610 return 0;
611 }
612
613 err = -EADDRNOTAVAIL;
614 errout:
615 return err;
616 }
617
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF

/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Each hash bucket is first scanned under rcu_read_lock() only, to decide
 * whether anything in it has to change and to compute the next wake-up time.
 * Only buckets that need a change are rescanned under RTNL, where expired
 * addresses are deleted and addresses past their preferred lifetime are
 * flagged IFA_F_DEPRECATED.  The work then re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan, no RTNL. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* Valid lifetime expired: must be deleted. */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* Past preferred lifetime: next event is the
				 * end of the valid lifetime.
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* Still preferred: next event is deprecation. */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply the changes with RTNL held. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* Find the link that points at ifa, as
				 * required by inet_del_ifa().
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
717
718 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
719 __u32 prefered_lft)
720 {
721 unsigned long timeout;
722
723 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
724
725 timeout = addrconf_timeout_fixup(valid_lft, HZ);
726 if (addrconf_finite_timeout(timeout))
727 ifa->ifa_valid_lft = timeout;
728 else
729 ifa->ifa_flags |= IFA_F_PERMANENT;
730
731 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
732 if (addrconf_finite_timeout(timeout)) {
733 if (timeout == 0)
734 ifa->ifa_flags |= IFA_F_DEPRECATED;
735 ifa->ifa_preferred_lft = timeout;
736 }
737 ifa->ifa_tstamp = jiffies;
738 if (!ifa->ifa_cstamp)
739 ifa->ifa_cstamp = ifa->ifa_tstamp;
740 }
741
742 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
743 __u32 *pvalid_lft, __u32 *pprefered_lft)
744 {
745 struct nlattr *tb[IFA_MAX+1];
746 struct in_ifaddr *ifa;
747 struct ifaddrmsg *ifm;
748 struct net_device *dev;
749 struct in_device *in_dev;
750 int err;
751
752 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
753 if (err < 0)
754 goto errout;
755
756 ifm = nlmsg_data(nlh);
757 err = -EINVAL;
758 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
759 goto errout;
760
761 dev = __dev_get_by_index(net, ifm->ifa_index);
762 err = -ENODEV;
763 if (!dev)
764 goto errout;
765
766 in_dev = __in_dev_get_rtnl(dev);
767 err = -ENOBUFS;
768 if (!in_dev)
769 goto errout;
770
771 ifa = inet_alloc_ifa();
772 if (!ifa)
773 /*
774 * A potential indev allocation can be left alive, it stays
775 * assigned to its device and is destroy with it.
776 */
777 goto errout;
778
779 ipv4_devconf_setall(in_dev);
780 neigh_parms_data_state_setall(in_dev->arp_parms);
781 in_dev_hold(in_dev);
782
783 if (!tb[IFA_ADDRESS])
784 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
785
786 INIT_HLIST_NODE(&ifa->hash);
787 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
788 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
789 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
790 ifm->ifa_flags;
791 ifa->ifa_scope = ifm->ifa_scope;
792 ifa->ifa_dev = in_dev;
793
794 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
795 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
796
797 if (tb[IFA_BROADCAST])
798 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
799
800 if (tb[IFA_LABEL])
801 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
802 else
803 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
804
805 if (tb[IFA_CACHEINFO]) {
806 struct ifa_cacheinfo *ci;
807
808 ci = nla_data(tb[IFA_CACHEINFO]);
809 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
810 err = -EINVAL;
811 goto errout_free;
812 }
813 *pvalid_lft = ci->ifa_valid;
814 *pprefered_lft = ci->ifa_prefered;
815 }
816
817 return ifa;
818
819 errout_free:
820 inet_free_ifa(ifa);
821 errout:
822 return ERR_PTR(err);
823 }
824
825 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
826 {
827 struct in_device *in_dev = ifa->ifa_dev;
828 struct in_ifaddr *ifa1, **ifap;
829
830 if (!ifa->ifa_local)
831 return NULL;
832
833 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
834 ifap = &ifa1->ifa_next) {
835 if (ifa1->ifa_mask == ifa->ifa_mask &&
836 inet_ifa_match(ifa1->ifa_address, ifa) &&
837 ifa1->ifa_local == ifa->ifa_local)
838 return ifa1;
839 }
840 return NULL;
841 }
842
843 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
844 {
845 struct net *net = sock_net(skb->sk);
846 struct in_ifaddr *ifa;
847 struct in_ifaddr *ifa_existing;
848 __u32 valid_lft = INFINITY_LIFE_TIME;
849 __u32 prefered_lft = INFINITY_LIFE_TIME;
850
851 ASSERT_RTNL();
852
853 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
854 if (IS_ERR(ifa))
855 return PTR_ERR(ifa);
856
857 ifa_existing = find_matching_ifa(ifa);
858 if (!ifa_existing) {
859 /* It would be best to check for !NLM_F_CREATE here but
860 * userspace already relies on not having to provide this.
861 */
862 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
863 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
864 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
865 true, ifa);
866
867 if (ret < 0) {
868 inet_free_ifa(ifa);
869 return ret;
870 }
871 }
872 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
873 } else {
874 inet_free_ifa(ifa);
875
876 if (nlh->nlmsg_flags & NLM_F_EXCL ||
877 !(nlh->nlmsg_flags & NLM_F_REPLACE))
878 return -EEXIST;
879 ifa = ifa_existing;
880 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
881 cancel_delayed_work(&check_lifetime_work);
882 queue_delayed_work(system_power_efficient_wq,
883 &check_lifetime_work, 0);
884 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
885 }
886 return 0;
887 }
888
889 /*
890 * Determine a default network mask, based on the IP address.
891 */
892
893 static int inet_abc_len(__be32 addr)
894 {
895 int rc = -1; /* Something else, probably a multicast. */
896
897 if (ipv4_is_zeronet(addr))
898 rc = 0;
899 else {
900 __u32 haddr = ntohl(addr);
901
902 if (IN_CLASSA(haddr))
903 rc = 8;
904 else if (IN_CLASSB(haddr))
905 rc = 16;
906 else if (IN_CLASSC(haddr))
907 rc = 24;
908 }
909
910 return rc;
911 }
912
913
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @net: namespace the request applies to
 * @cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIFFLAGS or
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK}
 * @arg: user pointer to a struct ifreq; written back for the "get" commands
 *
 * The interface name may carry an alias suffix ("name:label").  The part
 * before the colon selects the device, the full string selects the address
 * by label.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (CAP_NET_ADMIN missing for a "set" command), -EINVAL (unknown
 * command, wrong family, illegal address or mask), -ENODEV (no such
 * device), -EADDRNOTAVAIL (no matching address), -ENOBUFS (allocation
 * failed or device has no in_device), or whatever dev_change_flags() /
 * inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force NUL termination of the user-supplied name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut the alias suffix so the name is the device name. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	/* Stage 1: per-command permission and argument checks (no locks). */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* Clear the reply slot; the request copy is in sin_orig. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full "name:label" string for label comparison. */
	if (colon)
		*colon = ':';

	/* Stage 2: locate the address entry the request refers to. */
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an address. */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Stage 3: perform the command. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* With an alias label, clearing IFF_UP deletes that alias
		 * address instead of changing the device flags.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Reuse the existing entry: unlink it (without
			 * freeing) and reset the derived fields.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Derive a classful mask unless the link is point-to-point. */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert; the result of the
			 * re-insertion is not reported to the caller.
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: drop RTNL first, then copy the reply to userspace. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1155
1156 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1157 {
1158 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1159 struct in_ifaddr *ifa;
1160 struct ifreq ifr;
1161 int done = 0;
1162
1163 if (!in_dev)
1164 goto out;
1165
1166 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1167 if (!buf) {
1168 done += sizeof(ifr);
1169 continue;
1170 }
1171 if (len < (int) sizeof(ifr))
1172 break;
1173 memset(&ifr, 0, sizeof(struct ifreq));
1174 strcpy(ifr.ifr_name, ifa->ifa_label);
1175
1176 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1177 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1178 ifa->ifa_local;
1179
1180 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1181 done = -EFAULT;
1182 break;
1183 }
1184 buf += sizeof(struct ifreq);
1185 len -= sizeof(struct ifreq);
1186 done += sizeof(struct ifreq);
1187 }
1188 out:
1189 return done;
1190 }
1191
1192 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1193 {
1194 __be32 addr = 0;
1195 struct in_device *in_dev;
1196 struct net *net = dev_net(dev);
1197 int master_idx;
1198
1199 rcu_read_lock();
1200 in_dev = __in_dev_get_rcu(dev);
1201 if (!in_dev)
1202 goto no_in_dev;
1203
1204 for_primary_ifa(in_dev) {
1205 if (ifa->ifa_scope > scope)
1206 continue;
1207 if (!dst || inet_ifa_match(dst, ifa)) {
1208 addr = ifa->ifa_local;
1209 break;
1210 }
1211 if (!addr)
1212 addr = ifa->ifa_local;
1213 } endfor_ifa(in_dev);
1214
1215 if (addr)
1216 goto out_unlock;
1217 no_in_dev:
1218 master_idx = l3mdev_master_ifindex_rcu(dev);
1219
1220 /* For VRFs, the VRF device takes the place of the loopback device,
1221 * with addresses on it being preferred. Note in such cases the
1222 * loopback device will be among the devices that fail the master_idx
1223 * equality check in the loop below.
1224 */
1225 if (master_idx &&
1226 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1227 (in_dev = __in_dev_get_rcu(dev))) {
1228 for_primary_ifa(in_dev) {
1229 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1230 ifa->ifa_scope <= scope) {
1231 addr = ifa->ifa_local;
1232 goto out_unlock;
1233 }
1234 } endfor_ifa(in_dev);
1235 }
1236
1237 /* Not loopback addresses on loopback should be preferred
1238 in this case. It is important that lo is the first interface
1239 in dev_base list.
1240 */
1241 for_each_netdev_rcu(net, dev) {
1242 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1243 continue;
1244
1245 in_dev = __in_dev_get_rcu(dev);
1246 if (!in_dev)
1247 continue;
1248
1249 for_primary_ifa(in_dev) {
1250 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1251 ifa->ifa_scope <= scope) {
1252 addr = ifa->ifa_local;
1253 goto out_unlock;
1254 }
1255 } endfor_ifa(in_dev);
1256 }
1257 out_unlock:
1258 rcu_read_unlock();
1259 return addr;
1260 }
1261 EXPORT_SYMBOL(inet_select_addr);
1262
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of in_dev while tracking two things:
 *   addr - the first address with ifa_scope <= scope that equals "local"
 *          (or the first such address at all when local is 0);
 *   same - set once an address is seen for which inet_ifa_match() accepts
 *          "local" (when non-zero) and "dst" (when non-zero).
 *
 * Returns addr if "same" ended up set, otherwise 0.
 */
1263 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1264 __be32 local, int scope)
1265 {
1266 int same = 0;
1267 __be32 addr = 0;
1268
1269 for_ifa(in_dev) {
/* Candidate source: only the first acceptable address is recorded. */
1270 if (!addr &&
1271 (local == ifa->ifa_local || !local) &&
1272 ifa->ifa_scope <= scope) {
1273 addr = ifa->ifa_local;
/* A matching subnet was already seen on an earlier entry: done. */
1274 if (same)
1275 break;
1276 }
1277 if (!same) {
1278 same = (!local || inet_ifa_match(local, ifa)) &&
1279 (!dst || inet_ifa_match(dst, ifa));
1280 if (same && addr) {
/* Explicit local requested, or no dst constraint: addr is final. */
1281 if (local || !dst)
1282 break;
1283 /* Is the selected addr into dst subnet? */
1284 if (inet_ifa_match(addr, ifa))
1285 break;
1286 /* No, then can we use new local src? */
1287 if (ifa->ifa_scope <= scope) {
1288 addr = ifa->ifa_local;
1289 break;
1290 }
1291 /* search for large dst subnet for addr */
1292 same = 0;
1293 }
1294 }
1295 } endfor_ifa(in_dev);
1296
1297 return same ? addr : 0;
1298 }
1299
1300 /*
1301 * Confirm that local IP address exists using wildcards:
1302 * - net: netns to check, cannot be NULL
1303 * - in_dev: only on this interface, NULL=any interface
1304 * - dst: only in the same subnet as dst, 0=any dst
1305 * - local: address, 0=autoselect the local address
1306 * - scope: maximum allowed scope value for the local address
1307 */
1308 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1309 __be32 dst, __be32 local, int scope)
1310 {
1311 __be32 addr = 0;
1312 struct net_device *dev;
1313
1314 if (in_dev)
1315 return confirm_addr_indev(in_dev, dst, local, scope);
1316
1317 rcu_read_lock();
1318 for_each_netdev_rcu(net, dev) {
1319 in_dev = __in_dev_get_rcu(dev);
1320 if (in_dev) {
1321 addr = confirm_addr_indev(in_dev, dst, local, scope);
1322 if (addr)
1323 break;
1324 }
1325 }
1326 rcu_read_unlock();
1327
1328 return addr;
1329 }
1330 EXPORT_SYMBOL(inet_confirm_addr);
1331
1332 /*
1333 * Device notifier
1334 */
1335
1336 int register_inetaddr_notifier(struct notifier_block *nb)
1337 {
1338 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1339 }
1340 EXPORT_SYMBOL(register_inetaddr_notifier);
1341
1342 int unregister_inetaddr_notifier(struct notifier_block *nb)
1343 {
1344 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1345 }
1346 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1347
1348 /* Rename ifa_labels for a device name change. Make some effort to preserve
1349 * existing alias numbering and to create unique labels if possible.
1350 */
1351 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1352 {
1353 struct in_ifaddr *ifa;
1354 int named = 0;
1355
1356 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1357 char old[IFNAMSIZ], *dot;
1358
1359 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1360 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1361 if (named++ == 0)
1362 goto skip;
1363 dot = strchr(old, ':');
1364 if (!dot) {
1365 sprintf(old, ":%d", named);
1366 dot = old;
1367 }
1368 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1369 strcat(ifa->ifa_label, dot);
1370 else
1371 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1372 skip:
1373 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1374 }
1375 }
1376
/* An MTU is accepted for IPv4 use when it is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1381
1382 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1383 struct in_device *in_dev)
1384
1385 {
1386 struct in_ifaddr *ifa;
1387
1388 for (ifa = in_dev->ifa_list; ifa;
1389 ifa = ifa->ifa_next) {
1390 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1391 ifa->ifa_local, dev,
1392 ifa->ifa_local, NULL,
1393 dev->dev_addr, NULL);
1394 }
1395 }
1396
1397 /* Called only under RTNL semaphore */
1398
/*
 * Notifier callback (see ip_netdev_notifier) reacting to net_device events.
 *
 * When the device has no in_device yet, only NETDEV_REGISTER and
 * NETDEV_CHANGEMTU (with a valid MTU) do anything: they call inetdev_init().
 * Otherwise the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails on NETDEV_REGISTER,
 * in which case the error is returned through notifier_from_errno().
 */
1399 static int inetdev_event(struct notifier_block *this, unsigned long event,
1400 void *ptr)
1401 {
1402 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1403 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1404
1405 ASSERT_RTNL();
1406
/* No IPv4 state attached to this device yet. */
1407 if (!in_dev) {
1408 if (event == NETDEV_REGISTER) {
1409 in_dev = inetdev_init(dev);
1410 if (IS_ERR(in_dev))
1411 return notifier_from_errno(PTR_ERR(in_dev));
1412 if (dev->flags & IFF_LOOPBACK) {
1413 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1414 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1415 }
1416 } else if (event == NETDEV_CHANGEMTU) {
1417 /* Re-enabling IP */
1418 if (inetdev_valid_mtu(dev->mtu))
1419 in_dev = inetdev_init(dev);
1420 }
1421 goto out;
1422 }
1423
1424 switch (event) {
1425 case NETDEV_REGISTER:
/* in_dev already present at registration time: logged as a bug and ip_ptr is cleared. */
1426 pr_debug("%s: bug\n", __func__);
1427 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1428 break;
1429 case NETDEV_UP:
1430 if (!inetdev_valid_mtu(dev->mtu))
1431 break;
/* Loopback devices get 127.0.0.1/8 (host scope, infinite lifetime) when brought up. */
1432 if (dev->flags & IFF_LOOPBACK) {
1433 struct in_ifaddr *ifa = inet_alloc_ifa();
1434
1435 if (ifa) {
1436 INIT_HLIST_NODE(&ifa->hash);
1437 ifa->ifa_local =
1438 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1439 ifa->ifa_prefixlen = 8;
1440 ifa->ifa_mask = inet_make_mask(8);
1441 in_dev_hold(in_dev);
1442 ifa->ifa_dev = in_dev;
1443 ifa->ifa_scope = RT_SCOPE_HOST;
1444 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1445 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1446 INFINITY_LIFE_TIME);
1447 ipv4_devconf_setall(in_dev);
1448 neigh_parms_data_state_setall(in_dev->arp_parms);
1449 inet_insert_ifa(ifa);
1450 }
1451 }
1452 ip_mc_up(in_dev);
1453 /* fall through */
1454 case NETDEV_CHANGEADDR:
/* UP and CHANGEADDR only announce themselves when arp_notify is enabled. */
1455 if (!IN_DEV_ARP_NOTIFY(in_dev))
1456 break;
1457 /* fall through */
1458 case NETDEV_NOTIFY_PEERS:
1459 /* Send gratuitous ARP to notify of link change */
1460 inetdev_send_gratuitous_arp(dev, in_dev);
1461 break;
1462 case NETDEV_DOWN:
1463 ip_mc_down(in_dev);
1464 break;
1465 case NETDEV_PRE_TYPE_CHANGE:
1466 ip_mc_unmap(in_dev);
1467 break;
1468 case NETDEV_POST_TYPE_CHANGE:
1469 ip_mc_remap(in_dev);
1470 break;
1471 case NETDEV_CHANGEMTU:
1472 if (inetdev_valid_mtu(dev->mtu))
1473 break;
1474 /* MTU is too small for IP: fall through and destroy the in_device */
1475 case NETDEV_UNREGISTER:
1476 inetdev_destroy(in_dev);
1477 break;
1478 case NETDEV_CHANGENAME:
1479 /* Relabel the addresses; inetdev_changename() sends an
1480 * RTM_NEWADDR message for each of them. Then re-register the
1481 * sysctl entries so they are created from the current dev->name.
1481 */
1482 inetdev_changename(dev, in_dev);
1483
1484 devinet_sysctl_unregister(in_dev);
1485 devinet_sysctl_register(in_dev);
1486 break;
1487 }
1488 out:
1489 return NOTIFY_DONE;
1490 }
1491
/*
 * Notifier block that routes events to inetdev_event().
 * NOTE(review): presumably registered on the netdevice notifier chain by the
 * init code elsewhere in this file - confirm.
 */
1492 static struct notifier_block ip_netdev_notifier = {
1493 .notifier_call = inetdev_event,
1494 };
1495
1496 static size_t inet_nlmsg_size(void)
1497 {
1498 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1499 + nla_total_size(4) /* IFA_ADDRESS */
1500 + nla_total_size(4) /* IFA_LOCAL */
1501 + nla_total_size(4) /* IFA_BROADCAST */
1502 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1503 + nla_total_size(4) /* IFA_FLAGS */
1504 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1505 }
1506
1507 static inline u32 cstamp_delta(unsigned long cstamp)
1508 {
1509 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1510 }
1511
1512 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1513 unsigned long tstamp, u32 preferred, u32 valid)
1514 {
1515 struct ifa_cacheinfo ci;
1516
1517 ci.cstamp = cstamp_delta(cstamp);
1518 ci.tstamp = cstamp_delta(tstamp);
1519 ci.ifa_prefered = preferred;
1520 ci.ifa_valid = valid;
1521
1522 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1523 }
1524
1525 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1526 u32 portid, u32 seq, int event, unsigned int flags)
1527 {
1528 struct ifaddrmsg *ifm;
1529 struct nlmsghdr *nlh;
1530 u32 preferred, valid;
1531
1532 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1533 if (!nlh)
1534 return -EMSGSIZE;
1535
1536 ifm = nlmsg_data(nlh);
1537 ifm->ifa_family = AF_INET;
1538 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1539 ifm->ifa_flags = ifa->ifa_flags;
1540 ifm->ifa_scope = ifa->ifa_scope;
1541 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1542
1543 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1544 preferred = ifa->ifa_preferred_lft;
1545 valid = ifa->ifa_valid_lft;
1546 if (preferred != INFINITY_LIFE_TIME) {
1547 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1548
1549 if (preferred > tval)
1550 preferred -= tval;
1551 else
1552 preferred = 0;
1553 if (valid != INFINITY_LIFE_TIME) {
1554 if (valid > tval)
1555 valid -= tval;
1556 else
1557 valid = 0;
1558 }
1559 }
1560 } else {
1561 preferred = INFINITY_LIFE_TIME;
1562 valid = INFINITY_LIFE_TIME;
1563 }
1564 if ((ifa->ifa_address &&
1565 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1566 (ifa->ifa_local &&
1567 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1568 (ifa->ifa_broadcast &&
1569 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1570 (ifa->ifa_label[0] &&
1571 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1572 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1573 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1574 preferred, valid))
1575 goto nla_put_failure;
1576
1577 nlmsg_end(skb, nlh);
1578 return 0;
1579
1580 nla_put_failure:
1581 nlmsg_cancel(skb, nlh);
1582 return -EMSGSIZE;
1583 }
1584
/*
 * Netlink dump callback emitting one RTM_NEWADDR message (NLM_F_MULTI) per
 * IPv4 address of every device in the socket's netns.
 *
 * The resume position is kept in cb->args[]:
 *   args[0] - bucket of net->dev_index_head to continue from,
 *   args[1] - index of the device inside that bucket,
 *   args[2] - index of the address inside that device.
 * When inet_fill_ifaddr() fails (skb full) the current position is stored
 * and the function returns; the next invocation continues from there.
 *
 * Returns skb->len.
 */
1585 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1586 {
1587 struct net *net = sock_net(skb->sk);
1588 int h, s_h;
1589 int idx, s_idx;
1590 int ip_idx, s_ip_idx;
1591 struct net_device *dev;
1592 struct in_device *in_dev;
1593 struct in_ifaddr *ifa;
1594 struct hlist_head *head;
1595
1596 s_h = cb->args[0];
1597 s_idx = idx = cb->args[1];
1598 s_ip_idx = ip_idx = cb->args[2];
1599
/* s_idx only applies to the first bucket visited; it is reset for later ones. */
1600 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1601 idx = 0;
1602 head = &net->dev_index_head[h];
1603 rcu_read_lock();
/* Sequence value consumed by nl_dump_check_consistent() below. */
1604 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1605 net->dev_base_seq;
1606 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1607 if (idx < s_idx)
1608 goto cont;
/* Past the saved device: start from its first address. */
1609 if (h > s_h || idx > s_idx)
1610 s_ip_idx = 0;
1611 in_dev = __in_dev_get_rcu(dev);
1612 if (!in_dev)
1613 goto cont;
1614
1615 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1616 ifa = ifa->ifa_next, ip_idx++) {
1617 if (ip_idx < s_ip_idx)
1618 continue;
1619 if (inet_fill_ifaddr(skb, ifa,
1620 NETLINK_CB(cb->skb).portid,
1621 cb->nlh->nlmsg_seq,
1622 RTM_NEWADDR, NLM_F_MULTI) < 0) {
1623 rcu_read_unlock();
1624 goto done;
1625 }
1626 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1627 }
1628 cont:
1629 idx++;
1630 }
1631 rcu_read_unlock();
1632 }
1633
1634 done:
/* Save the cursor for the next call. */
1635 cb->args[0] = h;
1636 cb->args[1] = idx;
1637 cb->args[2] = ip_idx;
1638
1639 return skb->len;
1640 }
1641
1642 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1643 u32 portid)
1644 {
1645 struct sk_buff *skb;
1646 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1647 int err = -ENOBUFS;
1648 struct net *net;
1649
1650 net = dev_net(ifa->ifa_dev->dev);
1651 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1652 if (!skb)
1653 goto errout;
1654
1655 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1656 if (err < 0) {
1657 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1658 WARN_ON(err == -EMSGSIZE);
1659 kfree_skb(skb);
1660 goto errout;
1661 }
1662 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1663 return;
1664 errout:
1665 if (err < 0)
1666 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1667 }
1668
1669 static size_t inet_get_link_af_size(const struct net_device *dev,
1670 u32 ext_filter_mask)
1671 {
1672 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1673
1674 if (!in_dev)
1675 return 0;
1676
1677 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1678 }
1679
1680 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1681 u32 ext_filter_mask)
1682 {
1683 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1684 struct nlattr *nla;
1685 int i;
1686
1687 if (!in_dev)
1688 return -ENODATA;
1689
1690 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1691 if (!nla)
1692 return -EMSGSIZE;
1693
1694 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1695 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1696
1697 return 0;
1698 }
1699
/*
 * Attribute policy applied by inet_validate_link_af():
 * IFLA_INET_CONF must be a nested attribute.
 */
1700 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1701 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1702 };
1703
1704 static int inet_validate_link_af(const struct net_device *dev,
1705 const struct nlattr *nla)
1706 {
1707 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1708 int err, rem;
1709
1710 if (dev && !__in_dev_get_rtnl(dev))
1711 return -EAFNOSUPPORT;
1712
1713 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1714 if (err < 0)
1715 return err;
1716
1717 if (tb[IFLA_INET_CONF]) {
1718 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1719 int cfgid = nla_type(a);
1720
1721 if (nla_len(a) < 4)
1722 return -EINVAL;
1723
1724 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1725 return -EINVAL;
1726 }
1727 }
1728
1729 return 0;
1730 }
1731
1732 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1733 {
1734 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1735 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1736 int rem;
1737
1738 if (!in_dev)
1739 return -EAFNOSUPPORT;
1740
1741 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1742 BUG();
1743
1744 if (tb[IFLA_INET_CONF]) {
1745 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1746 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1747 }
1748
1749 return 0;
1750 }
1751
1752 static int inet_netconf_msgsize_devconf(int type)
1753 {
1754 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1755 + nla_total_size(4); /* NETCONFA_IFINDEX */
1756
1757 /* type -1 is used for ALL */
1758 if (type == -1 || type == NETCONFA_FORWARDING)
1759 size += nla_total_size(4);
1760 if (type == -1 || type == NETCONFA_RP_FILTER)
1761 size += nla_total_size(4);
1762 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1763 size += nla_total_size(4);
1764 if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1765 size += nla_total_size(4);
1766 if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1767 size += nla_total_size(4);
1768
1769 return size;
1770 }
1771
1772 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1773 struct ipv4_devconf *devconf, u32 portid,
1774 u32 seq, int event, unsigned int flags,
1775 int type)
1776 {
1777 struct nlmsghdr *nlh;
1778 struct netconfmsg *ncm;
1779
1780 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1781 flags);
1782 if (!nlh)
1783 return -EMSGSIZE;
1784
1785 ncm = nlmsg_data(nlh);
1786 ncm->ncm_family = AF_INET;
1787
1788 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1789 goto nla_put_failure;
1790
1791 /* type -1 is used for ALL */
1792 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1793 nla_put_s32(skb, NETCONFA_FORWARDING,
1794 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1795 goto nla_put_failure;
1796 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1797 nla_put_s32(skb, NETCONFA_RP_FILTER,
1798 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1799 goto nla_put_failure;
1800 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1801 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1802 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1803 goto nla_put_failure;
1804 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1805 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1806 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1807 goto nla_put_failure;
1808 if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1809 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1810 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1811 goto nla_put_failure;
1812
1813 nlmsg_end(skb, nlh);
1814 return 0;
1815
1816 nla_put_failure:
1817 nlmsg_cancel(skb, nlh);
1818 return -EMSGSIZE;
1819 }
1820
1821 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1822 struct ipv4_devconf *devconf)
1823 {
1824 struct sk_buff *skb;
1825 int err = -ENOBUFS;
1826
1827 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1828 if (!skb)
1829 goto errout;
1830
1831 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1832 RTM_NEWNETCONF, 0, type);
1833 if (err < 0) {
1834 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835 WARN_ON(err == -EMSGSIZE);
1836 kfree_skb(skb);
1837 goto errout;
1838 }
1839 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1840 return;
1841 errout:
1842 if (err < 0)
1843 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1844 }
1845
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Each listed attribute is declared with .len = sizeof(int).
 * NOTE(review): NETCONFA_MC_FORWARDING has no entry here although
 * inet_netconf_fill_devconf() emits it - confirm this is intentional.
 */
1846 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1847 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1848 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1849 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1850 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
1851 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
1852 };
1853
1854 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1855 struct nlmsghdr *nlh)
1856 {
1857 struct net *net = sock_net(in_skb->sk);
1858 struct nlattr *tb[NETCONFA_MAX+1];
1859 struct netconfmsg *ncm;
1860 struct sk_buff *skb;
1861 struct ipv4_devconf *devconf;
1862 struct in_device *in_dev;
1863 struct net_device *dev;
1864 int ifindex;
1865 int err;
1866
1867 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1868 devconf_ipv4_policy);
1869 if (err < 0)
1870 goto errout;
1871
1872 err = -EINVAL;
1873 if (!tb[NETCONFA_IFINDEX])
1874 goto errout;
1875
1876 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1877 switch (ifindex) {
1878 case NETCONFA_IFINDEX_ALL:
1879 devconf = net->ipv4.devconf_all;
1880 break;
1881 case NETCONFA_IFINDEX_DEFAULT:
1882 devconf = net->ipv4.devconf_dflt;
1883 break;
1884 default:
1885 dev = __dev_get_by_index(net, ifindex);
1886 if (!dev)
1887 goto errout;
1888 in_dev = __in_dev_get_rtnl(dev);
1889 if (!in_dev)
1890 goto errout;
1891 devconf = &in_dev->cnf;
1892 break;
1893 }
1894
1895 err = -ENOBUFS;
1896 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1897 if (!skb)
1898 goto errout;
1899
1900 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1901 NETLINK_CB(in_skb).portid,
1902 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1903 -1);
1904 if (err < 0) {
1905 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1906 WARN_ON(err == -EMSGSIZE);
1907 kfree_skb(skb);
1908 goto errout;
1909 }
1910 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1911 errout:
1912 return err;
1913 }
1914
1915 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1916 struct netlink_callback *cb)
1917 {
1918 struct net *net = sock_net(skb->sk);
1919 int h, s_h;
1920 int idx, s_idx;
1921 struct net_device *dev;
1922 struct in_device *in_dev;
1923 struct hlist_head *head;
1924
1925 s_h = cb->args[0];
1926 s_idx = idx = cb->args[1];
1927
1928 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1929 idx = 0;
1930 head = &net->dev_index_head[h];
1931 rcu_read_lock();
1932 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1933 net->dev_base_seq;
1934 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1935 if (idx < s_idx)
1936 goto cont;
1937 in_dev = __in_dev_get_rcu(dev);
1938 if (!in_dev)
1939 goto cont;
1940
1941 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1942 &in_dev->cnf,
1943 NETLINK_CB(cb->skb).portid,
1944 cb->nlh->nlmsg_seq,
1945 RTM_NEWNETCONF,
1946 NLM_F_MULTI,
1947 -1) < 0) {
1948 rcu_read_unlock();
1949 goto done;
1950 }
1951 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1952 cont:
1953 idx++;
1954 }
1955 rcu_read_unlock();
1956 }
1957 if (h == NETDEV_HASHENTRIES) {
1958 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1959 net->ipv4.devconf_all,
1960 NETLINK_CB(cb->skb).portid,
1961 cb->nlh->nlmsg_seq,
1962 RTM_NEWNETCONF, NLM_F_MULTI,
1963 -1) < 0)
1964 goto done;
1965 else
1966 h++;
1967 }
1968 if (h == NETDEV_HASHENTRIES + 1) {
1969 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1970 net->ipv4.devconf_dflt,
1971 NETLINK_CB(cb->skb).portid,
1972 cb->nlh->nlmsg_seq,
1973 RTM_NEWNETCONF, NLM_F_MULTI,
1974 -1) < 0)
1975 goto done;
1976 else
1977 h++;
1978 }
1979 done:
1980 cb->args[0] = h;
1981 cb->args[1] = idx;
1982
1983 return skb->len;
1984 }
1985
1986 #ifdef CONFIG_SYSCTL
1987
1988 static void devinet_copy_dflt_conf(struct net *net, int i)
1989 {
1990 struct net_device *dev;
1991
1992 rcu_read_lock();
1993 for_each_netdev_rcu(net, dev) {
1994 struct in_device *in_dev;
1995
1996 in_dev = __in_dev_get_rcu(dev);
1997 if (in_dev && !test_bit(i, in_dev->cnf.state))
1998 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1999 }
2000 rcu_read_unlock();
2001 }
2002
2003 /* called with RTNL locked */
2004 static void inet_forward_change(struct net *net)
2005 {
2006 struct net_device *dev;
2007 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2008
2009 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2010 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2011 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2012 NETCONFA_IFINDEX_ALL,
2013 net->ipv4.devconf_all);
2014 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2015 NETCONFA_IFINDEX_DEFAULT,
2016 net->ipv4.devconf_dflt);
2017
2018 for_each_netdev(net, dev) {
2019 struct in_device *in_dev;
2020 if (on)
2021 dev_disable_lro(dev);
2022 rcu_read_lock();
2023 in_dev = __in_dev_get_rcu(dev);
2024 if (in_dev) {
2025 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2026 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2027 dev->ifindex, &in_dev->cnf);
2028 }
2029 rcu_read_unlock();
2030 }
2031 }
2032
2033 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2034 {
2035 if (cnf == net->ipv4.devconf_dflt)
2036 return NETCONFA_IFINDEX_DEFAULT;
2037 else if (cnf == net->ipv4.devconf_all)
2038 return NETCONFA_IFINDEX_ALL;
2039 else {
2040 struct in_device *idev
2041 = container_of(cnf, struct in_device, cnf);
2042 return idev->dev->ifindex;
2043 }
2044 }
2045
2046 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2047 void __user *buffer,
2048 size_t *lenp, loff_t *ppos)
2049 {
2050 int old_value = *(int *)ctl->data;
2051 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2052 int new_value = *(int *)ctl->data;
2053
2054 if (write) {
2055 struct ipv4_devconf *cnf = ctl->extra1;
2056 struct net *net = ctl->extra2;
2057 int i = (int *)ctl->data - cnf->data;
2058 int ifindex;
2059
2060 set_bit(i, cnf->state);
2061
2062 if (cnf == net->ipv4.devconf_dflt)
2063 devinet_copy_dflt_conf(net, i);
2064 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2065 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2066 if ((new_value == 0) && (old_value != 0))
2067 rt_cache_flush(net);
2068
2069 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2070 new_value != old_value) {
2071 ifindex = devinet_conf_ifindex(net, cnf);
2072 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2073 ifindex, cnf);
2074 }
2075 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2076 new_value != old_value) {
2077 ifindex = devinet_conf_ifindex(net, cnf);
2078 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2079 ifindex, cnf);
2080 }
2081 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2082 new_value != old_value) {
2083 ifindex = devinet_conf_ifindex(net, cnf);
2084 inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2085 ifindex, cnf);
2086 }
2087 }
2088
2089 return ret;
2090 }
2091
2092 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2093 void __user *buffer,
2094 size_t *lenp, loff_t *ppos)
2095 {
2096 int *valp = ctl->data;
2097 int val = *valp;
2098 loff_t pos = *ppos;
2099 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2100
2101 if (write && *valp != val) {
2102 struct net *net = ctl->extra2;
2103
2104 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2105 if (!rtnl_trylock()) {
2106 /* Restore the original values before restarting */
2107 *valp = val;
2108 *ppos = pos;
2109 return restart_syscall();
2110 }
2111 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2112 inet_forward_change(net);
2113 } else {
2114 struct ipv4_devconf *cnf = ctl->extra1;
2115 struct in_device *idev =
2116 container_of(cnf, struct in_device, cnf);
2117 if (*valp)
2118 dev_disable_lro(idev->dev);
2119 inet_netconf_notify_devconf(net,
2120 NETCONFA_FORWARDING,
2121 idev->dev->ifindex,
2122 cnf);
2123 }
2124 rtnl_unlock();
2125 rt_cache_flush(net);
2126 } else
2127 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2128 NETCONFA_IFINDEX_DEFAULT,
2129 net->ipv4.devconf_dflt);
2130 }
2131
2132 return ret;
2133 }
2134
2135 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2136 void __user *buffer,
2137 size_t *lenp, loff_t *ppos)
2138 {
2139 int *valp = ctl->data;
2140 int val = *valp;
2141 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2142 struct net *net = ctl->extra2;
2143
2144 if (write && *valp != val)
2145 rt_cache_flush(net);
2146
2147 return ret;
2148 }
2149
/*
 * Build one ctl_table initializer for devconf setting "attr":
 *   name - procname of the entry,
 *   mval - file mode,
 *   proc - proc_handler.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf
 * and .extra1 at ipv4_devconf itself; __devinet_sysctl_register() rebases
 * both for the configuration actually being registered.
 */
2150 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2151 { \
2152 .procname = name, \
2153 .data = ipv4_devconf.data + \
2154 IPV4_DEVCONF_ ## attr - 1, \
2155 .maxlen = sizeof(int), \
2156 .mode = mval, \
2157 .proc_handler = proc, \
2158 .extra1 = &ipv4_devconf, \
2159 }
2160
/* Read-write (0644) entry handled by devinet_conf_proc(). */
2161 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2162 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2163
/* Read-only (0444) entry handled by devinet_conf_proc(). */
2164 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2165 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2166
/* Read-write (0644) entry with a caller-supplied handler. */
2167 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2168 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2169
/* Read-write entry handled by ipv4_doint_and_flush(). */
2170 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2171 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2172
/*
 * Template of the per-configuration sysctl table.
 *
 * __devinet_sysctl_register() duplicates this structure with kmemdup() and
 * rebases each entry's .data/.extra1 from the global ipv4_devconf to the
 * target configuration.  sysctl_header receives the handle returned by
 * register_net_sysctl() for the copy.
 * NOTE(review): the register loop stops at ARRAY_SIZE - 1, presumably
 * leaving the last slot as the empty terminator - confirm.
 */
2173 static struct devinet_sysctl_table {
2174 struct ctl_table_header *sysctl_header;
2175 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2176 } devinet_sysctl = {
2177 .devinet_vars = {
2178 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2179 devinet_sysctl_forward),
2180 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2181
2182 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2183 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2184 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2185 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2186 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2187 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2188 "accept_source_route"),
2189 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2190 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2191 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2192 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2193 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2194 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2195 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2196 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2197 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2198 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2199 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2200 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2201 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2202 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2203 "force_igmp_version"),
2204 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2205 "igmpv2_unsolicited_report_interval"),
2206 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2207 "igmpv3_unsolicited_report_interval"),
2208 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2209 "ignore_routes_with_linkdown"),
2210 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2211 "drop_gratuitous_arp"),
2212
/* Entries below flush the route cache when their value changes. */
2213 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2214 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2215 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2216 "promote_secondaries"),
2217 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2218 "route_localnet"),
2219 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2220 "drop_unicast_in_l2_multicast"),
2221 },
2222 };
2223
2224 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2225 struct ipv4_devconf *p)
2226 {
2227 int i;
2228 struct devinet_sysctl_table *t;
2229 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2230
2231 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2232 if (!t)
2233 goto out;
2234
2235 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2236 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2237 t->devinet_vars[i].extra1 = p;
2238 t->devinet_vars[i].extra2 = net;
2239 }
2240
2241 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2242
2243 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2244 if (!t->sysctl_header)
2245 goto free;
2246
2247 p->sysctl = t;
2248 return 0;
2249
2250 free:
2251 kfree(t);
2252 out:
2253 return -ENOBUFS;
2254 }
2255
2256 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2257 {
2258 struct devinet_sysctl_table *t = cnf->sysctl;
2259
2260 if (!t)
2261 return;
2262
2263 cnf->sysctl = NULL;
2264 unregister_net_sysctl_table(t->sysctl_header);
2265 kfree(t);
2266 }
2267
2268 static int devinet_sysctl_register(struct in_device *idev)
2269 {
2270 int err;
2271
2272 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2273 return -EINVAL;
2274
2275 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2276 if (err)
2277 return err;
2278 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2279 &idev->cnf);
2280 if (err)
2281 neigh_sysctl_unregister(idev->arp_parms);
2282 return err;
2283 }
2284
2285 static void devinet_sysctl_unregister(struct in_device *idev)
2286 {
2287 __devinet_sysctl_unregister(&idev->cnf);
2288 neigh_sysctl_unregister(idev->arp_parms);
2289 }
2290
2291 static struct ctl_table ctl_forward_entry[] = {
2292 {
2293 .procname = "ip_forward",
2294 .data = &ipv4_devconf.data[
2295 IPV4_DEVCONF_FORWARDING - 1],
2296 .maxlen = sizeof(int),
2297 .mode = 0644,
2298 .proc_handler = devinet_sysctl_forward,
2299 .extra1 = &ipv4_devconf,
2300 .extra2 = &init_net,
2301 },
2302 { },
2303 };
2304 #endif
2305
2306 static __net_init int devinet_init_net(struct net *net)
2307 {
2308 int err;
2309 struct ipv4_devconf *all, *dflt;
2310 #ifdef CONFIG_SYSCTL
2311 struct ctl_table *tbl = ctl_forward_entry;
2312 struct ctl_table_header *forw_hdr;
2313 #endif
2314
2315 err = -ENOMEM;
2316 all = &ipv4_devconf;
2317 dflt = &ipv4_devconf_dflt;
2318
2319 if (!net_eq(net, &init_net)) {
2320 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2321 if (!all)
2322 goto err_alloc_all;
2323
2324 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2325 if (!dflt)
2326 goto err_alloc_dflt;
2327
2328 #ifdef CONFIG_SYSCTL
2329 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2330 if (!tbl)
2331 goto err_alloc_ctl;
2332
2333 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2334 tbl[0].extra1 = all;
2335 tbl[0].extra2 = net;
2336 #endif
2337 }
2338
2339 #ifdef CONFIG_SYSCTL
2340 err = __devinet_sysctl_register(net, "all", all);
2341 if (err < 0)
2342 goto err_reg_all;
2343
2344 err = __devinet_sysctl_register(net, "default", dflt);
2345 if (err < 0)
2346 goto err_reg_dflt;
2347
2348 err = -ENOMEM;
2349 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2350 if (!forw_hdr)
2351 goto err_reg_ctl;
2352 net->ipv4.forw_hdr = forw_hdr;
2353 #endif
2354
2355 net->ipv4.devconf_all = all;
2356 net->ipv4.devconf_dflt = dflt;
2357 return 0;
2358
2359 #ifdef CONFIG_SYSCTL
2360 err_reg_ctl:
2361 __devinet_sysctl_unregister(dflt);
2362 err_reg_dflt:
2363 __devinet_sysctl_unregister(all);
2364 err_reg_all:
2365 if (tbl != ctl_forward_entry)
2366 kfree(tbl);
2367 err_alloc_ctl:
2368 #endif
2369 if (dflt != &ipv4_devconf_dflt)
2370 kfree(dflt);
2371 err_alloc_dflt:
2372 if (all != &ipv4_devconf)
2373 kfree(all);
2374 err_alloc_all:
2375 return err;
2376 }
2377
2378 static __net_exit void devinet_exit_net(struct net *net)
2379 {
2380 #ifdef CONFIG_SYSCTL
2381 struct ctl_table *tbl;
2382
2383 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2384 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2385 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2386 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2387 kfree(tbl);
2388 #endif
2389 kfree(net->ipv4.devconf_dflt);
2390 kfree(net->ipv4.devconf_all);
2391 }
2392
2393 static __net_initdata struct pernet_operations devinet_ops = {
2394 .init = devinet_init_net,
2395 .exit = devinet_exit_net,
2396 };
2397
2398 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2399 .family = AF_INET,
2400 .fill_link_af = inet_fill_link_af,
2401 .get_link_af_size = inet_get_link_af_size,
2402 .validate_link_af = inet_validate_link_af,
2403 .set_link_af = inet_set_link_af,
2404 };
2405
2406 void __init devinet_init(void)
2407 {
2408 int i;
2409
2410 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2411 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2412
2413 register_pernet_subsys(&devinet_ops);
2414
2415 register_gifconf(PF_INET, inet_gifconf);
2416 register_netdevice_notifier(&ip_netdev_notifier);
2417
2418 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2419
2420 rtnl_af_register(&inet_af_ops);
2421
2422 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2423 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2424 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2425 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2426 inet_netconf_dump_devconf, NULL);
2427 }