/*
 * git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git - net/ipv4/devinet.c
 * Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetoot...
 * [mirror_ubuntu-hirsute-kernel.git] / net / ipv4 / devinet.c
 */
1 /*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/* Per-device IPv4 configuration defaults applied to the "all" pseudo
 * device (net.ipv4.conf.all.*).  Entries not listed here default to 0.
 * Indices are the IPV4_DEVCONF_* identifiers, which are 1-based, hence
 * the "- 1" in each designator.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
	},
};
79
/* Template copied into each newly created in_device (see inetdev_init())
 * and exposed as net.ipv4.conf.default.*.  Differs from ipv4_devconf in
 * that ACCEPT_SOURCE_ROUTE is additionally enabled by default.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
	},
};
91
/* Accessor for a per-namespace "default" devconf attribute. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94
/* Netlink attribute validation policy for RTM_NEWADDR/RTM_DELADDR
 * (struct ifaddrmsg payloads).  IPv4 addresses travel as raw u32s.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL] = { .type = NLA_U32 },
	[IFA_ADDRESS] = { .type = NLA_U32 },
	[IFA_BROADCAST] = { .type = NLA_U32 },
	[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS] = { .type = NLA_U32 },
};
103
/* Global hash table of all configured IPv4 addresses (256 buckets),
 * keyed by local address and namespace; see inet_addr_hash().
 * Insertion/removal under RTNL, lookup under RCU.
 */
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 u32 val = (__force u32) addr ^ net_hash_mix(net);
112
113 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 u32 hash = inet_addr_hash(net, ifa->ifa_local);
119
120 ASSERT_RTNL();
121 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123
/* Unhash @ifa from the global address table.  hlist_del_init_rcu()
 * leaves the node re-initialized so a double remove is harmless.
 * Caller must hold RTNL.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
129
/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL.
 * Returns NULL when no device owns @addr in @net.
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	u32 hash = inet_addr_hash(net, addr);
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
		if (ifa->ifa_local == addr) {
			struct net_device *dev = ifa->ifa_dev->dev;

			/* hash chain is global; skip entries from other
			 * namespaces that happen to share the bucket */
			if (!net_eq(dev_net(dev), net))
				continue;
			result = dev;
			break;
		}
	}
	if (!result) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	}
	/* take the reference inside the RCU section so the device
	 * cannot disappear between lookup and dev_hold() */
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);
175
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain fired on address add/remove (NETDEV_UP/NETDEV_DOWN). */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* No-op stubs when sysctl support is compiled out. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193
/* in_ifaddr allocation and RCU-deferred freeing. */
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 if (ifa->ifa_dev)
205 in_dev_put(ifa->ifa_dev);
206 kfree(ifa);
207 }
208
/* Schedule @ifa for freeing after an RCU grace period, so that RCU
 * readers still walking the address lists never see freed memory.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
213
/* Final teardown of an in_device once its refcount drops to zero.
 * Releases the reference held on the underlying net_device and frees
 * the structure; complains if called while the device is still alive
 * (idev->dead not set) or still has addresses / multicast state.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
231
/* Create and attach the IPv4 state (in_device) for @dev.
 * Copies the namespace's "default" devconf, allocates ARP parameters,
 * registers sysctls and initializes multicast state.  Publishes the
 * result via dev->ip_ptr last, since packets may be delivered as soon
 * as that pointer is visible.  Caller must hold RTNL.
 * Returns the new in_device or an ERR_PTR() on failure.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
	       sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	/* forwarding is incompatible with LRO-merged frames */
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	err = devinet_sysctl_register(in_dev);
	if (err) {
		/* mark dead so in_dev_put() frees it instead of warning */
		in_dev->dead = 1;
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev ?: ERR_PTR(err);
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
276
/* RCU callback: drop the in_device reference after the grace period,
 * so concurrent RCU readers of dev->ip_ptr are done before teardown.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
282
/* Tear down the IPv4 state of a device: mark it dead, destroy multicast
 * state, delete every configured address, clear dev->ip_ptr and finally
 * drop the ip_ptr reference via RCU.  Caller must hold RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* stops address promotion in __inet_del_ifa() below */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
309
/* Return 1 if address @a (and, when non-zero, @b too) falls inside the
 * subnet of any primary address configured on @in_dev, else 0.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();
	return 0;
}
324
/* Delete the address *@ifap from @in_dev and notify listeners.
 * Deleting a primary address also deletes its secondaries, unless the
 * promote_secondaries sysctl is set, in which case the first matching
 * secondary is promoted to primary.  @destroy controls whether ifa1
 * itself is freed (RCU-deferred).  Caller must hold RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* device is going away: no point promoting anything */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* track last primary of <= scope: promotion
			 * insertion point further below */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* skip addresses outside ifa1's subnet */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* cascade-delete this secondary */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* move the promoted address after the last primary
		 * of equal-or-lesser scope */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* re-add routes for remaining secondaries with the
		 * promoted address as the new preferred source */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
				continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
428
/* Convenience wrapper for __inet_del_ifa() when there is no netlink
 * request to attribute the deletion to.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
434
435 static void check_lifetime(struct work_struct *work);
436
437 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
438
/* Insert @ifa into its device's address list, classifying it as primary
 * or secondary: an address whose subnet already has a primary becomes
 * IFA_F_SECONDARY and is appended, a new primary is inserted after the
 * last primary of equal-or-lesser scope.  Consumes @ifa on error.
 * Kicks the lifetime worker and announces the address via netlink and
 * the inetaddr notifier chain.  Caller must hold RTNL.
 * Returns 0, -EEXIST (duplicate local address in subnet) or -EINVAL
 * (scope conflict within the subnet).
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	/* an address of 0.0.0.0 is silently dropped */
	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		/* feed the configured address into the PRNG pool */
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* re-run lifetime expiry immediately for the new address */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
495
/* Insert @ifa without an originating netlink request (ioctl paths). */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
500
/* Bind @ifa to @dev's in_device and insert it.  Consumes @ifa on
 * failure.  Loopback addresses are forced to host scope.  Caller must
 * hold RTNL.  Returns 0 or a negative errno from inet_insert_ifa().
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
522
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
539
/* Called only from RTNL semaphored context. No locks. */

/* Find the primary address on @in_dev whose mask equals @mask and whose
 * subnet contains @prefix; NULL if none.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	ASSERT_RTNL();

	for_primary_ifa(in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	} endfor_ifa(in_dev);
	return NULL;
}
553
554 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
555 {
556 struct ip_mreqn mreq = {
557 .imr_multiaddr.s_addr = ifa->ifa_address,
558 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
559 };
560 int ret;
561
562 ASSERT_RTNL();
563
564 lock_sock(sk);
565 if (join)
566 ret = ip_mc_join_group(sk, &mreq);
567 else
568 ret = ip_mc_leave_group(sk, &mreq);
569 release_sock(sk);
570
571 return ret;
572 }
573
/* RTM_DELADDR handler: delete the first address on the target device
 * matching the attributes the request supplies (IFA_LOCAL, IFA_LABEL,
 * IFA_ADDRESS/prefixlen — each filter applies only if present).
 * Runs under RTNL.  Returns 0, or -EINVAL / -ENODEV / -EADDRNOTAVAIL.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  NULL);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (!in_dev) {
		err = -ENODEV;
		goto errout;
	}

	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
			continue;

		/* autojoined multicast address: leave the group first */
		if (ipv4_is_multicast(ifa->ifa_address))
			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
621
622 #define INFINITY_LIFE_TIME 0xFFFFFFFF
623
/* Periodic worker enforcing address lifetimes: removes addresses whose
 * valid_lft has expired and flags IFA_F_DEPRECATED on those past their
 * preferred_lft.  Each hash bucket is first scanned cheaply under RCU;
 * only buckets that actually need changes are re-scanned under RTNL,
 * where the modifications are made.  Reschedules itself for the next
 * earliest expiry (clamped to a minimum interval).
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* pass 1: read-only scan under RCU */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* pass 2: apply changes under RTNL; _safe because
		 * inet_del_ifa() unlinks entries as we walk */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* find ifa's slot in its device list so
				 * inet_del_ifa() can unlink it */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
721
/* Apply valid/preferred lifetimes (in seconds) to @ifa and stamp it.
 * An infinite valid lifetime marks the address IFA_F_PERMANENT; a zero
 * preferred lifetime marks it IFA_F_DEPRECATED immediately.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	/* creation stamp is set once, on first configuration */
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
745
/* Build an in_ifaddr from an RTM_NEWADDR request.  Validates the
 * message, resolves the target device, allocates and fills the address
 * (IFA_ADDRESS defaults to IFA_LOCAL, the label defaults to the device
 * name) and extracts lifetimes from IFA_CACHEINFO into *@pvalid_lft /
 * *@pprefered_lft.  The returned address holds a reference on its
 * in_device.  Returns the address or an ERR_PTR().
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  NULL);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroy with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* reference dropped by inet_rcu_free_ifa() when ifa dies */
	in_dev_hold(in_dev);

	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* IFA_FLAGS (u32) supersedes the narrower header field */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* preferred lifetime must not exceed valid lifetime */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
829
830 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
831 {
832 struct in_device *in_dev = ifa->ifa_dev;
833 struct in_ifaddr *ifa1, **ifap;
834
835 if (!ifa->ifa_local)
836 return NULL;
837
838 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
839 ifap = &ifa1->ifa_next) {
840 if (ifa1->ifa_mask == ifa->ifa_mask &&
841 inet_ifa_match(ifa1->ifa_address, ifa) &&
842 ifa1->ifa_local == ifa->ifa_local)
843 return ifa1;
844 }
845 return NULL;
846 }
847
/* RTM_NEWADDR handler: add a new address, or update the lifetimes of
 * an existing one when NLM_F_REPLACE is set (NLM_F_EXCL or a missing
 * REPLACE on an existing address yields -EEXIST).  IFA_F_MCAUTOJOIN
 * addresses also join the multicast group.  Runs under RTNL.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
					       true, ifa);

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
	} else {
		/* the freshly built copy is no longer needed */
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		/* re-evaluate expiry with the new lifetimes at once */
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
	}
	return 0;
}
893
894 /*
895 * Determine a default network mask, based on the IP address.
896 */
897
898 static int inet_abc_len(__be32 addr)
899 {
900 int rc = -1; /* Something else, probably a multicast. */
901
902 if (ipv4_is_zeronet(addr))
903 rc = 0;
904 else {
905 __u32 haddr = ntohl(addr);
906
907 if (IN_CLASSA(haddr))
908 rc = 8;
909 else if (IN_CLASSB(haddr))
910 rc = 16;
911 else if (IN_CLASSC(haddr))
912 rc = 24;
913 }
914
915 return rc;
916 }
917
918
/* Legacy SIOC[GS]IF* ioctl handler for IPv4 interface addresses.
 * Copies the user's ifreq in, resolves the target address by label
 * (optionally label+address, 4.4BSD alias style) and dispatches on
 * @cmd.  Get variants copy the result back to userspace; set variants
 * require CAP_NET_ADMIN and re-insert the modified address.  Returns 0
 * or a negative errno.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* "eth0:1" alias labels: strip the suffix for device lookup */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* restore the stripped alias separator */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* SIOCSIFADDR and SIOCSIFFLAGS may legitimately operate
	 * without a pre-existing address; everything else needs one */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* alias: clearing IFF_UP deletes just this
			 * labelled address, not the whole device */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* remove, modify, re-insert below */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* derive classful mask and broadcast */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "get" success path: copy the filled ifreq back to userspace */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1160
/* SIOCGIFCONF helper: write one ifreq (label + local address) per
 * address on @dev into the user buffer @buf of length @len.  With a
 * NULL @buf, only the space needed is computed.  Returns the number of
 * bytes consumed (or required), or -EFAULT on copy failure.
 */
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			/* sizing pass only */
			done += sizeof(ifr);
			continue;
		}
		if (len < (int) sizeof(ifr))
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}
1196
/* Return the first primary address on @in_dev whose scope is no wider
 * than @scope, skipping link-scoped addresses.  Returns 0 if no
 * address qualifies.
 */
static __be32 in_dev_select_addr(const struct in_device *in_dev,
				 int scope)
{
	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope != RT_SCOPE_LINK &&
		    ifa->ifa_scope <= scope)
			return ifa->ifa_local;
	} endfor_ifa(in_dev);

	return 0;
}
1208
/* Select a source address on @dev suitable for reaching @dst within
 * @scope.  A primary address in dst's subnet is preferred; otherwise
 * the first primary address of acceptable scope is used.  If the
 * device itself has no usable address, the search widens to the L3
 * master (VRF) device and then to every device in the same L3 domain
 * (loopback is visited first since it heads the dev_base list).
 * Returns 0 if nothing is found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case.  It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		/* Stay within the same L3 domain as the original device. */
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1271
/* Check whether @in_dev has an address compatible with the wildcard
 * pattern (@dst subnet, @local address, maximum @scope).  Walks all
 * addresses (primary and secondary) tracking two pieces of state:
 * "addr" is a candidate local address within scope, "same" records
 * that an address matching both @local and @dst has been seen.
 * Returns the confirmed address, or 0 if no combination matches.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	/* Only report addr if a dst/local match was also found. */
	return same ? addr : 0;
}
1308
/*
 * Confirm that local IP address exists using wildcards:
 * - net: netns to check, cannot be NULL
 * - in_dev: only on this interface, NULL=any interface
 * - dst: only in the same subnet as dst, 0=any dst
 * - local: address, 0=autoselect the local address
 * - scope: maximum allowed scope value for the local address
 *
 * Returns the confirmed address or 0 if no interface matches.
 */
__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
			 __be32 dst, __be32 local, int scope)
{
	__be32 addr = 0;
	struct net_device *dev;

	/* A specific interface was requested: check only that one. */
	if (in_dev)
		return confirm_addr_indev(in_dev, dst, local, scope);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
			addr = confirm_addr_indev(in_dev, dst, local, scope);
			if (addr)
				break;
		}
	}
	rcu_read_unlock();

	return addr;
}
EXPORT_SYMBOL(inet_confirm_addr);
1340
1341 /*
1342 * Device notifier
1343 */
1344
/* Subscribe @nb to IPv4 address add/delete notifications. */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1350
/* Remove @nb from the IPv4 address notification chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1356
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* First address keeps the bare device name. */
		if (named++ == 0)
			goto skip;
		dot = strchr(old, ':');
		/* Old label had no alias suffix: synthesize ":<n>". */
		if (!dot) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		/* Append the suffix; if it won't fit, overwrite the tail
		 * of the new name so the label stays within IFNAMSIZ.
		 */
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1385
/* An interface can carry IPv4 only when its MTU is at least the
 * RFC 791 minimum datagram size of 68 bytes.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return !(mtu < 68);
}
1390
1391 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1392 struct in_device *in_dev)
1393
1394 {
1395 struct in_ifaddr *ifa;
1396
1397 for (ifa = in_dev->ifa_list; ifa;
1398 ifa = ifa->ifa_next) {
1399 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1400 ifa->ifa_local, dev,
1401 ifa->ifa_local, NULL,
1402 dev->dev_addr, NULL);
1403 }
1404 }
1405
/* Netdevice notifier: keeps the per-device IPv4 state (in_device) in
 * sync with device lifecycle events.  Called only under the RTNL
 * semaphore, so __in_dev_get_rtnl() is safe here.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback traffic never needs IPsec. */
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Should be impossible: in_dev already set at register. */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 on loopback. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough (falls through) */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1500
/* Hook inetdev_event() into the netdevice notifier chain. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1504
/* Worst-case netlink message size for one RTM_NEWADDR/RTM_DELADDR
 * notification; must cover every attribute inet_fill_ifaddr() may emit.
 */
static size_t inet_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(4) /* IFA_ADDRESS */
	       + nla_total_size(4) /* IFA_LOCAL */
	       + nla_total_size(4) /* IFA_BROADCAST */
	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
	       + nla_total_size(4)  /* IFA_FLAGS */
	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
1515
1516 static inline u32 cstamp_delta(unsigned long cstamp)
1517 {
1518 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1519 }
1520
1521 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1522 unsigned long tstamp, u32 preferred, u32 valid)
1523 {
1524 struct ifa_cacheinfo ci;
1525
1526 ci.cstamp = cstamp_delta(cstamp);
1527 ci.tstamp = cstamp_delta(tstamp);
1528 ci.ifa_prefered = preferred;
1529 ci.ifa_valid = valid;
1530
1531 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1532 }
1533
/* Fill one RTM_*ADDR netlink message describing @ifa into @skb.
 * Remaining lifetimes are reported relative to "now" for non-permanent
 * addresses.  Returns 0 on success or -EMSGSIZE (message rolled back)
 * if @skb has no room.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		/* Report lifetimes minus the time already elapsed. */
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Zero-valued addresses/labels are simply omitted. */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1593
/* RTM_GETADDR dump callback: walk all devices (by hash bucket) and all
 * their addresses, emitting one message per address.  Resume state
 * across dump invocations is kept in cb->args[0..2] (bucket, device
 * index, address index).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Genid lets userspace detect an inconsistent dump. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Only restart mid-device on the exact resume spot. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1650
/* Broadcast an RTM_NEWADDR/RTM_DELADDR notification for @ifa to the
 * RTNLGRP_IPV4_IFADDR multicast group.  On failure the error is
 * recorded on the group so listeners see ENOBUFS.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1677
1678 static size_t inet_get_link_af_size(const struct net_device *dev,
1679 u32 ext_filter_mask)
1680 {
1681 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1682
1683 if (!in_dev)
1684 return 0;
1685
1686 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1687 }
1688
1689 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1690 u32 ext_filter_mask)
1691 {
1692 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1693 struct nlattr *nla;
1694 int i;
1695
1696 if (!in_dev)
1697 return -ENODATA;
1698
1699 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1700 if (!nla)
1701 return -EMSGSIZE;
1702
1703 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1704 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1705
1706 return 0;
1707 }
1708
/* Policy for IFLA_AF_SPEC/AF_INET attributes from userspace. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1712
1713 static int inet_validate_link_af(const struct net_device *dev,
1714 const struct nlattr *nla)
1715 {
1716 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1717 int err, rem;
1718
1719 if (dev && !__in_dev_get_rtnl(dev))
1720 return -EAFNOSUPPORT;
1721
1722 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1723 if (err < 0)
1724 return err;
1725
1726 if (tb[IFLA_INET_CONF]) {
1727 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1728 int cfgid = nla_type(a);
1729
1730 if (nla_len(a) < 4)
1731 return -EINVAL;
1732
1733 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1734 return -EINVAL;
1735 }
1736 }
1737
1738 return 0;
1739 }
1740
/* Apply a previously validated IFLA_AF_SPEC/AF_INET blob to @dev's
 * per-device config.  The BUG() on parse failure is safe because
 * inet_validate_link_af() already accepted this attribute.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1760
1761 static int inet_netconf_msgsize_devconf(int type)
1762 {
1763 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1764 + nla_total_size(4); /* NETCONFA_IFINDEX */
1765 bool all = false;
1766
1767 if (type == NETCONFA_ALL)
1768 all = true;
1769
1770 if (all || type == NETCONFA_FORWARDING)
1771 size += nla_total_size(4);
1772 if (all || type == NETCONFA_RP_FILTER)
1773 size += nla_total_size(4);
1774 if (all || type == NETCONFA_MC_FORWARDING)
1775 size += nla_total_size(4);
1776 if (all || type == NETCONFA_PROXY_NEIGH)
1777 size += nla_total_size(4);
1778 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1779 size += nla_total_size(4);
1780
1781 return size;
1782 }
1783
/* Fill one RTM_NEWNETCONF message for @ifindex into @skb.  @type
 * selects which attribute to emit (NETCONFA_ALL emits every one); a
 * NULL @devconf produces a header-only message (used for deletion
 * notifications).  Returns 0 or -EMSGSIZE with the message rolled back.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	/* No config given: header-only message (e.g. RTM_DELNETCONF). */
	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1839
/* Broadcast a netconf change (@type attribute of @devconf on
 * @ifindex) to the RTNLGRP_IPV4_NETCONF multicast group.
 */
void inet_netconf_notify_devconf(struct net *net, int event, int type,
				 int ifindex, struct ipv4_devconf *devconf)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
					event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
1864
/* Policy for RTM_GETNETCONF requests from userspace. */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1872
/* RTM_GETNETCONF handler: reply with the full netconf state of the
 * interface named by NETCONFA_IFINDEX (or the "all"/"default"
 * pseudo-interfaces).  Runs under RTNL.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct netconfmsg *ncm;
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
			  devconf_ipv4_policy, NULL);
	if (err < 0)
		goto errout;

	/* NETCONFA_IFINDEX is mandatory. */
	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
1933
/* RTM_GETNETCONF dump callback: emit the netconf state of every device,
 * then two pseudo-entries for the "all" and "default" configs (encoded
 * as bucket values NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 so
 * resumption works).  Resume state lives in cb->args[0..1].
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Genid lets userspace detect an inconsistent dump. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* Pseudo-entry: the "all" config. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Pseudo-entry: the "default" config. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2004
2005 #ifdef CONFIG_SYSCTL
2006
2007 static void devinet_copy_dflt_conf(struct net *net, int i)
2008 {
2009 struct net_device *dev;
2010
2011 rcu_read_lock();
2012 for_each_netdev_rcu(net, dev) {
2013 struct in_device *in_dev;
2014
2015 in_dev = __in_dev_get_rcu(dev);
2016 if (in_dev && !test_bit(i, in_dev->cnf.state))
2017 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2018 }
2019 rcu_read_unlock();
2020 }
2021
/* Apply a change of the "all" forwarding sysctl: mirror it into the
 * default config and every device's config, disabling LRO and sending
 * a netconf notification for each.  Called with RTNL locked.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* A forwarding host must not honour ICMP redirects. */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	for_each_netdev(net, dev) {
		struct in_device *in_dev;

		/* LRO-merged frames must not be forwarded. */
		if (on)
			dev_disable_lro(dev);

		in_dev = __in_dev_get_rtnl(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
	}
}
2054
2055 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2056 {
2057 if (cnf == net->ipv4.devconf_dflt)
2058 return NETCONFA_IFINDEX_DEFAULT;
2059 else if (cnf == net->ipv4.devconf_all)
2060 return NETCONFA_IFINDEX_ALL;
2061 else {
2062 struct in_device *idev
2063 = container_of(cnf, struct in_device, cnf);
2064 return idev->dev->ifindex;
2065 }
2066 }
2067
/* Generic sysctl handler for ipv4 devconf entries.  Records that the
 * entry was explicitly set (so default propagation skips it), mirrors
 * writes to the "default" config onto unmodified devices, flushes the
 * route cache for entries that affect routing decisions, and emits
 * netconf notifications for the entries userspace tracks.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Index of this entry within the devconf data array. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2116
/* Sysctl handler for "forwarding" entries.  Changing anything but the
 * "default" entry requires RTNL; since sysctl handlers cannot sleep on
 * it here, a failed trylock restores the old value and restarts the
 * syscall.  The "all" entry fans out to every device via
 * inet_forward_change(); a per-device entry notifies just that device.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				/* LRO-merged frames must not be forwarded. */
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2160
2161 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2162 void __user *buffer,
2163 size_t *lenp, loff_t *ppos)
2164 {
2165 int *valp = ctl->data;
2166 int val = *valp;
2167 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2168 struct net *net = ctl->extra2;
2169
2170 if (write && *valp != val)
2171 rt_cache_flush(net);
2172
2173 return ret;
2174 }
2175
/* Build one ctl_table entry for an ipv4_devconf field.  .data points
 * into the template ipv4_devconf; __devinet_sysctl_register() rebases
 * it onto the per-device/per-netns copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Writable (0644) entry with the generic devconf handler. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Writable entry with a custom proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Writable entry whose change flushes the route cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2198
/* Template sysctl table for net.ipv4.conf.<dev>.*; kmemdup'd and
 * rebased per device/netns by __devinet_sysctl_register().  The last
 * slot of devinet_vars stays zeroed as the table terminator.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2249
/* Clone the devinet_sysctl template, rebase each entry's .data onto
 * the config block @p, and register it at net/ipv4/conf/<dev_name>.
 * Emits an RTM_NEWNETCONF notification on success.  Returns 0 or
 * -ENOBUFS on any failure.
 */
static int __devinet_sysctl_register(struct net *net, char *dev_name,
				     int ifindex, struct ipv4_devconf *p)
{
	int i;
	struct devinet_sysctl_table *t;
	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];

	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	/* The "- 1" skips the zeroed table terminator. */
	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
		t->devinet_vars[i].extra1 = p;
		t->devinet_vars[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);

	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl = t;

	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
				    ifindex, p);
	return 0;

free:
	kfree(t);
out:
	return -ENOBUFS;
}
2284
2285 static void __devinet_sysctl_unregister(struct net *net,
2286 struct ipv4_devconf *cnf, int ifindex)
2287 {
2288 struct devinet_sysctl_table *t = cnf->sysctl;
2289
2290 if (t) {
2291 cnf->sysctl = NULL;
2292 unregister_net_sysctl_table(t->sysctl_header);
2293 kfree(t);
2294 }
2295
2296 inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2297 }
2298
2299 static int devinet_sysctl_register(struct in_device *idev)
2300 {
2301 int err;
2302
2303 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2304 return -EINVAL;
2305
2306 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2307 if (err)
2308 return err;
2309 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2310 idev->dev->ifindex, &idev->cnf);
2311 if (err)
2312 neigh_sysctl_unregister(idev->arp_parms);
2313 return err;
2314 }
2315
2316 static void devinet_sysctl_unregister(struct in_device *idev)
2317 {
2318 struct net *net = dev_net(idev->dev);
2319
2320 __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2321 neigh_sysctl_unregister(idev->arp_parms);
2322 }
2323
/* Template for the global "net/ipv4/ip_forward" sysctl.  It points at
 * init_net's devconf here; devinet_init_net() kmemdup()s it and re-points
 * ->data/->extra1/->extra2 for every non-initial network namespace.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* sentinel */
};
2337 #endif
2338
/* Per-namespace setup: give @net its "all"/"default" devconf structures
 * and (with CONFIG_SYSCTL) the matching sysctl entries plus ip_forward.
 * init_net reuses the static templates; other namespaces get kmemdup()'d
 * private copies.  Returns 0 or a negative errno, unwinding on failure.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-initial namespace: clone the templates so each netns
		 * has independent configuration state.
		 */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (!all)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (!dflt)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (!tbl)
			goto err_alloc_ctl;

		/* Re-point the cloned ip_forward entry at this netns' copy. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in reverse order; the pointer comparisons below keep the
	 * static init_net templates from being kfree()'d.
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2411
/* Per-namespace teardown: unregister the sysctl entries created by
 * devinet_init_net() (in reverse order) and free the devconf copies.
 * kfree() is a no-op for init_net, whose devconf pointers reference the
 * static templates — NOTE(review): relies on kfree() never being given
 * those statics; see the pointer guards in devinet_init_net().
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Grab the table pointer before unregistering invalidates the header. */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2428
/* Pernet hooks: run devinet_init_net()/devinet_exit_net() for every
 * network namespace as it is created/destroyed.
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2433
/* rtnetlink per-address-family ops for AF_INET: fill/validate/apply the
 * IFLA_AF_SPEC attributes on link dumps and changes.
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2441
/* Boot-time initialization for IPv4 device support: set up the address
 * hash table, register the pernet ops, netdev notifier, address-lifetime
 * worker, AF_INET rtnetlink ops, and the RTM_*ADDR / RTM_GETNETCONF
 * message handlers.  Registration order is load-bearing: pernet state
 * must exist before notifier/rtnetlink callbacks can fire.
 */
void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Periodically expire addresses with finite preferred/valid lifetimes. */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, NULL);
}