]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - net/core/dev.c
[PATCH] Notifier chain update: API changes
[mirror_ubuntu-bionic-kernel.git] / net / core / dev.c
1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/config.h>
80 #include <linux/cpu.h>
81 #include <linux/types.h>
82 #include <linux/kernel.h>
83 #include <linux/sched.h>
84 #include <linux/mutex.h>
85 #include <linux/string.h>
86 #include <linux/mm.h>
87 #include <linux/socket.h>
88 #include <linux/sockios.h>
89 #include <linux/errno.h>
90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/sock.h>
97 #include <linux/rtnetlink.h>
98 #include <linux/proc_fs.h>
99 #include <linux/seq_file.h>
100 #include <linux/stat.h>
101 #include <linux/if_bridge.h>
102 #include <linux/divert.h>
103 #include <net/dst.h>
104 #include <net/pkt_sched.h>
105 #include <net/checksum.h>
106 #include <linux/highmem.h>
107 #include <linux/init.h>
108 #include <linux/kmod.h>
109 #include <linux/module.h>
110 #include <linux/kallsyms.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <linux/wireless.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118
/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16? Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

/* Taken (with BHs disabled) around every insertion into / removal from the lists below. */
static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16];	/* 16 way hashed list */
static struct list_head ptype_all;		/* Taps */
150
/*
 * The @dev_base list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
/* Head of the device list; entries are chained through ->next. */
struct net_device *dev_base;
/* Starts out pointing at dev_base; presumably the append position - its users are not in this part of the file. */
static struct net_device **dev_tail = &dev_base;
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_base_lock);
176
/* log2 of the number of buckets in each of the two lookup tables below. */
#define NETDEV_HASHBITS	8
/* Buckets indexed by dev_name_hash(): devices hashed by interface name. */
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
/* Buckets indexed by dev_index_hash(): devices hashed by ifindex. */
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
180
181 static inline struct hlist_head *dev_name_hash(const char *name)
182 {
183 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
184 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
185 }
186
187 static inline struct hlist_head *dev_index_hash(int ifindex)
188 {
189 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
190 }
191
/*
 *	Our notifier list.
 *
 *	Blocking notifier chain that receives the NETDEV_* events raised in
 *	this file (NETDEV_UP, NETDEV_DOWN, NETDEV_CHANGENAME, ...).
 */

static BLOCKING_NOTIFIER_HEAD(netdev_chain);
197
/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 *
 *	One instance exists per CPU.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
203
/*
 * sysfs glue.  With CONFIG_SYSFS the real implementations live in
 * another translation unit; without it the init/register calls
 * evaluate to 0 and the unregister call expands to an empty statement.
 */
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()	 	(0)
#define netdev_register_sysfs(dev)	(0)
#define	netdev_unregister_sysfs(dev)	do { } while(0)
#endif
213
214
215 /*******************************************************************************
216
217 Protocol management and registration routines
218
219 *******************************************************************************/
220
/*
 *	For efficiency.
 *
 *	Counts the ETH_P_ALL handlers: incremented by dev_add_pack() and
 *	decremented by __dev_remove_pack() for packet types of that kind.
 */

int netdev_nit;
226
/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the search for protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a protocol handler that mangles packets were the
 *	first on the list, it would not be able to sense that the packet
 *	is cloned and should be copied-on-write, so it would change the
 *	packet in place and subsequent readers would get a broken packet.
 *							--ANK (980803)
 */
242
243 /**
244 * dev_add_pack - add packet handler
245 * @pt: packet type declaration
246 *
247 * Add a protocol handler to the networking stack. The passed &packet_type
248 * is linked into kernel lists and may not be freed until it has been
249 * removed from the kernel lists.
250 *
251 * This call does not sleep therefore it can not
252 * guarantee all CPU's that are in middle of receiving packets
253 * will see the new packet type (until the next received packet).
254 */
255
256 void dev_add_pack(struct packet_type *pt)
257 {
258 int hash;
259
260 spin_lock_bh(&ptype_lock);
261 if (pt->type == htons(ETH_P_ALL)) {
262 netdev_nit++;
263 list_add_rcu(&pt->list, &ptype_all);
264 } else {
265 hash = ntohs(pt->type) & 15;
266 list_add_rcu(&pt->list, &ptype_base[hash]);
267 }
268 spin_unlock_bh(&ptype_lock);
269 }
270
271 /**
272 * __dev_remove_pack - remove packet handler
273 * @pt: packet type declaration
274 *
275 * Remove a protocol handler that was previously added to the kernel
276 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
277 * from the kernel lists and can be freed or reused once this function
278 * returns.
279 *
280 * The packet type might still be in use by receivers
281 * and must not be freed until after all the CPU's have gone
282 * through a quiescent state.
283 */
284 void __dev_remove_pack(struct packet_type *pt)
285 {
286 struct list_head *head;
287 struct packet_type *pt1;
288
289 spin_lock_bh(&ptype_lock);
290
291 if (pt->type == htons(ETH_P_ALL)) {
292 netdev_nit--;
293 head = &ptype_all;
294 } else
295 head = &ptype_base[ntohs(pt->type) & 15];
296
297 list_for_each_entry(pt1, head, list) {
298 if (pt == pt1) {
299 list_del_rcu(&pt->list);
300 goto out;
301 }
302 }
303
304 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
305 out:
306 spin_unlock_bh(&ptype_lock);
307 }
/**
 *	dev_remove_pack - remove packet handler and wait for readers
 *	@pt: packet type declaration
 *
 *	Unlinks @pt, which must previously have been registered with
 *	dev_add_pack(), via __dev_remove_pack() and then calls
 *	synchronize_net().  Once this function returns the packet type
 *	can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Step 1: take it off the list so no new reader can find it. */
	__dev_remove_pack(pt);

	/* Step 2: wait out the readers that may already hold a pointer. */
	synchronize_net();
}
326
327 /******************************************************************************
328
329 Device Boot-time Settings Routines
330
331 *******************************************************************************/
332
/*
 * Boot time configuration table.  A slot whose name starts with '\0'
 * or ' ' is treated as free by the routines below.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
335
336 /**
337 * netdev_boot_setup_add - add new setup entry
338 * @name: name of the device
339 * @map: configured settings for the device
340 *
341 * Adds new setup entry to the dev_boot_setup list. The function
342 * returns 0 on error and 1 on success. This is a generic routine to
343 * all netdevices.
344 */
345 static int netdev_boot_setup_add(char *name, struct ifmap *map)
346 {
347 struct netdev_boot_setup *s;
348 int i;
349
350 s = dev_boot_setup;
351 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
352 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
353 memset(s[i].name, 0, sizeof(s[i].name));
354 strcpy(s[i].name, name);
355 memcpy(&s[i].map, map, sizeof(s[i].map));
356 break;
357 }
358 }
359
360 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
361 }
362
363 /**
364 * netdev_boot_setup_check - check boot time settings
365 * @dev: the netdevice
366 *
367 * Check boot time settings for the device.
368 * The found settings are set for the device to be used
369 * later in the device probing.
370 * Returns 0 if no settings found, 1 if they are.
371 */
372 int netdev_boot_setup_check(struct net_device *dev)
373 {
374 struct netdev_boot_setup *s = dev_boot_setup;
375 int i;
376
377 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
378 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
379 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
380 dev->irq = s[i].map.irq;
381 dev->base_addr = s[i].map.base_addr;
382 dev->mem_start = s[i].map.mem_start;
383 dev->mem_end = s[i].map.mem_end;
384 return 1;
385 }
386 }
387 return 0;
388 }
389
390
391 /**
392 * netdev_boot_base - get address from boot time settings
393 * @prefix: prefix for network device
394 * @unit: id for network device
395 *
396 * Check boot time settings for the base address of device.
397 * The found settings are set for the device to be used
398 * later in the device probing.
399 * Returns 0 if no settings found.
400 */
401 unsigned long netdev_boot_base(const char *prefix, int unit)
402 {
403 const struct netdev_boot_setup *s = dev_boot_setup;
404 char name[IFNAMSIZ];
405 int i;
406
407 sprintf(name, "%s%d", prefix, unit);
408
409 /*
410 * If device already registered then return base of 1
411 * to indicate not to probe for this interface
412 */
413 if (__dev_get_by_name(name))
414 return 1;
415
416 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
417 if (!strcmp(name, s[i].name))
418 return s[i].map.base_addr;
419 return 0;
420 }
421
422 /*
423 * Saves at boot time configured settings for any netdevice.
424 */
425 int __init netdev_boot_setup(char *str)
426 {
427 int ints[5];
428 struct ifmap map;
429
430 str = get_options(str, ARRAY_SIZE(ints), ints);
431 if (!str || !*str)
432 return 0;
433
434 /* Save settings */
435 memset(&map, 0, sizeof(map));
436 if (ints[0] > 0)
437 map.irq = ints[1];
438 if (ints[0] > 1)
439 map.base_addr = ints[2];
440 if (ints[0] > 2)
441 map.mem_start = ints[3];
442 if (ints[0] > 3)
443 map.mem_end = ints[4];
444
445 /* Add new entry to the list */
446 return netdev_boot_setup_add(str, &map);
447 }
448
449 __setup("netdev=", netdev_boot_setup);
450
451 /*******************************************************************************
452
453 Device Interface Subroutines
454
455 *******************************************************************************/
456
457 /**
458 * __dev_get_by_name - find a device by its name
459 * @name: name to find
460 *
461 * Find an interface by name. Must be called under RTNL semaphore
462 * or @dev_base_lock. If the name is found a pointer to the device
463 * is returned. If the name is not found then %NULL is returned. The
464 * reference counters are not incremented so the caller must be
465 * careful with locks.
466 */
467
468 struct net_device *__dev_get_by_name(const char *name)
469 {
470 struct hlist_node *p;
471
472 hlist_for_each(p, dev_name_hash(name)) {
473 struct net_device *dev
474 = hlist_entry(p, struct net_device, name_hlist);
475 if (!strncmp(dev->name, name, IFNAMSIZ))
476 return dev;
477 }
478 return NULL;
479 }
480
481 /**
482 * dev_get_by_name - find a device by its name
483 * @name: name to find
484 *
485 * Find an interface by name. This can be called from any
486 * context and does its own locking. The returned handle has
487 * the usage count incremented and the caller must use dev_put() to
488 * release it when it is no longer needed. %NULL is returned if no
489 * matching device is found.
490 */
491
492 struct net_device *dev_get_by_name(const char *name)
493 {
494 struct net_device *dev;
495
496 read_lock(&dev_base_lock);
497 dev = __dev_get_by_name(name);
498 if (dev)
499 dev_hold(dev);
500 read_unlock(&dev_base_lock);
501 return dev;
502 }
503
504 /**
505 * __dev_get_by_index - find a device by its ifindex
506 * @ifindex: index of device
507 *
508 * Search for an interface by index. Returns %NULL if the device
509 * is not found or a pointer to the device. The device has not
510 * had its reference counter increased so the caller must be careful
511 * about locking. The caller must hold either the RTNL semaphore
512 * or @dev_base_lock.
513 */
514
515 struct net_device *__dev_get_by_index(int ifindex)
516 {
517 struct hlist_node *p;
518
519 hlist_for_each(p, dev_index_hash(ifindex)) {
520 struct net_device *dev
521 = hlist_entry(p, struct net_device, index_hlist);
522 if (dev->ifindex == ifindex)
523 return dev;
524 }
525 return NULL;
526 }
527
528
529 /**
530 * dev_get_by_index - find a device by its ifindex
531 * @ifindex: index of device
532 *
533 * Search for an interface by index. Returns NULL if the device
534 * is not found or a pointer to the device. The device returned has
535 * had a reference added and the pointer is safe until the user calls
536 * dev_put to indicate they have finished with it.
537 */
538
539 struct net_device *dev_get_by_index(int ifindex)
540 {
541 struct net_device *dev;
542
543 read_lock(&dev_base_lock);
544 dev = __dev_get_by_index(ifindex);
545 if (dev)
546 dev_hold(dev);
547 read_unlock(&dev_base_lock);
548 return dev;
549 }
550
551 /**
552 * dev_getbyhwaddr - find a device by its hardware address
553 * @type: media type of device
554 * @ha: hardware address
555 *
556 * Search for an interface by MAC address. Returns NULL if the device
557 * is not found or a pointer to the device. The caller must hold the
558 * rtnl semaphore. The returned device has not had its ref count increased
559 * and the caller must therefore be careful about locking
560 *
561 * BUGS:
562 * If the API was consistent this would be __dev_get_by_hwaddr
563 */
564
565 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
566 {
567 struct net_device *dev;
568
569 ASSERT_RTNL();
570
571 for (dev = dev_base; dev; dev = dev->next)
572 if (dev->type == type &&
573 !memcmp(dev->dev_addr, ha, dev->addr_len))
574 break;
575 return dev;
576 }
577
578 EXPORT_SYMBOL(dev_getbyhwaddr);
579
580 struct net_device *dev_getfirstbyhwtype(unsigned short type)
581 {
582 struct net_device *dev;
583
584 rtnl_lock();
585 for (dev = dev_base; dev; dev = dev->next) {
586 if (dev->type == type) {
587 dev_hold(dev);
588 break;
589 }
590 }
591 rtnl_unlock();
592 return dev;
593 }
594
595 EXPORT_SYMBOL(dev_getfirstbyhwtype);
596
597 /**
598 * dev_get_by_flags - find any device with given flags
599 * @if_flags: IFF_* values
600 * @mask: bitmask of bits in if_flags to check
601 *
602 * Search for any interface with the given flags. Returns NULL if a device
603 * is not found or a pointer to the device. The device returned has
604 * had a reference added and the pointer is safe until the user calls
605 * dev_put to indicate they have finished with it.
606 */
607
608 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
609 {
610 struct net_device *dev;
611
612 read_lock(&dev_base_lock);
613 for (dev = dev_base; dev != NULL; dev = dev->next) {
614 if (((dev->flags ^ if_flags) & mask) == 0) {
615 dev_hold(dev);
616 break;
617 }
618 }
619 read_unlock(&dev_base_lock);
620 return dev;
621 }
622
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs
 *	to work.  A name is rejected when it is empty, is "." or "..",
 *	or contains a '/'.  Returns 1 for an acceptable name, 0 otherwise.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;
	if (strchr(name, '/') != NULL)
		return 0;
	return 1;
}
637
638 /**
639 * dev_alloc_name - allocate a name for a device
640 * @dev: device
641 * @name: name format string
642 *
643 * Passed a format string - eg "lt%d" it will try and find a suitable
644 * id. Not efficient for many devices, not called a lot. The caller
645 * must hold the dev_base or rtnl lock while allocating the name and
646 * adding the device in order to avoid duplicates. Returns the number
647 * of the unit assigned or a negative errno code.
648 */
649
650 int dev_alloc_name(struct net_device *dev, const char *name)
651 {
652 int i = 0;
653 char buf[IFNAMSIZ];
654 const char *p;
655 const int max_netdevices = 8*PAGE_SIZE;
656 long *inuse;
657 struct net_device *d;
658
659 p = strnchr(name, IFNAMSIZ-1, '%');
660 if (p) {
661 /*
662 * Verify the string as this thing may have come from
663 * the user. There must be either one "%d" and no other "%"
664 * characters.
665 */
666 if (p[1] != 'd' || strchr(p + 2, '%'))
667 return -EINVAL;
668
669 /* Use one page as a bit array of possible slots */
670 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
671 if (!inuse)
672 return -ENOMEM;
673
674 for (d = dev_base; d; d = d->next) {
675 if (!sscanf(d->name, name, &i))
676 continue;
677 if (i < 0 || i >= max_netdevices)
678 continue;
679
680 /* avoid cases where sscanf is not exact inverse of printf */
681 snprintf(buf, sizeof(buf), name, i);
682 if (!strncmp(buf, d->name, IFNAMSIZ))
683 set_bit(i, inuse);
684 }
685
686 i = find_first_zero_bit(inuse, max_netdevices);
687 free_page((unsigned long) inuse);
688 }
689
690 snprintf(buf, sizeof(buf), name, i);
691 if (!__dev_get_by_name(buf)) {
692 strlcpy(dev->name, buf, IFNAMSIZ);
693 return i;
694 }
695
696 /* It is possible to run out of possible slots
697 * when the name is long and there isn't enough space left
698 * for the digits, or if all bits are used.
699 */
700 return -ENFILE;
701 }
702
703
704 /**
705 * dev_change_name - change name of a device
706 * @dev: device
707 * @newname: name (or format string) must be at least IFNAMSIZ
708 *
709 * Change name of a device, can pass format strings "eth%d".
710 * for wildcarding.
711 */
712 int dev_change_name(struct net_device *dev, char *newname)
713 {
714 int err = 0;
715
716 ASSERT_RTNL();
717
718 if (dev->flags & IFF_UP)
719 return -EBUSY;
720
721 if (!dev_valid_name(newname))
722 return -EINVAL;
723
724 if (strchr(newname, '%')) {
725 err = dev_alloc_name(dev, newname);
726 if (err < 0)
727 return err;
728 strcpy(newname, dev->name);
729 }
730 else if (__dev_get_by_name(newname))
731 return -EEXIST;
732 else
733 strlcpy(dev->name, newname, IFNAMSIZ);
734
735 err = class_device_rename(&dev->class_dev, dev->name);
736 if (!err) {
737 hlist_del(&dev->name_hlist);
738 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
739 blocking_notifier_call_chain(&netdev_chain,
740 NETDEV_CHANGENAME, dev);
741 }
742
743 return err;
744 }
745
746 /**
747 * netdev_features_change - device changes fatures
748 * @dev: device to cause notification
749 *
750 * Called to indicate a device has changed features.
751 */
752 void netdev_features_change(struct net_device *dev)
753 {
754 blocking_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
755 }
756 EXPORT_SYMBOL(netdev_features_change);
757
758 /**
759 * netdev_state_change - device changes state
760 * @dev: device to cause notification
761 *
762 * Called to indicate a device has changed state. This function calls
763 * the notifier chains for netdev_chain and sends a NEWLINK message
764 * to the routing socket.
765 */
766 void netdev_state_change(struct net_device *dev)
767 {
768 if (dev->flags & IFF_UP) {
769 blocking_notifier_call_chain(&netdev_chain,
770 NETDEV_CHANGE, dev);
771 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
772 }
773 }
774
775 /**
776 * dev_load - load a network module
777 * @name: name of interface
778 *
779 * If a network interface is not present and the process has suitable
780 * privileges this function loads the module. If module loading is not
781 * available in this kernel then it becomes a nop.
782 */
783
784 void dev_load(const char *name)
785 {
786 struct net_device *dev;
787
788 read_lock(&dev_base_lock);
789 dev = __dev_get_by_name(name);
790 read_unlock(&dev_base_lock);
791
792 if (!dev && capable(CAP_SYS_MODULE))
793 request_module("%s", name);
794 }
795
796 static int default_rebuild_header(struct sk_buff *skb)
797 {
798 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
799 skb->dev ? skb->dev->name : "NULL!!!");
800 kfree_skb(skb);
801 return 1;
802 }
803
804
805 /**
806 * dev_open - prepare an interface for use.
807 * @dev: device to open
808 *
809 * Takes a device from down to up state. The device's private open
810 * function is invoked and then the multicast lists are loaded. Finally
811 * the device is moved into the up state and a %NETDEV_UP message is
812 * sent to the netdev notifier chain.
813 *
814 * Calling this function on an active interface is a nop. On a failure
815 * a negative errno code is returned.
816 */
817 int dev_open(struct net_device *dev)
818 {
819 int ret = 0;
820
821 /*
822 * Is it already up?
823 */
824
825 if (dev->flags & IFF_UP)
826 return 0;
827
828 /*
829 * Is it even present?
830 */
831 if (!netif_device_present(dev))
832 return -ENODEV;
833
834 /*
835 * Call device private open method
836 */
837 set_bit(__LINK_STATE_START, &dev->state);
838 if (dev->open) {
839 ret = dev->open(dev);
840 if (ret)
841 clear_bit(__LINK_STATE_START, &dev->state);
842 }
843
844 /*
845 * If it went open OK then:
846 */
847
848 if (!ret) {
849 /*
850 * Set the flags.
851 */
852 dev->flags |= IFF_UP;
853
854 /*
855 * Initialize multicasting status
856 */
857 dev_mc_upload(dev);
858
859 /*
860 * Wakeup transmit queue engine
861 */
862 dev_activate(dev);
863
864 /*
865 * ... and announce new interface.
866 */
867 blocking_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
868 }
869 return ret;
870 }
871
872 /**
873 * dev_close - shutdown an interface.
874 * @dev: device to shutdown
875 *
876 * This function moves an active device into down state. A
877 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
878 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
879 * chain.
880 */
881 int dev_close(struct net_device *dev)
882 {
883 if (!(dev->flags & IFF_UP))
884 return 0;
885
886 /*
887 * Tell people we are going down, so that they can
888 * prepare to death, when device is still operating.
889 */
890 blocking_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
891
892 dev_deactivate(dev);
893
894 clear_bit(__LINK_STATE_START, &dev->state);
895
896 /* Synchronize to scheduled poll. We cannot touch poll list,
897 * it can be even on different cpu. So just clear netif_running(),
898 * and wait when poll really will happen. Actually, the best place
899 * for this is inside dev->stop() after device stopped its irq
900 * engine, but this requires more changes in devices. */
901
902 smp_mb__after_clear_bit(); /* Commit netif_running(). */
903 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
904 /* No hurry. */
905 msleep(1);
906 }
907
908 /*
909 * Call the device specific close. This cannot fail.
910 * Only if device is UP
911 *
912 * We allow it to be called even after a DETACH hot-plug
913 * event.
914 */
915 if (dev->stop)
916 dev->stop(dev);
917
918 /*
919 * Device is now down.
920 */
921
922 dev->flags &= ~IFF_UP;
923
924 /*
925 * Tell people we are down
926 */
927 blocking_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
928
929 return 0;
930 }
931
932
933 /*
934 * Device change register/unregister. These are not inline or static
935 * as we export them to the world.
936 */
937
938 /**
939 * register_netdevice_notifier - register a network notifier block
940 * @nb: notifier
941 *
942 * Register a notifier to be called when network device events occur.
943 * The notifier passed is linked into the kernel structures and must
944 * not be reused until it has been unregistered. A negative errno code
945 * is returned on a failure.
946 *
947 * When registered all registration and up events are replayed
948 * to the new notifier to allow device to have a race free
949 * view of the network device list.
950 */
951
952 int register_netdevice_notifier(struct notifier_block *nb)
953 {
954 struct net_device *dev;
955 int err;
956
957 rtnl_lock();
958 err = blocking_notifier_chain_register(&netdev_chain, nb);
959 if (!err) {
960 for (dev = dev_base; dev; dev = dev->next) {
961 nb->notifier_call(nb, NETDEV_REGISTER, dev);
962
963 if (dev->flags & IFF_UP)
964 nb->notifier_call(nb, NETDEV_UP, dev);
965 }
966 }
967 rtnl_unlock();
968 return err;
969 }
970
971 /**
972 * unregister_netdevice_notifier - unregister a network notifier block
973 * @nb: notifier
974 *
975 * Unregister a notifier previously registered by
976 * register_netdevice_notifier(). The notifier is unlinked into the
977 * kernel structures and may then be reused. A negative errno code
978 * is returned on a failure.
979 */
980
981 int unregister_netdevice_notifier(struct notifier_block *nb)
982 {
983 int err;
984
985 rtnl_lock();
986 err = blocking_notifier_chain_unregister(&netdev_chain, nb);
987 rtnl_unlock();
988 return err;
989 }
990
991 /**
992 * call_netdevice_notifiers - call all network notifier blocks
993 * @val: value passed unmodified to notifier function
994 * @v: pointer passed unmodified to notifier function
995 *
996 * Call all network notifier blocks. Parameters and return value
997 * are as for blocking_notifier_call_chain().
998 */
999
1000 int call_netdevice_notifiers(unsigned long val, void *v)
1001 {
1002 return blocking_notifier_call_chain(&netdev_chain, val, v);
1003 }
1004
1005 /* When > 0 there are consumers of rx skb time stamps */
1006 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1007
1008 void net_enable_timestamp(void)
1009 {
1010 atomic_inc(&netstamp_needed);
1011 }
1012
1013 void net_disable_timestamp(void)
1014 {
1015 atomic_dec(&netstamp_needed);
1016 }
1017
1018 void __net_timestamp(struct sk_buff *skb)
1019 {
1020 struct timeval tv;
1021
1022 do_gettimeofday(&tv);
1023 skb_set_timestamp(skb, &tv);
1024 }
1025 EXPORT_SYMBOL(__net_timestamp);
1026
1027 static inline void net_timestamp(struct sk_buff *skb)
1028 {
1029 if (atomic_read(&netstamp_needed))
1030 __net_timestamp(skb);
1031 else {
1032 skb->tstamp.off_sec = 0;
1033 skb->tstamp.off_usec = 0;
1034 }
1035 }
1036
1037 /*
1038 * Support routine. Sends outgoing frames to any network
1039 * taps currently in use.
1040 */
1041
1042 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1043 {
1044 struct packet_type *ptype;
1045
1046 net_timestamp(skb);
1047
1048 rcu_read_lock();
1049 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1050 /* Never send packets back to the socket
1051 * they originated from - MvS (miquels@drinkel.ow.org)
1052 */
1053 if ((ptype->dev == dev || !ptype->dev) &&
1054 (ptype->af_packet_priv == NULL ||
1055 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1056 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1057 if (!skb2)
1058 break;
1059
1060 /* skb->nh should be correctly
1061 set by sender, so that the second statement is
1062 just protection against buggy protocols.
1063 */
1064 skb2->mac.raw = skb2->data;
1065
1066 if (skb2->nh.raw < skb2->data ||
1067 skb2->nh.raw > skb2->tail) {
1068 if (net_ratelimit())
1069 printk(KERN_CRIT "protocol %04x is "
1070 "buggy, dev %s\n",
1071 skb2->protocol, dev->name);
1072 skb2->nh.raw = skb2->data;
1073 }
1074
1075 skb2->h.raw = skb2->nh.raw;
1076 skb2->pkt_type = PACKET_OUTGOING;
1077 ptype->func(skb2, skb->dev, ptype, skb->dev);
1078 }
1079 }
1080 rcu_read_unlock();
1081 }
1082
1083 /*
1084 * Invalidate hardware checksum when packet is to be mangled, and
1085 * complete checksum manually on outgoing path.
1086 */
1087 int skb_checksum_help(struct sk_buff *skb, int inward)
1088 {
1089 unsigned int csum;
1090 int ret = 0, offset = skb->h.raw - skb->data;
1091
1092 if (inward) {
1093 skb->ip_summed = CHECKSUM_NONE;
1094 goto out;
1095 }
1096
1097 if (skb_cloned(skb)) {
1098 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1099 if (ret)
1100 goto out;
1101 }
1102
1103 BUG_ON(offset > (int)skb->len);
1104 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1105
1106 offset = skb->tail - skb->h.raw;
1107 BUG_ON(offset <= 0);
1108 BUG_ON(skb->csum + 2 > offset);
1109
1110 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1111 skb->ip_summed = CHECKSUM_NONE;
1112 out:
1113 return ret;
1114 }
1115
1116 /* Take action when hardware reception checksum errors are detected. */
1117 #ifdef CONFIG_BUG
1118 void netdev_rx_csum_fault(struct net_device *dev)
1119 {
1120 if (net_ratelimit()) {
1121 printk(KERN_ERR "%s: hw csum failure.\n",
1122 dev ? dev->name : "<unknown>");
1123 dump_stack();
1124 }
1125 }
1126 EXPORT_SYMBOL(netdev_rx_csum_fault);
1127 #endif
1128
1129 #ifdef CONFIG_HIGHMEM
1130 /* Actually, we should eliminate this check as soon as we know, that:
1131 * 1. IOMMU is present and allows to map all the memory.
1132 * 2. No high memory really exists on this machine.
1133 */
1134
1135 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1136 {
1137 int i;
1138
1139 if (dev->features & NETIF_F_HIGHDMA)
1140 return 0;
1141
1142 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1143 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1144 return 1;
1145
1146 return 0;
1147 }
1148 #else
1149 #define illegal_highdma(dev, skb) (0)
1150 #endif
1151
1152 /* Keep head the same: replace data */
1153 int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
1154 {
1155 unsigned int size;
1156 u8 *data;
1157 long offset;
1158 struct skb_shared_info *ninfo;
1159 int headerlen = skb->data - skb->head;
1160 int expand = (skb->tail + skb->data_len) - skb->end;
1161
1162 if (skb_shared(skb))
1163 BUG();
1164
1165 if (expand <= 0)
1166 expand = 0;
1167
1168 size = skb->end - skb->head + expand;
1169 size = SKB_DATA_ALIGN(size);
1170 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1171 if (!data)
1172 return -ENOMEM;
1173
1174 /* Copy entire thing */
1175 if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1176 BUG();
1177
1178 /* Set up shinfo */
1179 ninfo = (struct skb_shared_info*)(data + size);
1180 atomic_set(&ninfo->dataref, 1);
1181 ninfo->tso_size = skb_shinfo(skb)->tso_size;
1182 ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1183 ninfo->nr_frags = 0;
1184 ninfo->frag_list = NULL;
1185
1186 /* Offset between the two in bytes */
1187 offset = data - skb->head;
1188
1189 /* Free old data. */
1190 skb_release_data(skb);
1191
1192 skb->head = data;
1193 skb->end = data + size;
1194
1195 /* Set up new pointers */
1196 skb->h.raw += offset;
1197 skb->nh.raw += offset;
1198 skb->mac.raw += offset;
1199 skb->tail += offset;
1200 skb->data += offset;
1201
1202 /* We are no longer a clone, even if we were. */
1203 skb->cloned = 0;
1204
1205 skb->tail += skb->data_len;
1206 skb->data_len = 0;
1207 return 0;
1208 }
1209
/*
 * Take the device transmit lock and record @cpu as its owner, unless the
 * device has NETIF_F_LLTX set, in which case nothing is done here.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); arguments are parenthesized so that
 * expressions such as &obj can be passed.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)

/*
 * Counterpart of HARD_TX_LOCK(): clear the recorded owner (-1) and drop
 * the transmit lock, again only when NETIF_F_LLTX is not set.
 */
#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1223
1224 /**
1225 * dev_queue_xmit - transmit a buffer
1226 * @skb: buffer to transmit
1227 *
1228 * Queue a buffer for transmission to a network device. The caller must
1229 * have set the device and priority and built the buffer before calling
1230 * this function. The function can be called from an interrupt.
1231 *
1232 * A negative errno code is returned on a failure. A success does not
1233 * guarantee the frame will be transmitted as it may be dropped due
1234 * to congestion or traffic shaping.
1235 *
1236 * -----------------------------------------------------------------------------------
1237 * I notice this method can also return errors from the queue disciplines,
1238 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1239 * be positive.
1240 *
1241 * Regardless of the return value, the skb is consumed, so it is currently
1242 * difficult to retry a send to this method. (You can bump the ref count
1243 * before sending to hold a reference for retry if you are careful.)
1244 *
1245 * When calling this method, interrupts MUST be enabled. This is because
1246 * the BH enable code must have IRQs enabled so that it will not deadlock.
1247 * --BLG
1248 */
1249
1250 int dev_queue_xmit(struct sk_buff *skb)
1251 {
1252 struct net_device *dev = skb->dev;
1253 struct Qdisc *q;
1254 int rc = -ENOMEM;
1255
1256 if (skb_shinfo(skb)->frag_list &&
1257 !(dev->features & NETIF_F_FRAGLIST) &&
1258 __skb_linearize(skb, GFP_ATOMIC))
1259 goto out_kfree_skb;
1260
1261 /* Fragmented skb is linearized if device does not support SG,
1262 * or if at least one of fragments is in highmem and device
1263 * does not support DMA from it.
1264 */
1265 if (skb_shinfo(skb)->nr_frags &&
1266 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1267 __skb_linearize(skb, GFP_ATOMIC))
1268 goto out_kfree_skb;
1269
1270 /* If packet is not checksummed and device does not support
1271 * checksumming for this protocol, complete checksumming here.
1272 */
1273 if (skb->ip_summed == CHECKSUM_HW &&
1274 (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1275 (!(dev->features & NETIF_F_IP_CSUM) ||
1276 skb->protocol != htons(ETH_P_IP))))
1277 if (skb_checksum_help(skb, 0))
1278 goto out_kfree_skb;
1279
1280 spin_lock_prefetch(&dev->queue_lock);
1281
1282 /* Disable soft irqs for various locks below. Also
1283 * stops preemption for RCU.
1284 */
1285 local_bh_disable();
1286
1287 /* Updates of qdisc are serialized by queue_lock.
1288 * The struct Qdisc which is pointed to by qdisc is now a
1289 * rcu structure - it may be accessed without acquiring
1290 * a lock (but the structure may be stale.) The freeing of the
1291 * qdisc will be deferred until it's known that there are no
1292 * more references to it.
1293 *
1294 * If the qdisc has an enqueue function, we still need to
1295 * hold the queue_lock before calling it, since queue_lock
1296 * also serializes access to the device queue.
1297 */
1298
1299 q = rcu_dereference(dev->qdisc);
1300 #ifdef CONFIG_NET_CLS_ACT
1301 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1302 #endif
1303 if (q->enqueue) {
1304 /* Grab device queue */
1305 spin_lock(&dev->queue_lock);
1306
1307 rc = q->enqueue(skb, q);
1308
1309 qdisc_run(dev);
1310
1311 spin_unlock(&dev->queue_lock);
1312 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1313 goto out;
1314 }
1315
1316 /* The device has no queue. Common case for software devices:
1317 loopback, all the sorts of tunnels...
1318
1319 Really, it is unlikely that xmit_lock protection is necessary here.
1320 (f.e. loopback and IP tunnels are clean ignoring statistics
1321 counters.)
1322 However, it is possible, that they rely on protection
1323 made by us here.
1324
1325 Check this and shot the lock. It is not prone from deadlocks.
1326 Either shot noqueue qdisc, it is even simpler 8)
1327 */
1328 if (dev->flags & IFF_UP) {
1329 int cpu = smp_processor_id(); /* ok because BHs are off */
1330
1331 if (dev->xmit_lock_owner != cpu) {
1332
1333 HARD_TX_LOCK(dev, cpu);
1334
1335 if (!netif_queue_stopped(dev)) {
1336 if (netdev_nit)
1337 dev_queue_xmit_nit(skb, dev);
1338
1339 rc = 0;
1340 if (!dev->hard_start_xmit(skb, dev)) {
1341 HARD_TX_UNLOCK(dev);
1342 goto out;
1343 }
1344 }
1345 HARD_TX_UNLOCK(dev);
1346 if (net_ratelimit())
1347 printk(KERN_CRIT "Virtual device %s asks to "
1348 "queue packet!\n", dev->name);
1349 } else {
1350 /* Recursion is detected! It is possible,
1351 * unfortunately */
1352 if (net_ratelimit())
1353 printk(KERN_CRIT "Dead loop on virtual device "
1354 "%s, fix it urgently!\n", dev->name);
1355 }
1356 }
1357
1358 rc = -ENETDOWN;
1359 local_bh_enable();
1360
1361 out_kfree_skb:
1362 kfree_skb(skb);
1363 return rc;
1364 out:
1365 local_bh_enable();
1366 return rc;
1367 }
1368
1369
1370 /*=======================================================================
1371 Receiver routines
1372 =======================================================================*/
1373
/* netif_rx() drops packets once the per-CPU input queue is longer than this. */
int netdev_max_backlog = 1000;
/* Starting budget of one net_rx_action() run. */
int netdev_budget = 300;
/* Weight given to the backlog device by process_backlog(). */
int weight_p = 64;
1377
1378 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1379
1380
1381 /**
1382 * netif_rx - post buffer to the network code
1383 * @skb: buffer to post
1384 *
1385 * This function receives a packet from a device driver and queues it for
1386 * the upper (protocol) levels to process. It always succeeds. The buffer
1387 * may be dropped during processing for congestion control or by the
1388 * protocol layers.
1389 *
1390 * return values:
1391 * NET_RX_SUCCESS (no congestion)
1392 * NET_RX_CN_LOW (low congestion)
1393 * NET_RX_CN_MOD (moderate congestion)
1394 * NET_RX_CN_HIGH (high congestion)
1395 * NET_RX_DROP (packet was dropped)
1396 *
1397 */
1398
1399 int netif_rx(struct sk_buff *skb)
1400 {
1401 struct softnet_data *queue;
1402 unsigned long flags;
1403
1404 /* if netpoll wants it, pretend we never saw it */
1405 if (netpoll_rx(skb))
1406 return NET_RX_DROP;
1407
1408 if (!skb->tstamp.off_sec)
1409 net_timestamp(skb);
1410
1411 /*
1412 * The code is rearranged so that the path is the most
1413 * short when CPU is congested, but is still operating.
1414 */
1415 local_irq_save(flags);
1416 queue = &__get_cpu_var(softnet_data);
1417
1418 __get_cpu_var(netdev_rx_stat).total++;
1419 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1420 if (queue->input_pkt_queue.qlen) {
1421 enqueue:
1422 dev_hold(skb->dev);
1423 __skb_queue_tail(&queue->input_pkt_queue, skb);
1424 local_irq_restore(flags);
1425 return NET_RX_SUCCESS;
1426 }
1427
1428 netif_rx_schedule(&queue->backlog_dev);
1429 goto enqueue;
1430 }
1431
1432 __get_cpu_var(netdev_rx_stat).dropped++;
1433 local_irq_restore(flags);
1434
1435 kfree_skb(skb);
1436 return NET_RX_DROP;
1437 }
1438
/*
 * Wrapper around netif_rx() that, with preemption disabled, also runs
 * any soft irqs found pending after the packet has been queued.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();

	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();
	return ret;
}
1451
1452 EXPORT_SYMBOL(netif_rx_ni);
1453
1454 static inline struct net_device *skb_bond(struct sk_buff *skb)
1455 {
1456 struct net_device *dev = skb->dev;
1457
1458 if (dev->master) {
1459 /*
1460 * On bonding slaves other than the currently active
1461 * slave, suppress duplicates except for 802.3ad
1462 * ETH_P_SLOW and alb non-mcast/bcast.
1463 */
1464 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
1465 if (dev->master->priv_flags & IFF_MASTER_ALB) {
1466 if (skb->pkt_type != PACKET_BROADCAST &&
1467 skb->pkt_type != PACKET_MULTICAST)
1468 goto keep;
1469 }
1470
1471 if (dev->master->priv_flags & IFF_MASTER_8023AD &&
1472 skb->protocol == __constant_htons(ETH_P_SLOW))
1473 goto keep;
1474
1475 kfree_skb(skb);
1476 return NULL;
1477 }
1478 keep:
1479 skb->dev = dev->master;
1480 }
1481
1482 return dev;
1483 }
1484
1485 static void net_tx_action(struct softirq_action *h)
1486 {
1487 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1488
1489 if (sd->completion_queue) {
1490 struct sk_buff *clist;
1491
1492 local_irq_disable();
1493 clist = sd->completion_queue;
1494 sd->completion_queue = NULL;
1495 local_irq_enable();
1496
1497 while (clist) {
1498 struct sk_buff *skb = clist;
1499 clist = clist->next;
1500
1501 BUG_TRAP(!atomic_read(&skb->users));
1502 __kfree_skb(skb);
1503 }
1504 }
1505
1506 if (sd->output_queue) {
1507 struct net_device *head;
1508
1509 local_irq_disable();
1510 head = sd->output_queue;
1511 sd->output_queue = NULL;
1512 local_irq_enable();
1513
1514 while (head) {
1515 struct net_device *dev = head;
1516 head = head->next_sched;
1517
1518 smp_mb__before_clear_bit();
1519 clear_bit(__LINK_STATE_SCHED, &dev->state);
1520
1521 if (spin_trylock(&dev->queue_lock)) {
1522 qdisc_run(dev);
1523 spin_unlock(&dev->queue_lock);
1524 } else {
1525 netif_schedule(dev);
1526 }
1527 }
1528 }
1529 }
1530
1531 static __inline__ int deliver_skb(struct sk_buff *skb,
1532 struct packet_type *pt_prev,
1533 struct net_device *orig_dev)
1534 {
1535 atomic_inc(&skb->users);
1536 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1537 }
1538
1539 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1540 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1541 struct net_bridge;
1542 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1543 unsigned char *addr);
1544 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1545
1546 static __inline__ int handle_bridge(struct sk_buff **pskb,
1547 struct packet_type **pt_prev, int *ret,
1548 struct net_device *orig_dev)
1549 {
1550 struct net_bridge_port *port;
1551
1552 if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1553 (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1554 return 0;
1555
1556 if (*pt_prev) {
1557 *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
1558 *pt_prev = NULL;
1559 }
1560
1561 return br_handle_frame_hook(port, pskb);
1562 }
1563 #else
1564 #define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
1565 #endif
1566
1567 #ifdef CONFIG_NET_CLS_ACT
1568 /* TODO: Maybe we should just force sch_ingress to be compiled in
1569 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1570 * a compare and 2 stores extra right now if we dont have it on
1571 * but have CONFIG_NET_CLS_ACT
1572 * NOTE: This doesnt stop any functionality; if you dont have
1573 * the ingress scheduler, you just cant add policies on ingress.
1574 *
1575 */
1576 static int ing_filter(struct sk_buff *skb)
1577 {
1578 struct Qdisc *q;
1579 struct net_device *dev = skb->dev;
1580 int result = TC_ACT_OK;
1581
1582 if (dev->qdisc_ingress) {
1583 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1584 if (MAX_RED_LOOP < ttl++) {
1585 printk("Redir loop detected Dropping packet (%s->%s)\n",
1586 skb->input_dev->name, skb->dev->name);
1587 return TC_ACT_SHOT;
1588 }
1589
1590 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1591
1592 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1593
1594 spin_lock(&dev->ingress_lock);
1595 if ((q = dev->qdisc_ingress) != NULL)
1596 result = q->enqueue(skb, q);
1597 spin_unlock(&dev->ingress_lock);
1598
1599 }
1600
1601 return result;
1602 }
1603 #endif
1604
1605 int netif_receive_skb(struct sk_buff *skb)
1606 {
1607 struct packet_type *ptype, *pt_prev;
1608 struct net_device *orig_dev;
1609 int ret = NET_RX_DROP;
1610 unsigned short type;
1611
1612 /* if we've gotten here through NAPI, check netpoll */
1613 if (skb->dev->poll && netpoll_rx(skb))
1614 return NET_RX_DROP;
1615
1616 if (!skb->tstamp.off_sec)
1617 net_timestamp(skb);
1618
1619 if (!skb->input_dev)
1620 skb->input_dev = skb->dev;
1621
1622 orig_dev = skb_bond(skb);
1623
1624 if (!orig_dev)
1625 return NET_RX_DROP;
1626
1627 __get_cpu_var(netdev_rx_stat).total++;
1628
1629 skb->h.raw = skb->nh.raw = skb->data;
1630 skb->mac_len = skb->nh.raw - skb->mac.raw;
1631
1632 pt_prev = NULL;
1633
1634 rcu_read_lock();
1635
1636 #ifdef CONFIG_NET_CLS_ACT
1637 if (skb->tc_verd & TC_NCLS) {
1638 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1639 goto ncls;
1640 }
1641 #endif
1642
1643 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1644 if (!ptype->dev || ptype->dev == skb->dev) {
1645 if (pt_prev)
1646 ret = deliver_skb(skb, pt_prev, orig_dev);
1647 pt_prev = ptype;
1648 }
1649 }
1650
1651 #ifdef CONFIG_NET_CLS_ACT
1652 if (pt_prev) {
1653 ret = deliver_skb(skb, pt_prev, orig_dev);
1654 pt_prev = NULL; /* noone else should process this after*/
1655 } else {
1656 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1657 }
1658
1659 ret = ing_filter(skb);
1660
1661 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1662 kfree_skb(skb);
1663 goto out;
1664 }
1665
1666 skb->tc_verd = 0;
1667 ncls:
1668 #endif
1669
1670 handle_diverter(skb);
1671
1672 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1673 goto out;
1674
1675 type = skb->protocol;
1676 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1677 if (ptype->type == type &&
1678 (!ptype->dev || ptype->dev == skb->dev)) {
1679 if (pt_prev)
1680 ret = deliver_skb(skb, pt_prev, orig_dev);
1681 pt_prev = ptype;
1682 }
1683 }
1684
1685 if (pt_prev) {
1686 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1687 } else {
1688 kfree_skb(skb);
1689 /* Jamal, now you will not able to escape explaining
1690 * me how you were going to use this. :-)
1691 */
1692 ret = NET_RX_DROP;
1693 }
1694
1695 out:
1696 rcu_read_unlock();
1697 return ret;
1698 }
1699
1700 static int process_backlog(struct net_device *backlog_dev, int *budget)
1701 {
1702 int work = 0;
1703 int quota = min(backlog_dev->quota, *budget);
1704 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1705 unsigned long start_time = jiffies;
1706
1707 backlog_dev->weight = weight_p;
1708 for (;;) {
1709 struct sk_buff *skb;
1710 struct net_device *dev;
1711
1712 local_irq_disable();
1713 skb = __skb_dequeue(&queue->input_pkt_queue);
1714 if (!skb)
1715 goto job_done;
1716 local_irq_enable();
1717
1718 dev = skb->dev;
1719
1720 netif_receive_skb(skb);
1721
1722 dev_put(dev);
1723
1724 work++;
1725
1726 if (work >= quota || jiffies - start_time > 1)
1727 break;
1728
1729 }
1730
1731 backlog_dev->quota -= work;
1732 *budget -= work;
1733 return -1;
1734
1735 job_done:
1736 backlog_dev->quota -= work;
1737 *budget -= work;
1738
1739 list_del(&backlog_dev->poll_list);
1740 smp_mb__before_clear_bit();
1741 netif_poll_enable(backlog_dev);
1742
1743 local_irq_enable();
1744 return 0;
1745 }
1746
1747 static void net_rx_action(struct softirq_action *h)
1748 {
1749 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1750 unsigned long start_time = jiffies;
1751 int budget = netdev_budget;
1752 void *have;
1753
1754 local_irq_disable();
1755
1756 while (!list_empty(&queue->poll_list)) {
1757 struct net_device *dev;
1758
1759 if (budget <= 0 || jiffies - start_time > 1)
1760 goto softnet_break;
1761
1762 local_irq_enable();
1763
1764 dev = list_entry(queue->poll_list.next,
1765 struct net_device, poll_list);
1766 have = netpoll_poll_lock(dev);
1767
1768 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1769 netpoll_poll_unlock(have);
1770 local_irq_disable();
1771 list_move_tail(&dev->poll_list, &queue->poll_list);
1772 if (dev->quota < 0)
1773 dev->quota += dev->weight;
1774 else
1775 dev->quota = dev->weight;
1776 } else {
1777 netpoll_poll_unlock(have);
1778 dev_put(dev);
1779 local_irq_disable();
1780 }
1781 }
1782 out:
1783 local_irq_enable();
1784 return;
1785
1786 softnet_break:
1787 __get_cpu_var(netdev_rx_stat).time_squeeze++;
1788 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1789 goto out;
1790 }
1791
1792 static gifconf_func_t * gifconf_list [NPROTO];
1793
1794 /**
1795 * register_gifconf - register a SIOCGIF handler
1796 * @family: Address family
1797 * @gifconf: Function handler
1798 *
1799 * Register protocol dependent address dumping routines. The handler
1800 * that is passed must not be freed or reused until it has been replaced
1801 * by another handler.
1802 */
1803 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1804 {
1805 if (family >= NPROTO)
1806 return -EINVAL;
1807 gifconf_list[family] = gifconf;
1808 return 0;
1809 }
1810
1811
1812 /*
1813 * Map an interface index to its name (SIOCGIFNAME)
1814 */
1815
1816 /*
1817 * We need this ioctl for efficient implementation of the
1818 * if_indextoname() function required by the IPv6 API. Without
1819 * it, we would have to search all the interfaces to find a
1820 * match. --pb
1821 */
1822
1823 static int dev_ifname(struct ifreq __user *arg)
1824 {
1825 struct net_device *dev;
1826 struct ifreq ifr;
1827
1828 /*
1829 * Fetch the caller's info block.
1830 */
1831
1832 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1833 return -EFAULT;
1834
1835 read_lock(&dev_base_lock);
1836 dev = __dev_get_by_index(ifr.ifr_ifindex);
1837 if (!dev) {
1838 read_unlock(&dev_base_lock);
1839 return -ENODEV;
1840 }
1841
1842 strcpy(ifr.ifr_name, dev->name);
1843 read_unlock(&dev_base_lock);
1844
1845 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1846 return -EFAULT;
1847 return 0;
1848 }
1849
1850 /*
1851 * Perform a SIOCGIFCONF call. This structure will change
1852 * size eventually, and there is nothing I can do about it.
1853 * Thus we will need a 'compatibility mode'.
1854 */
1855
1856 static int dev_ifconf(char __user *arg)
1857 {
1858 struct ifconf ifc;
1859 struct net_device *dev;
1860 char __user *pos;
1861 int len;
1862 int total;
1863 int i;
1864
1865 /*
1866 * Fetch the caller's info block.
1867 */
1868
1869 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1870 return -EFAULT;
1871
1872 pos = ifc.ifc_buf;
1873 len = ifc.ifc_len;
1874
1875 /*
1876 * Loop over the interfaces, and write an info block for each.
1877 */
1878
1879 total = 0;
1880 for (dev = dev_base; dev; dev = dev->next) {
1881 for (i = 0; i < NPROTO; i++) {
1882 if (gifconf_list[i]) {
1883 int done;
1884 if (!pos)
1885 done = gifconf_list[i](dev, NULL, 0);
1886 else
1887 done = gifconf_list[i](dev, pos + total,
1888 len - total);
1889 if (done < 0)
1890 return -EFAULT;
1891 total += done;
1892 }
1893 }
1894 }
1895
1896 /*
1897 * All done. Write the updated control block back to the caller.
1898 */
1899 ifc.ifc_len = total;
1900
1901 /*
1902 * Both BSD and Solaris return 0 here, so we do too.
1903 */
1904 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1905 }
1906
1907 #ifdef CONFIG_PROC_FS
1908 /*
1909 * This is invoked by the /proc filesystem handler to display a device
1910 * in detail.
1911 */
1912 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1913 {
1914 struct net_device *dev;
1915 loff_t i;
1916
1917 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1918
1919 return i == pos ? dev : NULL;
1920 }
1921
1922 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1923 {
1924 read_lock(&dev_base_lock);
1925 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1926 }
1927
1928 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1929 {
1930 ++*pos;
1931 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1932 }
1933
1934 void dev_seq_stop(struct seq_file *seq, void *v)
1935 {
1936 read_unlock(&dev_base_lock);
1937 }
1938
1939 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1940 {
1941 if (dev->get_stats) {
1942 struct net_device_stats *stats = dev->get_stats(dev);
1943
1944 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1945 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1946 dev->name, stats->rx_bytes, stats->rx_packets,
1947 stats->rx_errors,
1948 stats->rx_dropped + stats->rx_missed_errors,
1949 stats->rx_fifo_errors,
1950 stats->rx_length_errors + stats->rx_over_errors +
1951 stats->rx_crc_errors + stats->rx_frame_errors,
1952 stats->rx_compressed, stats->multicast,
1953 stats->tx_bytes, stats->tx_packets,
1954 stats->tx_errors, stats->tx_dropped,
1955 stats->tx_fifo_errors, stats->collisions,
1956 stats->tx_carrier_errors +
1957 stats->tx_aborted_errors +
1958 stats->tx_window_errors +
1959 stats->tx_heartbeat_errors,
1960 stats->tx_compressed);
1961 } else
1962 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1963 }
1964
1965 /*
1966 * Called from the PROCfs module. This now uses the new arbitrary sized
1967 * /proc/net interface to create /proc/net/dev
1968 */
1969 static int dev_seq_show(struct seq_file *seq, void *v)
1970 {
1971 if (v == SEQ_START_TOKEN)
1972 seq_puts(seq, "Inter-| Receive "
1973 " | Transmit\n"
1974 " face |bytes packets errs drop fifo frame "
1975 "compressed multicast|bytes packets errs "
1976 "drop fifo colls carrier compressed\n");
1977 else
1978 dev_seq_printf_stats(seq, v);
1979 return 0;
1980 }
1981
1982 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
1983 {
1984 struct netif_rx_stats *rc = NULL;
1985
1986 while (*pos < NR_CPUS)
1987 if (cpu_online(*pos)) {
1988 rc = &per_cpu(netdev_rx_stat, *pos);
1989 break;
1990 } else
1991 ++*pos;
1992 return rc;
1993 }
1994
1995 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
1996 {
1997 return softnet_get_online(pos);
1998 }
1999
2000 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2001 {
2002 ++*pos;
2003 return softnet_get_online(pos);
2004 }
2005
/* seq_file stop: nothing was acquired in start, so nothing to release. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2009
2010 static int softnet_seq_show(struct seq_file *seq, void *v)
2011 {
2012 struct netif_rx_stats *s = v;
2013
2014 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2015 s->total, s->dropped, s->time_squeeze, 0,
2016 0, 0, 0, 0, /* was fastroute */
2017 s->cpu_collision );
2018 return 0;
2019 }
2020
2021 static struct seq_operations dev_seq_ops = {
2022 .start = dev_seq_start,
2023 .next = dev_seq_next,
2024 .stop = dev_seq_stop,
2025 .show = dev_seq_show,
2026 };
2027
2028 static int dev_seq_open(struct inode *inode, struct file *file)
2029 {
2030 return seq_open(file, &dev_seq_ops);
2031 }
2032
2033 static struct file_operations dev_seq_fops = {
2034 .owner = THIS_MODULE,
2035 .open = dev_seq_open,
2036 .read = seq_read,
2037 .llseek = seq_lseek,
2038 .release = seq_release,
2039 };
2040
2041 static struct seq_operations softnet_seq_ops = {
2042 .start = softnet_seq_start,
2043 .next = softnet_seq_next,
2044 .stop = softnet_seq_stop,
2045 .show = softnet_seq_show,
2046 };
2047
2048 static int softnet_seq_open(struct inode *inode, struct file *file)
2049 {
2050 return seq_open(file, &softnet_seq_ops);
2051 }
2052
2053 static struct file_operations softnet_seq_fops = {
2054 .owner = THIS_MODULE,
2055 .open = softnet_seq_open,
2056 .read = seq_read,
2057 .llseek = seq_lseek,
2058 .release = seq_release,
2059 };
2060
2061 #ifdef CONFIG_WIRELESS_EXT
2062 extern int wireless_proc_init(void);
2063 #else
2064 #define wireless_proc_init() 0
2065 #endif
2066
2067 static int __init dev_proc_init(void)
2068 {
2069 int rc = -ENOMEM;
2070
2071 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2072 goto out;
2073 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2074 goto out_dev;
2075 if (wireless_proc_init())
2076 goto out_softnet;
2077 rc = 0;
2078 out:
2079 return rc;
2080 out_softnet:
2081 proc_net_remove("softnet_stat");
2082 out_dev:
2083 proc_net_remove("dev");
2084 goto out;
2085 }
2086 #else
2087 #define dev_proc_init() 0
2088 #endif /* CONFIG_PROC_FS */
2089
2090
2091 /**
2092 * netdev_set_master - set up master/slave pair
2093 * @slave: slave device
2094 * @master: new master device
2095 *
2096 * Changes the master device of the slave. Pass %NULL to break the
2097 * bonding. The caller must hold the RTNL semaphore. On a failure
2098 * a negative errno code is returned. On success the reference counts
2099 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2100 * function returns zero.
2101 */
2102 int netdev_set_master(struct net_device *slave, struct net_device *master)
2103 {
2104 struct net_device *old = slave->master;
2105
2106 ASSERT_RTNL();
2107
2108 if (master) {
2109 if (old)
2110 return -EBUSY;
2111 dev_hold(master);
2112 }
2113
2114 slave->master = master;
2115
2116 synchronize_net();
2117
2118 if (old)
2119 dev_put(old);
2120
2121 if (master)
2122 slave->flags |= IFF_SLAVE;
2123 else
2124 slave->flags &= ~IFF_SLAVE;
2125
2126 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2127 return 0;
2128 }
2129
2130 /**
2131 * dev_set_promiscuity - update promiscuity count on a device
2132 * @dev: device
2133 * @inc: modifier
2134 *
2135 * Add or remove promiscuity from a device. While the count in the device
2136 * remains above zero the interface remains promiscuous. Once it hits zero
2137 * the device reverts back to normal filtering operation. A negative inc
2138 * value is used to drop promiscuity on the device.
2139 */
2140 void dev_set_promiscuity(struct net_device *dev, int inc)
2141 {
2142 unsigned short old_flags = dev->flags;
2143
2144 if ((dev->promiscuity += inc) == 0)
2145 dev->flags &= ~IFF_PROMISC;
2146 else
2147 dev->flags |= IFF_PROMISC;
2148 if (dev->flags != old_flags) {
2149 dev_mc_upload(dev);
2150 printk(KERN_INFO "device %s %s promiscuous mode\n",
2151 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2152 "left");
2153 audit_log(current->audit_context, GFP_ATOMIC,
2154 AUDIT_ANOM_PROMISCUOUS,
2155 "dev=%s prom=%d old_prom=%d auid=%u",
2156 dev->name, (dev->flags & IFF_PROMISC),
2157 (old_flags & IFF_PROMISC),
2158 audit_get_loginuid(current->audit_context));
2159 }
2160 }
2161
2162 /**
2163 * dev_set_allmulti - update allmulti count on a device
2164 * @dev: device
2165 * @inc: modifier
2166 *
2167 * Add or remove reception of all multicast frames to a device. While the
2168 * count in the device remains above zero the interface remains listening
2169 * to all interfaces. Once it hits zero the device reverts back to normal
2170 * filtering operation. A negative @inc value is used to drop the counter
2171 * when releasing a resource needing all multicasts.
2172 */
2173
2174 void dev_set_allmulti(struct net_device *dev, int inc)
2175 {
2176 unsigned short old_flags = dev->flags;
2177
2178 dev->flags |= IFF_ALLMULTI;
2179 if ((dev->allmulti += inc) == 0)
2180 dev->flags &= ~IFF_ALLMULTI;
2181 if (dev->flags ^ old_flags)
2182 dev_mc_upload(dev);
2183 }
2184
2185 unsigned dev_get_flags(const struct net_device *dev)
2186 {
2187 unsigned flags;
2188
2189 flags = (dev->flags & ~(IFF_PROMISC |
2190 IFF_ALLMULTI |
2191 IFF_RUNNING |
2192 IFF_LOWER_UP |
2193 IFF_DORMANT)) |
2194 (dev->gflags & (IFF_PROMISC |
2195 IFF_ALLMULTI));
2196
2197 if (netif_running(dev)) {
2198 if (netif_oper_up(dev))
2199 flags |= IFF_RUNNING;
2200 if (netif_carrier_ok(dev))
2201 flags |= IFF_LOWER_UP;
2202 if (netif_dormant(dev))
2203 flags |= IFF_DORMANT;
2204 }
2205
2206 return flags;
2207 }
2208
2209 int dev_change_flags(struct net_device *dev, unsigned flags)
2210 {
2211 int ret;
2212 int old_flags = dev->flags;
2213
2214 /*
2215 * Set the flags on our device.
2216 */
2217
2218 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2219 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2220 IFF_AUTOMEDIA)) |
2221 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2222 IFF_ALLMULTI));
2223
2224 /*
2225 * Load in the correct multicast list now the flags have changed.
2226 */
2227
2228 dev_mc_upload(dev);
2229
2230 /*
2231 * Have we downed the interface. We handle IFF_UP ourselves
2232 * according to user attempts to set it, rather than blindly
2233 * setting it.
2234 */
2235
2236 ret = 0;
2237 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2238 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2239
2240 if (!ret)
2241 dev_mc_upload(dev);
2242 }
2243
2244 if (dev->flags & IFF_UP &&
2245 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2246 IFF_VOLATILE)))
2247 blocking_notifier_call_chain(&netdev_chain,
2248 NETDEV_CHANGE, dev);
2249
2250 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2251 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2252 dev->gflags ^= IFF_PROMISC;
2253 dev_set_promiscuity(dev, inc);
2254 }
2255
2256 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2257 is important. Some (broken) drivers set IFF_PROMISC, when
2258 IFF_ALLMULTI is requested not asking us and not reporting.
2259 */
2260 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2261 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2262 dev->gflags ^= IFF_ALLMULTI;
2263 dev_set_allmulti(dev, inc);
2264 }
2265
2266 if (old_flags ^ dev->flags)
2267 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2268
2269 return ret;
2270 }
2271
2272 int dev_set_mtu(struct net_device *dev, int new_mtu)
2273 {
2274 int err;
2275
2276 if (new_mtu == dev->mtu)
2277 return 0;
2278
2279 /* MTU must be positive. */
2280 if (new_mtu < 0)
2281 return -EINVAL;
2282
2283 if (!netif_device_present(dev))
2284 return -ENODEV;
2285
2286 err = 0;
2287 if (dev->change_mtu)
2288 err = dev->change_mtu(dev, new_mtu);
2289 else
2290 dev->mtu = new_mtu;
2291 if (!err && dev->flags & IFF_UP)
2292 blocking_notifier_call_chain(&netdev_chain,
2293 NETDEV_CHANGEMTU, dev);
2294 return err;
2295 }
2296
2297 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2298 {
2299 int err;
2300
2301 if (!dev->set_mac_address)
2302 return -EOPNOTSUPP;
2303 if (sa->sa_family != dev->type)
2304 return -EINVAL;
2305 if (!netif_device_present(dev))
2306 return -ENODEV;
2307 err = dev->set_mac_address(dev, sa);
2308 if (!err)
2309 blocking_notifier_call_chain(&netdev_chain,
2310 NETDEV_CHANGEADDR, dev);
2311 return err;
2312 }
2313
2314 /*
2315 * Perform the SIOCxIFxxx calls.
2316 */
2317 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2318 {
2319 int err;
2320 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2321
2322 if (!dev)
2323 return -ENODEV;
2324
2325 switch (cmd) {
2326 case SIOCGIFFLAGS: /* Get interface flags */
2327 ifr->ifr_flags = dev_get_flags(dev);
2328 return 0;
2329
2330 case SIOCSIFFLAGS: /* Set interface flags */
2331 return dev_change_flags(dev, ifr->ifr_flags);
2332
2333 case SIOCGIFMETRIC: /* Get the metric on the interface
2334 (currently unused) */
2335 ifr->ifr_metric = 0;
2336 return 0;
2337
2338 case SIOCSIFMETRIC: /* Set the metric on the interface
2339 (currently unused) */
2340 return -EOPNOTSUPP;
2341
2342 case SIOCGIFMTU: /* Get the MTU of a device */
2343 ifr->ifr_mtu = dev->mtu;
2344 return 0;
2345
2346 case SIOCSIFMTU: /* Set the MTU of a device */
2347 return dev_set_mtu(dev, ifr->ifr_mtu);
2348
2349 case SIOCGIFHWADDR:
2350 if (!dev->addr_len)
2351 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2352 else
2353 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2354 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2355 ifr->ifr_hwaddr.sa_family = dev->type;
2356 return 0;
2357
2358 case SIOCSIFHWADDR:
2359 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2360
2361 case SIOCSIFHWBROADCAST:
2362 if (ifr->ifr_hwaddr.sa_family != dev->type)
2363 return -EINVAL;
2364 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2365 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2366 blocking_notifier_call_chain(&netdev_chain,
2367 NETDEV_CHANGEADDR, dev);
2368 return 0;
2369
2370 case SIOCGIFMAP:
2371 ifr->ifr_map.mem_start = dev->mem_start;
2372 ifr->ifr_map.mem_end = dev->mem_end;
2373 ifr->ifr_map.base_addr = dev->base_addr;
2374 ifr->ifr_map.irq = dev->irq;
2375 ifr->ifr_map.dma = dev->dma;
2376 ifr->ifr_map.port = dev->if_port;
2377 return 0;
2378
2379 case SIOCSIFMAP:
2380 if (dev->set_config) {
2381 if (!netif_device_present(dev))
2382 return -ENODEV;
2383 return dev->set_config(dev, &ifr->ifr_map);
2384 }
2385 return -EOPNOTSUPP;
2386
2387 case SIOCADDMULTI:
2388 if (!dev->set_multicast_list ||
2389 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2390 return -EINVAL;
2391 if (!netif_device_present(dev))
2392 return -ENODEV;
2393 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2394 dev->addr_len, 1);
2395
2396 case SIOCDELMULTI:
2397 if (!dev->set_multicast_list ||
2398 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2399 return -EINVAL;
2400 if (!netif_device_present(dev))
2401 return -ENODEV;
2402 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2403 dev->addr_len, 1);
2404
2405 case SIOCGIFINDEX:
2406 ifr->ifr_ifindex = dev->ifindex;
2407 return 0;
2408
2409 case SIOCGIFTXQLEN:
2410 ifr->ifr_qlen = dev->tx_queue_len;
2411 return 0;
2412
2413 case SIOCSIFTXQLEN:
2414 if (ifr->ifr_qlen < 0)
2415 return -EINVAL;
2416 dev->tx_queue_len = ifr->ifr_qlen;
2417 return 0;
2418
2419 case SIOCSIFNAME:
2420 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2421 return dev_change_name(dev, ifr->ifr_newname);
2422
2423 /*
2424 * Unknown or private ioctl
2425 */
2426
2427 default:
2428 if ((cmd >= SIOCDEVPRIVATE &&
2429 cmd <= SIOCDEVPRIVATE + 15) ||
2430 cmd == SIOCBONDENSLAVE ||
2431 cmd == SIOCBONDRELEASE ||
2432 cmd == SIOCBONDSETHWADDR ||
2433 cmd == SIOCBONDSLAVEINFOQUERY ||
2434 cmd == SIOCBONDINFOQUERY ||
2435 cmd == SIOCBONDCHANGEACTIVE ||
2436 cmd == SIOCGMIIPHY ||
2437 cmd == SIOCGMIIREG ||
2438 cmd == SIOCSMIIREG ||
2439 cmd == SIOCBRADDIF ||
2440 cmd == SIOCBRDELIF ||
2441 cmd == SIOCWANDEV) {
2442 err = -EOPNOTSUPP;
2443 if (dev->do_ioctl) {
2444 if (netif_device_present(dev))
2445 err = dev->do_ioctl(dev, ifr,
2446 cmd);
2447 else
2448 err = -ENODEV;
2449 }
2450 } else
2451 err = -EINVAL;
2452
2453 }
2454 return err;
2455 }
2456
2457 /*
2458 * This function handles all "interface"-type I/O control requests. The actual
2459 * 'doing' part of this is dev_ifsioc above.
2460 */
2461
2462 /**
2463 * dev_ioctl - network device ioctl
2464 * @cmd: command to issue
2465 * @arg: pointer to a struct ifreq in user space
2466 *
2467 * Issue ioctl functions to devices. This is normally called by the
2468 * user space syscall interfaces but can sometimes be useful for
2469 * other purposes. The return value is the return from the syscall if
2470 * positive or a negative errno code on error.
2471 */
2472
2473 int dev_ioctl(unsigned int cmd, void __user *arg)
2474 {
2475 struct ifreq ifr;
2476 int ret;
2477 char *colon;
2478
2479 /* One special case: SIOCGIFCONF takes ifconf argument
2480 and requires shared lock, because it sleeps writing
2481 to user space.
2482 */
2483
2484 if (cmd == SIOCGIFCONF) {
2485 rtnl_lock();
2486 ret = dev_ifconf((char __user *) arg);
2487 rtnl_unlock();
2488 return ret;
2489 }
2490 if (cmd == SIOCGIFNAME)
2491 return dev_ifname((struct ifreq __user *)arg);
2492
2493 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2494 return -EFAULT;
2495
2496 ifr.ifr_name[IFNAMSIZ-1] = 0;
2497
2498 colon = strchr(ifr.ifr_name, ':');
2499 if (colon)
2500 *colon = 0;
2501
2502 /*
2503 * See which interface the caller is talking about.
2504 */
2505
2506 switch (cmd) {
2507 /*
2508 * These ioctl calls:
2509 * - can be done by all.
2510 * - atomic and do not require locking.
2511 * - return a value
2512 */
2513 case SIOCGIFFLAGS:
2514 case SIOCGIFMETRIC:
2515 case SIOCGIFMTU:
2516 case SIOCGIFHWADDR:
2517 case SIOCGIFSLAVE:
2518 case SIOCGIFMAP:
2519 case SIOCGIFINDEX:
2520 case SIOCGIFTXQLEN:
2521 dev_load(ifr.ifr_name);
2522 read_lock(&dev_base_lock);
2523 ret = dev_ifsioc(&ifr, cmd);
2524 read_unlock(&dev_base_lock);
2525 if (!ret) {
2526 if (colon)
2527 *colon = ':';
2528 if (copy_to_user(arg, &ifr,
2529 sizeof(struct ifreq)))
2530 ret = -EFAULT;
2531 }
2532 return ret;
2533
2534 case SIOCETHTOOL:
2535 dev_load(ifr.ifr_name);
2536 rtnl_lock();
2537 ret = dev_ethtool(&ifr);
2538 rtnl_unlock();
2539 if (!ret) {
2540 if (colon)
2541 *colon = ':';
2542 if (copy_to_user(arg, &ifr,
2543 sizeof(struct ifreq)))
2544 ret = -EFAULT;
2545 }
2546 return ret;
2547
2548 /*
2549 * These ioctl calls:
2550 * - require superuser power.
2551 * - require strict serialization.
2552 * - return a value
2553 */
2554 case SIOCGMIIPHY:
2555 case SIOCGMIIREG:
2556 case SIOCSIFNAME:
2557 if (!capable(CAP_NET_ADMIN))
2558 return -EPERM;
2559 dev_load(ifr.ifr_name);
2560 rtnl_lock();
2561 ret = dev_ifsioc(&ifr, cmd);
2562 rtnl_unlock();
2563 if (!ret) {
2564 if (colon)
2565 *colon = ':';
2566 if (copy_to_user(arg, &ifr,
2567 sizeof(struct ifreq)))
2568 ret = -EFAULT;
2569 }
2570 return ret;
2571
2572 /*
2573 * These ioctl calls:
2574 * - require superuser power.
2575 * - require strict serialization.
2576 * - do not return a value
2577 */
2578 case SIOCSIFFLAGS:
2579 case SIOCSIFMETRIC:
2580 case SIOCSIFMTU:
2581 case SIOCSIFMAP:
2582 case SIOCSIFHWADDR:
2583 case SIOCSIFSLAVE:
2584 case SIOCADDMULTI:
2585 case SIOCDELMULTI:
2586 case SIOCSIFHWBROADCAST:
2587 case SIOCSIFTXQLEN:
2588 case SIOCSMIIREG:
2589 case SIOCBONDENSLAVE:
2590 case SIOCBONDRELEASE:
2591 case SIOCBONDSETHWADDR:
2592 case SIOCBONDCHANGEACTIVE:
2593 case SIOCBRADDIF:
2594 case SIOCBRDELIF:
2595 if (!capable(CAP_NET_ADMIN))
2596 return -EPERM;
2597 /* fall through */
2598 case SIOCBONDSLAVEINFOQUERY:
2599 case SIOCBONDINFOQUERY:
2600 dev_load(ifr.ifr_name);
2601 rtnl_lock();
2602 ret = dev_ifsioc(&ifr, cmd);
2603 rtnl_unlock();
2604 return ret;
2605
2606 case SIOCGIFMEM:
2607 /* Get the per device memory space. We can add this but
2608 * currently do not support it */
2609 case SIOCSIFMEM:
2610 /* Set the per device memory buffer space.
2611 * Not applicable in our case */
2612 case SIOCSIFLINK:
2613 return -EINVAL;
2614
2615 /*
2616 * Unknown or private ioctl.
2617 */
2618 default:
2619 if (cmd == SIOCWANDEV ||
2620 (cmd >= SIOCDEVPRIVATE &&
2621 cmd <= SIOCDEVPRIVATE + 15)) {
2622 dev_load(ifr.ifr_name);
2623 rtnl_lock();
2624 ret = dev_ifsioc(&ifr, cmd);
2625 rtnl_unlock();
2626 if (!ret && copy_to_user(arg, &ifr,
2627 sizeof(struct ifreq)))
2628 ret = -EFAULT;
2629 return ret;
2630 }
2631 #ifdef CONFIG_WIRELESS_EXT
2632 /* Take care of Wireless Extensions */
2633 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2634 /* If command is `set a parameter', or
2635 * `get the encoding parameters', check if
2636 * the user has the right to do it */
2637 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2638 if (!capable(CAP_NET_ADMIN))
2639 return -EPERM;
2640 }
2641 dev_load(ifr.ifr_name);
2642 rtnl_lock();
2643 /* Follow me in net/core/wireless.c */
2644 ret = wireless_process_ioctl(&ifr, cmd);
2645 rtnl_unlock();
2646 if (IW_IS_GET(cmd) &&
2647 copy_to_user(arg, &ifr,
2648 sizeof(struct ifreq)))
2649 ret = -EFAULT;
2650 return ret;
2651 }
2652 #endif /* CONFIG_WIRELESS_EXT */
2653 return -EINVAL;
2654 }
2655 }
2656
2657
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns a suitable unique value for a new device interface
 *	number. The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	Candidates come from a static counter that is advanced on every
 *	attempt and reset to 1 whenever it stops being positive; a candidate
 *	is accepted once __dev_get_by_index() finds no device using it.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
2675
2676 static int dev_boot_phase = 1;
2677
2678 /* Delayed registration/unregisteration */
2679 static DEFINE_SPINLOCK(net_todo_list_lock);
2680 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2681
2682 static inline void net_set_todo(struct net_device *dev)
2683 {
2684 spin_lock(&net_todo_list_lock);
2685 list_add_tail(&dev->todo_list, &net_todo_list);
2686 spin_unlock(&net_todo_list_lock);
2687 }
2688
2689 /**
2690 * register_netdevice - register a network device
2691 * @dev: device to register
2692 *
2693 * Take a completed network device structure and add it to the kernel
2694 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2695 * chain. 0 is returned on success. A negative errno code is returned
2696 * on a failure to set up the device, or if the name is a duplicate.
2697 *
2698 * Callers must hold the rtnl semaphore. You may want
2699 * register_netdev() instead of this.
2700 *
2701 * BUGS:
2702 * The locking appears insufficient to guarantee two parallel registers
2703 * will not get the same name.
2704 */
2705
2706 int register_netdevice(struct net_device *dev)
2707 {
2708 struct hlist_head *head;
2709 struct hlist_node *p;
2710 int ret;
2711
2712 BUG_ON(dev_boot_phase);
2713 ASSERT_RTNL();
2714
2715 /* When net_device's are persistent, this will be fatal. */
2716 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2717
2718 spin_lock_init(&dev->queue_lock);
2719 spin_lock_init(&dev->xmit_lock);
2720 dev->xmit_lock_owner = -1;
2721 #ifdef CONFIG_NET_CLS_ACT
2722 spin_lock_init(&dev->ingress_lock);
2723 #endif
2724
2725 ret = alloc_divert_blk(dev);
2726 if (ret)
2727 goto out;
2728
2729 dev->iflink = -1;
2730
2731 /* Init, if this function is available */
2732 if (dev->init) {
2733 ret = dev->init(dev);
2734 if (ret) {
2735 if (ret > 0)
2736 ret = -EIO;
2737 goto out_err;
2738 }
2739 }
2740
2741 if (!dev_valid_name(dev->name)) {
2742 ret = -EINVAL;
2743 goto out_err;
2744 }
2745
2746 dev->ifindex = dev_new_index();
2747 if (dev->iflink == -1)
2748 dev->iflink = dev->ifindex;
2749
2750 /* Check for existence of name */
2751 head = dev_name_hash(dev->name);
2752 hlist_for_each(p, head) {
2753 struct net_device *d
2754 = hlist_entry(p, struct net_device, name_hlist);
2755 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2756 ret = -EEXIST;
2757 goto out_err;
2758 }
2759 }
2760
2761 /* Fix illegal SG+CSUM combinations. */
2762 if ((dev->features & NETIF_F_SG) &&
2763 !(dev->features & (NETIF_F_IP_CSUM |
2764 NETIF_F_NO_CSUM |
2765 NETIF_F_HW_CSUM))) {
2766 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2767 dev->name);
2768 dev->features &= ~NETIF_F_SG;
2769 }
2770
2771 /* TSO requires that SG is present as well. */
2772 if ((dev->features & NETIF_F_TSO) &&
2773 !(dev->features & NETIF_F_SG)) {
2774 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2775 dev->name);
2776 dev->features &= ~NETIF_F_TSO;
2777 }
2778 if (dev->features & NETIF_F_UFO) {
2779 if (!(dev->features & NETIF_F_HW_CSUM)) {
2780 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2781 "NETIF_F_HW_CSUM feature.\n",
2782 dev->name);
2783 dev->features &= ~NETIF_F_UFO;
2784 }
2785 if (!(dev->features & NETIF_F_SG)) {
2786 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2787 "NETIF_F_SG feature.\n",
2788 dev->name);
2789 dev->features &= ~NETIF_F_UFO;
2790 }
2791 }
2792
2793 /*
2794 * nil rebuild_header routine,
2795 * that should be never called and used as just bug trap.
2796 */
2797
2798 if (!dev->rebuild_header)
2799 dev->rebuild_header = default_rebuild_header;
2800
2801 /*
2802 * Default initial state at registry is that the
2803 * device is present.
2804 */
2805
2806 set_bit(__LINK_STATE_PRESENT, &dev->state);
2807
2808 dev->next = NULL;
2809 dev_init_scheduler(dev);
2810 write_lock_bh(&dev_base_lock);
2811 *dev_tail = dev;
2812 dev_tail = &dev->next;
2813 hlist_add_head(&dev->name_hlist, head);
2814 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2815 dev_hold(dev);
2816 dev->reg_state = NETREG_REGISTERING;
2817 write_unlock_bh(&dev_base_lock);
2818
2819 /* Notify protocols, that a new device appeared. */
2820 blocking_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2821
2822 /* Finish registration after unlock */
2823 net_set_todo(dev);
2824 ret = 0;
2825
2826 out:
2827 return ret;
2828 out_err:
2829 free_divert_blk(dev);
2830 goto out;
2831 }
2832
2833 /**
2834 * register_netdev - register a network device
2835 * @dev: device to register
2836 *
2837 * Take a completed network device structure and add it to the kernel
2838 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2839 * chain. 0 is returned on success. A negative errno code is returned
2840 * on a failure to set up the device, or if the name is a duplicate.
2841 *
2842 * This is a wrapper around register_netdev that takes the rtnl semaphore
2843 * and expands the device name if you passed a format string to
2844 * alloc_netdev.
2845 */
2846 int register_netdev(struct net_device *dev)
2847 {
2848 int err;
2849
2850 rtnl_lock();
2851
2852 /*
2853 * If the name is a format string the caller wants us to do a
2854 * name allocation.
2855 */
2856 if (strchr(dev->name, '%')) {
2857 err = dev_alloc_name(dev, dev->name);
2858 if (err < 0)
2859 goto out;
2860 }
2861
2862 /*
2863 * Back compatibility hook. Kill this one in 2.5
2864 */
2865 if (dev->name[0] == 0 || dev->name[0] == ' ') {
2866 err = dev_alloc_name(dev, "eth%d");
2867 if (err < 0)
2868 goto out;
2869 }
2870
2871 err = register_netdevice(dev);
2872 out:
2873 rtnl_unlock();
2874 return err;
2875 }
2876 EXPORT_SYMBOL(register_netdev);
2877
2878 /*
2879 * netdev_wait_allrefs - wait until all references are gone.
2880 *
2881 * This is called when unregistering network devices.
2882 *
2883 * Any protocol or device that holds a reference should register
2884 * for netdevice notification, and cleanup and put back the
2885 * reference if they receive an UNREGISTER event.
2886 * We can get stuck here if buggy protocols don't correctly
2887 * call dev_put.
2888 */
2889 static void netdev_wait_allrefs(struct net_device *dev)
2890 {
2891 unsigned long rebroadcast_time, warning_time;
2892
2893 rebroadcast_time = warning_time = jiffies;
2894 while (atomic_read(&dev->refcnt) != 0) {
2895 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2896 rtnl_lock();
2897
2898 /* Rebroadcast unregister notification */
2899 blocking_notifier_call_chain(&netdev_chain,
2900 NETDEV_UNREGISTER, dev);
2901
2902 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2903 &dev->state)) {
2904 /* We must not have linkwatch events
2905 * pending on unregister. If this
2906 * happens, we simply run the queue
2907 * unscheduled, resulting in a noop
2908 * for this device.
2909 */
2910 linkwatch_run_queue();
2911 }
2912
2913 __rtnl_unlock();
2914
2915 rebroadcast_time = jiffies;
2916 }
2917
2918 msleep(250);
2919
2920 if (time_after(jiffies, warning_time + 10 * HZ)) {
2921 printk(KERN_EMERG "unregister_netdevice: "
2922 "waiting for %s to become free. Usage "
2923 "count = %d\n",
2924 dev->name, atomic_read(&dev->refcnt));
2925 warning_time = jiffies;
2926 }
2927 }
2928 }
2929
2930 /* The sequence is:
2931 *
2932 * rtnl_lock();
2933 * ...
2934 * register_netdevice(x1);
2935 * register_netdevice(x2);
2936 * ...
2937 * unregister_netdevice(y1);
2938 * unregister_netdevice(y2);
2939 * ...
2940 * rtnl_unlock();
2941 * free_netdev(y1);
2942 * free_netdev(y2);
2943 *
2944 * We are invoked by rtnl_unlock() after it drops the semaphore.
2945 * This allows us to deal with problems:
2946 * 1) We can create/delete sysfs objects which invoke hotplug
2947 * without deadlocking with linkwatch via keventd.
2948 * 2) Since we run with the RTNL semaphore not held, we can sleep
2949 * safely in order to wait for the netdev refcnt to drop to zero.
2950 */
2951 static DEFINE_MUTEX(net_todo_run_mutex);
2952 void netdev_run_todo(void)
2953 {
2954 struct list_head list = LIST_HEAD_INIT(list);
2955 int err;
2956
2957
2958 /* Need to guard against multiple cpu's getting out of order. */
2959 mutex_lock(&net_todo_run_mutex);
2960
2961 /* Not safe to do outside the semaphore. We must not return
2962 * until all unregister events invoked by the local processor
2963 * have been completed (either by this todo run, or one on
2964 * another cpu).
2965 */
2966 if (list_empty(&net_todo_list))
2967 goto out;
2968
2969 /* Snapshot list, allow later requests */
2970 spin_lock(&net_todo_list_lock);
2971 list_splice_init(&net_todo_list, &list);
2972 spin_unlock(&net_todo_list_lock);
2973
2974 while (!list_empty(&list)) {
2975 struct net_device *dev
2976 = list_entry(list.next, struct net_device, todo_list);
2977 list_del(&dev->todo_list);
2978
2979 switch(dev->reg_state) {
2980 case NETREG_REGISTERING:
2981 err = netdev_register_sysfs(dev);
2982 if (err)
2983 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2984 dev->name, err);
2985 dev->reg_state = NETREG_REGISTERED;
2986 break;
2987
2988 case NETREG_UNREGISTERING:
2989 netdev_unregister_sysfs(dev);
2990 dev->reg_state = NETREG_UNREGISTERED;
2991
2992 netdev_wait_allrefs(dev);
2993
2994 /* paranoia */
2995 BUG_ON(atomic_read(&dev->refcnt));
2996 BUG_TRAP(!dev->ip_ptr);
2997 BUG_TRAP(!dev->ip6_ptr);
2998 BUG_TRAP(!dev->dn_ptr);
2999
3000
3001 /* It must be the very last action,
3002 * after this 'dev' may point to freed up memory.
3003 */
3004 if (dev->destructor)
3005 dev->destructor(dev);
3006 break;
3007
3008 default:
3009 printk(KERN_ERR "network todo '%s' but state %d\n",
3010 dev->name, dev->reg_state);
3011 break;
3012 }
3013 }
3014
3015 out:
3016 mutex_unlock(&net_todo_run_mutex);
3017 }
3018
3019 /**
3020 * alloc_netdev - allocate network device
3021 * @sizeof_priv: size of private data to allocate space for
3022 * @name: device name format string
3023 * @setup: callback to initialize device
3024 *
3025 * Allocates a struct net_device with private data area for driver use
3026 * and performs basic initialization.
3027 */
3028 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3029 void (*setup)(struct net_device *))
3030 {
3031 void *p;
3032 struct net_device *dev;
3033 int alloc_size;
3034
3035 /* ensure 32-byte alignment of both the device and private area */
3036 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3037 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3038
3039 p = kmalloc(alloc_size, GFP_KERNEL);
3040 if (!p) {
3041 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3042 return NULL;
3043 }
3044 memset(p, 0, alloc_size);
3045
3046 dev = (struct net_device *)
3047 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3048 dev->padded = (char *)dev - (char *)p;
3049
3050 if (sizeof_priv)
3051 dev->priv = netdev_priv(dev);
3052
3053 setup(dev);
3054 strcpy(dev->name, name);
3055 return dev;
3056 }
3057 EXPORT_SYMBOL(alloc_netdev);
3058
3059 /**
3060 * free_netdev - free network device
3061 * @dev: device
3062 *
3063 * This function does the last stage of destroying an allocated device
3064 * interface. The reference to the device object is released.
3065 * If this is the last reference then it will be freed.
3066 */
3067 void free_netdev(struct net_device *dev)
3068 {
3069 #ifdef CONFIG_SYSFS
3070 /* Compatiablity with error handling in drivers */
3071 if (dev->reg_state == NETREG_UNINITIALIZED) {
3072 kfree((char *)dev - dev->padded);
3073 return;
3074 }
3075
3076 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3077 dev->reg_state = NETREG_RELEASED;
3078
3079 /* will free via class release */
3080 class_device_put(&dev->class_dev);
3081 #else
3082 kfree((char *)dev - dev->padded);
3083 #endif
3084 }
3085
/**
 *	synchronize_net - synchronize with packet receive processing
 *
 *	Declares that the caller may sleep, then waits via
 *	synchronize_rcu().
 */
void synchronize_net(void)
{
	might_sleep();

	synchronize_rcu();
}
3092
3093 /**
3094 * unregister_netdevice - remove device from the kernel
3095 * @dev: device
3096 *
3097 * This function shuts down a device interface and removes it
3098 * from the kernel tables. On success 0 is returned, on a failure
3099 * a negative errno code is returned.
3100 *
3101 * Callers must hold the rtnl semaphore. You may want
3102 * unregister_netdev() instead of this.
3103 */
3104
3105 int unregister_netdevice(struct net_device *dev)
3106 {
3107 struct net_device *d, **dp;
3108
3109 BUG_ON(dev_boot_phase);
3110 ASSERT_RTNL();
3111
3112 /* Some devices call without registering for initialization unwind. */
3113 if (dev->reg_state == NETREG_UNINITIALIZED) {
3114 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3115 "was registered\n", dev->name, dev);
3116 return -ENODEV;
3117 }
3118
3119 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3120
3121 /* If device is running, close it first. */
3122 if (dev->flags & IFF_UP)
3123 dev_close(dev);
3124
3125 /* And unlink it from device chain. */
3126 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3127 if (d == dev) {
3128 write_lock_bh(&dev_base_lock);
3129 hlist_del(&dev->name_hlist);
3130 hlist_del(&dev->index_hlist);
3131 if (dev_tail == &dev->next)
3132 dev_tail = dp;
3133 *dp = d->next;
3134 write_unlock_bh(&dev_base_lock);
3135 break;
3136 }
3137 }
3138 if (!d) {
3139 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3140 dev->name);
3141 return -ENODEV;
3142 }
3143
3144 dev->reg_state = NETREG_UNREGISTERING;
3145
3146 synchronize_net();
3147
3148 /* Shutdown queueing discipline. */
3149 dev_shutdown(dev);
3150
3151
3152 /* Notify protocols, that we are about to destroy
3153 this device. They should clean all the things.
3154 */
3155 blocking_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3156
3157 /*
3158 * Flush the multicast chain
3159 */
3160 dev_mc_discard(dev);
3161
3162 if (dev->uninit)
3163 dev->uninit(dev);
3164
3165 /* Notifier chain MUST detach us from master device. */
3166 BUG_TRAP(!dev->master);
3167
3168 free_divert_blk(dev);
3169
3170 /* Finish processing unregister after unlock */
3171 net_set_todo(dev);
3172
3173 synchronize_net();
3174
3175 dev_put(dev);
3176 return 0;
3177 }
3178
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. Nothing is returned: the result of
 *	unregister_netdevice() is discarded.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();

	/* Return value intentionally ignored; this wrapper is void. */
	unregister_netdevice(dev);

	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3199
#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU notifier callback. Events other than CPU_DEAD are ignored. On
 * CPU_DEAD the completion and output queues in the dead CPU's
 * softnet_data are appended to those of the CPU running this callback,
 * and the dead CPU's queued input packets are re-submitted through
 * netif_rx(). Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	unsigned int oldcpu = (unsigned long)ocpu;
	unsigned int cpu;
	struct softnet_data *sd, *oldsd;
	struct sk_buff **skb_link;
	struct net_device **dev_link;
	struct sk_buff *skb;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Walk to the end of our completion_queue ... */
	for (skb_link = &sd->completion_queue; *skb_link;
	     skb_link = &(*skb_link)->next)
		;
	/* ... and append the completion queue from the offline CPU. */
	*skb_link = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Walk to the end of our output_queue ... */
	for (dev_link = &sd->output_queue; *dev_link;
	     dev_link = &(*dev_link)->next_sched)
		;
	/* ... and append the output queue from the offline CPU. */
	*dev_link = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	for (;;) {
		skb = __skb_dequeue(&oldsd->input_pkt_queue);
		if (!skb)
			break;
		netif_rx(skb);
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
3245
3246
3247 /*
3248 * Initialize the DEV module. At boot time this walks the device list and
3249 * unhooks any devices that fail to initialise (normally hardware not
3250 * present) and leaves us with a valid list of present and active devices.
3251 *
3252 */
3253
3254 /*
3255 * This is called single threaded during boot, so no need
3256 * to take the rtnl semaphore.
3257 */
3258 static int __init net_dev_init(void)
3259 {
3260 int i, rc = -ENOMEM;
3261
3262 BUG_ON(!dev_boot_phase);
3263
3264 net_random_init();
3265
3266 if (dev_proc_init())
3267 goto out;
3268
3269 if (netdev_sysfs_init())
3270 goto out;
3271
3272 INIT_LIST_HEAD(&ptype_all);
3273 for (i = 0; i < 16; i++)
3274 INIT_LIST_HEAD(&ptype_base[i]);
3275
3276 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3277 INIT_HLIST_HEAD(&dev_name_head[i]);
3278
3279 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3280 INIT_HLIST_HEAD(&dev_index_head[i]);
3281
3282 /*
3283 * Initialise the packet receive queues.
3284 */
3285
3286 for_each_cpu(i) {
3287 struct softnet_data *queue;
3288
3289 queue = &per_cpu(softnet_data, i);
3290 skb_queue_head_init(&queue->input_pkt_queue);
3291 queue->completion_queue = NULL;
3292 INIT_LIST_HEAD(&queue->poll_list);
3293 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3294 queue->backlog_dev.weight = weight_p;
3295 queue->backlog_dev.poll = process_backlog;
3296 atomic_set(&queue->backlog_dev.refcnt, 1);
3297 }
3298
3299 dev_boot_phase = 0;
3300
3301 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3302 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3303
3304 hotcpu_notifier(dev_cpu_callback, 0);
3305 dst_init();
3306 dev_mcast_init();
3307 rc = 0;
3308 out:
3309 return rc;
3310 }
3311
3312 subsys_initcall(net_dev_init);
3313
/* Symbols exported to modules, in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(__skb_linearize);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks: exported when bridging is built in or built as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
EXPORT_SYMBOL(br_handle_frame_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);