1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set to 2
24 * if register_netdev gets called before
25 * net_dev_init & also removed a few lines
26 * of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant stunts to
29 * keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into drivers
34 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
35 * Alan Cox : 100 backlog just doesn't cut it when
36 * you start doing multicast video 8)
37 * Alan Cox : Rewrote net_bh and list manager.
38 * Alan Cox : Fix ETH_P_ALL echoback lengths.
39 * Alan Cox : Took out transmit every packet pass
40 * Saved a few bytes in the ioctl handler
41 * Alan Cox : Network driver sets packet type before calling netif_rx. Saves
42 * a function call a packet.
43 * Alan Cox : Hashed net_bh()
44 * Richard Kooijman: Timestamp fixes.
45 * Alan Cox : Wrong field in SIOCGIFDSTADDR
46 * Alan Cox : Device lock protection.
47 * Alan Cox : Fixed nasty side effect of device close changes.
48 * Rudi Cilibrasi : Pass the right thing to set_mac_address()
49 * Dave Miller : 32bit quantity for the device lock to make it work out
50 * on a Sparc.
51 * Bjorn Ekwall : Added KERNELD hack.
52 * Alan Cox : Cleaned up the backlog initialise.
53 * Craig Metz : SIOCGIFCONF fix if space for under
54 * 1 device.
55 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
56 * is no device open function.
57 * Andi Kleen : Fix error reporting for SIOCGIFCONF
58 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
59 * Cyrus Durgin : Cleaned for KMOD
60 * Adam Sulmicki : Bug Fix : Network Device Unload
61 * A network device unload needs to purge
62 * the backlog queue.
63 * Paul Rusty Russell : SIOCSIFNAME
64 * Pekka Riikonen : Netdev boot-time settings code
65 * Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt
66 * J Hadi Salim : - Backlog queue sampling
67 * - netif_rx() feedback
68 */
69
70 #include <asm/uaccess.h>
71 #include <asm/system.h>
72 #include <asm/bitops.h>
73 #include <linux/config.h>
74 #include <linux/types.h>
75 #include <linux/kernel.h>
76 #include <linux/sched.h>
77 #include <linux/string.h>
78 #include <linux/mm.h>
79 #include <linux/socket.h>
80 #include <linux/sockios.h>
81 #include <linux/errno.h>
82 #include <linux/interrupt.h>
83 #include <linux/if_ether.h>
84 #include <linux/netdevice.h>
85 #include <linux/etherdevice.h>
86 #include <linux/notifier.h>
87 #include <linux/skbuff.h>
88 #include <linux/brlock.h>
89 #include <net/sock.h>
90 #include <linux/rtnetlink.h>
91 #include <linux/proc_fs.h>
92 #include <linux/stat.h>
93 #include <linux/if_bridge.h>
94 #include <linux/divert.h>
95 #include <net/dst.h>
96 #include <net/pkt_sched.h>
97 #include <net/profile.h>
98 #include <net/checksum.h>
99 #include <linux/highmem.h>
100 #include <linux/init.h>
101 #include <linux/kmod.h>
102 #include <linux/module.h>
103 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
104 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
105 #include <net/iw_handler.h>
106 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
107 #ifdef CONFIG_PLIP
108 extern int plip_init(void);
109 #endif
110
111
112 /* This define, if set, will randomly drop a packet when congestion
113 * is more than moderate. It helps fairness in the multi-interface
114 * case when one of them is a hog, but it kills performance for the
115 * single interface case so it is off now by default.
116 */
117 #undef RAND_LIE
118
119 /* Setting this will sample the queue lengths and thus congestion
120 * via a timer instead of as each packet is received.
121 */
122 #undef OFFLINE_SAMPLE
123
124 NET_PROFILE_DEFINE(dev_queue_xmit)
125 NET_PROFILE_DEFINE(softnet_process)
126
/*
 * Printable names for the media port types.
 * NOTE(review): presumably indexed by a device's if_port value - confirm
 * against the users of this table.
 */
const char *if_port_text[] = {
	"unknown",
	"BNC",
	"10baseT",
	"AUI",
	"100baseT",
	"100baseTX",
	"100baseFX",
};
136
137 /*
138 * The list of packet types we will receive (as opposed to discard)
139 * and the routines to invoke.
140 *
141 * Why 16. Because with 16 the only overlap we get on a hash of the
142 * low nibble of the protocol value is RARP/SNAP/X.25.
143 *
144 * NOTE: That is no longer true with the addition of VLAN tags. Not
145 * sure which should go first, but I bet it won't make much
146 * difference if we are running VLANs. The good news is that
147 * this protocol won't be in the list unless compiled in, so
148 * the average user (w/out VLANs) will not be adversly affected.
149 * --BLG
150 *
151 * 0800 IP
152 * 8100 802.1Q VLAN
153 * 0001 802.3
154 * 0002 AX.25
155 * 0004 802.2
156 * 8035 RARP
157 * 0005 SNAP
158 * 0805 X.25
159 * 0806 ARP
160 * 8137 IPX
161 * 0009 Localtalk
162 * 86DD IPv6
163 */
164
165 static struct packet_type *ptype_base[16]; /* 16 way hashed list */
166 static struct packet_type *ptype_all = NULL; /* Taps */
167
168 #ifdef OFFLINE_SAMPLE
169 static void sample_queue(unsigned long dummy);
170 static struct timer_list samp_timer = { function: sample_queue };
171 #endif
172
173 #ifdef CONFIG_HOTPLUG
174 static int net_run_sbin_hotplug(struct net_device *dev, char *action);
175 #else
176 #define net_run_sbin_hotplug(dev, action) ({ 0; })
177 #endif
178
179 /*
180 * Our notifier list
181 */
182
183 static struct notifier_block *netdev_chain=NULL;
184
185 /*
186 * Device drivers call our routines to queue packets here. We empty the
187 * queue in the local softnet handler.
188 */
189 struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
190
191 #ifdef CONFIG_NET_FASTROUTE
192 int netdev_fastroute;
193 int netdev_fastroute_obstacles;
194 #endif
195
196
197 /******************************************************************************************
198
199 Protocol management and registration routines
200
201 *******************************************************************************************/
202
/*
 *	For efficiency: count of registered ETH_P_ALL handlers (taps), so
 *	that the transmit path can skip the tap walk when there are none.
 */

int netdev_nit = 0;
208
/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the check of protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a protocol handler that mangles packets is
 *	first on the list, it is not able to sense that the packet
 *	is cloned and should be copied-on-write, so it will change
 *	it and subsequent readers will get a broken packet.
 *							--ANK (980803)
 */
224
225 /**
226 * dev_add_pack - add packet handler
227 * @pt: packet type declaration
228 *
229 * Add a protocol handler to the networking stack. The passed &packet_type
230 * is linked into kernel lists and may not be freed until it has been
231 * removed from the kernel lists.
232 */
233
dev_add_pack(struct packet_type * pt)234 void dev_add_pack(struct packet_type *pt)
235 {
236 int hash;
237
238 br_write_lock_bh(BR_NETPROTO_LOCK);
239
240 #ifdef CONFIG_NET_FASTROUTE
241 /* Hack to detect packet socket */
242 if ((pt->data) && ((int)(pt->data)!=1)) {
243 netdev_fastroute_obstacles++;
244 dev_clear_fastroute(pt->dev);
245 }
246 #endif
247 if (pt->type == htons(ETH_P_ALL)) {
248 netdev_nit++;
249 pt->next=ptype_all;
250 ptype_all=pt;
251 } else {
252 hash=ntohs(pt->type)&15;
253 pt->next = ptype_base[hash];
254 ptype_base[hash] = pt;
255 }
256 br_write_unlock_bh(BR_NETPROTO_LOCK);
257 }
258
259
260 /**
261 * dev_remove_pack - remove packet handler
262 * @pt: packet type declaration
263 *
264 * Remove a protocol handler that was previously added to the kernel
265 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
266 * from the kernel lists and can be freed or reused once this function
267 * returns.
268 */
269
dev_remove_pack(struct packet_type * pt)270 void dev_remove_pack(struct packet_type *pt)
271 {
272 struct packet_type **pt1;
273
274 br_write_lock_bh(BR_NETPROTO_LOCK);
275
276 if (pt->type == htons(ETH_P_ALL)) {
277 netdev_nit--;
278 pt1=&ptype_all;
279 } else {
280 pt1=&ptype_base[ntohs(pt->type)&15];
281 }
282
283 for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
284 if (pt == (*pt1)) {
285 *pt1 = pt->next;
286 #ifdef CONFIG_NET_FASTROUTE
287 if (pt->data)
288 netdev_fastroute_obstacles--;
289 #endif
290 br_write_unlock_bh(BR_NETPROTO_LOCK);
291 return;
292 }
293 }
294 br_write_unlock_bh(BR_NETPROTO_LOCK);
295 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
296 }
297
298 /******************************************************************************
299
300 Device Boot-time Settings Routines
301
302 *******************************************************************************/
303
304 /* Boot time configuration table */
305 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
306
307 /**
308 * netdev_boot_setup_add - add new setup entry
309 * @name: name of the device
310 * @map: configured settings for the device
311 *
312 * Adds new setup entry to the dev_boot_setup list. The function
313 * returns 0 on error and 1 on success. This is a generic routine to
314 * all netdevices.
315 */
netdev_boot_setup_add(char * name,struct ifmap * map)316 int netdev_boot_setup_add(char *name, struct ifmap *map)
317 {
318 struct netdev_boot_setup *s;
319 int i;
320
321 s = dev_boot_setup;
322 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
323 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
324 memset(s[i].name, 0, sizeof(s[i].name));
325 strcpy(s[i].name, name);
326 memcpy(&s[i].map, map, sizeof(s[i].map));
327 break;
328 }
329 }
330
331 if (i >= NETDEV_BOOT_SETUP_MAX)
332 return 0;
333
334 return 1;
335 }
336
337 /**
338 * netdev_boot_setup_check - check boot time settings
339 * @dev: the netdevice
340 *
341 * Check boot time settings for the device.
342 * The found settings are set for the device to be used
343 * later in the device probing.
344 * Returns 0 if no settings found, 1 if they are.
345 */
netdev_boot_setup_check(struct net_device * dev)346 int netdev_boot_setup_check(struct net_device *dev)
347 {
348 struct netdev_boot_setup *s;
349 int i;
350
351 s = dev_boot_setup;
352 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
353 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
354 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
355 dev->irq = s[i].map.irq;
356 dev->base_addr = s[i].map.base_addr;
357 dev->mem_start = s[i].map.mem_start;
358 dev->mem_end = s[i].map.mem_end;
359 return 1;
360 }
361 }
362 return 0;
363 }
364
365 /*
366 * Saves at boot time configured settings for any netdevice.
367 */
netdev_boot_setup(char * str)368 int __init netdev_boot_setup(char *str)
369 {
370 int ints[5];
371 struct ifmap map;
372
373 str = get_options(str, ARRAY_SIZE(ints), ints);
374 if (!str || !*str)
375 return 0;
376
377 /* Save settings */
378 memset(&map, 0, sizeof(map));
379 if (ints[0] > 0)
380 map.irq = ints[1];
381 if (ints[0] > 1)
382 map.base_addr = ints[2];
383 if (ints[0] > 2)
384 map.mem_start = ints[3];
385 if (ints[0] > 3)
386 map.mem_end = ints[4];
387
388 /* Add new entry to the list */
389 return netdev_boot_setup_add(str, &map);
390 }
391
392 __setup("netdev=", netdev_boot_setup);
393
394 /*****************************************************************************************
395
396 Device Interface Subroutines
397
398 ******************************************************************************************/
399
400 /**
401 * __dev_get_by_name - find a device by its name
402 * @name: name to find
403 *
404 * Find an interface by name. Must be called under RTNL semaphore
405 * or @dev_base_lock. If the name is found a pointer to the device
406 * is returned. If the name is not found then %NULL is returned. The
407 * reference counters are not incremented so the caller must be
408 * careful with locks.
409 */
410
411
__dev_get_by_name(const char * name)412 struct net_device *__dev_get_by_name(const char *name)
413 {
414 struct net_device *dev;
415
416 for (dev = dev_base; dev != NULL; dev = dev->next) {
417 if (strncmp(dev->name, name, IFNAMSIZ) == 0)
418 return dev;
419 }
420 return NULL;
421 }
422
423 /**
424 * dev_get_by_name - find a device by its name
425 * @name: name to find
426 *
427 * Find an interface by name. This can be called from any
428 * context and does its own locking. The returned handle has
429 * the usage count incremented and the caller must use dev_put() to
430 * release it when it is no longer needed. %NULL is returned if no
431 * matching device is found.
432 */
433
dev_get_by_name(const char * name)434 struct net_device *dev_get_by_name(const char *name)
435 {
436 struct net_device *dev;
437
438 read_lock(&dev_base_lock);
439 dev = __dev_get_by_name(name);
440 if (dev)
441 dev_hold(dev);
442 read_unlock(&dev_base_lock);
443 return dev;
444 }
445
446 /*
447 Return value is changed to int to prevent illegal usage in future.
448 It is still legal to use to check for device existence.
449
450 User should understand, that the result returned by this function
451 is meaningless, if it was not issued under rtnl semaphore.
452 */
453
454 /**
455 * dev_get - test if a device exists
456 * @name: name to test for
457 *
458 * Test if a name exists. Returns true if the name is found. In order
459 * to be sure the name is not allocated or removed during the test the
460 * caller must hold the rtnl semaphore.
461 *
462 * This function primarily exists for back compatibility with older
463 * drivers.
464 */
465
dev_get(const char * name)466 int dev_get(const char *name)
467 {
468 struct net_device *dev;
469
470 read_lock(&dev_base_lock);
471 dev = __dev_get_by_name(name);
472 read_unlock(&dev_base_lock);
473 return dev != NULL;
474 }
475
476 /**
477 * __dev_get_by_index - find a device by its ifindex
478 * @ifindex: index of device
479 *
480 * Search for an interface by index. Returns %NULL if the device
481 * is not found or a pointer to the device. The device has not
482 * had its reference counter increased so the caller must be careful
483 * about locking. The caller must hold either the RTNL semaphore
484 * or @dev_base_lock.
485 */
486
__dev_get_by_index(int ifindex)487 struct net_device * __dev_get_by_index(int ifindex)
488 {
489 struct net_device *dev;
490
491 for (dev = dev_base; dev != NULL; dev = dev->next) {
492 if (dev->ifindex == ifindex)
493 return dev;
494 }
495 return NULL;
496 }
497
498
499 /**
500 * dev_get_by_index - find a device by its ifindex
501 * @ifindex: index of device
502 *
503 * Search for an interface by index. Returns NULL if the device
504 * is not found or a pointer to the device. The device returned has
505 * had a reference added and the pointer is safe until the user calls
506 * dev_put to indicate they have finished with it.
507 */
508
dev_get_by_index(int ifindex)509 struct net_device * dev_get_by_index(int ifindex)
510 {
511 struct net_device *dev;
512
513 read_lock(&dev_base_lock);
514 dev = __dev_get_by_index(ifindex);
515 if (dev)
516 dev_hold(dev);
517 read_unlock(&dev_base_lock);
518 return dev;
519 }
520
521 /**
522 * dev_getbyhwaddr - find a device by its hardware address
523 * @type: media type of device
524 * @ha: hardware address
525 *
526 * Search for an interface by MAC address. Returns NULL if the device
527 * is not found or a pointer to the device. The caller must hold the
528 * rtnl semaphore. The returned device has not had its ref count increased
529 * and the caller must therefore be careful about locking
530 *
531 * BUGS:
532 * If the API was consistent this would be __dev_get_by_hwaddr
533 */
534
dev_getbyhwaddr(unsigned short type,char * ha)535 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
536 {
537 struct net_device *dev;
538
539 ASSERT_RTNL();
540
541 for (dev = dev_base; dev != NULL; dev = dev->next) {
542 if (dev->type == type &&
543 memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
544 return dev;
545 }
546 return NULL;
547 }
548
549 /**
550 * dev_get_by_flags - find any device with given flags
551 * @if_flags: IFF_* values
552 * @mask: bitmask of bits in if_flags to check
553 *
554 * Search for any interface with the given flags. Returns NULL if a device
555 * is not found or a pointer to the device. The device returned has
556 * had a reference added and the pointer is safe until the user calls
557 * dev_put to indicate they have finished with it.
558 */
559
dev_get_by_flags(unsigned short if_flags,unsigned short mask)560 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
561 {
562 struct net_device *dev;
563
564 read_lock(&dev_base_lock);
565 dev = __dev_get_by_flags(if_flags, mask);
566 if (dev)
567 dev_hold(dev);
568 read_unlock(&dev_base_lock);
569 return dev;
570 }
571
572 /**
573 * __dev_get_by_flags - find any device with given flags
574 * @if_flags: IFF_* values
575 * @mask: bitmask of bits in if_flags to check
576 *
577 * Search for any interface with the given flags. Returns NULL if a device
578 * is not found or a pointer to the device. The caller must hold either
579 * the RTNL semaphore or @dev_base_lock.
580 */
581
__dev_get_by_flags(unsigned short if_flags,unsigned short mask)582 struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
583 {
584 struct net_device *dev;
585
586 for (dev = dev_base; dev != NULL; dev = dev->next) {
587 if (((dev->flags ^ if_flags) & mask) == 0)
588 return dev;
589 }
590 return NULL;
591 }
592
593 /**
594 * dev_alloc_name - allocate a name for a device
595 * @dev: device
596 * @name: name format string
597 *
598 * Passed a format string - eg "lt%d" it will try and find a suitable
599 * id. Not efficient for many devices, not called a lot. The caller
600 * must hold the dev_base or rtnl lock while allocating the name and
601 * adding the device in order to avoid duplicates. Returns the number
602 * of the unit assigned or a negative errno code.
603 */
604
dev_alloc_name(struct net_device * dev,const char * name)605 int dev_alloc_name(struct net_device *dev, const char *name)
606 {
607 int i;
608 char buf[32];
609 char *p;
610
611 /*
612 * Verify the string as this thing may have come from
613 * the user. There must be either one "%d" and no other "%"
614 * characters, or no "%" characters at all.
615 */
616 p = strchr(name, '%');
617 if (p && (p[1] != 'd' || strchr(p+2, '%')))
618 return -EINVAL;
619
620 /*
621 * If you need over 100 please also fix the algorithm...
622 */
623 for (i = 0; i < 100; i++) {
624 snprintf(buf,sizeof(buf),name,i);
625 if (__dev_get_by_name(buf) == NULL) {
626 strcpy(dev->name, buf);
627 return i;
628 }
629 }
630 return -ENFILE; /* Over 100 of the things .. bail out! */
631 }
632
633 /**
634 * dev_alloc - allocate a network device and name
635 * @name: name format string
636 * @err: error return pointer
637 *
638 * Passed a format string, eg. "lt%d", it will allocate a network device
639 * and space for the name. %NULL is returned if no memory is available.
640 * If the allocation succeeds then the name is assigned and the
641 * device pointer returned. %NULL is returned if the name allocation
642 * failed. The cause of an error is returned as a negative errno code
643 * in the variable @err points to.
644 *
645 * The caller must hold the @dev_base or RTNL locks when doing this in
646 * order to avoid duplicate name allocations.
647 */
648
dev_alloc(const char * name,int * err)649 struct net_device *dev_alloc(const char *name, int *err)
650 {
651 struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
652 if (dev == NULL) {
653 *err = -ENOBUFS;
654 return NULL;
655 }
656 memset(dev, 0, sizeof(struct net_device));
657 *err = dev_alloc_name(dev, name);
658 if (*err < 0) {
659 kfree(dev);
660 return NULL;
661 }
662 return dev;
663 }
664
665 /**
666 * netdev_state_change - device changes state
667 * @dev: device to cause notification
668 *
669 * Called to indicate a device has changed state. This function calls
670 * the notifier chains for netdev_chain and sends a NEWLINK message
671 * to the routing socket.
672 */
673
netdev_state_change(struct net_device * dev)674 void netdev_state_change(struct net_device *dev)
675 {
676 if (dev->flags&IFF_UP) {
677 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
678 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
679 }
680 }
681
682
#ifdef CONFIG_KMOD

/**
 *	dev_load 	- load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges (%CAP_SYS_MODULE) this function requests the module
 *	named @name.  If module loading is not available in this kernel
 *	then it becomes a nop.
 */

void dev_load(const char *name)
{
	/* Interface already exists: nothing to load. */
	if (dev_get(name))
		return;

	if (!capable(CAP_SYS_MODULE))
		return;

	request_module(name);
}

#else

/* No module loader configured: dev_load() does nothing. */
extern inline void dev_load(const char *unused)
{
}

#endif
705
default_rebuild_header(struct sk_buff * skb)706 static int default_rebuild_header(struct sk_buff *skb)
707 {
708 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
709 kfree_skb(skb);
710 return 1;
711 }
712
713 /**
714 * dev_open - prepare an interface for use.
715 * @dev: device to open
716 *
717 * Takes a device from down to up state. The device's private open
718 * function is invoked and then the multicast lists are loaded. Finally
719 * the device is moved into the up state and a %NETDEV_UP message is
720 * sent to the netdev notifier chain.
721 *
722 * Calling this function on an active interface is a nop. On a failure
723 * a negative errno code is returned.
724 */
725
dev_open(struct net_device * dev)726 int dev_open(struct net_device *dev)
727 {
728 int ret = 0;
729
730 /*
731 * Is it already up?
732 */
733
734 if (dev->flags&IFF_UP)
735 return 0;
736
737 /*
738 * Is it even present?
739 */
740 if (!netif_device_present(dev))
741 return -ENODEV;
742
743 /*
744 * Call device private open method
745 */
746 if (try_inc_mod_count(dev->owner)) {
747 set_bit(__LINK_STATE_START, &dev->state);
748 if (dev->open) {
749 ret = dev->open(dev);
750 if (ret != 0) {
751 clear_bit(__LINK_STATE_START, &dev->state);
752 if (dev->owner)
753 __MOD_DEC_USE_COUNT(dev->owner);
754 }
755 }
756 } else {
757 ret = -ENODEV;
758 }
759
760 /*
761 * If it went open OK then:
762 */
763
764 if (ret == 0)
765 {
766 /*
767 * Set the flags.
768 */
769 dev->flags |= IFF_UP;
770
771 /*
772 * Initialize multicasting status
773 */
774 dev_mc_upload(dev);
775
776 /*
777 * Wakeup transmit queue engine
778 */
779 dev_activate(dev);
780
781 /*
782 * ... and announce new interface.
783 */
784 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
785 }
786 return(ret);
787 }
788
789 #ifdef CONFIG_NET_FASTROUTE
790
dev_do_clear_fastroute(struct net_device * dev)791 static void dev_do_clear_fastroute(struct net_device *dev)
792 {
793 if (dev->accept_fastpath) {
794 int i;
795
796 for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
797 struct dst_entry *dst;
798
799 write_lock_irq(&dev->fastpath_lock);
800 dst = dev->fastpath[i];
801 dev->fastpath[i] = NULL;
802 write_unlock_irq(&dev->fastpath_lock);
803
804 dst_release(dst);
805 }
806 }
807 }
808
dev_clear_fastroute(struct net_device * dev)809 void dev_clear_fastroute(struct net_device *dev)
810 {
811 if (dev) {
812 dev_do_clear_fastroute(dev);
813 } else {
814 read_lock(&dev_base_lock);
815 for (dev = dev_base; dev; dev = dev->next)
816 dev_do_clear_fastroute(dev);
817 read_unlock(&dev_base_lock);
818 }
819 }
820 #endif
821
822 /**
823 * dev_close - shutdown an interface.
824 * @dev: device to shutdown
825 *
826 * This function moves an active device into down state. A
827 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
828 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
829 * chain.
830 */
831
dev_close(struct net_device * dev)832 int dev_close(struct net_device *dev)
833 {
834 if (!(dev->flags&IFF_UP))
835 return 0;
836
837 /*
838 * Tell people we are going down, so that they can
839 * prepare to death, when device is still operating.
840 */
841 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
842
843 dev_deactivate(dev);
844
845 clear_bit(__LINK_STATE_START, &dev->state);
846
847 /* Synchronize to scheduled poll. We cannot touch poll list,
848 * it can be even on different cpu. So just clear netif_running(),
849 * and wait when poll really will happen. Actually, the best place
850 * for this is inside dev->stop() after device stopped its irq
851 * engine, but this requires more changes in devices. */
852
853 smp_mb__after_clear_bit(); /* Commit netif_running(). */
854 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
855 /* No hurry. */
856 current->state = TASK_INTERRUPTIBLE;
857 schedule_timeout(1);
858 }
859
860 /*
861 * Call the device specific close. This cannot fail.
862 * Only if device is UP
863 *
864 * We allow it to be called even after a DETACH hot-plug
865 * event.
866 */
867
868 if (dev->stop)
869 dev->stop(dev);
870
871 /*
872 * Device is now down.
873 */
874
875 dev->flags &= ~IFF_UP;
876 #ifdef CONFIG_NET_FASTROUTE
877 dev_clear_fastroute(dev);
878 #endif
879
880 /*
881 * Tell people we are down
882 */
883 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
884
885 /*
886 * Drop the module refcount
887 */
888 if (dev->owner)
889 __MOD_DEC_USE_COUNT(dev->owner);
890
891 return(0);
892 }
893
894
895 /*
896 * Device change register/unregister. These are not inline or static
897 * as we export them to the world.
898 */
899
900 /**
901 * register_netdevice_notifier - register a network notifier block
902 * @nb: notifier
903 *
904 * Register a notifier to be called when network device events occur.
905 * The notifier passed is linked into the kernel structures and must
906 * not be reused until it has been unregistered. A negative errno code
907 * is returned on a failure.
908 */
909
register_netdevice_notifier(struct notifier_block * nb)910 int register_netdevice_notifier(struct notifier_block *nb)
911 {
912 return notifier_chain_register(&netdev_chain, nb);
913 }
914
915 /**
916 * unregister_netdevice_notifier - unregister a network notifier block
917 * @nb: notifier
918 *
919 * Unregister a notifier previously registered by
920 * register_netdevice_notifier(). The notifier is unlinked into the
921 * kernel structures and may then be reused. A negative errno code
922 * is returned on a failure.
923 */
924
unregister_netdevice_notifier(struct notifier_block * nb)925 int unregister_netdevice_notifier(struct notifier_block *nb)
926 {
927 return notifier_chain_unregister(&netdev_chain,nb);
928 }
929
930 /*
931 * Support routine. Sends outgoing frames to any network
932 * taps currently in use.
933 */
934
dev_queue_xmit_nit(struct sk_buff * skb,struct net_device * dev)935 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
936 {
937 struct packet_type *ptype;
938 do_gettimeofday(&skb->stamp);
939
940 br_read_lock(BR_NETPROTO_LOCK);
941 for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
942 {
943 /* Never send packets back to the socket
944 * they originated from - MvS (miquels@drinkel.ow.org)
945 */
946 if ((ptype->dev == dev || !ptype->dev) &&
947 ((struct sock *)ptype->data != skb->sk))
948 {
949 struct sk_buff *skb2;
950 if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
951 break;
952
953 /* skb->nh should be correctly
954 set by sender, so that the second statement is
955 just protection against buggy protocols.
956 */
957 skb2->mac.raw = skb2->data;
958
959 if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) {
960 if (net_ratelimit())
961 printk(KERN_CRIT "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
962 skb2->nh.raw = skb2->data;
963 }
964
965 skb2->h.raw = skb2->nh.raw;
966 skb2->pkt_type = PACKET_OUTGOING;
967 ptype->func(skb2, skb->dev, ptype);
968 }
969 }
970 br_read_unlock(BR_NETPROTO_LOCK);
971 }
972
973 /* Calculate csum in the case, when packet is misrouted.
974 * If it failed by some reason, ignore and send skb with wrong
975 * checksum.
976 */
skb_checksum_help(struct sk_buff * skb)977 struct sk_buff * skb_checksum_help(struct sk_buff *skb)
978 {
979 int offset;
980 unsigned int csum;
981
982 offset = skb->h.raw - skb->data;
983 if (offset > (int)skb->len)
984 BUG();
985 csum = skb_checksum(skb, offset, skb->len-offset, 0);
986
987 offset = skb->tail - skb->h.raw;
988 if (offset <= 0)
989 BUG();
990 if (skb->csum+2 > offset)
991 BUG();
992
993 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
994 skb->ip_summed = CHECKSUM_NONE;
995 return skb;
996 }
997
998 #ifdef CONFIG_HIGHMEM
999 /* Actually, we should eliminate this check as soon as we know, that:
1000 * 1. IOMMU is present and allows to map all the memory.
1001 * 2. No high memory really exists on this machine.
1002 */
1003
1004 static inline int
illegal_highdma(struct net_device * dev,struct sk_buff * skb)1005 illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1006 {
1007 int i;
1008
1009 if (dev->features&NETIF_F_HIGHDMA)
1010 return 0;
1011
1012 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
1013 if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
1014 return 1;
1015
1016 return 0;
1017 }
1018 #else
1019 #define illegal_highdma(dev, skb) (0)
1020 #endif
1021
1022 /**
1023 * dev_queue_xmit - transmit a buffer
1024 * @skb: buffer to transmit
1025 *
1026 * Queue a buffer for transmission to a network device. The caller must
1027 * have set the device and priority and built the buffer before calling this
1028 * function. The function can be called from an interrupt.
1029 *
1030 * A negative errno code is returned on a failure. A success does not
1031 * guarantee the frame will be transmitted as it may be dropped due
1032 * to congestion or traffic shaping.
1033 */
1034
dev_queue_xmit(struct sk_buff * skb)1035 int dev_queue_xmit(struct sk_buff *skb)
1036 {
1037 struct net_device *dev = skb->dev;
1038 struct Qdisc *q;
1039
1040 if (skb_shinfo(skb)->frag_list &&
1041 !(dev->features&NETIF_F_FRAGLIST) &&
1042 skb_linearize(skb, GFP_ATOMIC) != 0) {
1043 kfree_skb(skb);
1044 return -ENOMEM;
1045 }
1046
1047 /* Fragmented skb is linearized if device does not support SG,
1048 * or if at least one of fragments is in highmem and device
1049 * does not support DMA from it.
1050 */
1051 if (skb_shinfo(skb)->nr_frags &&
1052 (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1053 skb_linearize(skb, GFP_ATOMIC) != 0) {
1054 kfree_skb(skb);
1055 return -ENOMEM;
1056 }
1057
1058 /* If packet is not checksummed and device does not support
1059 * checksumming for this protocol, complete checksumming here.
1060 */
1061 if (skb->ip_summed == CHECKSUM_HW &&
1062 (!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) &&
1063 (!(dev->features&NETIF_F_IP_CSUM) ||
1064 skb->protocol != htons(ETH_P_IP)))) {
1065 if ((skb = skb_checksum_help(skb)) == NULL)
1066 return -ENOMEM;
1067 }
1068
1069 /* Grab device queue */
1070 spin_lock_bh(&dev->queue_lock);
1071 q = dev->qdisc;
1072 if (q->enqueue) {
1073 int ret = q->enqueue(skb, q);
1074
1075 qdisc_run(dev);
1076
1077 spin_unlock_bh(&dev->queue_lock);
1078 return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
1079 }
1080
1081 /* The device has no queue. Common case for software devices:
1082 loopback, all the sorts of tunnels...
1083
1084 Really, it is unlikely that xmit_lock protection is necessary here.
1085 (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
1086 However, it is possible, that they rely on protection
1087 made by us here.
1088
1089 Check this and shot the lock. It is not prone from deadlocks.
1090 Either shot noqueue qdisc, it is even simpler 8)
1091 */
1092 if (dev->flags&IFF_UP) {
1093 int cpu = smp_processor_id();
1094
1095 if (dev->xmit_lock_owner != cpu) {
1096 spin_unlock(&dev->queue_lock);
1097 spin_lock(&dev->xmit_lock);
1098 dev->xmit_lock_owner = cpu;
1099
1100 if (!netif_queue_stopped(dev)) {
1101 if (netdev_nit)
1102 dev_queue_xmit_nit(skb,dev);
1103
1104 if (dev->hard_start_xmit(skb, dev) == 0) {
1105 dev->xmit_lock_owner = -1;
1106 spin_unlock_bh(&dev->xmit_lock);
1107 return 0;
1108 }
1109 }
1110 dev->xmit_lock_owner = -1;
1111 spin_unlock_bh(&dev->xmit_lock);
1112 if (net_ratelimit())
1113 printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", dev->name);
1114 kfree_skb(skb);
1115 return -ENETDOWN;
1116 } else {
1117 /* Recursion is detected! It is possible, unfortunately */
1118 if (net_ratelimit())
1119 printk(KERN_CRIT "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
1120 }
1121 }
1122 spin_unlock_bh(&dev->queue_lock);
1123
1124 kfree_skb(skb);
1125 return -ENETDOWN;
1126 }
1127
1128
1129 /*=======================================================================
1130 Receiver routines
1131 =======================================================================*/
1132
/* netif_rx() starts dropping once a CPU's input queue is longer than this;
 * net_rx_action() also uses it as its per-run budget.
 */
int netdev_max_backlog = 300;
int weight_p = 64;            /* old backlog weight */
/* These numbers were selected based on intuition and some
 * experimentation; if you have a more scientific way of doing this,
 * please go ahead and fix things.
 */
/* process_backlog() lifts the throttle once the queue is shorter than this. */
int no_cong_thresh = 10;
/* Average-backlog thresholds used by get_sample_stats() to grade
 * congestion: above no_cong is low, above lo_cong is moderate,
 * above mod_cong is high.
 */
int no_cong = 20;
int lo_cong = 100;
int mod_cong = 290;

/* Per-CPU receive counters, indexed by CPU number. */
struct netif_rx_stats netdev_rx_stat[NR_CPUS];
1145
1146
1147 #ifdef CONFIG_NET_HW_FLOWCONTROL
/* Incremented when a CPU's input queue becomes throttled, decremented when
 * the throttle is lifted; netdev_wakeup() runs when it drops to zero.
 */
atomic_t netdev_dropping = ATOMIC_INIT(0);
/* Bitmap of allocated flow-control slots.  Bit 0 starts out set, so slot 0
 * is never handed out and 0 can serve as the "no slot" return value of
 * netdev_register_fc().
 */
static unsigned long netdev_fc_mask = 1;
/* Bitmap of slots whose stimul() callback netdev_wakeup() must invoke. */
unsigned long netdev_fc_xoff = 0;
/* Protects the slot table and the two bitmaps above. */
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;

/* One entry per bit of netdev_fc_mask: callback and the device to pass to it. */
static struct
{
	void (*stimul)(struct net_device *);
	struct net_device *dev;
} netdev_fc_slots[BITS_PER_LONG];
1158
netdev_register_fc(struct net_device * dev,void (* stimul)(struct net_device * dev))1159 int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
1160 {
1161 int bit = 0;
1162 unsigned long flags;
1163
1164 spin_lock_irqsave(&netdev_fc_lock, flags);
1165 if (netdev_fc_mask != ~0UL) {
1166 bit = ffz(netdev_fc_mask);
1167 netdev_fc_slots[bit].stimul = stimul;
1168 netdev_fc_slots[bit].dev = dev;
1169 set_bit(bit, &netdev_fc_mask);
1170 clear_bit(bit, &netdev_fc_xoff);
1171 }
1172 spin_unlock_irqrestore(&netdev_fc_lock, flags);
1173 return bit;
1174 }
1175
netdev_unregister_fc(int bit)1176 void netdev_unregister_fc(int bit)
1177 {
1178 unsigned long flags;
1179
1180 spin_lock_irqsave(&netdev_fc_lock, flags);
1181 if (bit > 0) {
1182 netdev_fc_slots[bit].stimul = NULL;
1183 netdev_fc_slots[bit].dev = NULL;
1184 clear_bit(bit, &netdev_fc_mask);
1185 clear_bit(bit, &netdev_fc_xoff);
1186 }
1187 spin_unlock_irqrestore(&netdev_fc_lock, flags);
1188 }
1189
netdev_wakeup(void)1190 static void netdev_wakeup(void)
1191 {
1192 unsigned long xoff;
1193
1194 spin_lock(&netdev_fc_lock);
1195 xoff = netdev_fc_xoff;
1196 netdev_fc_xoff = 0;
1197 while (xoff) {
1198 int i = ffz(~xoff);
1199 xoff &= ~(1<<i);
1200 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1201 }
1202 spin_unlock(&netdev_fc_lock);
1203 }
1204 #endif
1205
get_sample_stats(int cpu)1206 static void get_sample_stats(int cpu)
1207 {
1208 #ifdef RAND_LIE
1209 unsigned long rd;
1210 int rq;
1211 #endif
1212 int blog = softnet_data[cpu].input_pkt_queue.qlen;
1213 int avg_blog = softnet_data[cpu].avg_blog;
1214
1215 avg_blog = (avg_blog >> 1)+ (blog >> 1);
1216
1217 if (avg_blog > mod_cong) {
1218 /* Above moderate congestion levels. */
1219 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1220 #ifdef RAND_LIE
1221 rd = net_random();
1222 rq = rd % netdev_max_backlog;
1223 if (rq < avg_blog) /* unlucky bastard */
1224 softnet_data[cpu].cng_level = NET_RX_DROP;
1225 #endif
1226 } else if (avg_blog > lo_cong) {
1227 softnet_data[cpu].cng_level = NET_RX_CN_MOD;
1228 #ifdef RAND_LIE
1229 rd = net_random();
1230 rq = rd % netdev_max_backlog;
1231 if (rq < avg_blog) /* unlucky bastard */
1232 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1233 #endif
1234 } else if (avg_blog > no_cong)
1235 softnet_data[cpu].cng_level = NET_RX_CN_LOW;
1236 else /* no congestion */
1237 softnet_data[cpu].cng_level = NET_RX_SUCCESS;
1238
1239 softnet_data[cpu].avg_blog = avg_blog;
1240 }
1241
1242 #ifdef OFFLINE_SAMPLE
sample_queue(unsigned long dummy)1243 static void sample_queue(unsigned long dummy)
1244 {
1245 /* 10 ms 0r 1ms -- i dont care -- JHS */
1246 int next_tick = 1;
1247 int cpu = smp_processor_id();
1248
1249 get_sample_stats(cpu);
1250 next_tick += jiffies;
1251 mod_timer(&samp_timer, next_tick);
1252 }
1253 #endif
1254
1255
1256 /**
1257 * netif_rx - post buffer to the network code
1258 * @skb: buffer to post
1259 *
1260 * This function receives a packet from a device driver and queues it for
1261 * the upper (protocol) levels to process. It always succeeds. The buffer
1262 * may be dropped during processing for congestion control or by the
1263 * protocol layers.
1264 *
1265 * return values:
1266 * NET_RX_SUCCESS (no congestion)
1267 * NET_RX_CN_LOW (low congestion)
1268 * NET_RX_CN_MOD (moderate congestion)
1269 * NET_RX_CN_HIGH (high congestion)
1270 * NET_RX_DROP (packet was dropped)
1271 *
1272 *
1273 */
1274
netif_rx(struct sk_buff * skb)1275 int netif_rx(struct sk_buff *skb)
1276 {
1277 int this_cpu = smp_processor_id();
1278 struct softnet_data *queue;
1279 unsigned long flags;
1280
1281 if (skb->stamp.tv_sec == 0)
1282 do_gettimeofday(&skb->stamp);
1283
1284 /* The code is rearranged so that the path is the most
1285 short when CPU is congested, but is still operating.
1286 */
1287 queue = &softnet_data[this_cpu];
1288
1289 local_irq_save(flags);
1290
1291 netdev_rx_stat[this_cpu].total++;
1292 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1293 if (queue->input_pkt_queue.qlen) {
1294 if (queue->throttle)
1295 goto drop;
1296
1297 enqueue:
1298 dev_hold(skb->dev);
1299 __skb_queue_tail(&queue->input_pkt_queue,skb);
1300 local_irq_restore(flags);
1301 #ifndef OFFLINE_SAMPLE
1302 get_sample_stats(this_cpu);
1303 #endif
1304 return queue->cng_level;
1305 }
1306
1307 if (queue->throttle) {
1308 queue->throttle = 0;
1309 #ifdef CONFIG_NET_HW_FLOWCONTROL
1310 if (atomic_dec_and_test(&netdev_dropping))
1311 netdev_wakeup();
1312 #endif
1313 }
1314
1315 netif_rx_schedule(&queue->blog_dev);
1316 goto enqueue;
1317 }
1318
1319 if (queue->throttle == 0) {
1320 queue->throttle = 1;
1321 netdev_rx_stat[this_cpu].throttled++;
1322 #ifdef CONFIG_NET_HW_FLOWCONTROL
1323 atomic_inc(&netdev_dropping);
1324 #endif
1325 }
1326
1327 drop:
1328 netdev_rx_stat[this_cpu].dropped++;
1329 local_irq_restore(flags);
1330
1331 kfree_skb(skb);
1332 return NET_RX_DROP;
1333 }
1334
1335 /* Deliver skb to an old protocol, which is not threaded well
1336 or which do not understand shared skbs.
1337 */
deliver_to_old_ones(struct packet_type * pt,struct sk_buff * skb,int last)1338 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
1339 {
1340 static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
1341 int ret = NET_RX_DROP;
1342
1343
1344 if (!last) {
1345 skb = skb_clone(skb, GFP_ATOMIC);
1346 if (skb == NULL)
1347 return ret;
1348 }
1349 if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
1350 kfree_skb(skb);
1351 return ret;
1352 }
1353
1354 /* The assumption (correct one) is that old protocols
1355 did not depened on BHs different of NET_BH and TIMER_BH.
1356 */
1357
1358 /* Emulate NET_BH with special spinlock */
1359 spin_lock(&net_bh_lock);
1360
1361 /* Disable timers and wait for all timers completion */
1362 tasklet_disable(bh_task_vec+TIMER_BH);
1363
1364 ret = pt->func(skb, skb->dev, pt);
1365
1366 tasklet_hi_enable(bh_task_vec+TIMER_BH);
1367 spin_unlock(&net_bh_lock);
1368 return ret;
1369 }
1370
skb_bond(struct sk_buff * skb)1371 static __inline__ void skb_bond(struct sk_buff *skb)
1372 {
1373 struct net_device *dev = skb->dev;
1374
1375 if (dev->master) {
1376 skb->real_dev = skb->dev;
1377 skb->dev = dev->master;
1378 }
1379 }
1380
net_tx_action(struct softirq_action * h)1381 static void net_tx_action(struct softirq_action *h)
1382 {
1383 int cpu = smp_processor_id();
1384
1385 if (softnet_data[cpu].completion_queue) {
1386 struct sk_buff *clist;
1387
1388 local_irq_disable();
1389 clist = softnet_data[cpu].completion_queue;
1390 softnet_data[cpu].completion_queue = NULL;
1391 local_irq_enable();
1392
1393 while (clist != NULL) {
1394 struct sk_buff *skb = clist;
1395 clist = clist->next;
1396
1397 BUG_TRAP(atomic_read(&skb->users) == 0);
1398 __kfree_skb(skb);
1399 }
1400 }
1401
1402 if (softnet_data[cpu].output_queue) {
1403 struct net_device *head;
1404
1405 local_irq_disable();
1406 head = softnet_data[cpu].output_queue;
1407 softnet_data[cpu].output_queue = NULL;
1408 local_irq_enable();
1409
1410 while (head != NULL) {
1411 struct net_device *dev = head;
1412 head = head->next_sched;
1413
1414 smp_mb__before_clear_bit();
1415 clear_bit(__LINK_STATE_SCHED, &dev->state);
1416
1417 if (spin_trylock(&dev->queue_lock)) {
1418 qdisc_run(dev);
1419 spin_unlock(&dev->queue_lock);
1420 } else {
1421 netif_schedule(dev);
1422 }
1423 }
1424 }
1425 }
1426
1427
1428 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1429 void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
1430 #endif
1431
handle_bridge(struct sk_buff * skb,struct packet_type * pt_prev)1432 static __inline__ int handle_bridge(struct sk_buff *skb,
1433 struct packet_type *pt_prev)
1434 {
1435 int ret = NET_RX_DROP;
1436
1437 if (pt_prev) {
1438 if (!pt_prev->data)
1439 ret = deliver_to_old_ones(pt_prev, skb, 0);
1440 else {
1441 atomic_inc(&skb->users);
1442 ret = pt_prev->func(skb, skb->dev, pt_prev);
1443 }
1444 }
1445
1446 br_handle_frame_hook(skb);
1447 return ret;
1448 }
1449
1450
1451 #ifdef CONFIG_NET_DIVERT
handle_diverter(struct sk_buff * skb)1452 static inline int handle_diverter(struct sk_buff *skb)
1453 {
1454 /* if diversion is supported on device, then divert */
1455 if (skb->dev->divert && skb->dev->divert->divert)
1456 divert_frame(skb);
1457 return 0;
1458 }
1459 #endif /* CONFIG_NET_DIVERT */
1460
netif_receive_skb(struct sk_buff * skb)1461 int netif_receive_skb(struct sk_buff *skb)
1462 {
1463 struct packet_type *ptype, *pt_prev;
1464 int ret = NET_RX_DROP;
1465 unsigned short type;
1466
1467 if (skb->stamp.tv_sec == 0)
1468 do_gettimeofday(&skb->stamp);
1469
1470 skb_bond(skb);
1471
1472 netdev_rx_stat[smp_processor_id()].total++;
1473
1474 #ifdef CONFIG_NET_FASTROUTE
1475 if (skb->pkt_type == PACKET_FASTROUTE) {
1476 netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
1477 return dev_queue_xmit(skb);
1478 }
1479 #endif
1480
1481 skb->h.raw = skb->nh.raw = skb->data;
1482
1483 pt_prev = NULL;
1484 for (ptype = ptype_all; ptype; ptype = ptype->next) {
1485 if (!ptype->dev || ptype->dev == skb->dev) {
1486 if (pt_prev) {
1487 if (!pt_prev->data) {
1488 ret = deliver_to_old_ones(pt_prev, skb, 0);
1489 } else {
1490 atomic_inc(&skb->users);
1491 ret = pt_prev->func(skb, skb->dev, pt_prev);
1492 }
1493 }
1494 pt_prev = ptype;
1495 }
1496 }
1497
1498 #ifdef CONFIG_NET_DIVERT
1499 if (skb->dev->divert && skb->dev->divert->divert)
1500 ret = handle_diverter(skb);
1501 #endif /* CONFIG_NET_DIVERT */
1502
1503 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1504 if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL &&
1505 skb->pkt_type != PACKET_LOOPBACK) {
1506 return handle_bridge(skb, pt_prev);
1507 }
1508 #endif
1509
1510 type = skb->protocol;
1511 for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
1512 if (ptype->type == type &&
1513 (!ptype->dev || ptype->dev == skb->dev)) {
1514 if (pt_prev) {
1515 if (!pt_prev->data) {
1516 ret = deliver_to_old_ones(pt_prev, skb, 0);
1517 } else {
1518 atomic_inc(&skb->users);
1519 ret = pt_prev->func(skb, skb->dev, pt_prev);
1520 }
1521 }
1522 pt_prev = ptype;
1523 }
1524 }
1525
1526 if (pt_prev) {
1527 if (!pt_prev->data) {
1528 ret = deliver_to_old_ones(pt_prev, skb, 1);
1529 } else {
1530 ret = pt_prev->func(skb, skb->dev, pt_prev);
1531 }
1532 } else {
1533 kfree_skb(skb);
1534 /* Jamal, now you will not able to escape explaining
1535 * me how you were going to use this. :-)
1536 */
1537 ret = NET_RX_DROP;
1538 }
1539
1540 return ret;
1541 }
1542
process_backlog(struct net_device * backlog_dev,int * budget)1543 static int process_backlog(struct net_device *backlog_dev, int *budget)
1544 {
1545 int work = 0;
1546 int quota = min(backlog_dev->quota, *budget);
1547 int this_cpu = smp_processor_id();
1548 struct softnet_data *queue = &softnet_data[this_cpu];
1549 unsigned long start_time = jiffies;
1550
1551 for (;;) {
1552 struct sk_buff *skb;
1553 struct net_device *dev;
1554
1555 local_irq_disable();
1556 skb = __skb_dequeue(&queue->input_pkt_queue);
1557 if (skb == NULL)
1558 goto job_done;
1559 local_irq_enable();
1560
1561 dev = skb->dev;
1562
1563 netif_receive_skb(skb);
1564
1565 dev_put(dev);
1566
1567 work++;
1568
1569 if (work >= quota || jiffies - start_time > 1)
1570 break;
1571
1572 #ifdef CONFIG_NET_HW_FLOWCONTROL
1573 if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
1574 queue->throttle = 0;
1575 if (atomic_dec_and_test(&netdev_dropping)) {
1576 netdev_wakeup();
1577 break;
1578 }
1579 }
1580 #endif
1581 }
1582
1583 backlog_dev->quota -= work;
1584 *budget -= work;
1585 return -1;
1586
1587 job_done:
1588 backlog_dev->quota -= work;
1589 *budget -= work;
1590
1591 list_del(&backlog_dev->poll_list);
1592 smp_mb__before_clear_bit();
1593 netif_poll_enable(backlog_dev);
1594
1595 if (queue->throttle) {
1596 queue->throttle = 0;
1597 #ifdef CONFIG_NET_HW_FLOWCONTROL
1598 if (atomic_dec_and_test(&netdev_dropping))
1599 netdev_wakeup();
1600 #endif
1601 }
1602 local_irq_enable();
1603 return 0;
1604 }
1605
net_rx_action(struct softirq_action * h)1606 static void net_rx_action(struct softirq_action *h)
1607 {
1608 int this_cpu = smp_processor_id();
1609 struct softnet_data *queue = &softnet_data[this_cpu];
1610 unsigned long start_time = jiffies;
1611 int budget = netdev_max_backlog;
1612
1613 br_read_lock(BR_NETPROTO_LOCK);
1614 local_irq_disable();
1615
1616 while (!list_empty(&queue->poll_list)) {
1617 struct net_device *dev;
1618
1619 if (budget <= 0 || jiffies - start_time > 1)
1620 goto softnet_break;
1621
1622 local_irq_enable();
1623
1624 dev = list_entry(queue->poll_list.next, struct net_device, poll_list);
1625
1626 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1627 local_irq_disable();
1628 list_del(&dev->poll_list);
1629 list_add_tail(&dev->poll_list, &queue->poll_list);
1630 if (dev->quota < 0)
1631 dev->quota += dev->weight;
1632 else
1633 dev->quota = dev->weight;
1634 } else {
1635 dev_put(dev);
1636 local_irq_disable();
1637 }
1638 }
1639
1640 local_irq_enable();
1641 br_read_unlock(BR_NETPROTO_LOCK);
1642 return;
1643
1644 softnet_break:
1645 netdev_rx_stat[this_cpu].time_squeeze++;
1646 __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
1647
1648 local_irq_enable();
1649 br_read_unlock(BR_NETPROTO_LOCK);
1650 }
1651
1652 static gifconf_func_t * gifconf_list [NPROTO];
1653
1654 /**
1655 * register_gifconf - register a SIOCGIF handler
1656 * @family: Address family
1657 * @gifconf: Function handler
1658 *
1659 * Register protocol dependent address dumping routines. The handler
1660 * that is passed must not be freed or reused until it has been replaced
1661 * by another handler.
1662 */
1663
register_gifconf(unsigned int family,gifconf_func_t * gifconf)1664 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1665 {
1666 if (family>=NPROTO)
1667 return -EINVAL;
1668 gifconf_list[family] = gifconf;
1669 return 0;
1670 }
1671
1672
1673 /*
1674 * Map an interface index to its name (SIOCGIFNAME)
1675 */
1676
1677 /*
1678 * We need this ioctl for efficient implementation of the
1679 * if_indextoname() function required by the IPv6 API. Without
1680 * it, we would have to search all the interfaces to find a
1681 * match. --pb
1682 */
1683
dev_ifname(struct ifreq * arg)1684 static int dev_ifname(struct ifreq *arg)
1685 {
1686 struct net_device *dev;
1687 struct ifreq ifr;
1688
1689 /*
1690 * Fetch the caller's info block.
1691 */
1692
1693 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1694 return -EFAULT;
1695
1696 read_lock(&dev_base_lock);
1697 dev = __dev_get_by_index(ifr.ifr_ifindex);
1698 if (!dev) {
1699 read_unlock(&dev_base_lock);
1700 return -ENODEV;
1701 }
1702
1703 strcpy(ifr.ifr_name, dev->name);
1704 read_unlock(&dev_base_lock);
1705
1706 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1707 return -EFAULT;
1708 return 0;
1709 }
1710
1711 /*
1712 * Perform a SIOCGIFCONF call. This structure will change
1713 * size eventually, and there is nothing I can do about it.
1714 * Thus we will need a 'compatibility mode'.
1715 */
1716
dev_ifconf(char * arg)1717 static int dev_ifconf(char *arg)
1718 {
1719 struct ifconf ifc;
1720 struct net_device *dev;
1721 char *pos;
1722 int len;
1723 int total;
1724 int i;
1725
1726 /*
1727 * Fetch the caller's info block.
1728 */
1729
1730 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1731 return -EFAULT;
1732
1733 pos = ifc.ifc_buf;
1734 len = ifc.ifc_len;
1735
1736 /*
1737 * Loop over the interfaces, and write an info block for each.
1738 */
1739
1740 total = 0;
1741 for (dev = dev_base; dev != NULL; dev = dev->next) {
1742 for (i=0; i<NPROTO; i++) {
1743 if (gifconf_list[i]) {
1744 int done;
1745 if (pos==NULL) {
1746 done = gifconf_list[i](dev, NULL, 0);
1747 } else {
1748 done = gifconf_list[i](dev, pos+total, len-total);
1749 }
1750 if (done<0) {
1751 return -EFAULT;
1752 }
1753 total += done;
1754 }
1755 }
1756 }
1757
1758 /*
1759 * All done. Write the updated control block back to the caller.
1760 */
1761 ifc.ifc_len = total;
1762
1763 if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
1764 return -EFAULT;
1765
1766 /*
1767 * Both BSD and Solaris return 0 here, so we do too.
1768 */
1769 return 0;
1770 }
1771
1772 /*
1773 * This is invoked by the /proc filesystem handler to display a device
1774 * in detail.
1775 */
1776
1777 #ifdef CONFIG_PROC_FS
1778
sprintf_stats(char * buffer,struct net_device * dev)1779 static int sprintf_stats(char *buffer, struct net_device *dev)
1780 {
1781 struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
1782 int size;
1783
1784 if (stats)
1785 size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1786 dev->name,
1787 stats->rx_bytes,
1788 stats->rx_packets, stats->rx_errors,
1789 stats->rx_dropped + stats->rx_missed_errors,
1790 stats->rx_fifo_errors,
1791 stats->rx_length_errors + stats->rx_over_errors
1792 + stats->rx_crc_errors + stats->rx_frame_errors,
1793 stats->rx_compressed, stats->multicast,
1794 stats->tx_bytes,
1795 stats->tx_packets, stats->tx_errors, stats->tx_dropped,
1796 stats->tx_fifo_errors, stats->collisions,
1797 stats->tx_carrier_errors + stats->tx_aborted_errors
1798 + stats->tx_window_errors + stats->tx_heartbeat_errors,
1799 stats->tx_compressed);
1800 else
1801 size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
1802
1803 return size;
1804 }
1805
1806 /*
1807 * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
1808 * to create /proc/net/dev
1809 */
1810
dev_get_info(char * buffer,char ** start,off_t offset,int length)1811 static int dev_get_info(char *buffer, char **start, off_t offset, int length)
1812 {
1813 int len = 0;
1814 off_t begin = 0;
1815 off_t pos = 0;
1816 int size;
1817 struct net_device *dev;
1818
1819
1820 size = sprintf(buffer,
1821 "Inter-| Receive | Transmit\n"
1822 " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
1823
1824 pos += size;
1825 len += size;
1826
1827
1828 read_lock(&dev_base_lock);
1829 for (dev = dev_base; dev != NULL; dev = dev->next) {
1830 size = sprintf_stats(buffer+len, dev);
1831 len += size;
1832 pos = begin + len;
1833
1834 if (pos < offset) {
1835 len = 0;
1836 begin = pos;
1837 }
1838 if (pos > offset + length)
1839 break;
1840 }
1841 read_unlock(&dev_base_lock);
1842
1843 *start = buffer + (offset - begin); /* Start of wanted data */
1844 len -= (offset - begin); /* Start slop */
1845 if (len > length)
1846 len = length; /* Ending slop */
1847 if (len < 0)
1848 len = 0;
1849 return len;
1850 }
1851
/*
 *	/proc read handler printing one line of receive counters, in hex,
 *	for each CPU.  The whole output is generated on every call; @offset
 *	and @length select the part handed back, and *@eof is always set.
 *	@data is unused.
 */
static int dev_proc_stats(char *buffer, char **start, off_t offset,
			  int length, int *eof, void *data)
{
	int lcpu;
	int len = 0;

	for (lcpu = 0; lcpu < smp_num_cpus; lcpu++) {
		struct netif_rx_stats *s = &netdev_rx_stat[cpu_logical_map(lcpu)];

		len += sprintf(buffer + len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
			       s->total,
			       s->dropped,
			       s->time_squeeze,
			       s->throttled,
			       s->fastroute_hit,
			       s->fastroute_success,
			       s->fastroute_defer,
			       s->fastroute_deferred_out,
#if 0
			       s->fastroute_latency_reduction
#else
			       s->cpu_collision
#endif
			       );
	}

	*start = buffer + offset;
	*eof = 1;

	len -= offset;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;

	return len;
}
1889
1890 #endif /* CONFIG_PROC_FS */
1891
1892
1893 /**
1894 * netdev_set_master - set up master/slave pair
1895 * @slave: slave device
1896 * @master: new master device
1897 *
1898 * Changes the master device of the slave. Pass %NULL to break the
1899 * bonding. The caller must hold the RTNL semaphore. On a failure
1900 * a negative errno code is returned. On success the reference counts
1901 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
1902 * function returns zero.
1903 */
1904
netdev_set_master(struct net_device * slave,struct net_device * master)1905 int netdev_set_master(struct net_device *slave, struct net_device *master)
1906 {
1907 struct net_device *old = slave->master;
1908
1909 ASSERT_RTNL();
1910
1911 if (master) {
1912 if (old)
1913 return -EBUSY;
1914 dev_hold(master);
1915 }
1916
1917 br_write_lock_bh(BR_NETPROTO_LOCK);
1918 slave->master = master;
1919 br_write_unlock_bh(BR_NETPROTO_LOCK);
1920
1921 if (old)
1922 dev_put(old);
1923
1924 if (master)
1925 slave->flags |= IFF_SLAVE;
1926 else
1927 slave->flags &= ~IFF_SLAVE;
1928
1929 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
1930 return 0;
1931 }
1932
1933 /**
1934 * dev_set_promiscuity - update promiscuity count on a device
1935 * @dev: device
1936 * @inc: modifier
1937 *
1938 * Add or remove promsicuity from a device. While the count in the device
1939 * remains above zero the interface remains promiscuous. Once it hits zero
1940 * the device reverts back to normal filtering operation. A negative inc
1941 * value is used to drop promiscuity on the device.
1942 */
1943
dev_set_promiscuity(struct net_device * dev,int inc)1944 void dev_set_promiscuity(struct net_device *dev, int inc)
1945 {
1946 unsigned short old_flags = dev->flags;
1947
1948 dev->flags |= IFF_PROMISC;
1949 if ((dev->promiscuity += inc) == 0)
1950 dev->flags &= ~IFF_PROMISC;
1951 if (dev->flags^old_flags) {
1952 #ifdef CONFIG_NET_FASTROUTE
1953 if (dev->flags&IFF_PROMISC) {
1954 netdev_fastroute_obstacles++;
1955 dev_clear_fastroute(dev);
1956 } else
1957 netdev_fastroute_obstacles--;
1958 #endif
1959 dev_mc_upload(dev);
1960 printk(KERN_INFO "device %s %s promiscuous mode\n",
1961 dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
1962 }
1963 }
1964
1965 /**
1966 * dev_set_allmulti - update allmulti count on a device
1967 * @dev: device
1968 * @inc: modifier
1969 *
1970 * Add or remove reception of all multicast frames to a device. While the
1971 * count in the device remains above zero the interface remains listening
1972 * to all interfaces. Once it hits zero the device reverts back to normal
1973 * filtering operation. A negative @inc value is used to drop the counter
1974 * when releasing a resource needing all multicasts.
1975 */
1976
dev_set_allmulti(struct net_device * dev,int inc)1977 void dev_set_allmulti(struct net_device *dev, int inc)
1978 {
1979 unsigned short old_flags = dev->flags;
1980
1981 dev->flags |= IFF_ALLMULTI;
1982 if ((dev->allmulti += inc) == 0)
1983 dev->flags &= ~IFF_ALLMULTI;
1984 if (dev->flags^old_flags)
1985 dev_mc_upload(dev);
1986 }
1987
dev_change_flags(struct net_device * dev,unsigned flags)1988 int dev_change_flags(struct net_device *dev, unsigned flags)
1989 {
1990 int ret;
1991 int old_flags = dev->flags;
1992
1993 /*
1994 * Set the flags on our device.
1995 */
1996
1997 dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
1998 IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
1999 (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
2000
2001 /*
2002 * Load in the correct multicast list now the flags have changed.
2003 */
2004
2005 dev_mc_upload(dev);
2006
2007 /*
2008 * Have we downed the interface. We handle IFF_UP ourselves
2009 * according to user attempts to set it, rather than blindly
2010 * setting it.
2011 */
2012
2013 ret = 0;
2014 if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
2015 {
2016 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2017
2018 if (ret == 0)
2019 dev_mc_upload(dev);
2020 }
2021
2022 if (dev->flags&IFF_UP &&
2023 ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
2024 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2025
2026 if ((flags^dev->gflags)&IFF_PROMISC) {
2027 int inc = (flags&IFF_PROMISC) ? +1 : -1;
2028 dev->gflags ^= IFF_PROMISC;
2029 dev_set_promiscuity(dev, inc);
2030 }
2031
2032 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2033 is important. Some (broken) drivers set IFF_PROMISC, when
2034 IFF_ALLMULTI is requested not asking us and not reporting.
2035 */
2036 if ((flags^dev->gflags)&IFF_ALLMULTI) {
2037 int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
2038 dev->gflags ^= IFF_ALLMULTI;
2039 dev_set_allmulti(dev, inc);
2040 }
2041
2042 if (old_flags^dev->flags)
2043 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
2044
2045 return ret;
2046 }
2047
2048 /*
2049 * Perform the SIOCxIFxxx calls.
2050 */
2051
dev_ifsioc(struct ifreq * ifr,unsigned int cmd)2052 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2053 {
2054 struct net_device *dev;
2055 int err;
2056
2057 if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
2058 return -ENODEV;
2059
2060 switch(cmd)
2061 {
2062 case SIOCGIFFLAGS: /* Get interface flags */
2063 ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
2064 |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
2065 if (netif_running(dev) && netif_carrier_ok(dev))
2066 ifr->ifr_flags |= IFF_RUNNING;
2067 return 0;
2068
2069 case SIOCSIFFLAGS: /* Set interface flags */
2070 return dev_change_flags(dev, ifr->ifr_flags);
2071
2072 case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
2073 ifr->ifr_metric = 0;
2074 return 0;
2075
2076 case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
2077 return -EOPNOTSUPP;
2078
2079 case SIOCGIFMTU: /* Get the MTU of a device */
2080 ifr->ifr_mtu = dev->mtu;
2081 return 0;
2082
2083 case SIOCSIFMTU: /* Set the MTU of a device */
2084 if (ifr->ifr_mtu == dev->mtu)
2085 return 0;
2086
2087 /*
2088 * MTU must be positive.
2089 */
2090
2091 if (ifr->ifr_mtu<0)
2092 return -EINVAL;
2093
2094 if (!netif_device_present(dev))
2095 return -ENODEV;
2096
2097 if (dev->change_mtu)
2098 err = dev->change_mtu(dev, ifr->ifr_mtu);
2099 else {
2100 dev->mtu = ifr->ifr_mtu;
2101 err = 0;
2102 }
2103 if (!err && dev->flags&IFF_UP)
2104 notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
2105 return err;
2106
2107 case SIOCGIFHWADDR:
2108 memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
2109 ifr->ifr_hwaddr.sa_family=dev->type;
2110 return 0;
2111
2112 case SIOCSIFHWADDR:
2113 if (dev->set_mac_address == NULL)
2114 return -EOPNOTSUPP;
2115 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2116 return -EINVAL;
2117 if (!netif_device_present(dev))
2118 return -ENODEV;
2119 err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2120 if (!err)
2121 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2122 return err;
2123
2124 case SIOCSIFHWBROADCAST:
2125 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2126 return -EINVAL;
2127 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
2128 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2129 return 0;
2130
2131 case SIOCGIFMAP:
2132 ifr->ifr_map.mem_start=dev->mem_start;
2133 ifr->ifr_map.mem_end=dev->mem_end;
2134 ifr->ifr_map.base_addr=dev->base_addr;
2135 ifr->ifr_map.irq=dev->irq;
2136 ifr->ifr_map.dma=dev->dma;
2137 ifr->ifr_map.port=dev->if_port;
2138 return 0;
2139
2140 case SIOCSIFMAP:
2141 if (dev->set_config) {
2142 if (!netif_device_present(dev))
2143 return -ENODEV;
2144 return dev->set_config(dev,&ifr->ifr_map);
2145 }
2146 return -EOPNOTSUPP;
2147
2148 case SIOCADDMULTI:
2149 if (dev->set_multicast_list == NULL ||
2150 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2151 return -EINVAL;
2152 if (!netif_device_present(dev))
2153 return -ENODEV;
2154 dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
2155 return 0;
2156
2157 case SIOCDELMULTI:
2158 if (dev->set_multicast_list == NULL ||
2159 ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
2160 return -EINVAL;
2161 if (!netif_device_present(dev))
2162 return -ENODEV;
2163 dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
2164 return 0;
2165
2166 case SIOCGIFINDEX:
2167 ifr->ifr_ifindex = dev->ifindex;
2168 return 0;
2169
2170 case SIOCGIFTXQLEN:
2171 ifr->ifr_qlen = dev->tx_queue_len;
2172 return 0;
2173
2174 case SIOCSIFTXQLEN:
2175 if (ifr->ifr_qlen<0)
2176 return -EINVAL;
2177 dev->tx_queue_len = ifr->ifr_qlen;
2178 return 0;
2179
2180 case SIOCSIFNAME:
2181 if (dev->flags&IFF_UP)
2182 return -EBUSY;
2183 /* Check if name contains a wildcard */
2184 if (strchr(ifr->ifr_newname, '%')) {
2185 char format[IFNAMSIZ + 1];
2186 int ret;
2187 memcpy(format, ifr->ifr_newname, IFNAMSIZ);
2188 format[IFNAMSIZ-1] = 0;
2189 /* Find a free name based on format.
2190 * dev_alloc_name() replaces "%d" with at max
2191 * 2 digits, so no name overflow. - Jean II */
2192 ret = dev_alloc_name(dev, format);
2193 if (ret < 0)
2194 return ret;
2195 /* Copy the new name back to caller. */
2196 strncpy(ifr->ifr_newname, dev->name, IFNAMSIZ);
2197 } else {
2198 if (__dev_get_by_name(ifr->ifr_newname))
2199 return -EEXIST;
2200 memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
2201 dev->name[IFNAMSIZ-1] = 0;
2202 }
2203 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
2204 return 0;
2205
2206 /*
2207 * Unknown or private ioctl
2208 */
2209
2210 default:
2211 if ((cmd >= SIOCDEVPRIVATE &&
2212 cmd <= SIOCDEVPRIVATE + 15) ||
2213 cmd == SIOCBONDENSLAVE ||
2214 cmd == SIOCBONDRELEASE ||
2215 cmd == SIOCBONDSETHWADDR ||
2216 cmd == SIOCBONDSLAVEINFOQUERY ||
2217 cmd == SIOCBONDINFOQUERY ||
2218 cmd == SIOCBONDCHANGEACTIVE ||
2219 cmd == SIOCGMIIPHY ||
2220 cmd == SIOCGMIIREG ||
2221 cmd == SIOCSMIIREG ||
2222 cmd == SIOCWANDEV) {
2223 if (dev->do_ioctl) {
2224 if (!netif_device_present(dev))
2225 return -ENODEV;
2226 return dev->do_ioctl(dev, ifr, cmd);
2227 }
2228 return -EOPNOTSUPP;
2229 }
2230
2231 }
2232 return -EINVAL;
2233 }
2234
2235 /*
2236 * This function handles all "interface"-type I/O control requests. The actual
2237 * 'doing' part of this is dev_ifsioc above.
2238 */
2239
2240 /**
2241 * dev_ioctl - network device ioctl
2242 * @cmd: command to issue
2243 * @arg: pointer to a struct ifreq in user space
2244 *
2245 * Issue ioctl functions to devices. This is normally called by the
2246 * user space syscall interfaces but can sometimes be useful for
2247 * other purposes. The return value is the return from the syscall if
2248 * positive or a negative errno code on error.
2249 */
2250
dev_ioctl(unsigned int cmd,void * arg)2251 int dev_ioctl(unsigned int cmd, void *arg)
2252 {
2253 struct ifreq ifr;
2254 int ret;
2255 char *colon;
2256
2257 /* One special case: SIOCGIFCONF takes ifconf argument
2258 and requires shared lock, because it sleeps writing
2259 to user space.
2260 */
2261
2262 if (cmd == SIOCGIFCONF) {
2263 rtnl_shlock();
2264 ret = dev_ifconf((char *) arg);
2265 rtnl_shunlock();
2266 return ret;
2267 }
2268 if (cmd == SIOCGIFNAME) {
2269 return dev_ifname((struct ifreq *)arg);
2270 }
2271
2272 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2273 return -EFAULT;
2274
2275 ifr.ifr_name[IFNAMSIZ-1] = 0;
2276
2277 colon = strchr(ifr.ifr_name, ':');
2278 if (colon)
2279 *colon = 0;
2280
2281 /*
2282 * See which interface the caller is talking about.
2283 */
2284
2285 switch(cmd)
2286 {
2287 /*
2288 * These ioctl calls:
2289 * - can be done by all.
2290 * - atomic and do not require locking.
2291 * - return a value
2292 */
2293
2294 case SIOCGIFFLAGS:
2295 case SIOCGIFMETRIC:
2296 case SIOCGIFMTU:
2297 case SIOCGIFHWADDR:
2298 case SIOCGIFSLAVE:
2299 case SIOCGIFMAP:
2300 case SIOCGIFINDEX:
2301 case SIOCGIFTXQLEN:
2302 dev_load(ifr.ifr_name);
2303 read_lock(&dev_base_lock);
2304 ret = dev_ifsioc(&ifr, cmd);
2305 read_unlock(&dev_base_lock);
2306 if (!ret) {
2307 if (colon)
2308 *colon = ':';
2309 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2310 return -EFAULT;
2311 }
2312 return ret;
2313
2314 case SIOCETHTOOL:
2315 dev_load(ifr.ifr_name);
2316 rtnl_lock();
2317 ret = dev_ethtool(&ifr);
2318 rtnl_unlock();
2319 if (!ret) {
2320 if (colon)
2321 *colon = ':';
2322 if (copy_to_user(arg, &ifr,
2323 sizeof(struct ifreq)))
2324 ret = -EFAULT;
2325 }
2326 return ret;
2327
2328 /*
2329 * These ioctl calls:
2330 * - require superuser power.
2331 * - require strict serialization.
2332 * - return a value
2333 */
2334
2335 case SIOCSIFNAME:
2336 case SIOCGMIIPHY:
2337 case SIOCGMIIREG:
2338 if (!capable(CAP_NET_ADMIN))
2339 return -EPERM;
2340 dev_load(ifr.ifr_name);
2341 dev_probe_lock();
2342 rtnl_lock();
2343 ret = dev_ifsioc(&ifr, cmd);
2344 rtnl_unlock();
2345 dev_probe_unlock();
2346 if (!ret) {
2347 if (colon)
2348 *colon = ':';
2349 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2350 return -EFAULT;
2351 }
2352 return ret;
2353
2354 /*
2355 * These ioctl calls:
2356 * - require superuser power.
2357 * - require strict serialization.
2358 * - do not return a value
2359 */
2360
2361 case SIOCSIFFLAGS:
2362 case SIOCSIFMETRIC:
2363 case SIOCSIFMTU:
2364 case SIOCSIFMAP:
2365 case SIOCSIFHWADDR:
2366 case SIOCSIFSLAVE:
2367 case SIOCADDMULTI:
2368 case SIOCDELMULTI:
2369 case SIOCSIFHWBROADCAST:
2370 case SIOCSIFTXQLEN:
2371 case SIOCSMIIREG:
2372 case SIOCBONDENSLAVE:
2373 case SIOCBONDRELEASE:
2374 case SIOCBONDSETHWADDR:
2375 case SIOCBONDSLAVEINFOQUERY:
2376 case SIOCBONDINFOQUERY:
2377 case SIOCBONDCHANGEACTIVE:
2378 if (!capable(CAP_NET_ADMIN))
2379 return -EPERM;
2380 dev_load(ifr.ifr_name);
2381 dev_probe_lock();
2382 rtnl_lock();
2383 ret = dev_ifsioc(&ifr, cmd);
2384 rtnl_unlock();
2385 dev_probe_unlock();
2386 return ret;
2387
2388 case SIOCGIFMEM:
2389 /* Get the per device memory space. We can add this but currently
2390 do not support it */
2391 case SIOCSIFMEM:
2392 /* Set the per device memory buffer space. Not applicable in our case */
2393 case SIOCSIFLINK:
2394 return -EINVAL;
2395
2396 /*
2397 * Unknown or private ioctl.
2398 */
2399
2400 default:
2401 if (cmd == SIOCWANDEV ||
2402 (cmd >= SIOCDEVPRIVATE &&
2403 cmd <= SIOCDEVPRIVATE + 15)) {
2404 dev_load(ifr.ifr_name);
2405 dev_probe_lock();
2406 rtnl_lock();
2407 ret = dev_ifsioc(&ifr, cmd);
2408 rtnl_unlock();
2409 dev_probe_unlock();
2410 if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2411 return -EFAULT;
2412 return ret;
2413 }
2414 #ifdef WIRELESS_EXT
2415 /* Take care of Wireless Extensions */
2416 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2417 /* If command is `set a parameter', or
2418 * `get the encoding parameters', check if
2419 * the user has the right to do it */
2420 if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
2421 if(!capable(CAP_NET_ADMIN))
2422 return -EPERM;
2423 }
2424 dev_load(ifr.ifr_name);
2425 rtnl_lock();
2426 /* Follow me in net/core/wireless.c */
2427 ret = wireless_process_ioctl(&ifr, cmd);
2428 rtnl_unlock();
2429 if (!ret && IW_IS_GET(cmd) &&
2430 copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2431 return -EFAULT;
2432 return ret;
2433 }
2434 #endif /* WIRELESS_EXT */
2435 return -EINVAL;
2436 }
2437 }
2438
2439
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns the next positive interface index that
 *	__dev_get_by_index() does not already know about. Candidates are
 *	taken from a static counter that wraps from the largest int back
 *	to 1. The caller must hold the rtnl semaphore or the dev_base_lock
 *	to be sure the value remains unique.
 */

int dev_new_index(void)
{
	static int ifindex;
	/* Largest positive int, spelled out so no extra header is needed. */
	const int ifindex_max = (int)(~0U >> 1);

	for (;;) {
		/*
		 * Wrap before incrementing: bumping a signed int past its
		 * maximum is undefined behaviour, so the overflow must not
		 * be detected after the fact.
		 */
		if (ifindex >= ifindex_max)
			ifindex = 1;
		else
			ifindex++;

		if (__dev_get_by_index(ifindex) == NULL)
			return ifindex;
	}
}
2458
/*
 * Non-zero until net_dev_init() has walked the boot-time device list;
 * net_dev_init() clears it and returns immediately on any later call.
 */
static int dev_boot_phase = 1;
2460
2461 /**
2462 * register_netdevice - register a network device
2463 * @dev: device to register
2464 *
2465 * Take a completed network device structure and add it to the kernel
2466 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2467 * chain. 0 is returned on success. A negative errno code is returned
2468 * on a failure to set up the device, or if the name is a duplicate.
2469 *
2470 * Callers must hold the rtnl semaphore. See the comment at the
2471 * end of Space.c for details about the locking. You may want
2472 * register_netdev() instead of this.
2473 *
2474 * BUGS:
2475 * The locking appears insufficient to guarantee two parallel registers
2476 * will not get the same name.
2477 */
2478
2479 int net_dev_init(void);
2480
register_netdevice(struct net_device * dev)2481 int register_netdevice(struct net_device *dev)
2482 {
2483 struct net_device *d, **dp;
2484 #ifdef CONFIG_NET_DIVERT
2485 int ret;
2486 #endif
2487
2488 spin_lock_init(&dev->queue_lock);
2489 spin_lock_init(&dev->xmit_lock);
2490 dev->xmit_lock_owner = -1;
2491 #ifdef CONFIG_NET_FASTROUTE
2492 dev->fastpath_lock=RW_LOCK_UNLOCKED;
2493 #endif
2494
2495 if (dev_boot_phase)
2496 net_dev_init();
2497
2498 #ifdef CONFIG_NET_DIVERT
2499 ret = alloc_divert_blk(dev);
2500 if (ret)
2501 return ret;
2502 #endif /* CONFIG_NET_DIVERT */
2503
2504 dev->iflink = -1;
2505
2506 /* Init, if this function is available */
2507 if (dev->init && dev->init(dev) != 0) {
2508 #ifdef CONFIG_NET_DIVERT
2509 free_divert_blk(dev);
2510 #endif
2511 return -EIO;
2512 }
2513
2514 dev->ifindex = dev_new_index();
2515 if (dev->iflink == -1)
2516 dev->iflink = dev->ifindex;
2517
2518 /* Check for existence, and append to tail of chain */
2519 for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2520 if (d == dev || strcmp(d->name, dev->name) == 0) {
2521 #ifdef CONFIG_NET_DIVERT
2522 free_divert_blk(dev);
2523 #endif
2524 return -EEXIST;
2525 }
2526 }
2527
2528 /* Fix illegal SG+CSUM combinations. */
2529 if ((dev->features & NETIF_F_SG) &&
2530 !(dev->features & (NETIF_F_IP_CSUM |
2531 NETIF_F_NO_CSUM |
2532 NETIF_F_HW_CSUM))) {
2533 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2534 dev->name);
2535 dev->features &= ~NETIF_F_SG;
2536 }
2537
2538 /*
2539 * nil rebuild_header routine,
2540 * that should be never called and used as just bug trap.
2541 */
2542
2543 if (dev->rebuild_header == NULL)
2544 dev->rebuild_header = default_rebuild_header;
2545
2546 /*
2547 * Default initial state at registry is that the
2548 * device is present.
2549 */
2550
2551 set_bit(__LINK_STATE_PRESENT, &dev->state);
2552
2553 dev->next = NULL;
2554 dev_init_scheduler(dev);
2555 write_lock_bh(&dev_base_lock);
2556 *dp = dev;
2557 dev_hold(dev);
2558 dev->deadbeaf = 0;
2559 write_unlock_bh(&dev_base_lock);
2560
2561 /* Notify protocols, that a new device appeared. */
2562 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2563
2564 net_run_sbin_hotplug(dev, "register");
2565
2566 return 0;
2567 }
2568
2569 /**
2570 * netdev_finish_unregister - complete unregistration
2571 * @dev: device
2572 *
2573 * Destroy and free a dead device. A value of zero is returned on
2574 * success.
2575 */
2576
netdev_finish_unregister(struct net_device * dev)2577 int netdev_finish_unregister(struct net_device *dev)
2578 {
2579 BUG_TRAP(dev->ip_ptr==NULL);
2580 BUG_TRAP(dev->ip6_ptr==NULL);
2581 BUG_TRAP(dev->dn_ptr==NULL);
2582
2583 if (!dev->deadbeaf) {
2584 printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
2585 return 0;
2586 }
2587 #ifdef NET_REFCNT_DEBUG
2588 printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
2589 (dev->features & NETIF_F_DYNALLOC)?"":", old style");
2590 #endif
2591 if (dev->destructor)
2592 dev->destructor(dev);
2593 if (dev->features & NETIF_F_DYNALLOC)
2594 kfree(dev);
2595 return 0;
2596 }
2597
2598 /**
2599 * unregister_netdevice - remove device from the kernel
2600 * @dev: device
2601 *
2602 * This function shuts down a device interface and removes it
2603 * from the kernel tables. On success 0 is returned, on a failure
2604 * a negative errno code is returned.
2605 *
2606 * Callers must hold the rtnl semaphore. See the comment at the
2607 * end of Space.c for details about the locking. You may want
2608 * unregister_netdev() instead of this.
2609 */
2610
unregister_netdevice(struct net_device * dev)2611 int unregister_netdevice(struct net_device *dev)
2612 {
2613 unsigned long now, warning_time;
2614 struct net_device *d, **dp;
2615
2616 /* If device is running, close it first. */
2617 if (dev->flags & IFF_UP)
2618 dev_close(dev);
2619
2620 BUG_TRAP(dev->deadbeaf==0);
2621 dev->deadbeaf = 1;
2622
2623 /* And unlink it from device chain. */
2624 for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
2625 if (d == dev) {
2626 write_lock_bh(&dev_base_lock);
2627 *dp = d->next;
2628 write_unlock_bh(&dev_base_lock);
2629 break;
2630 }
2631 }
2632 if (d == NULL) {
2633 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
2634 return -ENODEV;
2635 }
2636
2637 /* Synchronize to net_rx_action. */
2638 br_write_lock_bh(BR_NETPROTO_LOCK);
2639 br_write_unlock_bh(BR_NETPROTO_LOCK);
2640
2641 if (dev_boot_phase == 0) {
2642 #ifdef CONFIG_NET_FASTROUTE
2643 dev_clear_fastroute(dev);
2644 #endif
2645
2646 /* Shutdown queueing discipline. */
2647 dev_shutdown(dev);
2648
2649 net_run_sbin_hotplug(dev, "unregister");
2650
2651 /* Notify protocols, that we are about to destroy
2652 this device. They should clean all the things.
2653 */
2654 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2655
2656 /*
2657 * Flush the multicast chain
2658 */
2659 dev_mc_discard(dev);
2660 }
2661
2662 if (dev->uninit)
2663 dev->uninit(dev);
2664
2665 /* Notifier chain MUST detach us from master device. */
2666 BUG_TRAP(dev->master==NULL);
2667
2668 #ifdef CONFIG_NET_DIVERT
2669 free_divert_blk(dev);
2670 #endif
2671
2672 if (dev->features & NETIF_F_DYNALLOC) {
2673 #ifdef NET_REFCNT_DEBUG
2674 if (atomic_read(&dev->refcnt) != 1)
2675 printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
2676 #endif
2677 dev_put(dev);
2678 return 0;
2679 }
2680
2681 /* Last reference is our one */
2682 if (atomic_read(&dev->refcnt) == 1) {
2683 dev_put(dev);
2684 return 0;
2685 }
2686
2687 #ifdef NET_REFCNT_DEBUG
2688 printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
2689 #endif
2690
2691 /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
2692 it means that someone in the kernel still has a reference
2693 to this device and we cannot release it.
2694
2695 "New style" devices have destructors, hence we can return from this
2696 function and destructor will do all the work later. As of kernel 2.4.0
2697 there are very few "New Style" devices.
2698
2699 "Old style" devices expect that the device is free of any references
2700 upon exit from this function.
2701 We cannot return from this function until all such references have
2702 fallen away. This is because the caller of this function will probably
2703 immediately kfree(*dev) and then be unloaded via sys_delete_module.
2704
2705 So, we linger until all references fall away. The duration of the
2706 linger is basically unbounded! It is driven by, for example, the
2707 current setting of sysctl_ipfrag_time.
2708
2709 After 1 second, we start to rebroadcast unregister notifications
2710 in hope that careless clients will release the device.
2711
2712 */
2713
2714 now = warning_time = jiffies;
2715 while (atomic_read(&dev->refcnt) != 1) {
2716 if ((jiffies - now) > 1*HZ) {
2717 /* Rebroadcast unregister notification */
2718 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2719 }
2720 current->state = TASK_INTERRUPTIBLE;
2721 schedule_timeout(HZ/4);
2722 current->state = TASK_RUNNING;
2723 if ((jiffies - warning_time) > 10*HZ) {
2724 printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
2725 "become free. Usage count = %d\n",
2726 dev->name, atomic_read(&dev->refcnt));
2727 warning_time = jiffies;
2728 }
2729 }
2730 dev_put(dev);
2731 return 0;
2732 }
2733
2734
2735 /*
2736 * Initialize the DEV module. At boot time this walks the device list and
2737 * unhooks any devices that fail to initialise (normally hardware not
2738 * present) and leaves us with a valid list of present and active devices.
2739 *
2740 */
2741
2742 extern void net_device_init(void);
2743 extern void ip_auto_config(void);
2744 struct proc_dir_entry *proc_net_drivers;
2745 #ifdef CONFIG_NET_DIVERT
2746 extern void dv_init(void);
2747 #endif /* CONFIG_NET_DIVERT */
2748
2749
2750 /*
2751 * Callers must hold the rtnl semaphore. See the comment at the
2752 * end of Space.c for details about the locking.
2753 */
net_dev_init(void)2754 int __init net_dev_init(void)
2755 {
2756 struct net_device *dev, **dp;
2757 int i;
2758
2759 if (!dev_boot_phase)
2760 return 0;
2761
2762
2763 #ifdef CONFIG_NET_DIVERT
2764 dv_init();
2765 #endif /* CONFIG_NET_DIVERT */
2766
2767 /*
2768 * Initialise the packet receive queues.
2769 */
2770
2771 for (i = 0; i < NR_CPUS; i++) {
2772 struct softnet_data *queue;
2773
2774 queue = &softnet_data[i];
2775 skb_queue_head_init(&queue->input_pkt_queue);
2776 queue->throttle = 0;
2777 queue->cng_level = 0;
2778 queue->avg_blog = 10; /* arbitrary non-zero */
2779 queue->completion_queue = NULL;
2780 INIT_LIST_HEAD(&queue->poll_list);
2781 set_bit(__LINK_STATE_START, &queue->blog_dev.state);
2782 queue->blog_dev.weight = weight_p;
2783 queue->blog_dev.poll = process_backlog;
2784 atomic_set(&queue->blog_dev.refcnt, 1);
2785 }
2786
2787 #ifdef CONFIG_NET_PROFILE
2788 net_profile_init();
2789 NET_PROFILE_REGISTER(dev_queue_xmit);
2790 NET_PROFILE_REGISTER(softnet_process);
2791 #endif
2792
2793 #ifdef OFFLINE_SAMPLE
2794 samp_timer.expires = jiffies + (10 * HZ);
2795 add_timer(&samp_timer);
2796 #endif
2797
2798 /*
2799 * Add the devices.
2800 * If the call to dev->init fails, the dev is removed
2801 * from the chain disconnecting the device until the
2802 * next reboot.
2803 *
2804 * NB At boot phase networking is dead. No locking is required.
2805 * But we still preserve dev_base_lock for sanity.
2806 */
2807
2808 dp = &dev_base;
2809 while ((dev = *dp) != NULL) {
2810 spin_lock_init(&dev->queue_lock);
2811 spin_lock_init(&dev->xmit_lock);
2812 #ifdef CONFIG_NET_FASTROUTE
2813 dev->fastpath_lock = RW_LOCK_UNLOCKED;
2814 #endif
2815 dev->xmit_lock_owner = -1;
2816 dev->iflink = -1;
2817 dev_hold(dev);
2818
2819 /*
2820 * Allocate name. If the init() fails
2821 * the name will be reissued correctly.
2822 */
2823 if (strchr(dev->name, '%'))
2824 dev_alloc_name(dev, dev->name);
2825
2826 /*
2827 * Check boot time settings for the device.
2828 */
2829 netdev_boot_setup_check(dev);
2830
2831 if (dev->init && dev->init(dev)) {
2832 /*
2833 * It failed to come up. It will be unhooked later.
2834 * dev_alloc_name can now advance to next suitable
2835 * name that is checked next.
2836 */
2837 dev->deadbeaf = 1;
2838 dp = &dev->next;
2839 } else {
2840 dp = &dev->next;
2841 dev->ifindex = dev_new_index();
2842 if (dev->iflink == -1)
2843 dev->iflink = dev->ifindex;
2844 if (dev->rebuild_header == NULL)
2845 dev->rebuild_header = default_rebuild_header;
2846 dev_init_scheduler(dev);
2847 set_bit(__LINK_STATE_PRESENT, &dev->state);
2848 }
2849 }
2850
2851 /*
2852 * Unhook devices that failed to come up
2853 */
2854 dp = &dev_base;
2855 while ((dev = *dp) != NULL) {
2856 if (dev->deadbeaf) {
2857 write_lock_bh(&dev_base_lock);
2858 *dp = dev->next;
2859 write_unlock_bh(&dev_base_lock);
2860 dev_put(dev);
2861 } else {
2862 dp = &dev->next;
2863 }
2864 }
2865
2866 #ifdef CONFIG_PROC_FS
2867 proc_net_create("dev", 0, dev_get_info);
2868 create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
2869 proc_net_drivers = proc_mkdir("net/drivers", 0);
2870 #ifdef WIRELESS_EXT
2871 /* Available in net/core/wireless.c */
2872 proc_net_create("wireless", 0, dev_get_wireless_info);
2873 #endif /* WIRELESS_EXT */
2874 #endif /* CONFIG_PROC_FS */
2875
2876 dev_boot_phase = 0;
2877
2878 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
2879 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
2880
2881 dst_init();
2882 dev_mcast_init();
2883
2884 #ifdef CONFIG_NET_SCHED
2885 pktsched_init();
2886 #endif
2887 /*
2888 * Initialise network devices
2889 */
2890
2891 net_device_init();
2892
2893 return 0;
2894 }
2895
2896 #ifdef CONFIG_HOTPLUG
2897
2898 /* Notify userspace when a netdevice event occurs,
2899 * by running '/sbin/hotplug net' with certain
2900 * environment variables set.
2901 */
2902
net_run_sbin_hotplug(struct net_device * dev,char * action)2903 static int net_run_sbin_hotplug(struct net_device *dev, char *action)
2904 {
2905 char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
2906 int i;
2907
2908 sprintf(ifname, "INTERFACE=%s", dev->name);
2909 sprintf(action_str, "ACTION=%s", action);
2910
2911 i = 0;
2912 argv[i++] = hotplug_path;
2913 argv[i++] = "net";
2914 argv[i] = 0;
2915
2916 i = 0;
2917 /* minimal command environment */
2918 envp [i++] = "HOME=/";
2919 envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
2920 envp [i++] = ifname;
2921 envp [i++] = action_str;
2922 envp [i] = 0;
2923
2924 return call_usermodehelper(argv [0], argv, envp);
2925 }
2926 #endif
2927