/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set to 2
 *                                      if register_netdev gets called before
 *                                      net_dev_init & also removed a few lines
 *                                      of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant stunts to
 *					keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before calling netif_rx. Saves
 *					a function call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	: 	Fixed nasty side effect of device close changes.
 *		Rudi Cilibrasi	:	Pass the right thing to set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to make it work out
 *					on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *	Paul Rusty Russell	:	SIOCSIFNAME
 *              Pekka Riikonen  :	Netdev boot-time settings code
 *              Andrew Morton   :       Make unregister_netdevice wait indefinitely on dev->refcnt
 * 		J Hadi Salim	:	- Backlog queue sampling
 *				        - netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/brlock.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/divert.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/profile.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h>
#endif	/* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
#ifdef CONFIG_PLIP
extern int plip_init(void);
#endif


/* This define, if set, will randomly drop a packet when congestion
 * is more than moderate.  It helps fairness in the multi-interface
 * case when one of them is a hog, but it kills performance for the
 * single interface case so it is off now by default.
 */
#undef RAND_LIE

/* Setting this will sample the queue lengths and thus congestion
 * via a timer instead of as each packet is received.
 */
#undef OFFLINE_SAMPLE

NET_PROFILE_DEFINE(dev_queue_xmit)
NET_PROFILE_DEFINE(softnet_process)

const char *if_port_text[] = {
  "unknown",
  "BNC",
  "10baseT",
  "AUI",
  "100baseT",
  "100baseTX",
  "100baseFX"
};

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
 *             --BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

static struct packet_type *ptype_base[16];		/* 16 way hashed list */
static struct packet_type *ptype_all = NULL;		/* Taps */

#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy);
static struct timer_list samp_timer = { function: sample_queue };
#endif

#ifdef CONFIG_HOTPLUG
static int net_run_sbin_hotplug(struct net_device *dev, char *action);
#else
#define net_run_sbin_hotplug(dev, action) ({ 0; })
#endif

/*
 *	Our notifier list
 */

static struct notifier_block *netdev_chain=NULL;

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;

#ifdef CONFIG_NET_FASTROUTE
int netdev_fastroute;
int netdev_fastroute_obstacles;
#endif


/******************************************************************************************

		Protocol management and registration routines

*******************************************************************************************/

/*
 *	For efficiency
 */

int netdev_nit=0;

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in the hash buckets, and checking protocol handlers
 *	MUST start from the promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	is first in the list, it has no way to sense that the packet
 *	is cloned and should be copied-on-write, so it will modify the
 *	packet in place and subsequent readers will see a broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	br_write_lock_bh(BR_NETPROTO_LOCK);

#ifdef CONFIG_NET_FASTROUTE
	/* Hack to detect packet socket */
	if ((pt->data) && ((int)(pt->data)!=1)) {
		netdev_fastroute_obstacles++;
		dev_clear_fastroute(pt->dev);
	}
#endif
	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit++;
		pt->next=ptype_all;
		ptype_all=pt;
	} else {
		hash=ntohs(pt->type)&15;
		pt->next = ptype_base[hash];
		ptype_base[hash] = pt;
	}
	br_write_unlock_bh(BR_NETPROTO_LOCK);
}


/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 */

void dev_remove_pack(struct packet_type *pt)
{
	struct packet_type **pt1;

	br_write_lock_bh(BR_NETPROTO_LOCK);

	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit--;
		pt1=&ptype_all;
	} else {
		pt1=&ptype_base[ntohs(pt->type)&15];
	}

	for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
		if (pt == (*pt1)) {
			*pt1 = pt->next;
#ifdef CONFIG_NET_FASTROUTE
			if (pt->data)
				netdev_fastroute_obstacles--;
#endif
			br_write_unlock_bh(BR_NETPROTO_LOCK);
			return;
		}
	}
	br_write_unlock_bh(BR_NETPROTO_LOCK);
	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
}
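
/*
 * A minimal sketch of using dev_add_pack()/dev_remove_pack() from a
 * protocol module, kept under #if 0 so it does not affect the build.
 * The handler name (my_ip_rcv) and the packet_type instance (my_pt) are
 * hypothetical; the initializer style matches the 2.4 struct packet_type.
 * A handler owns the skb reference it is given and must free it.
 */
#if 0
static int my_ip_rcv(struct sk_buff *skb, struct net_device *dev,
		     struct packet_type *pt)
{
	printk(KERN_DEBUG "my_ip_rcv: %u bytes on %s\n", skb->len, dev->name);
	kfree_skb(skb);		/* consume our reference */
	return 0;
}

static struct packet_type my_pt = {
	type:	__constant_htons(ETH_P_IP),	/* hashes on the low nibble */
	func:	my_ip_rcv,			/* dev == NULL: all devices */
};

/* dev_add_pack(&my_pt) in module init; dev_remove_pack(&my_pt) in exit. */
#endif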

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds a new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine for
 *	all netdevices.
 */
int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strcpy(s[i].name, name);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	if (i >= NETDEV_BOOT_SETUP_MAX)
		return 0;

	return 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 * 	Check boot time settings for the device.
 *	Any settings found are copied into the device for use
 *	later in the device probing.
 *	Returns 0 if no settings were found, 1 if they were.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
			dev->irq 	= s[i].map.irq;
			dev->base_addr 	= s[i].map.base_addr;
			dev->mem_start 	= s[i].map.mem_start;
			dev->mem_end 	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
364 
365 /*
366  * Saves at boot time configured settings for any netdevice.
367  */
netdev_boot_setup(char * str)368 int __init netdev_boot_setup(char *str)
369 {
370 	int ints[5];
371 	struct ifmap map;
372 
373 	str = get_options(str, ARRAY_SIZE(ints), ints);
374 	if (!str || !*str)
375 		return 0;
376 
377 	/* Save settings */
378 	memset(&map, 0, sizeof(map));
379 	if (ints[0] > 0)
380 		map.irq = ints[1];
381 	if (ints[0] > 1)
382 		map.base_addr = ints[2];
383 	if (ints[0] > 2)
384 		map.mem_start = ints[3];
385 	if (ints[0] > 3)
386 		map.mem_end = ints[4];
387 
388 	/* Add new entry to the list */
389 	return netdev_boot_setup_add(str, &map);
390 }
391 
392 __setup("netdev=", netdev_boot_setup);
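
/*
 * Illustrative sketch of the boot-time path above, under #if 0.  A boot
 * line such as "netdev=9,0x300,0,0,eth0" is parsed by netdev_boot_setup()
 * into irq 9 and I/O base 0x300 stored under the name "eth0"; a probe
 * routine can then pick the values up as below (mydev_probe is a
 * hypothetical driver function).
 */
#if 0
static int __init mydev_probe(struct net_device *dev)
{
	/* Overwrites dev->irq/base_addr/mem_* if a matching entry exists. */
	if (netdev_boot_setup_check(dev))
		printk(KERN_INFO "%s: boot-time settings: irq %d, io %#lx\n",
		       dev->name, dev->irq, dev->base_addr);
	return 0;
}
#endif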

/*****************************************************************************************

			    Device Interface Subroutines

******************************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under the RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */


struct net_device *__dev_get_by_name(const char *name)
{
	struct net_device *dev;

	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (strncmp(dev->name, name, IFNAMSIZ) == 0)
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/*
   The return value was changed to int to prevent illegal usage in the
   future. It is still legal to use it to check for device existence.

   Users should understand that the result returned by this function
   is meaningless unless it was obtained under the rtnl semaphore.
 */

/**
 *	dev_get	-	test if a device exists
 *	@name:	name to test for
 *
 *	Test if a name exists. Returns true if the name is found. In order
 *	to be sure the name is not allocated or removed during the test the
 *	caller must hold the rtnl semaphore.
 *
 *	This function primarily exists for backward compatibility with older
 *	drivers.
 */

int dev_get(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	read_unlock(&dev_base_lock);
	return dev != NULL;
}
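
/*
 * Illustrative sketch, under #if 0, of the two lookup disciplines above:
 * dev_get_by_name() takes a reference that must be released with
 * dev_put(), while __dev_get_by_name() relies on the caller's locking.
 * Nothing here is new API; "eth0" is just an example name.
 */
#if 0
static void lookup_example(void)
{
	struct net_device *dev;

	dev = dev_get_by_name("eth0");		/* reference held */
	if (dev) {
		printk(KERN_DEBUG "eth0: ifindex %d\n", dev->ifindex);
		dev_put(dev);			/* release it */
	}

	read_lock(&dev_base_lock);		/* no reference taken */
	dev = __dev_get_by_name("eth0");	/* valid only under the lock */
	if (dev)
		printk(KERN_DEBUG "eth0 %s up\n",
		       (dev->flags & IFF_UP) ? "is" : "is not");
	read_unlock(&dev_base_lock);
}
#endif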

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or %NULL if it is not found. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device * __dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or %NULL if it is not found. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put() to indicate they have finished with it.
 */

struct net_device * dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns a pointer to the
 *	device, or %NULL if it is not found. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API were consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (dev->type == type &&
		    memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_flags - find any device with given flags
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns a pointer to
 *	the first matching device, or %NULL if none is found. The device
 *	returned has had a reference added and the pointer is safe until the
 *	user calls dev_put() to indicate they have finished with it.
 */

struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_flags(if_flags, mask);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	__dev_get_by_flags - find any device with given flags
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns a pointer to
 *	the first matching device, or %NULL if none is found. The caller must
 *	hold either the RTNL semaphore or @dev_base_lock.
 */

struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev;

	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (((dev->flags ^ if_flags) & mask) == 0)
			return dev;
	}
	return NULL;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string, e.g. "lt%d", it will try to find a suitable
 *	id. This is not efficient for many devices and is not called often.
 *	The caller must hold the dev_base or rtnl lock while allocating the
 *	name and adding the device in order to avoid duplicates. Returns the
 *	number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i;
	char buf[32];
	char *p;

	/*
	 * Verify the string as this thing may have come from
	 * the user.  There must be either one "%d" and no other "%"
	 * characters, or no "%" characters at all.
	 */
	p = strchr(name, '%');
	if (p && (p[1] != 'd' || strchr(p+2, '%')))
		return -EINVAL;

	/*
	 * If you need over 100 please also fix the algorithm...
	 */
	for (i = 0; i < 100; i++) {
		snprintf(buf,sizeof(buf),name,i);
		if (__dev_get_by_name(buf) == NULL) {
			strcpy(dev->name, buf);
			return i;
		}
	}
	return -ENFILE;	/* Over 100 of the things .. bail out! */
}

/**
 *	dev_alloc - allocate a network device and name
 *	@name: name format string
 *	@err: error return pointer
 *
 *	Passed a format string, e.g. "lt%d", it will allocate a network device
 *	and space for the name. %NULL is returned if no memory is available.
 *	If the allocation succeeds then the name is assigned and the
 *	device pointer returned. %NULL is returned if the name allocation
 *	fails. The cause of an error is returned as a negative errno code
 *	in the variable @err points to.
 *
 *	The caller must hold the @dev_base or RTNL locks when doing this in
 *	order to avoid duplicate name allocations.
 */

struct net_device *dev_alloc(const char *name, int *err)
{
	struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
	if (dev == NULL) {
		*err = -ENOBUFS;
		return NULL;
	}
	memset(dev, 0, sizeof(struct net_device));
	*err = dev_alloc_name(dev, name);
	if (*err < 0) {
		kfree(dev);
		return NULL;
	}
	return dev;
}
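
/*
 * Illustrative sketch, under #if 0, of allocating and registering a
 * device with a templated name.  make_dummy() and the "dummy%d" template
 * are hypothetical; the error handling follows the dev_alloc() contract
 * documented above.
 */
#if 0
static struct net_device *make_dummy(void)
{
	int err;
	struct net_device *dev;

	dev = dev_alloc("dummy%d", &err);	/* e.g. names it "dummy0" */
	if (dev == NULL) {
		printk(KERN_ERR "dev_alloc: error %d\n", err);
		return NULL;
	}
	/* ... fill in dev->init, dev->hard_start_xmit, etc. here ... */
	if (register_netdev(dev) != 0) {
		kfree(dev);
		return NULL;
	}
	return dev;
}
#endif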

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */

void netdev_state_change(struct net_device *dev)
{
	if (dev->flags&IFF_UP) {
		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}


#ifdef CONFIG_KMOD

/**
 *	dev_load 	- load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	if (!dev_get(name) && capable(CAP_SYS_MODULE))
		request_module(name);
}

#else

extern inline void dev_load(const char *unused){;}

#endif

static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */

int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 *	Is it already up?
	 */

	if (dev->flags&IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	if (try_inc_mod_count(dev->owner)) {
		set_bit(__LINK_STATE_START, &dev->state);
		if (dev->open) {
			ret = dev->open(dev);
			if (ret != 0) {
				clear_bit(__LINK_STATE_START, &dev->state);
				if (dev->owner)
					__MOD_DEC_USE_COUNT(dev->owner);
			}
		}
	} else {
		ret = -ENODEV;
	}

	/*
	 *	If it went open OK then:
	 */

	if (ret == 0)
	{
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_mc_upload(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return(ret);
}

#ifdef CONFIG_NET_FASTROUTE

static void dev_do_clear_fastroute(struct net_device *dev)
{
	if (dev->accept_fastpath) {
		int i;

		for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
			struct dst_entry *dst;

			write_lock_irq(&dev->fastpath_lock);
			dst = dev->fastpath[i];
			dev->fastpath[i] = NULL;
			write_unlock_irq(&dev->fastpath_lock);

			dst_release(dst);
		}
	}
}

void dev_clear_fastroute(struct net_device *dev)
{
	if (dev) {
		dev_do_clear_fastroute(dev);
	} else {
		read_lock(&dev_base_lock);
		for (dev = dev_base; dev; dev = dev->next)
			dev_do_clear_fastroute(dev);
		read_unlock(&dev_base_lock);
	}
}
#endif

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */

int dev_close(struct net_device *dev)
{
	if (!(dev->flags&IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for it while the device is still operating.
	 */
	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch the poll list,
	 * which can even be on a different cpu. So just clear netif_running(),
	 * and wait until the poll really happens. Actually, the best place
	 * for this is inside dev->stop() after the device has stopped its
	 * irq engine, but this requires more changes in devices. */

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(1);
	}

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */

	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;
#ifdef CONFIG_NET_FASTROUTE
	dev_clear_fastroute(dev);
#endif

	/*
	 *	Tell people we are down
	 */
	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);

	/*
	 * Drop the module refcount
	 */
	if (dev->owner)
		__MOD_DEC_USE_COUNT(dev->owner);

	return(0);
}
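
/*
 * Illustrative sketch, under #if 0, of driving dev_open()/dev_close()
 * from kernel code.  Both expect the caller to serialize configuration;
 * here that is done with the RTNL semaphore.  bring_up() is hypothetical.
 */
#if 0
static int bring_up(const char *name)
{
	struct net_device *dev;
	int err = -ENODEV;

	rtnl_lock();
	dev = __dev_get_by_name(name);
	if (dev)
		err = dev_open(dev);	/* nop if already IFF_UP */
	rtnl_unlock();
	return err;
}
#endif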


/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	return notifier_chain_register(&netdev_chain, nb);
}

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	return notifier_chain_unregister(&netdev_chain,nb);
}
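
/*
 * Illustrative sketch, under #if 0, of a netdevice notifier.  The
 * callback name my_netdev_event is hypothetical; the events are the
 * standard NETDEV_* codes delivered through the chain above.
 */
#if 0
static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = (struct net_device *)ptr;

	switch (event) {
	case NETDEV_UP:
		printk(KERN_INFO "%s is up\n", dev->name);
		break;
	case NETDEV_DOWN:
		printk(KERN_INFO "%s is down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_notifier = {
	notifier_call:	my_netdev_event,
};

/* register_netdevice_notifier(&my_netdev_notifier) in module init,
 * unregister_netdevice_notifier(&my_netdev_notifier) in module exit. */
#endif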

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;
	do_gettimeofday(&skb->stamp);

	br_read_lock(BR_NETPROTO_LOCK);
	for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
	{
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
			((struct sock *)ptype->data != skb->sk))
		{
			struct sk_buff *skb2;
			if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
				break;

			/* skb->nh should be correctly
			   set by the sender, so the second statement is
			   just protection against buggy protocols.
			 */
			skb2->mac.raw = skb2->data;

			if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
				skb2->nh.raw = skb2->data;
			}

			skb2->h.raw = skb2->nh.raw;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype);
		}
	}
	br_read_unlock(BR_NETPROTO_LOCK);
}

/* Calculate the checksum in the case when the packet is misrouted.
 * If it fails for some reason, ignore the failure and send the skb
 * with a wrong checksum.
 */
struct sk_buff * skb_checksum_help(struct sk_buff *skb)
{
	int offset;
	unsigned int csum;

	offset = skb->h.raw - skb->data;
	if (offset > (int)skb->len)
		BUG();
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb->tail - skb->h.raw;
	if (offset <= 0)
		BUG();
	if (skb->csum+2 > offset)
		BUG();

	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
	skb->ip_summed = CHECKSUM_NONE;
	return skb;
}
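
/*
 * Illustrative sketch, under #if 0, of the 16-bit one's-complement fold
 * that csum_fold() applies to the 32-bit sum from skb_checksum().  This
 * is plain RFC 1071 arithmetic, not a replacement for the arch helpers.
 */
#if 0
static inline unsigned short fold_csum(unsigned int sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold the carries once */
	sum = (sum & 0xffff) + (sum >> 16);	/* ... and any new carry */
	return (unsigned short)~sum;		/* one's complement */
}
#endif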

#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and allows mapping all of memory.
 * 2. No high memory really exists on this machine.
 */

static inline int
illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int i;

	if (dev->features&NETIF_F_HIGHDMA)
		return 0;

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
		if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
			return 1;

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling this
 *	function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc  *q;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features&NETIF_F_FRAGLIST) &&
	    skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* A fragmented skb is linearized if the device does not support SG,
	 * or if at least one of the fragments is in highmem and the device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* If the packet is not checksummed and the device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_HW &&
	    (!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) &&
	     (!(dev->features&NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP)))) {
		if ((skb = skb_checksum_help(skb)) == NULL)
			return -ENOMEM;
	}

	/* Grab device queue */
	spin_lock_bh(&dev->queue_lock);
	q = dev->qdisc;
	if (q->enqueue) {
		int ret = q->enqueue(skb, q);

		qdisc_run(dev);

		spin_unlock_bh(&dev->queue_lock);
		return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all sorts of tunnels...

	   Really, it is unlikely that xmit_lock protection is necessary here.
	   (f.e. loopback and IP tunnels are clean, ignoring statistics counters.)
	   However, it is possible that they rely on the protection
	   made by us here.

	   Check this and shoot the lock. It is not prone to deadlocks.
	   Or shoot the noqueue qdisc, which is even simpler 8)
	 */
	if (dev->flags&IFF_UP) {
		int cpu = smp_processor_id();

		if (dev->xmit_lock_owner != cpu) {
			spin_unlock(&dev->queue_lock);
			spin_lock(&dev->xmit_lock);
			dev->xmit_lock_owner = cpu;

			if (!netif_queue_stopped(dev)) {
				if (netdev_nit)
					dev_queue_xmit_nit(skb,dev);

				if (dev->hard_start_xmit(skb, dev) == 0) {
					dev->xmit_lock_owner = -1;
					spin_unlock_bh(&dev->xmit_lock);
					return 0;
				}
			}
			dev->xmit_lock_owner = -1;
			spin_unlock_bh(&dev->xmit_lock);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", dev->name);
			kfree_skb(skb);
			return -ENETDOWN;
		} else {
			/* Recursion is detected! It is possible, unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
		}
	}
	spin_unlock_bh(&dev->queue_lock);

	kfree_skb(skb);
	return -ENETDOWN;
}
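
/*
 * Illustrative sketch, under #if 0, of a caller of dev_queue_xmit().
 * send_test_frame() is hypothetical and the frame contents are dummy
 * zeroes; real callers build a proper header and payload first.
 */
#if 0
static int send_test_frame(struct net_device *dev)
{
	struct sk_buff *skb;

	skb = alloc_skb(ETH_ZLEN + 2, GFP_ATOMIC);
	if (skb == NULL)
		return -ENOMEM;
	skb_reserve(skb, 2);			/* keep later headers aligned */
	memset(skb_put(skb, ETH_ZLEN), 0, ETH_ZLEN);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_802_3);	/* arbitrary for this demo */
	skb->priority = 0;

	/* Consumes the skb in every case; may still drop under congestion. */
	return dev_queue_xmit(skb);
}
#endif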


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog = 300;
int weight_p = 64;            /* old backlog weight */
/* These numbers are selected based on intuition and some
 * experimentation; if you have a more scientific way of doing this
 * please go ahead and fix things.
 */
int no_cong_thresh = 10;
int no_cong = 20;
int lo_cong = 100;
int mod_cong = 290;

struct netif_rx_stats netdev_rx_stat[NR_CPUS];


#ifdef CONFIG_NET_HW_FLOWCONTROL
atomic_t netdev_dropping = ATOMIC_INIT(0);
static unsigned long netdev_fc_mask = 1;
unsigned long netdev_fc_xoff = 0;
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;

static struct
{
	void (*stimul)(struct net_device *);
	struct net_device *dev;
} netdev_fc_slots[BITS_PER_LONG];

int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
{
	int bit = 0;
	unsigned long flags;

	spin_lock_irqsave(&netdev_fc_lock, flags);
	if (netdev_fc_mask != ~0UL) {
		bit = ffz(netdev_fc_mask);
		netdev_fc_slots[bit].stimul = stimul;
		netdev_fc_slots[bit].dev = dev;
		set_bit(bit, &netdev_fc_mask);
		clear_bit(bit, &netdev_fc_xoff);
	}
	spin_unlock_irqrestore(&netdev_fc_lock, flags);
	return bit;
}

void netdev_unregister_fc(int bit)
{
	unsigned long flags;

	spin_lock_irqsave(&netdev_fc_lock, flags);
	if (bit > 0) {
		netdev_fc_slots[bit].stimul = NULL;
		netdev_fc_slots[bit].dev = NULL;
		clear_bit(bit, &netdev_fc_mask);
		clear_bit(bit, &netdev_fc_xoff);
	}
	spin_unlock_irqrestore(&netdev_fc_lock, flags);
}

static void netdev_wakeup(void)
{
	unsigned long xoff;

	spin_lock(&netdev_fc_lock);
	xoff = netdev_fc_xoff;
	netdev_fc_xoff = 0;
	while (xoff) {
		int i = ffz(~xoff);
		xoff &= ~(1<<i);
		netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
	}
	spin_unlock(&netdev_fc_lock);
}
#endif
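
/*
 * Illustrative sketch, under #if 0, of the CONFIG_NET_HW_FLOWCONTROL
 * hooks above.  A driver registers a "stimulus" callback once and keeps
 * the returned bit for its lifetime; my_fc_bit and my_restart are
 * hypothetical, and the xoff signalling details are driver specific.
 */
#if 0
static int my_fc_bit;

static void my_restart(struct net_device *dev)
{
	/* Invoked from netdev_wakeup() once the backlog drains. */
	netif_wake_queue(dev);
}

/* In the driver's open routine:
 *	my_fc_bit = netdev_register_fc(dev, my_restart);
 * and in its stop routine:
 *	netdev_unregister_fc(my_fc_bit);
 */
#endif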

static void get_sample_stats(int cpu)
{
#ifdef RAND_LIE
	unsigned long rd;
	int rq;
#endif
	int blog = softnet_data[cpu].input_pkt_queue.qlen;
	int avg_blog = softnet_data[cpu].avg_blog;

	avg_blog = (avg_blog >> 1)+ (blog >> 1);

	if (avg_blog > mod_cong) {
		/* Above moderate congestion levels. */
		softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
#ifdef RAND_LIE
		rd = net_random();
		rq = rd % netdev_max_backlog;
		if (rq < avg_blog) /* unlucky bastard */
			softnet_data[cpu].cng_level = NET_RX_DROP;
#endif
	} else if (avg_blog > lo_cong) {
		softnet_data[cpu].cng_level = NET_RX_CN_MOD;
#ifdef RAND_LIE
		rd = net_random();
		rq = rd % netdev_max_backlog;
		if (rq < avg_blog) /* unlucky bastard */
			softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
#endif
	} else if (avg_blog > no_cong)
		softnet_data[cpu].cng_level = NET_RX_CN_LOW;
	else  /* no congestion */
		softnet_data[cpu].cng_level = NET_RX_SUCCESS;

	softnet_data[cpu].avg_blog = avg_blog;
}

#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy)
{
	/* 10 ms or 1 ms -- I don't care -- JHS */
	int next_tick = 1;
	int cpu = smp_processor_id();

	get_sample_stats(cpu);
	next_tick += jiffies;
	mod_timer(&samp_timer, next_tick);
}
#endif


/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_CN_LOW     (low congestion)
 *	NET_RX_CN_MOD     (moderate congestion)
 *	NET_RX_CN_HIGH    (high congestion)
 *	NET_RX_DROP    (packet was dropped)
 *
 *
 */

int netif_rx(struct sk_buff *skb)
{
	int this_cpu = smp_processor_id();
	struct softnet_data *queue;
	unsigned long flags;

	if (skb->stamp.tv_sec == 0)
		do_gettimeofday(&skb->stamp);

	/* The code is rearranged so that the path is the shortest
	   when the CPU is congested but still operating.
	 */
	queue = &softnet_data[this_cpu];

	local_irq_save(flags);

	netdev_rx_stat[this_cpu].total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
			if (queue->throttle)
				goto drop;

enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue,skb);
			local_irq_restore(flags);
#ifndef OFFLINE_SAMPLE
			get_sample_stats(this_cpu);
#endif
			return queue->cng_level;
		}

		if (queue->throttle) {
			queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
			if (atomic_dec_and_test(&netdev_dropping))
				netdev_wakeup();
#endif
		}

		netif_rx_schedule(&queue->blog_dev);
		goto enqueue;
	}

	if (queue->throttle == 0) {
		queue->throttle = 1;
		netdev_rx_stat[this_cpu].throttled++;
#ifdef CONFIG_NET_HW_FLOWCONTROL
		atomic_inc(&netdev_dropping);
#endif
	}

drop:
	netdev_rx_stat[this_cpu].dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}
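
/*
 * Illustrative sketch, under #if 0, of the driver side of netif_rx().
 * my_rx_one is a hypothetical interrupt-time receive routine for an
 * Ethernet device; the congestion codes documented above can be used
 * to throttle the hardware.
 */
#if 0
static void my_rx_one(struct net_device *dev, void *data, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (skb == NULL)
		return;				/* drop; bump stats in real code */
	skb_reserve(skb, 2);			/* align the IP header */
	memcpy(skb_put(skb, len), data, len);

	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);

	switch (netif_rx(skb)) {
	case NET_RX_CN_HIGH:
	case NET_RX_DROP:
		/* e.g. tell the NIC to slow its receive interrupts down */
		break;
	}
}
#endif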

/* Deliver an skb to an old protocol, which is not well threaded
   or which does not understand shared skbs.
 */
static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
{
	static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
	int ret = NET_RX_DROP;


	if (!last) {
		skb = skb_clone(skb, GFP_ATOMIC);
		if (skb == NULL)
			return ret;
	}
	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		return ret;
	}

	/* The (correct) assumption is that old protocols
	   did not depend on BHs other than NET_BH and TIMER_BH.
	 */

	/* Emulate NET_BH with a special spinlock */
	spin_lock(&net_bh_lock);

	/* Disable timers and wait for all timers completion */
	tasklet_disable(bh_task_vec+TIMER_BH);

	ret = pt->func(skb, skb->dev, pt);

	tasklet_hi_enable(bh_task_vec+TIMER_BH);
	spin_unlock(&net_bh_lock);
	return ret;
}

static __inline__ void skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	if (dev->master) {
		skb->real_dev = skb->dev;
		skb->dev = dev->master;
	}
}

static void net_tx_action(struct softirq_action *h)
{
	int cpu = smp_processor_id();

	if (softnet_data[cpu].completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = softnet_data[cpu].completion_queue;
		softnet_data[cpu].completion_queue = NULL;
		local_irq_enable();

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			BUG_TRAP(atomic_read(&skb->users) == 0);
			__kfree_skb(skb);
		}
	}

	if (softnet_data[cpu].output_queue) {
		struct net_device *head;

		local_irq_disable();
		head = softnet_data[cpu].output_queue;
		softnet_data[cpu].output_queue = NULL;
		local_irq_enable();

		while (head != NULL) {
			struct net_device *dev = head;
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				netif_schedule(dev);
			}
		}
	}
}


#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
#endif

static __inline__ int handle_bridge(struct sk_buff *skb,
				     struct packet_type *pt_prev)
{
	int ret = NET_RX_DROP;

	if (pt_prev) {
		if (!pt_prev->data)
			ret = deliver_to_old_ones(pt_prev, skb, 0);
		else {
			atomic_inc(&skb->users);
			ret = pt_prev->func(skb, skb->dev, pt_prev);
		}
	}

	br_handle_frame_hook(skb);
	return ret;
}


#ifdef CONFIG_NET_DIVERT
static inline int handle_diverter(struct sk_buff *skb)
{
	/* if diversion is supported on device, then divert */
	if (skb->dev->divert && skb->dev->divert->divert)
		divert_frame(skb);
	return 0;
}
#endif   /* CONFIG_NET_DIVERT */

int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	int ret = NET_RX_DROP;
	unsigned short type;

	if (skb->stamp.tv_sec == 0)
		do_gettimeofday(&skb->stamp);

	skb_bond(skb);

	netdev_rx_stat[smp_processor_id()].total++;

#ifdef CONFIG_NET_FASTROUTE
	if (skb->pkt_type == PACKET_FASTROUTE) {
		netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
		return dev_queue_xmit(skb);
	}
#endif

	skb->h.raw = skb->nh.raw = skb->data;

	pt_prev = NULL;
	for (ptype = ptype_all; ptype; ptype = ptype->next) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev) {
				if (!pt_prev->data) {
					ret = deliver_to_old_ones(pt_prev, skb, 0);
				} else {
					atomic_inc(&skb->users);
					ret = pt_prev->func(skb, skb->dev, pt_prev);
				}
			}
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_DIVERT
	if (skb->dev->divert && skb->dev->divert->divert)
		ret = handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
	if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL &&
	    skb->pkt_type != PACKET_LOOPBACK) {
		return handle_bridge(skb, pt_prev);
	}
#endif

	type = skb->protocol;
	for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev) {
				if (!pt_prev->data) {
					ret = deliver_to_old_ones(pt_prev, skb, 0);
				} else {
					atomic_inc(&skb->users);
					ret = pt_prev->func(skb, skb->dev, pt_prev);
				}
			}
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		if (!pt_prev->data) {
			ret = deliver_to_old_ones(pt_prev, skb, 1);
		} else {
			ret = pt_prev->func(skb, skb->dev, pt_prev);
		}
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

	return ret;
}

static int process_backlog(struct net_device *backlog_dev, int *budget)
{
	int work = 0;
	int quota = min(backlog_dev->quota, *budget);
	int this_cpu = smp_processor_id();
	struct softnet_data *queue = &softnet_data[this_cpu];
	unsigned long start_time = jiffies;

	for (;;) {
		struct sk_buff *skb;
		struct net_device *dev;

		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (skb == NULL)
			goto job_done;
		local_irq_enable();

		dev = skb->dev;

		netif_receive_skb(skb);

		dev_put(dev);

		work++;

		if (work >= quota || jiffies - start_time > 1)
			break;

#ifdef CONFIG_NET_HW_FLOWCONTROL
		if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
			queue->throttle = 0;
			if (atomic_dec_and_test(&netdev_dropping)) {
				netdev_wakeup();
				break;
			}
		}
#endif
	}

	backlog_dev->quota -= work;
	*budget -= work;
	return -1;

job_done:
	backlog_dev->quota -= work;
	*budget -= work;

	list_del(&backlog_dev->poll_list);
	smp_mb__before_clear_bit();
	netif_poll_enable(backlog_dev);

	if (queue->throttle) {
		queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
		if (atomic_dec_and_test(&netdev_dropping))
			netdev_wakeup();
#endif
	}
	local_irq_enable();
	return 0;
}

static void net_rx_action(struct softirq_action *h)
{
	int this_cpu = smp_processor_id();
	struct softnet_data *queue = &softnet_data[this_cpu];
	unsigned long start_time = jiffies;
	int budget = netdev_max_backlog;

	br_read_lock(BR_NETPROTO_LOCK);
	local_irq_disable();

	while (!list_empty(&queue->poll_list)) {
		struct net_device *dev;

		if (budget <= 0 || jiffies - start_time > 1)
			goto softnet_break;

		local_irq_enable();

		dev = list_entry(queue->poll_list.next, struct net_device, poll_list);

		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
			local_irq_disable();
			list_del(&dev->poll_list);
			list_add_tail(&dev->poll_list, &queue->poll_list);
			if (dev->quota < 0)
				dev->quota += dev->weight;
			else
				dev->quota = dev->weight;
		} else {
			dev_put(dev);
			local_irq_disable();
		}
	}

	local_irq_enable();
	br_read_unlock(BR_NETPROTO_LOCK);
	return;

softnet_break:
	netdev_rx_stat[this_cpu].time_squeeze++;
	__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);

	local_irq_enable();
	br_read_unlock(BR_NETPROTO_LOCK);
}
static gifconf_func_t * gifconf_list [NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */

int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family>=NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}


/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */

static int dev_ifname(struct ifreq *arg)
{
	struct net_device *dev;
	struct ifreq ifr;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifr.ifr_ifindex);
	if (!dev) {
		read_unlock(&dev_base_lock);
		return -ENODEV;
	}

	strcpy(ifr.ifr_name, dev->name);
	read_unlock(&dev_base_lock);

	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
	return 0;
}
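
/*
 * Illustrative user-space counterpart of dev_ifname() above, under
 * #if 0 since it is not kernel code: map an ifindex to a name with the
 * SIOCGIFNAME ioctl.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int print_ifname(int ifindex)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
		printf("index %d is %s\n", ifindex, ifr.ifr_name);
	close(fd);
	return 0;
}
#endif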

/*
 *	Perform a SIOCGIFCONF call. This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 */

static int dev_ifconf(char *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char *pos;
	int len;
	int total;
	int i;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 *	Loop over the interfaces, and write an info block for each.
	 */

	total = 0;
	for (dev = dev_base; dev != NULL; dev = dev->next) {
		for (i=0; i<NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				if (pos==NULL) {
					done = gifconf_list[i](dev, NULL, 0);
				} else {
					done = gifconf_list[i](dev, pos+total, len-total);
				}
				if (done<0) {
					return -EFAULT;
				}
				total += done;
			}
		}
  	}

	/*
	 *	All done.  Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
		return -EFAULT;

	/*
	 * 	Both BSD and Solaris return 0 here, so we do too.
	 */
	return 0;
}
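
/*
 * Illustrative user-space counterpart of dev_ifconf() above, under
 * #if 0 since it is not kernel code.  It uses the convention the kernel
 * code supports: ifc_buf == NULL first to learn the required length,
 * then a second call with a real buffer.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int list_interfaces(void)
{
	struct ifconf ifc;
	int fd, i, n, ret = -1;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	ifc.ifc_buf = NULL;			/* size query only */
	ifc.ifc_len = 0;
	if (ioctl(fd, SIOCGIFCONF, &ifc) < 0)
		goto out;
	ifc.ifc_buf = malloc(ifc.ifc_len);
	if (ifc.ifc_buf == NULL)
		goto out;
	if (ioctl(fd, SIOCGIFCONF, &ifc) < 0)
		goto out_free;

	n = ifc.ifc_len / sizeof(struct ifreq);
	for (i = 0; i < n; i++)
		printf("%s\n", ifc.ifc_req[i].ifr_name);
	ret = 0;
out_free:
	free(ifc.ifc_buf);
out:
	close(fd);
	return ret;
}
#endif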
1771 
1772 /*
1773  *	This is invoked by the /proc filesystem handler to display a device
1774  *	in detail.
1775  */
1776 
1777 #ifdef CONFIG_PROC_FS
1778 
sprintf_stats(char * buffer,struct net_device * dev)1779 static int sprintf_stats(char *buffer, struct net_device *dev)
1780 {
1781 	struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
1782 	int size;
1783 
1784 	if (stats)
1785 		size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1786  		   dev->name,
1787 		   stats->rx_bytes,
1788 		   stats->rx_packets, stats->rx_errors,
1789 		   stats->rx_dropped + stats->rx_missed_errors,
1790 		   stats->rx_fifo_errors,
1791 		   stats->rx_length_errors + stats->rx_over_errors
1792 		   + stats->rx_crc_errors + stats->rx_frame_errors,
1793 		   stats->rx_compressed, stats->multicast,
1794 		   stats->tx_bytes,
1795 		   stats->tx_packets, stats->tx_errors, stats->tx_dropped,
1796 		   stats->tx_fifo_errors, stats->collisions,
1797 		   stats->tx_carrier_errors + stats->tx_aborted_errors
1798 		   + stats->tx_window_errors + stats->tx_heartbeat_errors,
1799 		   stats->tx_compressed);
1800 	else
1801 		size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
1802 
1803 	return size;
1804 }

/*
 *	Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
 *	to create /proc/net/dev
 */

static int dev_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len = 0;
	off_t begin = 0;
	off_t pos = 0;
	int size;
	struct net_device *dev;


	size = sprintf(buffer,
		"Inter-|   Receive                                                |  Transmit\n"
		" face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n");

	pos += size;
	len += size;


	read_lock(&dev_base_lock);
	for (dev = dev_base; dev != NULL; dev = dev->next) {
		size = sprintf_stats(buffer+len, dev);
		len += size;
		pos = begin + len;

		if (pos < offset) {
			len = 0;
			begin = pos;
		}
		if (pos > offset + length)
			break;
	}
	read_unlock(&dev_base_lock);

	*start = buffer + (offset - begin);	/* Start of wanted data */
	len -= (offset - begin);		/* Start slop */
	if (len > length)
		len = length;			/* Ending slop */
	if (len < 0)
		len = 0;
	return len;
}

static int dev_proc_stats(char *buffer, char **start, off_t offset,
			  int length, int *eof, void *data)
{
	int i, lcpu;
	int len = 0;

	for (lcpu = 0; lcpu < smp_num_cpus; lcpu++) {
		i = cpu_logical_map(lcpu);
		len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
			       netdev_rx_stat[i].total,
			       netdev_rx_stat[i].dropped,
			       netdev_rx_stat[i].time_squeeze,
			       netdev_rx_stat[i].throttled,
			       netdev_rx_stat[i].fastroute_hit,
			       netdev_rx_stat[i].fastroute_success,
			       netdev_rx_stat[i].fastroute_defer,
			       netdev_rx_stat[i].fastroute_deferred_out,
#if 0
			       netdev_rx_stat[i].fastroute_latency_reduction
#else
			       netdev_rx_stat[i].cpu_collision
#endif
			       );
	}

	len -= offset;

	if (len > length)
		len = length;
	if (len < 0)
		len = 0;

	*start = buffer + offset;
	*eof = 1;

	return len;
}
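
/*
 *	Each line of /proc/net/softnet_stat describes one CPU, printed in
 *	logical CPU order. The columns are hexadecimal and correspond, in
 *	order, to: total, dropped, time_squeeze, throttled, fastroute_hit,
 *	fastroute_success, fastroute_defer, fastroute_deferred_out and
 *	cpu_collision (see the sprintf above).
 */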

#endif	/* CONFIG_PROC_FS */


/**
 *	netdev_set_master	-	set up master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the RTNL semaphore. On a failure
 *	a negative errno code is returned. On success the reference counts
 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 *	function returns zero.
 */

int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	br_write_lock_bh(BR_NETPROTO_LOCK);
	slave->master = master;
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}
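
/*
 *	Usage sketch (illustrative only): a bonding-style driver would
 *	typically pair the calls like this, under the RTNL semaphore;
 *	"bond_dev" and "slave_dev" are hypothetical names.
 *
 *		rtnl_lock();
 *		err = netdev_set_master(slave_dev, bond_dev);
 *		rtnl_unlock();
 *
 *	and later break the pair with netdev_set_master(slave_dev, NULL).
 */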

/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts to normal filtering operation. A negative @inc
 *	value is used to drop promiscuity on the device.
 */

void dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	dev->flags |= IFF_PROMISC;
	if ((dev->promiscuity += inc) == 0)
		dev->flags &= ~IFF_PROMISC;
	if (dev->flags^old_flags) {
#ifdef CONFIG_NET_FASTROUTE
		if (dev->flags&IFF_PROMISC) {
			netdev_fastroute_obstacles++;
			dev_clear_fastroute(dev);
		} else
			netdev_fastroute_obstacles--;
#endif
		dev_mc_upload(dev);
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
	}
}
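
/*
 *	Usage sketch (illustrative only): callers such as packet taps
 *	bracket their lifetime with matching +1/-1 updates so that the
 *	reference count balances and the device leaves promiscuous mode
 *	once the last user is gone.
 *
 *		dev_set_promiscuity(dev, 1);	... start capturing ...
 *		dev_set_promiscuity(dev, -1);	... done ...
 */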

/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames for a device. While
 *	the count in the device remains above zero the interface stays in
 *	all-multicast mode. Once it hits zero the device reverts to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 */

void dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	dev->flags |= IFF_ALLMULTI;
	if ((dev->allmulti += inc) == 0)
		dev->flags &= ~IFF_ALLMULTI;
	if (dev->flags^old_flags)
		dev_mc_upload(dev);
}
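
/*
 *	Usage sketch (illustrative only): as with promiscuity, a protocol
 *	that needs every multicast frame takes a reference and drops it
 *	when done.
 *
 *		dev_set_allmulti(dev, 1);
 *		...
 *		dev_set_allmulti(dev, -1);
 */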

int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret;
	int old_flags = dev->flags;

	/*
	 *	Set the flags on our device.
	 */

	dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
			       IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
				       (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	dev_mc_upload(dev);

	/*
	 *	Have we downed the interface? We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags^flags)&IFF_UP)	/* Bit is different? */
	{
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (ret == 0)
			dev_mc_upload(dev);
	}

	if (dev->flags&IFF_UP &&
	    ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);

	if ((flags^dev->gflags)&IFF_PROMISC) {
		int inc = (flags&IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: the order in which IFF_PROMISC and IFF_ALLMULTI are
	   synchronized is important. Some (broken) drivers set IFF_PROMISC
	   when IFF_ALLMULTI is requested, without asking us and without
	   reporting it.
	 */
	if ((flags^dev->gflags)&IFF_ALLMULTI) {
		int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	if (old_flags^dev->flags)
		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);

	return ret;
}
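
/*
 *	Usage sketch (illustrative only): in-kernel callers can toggle a
 *	single flag by editing a copy of dev->flags, e.g. to bring an
 *	interface up; this mirrors what SIOCSIFFLAGS does on behalf of
 *	user space, and like that path it should run under the RTNL
 *	semaphore.
 *
 *		rtnl_lock();
 *		err = dev_change_flags(dev, dev->flags | IFF_UP);
 *		rtnl_unlock();
 */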

/*
 *	Perform the SIOCxIFxxx calls.
 */

static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
	struct net_device *dev;
	int err;

	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
		return -ENODEV;

	switch(cmd)
	{
		case SIOCGIFFLAGS:	/* Get interface flags */
			ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
				|(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
			if (netif_running(dev) && netif_carrier_ok(dev))
				ifr->ifr_flags |= IFF_RUNNING;
			return 0;

		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCGIFMETRIC:	/* Get the metric on the interface (currently unused) */
			ifr->ifr_metric = 0;
			return 0;

		case SIOCSIFMETRIC:	/* Set the metric on the interface (currently unused) */
			return -EOPNOTSUPP;

		case SIOCGIFMTU:	/* Get the MTU of a device */
			ifr->ifr_mtu = dev->mtu;
			return 0;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			if (ifr->ifr_mtu == dev->mtu)
				return 0;

			/*
			 *	MTU must be positive.
			 */

			if (ifr->ifr_mtu<0)
				return -EINVAL;

			if (!netif_device_present(dev))
				return -ENODEV;

			if (dev->change_mtu)
				err = dev->change_mtu(dev, ifr->ifr_mtu);
			else {
				dev->mtu = ifr->ifr_mtu;
				err = 0;
			}
			if (!err && dev->flags&IFF_UP)
				notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
			return err;

		case SIOCGIFHWADDR:
			memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
			ifr->ifr_hwaddr.sa_family=dev->type;
			return 0;

		case SIOCSIFHWADDR:
			if (dev->set_mac_address == NULL)
				return -EOPNOTSUPP;
			if (ifr->ifr_hwaddr.sa_family!=dev->type)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
			if (!err)
				notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
			return err;

		case SIOCSIFHWBROADCAST:
			if (ifr->ifr_hwaddr.sa_family!=dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
			notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCGIFMAP:
			ifr->ifr_map.mem_start=dev->mem_start;
			ifr->ifr_map.mem_end=dev->mem_end;
			ifr->ifr_map.base_addr=dev->base_addr;
			ifr->ifr_map.irq=dev->irq;
			ifr->ifr_map.dma=dev->dma;
			ifr->ifr_map.port=dev->if_port;
			return 0;

		case SIOCSIFMAP:
			if (dev->set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return dev->set_config(dev,&ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			if (dev->set_multicast_list == NULL ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
			return 0;

		case SIOCDELMULTI:
			if (dev->set_multicast_list == NULL ||
			    ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
			return 0;

		case SIOCGIFINDEX:
			ifr->ifr_ifindex = dev->ifindex;
			return 0;

		case SIOCGIFTXQLEN:
			ifr->ifr_qlen = dev->tx_queue_len;
			return 0;

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen<0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			if (dev->flags&IFF_UP)
				return -EBUSY;
			/* Check if name contains a wildcard */
			if (strchr(ifr->ifr_newname, '%')) {
				char format[IFNAMSIZ + 1];
				int ret;
				memcpy(format, ifr->ifr_newname, IFNAMSIZ);
				format[IFNAMSIZ-1] = 0;
				/* Find a free name based on the format.
				 * dev_alloc_name() replaces "%d" with at most
				 * two digits, so the name cannot overflow. - Jean II */
				ret = dev_alloc_name(dev, format);
				if (ret < 0)
					return ret;
				/* Copy the new name back to the caller. */
				strncpy(ifr->ifr_newname, dev->name, IFNAMSIZ);
			} else {
				if (__dev_get_by_name(ifr->ifr_newname))
					return -EEXIST;
				memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
				dev->name[IFNAMSIZ-1] = 0;
			}
			notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
			return 0;

		/*
		 *	Unknown or private ioctl
		 */

		default:
			if ((cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCWANDEV) {
				if (dev->do_ioctl) {
					if (!netif_device_present(dev))
						return -ENODEV;
					return dev->do_ioctl(dev, ifr, cmd);
				}
				return -EOPNOTSUPP;
			}

	}
	return -EINVAL;
}

/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(unsigned int cmd, void *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_shlock();
		ret = dev_ifconf((char *) arg);
		rtnl_shunlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME) {
		return dev_ifname((struct ifreq *)arg);
	}

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch(cmd)
	{
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */

		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(ifr.ifr_name);
			read_lock(&dev_base_lock);
			ret = dev_ifsioc(&ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
			}
			return ret;

		case SIOCETHTOOL:
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(&ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */

		case SIOCSIFNAME:
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(ifr.ifr_name);
			dev_probe_lock();
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			dev_probe_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */

		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
		case SIOCBONDCHANGEACTIVE:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(ifr.ifr_name);
			dev_probe_lock();
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			dev_probe_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but currently
			   do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space. Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */

		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(ifr.ifr_name);
				dev_probe_lock();
				rtnl_lock();
				ret = dev_ifsioc(&ifr, cmd);
				rtnl_unlock();
				dev_probe_unlock();
				if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
				return ret;
			}
#ifdef WIRELESS_EXT
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
				/* If command is `set a parameter', or
				 * `get the encoding parameters', check if
				 * the user has the right to do it */
				if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
					if (!capable(CAP_NET_ADMIN))
						return -EPERM;
				}
				dev_load(ifr.ifr_name);
				rtnl_lock();
				/* Follow me in net/core/wireless.c */
				ret = wireless_process_ioctl(&ifr, cmd);
				rtnl_unlock();
				if (!ret && IW_IS_GET(cmd) &&
				    copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
				return ret;
			}
#endif	/* WIRELESS_EXT */
			return -EINVAL;
	}
}
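
/*
 *	Usage sketch (illustrative only): the usual entry point is the
 *	socket ioctl path from user space. A caller reading an MTU would
 *	do something like the following; "sock" and "eth0" are assumptions
 *	of the example.
 *
 *		struct ifreq ifr;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *		if (ioctl(sock, SIOCGIFMTU, &ifr) == 0)
 *			printf("mtu %d\n", ifr.ifr_mtu);
 */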


/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */

int dev_new_index(void)
{
	static int ifindex;
	for (;;) {
		if (++ifindex <= 0)
			ifindex = 1;
		if (__dev_get_by_index(ifindex) == NULL)
			return ifindex;
	}
}

static int dev_boot_phase = 1;

/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore.  See the comment at the
 *	end of Space.c for details about the locking.  You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int net_dev_init(void);

int register_netdevice(struct net_device *dev)
{
	struct net_device *d, **dp;
#ifdef CONFIG_NET_DIVERT
	int ret;
#endif

	spin_lock_init(&dev->queue_lock);
	spin_lock_init(&dev->xmit_lock);
	dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_FASTROUTE
	dev->fastpath_lock = RW_LOCK_UNLOCKED;
#endif

	if (dev_boot_phase)
		net_dev_init();

#ifdef CONFIG_NET_DIVERT
	ret = alloc_divert_blk(dev);
	if (ret)
		return ret;
#endif /* CONFIG_NET_DIVERT */

	dev->iflink = -1;

	/* Init, if this function is available */
	if (dev->init && dev->init(dev) != 0) {
#ifdef CONFIG_NET_DIVERT
		free_divert_blk(dev);
#endif
		return -EIO;
	}

	dev->ifindex = dev_new_index();
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence, and append to tail of chain */
	for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
		if (d == dev || strcmp(d->name, dev->name) == 0) {
#ifdef CONFIG_NET_DIVERT
			free_divert_blk(dev);
#endif
			return -EEXIST;
		}
	}

	/* Fix illegal SG+CSUM combinations. */
	if ((dev->features & NETIF_F_SG) &&
	    !(dev->features & (NETIF_F_IP_CSUM |
			       NETIF_F_NO_CSUM |
			       NETIF_F_HW_CSUM))) {
		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_SG;
	}

	/*
	 *	Install a nil rebuild_header routine: it should never be
	 *	called and serves only as a bug trap.
	 */

	if (dev->rebuild_header == NULL)
		dev->rebuild_header = default_rebuild_header;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev->next = NULL;
	dev_init_scheduler(dev);
	write_lock_bh(&dev_base_lock);
	*dp = dev;
	dev_hold(dev);
	dev->deadbeaf = 0;
	write_unlock_bh(&dev_base_lock);

	/* Notify protocols that a new device appeared. */
	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);

	net_run_sbin_hotplug(dev, "register");

	return 0;
}
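
/*
 *	Usage sketch (illustrative only): most drivers go through the
 *	register_netdev() wrapper, which takes the RTNL semaphore around
 *	this call; open-coded registration looks like:
 *
 *		rtnl_lock();
 *		err = register_netdevice(dev);
 *		rtnl_unlock();
 */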

/**
 *	netdev_finish_unregister - complete unregistration
 *	@dev: device
 *
 *	Destroy and free a dead device. A value of zero is returned on
 *	success.
 */

int netdev_finish_unregister(struct net_device *dev)
{
	BUG_TRAP(dev->ip_ptr==NULL);
	BUG_TRAP(dev->ip6_ptr==NULL);
	BUG_TRAP(dev->dn_ptr==NULL);

	if (!dev->deadbeaf) {
		printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
		return 0;
	}
#ifdef NET_REFCNT_DEBUG
	printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
	       (dev->features & NETIF_F_DYNALLOC)?"":", old style");
#endif
	if (dev->destructor)
		dev->destructor(dev);
	if (dev->features & NETIF_F_DYNALLOC)
		kfree(dev);
	return 0;
}

/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. On success 0 is returned, on a failure
 *	a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.  See the comment at the
 *	end of Space.c for details about the locking.  You may want
 *	unregister_netdev() instead of this.
 */

int unregister_netdevice(struct net_device *dev)
{
	unsigned long now, warning_time;
	struct net_device *d, **dp;

	/* If device is running, close it first. */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	BUG_TRAP(dev->deadbeaf==0);
	dev->deadbeaf = 1;

	/* And unlink it from device chain. */
	for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
		if (d == dev) {
			write_lock_bh(&dev_base_lock);
			*dp = d->next;
			write_unlock_bh(&dev_base_lock);
			break;
		}
	}
	if (d == NULL) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
		return -ENODEV;
	}

	/* Synchronize to net_rx_action. */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	if (dev_boot_phase == 0) {
#ifdef CONFIG_NET_FASTROUTE
		dev_clear_fastroute(dev);
#endif

		/* Shutdown queueing discipline. */
		dev_shutdown(dev);

		net_run_sbin_hotplug(dev, "unregister");

		/* Notify protocols that we are about to destroy
		   this device. They should clean all the things.
		 */
		notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);

		/*
		 *	Flush the multicast chain
		 */
		dev_mc_discard(dev);
	}

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(dev->master==NULL);

#ifdef CONFIG_NET_DIVERT
	free_divert_blk(dev);
#endif

	if (dev->features & NETIF_F_DYNALLOC) {
#ifdef NET_REFCNT_DEBUG
		if (atomic_read(&dev->refcnt) != 1)
			printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
#endif
		dev_put(dev);
		return 0;
	}

	/* Last reference is our one */
	if (atomic_read(&dev->refcnt) == 1) {
		dev_put(dev);
		return 0;
	}

#ifdef NET_REFCNT_DEBUG
	printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
#endif

	/* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
	   it means that someone in the kernel still has a reference
	   to this device and we cannot release it.

	   "New style" devices have destructors, hence we can return from this
	   function and the destructor will do all the work later.  As of kernel
	   2.4.0 there are very few "new style" devices.

	   "Old style" devices expect that the device is free of any references
	   upon exit from this function.
	   We cannot return from this function until all such references have
	   fallen away.  This is because the caller of this function will probably
	   immediately kfree(*dev) and then be unloaded via sys_delete_module.

	   So, we linger until all references fall away.  The duration of the
	   linger is basically unbounded! It is driven by, for example, the
	   current setting of sysctl_ipfrag_time.

	   After 1 second, we start to rebroadcast unregister notifications
	   in the hope that careless clients will release the device.
	 */

	now = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 1) {
		if ((jiffies - now) > 1*HZ) {
			/* Rebroadcast unregister notification */
			notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
		}
		current->state = TASK_INTERRUPTIBLE;
		schedule_timeout(HZ/4);
		current->state = TASK_RUNNING;
		if ((jiffies - warning_time) > 10*HZ) {
			printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
					"become free. Usage count = %d\n",
					dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
	dev_put(dev);
	return 0;
}
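
/*
 *	Usage sketch (illustrative only): a module's cleanup path calls
 *	this (or the unregister_netdev() wrapper, which takes the RTNL
 *	semaphore itself) before freeing the device it registered.
 *
 *		rtnl_lock();
 *		unregister_netdevice(dev);
 *		rtnl_unlock();
 *		kfree(dev);	... old style devices only ...
 */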


/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 */

extern void net_device_init(void);
extern void ip_auto_config(void);
struct proc_dir_entry *proc_net_drivers;
#ifdef CONFIG_NET_DIVERT
extern void dv_init(void);
#endif /* CONFIG_NET_DIVERT */


/*
 *       Callers must hold the rtnl semaphore.  See the comment at the
 *       end of Space.c for details about the locking.
 */
int __init net_dev_init(void)
{
	struct net_device *dev, **dp;
	int i;

	if (!dev_boot_phase)
		return 0;


#ifdef CONFIG_NET_DIVERT
	dv_init();
#endif /* CONFIG_NET_DIVERT */

	/*
	 *	Initialise the packet receive queues.
	 */

	for (i = 0; i < NR_CPUS; i++) {
		struct softnet_data *queue;

		queue = &softnet_data[i];
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->throttle = 0;
		queue->cng_level = 0;
		queue->avg_blog = 10; /* arbitrary non-zero */
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);
		set_bit(__LINK_STATE_START, &queue->blog_dev.state);
		queue->blog_dev.weight = weight_p;
		queue->blog_dev.poll = process_backlog;
		atomic_set(&queue->blog_dev.refcnt, 1);
	}

#ifdef CONFIG_NET_PROFILE
	net_profile_init();
	NET_PROFILE_REGISTER(dev_queue_xmit);
	NET_PROFILE_REGISTER(softnet_process);
#endif

#ifdef OFFLINE_SAMPLE
	samp_timer.expires = jiffies + (10 * HZ);
	add_timer(&samp_timer);
#endif

	/*
	 *	Add the devices.
	 *	If the call to dev->init fails, the dev is removed
	 *	from the chain disconnecting the device until the
	 *	next reboot.
	 *
	 *	NB At boot phase networking is dead. No locking is required.
	 *	But we still preserve dev_base_lock for sanity.
	 */

	dp = &dev_base;
	while ((dev = *dp) != NULL) {
		spin_lock_init(&dev->queue_lock);
		spin_lock_init(&dev->xmit_lock);
#ifdef CONFIG_NET_FASTROUTE
		dev->fastpath_lock = RW_LOCK_UNLOCKED;
#endif
		dev->xmit_lock_owner = -1;
		dev->iflink = -1;
		dev_hold(dev);

		/*
		 * Allocate name. If the init() fails
		 * the name will be reissued correctly.
		 */
		if (strchr(dev->name, '%'))
			dev_alloc_name(dev, dev->name);

		/*
		 * Check boot time settings for the device.
		 */
		netdev_boot_setup_check(dev);

		if (dev->init && dev->init(dev)) {
			/*
			 * It failed to come up. It will be unhooked later.
			 * dev_alloc_name() can now advance to the next
			 * suitable name, which is checked next.
			 */
			dev->deadbeaf = 1;
			dp = &dev->next;
		} else {
			dp = &dev->next;
			dev->ifindex = dev_new_index();
			if (dev->iflink == -1)
				dev->iflink = dev->ifindex;
			if (dev->rebuild_header == NULL)
				dev->rebuild_header = default_rebuild_header;
			dev_init_scheduler(dev);
			set_bit(__LINK_STATE_PRESENT, &dev->state);
		}
	}

	/*
	 * Unhook devices that failed to come up
	 */
	dp = &dev_base;
	while ((dev = *dp) != NULL) {
		if (dev->deadbeaf) {
			write_lock_bh(&dev_base_lock);
			*dp = dev->next;
			write_unlock_bh(&dev_base_lock);
			dev_put(dev);
		} else {
			dp = &dev->next;
		}
	}

#ifdef CONFIG_PROC_FS
	proc_net_create("dev", 0, dev_get_info);
	create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
	proc_net_drivers = proc_mkdir("net/drivers", 0);
#ifdef WIRELESS_EXT
	/* Available in net/core/wireless.c */
	proc_net_create("wireless", 0, dev_get_wireless_info);
#endif	/* WIRELESS_EXT */
#endif	/* CONFIG_PROC_FS */

	dev_boot_phase = 0;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

	dst_init();
	dev_mcast_init();

#ifdef CONFIG_NET_SCHED
	pktsched_init();
#endif
	/*
	 *	Initialise network devices
	 */

	net_device_init();

	return 0;
}

#ifdef CONFIG_HOTPLUG

/* Notify userspace when a netdevice event occurs,
 * by running '/sbin/hotplug net' with certain
 * environment variables set.
 */

static int net_run_sbin_hotplug(struct net_device *dev, char *action)
{
	char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
	int i;

	sprintf(ifname, "INTERFACE=%s", dev->name);
	sprintf(action_str, "ACTION=%s", action);

	i = 0;
	argv[i++] = hotplug_path;
	argv[i++] = "net";
	argv[i] = 0;

	i = 0;
	/* minimal command environment */
	envp[i++] = "HOME=/";
	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
	envp[i++] = ifname;
	envp[i++] = action_str;
	envp[i] = 0;

	return call_usermodehelper(argv[0], argv, envp);
}
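
/*
 *	For example, registering "eth1" results in roughly this user space
 *	invocation (hotplug_path is normally "/sbin/hotplug"):
 *
 *		argv: /sbin/hotplug net
 *		env:  HOME=/ PATH=/sbin:/bin:/usr/sbin:/usr/bin
 *		      INTERFACE=eth1 ACTION=register
 */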
#endif
