/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>

#include <asm/uaccess.h>
#include <asm/system.h>

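/*
 *	A minimal sketch of the locking rule from the NOTE above: the unlocked
 *	__skb_ queue operations must be made atomic by the caller, e.g. by
 *	disabling local interrupts as skb_head_from_pool() below does, while
 *	the locked variants (skb_queue_tail() etc.) take care of this
 *	themselves. Disabled with #if 0; for illustration only.
 */
#if 0
static void example_enqueue_atomically(struct sk_buff_head *list,
				       struct sk_buff *skb)
{
	unsigned long flags;

	local_irq_save(flags);		/* make the list operation atomic */
	__skb_queue_tail(list, skb);	/* unlocked variant */
	local_irq_restore(flags);

	/* skb_queue_tail(list, skb) would be the self-locking equivalent. */
}
#endif
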
int sysctl_hot_list_len = 128;

static kmem_cache_t *skbuff_head_cache;

static union {
	struct sk_buff_head	list;
	char			pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];

/*
 *	Keep out-of-line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always
 *	reliable.
 */

/**
 *	skb_over_panic	- 	private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_put(). Not user callable.
 */

void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
	printk("skput:over: %p:%d put:%d dev:%s",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/**
 *	skb_under_panic	- 	private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_push(). Not user callable.
 */


void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
	printk("skput:under: %p:%d put:%d dev:%s",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

static __inline__ struct sk_buff *skb_head_from_pool(void)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list)) {
		struct sk_buff *skb;
		unsigned long flags;

		local_irq_save(flags);
		skb = __skb_dequeue(list);
		local_irq_restore(flags);
		return skb;
	}
	return NULL;
}

static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list) < sysctl_hot_list_len) {
		unsigned long flags;

		local_irq_save(flags);
		__skb_queue_head(list, skb);
		local_irq_restore(flags);

		return;
	}
	kmem_cache_free(skbuff_head_cache, skb);
}


/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 *	alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
 *	tail room of @size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */

struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk(KERN_ERR "alloc_skb called nonatomically "
			       "from interrupt %p\n", NET_CALLER(size));
			BUG();
		}
		gfp_mask &= ~__GFP_WAIT;
	}

	/* Get the HEAD */
	skb = skb_head_from_pool();
	if (skb == NULL) {
		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
		if (skb == NULL)
			goto nohead;
	}

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* XXX: does not include slab overhead */
	skb->truesize = size + sizeof(struct sk_buff);

	/* Load the data pointers. */
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end = data + size;

	/* Set up other state */
	skb->len = 0;
	skb->cloned = 0;
	skb->data_len = 0;

	atomic_set(&skb->users, 1);
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;
	return skb;

nodata:
	skb_head_to_pool(skb);
nohead:
	return NULL;
}
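
/*
 *	A minimal usage sketch for alloc_skb(): reserve headroom up front so
 *	that later skb_push() calls for protocol headers do not underrun the
 *	buffer, then extend the tail with skb_put(). The 16 byte headroom
 *	and the GFP_ATOMIC mask are illustrative assumptions only.
 */
#if 0
static struct sk_buff *example_build_packet(const void *payload,
					    unsigned int len)
{
	struct sk_buff *skb = alloc_skb(len + 16, GFP_ATOMIC);

	if (skb == NULL)
		return NULL;

	skb_reserve(skb, 16);				/* headroom for headers */
	memcpy(skb_put(skb, len), payload, len);	/* copy into tail room */
	return skb;
}
#endif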


/*
 *	Slab constructor for a skb head.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
				  unsigned long flags)
{
	struct sk_buff *skb = p;

	skb->next = NULL;
	skb->prev = NULL;
	skb->list = NULL;
	skb->sk = NULL;
	skb->stamp.tv_sec=0;	/* No idea about time */
	skb->dev = NULL;
	skb->real_dev = NULL;
	skb->dst = NULL;
	memset(skb->cb, 0, sizeof(skb->cb));
	skb->pkt_type = PACKET_HOST;	/* Default type */
	skb->ip_summed = 0;
	skb->priority = 0;
	skb->security = 0;	/* By default packets are insecure */
	skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
	skb->nfmark = skb->nfcache = 0;
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
}

static void skb_drop_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list = skb_shinfo(skb)->frag_list;

	skb_shinfo(skb)->frag_list = NULL;

	do {
		struct sk_buff *this = list;
		list = list->next;
		kfree_skb(this);
	} while (list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
		skb_get(list);
}

static void skb_release_data(struct sk_buff *skb)
{
	if (!skb->cloned ||
	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
		if (skb_shinfo(skb)->nr_frags) {
			int i;
			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				put_page(skb_shinfo(skb)->frags[i].page);
		}

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);

		kfree(skb->head);
	}
}

/*
 *	Free an skbuff by memory without cleaning the state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);
	skb_head_to_pool(skb);
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb().
 */

void __kfree_skb(struct sk_buff *skb)
{
	if (skb->list) {
		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
	if(skb->destructor) {
		if (in_irq()) {
			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
				NET_CALLER(skb));
		}
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#endif
	skb_headerinit(skb, NULL, 0);  /* clean state */
	kfree_skbmem(skb);
}

/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt, @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(n->dst);
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	n->cloned = 1;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	atomic_set(&n->users, 1);
	C(protocol);
	C(security);
	C(truesize);
	C(head);
	C(data);
	C(tail);
	C(end);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#endif

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;
#ifdef CONFIG_NETFILTER
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}
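
/*
 *	A minimal sketch of skb_clone(): the clone shares the packet data
 *	with the original (both are marked cloned and dataref is bumped),
 *	so it is suitable when the data itself will not be modified, e.g.
 *	keeping a shared-data copy queued while handing one instance to
 *	another consumer. Illustrative only.
 */
#if 0
static int example_deliver_and_keep(struct sk_buff *skb,
				    struct sk_buff_head *keep_list)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (clone == NULL)
		return -ENOMEM;

	skb_queue_tail(keep_list, clone);	/* shared-data copy kept */
	netif_rx(skb);				/* hand the original onward */
	return 0;
}
#endif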

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list=NULL;
	new->sk=NULL;
	new->dev=old->dev;
	new->real_dev=old->real_dev;
	new->priority=old->priority;
	new->protocol=old->protocol;
	new->dst=dst_clone(old->dst);
	new->h.raw=old->h.raw+offset;
	new->nh.raw=old->nh.raw+offset;
	new->mac.raw=old->mac.raw+offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	atomic_set(&new->users, 1);
	new->pkt_type=old->pkt_type;
	new->stamp=old->stamp;
	new->destructor = NULL;
	new->security=old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark=old->nfmark;
	new->nfcache=old->nfcache;
	new->nfct=old->nfct;
	nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug=old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	new->tc_index = old->tc_index;
#endif
}

/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff to
 *	linear form, so that the &sk_buff becomes completely private and the
 *	caller is allowed to modify all the data of the returned buffer. This
 *	means that this function is not recommended for use in circumstances
 *	where only the header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;
	int headerlen = skb->data-skb->head;

	/*
	 *	Allocate the copy buffer
	 */
	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if(n==NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n,headerlen);
	/* Set the tail pointer and length */
	skb_put(n,skb->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
		BUG();

	copy_skb_header(n, skb);

	return n;
}
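
/*
 *	A minimal sketch of skb_copy(): take a fully private, linear copy
 *	before modifying packet data that may be shared with other users
 *	(for instance a cloned or non-linear buffer). Illustrative only.
 */
#if 0
static struct sk_buff *example_private_modify(struct sk_buff *skb)
{
	struct sk_buff *copy = skb_copy(skb, GFP_ATOMIC);

	if (copy == NULL)
		return NULL;

	copy->data[0] ^= 0xff;	/* safe: the data is no longer shared */
	kfree_skb(skb);		/* drop our reference to the original */
	return copy;
}
#endif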

/* Keep head the same: replace data */
int skb_linearize(struct sk_buff *skb, int gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	int headerlen = skb->data - skb->head;
	int expand = (skb->tail+skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = (skb->end - skb->head + expand);
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
		BUG();

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end  = data + size;

	/* Set up new pointers */
	skb->h.raw += offset;
	skb->nh.raw += offset;
	skb->mac.raw += offset;
	skb->tail += offset;
	skb->data += offset;

	/* Set up shinfo */
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;

	/* We are no longer a clone, even if we were. */
	skb->cloned = 0;

	skb->tail += skb->data_len;
	skb->data_len = 0;
	return 0;
}


/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in the header. Fragmented data remains shared. This is used when
 *	the caller wishes to modify only the header of the &sk_buff and
 *	needs a private copy of the header to alter. Returns %NULL on
 *	failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */
	n=alloc_skb(skb->end - skb->head, gfp_mask);
	if(n==NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n,skb->data-skb->head);
	/* Set the tail pointer and length */
	skb_put(n,skb_headlen(skb));
	/* Copy the bytes */
	memcpy(n->data, skb->data, n->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	n->data_len = skb->data_len;
	n->len = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			get_page(skb_shinfo(n)->frags[i].page);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);

	return n;
}
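
/*
 *	A minimal sketch of pskb_copy(): only the header (linear) part is
 *	duplicated, paged fragments and the frag_list stay shared, which is
 *	enough when just protocol headers are rewritten. The byte offset
 *	written below is an illustrative assumption only.
 */
#if 0
static struct sk_buff *example_header_modify(struct sk_buff *skb)
{
	struct sk_buff *copy = pskb_copy(skb, GFP_ATOMIC);

	if (copy == NULL)
		return NULL;

	copy->nh.raw[8] = 64;	/* e.g. rewrite a field in the IP header */
	kfree_skb(skb);
	return copy;
}
#endif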

/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates an identical copy, if @nhead and @ntail are
 *	zero) the header of the skb. The &sk_buff itself is not changed.
 *	The &sk_buff MUST have a reference count of 1. Returns zero on
 *	success or a negative error code if expansion failed; in that case
 *	the &sk_buff is not changed.
 *
 *	All the pointers pointing into the skb header may change and must be
 *	reloaded after a call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
	int i;
	u8 *data;
	int size = nhead + (skb->end - skb->head) + ntail;
	long off;

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void. */
	memcpy(data+nhead, skb->head, skb->tail-skb->head);
	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	off = (data+nhead) - skb->head;

	skb->head = data;
	skb->end  = data+size;

	skb->data += off;
	skb->tail += off;
	skb->mac.raw += off;
	skb->h.raw += off;
	skb->nh.raw += off;
	skb->cloned = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}
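
/*
 *	A minimal sketch of pskb_expand_head(): grow the headroom of a buffer
 *	with a single reference so that an extra header can be pushed. Any
 *	cached pointers into the old head must be reloaded afterwards, as the
 *	kerneldoc above notes. Illustrative only.
 */
#if 0
static int example_make_room_for_header(struct sk_buff *skb, int hlen)
{
	/* Reallocate if the headroom is too small or the data is shared. */
	if (skb_headroom(skb) < hlen || skb_cloned(skb)) {
		int nhead = hlen > skb_headroom(skb) ?
			    hlen - skb_headroom(skb) : 0;

		if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
			return -ENOMEM;
	}
	memset(skb_push(skb, hlen), 0, hlen);	/* space for the new header */
	return 0;
}
#endif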

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *
skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	if (delta <= 0)
		return pskb_copy(skb, GFP_ATOMIC);

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL ||
	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
		return skb2;

	kfree_skb(skb2);
	return NULL;
}


/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 */


struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom,
				int newtailroom,
				int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */

	n=alloc_skb(newheadroom + skb->len + newtailroom,
		    gfp_mask);
	if(n==NULL)
		return NULL;

	skb_reserve(n,newheadroom);

	/* Set the tail pointer and length */
	skb_put(n,skb->len);

	/* Copy the data only. */
	if (skb_copy_bits(skb, 0, n->data, skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
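
/*
 *	A minimal sketch of skb_copy_expand(): like skb_copy(), but the new
 *	buffer is given extra head room, e.g. for an encapsulation header.
 *	The room values used here are illustrative assumptions.
 */
#if 0
static struct sk_buff *example_copy_for_encap(const struct sk_buff *skb,
					      int encap_hlen)
{
	return skb_copy_expand(skb, skb_headroom(skb) + encap_hlen,
			       skb_tailroom(skb), GFP_ATOMIC);
}
#endif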

/**
 *	skb_pad			-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return NULL in out of memory cases.
 */

struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
{
	struct sk_buff *nskb;

	/* If the skbuff is non linear tailroom is always zero.. */
	if(skb_tailroom(skb) >= pad)
	{
		memset(skb->data+skb->len, 0, pad);
		return skb;
	}

	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
	kfree_skb(skb);
	if(nskb)
		memset(nskb->data+nskb->len, 0, pad);
	return nskb;
}
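
/*
 *	A minimal sketch of skb_pad() as a driver might use it before
 *	transmission: pad short frames to the minimum Ethernet frame length
 *	(60 bytes, ETH_ZLEN). The pattern and the length value are
 *	illustrative assumptions. Note that skb_pad() frees the original
 *	buffer on failure, so only the returned pointer may be used.
 */
#if 0
static int example_pad_for_xmit(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	if (skb->len < 60) {
		/* On failure skb_pad() has already freed the buffer. */
		skb = skb_pad(skb, 60 - skb->len);
		if (skb == NULL)
			return -ENOMEM;
		*pskb = skb;
		/* The zeroed pad lives past skb->tail; the hardware is
		 * told to send 60 bytes while skb->len stays unchanged. */
	}
	return 0;
}
#endif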

/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
 * If realloc==0 and trimming is impossible without changing the data,
 * it is a BUG().
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;

	for (i=0; i<nfrags; i++) {
		int end = offset + skb_shinfo(skb)->frags[i].size;
		if (end > len) {
			if (skb_cloned(skb)) {
				if (!realloc)
					BUG();
				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
					return -ENOMEM;
			}
			if (len <= offset) {
				put_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb)->nr_frags--;
			} else {
				skb_shinfo(skb)->frags[i].size = len-offset;
			}
		}
		offset = end;
	}

	if (offset < len) {
		skb->data_len -= skb->len - len;
		skb->len = len;
	} else {
		if (len <= skb_headlen(skb)) {
			skb->len = len;
			skb->data_len = 0;
			skb->tail = skb->data + len;
			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
				skb_drop_fraglist(skb);
		} else {
			skb->data_len -= skb->len - len;
			skb->len = len;
		}
	}

	return 0;
}
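
/*
 *	A minimal sketch of trimming through the pskb_trim() wrapper from
 *	<linux/skbuff.h>, which falls back to ___pskb_trim() with realloc=1
 *	when the buffer is non-linear: shorten a packet to the length
 *	claimed by its protocol header. Illustrative only.
 */
#if 0
static int example_trim_to_claimed_len(struct sk_buff *skb,
				       unsigned int claimed_len)
{
	if (skb->len > claimed_len)
		return pskb_trim(skb, claimed_len);
	return 0;
}
#endif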

/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	This function only makes sense on a fragmented &sk_buff: it expands
 *	the header, moving its tail forward and copying the necessary data
 *	from the fragmented part.
 *
 *	&sk_buff MUST have reference count of 1.
 *
 *	Returns %NULL (and the &sk_buff does not change) if the pull failed,
 *	or the value of the new tail of the skb on success.
 *
 *	All the pointers pointing into the skb header may change and must be
 *	reloaded after a call to this function.
 */

/* Moves the tail of the skb head forward, copying data from the fragmented
 * part when necessary.
 * 1. It may fail due to allocation failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
{
	int i, k, eat;

	/* If the skb does not have enough free space at the tail, get a new
	 * one plus 128 bytes for future expansions. If we have enough room
	 * at the tail, reallocate without expansion only if the skb is cloned.
	 */
	eat = (skb->tail+delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
			return NULL;
	}

	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
		BUG();

	/* Optimization: no fragment list, no reason to pre-estimate the
	 * size of pulled pages. Superb.
	 */
	if (skb_shinfo(skb)->frag_list == NULL)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but taking into account that pulling is expected to be a very
	 * rare operation, it is worth fighting against further bloating
	 * of the skb head and crucifying ourselves here instead.
	 * Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			if (list == NULL)
				BUG();

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (clone == NULL)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (pskb_pull(list, eat) == NULL) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb->tail;
}
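
/*
 *	A minimal sketch of the usual caller pattern: protocol code does not
 *	call __pskb_pull_tail() directly but goes through pskb_may_pull()
 *	from <linux/skbuff.h>, which only falls back to this function when
 *	the bytes it needs are not yet in the linear header. The 20 byte
 *	value below (an IPv4 header without options) is an illustrative
 *	assumption.
 */
#if 0
static int example_need_linear_header(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, 20))
		return -EINVAL;		/* packet too short or pull failed */

	/* The first 20 bytes are now readable through skb->data. */
	return skb->data[0];
}
#endif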

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb->len - skb->data_len;

	if (offset > (int)skb->len-len)
		goto fault;

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		memcpy(to, skb->data + offset, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
			       offset-start, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_bits(list, offset-start, to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}

/* Checksum skb data. */

unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Checksum header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data+offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset-start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				unsigned int csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset-start, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	BUG();
	return csum;
}

/* Both of the above in one bottle. */

unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
						      offset-start, to, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			unsigned int csum2;
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				if (copy > len)
					copy = len;
				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	BUG();
	return csum;
}

void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	unsigned int csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_HW)
		csstart = skb->h.raw - skb->data;
	else
		csstart = skb->len - skb->data_len;

	if (csstart > skb->len - skb->data_len)
		BUG();

	memcpy(to, skb->data, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
				skb->len-csstart, 0);

	if (skb->ip_summed == CHECKSUM_HW) {
		long csstuff = csstart + skb->csum;

		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
	}
}

#if 0
/*
 * 	Tune the memory allocator for a new MTU size.
 */
void skb_add_mtu(int mtu)
{
	/* Must match allocation in alloc_skb */
	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

	kmem_add_cache_size(mtu);
}
#endif

void __init skb_init(void)
{
	int i;

	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      skb_headerinit, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	for (i=0; i<NR_CPUS; i++)
		skb_queue_head_init(&skb_head_pool[i].list);
}