/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).  See the example below.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
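
/*
 *	For example (an illustrative sketch only), a caller that cannot rely
 *	on any outer serialization would use the locked wrappers instead,
 *	which boil down to:
 *
 *		unsigned long flags;
 *		struct sk_buff *skb;
 *
 *		spin_lock_irqsave(&list->lock, flags);
 *		skb = __skb_dequeue(list);
 *		spin_unlock_irqrestore(&list->lock, flags);
 *
 *	which is essentially what skb_dequeue() itself does.
 */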

/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>

#include <asm/uaccess.h>
#include <asm/system.h>

int sysctl_hot_list_len = 128;

static kmem_cache_t *skbuff_head_cache;

static union {
	struct sk_buff_head list;
	char pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];

/*
 *	Keep out-of-line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always
 *	reliable.
 */

/**
 *	skb_over_panic - private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_put(). Not user callable.
 */

void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
	printk("skput:over: %p:%d put:%d dev:%s",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/**
 *	skb_under_panic - private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_push(). Not user callable.
 */


void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
	printk("skput:under: %p:%d put:%d dev:%s",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

static __inline__ struct sk_buff *skb_head_from_pool(void)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list)) {
		struct sk_buff *skb;
		unsigned long flags;

		local_irq_save(flags);
		skb = __skb_dequeue(list);
		local_irq_restore(flags);
		return skb;
	}
	return NULL;
}

static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list) < sysctl_hot_list_len) {
		unsigned long flags;

		local_irq_save(flags);
		__skb_queue_head(list, skb);
		local_irq_restore(flags);

		return;
	}
	kmem_cache_free(skbuff_head_cache, skb);
}

/*	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 *	alloc_skb - allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
 *	tail room of @size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */

struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk(KERN_ERR "alloc_skb called nonatomically "
			       "from interrupt %p\n", NET_CALLER(size));
			BUG();
		}
		gfp_mask &= ~__GFP_WAIT;
	}

	/* Get the HEAD */
	skb = skb_head_from_pool();
	if (skb == NULL) {
		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
		if (skb == NULL)
			goto nohead;
	}

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* XXX: does not include slab overhead */
	skb->truesize = size + sizeof(struct sk_buff);

	/* Load the data pointers. */
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end = data + size;

	/* Set up other state */
	skb->len = 0;
	skb->cloned = 0;
	skb->data_len = 0;

	atomic_set(&skb->users, 1);
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;
	return skb;

nodata:
	skb_head_to_pool(skb);
nohead:
	return NULL;
}

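/*
 *	An illustrative usage sketch (not part of this file): a typical
 *	driver receive path, where "dev", "rx_buf" and "len" come from the
 *	hypothetical driver, reserving 2 bytes so the IP header lands on a
 *	16 byte boundary:
 *
 *		struct sk_buff *skb = alloc_skb(len + 2, GFP_ATOMIC);
 *
 *		if (skb == NULL)
 *			return;
 *		skb_reserve(skb, 2);
 *		memcpy(skb_put(skb, len), rx_buf, len);
 *		skb->dev = dev;
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);
 */
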

/*
 *	Slab constructor for a skb head.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
				  unsigned long flags)
{
	struct sk_buff *skb = p;

	skb->next = NULL;
	skb->prev = NULL;
	skb->list = NULL;
	skb->sk = NULL;
	skb->stamp.tv_sec = 0;	/* No idea about time */
	skb->dev = NULL;
	skb->real_dev = NULL;
	skb->dst = NULL;
	memset(skb->cb, 0, sizeof(skb->cb));
	skb->pkt_type = PACKET_HOST;	/* Default type */
	skb->ip_summed = 0;
	skb->priority = 0;
	skb->security = 0;	/* By default packets are insecure */
	skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
	skb->nfmark = skb->nfcache = 0;
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
}

static void skb_drop_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list = skb_shinfo(skb)->frag_list;

	skb_shinfo(skb)->frag_list = NULL;

	do {
		struct sk_buff *this = list;
		list = list->next;
		kfree_skb(this);
	} while (list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
		skb_get(list);
}

static void skb_release_data(struct sk_buff *skb)
{
	if (!skb->cloned ||
	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
		if (skb_shinfo(skb)->nr_frags) {
			int i;
			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				put_page(skb_shinfo(skb)->frags[i].page);
		}

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);

		kfree(skb->head);
	}
}

/*
 *	Free an skbuff by memory without cleaning the state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);
	skb_head_to_pool(skb);
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb().
 */

void __kfree_skb(struct sk_buff *skb)
{
	if (skb->list) {
		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
	if (skb->destructor) {
		if (in_irq()) {
			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
				NET_CALLER(skb));
		}
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#endif
	skb_headerinit(skb, NULL, 0);	/* clean state */
	kfree_skbmem(skb);
}

/**
 *	skb_clone - duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(n->dst);
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	n->cloned = 1;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	atomic_set(&n->users, 1);
	C(protocol);
	C(security);
	C(truesize);
	C(head);
	C(data);
	C(tail);
	C(end);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#endif

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;
#ifdef CONFIG_NETFILTER
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}
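
/*
 *	Illustrative sketch of the usual cloning pattern (hypothetical
 *	caller): keep the original around for a possible retransmit and
 *	hand a clone to the device layer, since both share the packet data:
 *
 *		struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 *
 *		if (clone)
 *			dev_queue_xmit(clone);
 *
 *	The original skb stays on the caller's own queue untouched.
 */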

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list = NULL;
	new->sk = NULL;
	new->dev = old->dev;
	new->real_dev = old->real_dev;
	new->priority = old->priority;
	new->protocol = old->protocol;
	new->dst = dst_clone(old->dst);
	new->h.raw = old->h.raw + offset;
	new->nh.raw = old->nh.raw + offset;
	new->mac.raw = old->mac.raw + offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	atomic_set(&new->users, 1);
	new->pkt_type = old->pkt_type;
	new->stamp = old->stamp;
	new->destructor = NULL;
	new->security = old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark = old->nfmark;
	new->nfcache = old->nfcache;
	new->nfct = old->nfct;
	nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug = old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	new->tc_index = old->tc_index;
#endif
}

/**
 *	skb_copy - create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff to a
 *	linear one, so that the &sk_buff becomes completely private and the
 *	caller is allowed to modify all the data of the returned buffer. This
 *	means that this function is not recommended for use in circumstances
 *	when only the header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;
	int headerlen = skb->data - skb->head;

	/*
	 *	Allocate the copy buffer
	 */
	n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if (n == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);

	return n;
}
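
/*
 *	Sketch (illustrative only): a caller that must rewrite the payload
 *	of a buffer it does not exclusively own takes a private linear copy
 *	first:
 *
 *		struct sk_buff *priv = skb_copy(skb, GFP_ATOMIC);
 *
 *		if (priv == NULL)
 *			return -ENOMEM;
 *
 *	priv->data may now be modified freely; nothing in it is shared.
 */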

/* Keep head the same: replace data */
int skb_linearize(struct sk_buff *skb, int gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	int headerlen = skb->data - skb->head;
	int expand = (skb->tail + skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = (skb->end - skb->head + expand);
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
		BUG();

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end = data + size;

	/* Set up new pointers */
	skb->h.raw += offset;
	skb->nh.raw += offset;
	skb->mac.raw += offset;
	skb->tail += offset;
	skb->data += offset;

	/* Set up shinfo */
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;

	/* We are no longer a clone, even if we were. */
	skb->cloned = 0;

	skb->tail += skb->data_len;
	skb->data_len = 0;
	return 0;
}
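
/*
 *	Sketch (illustrative): a driver without scatter-gather support can
 *	force a flat buffer before mapping it for DMA:
 *
 *		if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC))
 *			return -ENOMEM;
 */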

/**
 *	pskb_copy - create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in the header. Fragmented data remain shared. This is used when
 *	the caller wishes to modify only the header of the &sk_buff and
 *	needs a private copy of the header to alter. Returns %NULL on
 *	failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */
	n = alloc_skb(skb->end - skb->head, gfp_mask);
	if (n == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	memcpy(n->data, skb->data, n->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	n->data_len = skb->data_len;
	n->len = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			get_page(skb_shinfo(n)->frags[i].page);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);

	return n;
}
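
/*
 *	Sketch (illustrative): modifying only the protocol headers of a
 *	possibly shared buffer:
 *
 *		struct sk_buff *hdr = pskb_copy(skb, GFP_ATOMIC);
 *
 *		if (hdr == NULL)
 *			return NULL;
 *
 *	The linear header area of "hdr" is now private and may be edited;
 *	the page fragments and the frag list remain shared with "skb".
 */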

/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates an identical copy, if @nhead and @ntail are zero)
 *	the header of @skb. The &sk_buff itself is not changed and MUST have
 *	a reference count of 1. Returns zero on success or an error code if
 *	the expansion failed; in the latter case the &sk_buff is not changed.
 *
 *	All the pointers pointing into the skb header may change and must be
 *	reloaded after a call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
	int i;
	u8 *data;
	int size = nhead + (skb->end - skb->head) + ntail;
	long off;

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void. */
	memcpy(data + nhead, skb->head, skb->tail - skb->head);
	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	off = (data + nhead) - skb->head;

	skb->head = data;
	skb->end = data + size;

	skb->data += off;
	skb->tail += off;
	skb->mac.raw += off;
	skb->h.raw += off;
	skb->nh.raw += off;
	skb->cloned = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}
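
/*
 *	Most callers reach this through small helpers such as skb_cow();
 *	a direct call might look like this (illustrative sketch, "needed"
 *	is hypothetical, and every cached header pointer must be reloaded
 *	afterwards):
 *
 *		if (skb_headroom(skb) < needed &&
 *		    pskb_expand_head(skb,
 *				     SKB_DATA_ALIGN(needed - skb_headroom(skb)),
 *				     0, GFP_ATOMIC))
 *			return -ENOMEM;
 */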

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *
skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	if (delta <= 0)
		return pskb_copy(skb, GFP_ATOMIC);

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL ||
	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
		return skb2;

	kfree_skb(skb2);
	return NULL;
}


/**
 *	skb_copy_expand - copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 */


struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom,
				int newtailroom,
				int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */

	n = alloc_skb(newheadroom + skb->len + newtailroom,
		      gfp_mask);
	if (n == NULL)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Copy the data only. */
	if (skb_copy_bits(skb, 0, n->data, skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
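
/*
 *	Sketch of a typical use (illustrative; "outer_hdr" is a hypothetical
 *	16 byte encapsulation header): make room in front of a buffer the
 *	caller may not own outright, then prepend the new header:
 *
 *		struct sk_buff *nskb = skb_copy_expand(skb,
 *						       skb_headroom(skb) + 16,
 *						       skb_tailroom(skb),
 *						       GFP_ATOMIC);
 *
 *		if (nskb == NULL)
 *			return -ENOMEM;
 *		memcpy(skb_push(nskb, 16), outer_hdr, 16);
 */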

/**
 *	skb_pad - zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return %NULL in out of memory cases.
 */

struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
{
	struct sk_buff *nskb;

	/* If the skbuff is non linear, tailroom is always zero.. */
	if (skb_tailroom(skb) >= pad) {
		memset(skb->data + skb->len, 0, pad);
		return skb;
	}

	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad,
			       GFP_ATOMIC);
	kfree_skb(skb);
	if (nskb)
		memset(nskb->data + nskb->len, 0, pad);
	return nskb;
}
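
/*
 *	Illustrative driver-side sketch: pad a short ethernet frame up to the
 *	ETH_ZLEN minimum before handing it to hardware that does not pad:
 *
 *		if (skb->len < ETH_ZLEN) {
 *			skb = skb_pad(skb, ETH_ZLEN - skb->len);
 *			if (skb == NULL)
 *				return 0;
 *		}
 *
 *	Note that on the reallocation path skb_pad() has already freed the
 *	original buffer, so there is nothing left to release on failure.
 */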

/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
 * If realloc == 0 and trimming is impossible without changing the data,
 * it is BUG().
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;

	for (i = 0; i < nfrags; i++) {
		int end = offset + skb_shinfo(skb)->frags[i].size;
		if (end > len) {
			if (skb_cloned(skb)) {
				if (!realloc)
					BUG();
				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
					return -ENOMEM;
			}
			if (len <= offset) {
				put_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb)->nr_frags--;
			} else {
				skb_shinfo(skb)->frags[i].size = len - offset;
			}
		}
		offset = end;
	}

	if (offset < len) {
		skb->data_len -= skb->len - len;
		skb->len = len;
	} else {
		if (len <= skb_headlen(skb)) {
			skb->len = len;
			skb->data_len = 0;
			skb->tail = skb->data + len;
			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
				skb_drop_fraglist(skb);
		} else {
			skb->data_len -= skb->len - len;
			skb->len = len;
		}
	}

	return 0;
}
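
/*
 *	Callers normally use the pskb_trim()/skb_trim() wrappers rather than
 *	this routine directly. An illustrative sketch ("ip_len" being the
 *	hypothetical length reported by the IP header):
 *
 *		if (skb->len > ip_len && pskb_trim(skb, ip_len))
 *			goto drop;
 */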

/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	The function makes sense only on a fragmented &sk_buff: it expands
 *	the header, moving its tail forward and copying the necessary data
 *	from the fragmented part.
 *
 *	The &sk_buff MUST have a reference count of 1.
 *
 *	Returns %NULL (and the &sk_buff does not change) if the pull failed,
 *	or the value of the new tail of the skb in the case of success.
 *
 *	All the pointers pointing into the skb header may change and must be
 *	reloaded after a call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	int i, k, eat;

	/* If skb does not have enough free space at the tail, get a new one
	 * plus 128 bytes for future expansions. If we have enough room at
	 * the tail, reallocate without expansion only if skb is cloned.
	 */
	eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, GFP_ATOMIC))
			return NULL;
	}

	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
		BUG();

	/* Optimization: no fragments, no reason to pre-estimate
	 * size of pulled pages. Superb.
	 */
	if (skb_shinfo(skb)->frag_list == NULL)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but taking into account that pulling is expected to
	 * be a very rare operation, it is worth fighting against
	 * further bloating of the skb head and crucifying ourselves
	 * here instead. Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			if (list == NULL)
				BUG();

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (clone == NULL)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (pskb_pull(list, eat) == NULL) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb->tail;
}
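
/*
 *	In practice this is reached via pskb_may_pull(). A protocol handler
 *	that needs "hlen" bytes of linear header (a hypothetical value) would
 *	do something like:
 *
 *		if (!pskb_may_pull(skb, hlen))
 *			goto drop;
 *
 *	after which skb->data is followed by at least "hlen" contiguous bytes.
 */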

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb->len - skb->data_len;

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		memcpy(to, skb->data + offset, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
			memcpy(to, vaddr + skb_shinfo(skb)->frags[i].page_offset +
			       offset - start, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_bits(list, offset - start, to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}
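
/*
 *	Illustrative use: gather a header that may be spread across fragments
 *	into a local buffer ("thoff" being a hypothetical transport header
 *	offset relative to skb->data):
 *
 *		struct tcphdr th;
 *
 *		if (skb_copy_bits(skb, thoff, &th, sizeof(th)))
 *			goto drop;
 */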

/* Checksum skb data. */

unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Checksum header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset - start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				unsigned int csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset - start, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	BUG();
	return csum;
}

/* Both of above in one bottle. */

unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
							  offset - start, to, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
			unsigned int csum2;
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				csum2 = skb_copy_and_csum_bits(list, offset - start, to, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	BUG();
	return csum;
}

void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	unsigned int csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_HW)
		csstart = skb->h.raw - skb->data;
	else
		csstart = skb->len - skb->data_len;

	if (csstart > skb->len - skb->data_len)
		BUG();

	memcpy(to, skb->data, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
					      skb->len - csstart, 0);

	if (skb->ip_summed == CHECKSUM_HW) {
		long csstuff = csstart + skb->csum;

		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
	}
}

#if 0
/*
 *	Tune the memory allocator for a new MTU size.
 */
void skb_add_mtu(int mtu)
{
	/* Must match allocation in alloc_skb */
	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

	kmem_add_cache_size(mtu);
}
#endif

void __init skb_init(void)
{
	int i;

	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      skb_headerinit, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	for (i = 0; i < NR_CPUS; i++)
		skb_queue_head_init(&skb_head_pool[i].list);
}