/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load
 *					balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *		Robert Olsson	:	Removed skb_head_pool
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/scatterlist.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include "kmap_skb.h"

static struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;

/*
 *	Keep out-of-line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always
 *	reliable.
 */

/**
 *	skb_over_panic	-	private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_put(). Not user callable.
 */
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
			  "data:%p tail:%#lx end:%#lx dev:%s\n",
	       here, skb->len, sz, skb->head, skb->data,
	       (unsigned long)skb->tail, (unsigned long)skb->end,
	       skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/**
 *	skb_under_panic	-	private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_push(). Not user callable.
 */

void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
			  "data:%p tail:%#lx end:%#lx dev:%s\n",
	       here, skb->len, sz, skb->head, skb->data,
	       (unsigned long)skb->tail, (unsigned long)skb->end,
	       skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

void skb_truesize_bug(struct sk_buff *skb)
{
	printk(KERN_ERR "SKB BUG: Invalid truesize (%u) "
	       "len=%u, sizeof(sk_buff)=%Zd\n",
	       skb->truesize, skb->len, sizeof(struct sk_buff));
}
EXPORT_SYMBOL(skb_truesize_bug);

/*	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 *	__alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *	@fclone: allocate from fclone cache instead of head cache
 *		and allocate a cloned (child) skb
 *	@node: numa node to allocate memory on
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
 *	tail room of @size bytes. The object has a reference count of one.
 *	Returns the buffer on success, or %NULL on failure.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
			    int fclone, int node)
{
	struct kmem_cache *cache;
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	u8 *data;

	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;

	size = SKB_DATA_ALIGN(size);
	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
					 gfp_mask, node);
	if (!data)
		goto nodata;

	/*
	 * See comment in sk_buff definition, just before the 'tail' member
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb->truesize = size + sizeof(struct sk_buff);
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	atomic_set(&shinfo->dataref, 1);
	shinfo->nr_frags = 0;
	shinfo->gso_size = 0;
	shinfo->gso_segs = 0;
	shinfo->gso_type = 0;
	shinfo->ip6_frag_id = 0;
	shinfo->frag_list = NULL;

	if (fclone) {
		struct sk_buff *child = skb + 1;
		atomic_t *fclone_ref = (atomic_t *) (child + 1);

		skb->fclone = SKB_FCLONE_ORIG;
		atomic_set(fclone_ref, 1);

		child->fclone = SKB_FCLONE_UNAVAILABLE;
	}
out:
	return skb;
nodata:
	kmem_cache_free(cache, skb);
	skb = NULL;
	goto out;
}
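
/*
 * Illustrative sketch, guarded out with "#if 0" and not part of this file:
 * the usual headroom/tailroom discipline built on top of __alloc_skb() via
 * the alloc_skb() wrapper.  The sizes and the name example_build_skb() are
 * hypothetical.
 */
#if 0
static struct sk_buff *example_build_skb(const void *payload,
					 unsigned int hlen, unsigned int dlen)
{
	struct sk_buff *skb;

	/* Reserve room for both protocol headers and payload up front. */
	skb = alloc_skb(hlen + dlen, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, hlen);		/* headroom for later skb_push() */
	memcpy(skb_put(skb, dlen), payload, dlen);	/* tailroom becomes data */
	return skb;
}
#endif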

/**
 *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
 *	@dev: network device to receive on
 *	@length: length to allocate
 *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 *
 *	Allocate a new &sk_buff and assign it a usage count of one. The
 *	buffer has unspecified headroom built in. Users should allocate
 *	the headroom they think they need without accounting for the
 *	built in space. The built in space is used for optimisations.
 *
 *	%NULL is returned if there is no free memory.
 */
struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
				   unsigned int length, gfp_t gfp_mask)
{
	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
	struct sk_buff *skb;

	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
	if (likely(skb)) {
		skb_reserve(skb, NET_SKB_PAD);
		skb->dev = dev;
	}
	return skb;
}
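
/*
 * Illustrative sketch, guarded out with "#if 0": a typical driver receive
 * path using the netdev_alloc_skb() wrapper.  eth_type_trans() comes from
 * <linux/etherdevice.h>, which this file does not include; the function
 * name example_rx() and the copy-based reception are hypothetical.
 */
#if 0
static void example_rx(struct net_device *dev, const void *pkt,
		       unsigned int len)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
	if (!skb)
		return;			/* drop on allocation failure */

	skb_reserve(skb, NET_IP_ALIGN);	/* align the IP header */
	memcpy(skb_put(skb, len), pkt, len);
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);
}
#endif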

static void skb_drop_list(struct sk_buff **listp)
{
	struct sk_buff *list = *listp;

	*listp = NULL;

	do {
		struct sk_buff *this = list;
		list = list->next;
		kfree_skb(this);
	} while (list);
}

static inline void skb_drop_fraglist(struct sk_buff *skb)
{
	skb_drop_list(&skb_shinfo(skb)->frag_list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
		skb_get(list);
}

static void skb_release_data(struct sk_buff *skb)
{
	if (!skb->cloned ||
	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
			       &skb_shinfo(skb)->dataref)) {
		if (skb_shinfo(skb)->nr_frags) {
			int i;
			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				put_page(skb_shinfo(skb)->frags[i].page);
		}

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);

		kfree(skb->head);
	}
}

/*
 *	Free an skbuff's memory without cleaning its state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	struct sk_buff *other;
	atomic_t *fclone_ref;

	skb_release_data(skb);
	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
		kmem_cache_free(skbuff_head_cache, skb);
		break;

	case SKB_FCLONE_ORIG:
		fclone_ref = (atomic_t *) (skb + 2);
		if (atomic_dec_and_test(fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, skb);
		break;

	case SKB_FCLONE_CLONE:
		fclone_ref = (atomic_t *) (skb + 1);
		other = skb - 1;

		/* The clone portion is available for
		 * fast-cloning again.
		 */
		skb->fclone = SKB_FCLONE_UNAVAILABLE;

		if (atomic_dec_and_test(fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, other);
		break;
	}
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb().
 */

void __kfree_skb(struct sk_buff *skb)
{
	dst_release(skb->dst);
#ifdef CONFIG_XFRM
	secpath_put(skb->sp);
#endif
	if (skb->destructor) {
		WARN_ON(in_irq());
		skb->destructor(skb);
	}
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(skb->nfct);
	nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(skb->nf_bridge);
#endif
/* XXX: IS this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = 0;
#endif
#endif

	kfree_skbmem(skb);
}

/**
 *	kfree_skb - free an sk_buff
 *	@skb: buffer to free
 *
 *	Drop a reference to the buffer and free it if the usage count has
 *	hit zero.
 */
void kfree_skb(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;
	__kfree_skb(skb);
}
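
/*
 * Illustrative sketch, guarded out with "#if 0": reference counting with
 * skb_get()/kfree_skb().  Each consumer holds its own reference and drops
 * it independently; the buffer is only freed when the count hits zero.
 * The consumer helper names are hypothetical.
 */
#if 0
static void example_share(struct sk_buff *skb)
{
	/* Take a second reference before handing the skb to a second path. */
	example_consumer_a(skb_get(skb));
	example_consumer_b(skb);

	/* Both consumers eventually call kfree_skb(); whichever drops the
	 * last reference actually releases the buffer.
	 */
}
#endif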

/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
	struct sk_buff *n;

	n = skb + 1;
	if (skb->fclone == SKB_FCLONE_ORIG &&
	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
		atomic_t *fclone_ref = (atomic_t *) (n + 1);
		n->fclone = SKB_FCLONE_CLONE;
		atomic_inc(fclone_ref);
	} else {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
		n->fclone = SKB_FCLONE_UNAVAILABLE;
	}

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->sk = NULL;
	C(tstamp);
	C(dev);
	C(transport_header);
	C(network_header);
	C(mac_header);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(mac_len);
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	C(ipvs_property);
#endif
	C(protocol);
	n->destructor = NULL;
	C(mark);
	__nf_copy(n, skb);
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	n->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
	C(iif);
#endif
	skb_copy_secmark(n, skb);
#endif
	C(truesize);
	atomic_set(&n->users, 1);
	C(head);
	C(data);
	C(tail);
	C(end);

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
}
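
/*
 * Illustrative sketch, guarded out with "#if 0": cloning lets two paths
 * hold distinct &sk_buff heads over the same (read-only) packet data,
 * e.g. keeping a retransmit copy while handing one head to the device.
 * The helper name example_xmit_keep_copy() is hypothetical.
 */
#if 0
static struct sk_buff *example_xmit_keep_copy(struct sk_buff *skb)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (!clone)
		return NULL;

	/* Neither head may scribble on the shared data from here on;
	 * a writer must first obtain a private copy (see skb_copy()
	 * and pskb_copy() below).
	 */
	return clone;
}
#endif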

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
#ifndef NET_SKBUFF_DATA_USES_OFFSET
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;
#endif
	new->sk = NULL;
	new->dev = old->dev;
	new->priority = old->priority;
	new->protocol = old->protocol;
	new->dst = dst_clone(old->dst);
#ifdef CONFIG_INET
	new->sp = secpath_get(old->sp);
#endif
	new->transport_header = old->transport_header;
	new->network_header = old->network_header;
	new->mac_header = old->mac_header;
#ifndef NET_SKBUFF_DATA_USES_OFFSET
	/* {transport,network,mac}_header are relative to skb->head */
	new->transport_header += offset;
	new->network_header += offset;
	new->mac_header += offset;
#endif
	memcpy(new->cb, old->cb, sizeof(old->cb));
	new->local_df = old->local_df;
	new->fclone = SKB_FCLONE_UNAVAILABLE;
	new->pkt_type = old->pkt_type;
	new->tstamp = old->tstamp;
	new->destructor = NULL;
	new->mark = old->mark;
	__nf_copy(new, old);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	new->ipvs_property = old->ipvs_property;
#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
	new->tc_verd = old->tc_verd;
#endif
	new->tc_index = old->tc_index;
#endif
	skb_copy_secmark(new, old);
	atomic_set(&new->users, 1);
	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}

/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff into a
 *	linear one, so that the &sk_buff becomes completely private and the
 *	caller is allowed to modify all the data of the returned buffer. This
 *	means that this function is not recommended for use in circumstances
 *	when only the header is going to be modified. Use pskb_copy() instead.
 */

519 | { | |
520 | int headerlen = skb->data - skb->head; | |
521 | /* | |
522 | * Allocate the copy buffer | |
523 | */ | |
524 | struct sk_buff *n; | |
525 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | |
526 | n = alloc_skb(skb->end + skb->data_len, gfp_mask); | |
527 | #else | |
528 | n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); | |
529 | #endif | |
530 | if (!n) | |
531 | return NULL; | |
532 | ||
533 | /* Set the data pointer */ | |
534 | skb_reserve(n, headerlen); | |
535 | /* Set the tail pointer and length */ | |
536 | skb_put(n, skb->len); | |
537 | n->csum = skb->csum; | |
538 | n->ip_summed = skb->ip_summed; | |
539 | ||
540 | if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) | |
541 | BUG(); | |
542 | ||
543 | copy_skb_header(n, skb); | |
544 | return n; | |
545 | } | |


/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in the header. Fragmented data remain shared. This is used when
 *	the caller wishes to modify only the header of the &sk_buff and
 *	needs a private copy of the header to alter. Returns %NULL on
 *	failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	n = alloc_skb(skb->end, gfp_mask);
#else
	n = alloc_skb(skb->end - skb->head, gfp_mask);
#endif
	if (!n)
		goto out;

	/* Set the data pointer */
	skb_reserve(n, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	skb_copy_from_linear_data(skb, n->data, n->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	n->truesize += skb->data_len;
	n->data_len = skb->data_len;
	n->len = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			get_page(skb_shinfo(n)->frags[i].page);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);
out:
	return n;
}
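
/*
 * Illustrative sketch, guarded out with "#if 0": choosing between the copy
 * primitives above when a shared buffer must be written to.  skb_copy()
 * privatizes (and linearizes) everything; pskb_copy() privatizes only the
 * header portion and keeps paged data shared.  The helper name is
 * hypothetical.
 */
#if 0
static struct sk_buff *example_make_writable(struct sk_buff *skb,
					     int headers_only)
{
	if (headers_only)
		return pskb_copy(skb, GFP_ATOMIC);	/* cheap: header only */
	return skb_copy(skb, GFP_ATOMIC);		/* full private copy */
}
#endif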

/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates an identical copy, if @nhead and @ntail are zero)
 *	the header of the skb. The &sk_buff itself is not changed. The
 *	&sk_buff MUST have a reference count of 1. Returns zero on success,
 *	or a negative error code if expansion failed; in the latter case the
 *	&sk_buff is not changed.
 *
 *	All the pointers pointing into the skb header may change and must be
 *	reloaded after a call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
		     gfp_t gfp_mask)
{
	int i;
	u8 *data;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	int size = nhead + skb->end + ntail;
#else
	int size = nhead + (skb->end - skb->head) + ntail;
#endif
	long off;

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void. */
	memcpy(data + nhead, skb->head,
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	       skb->tail);
#else
	       skb->tail - skb->head);
#endif
	memcpy(data + size, skb_end_pointer(skb),
	       sizeof(struct skb_shared_info));

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	off = (data + nhead) - skb->head;

	skb->head = data;
	skb->data += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	skb->end = size;
	off = nhead;
#else
	skb->end = skb->head + size;
#endif
	/* {transport,network,mac}_header and tail are relative to skb->head */
	skb->tail += off;
	skb->transport_header += off;
	skb->network_header += off;
	skb->mac_header += off;
	skb->cloned = 0;
	skb->nohdr = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}
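
/*
 * Illustrative sketch, guarded out with "#if 0": growing headroom in place
 * before pushing an extra header, in the spirit of what skb_cow() does.
 * The "needed" size and the helper name are hypothetical; the skb must not
 * be shared, per the rules above.
 */
#if 0
static int example_ensure_headroom(struct sk_buff *skb, unsigned int needed)
{
	if (skb_headroom(skb) >= needed && !skb_cloned(skb))
		return 0;	/* enough private headroom already */

	/* Reallocate the header; all cached pointers into the old header
	 * (e.g. from skb_network_header()) must be reloaded afterwards.
	 */
	return pskb_expand_head(skb, SKB_DATA_ALIGN(needed), 0, GFP_ATOMIC);
}
#endif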

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	if (delta <= 0)
		skb2 = pskb_copy(skb, GFP_ATOMIC);
	else {
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
					     GFP_ATOMIC)) {
			kfree_skb(skb2);
			skb2 = NULL;
		}
	}
	return skb2;
}


/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 *
 *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
 *	only by netfilter in the cases when checksum is recalculated? --ANK
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom, int newtailroom,
				gfp_t gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
				      gfp_mask);
	int oldheadroom = skb_headroom(skb);
	int head_copy_len, head_copy_off;
	int off = 0;

	if (!n)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	head_copy_len = oldheadroom;
	head_copy_off = 0;
	if (newheadroom <= head_copy_len)
		head_copy_len = newheadroom;
	else
		head_copy_off = newheadroom - head_copy_len;

	/* Copy the linear header and data. */
	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
			  skb->len + head_copy_len))
		BUG();

	copy_skb_header(n, skb);

#ifdef NET_SKBUFF_DATA_USES_OFFSET
	off = newheadroom - oldheadroom;
#endif
	n->transport_header += off;
	n->network_header += off;
	n->mac_header += off;

	return n;
}
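
/*
 * Illustrative sketch, guarded out with "#if 0": using skb_copy_expand()
 * to gain room for an encapsulation header when the original buffer has
 * no spare headroom.  The 16-byte header size and the helper name are
 * hypothetical.
 */
#if 0
static struct sk_buff *example_encap(struct sk_buff *skb)
{
	struct sk_buff *n;

	n = skb_copy_expand(skb, 16, 0, GFP_ATOMIC);
	if (!n)
		return NULL;

	kfree_skb(skb);			/* drop our reference to the original */
	memset(skb_push(n, 16), 0, 16);	/* new outer header, zeroed for now */
	return n;
}
#endif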

/**
 *	skb_pad			-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return an error in out-of-memory cases. The skb is freed on error.
 */

int skb_pad(struct sk_buff *skb, int pad)
{
	int err;
	int ntail;

	/* If the skbuff is non linear tailroom is always zero.. */
	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
		memset(skb->data + skb->len, 0, pad);
		return 0;
	}

	ntail = skb->data_len + pad - (skb->end - skb->tail);
	if (likely(skb_cloned(skb) || ntail > 0)) {
		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
		if (unlikely(err))
			goto free_skb;
	}

	/* FIXME: The use of this function with non-linear skb's really needs
	 * to be audited.
	 */
	err = skb_linearize(skb);
	if (unlikely(err))
		goto free_skb;

	memset(skb->data + skb->len, 0, pad);
	return 0;

free_skb:
	kfree_skb(skb);
	return err;
}
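
/*
 * Illustrative sketch, guarded out with "#if 0": padding a short Ethernet
 * frame up to the minimum length before transmission.  ETH_ZLEN comes from
 * <linux/if_ether.h>; the helper name is hypothetical.  Note that on error
 * skb_pad() has already freed the buffer, so the caller must not touch it.
 */
#if 0
static int example_pad_min_frame(struct sk_buff *skb)
{
	unsigned int len = skb->len;

	if (len < ETH_ZLEN) {
		if (skb_pad(skb, ETH_ZLEN - len))
			return -ENOMEM;		/* skb already freed */
		len = ETH_ZLEN;			/* transfer the zeroed pad too */
	}
	/* hand skb->data and len to the hardware here */
	return 0;
}
#endif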

/* Trims skb to length len. It can change skb pointers.
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
	struct sk_buff **fragp;
	struct sk_buff *frag;
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;
	int err;

	if (skb_cloned(skb) &&
	    unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
		return err;

	i = 0;
	if (offset >= len)
		goto drop_pages;

	for (; i < nfrags; i++) {
		int end = offset + skb_shinfo(skb)->frags[i].size;

		if (end < len) {
			offset = end;
			continue;
		}

		skb_shinfo(skb)->frags[i++].size = len - offset;

drop_pages:
		skb_shinfo(skb)->nr_frags = i;

		for (; i < nfrags; i++)
			put_page(skb_shinfo(skb)->frags[i].page);

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);
		goto done;
	}

	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
	     fragp = &frag->next) {
		int end = offset + frag->len;

		if (skb_shared(frag)) {
			struct sk_buff *nfrag;

			nfrag = skb_clone(frag, GFP_ATOMIC);
			if (unlikely(!nfrag))
				return -ENOMEM;

			nfrag->next = frag->next;
			kfree_skb(frag);
			frag = nfrag;
			*fragp = frag;
		}

		if (end < len) {
			offset = end;
			continue;
		}

		if (end > len &&
		    unlikely((err = pskb_trim(frag, len - offset))))
			return err;

		if (frag->next)
			skb_drop_list(&frag->next);
		break;
	}

done:
	if (len > skb_headlen(skb)) {
		skb->data_len -= skb->len - len;
		skb->len = len;
	} else {
		skb->len = len;
		skb->data_len = 0;
		skb_set_tail_pointer(skb, len);
	}

	return 0;
}

/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	The function makes sense only on a fragmented &sk_buff: it expands
 *	the header, moving its tail forward and copying the necessary data
 *	from the fragmented part.
 *
 *	&sk_buff MUST have a reference count of 1.
 *
 *	Returns %NULL (and the &sk_buff does not change) if the pull failed,
 *	or the value of the new tail of the skb in the case of success.
 *
 *	All the pointers pointing into the skb header may change and must be
 *	reloaded after a call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	/* If skb does not have enough free space at the tail, get a new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	int i, k, eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				     GFP_ATOMIC))
			return NULL;
	}

	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
		BUG();

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (!skb_shinfo(skb)->frag_list)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but taking into account that pulling is expected to
	 * be a very rare operation, it is worth fighting against
	 * further bloating of the skb head and crucifying ourselves
	 * here instead. Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			BUG_ON(!list);

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb_tail_pointer(skb);
}

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb_headlen(skb);

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_from_linear_data_offset(skb, offset, to, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
			memcpy(to,
			       vaddr + skb_shinfo(skb)->frags[i].page_offset +
			       offset - start, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_bits(list, offset - start,
						  to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
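
/*
 * Illustrative sketch, guarded out with "#if 0": pulling a fixed-size
 * header out of a possibly non-linear skb into a stack buffer, instead
 * of assuming the bytes are contiguous in the linear area.  The 8-byte
 * size and the helper name are hypothetical.
 */
#if 0
static int example_peek_header(const struct sk_buff *skb, int offset)
{
	u8 hdr[8];

	/* Works no matter how the bytes are split across fragments. */
	if (skb_copy_bits(skb, offset, hdr, sizeof(hdr)))
		return -EFAULT;

	/* ... parse hdr[] here ... */
	return 0;
}
#endif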

/**
 *	skb_store_bits - store bits from kernel buffer to skb
 *	@skb: destination buffer
 *	@offset: offset in destination
 *	@from: source buffer
 *	@len: number of bytes to copy
 *
 *	Copy the specified number of bytes from the source buffer to the
 *	destination skb.  This function handles all the messy bits of
 *	traversing fragment lists and such.
 */

int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
	int i, copy;
	int start = skb_headlen(skb);

	if (offset > (int)skb->len - len)
		goto fault;

	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_to_linear_data_offset(skb, offset, from, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + frag->size;
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(frag);
			memcpy(vaddr + frag->page_offset + offset - start,
			       from, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_store_bits(list, offset - start,
						   from, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				from += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

EXPORT_SYMBOL(skb_store_bits);

/* Checksum skb data. */

__wsum skb_checksum(const struct sk_buff *skb, int offset,
		    int len, __wsum csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	int pos = 0;

	/* Checksum header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset - start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				__wsum csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset - start,
						     copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos += copy;
			}
			start = end;
		}
	}
	BUG_ON(len);

	return csum;
}
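
/*
 * Illustrative sketch, guarded out with "#if 0": checksumming the part of
 * a packet that follows the transport header and folding the 32-bit wide
 * sum down to the final 16-bit Internet checksum with csum_fold() from
 * <net/checksum.h>.  The offset parameter and helper name are hypothetical.
 */
#if 0
static __sum16 example_payload_csum(const struct sk_buff *skb, int thoff)
{
	__wsum csum;

	csum = skb_checksum(skb, thoff, skb->len - thoff, 0);
	return csum_fold(csum);
}
#endif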

/* Both of above in one bottle. */

__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
			      u8 *to, int len, __wsum csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to,
						 copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial_copy_nocheck(vaddr +
							  frag->page_offset +
							  offset - start, to,
							  copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list = list->next) {
			__wsum csum2;
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				csum2 = skb_copy_and_csum_bits(list,
							       offset - start,
							       to, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	BUG_ON(len);
	return csum;
}

void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	__wsum csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		csstart = skb->csum_start - skb_headroom(skb);
	else
		csstart = skb_headlen(skb);

	BUG_ON(csstart > skb_headlen(skb));

	skb_copy_from_linear_data(skb, to, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
					      skb->len - csstart, 0);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		long csstuff = csstart + skb->csum_offset;

		*((__sum16 *)(to + csstuff)) = csum_fold(csum);
	}
}

/**
 *	skb_dequeue - remove from the head of the queue
 *	@list: list to dequeue from
 *
 *	Remove the head of the list. The list lock is taken so the function
 *	may be used safely with other locking list functions. The head item is
 *	returned or %NULL if the list is empty.
 */

struct sk_buff *skb_dequeue(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}

/**
 *	skb_dequeue_tail - remove from the tail of the queue
 *	@list: list to dequeue from
 *
 *	Remove the tail of the list. The list lock is taken so the function
 *	may be used safely with other locking list functions. The tail item is
 *	returned or %NULL if the list is empty.
 */
struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue_tail(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}

/**
 *	skb_queue_purge - empty a list
 *	@list: list to empty
 *
 *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 *	the list and one reference dropped. This function takes the list
 *	lock and is atomic with respect to other list locking functions.
 */
void skb_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;
	while ((skb = skb_dequeue(list)) != NULL)
		kfree_skb(skb);
}

/**
 *	skb_queue_head - queue a buffer at the list head
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the start of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff
 *	functions.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_head(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}

/**
 *	skb_queue_tail - queue a buffer at the list tail
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the tail of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff
 *	functions.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_tail(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}
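
/*
 * Illustrative sketch, guarded out with "#if 0": a minimal FIFO built on
 * &sk_buff_head.  One context enqueues with skb_queue_tail(), another
 * drains with skb_dequeue(); the internal list lock makes the two safe
 * against each other.  The queue and helper names are hypothetical.
 */
#if 0
static struct sk_buff_head example_queue;

static void example_init(void)
{
	skb_queue_head_init(&example_queue);
}

static void example_produce(struct sk_buff *skb)
{
	skb_queue_tail(&example_queue, skb);
}

static void example_drain(void)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&example_queue)) != NULL)
		kfree_skb(skb);
}
#endif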

/**
 *	skb_unlink	-	remove a buffer from a list
 *	@skb: buffer to remove
 *	@list: list to use
 *
 *	Remove a packet from a list. The list locks are taken and this
 *	function is atomic with respect to other list locked calls.
 *
 *	You must know what list the SKB is on.
 */
void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_unlink(skb, list);
	spin_unlock_irqrestore(&list->lock, flags);
}

/**
 *	skb_append	-	append a buffer
 *	@old: buffer to insert after
 *	@newsk: buffer to insert
 *	@list: list to use
 *
 *	Place a packet after a given packet in a list. The list locks are taken
 *	and this function is atomic with respect to other list locked calls.
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_append(old, newsk, list);
	spin_unlock_irqrestore(&list->lock, flags);
}


/**
 *	skb_insert	-	insert a buffer
 *	@old: buffer to insert before
 *	@newsk: buffer to insert
 *	@list: list to use
 *
 *	Place a packet before a given packet in a list. The list locks are
 *	taken and this function is atomic with respect to other list locked
 *	calls.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_insert(newsk, old->prev, old, list);
	spin_unlock_irqrestore(&list->lock, flags);
}

static inline void skb_split_inside_header(struct sk_buff *skb,
					   struct sk_buff* skb1,
					   const u32 len, const int pos)
{
	int i;

	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
					 pos - len);
	/* And move data appendix as is. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
	skb_shinfo(skb)->nr_frags = 0;
	skb1->data_len = skb->data_len;
	skb1->len += skb1->data_len;
	skb->data_len = 0;
	skb->len = len;
	skb_set_tail_pointer(skb, len);
}

static inline void skb_split_no_header(struct sk_buff *skb,
				       struct sk_buff* skb1,
				       const u32 len, int pos)
{
	int i, k = 0;
	const int nfrags = skb_shinfo(skb)->nr_frags;

	skb_shinfo(skb)->nr_frags = 0;
	skb1->len = skb1->data_len = skb->len - len;
	skb->len = len;
	skb->data_len = len - pos;

	for (i = 0; i < nfrags; i++) {
		int size = skb_shinfo(skb)->frags[i].size;

		if (pos + size > len) {
			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

			if (pos < len) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move all the frag to the second
				 *    part, if it is possible. F.e.
				 *    this approach is mandatory for TUX,
				 *    where splitting is expensive.
				 * 2. Split accurately. We do this.
				 */
				get_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
				skb_shinfo(skb1)->frags[0].size -= len - pos;
				skb_shinfo(skb)->frags[i].size = len - pos;
				skb_shinfo(skb)->nr_frags++;
			}
			k++;
		} else
			skb_shinfo(skb)->nr_frags++;
		pos += size;
	}
	skb_shinfo(skb1)->nr_frags = k;
}

/**
 * skb_split - Split fragmented skb to two parts at length len.
 * @skb: the buffer to split
 * @skb1: the buffer to receive the second part
 * @len: new length for skb
 */
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
	int pos = skb_headlen(skb);

	if (len < pos)	/* Split line is inside header. */
		skb_split_inside_header(skb, skb1, len, pos);
	else		/* Second chunk has no header, nothing to copy. */
		skb_split_no_header(skb, skb1, len, pos);
}

/**
 * skb_prepare_seq_read - Prepare a sequential read of skb data
 * @skb: the buffer to read
 * @from: lower offset of data to be read
 * @to: upper offset of data to be read
 * @st: state variable
 *
 * Initializes the specified state variable. Must be called before
 * invoking skb_seq_read() for the first time.
 */
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
			  unsigned int to, struct skb_seq_state *st)
{
	st->lower_offset = from;
	st->upper_offset = to;
	st->root_skb = st->cur_skb = skb;
	st->frag_idx = st->stepped_offset = 0;
	st->frag_data = NULL;
}

/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at @consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to @data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. @consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note: The size of each block of data returned can be arbitrary,
 *       this limitation is the cost for zerocopy sequential
 *       reads of potentially non-linear data.
 *
 * Note: Fragment lists within fragments are not implemented
 *       at the moment, state->root_skb could be replaced with
 *       a stack for this purpose.
 */
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st)
{
	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
	skb_frag_t *frag;

	if (unlikely(abs_offset >= st->upper_offset))
		return 0;

next_skb:
	block_limit = skb_headlen(st->cur_skb);

	if (abs_offset < block_limit) {
		*data = st->cur_skb->data + abs_offset;
		return block_limit - abs_offset;
	}

	if (st->frag_idx == 0 && !st->frag_data)
		st->stepped_offset += skb_headlen(st->cur_skb);

	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
		block_limit = frag->size + st->stepped_offset;

		if (abs_offset < block_limit) {
			if (!st->frag_data)
				st->frag_data = kmap_skb_frag(frag);

			*data = (u8 *) st->frag_data + frag->page_offset +
				(abs_offset - st->stepped_offset);

			return block_limit - abs_offset;
		}

		if (st->frag_data) {
			kunmap_skb_frag(st->frag_data);
			st->frag_data = NULL;
		}

		st->frag_idx++;
		st->stepped_offset += frag->size;
	}

	if (st->cur_skb->next) {
		st->cur_skb = st->cur_skb->next;
		st->frag_idx = 0;
		goto next_skb;
	} else if (st->root_skb == st->cur_skb &&
		   skb_shinfo(st->root_skb)->frag_list) {
		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
		goto next_skb;
	}

	return 0;
}
1722 | ||
1723 | /** | |
1724 | * skb_abort_seq_read - Abort a sequential read of skb data | |
1725 | * @st: state variable | |
1726 | * | |
1727 | * Must be called if the sequential read is abandoned before | |
1728 | * skb_seq_read() has returned 0; it unmaps any held fragment. | |
1729 | */ | |
1730 | void skb_abort_seq_read(struct skb_seq_state *st) | |
1731 | { | |
1732 | if (st->frag_data) | |
1733 | kunmap_skb_frag(st->frag_data); | |
1734 | } | |
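
/*
 * A minimal sketch of how the three sequential-read calls above fit
 * together (illustrative only; process_block() is a hypothetical
 * consumer, not part of this file): prepare the state, read blocks in
 * a loop, and make sure any mapped fragment is released at the end.
 */
#if 0	/* usage example, not compiled */
static void example_walk_skb(struct sk_buff *skb, unsigned int len)
{
	struct skb_seq_state st;
	unsigned int consumed = 0;
	const u8 *data;
	unsigned int avail;

	skb_prepare_seq_read(skb, 0, len, &st);
	while ((avail = skb_seq_read(consumed, &data, &st)) != 0) {
		/* The caller may take less than @avail; here we take it all. */
		process_block(data, avail);	/* hypothetical consumer */
		consumed += avail;
	}
	/* Harmless after a completed read, required after an early break. */
	skb_abort_seq_read(&st);
}
#endif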
1735 | ||
1736 | #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) | |
1737 | ||
1738 | static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, | |
1739 | struct ts_config *conf, | |
1740 | struct ts_state *state) | |
1741 | { | |
1742 | return skb_seq_read(offset, text, TS_SKB_CB(state)); | |
1743 | } | |
1744 | ||
1745 | static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) | |
1746 | { | |
1747 | skb_abort_seq_read(TS_SKB_CB(state)); | |
1748 | } | |
1749 | ||
1750 | /** | |
1751 | * skb_find_text - Find a text pattern in skb data | |
1752 | * @skb: the buffer to look in | |
1753 | * @from: search offset | |
1754 | * @to: search limit | |
1755 | * @config: textsearch configuration | |
1756 | * @state: uninitialized textsearch state variable | |
1757 | * | |
1758 | * Finds a pattern in the skb data according to the specified | |
1759 | * textsearch configuration. Use textsearch_next() to retrieve | |
1760 | * subsequent occurrences of the pattern. Returns the offset | |
1761 | * to the first occurrence or UINT_MAX if no match was found. | |
1762 | */ | |
1763 | unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, | |
1764 | unsigned int to, struct ts_config *config, | |
1765 | struct ts_state *state) | |
1766 | { | |
1767 | unsigned int ret; | |
1768 | ||
1769 | config->get_next_block = skb_ts_get_next_block; | |
1770 | config->finish = skb_ts_finish; | |
1771 | ||
1772 | skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); | |
1773 | ||
1774 | ret = textsearch_find(config, state); | |
1775 | return (ret <= to - from ? ret : UINT_MAX); | |
1776 | } | |
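
/*
 * A minimal sketch of skb_find_text() usage (illustrative only; the
 * choice of pattern and the error handling are assumptions, the
 * textsearch calls themselves are the real API). The configuration is
 * prepared once and can then be matched against many skbs.
 */
#if 0	/* usage example, not compiled */
static unsigned int example_find_pattern(struct sk_buff *skb)
{
	struct ts_config *conf;
	struct ts_state state;
	unsigned int pos;

	conf = textsearch_prepare("kmp", "HTTP", 4, GFP_ATOMIC, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return UINT_MAX;

	pos = skb_find_text(skb, 0, skb->len, conf, &state);
	textsearch_destroy(conf);
	return pos;	/* offset of the first match, or UINT_MAX */
}
#endif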
1777 | ||
1778 | /** | |
1779 | * skb_append_datato_frags - append user data to a skb | |
1780 | * @sk: sock structure | |
1781 | * @skb: skb structure to which the user data is appended | |
1782 | * @getfrag: callback function used to fetch the user data | |
1783 | * @from: pointer to the user message iov | |
1784 | * @length: length of the iov message | |
1785 | * | |
1786 | * Description: This procedure appends user data to the fragment part | |
1787 | * of the skb. If any page allocation fails, it returns -ENOMEM. | |
1788 | */ | |
1789 | int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, | |
1790 | int (*getfrag)(void *from, char *to, int offset, | |
1791 | int len, int odd, struct sk_buff *skb), | |
1792 | void *from, int length) | |
1793 | { | |
1794 | int frg_cnt = 0; | |
1795 | skb_frag_t *frag = NULL; | |
1796 | struct page *page = NULL; | |
1797 | int copy, left; | |
1798 | int offset = 0; | |
1799 | int ret; | |
1800 | ||
1801 | do { | |
1802 | /* Return error if we don't have space for new frag */ | |
1803 | frg_cnt = skb_shinfo(skb)->nr_frags; | |
1804 | if (frg_cnt >= MAX_SKB_FRAGS) | |
1805 | return -EFAULT; | |
1806 | ||
1807 | /* allocate a new page for next frag */ | |
1808 | page = alloc_pages(sk->sk_allocation, 0); | |
1809 | ||
1810 | /* If alloc_page fails just return failure and caller will | |
1811 | * free previous allocated pages by doing kfree_skb() | |
1812 | */ | |
1813 | if (page == NULL) | |
1814 | return -ENOMEM; | |
1815 | ||
1816 | /* initialize the next frag */ | |
1817 | sk->sk_sndmsg_page = page; | |
1818 | sk->sk_sndmsg_off = 0; | |
1819 | skb_fill_page_desc(skb, frg_cnt, page, 0, 0); | |
1820 | skb->truesize += PAGE_SIZE; | |
1821 | atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); | |
1822 | ||
1823 | /* get the new initialized frag */ | |
1824 | frg_cnt = skb_shinfo(skb)->nr_frags; | |
1825 | frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; | |
1826 | ||
1827 | /* copy the user data to page */ | |
1828 | left = PAGE_SIZE - frag->page_offset; | |
1829 | copy = (length > left)? left : length; | |
1830 | ||
1831 | ret = getfrag(from, (page_address(frag->page) + | |
1832 | frag->page_offset + frag->size), | |
1833 | offset, copy, 0, skb); | |
1834 | if (ret < 0) | |
1835 | return -EFAULT; | |
1836 | ||
1837 | /* copy was successful so update the size parameters */ | |
1838 | sk->sk_sndmsg_off += copy; | |
1839 | frag->size += copy; | |
1840 | skb->len += copy; | |
1841 | skb->data_len += copy; | |
1842 | offset += copy; | |
1843 | length -= copy; | |
1844 | ||
1845 | } while (length > 0); | |
1846 | ||
1847 | return 0; | |
1848 | } | |
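
/*
 * A minimal sketch of a @getfrag callback as consumed above
 * (illustrative only; real callers usually pass ip_generic_getfrag(),
 * which copies from user space and folds checksums). Here @from is
 * assumed to point at a plain kernel buffer.
 */
#if 0	/* usage example, not compiled */
static int example_getfrag(void *from, char *to, int offset,
			   int len, int odd, struct sk_buff *skb)
{
	/* Copy @len bytes starting at @offset within the source buffer. */
	memcpy(to, (char *)from + offset, len);
	return 0;
}
#endif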
1849 | ||
1850 | /** | |
1851 | * skb_pull_rcsum - pull skb and update receive checksum | |
1852 | * @skb: buffer to update | |
1853 | * @len: length of data pulled | |
1854 | * | |
1855 | * This function performs an skb_pull on the packet and updates | |
1856 | * the CHECKSUM_COMPLETE checksum. It should be used on the | |
1857 | * receive path instead of skb_pull unless you know | |
1859 | * that the checksum difference is zero (e.g., a valid IP header) | |
1860 | * or you are setting ip_summed to CHECKSUM_NONE. | |
1861 | */ | |
1862 | unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) | |
1863 | { | |
1864 | BUG_ON(len > skb->len); | |
1865 | skb->len -= len; | |
1866 | BUG_ON(skb->len < skb->data_len); | |
1867 | skb_postpull_rcsum(skb, skb->data, len); | |
1868 | return skb->data += len; | |
1869 | } | |
1870 | ||
1871 | EXPORT_SYMBOL_GPL(skb_pull_rcsum); | |
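
/*
 * A minimal sketch of skb_pull_rcsum() on the receive path
 * (illustrative only; the 8-byte header is a hypothetical
 * encapsulation). A plain skb_pull() here would leave a stale
 * CHECKSUM_COMPLETE value that still covers the removed header.
 */
#if 0	/* usage example, not compiled */
static void example_decap(struct sk_buff *skb)
{
	const unsigned int hdr_len = 8;	/* hypothetical header size */

	if (pskb_may_pull(skb, hdr_len))
		skb_pull_rcsum(skb, hdr_len);
}
#endif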
1872 | ||
1873 | /** | |
1874 | * skb_segment - Perform protocol segmentation on skb. | |
1875 | * @skb: buffer to segment | |
1876 | * @features: features for the output path (see dev->features) | |
1877 | * | |
1878 | * This function performs segmentation on the given skb. It returns | |
1879 | * a pointer to the first in a list of new skbs for the segments. | |
1880 | * In case of error it returns ERR_PTR(err). | |
1881 | */ | |
1882 | struct sk_buff *skb_segment(struct sk_buff *skb, int features) | |
1883 | { | |
1884 | struct sk_buff *segs = NULL; | |
1885 | struct sk_buff *tail = NULL; | |
1886 | unsigned int mss = skb_shinfo(skb)->gso_size; | |
1887 | unsigned int doffset = skb->data - skb_mac_header(skb); | |
1888 | unsigned int offset = doffset; | |
1889 | unsigned int headroom; | |
1890 | unsigned int len; | |
1891 | int sg = features & NETIF_F_SG; | |
1892 | int nfrags = skb_shinfo(skb)->nr_frags; | |
1893 | int err = -ENOMEM; | |
1894 | int i = 0; | |
1895 | int pos; | |
1896 | ||
1897 | __skb_push(skb, doffset); | |
1898 | headroom = skb_headroom(skb); | |
1899 | pos = skb_headlen(skb); | |
1900 | ||
1901 | do { | |
1902 | struct sk_buff *nskb; | |
1903 | skb_frag_t *frag; | |
1904 | int hsize; | |
1905 | int k; | |
1906 | int size; | |
1907 | ||
1908 | len = skb->len - offset; | |
1909 | if (len > mss) | |
1910 | len = mss; | |
1911 | ||
1912 | hsize = skb_headlen(skb) - offset; | |
1913 | if (hsize < 0) | |
1914 | hsize = 0; | |
1915 | if (hsize > len || !sg) | |
1916 | hsize = len; | |
1917 | ||
1918 | nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); | |
1919 | if (unlikely(!nskb)) | |
1920 | goto err; | |
1921 | ||
1922 | if (segs) | |
1923 | tail->next = nskb; | |
1924 | else | |
1925 | segs = nskb; | |
1926 | tail = nskb; | |
1927 | ||
1928 | nskb->dev = skb->dev; | |
1929 | nskb->priority = skb->priority; | |
1930 | nskb->protocol = skb->protocol; | |
1931 | nskb->dst = dst_clone(skb->dst); | |
1932 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | |
1933 | nskb->pkt_type = skb->pkt_type; | |
1934 | nskb->mac_len = skb->mac_len; | |
1935 | ||
1936 | skb_reserve(nskb, headroom); | |
1937 | skb_reset_mac_header(nskb); | |
1938 | skb_set_network_header(nskb, skb->mac_len); | |
1939 | nskb->transport_header = (nskb->network_header + | |
1940 | skb_network_header_len(skb)); | |
1941 | skb_copy_from_linear_data(skb, skb_put(nskb, doffset), | |
1942 | doffset); | |
1943 | if (!sg) { | |
1944 | nskb->csum = skb_copy_and_csum_bits(skb, offset, | |
1945 | skb_put(nskb, len), | |
1946 | len, 0); | |
1947 | continue; | |
1948 | } | |
1949 | ||
1950 | frag = skb_shinfo(nskb)->frags; | |
1951 | k = 0; | |
1952 | ||
1953 | nskb->ip_summed = CHECKSUM_PARTIAL; | |
1954 | nskb->csum = skb->csum; | |
1955 | skb_copy_from_linear_data_offset(skb, offset, | |
1956 | skb_put(nskb, hsize), hsize); | |
1957 | ||
1958 | while (pos < offset + len) { | |
1959 | BUG_ON(i >= nfrags); | |
1960 | ||
1961 | *frag = skb_shinfo(skb)->frags[i]; | |
1962 | get_page(frag->page); | |
1963 | size = frag->size; | |
1964 | ||
1965 | if (pos < offset) { | |
1966 | frag->page_offset += offset - pos; | |
1967 | frag->size -= offset - pos; | |
1968 | } | |
1969 | ||
1970 | k++; | |
1971 | ||
1972 | if (pos + size <= offset + len) { | |
1973 | i++; | |
1974 | pos += size; | |
1975 | } else { | |
1976 | frag->size -= pos + size - (offset + len); | |
1977 | break; | |
1978 | } | |
1979 | ||
1980 | frag++; | |
1981 | } | |
1982 | ||
1983 | skb_shinfo(nskb)->nr_frags = k; | |
1984 | nskb->data_len = len - hsize; | |
1985 | nskb->len += nskb->data_len; | |
1986 | nskb->truesize += nskb->data_len; | |
1987 | } while ((offset += len) < skb->len); | |
1988 | ||
1989 | return segs; | |
1990 | ||
1991 | err: | |
1992 | while ((skb = segs)) { | |
1993 | segs = skb->next; | |
1994 | kfree_skb(skb); | |
1995 | } | |
1996 | return ERR_PTR(err); | |
1997 | } | |
1998 | ||
1999 | EXPORT_SYMBOL_GPL(skb_segment); | |
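
/*
 * A minimal sketch of how a caller consumes skb_segment()
 * (illustrative only, modelled loosely on the software GSO path;
 * xmit_one() stands in for the real transmit step). The original skb
 * is freed and each segment is unlinked and sent on its own.
 */
#if 0	/* usage example, not compiled */
static int example_gso_send(struct sk_buff *skb, int features)
{
	struct sk_buff *segs, *nskb;

	segs = skb_segment(skb, features);
	if (IS_ERR(segs))
		return PTR_ERR(segs);

	kfree_skb(skb);
	while (segs) {
		nskb = segs;
		segs = segs->next;
		nskb->next = NULL;
		xmit_one(nskb);	/* hypothetical transmit */
	}
	return 0;
}
#endif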
2000 | ||
2001 | void __init skb_init(void) | |
2002 | { | |
2003 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", | |
2004 | sizeof(struct sk_buff), | |
2005 | 0, | |
2006 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, | |
2007 | NULL, NULL); | |
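/* The fclone cache holds skbs in pairs (a parent and the clone
 * that skb_clone() may hand out) followed by a shared reference
 * count, hence the 2 * sizeof(struct sk_buff) + sizeof(atomic_t)
 * object size below.
 */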
2008 | skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", | |
2009 | (2*sizeof(struct sk_buff)) + | |
2010 | sizeof(atomic_t), | |
2011 | 0, | |
2012 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, | |
2013 | NULL, NULL); | |
2014 | } | |
2015 | ||
2016 | /** | |
2017 | * skb_to_sgvec - Fill a scatter-gather list from a socket buffer | |
2018 | * @skb: Socket buffer containing the buffers to be mapped | |
2019 | * @sg: The scatter-gather list to map into | |
2020 | * @offset: The offset into the buffer's contents to start mapping | |
2021 | * @len: Length of buffer space to be mapped | |
2022 | * | |
2023 | * Fill the specified scatter-gather list with mappings/pointers into a | |
2024 | * region of the buffer space attached to a socket buffer. | |
2025 | */ | |
2026 | int | |
2027 | skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) | |
2028 | { | |
2029 | int start = skb_headlen(skb); | |
2030 | int i, copy = start - offset; | |
2031 | int elt = 0; | |
2032 | ||
2033 | if (copy > 0) { | |
2034 | if (copy > len) | |
2035 | copy = len; | |
2036 | sg[elt].page = virt_to_page(skb->data + offset); | |
2037 | sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; | |
2038 | sg[elt].length = copy; | |
2039 | elt++; | |
2040 | if ((len -= copy) == 0) | |
2041 | return elt; | |
2042 | offset += copy; | |
2043 | } | |
2044 | ||
2045 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | |
2046 | int end; | |
2047 | ||
2048 | BUG_TRAP(start <= offset + len); | |
2049 | ||
2050 | end = start + skb_shinfo(skb)->frags[i].size; | |
2051 | if ((copy = end - offset) > 0) { | |
2052 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | |
2053 | ||
2054 | if (copy > len) | |
2055 | copy = len; | |
2056 | sg[elt].page = frag->page; | |
2057 | sg[elt].offset = frag->page_offset+offset-start; | |
2058 | sg[elt].length = copy; | |
2059 | elt++; | |
2060 | if (!(len -= copy)) | |
2061 | return elt; | |
2062 | offset += copy; | |
2063 | } | |
2064 | start = end; | |
2065 | } | |
2066 | ||
2067 | if (skb_shinfo(skb)->frag_list) { | |
2068 | struct sk_buff *list = skb_shinfo(skb)->frag_list; | |
2069 | ||
2070 | for (; list; list = list->next) { | |
2071 | int end; | |
2072 | ||
2073 | BUG_TRAP(start <= offset + len); | |
2074 | ||
2075 | end = start + list->len; | |
2076 | if ((copy = end - offset) > 0) { | |
2077 | if (copy > len) | |
2078 | copy = len; | |
2079 | elt += skb_to_sgvec(list, sg+elt, offset - start, copy); | |
2080 | if ((len -= copy) == 0) | |
2081 | return elt; | |
2082 | offset += copy; | |
2083 | } | |
2084 | start = end; | |
2085 | } | |
2086 | } | |
2087 | BUG_ON(len); | |
2088 | return elt; | |
2089 | } | |
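
/*
 * A minimal sketch of filling a scatter-gather list from an skb
 * (illustrative only; the 16-element bound is an arbitrary assumption
 * made by this caller). Each linear byte range, page fragment and
 * fragment-list entry contributes scatterlist elements.
 */
#if 0	/* usage example, not compiled */
static int example_skb_to_sg(struct sk_buff *skb)
{
	struct scatterlist sg[16];	/* hypothetical worst-case bound */
	int nelt;

	nelt = skb_to_sgvec(skb, sg, 0, skb->len);
	/* @sg now describes skb->len bytes of payload in @nelt entries. */
	return nelt;
}
#endif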
2090 | ||
2091 | /** | |
2092 | * skb_cow_data - Check that a socket buffer's data buffers are writable | |
2093 | * @skb: The socket buffer to check. | |
2094 | * @tailbits: Amount of trailing space to be added | |
2095 | * @trailer: Returned pointer to the skb where the @tailbits space begins | |
2096 | * | |
2097 | * Make sure that the data buffers attached to a socket buffer are | |
2098 | * writable. If they are not, private copies are made of the data buffers | |
2099 | * and the socket buffer is set to use these instead. | |
2100 | * | |
2101 | * If @tailbits is given, make sure that there is space to write @tailbits | |
2102 | * bytes of data beyond current end of socket buffer. @trailer will be | |
2103 | * set to point to the skb in which this space begins. | |
2104 | * | |
2105 | * The number of scatterlist elements required to completely map the | |
2106 | * COW'd and extended socket buffer will be returned. | |
2107 | */ | |
2108 | int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |
2109 | { | |
2110 | int copyflag; | |
2111 | int elt; | |
2112 | struct sk_buff *skb1, **skb_p; | |
2113 | ||
2114 | /* If the skb is cloned or its head is paged, reallocate the | |
2115 | * head, pulling all the pages into it (pages are considered not | |
2116 | * writable for now, even if they are anonymous). | |
2117 | */ | |
2118 | if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && | |
2119 | __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) | |
2120 | return -ENOMEM; | |
2121 | ||
2122 | /* Easy case. Most packets will go this way. */ | |
2123 | if (!skb_shinfo(skb)->frag_list) { | |
2124 | /* A little trouble: not enough space for the trailer. | |
2125 | * This should not happen when the stack is tuned to generate | |
2126 | * good frames. On a miss we reallocate and reserve even more | |
2127 | * space; 128 bytes is fair. */ | |
2128 | ||
2129 | if (skb_tailroom(skb) < tailbits && | |
2130 | pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) | |
2131 | return -ENOMEM; | |
2132 | ||
2133 | /* Voila! */ | |
2134 | *trailer = skb; | |
2135 | return 1; | |
2136 | } | |
2137 | ||
2138 | /* We are in trouble: time to mince the fragments... */ | |
2139 | ||
2140 | elt = 1; | |
2141 | skb_p = &skb_shinfo(skb)->frag_list; | |
2142 | copyflag = 0; | |
2143 | ||
2144 | while ((skb1 = *skb_p) != NULL) { | |
2145 | int ntail = 0; | |
2146 | ||
2147 | /* The fragment may have been partially pulled by someone; | |
2148 | * this can happen on input. Copy it and everything | |
2149 | * after it. */ | |
2150 | ||
2151 | if (skb_shared(skb1)) | |
2152 | copyflag = 1; | |
2153 | ||
2154 | /* If the skb is the last, worry about trailer. */ | |
2155 | ||
2156 | if (skb1->next == NULL && tailbits) { | |
2157 | if (skb_shinfo(skb1)->nr_frags || | |
2158 | skb_shinfo(skb1)->frag_list || | |
2159 | skb_tailroom(skb1) < tailbits) | |
2160 | ntail = tailbits + 128; | |
2161 | } | |
2162 | ||
2163 | if (copyflag || | |
2164 | skb_cloned(skb1) || | |
2165 | ntail || | |
2166 | skb_shinfo(skb1)->nr_frags || | |
2167 | skb_shinfo(skb1)->frag_list) { | |
2168 | struct sk_buff *skb2; | |
2169 | ||
2170 | /* No way around it: we have to copy. */ | |
2171 | if (ntail == 0) | |
2172 | skb2 = skb_copy(skb1, GFP_ATOMIC); | |
2173 | else | |
2174 | skb2 = skb_copy_expand(skb1, | |
2175 | skb_headroom(skb1), | |
2176 | ntail, | |
2177 | GFP_ATOMIC); | |
2178 | if (unlikely(skb2 == NULL)) | |
2179 | return -ENOMEM; | |
2180 | ||
2181 | if (skb1->sk) | |
2182 | skb_set_owner_w(skb2, skb1->sk); | |
2183 | ||
2184 | /* Link the new skb in and drop the old one. */ | |
2186 | ||
2187 | skb2->next = skb1->next; | |
2188 | *skb_p = skb2; | |
2189 | kfree_skb(skb1); | |
2190 | skb1 = skb2; | |
2191 | } | |
2192 | elt++; | |
2193 | *trailer = skb1; | |
2194 | skb_p = &skb1->next; | |
2195 | } | |
2196 | ||
2197 | return elt; | |
2198 | } | |
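
/*
 * A minimal sketch of the pattern skb_cow_data() was built for
 * (illustrative only, modelled loosely on IPsec ESP output; the
 * trailer handling mirrors the ESP pskb_put() helper and the transform
 * step is omitted). The return value is exactly the scatterlist size
 * that skb_to_sgvec() will need.
 */
#if 0	/* usage example, not compiled */
static int example_add_trailer(struct sk_buff *skb, int tlen)
{
	struct sk_buff *trailer;
	struct scatterlist *sg;
	int nsg;

	nsg = skb_cow_data(skb, tlen, &trailer);
	if (nsg < 0)
		return nsg;

	/* Claim the @tlen bytes of tailroom in @trailer. */
	if (trailer != skb) {
		skb->data_len += tlen;
		skb->len += tlen;
	}
	skb_put(trailer, tlen);

	sg = kmalloc(nsg * sizeof(*sg), GFP_ATOMIC);
	if (!sg)
		return -ENOMEM;
	skb_to_sgvec(skb, sg, 0, skb->len);
	/* ... hand @sg to the transform here ... */
	kfree(sg);
	return 0;
}
#endif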
2199 | ||
2200 | EXPORT_SYMBOL(___pskb_trim); | |
2201 | EXPORT_SYMBOL(__kfree_skb); | |
2202 | EXPORT_SYMBOL(kfree_skb); | |
2203 | EXPORT_SYMBOL(__pskb_pull_tail); | |
2204 | EXPORT_SYMBOL(__alloc_skb); | |
2205 | EXPORT_SYMBOL(__netdev_alloc_skb); | |
2206 | EXPORT_SYMBOL(pskb_copy); | |
2207 | EXPORT_SYMBOL(pskb_expand_head); | |
2208 | EXPORT_SYMBOL(skb_checksum); | |
2209 | EXPORT_SYMBOL(skb_clone); | |
2210 | EXPORT_SYMBOL(skb_clone_fraglist); | |
2211 | EXPORT_SYMBOL(skb_copy); | |
2212 | EXPORT_SYMBOL(skb_copy_and_csum_bits); | |
2213 | EXPORT_SYMBOL(skb_copy_and_csum_dev); | |
2214 | EXPORT_SYMBOL(skb_copy_bits); | |
2215 | EXPORT_SYMBOL(skb_copy_expand); | |
2216 | EXPORT_SYMBOL(skb_over_panic); | |
2217 | EXPORT_SYMBOL(skb_pad); | |
2218 | EXPORT_SYMBOL(skb_realloc_headroom); | |
2219 | EXPORT_SYMBOL(skb_under_panic); | |
2220 | EXPORT_SYMBOL(skb_dequeue); | |
2221 | EXPORT_SYMBOL(skb_dequeue_tail); | |
2222 | EXPORT_SYMBOL(skb_insert); | |
2223 | EXPORT_SYMBOL(skb_queue_purge); | |
2224 | EXPORT_SYMBOL(skb_queue_head); | |
2225 | EXPORT_SYMBOL(skb_queue_tail); | |
2226 | EXPORT_SYMBOL(skb_unlink); | |
2227 | EXPORT_SYMBOL(skb_append); | |
2228 | EXPORT_SYMBOL(skb_split); | |
2229 | EXPORT_SYMBOL(skb_prepare_seq_read); | |
2230 | EXPORT_SYMBOL(skb_seq_read); | |
2231 | EXPORT_SYMBOL(skb_abort_seq_read); | |
2232 | EXPORT_SYMBOL(skb_find_text); | |
2233 | EXPORT_SYMBOL(skb_append_datato_frags); | |
2234 | ||
2235 | EXPORT_SYMBOL_GPL(skb_to_sgvec); | |
2236 | EXPORT_SYMBOL_GPL(skb_cow_data); |