]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Routines having to do with the 'struct sk_buff' memory handlers. | |
3 | * | |
4 | * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> | |
5 | * Florian La Roche <rzsfl@rz.uni-sb.de> | |
6 | * | |
7 | * Fixes: | |
8 | * Alan Cox : Fixed the worst of the load | |
9 | * balancer bugs. | |
10 | * Dave Platt : Interrupt stacking fix. | |
11 | * Richard Kooijman : Timestamp fixes. | |
12 | * Alan Cox : Changed buffer format. | |
13 | * Alan Cox : destructor hook for AF_UNIX etc. | |
14 | * Linus Torvalds : Better skb_clone. | |
15 | * Alan Cox : Added skb_copy. | |
16 | * Alan Cox : Added all the changed routines Linus | |
17 | * only put in the headers | |
18 | * Ray VanTassle : Fixed --skb->lock in free | |
19 | * Alan Cox : skb_copy copy arp field | |
20 | * Andi Kleen : slabified it. | |
21 | * Robert Olsson : Removed skb_head_pool | |
22 | * | |
23 | * NOTE: | |
24 | * The __skb_ routines should be called with interrupts | |
25 | * disabled, or you better be *real* sure that the operation is atomic | |
26 | * with respect to whatever list is being frobbed (e.g. via lock_sock() | |
27 | * or via disabling bottom half handlers, etc). | |
28 | * | |
29 | * This program is free software; you can redistribute it and/or | |
30 | * modify it under the terms of the GNU General Public License | |
31 | * as published by the Free Software Foundation; either version | |
32 | * 2 of the License, or (at your option) any later version. | |
33 | */ | |
34 | ||
35 | /* | |
36 | * The functions in this file will not compile correctly with gcc 2.4.x | |
37 | */ | |
38 | ||
39 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
40 | ||
41 | #include <linux/module.h> | |
42 | #include <linux/types.h> | |
43 | #include <linux/kernel.h> | |
44 | #include <linux/kmemcheck.h> | |
45 | #include <linux/mm.h> | |
46 | #include <linux/interrupt.h> | |
47 | #include <linux/in.h> | |
48 | #include <linux/inet.h> | |
49 | #include <linux/slab.h> | |
50 | #include <linux/tcp.h> | |
51 | #include <linux/udp.h> | |
52 | #include <linux/netdevice.h> | |
53 | #ifdef CONFIG_NET_CLS_ACT | |
54 | #include <net/pkt_sched.h> | |
55 | #endif | |
56 | #include <linux/string.h> | |
57 | #include <linux/skbuff.h> | |
58 | #include <linux/splice.h> | |
59 | #include <linux/cache.h> | |
60 | #include <linux/rtnetlink.h> | |
61 | #include <linux/init.h> | |
62 | #include <linux/scatterlist.h> | |
63 | #include <linux/errqueue.h> | |
64 | #include <linux/prefetch.h> | |
65 | #include <linux/if_vlan.h> | |
66 | ||
67 | #include <net/protocol.h> | |
68 | #include <net/dst.h> | |
69 | #include <net/sock.h> | |
70 | #include <net/checksum.h> | |
71 | #include <net/ip6_checksum.h> | |
72 | #include <net/xfrm.h> | |
73 | ||
74 | #include <asm/uaccess.h> | |
75 | #include <trace/events/skb.h> | |
76 | #include <linux/highmem.h> | |
77 | #include <linux/capability.h> | |
78 | #include <linux/user_namespace.h> | |
79 | ||
80 | struct kmem_cache *skbuff_head_cache __read_mostly; | |
81 | static struct kmem_cache *skbuff_fclone_cache __read_mostly; | |
82 | ||
/**
 * skb_panic - private function for out-of-line support
 * @skb: buffer
 * @sz: size
 * @addr: address
 * @msg: skb_over_panic or skb_under_panic
 *
 * Out-of-line support for skb_put() and skb_push().
 * Called via the wrapper skb_over_panic() or skb_under_panic().
 * Keep out of line to prevent kernel bloat.
 * __builtin_return_address is not used because it is not always reliable.
 */
static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
		      const char msg[])
{
	/* Dump everything needed to diagnose the over/underrun before
	 * dying: faulting text address, skb geometry and device name.
	 */
	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
		 msg, addr, skb->len, sz, skb->head, skb->data,
		 (unsigned long)skb->tail, (unsigned long)skb->end,
		 skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}
104 | ||
/* Out-of-line trap for writing past skb->end; __func__ supplies the
 * "skb_over_panic" message so the call sites stay tiny.
 */
static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}
109 | ||
/* Out-of-line trap for writing before skb->head; __func__ supplies the
 * "skb_under_panic" message so the call sites stay tiny.
 */
static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}
114 | ||
/*
 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
 * the caller if emergency pfmemalloc reserves are being used. If it is and
 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
 * may be used. Otherwise, the packet data may be discarded until enough
 * memory is free
 */
#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)

static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
			       unsigned long ip, bool *pfmemalloc)
{
	void *obj;
	bool ret_pfmemalloc = false;

	/*
	 * Try a regular allocation, when that fails and we're not entitled
	 * to the reserves, fail.
	 */
	obj = kmalloc_node_track_caller(size,
					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
					node);
	if (obj || !(gfp_pfmemalloc_allowed(flags)))
		goto out;

	/* Try again but now we are using pfmemalloc reserves */
	ret_pfmemalloc = true;
	obj = kmalloc_node_track_caller(size, flags, node);

out:
	/* @pfmemalloc is optional; when given, report whether the second
	 * (reserve-backed) attempt was needed.
	 */
	if (pfmemalloc)
		*pfmemalloc = ret_pfmemalloc;

	return obj;
}
151 | ||
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
 * 'private' fields and also do memory statistics to find all the
 * [BEEP] leaks.
 *
 */

/**
 * __alloc_skb_head - allocate an sk_buff head without a data buffer
 * @gfp_mask: allocation mask
 * @node: numa node to allocate memory on
 *
 * Allocate a head-only &sk_buff: skb->head is left %NULL, so the caller
 * is expected to attach data separately. The object has a reference
 * count of one. Returns %NULL on allocation failure.
 */
struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
{
	struct sk_buff *skb;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(skbuff_head_cache,
				    gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;

	/*
	 * Only clear those fields we need to clear, not those that we will
	 * actually initialise below. Hence, don't put any more fields after
	 * the tail pointer in struct sk_buff!
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb->head = NULL;
	/* no data area: only the shell itself is accounted */
	skb->truesize = sizeof(struct sk_buff);
	atomic_set(&skb->users, 1);

	/* ~0U marks the mac header as "not set" */
	skb->mac_header = (typeof(skb->mac_header))~0U;
out:
	return skb;
}
182 | ||
/**
 * __alloc_skb - allocate a network buffer
 * @size: size to allocate
 * @gfp_mask: allocation mask
 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
 *	instead of head cache and allocate a cloned (child) skb.
 *	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
 *	allocations in case the data is required for writeback
 * @node: numa node to allocate memory on
 *
 * Allocate a new &sk_buff. The returned buffer has no headroom and a
 * tail room of at least size bytes. The object has a reference count
 * of one. The return is the buffer. On a failure the return is %NULL.
 *
 * Buffers may only be allocated from interrupts using a @gfp_mask of
 * %GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
			    int flags, int node)
{
	struct kmem_cache *cache;
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	u8 *data;
	bool pfmemalloc;

	cache = (flags & SKB_ALLOC_FCLONE)
		? skbuff_fclone_cache : skbuff_head_cache;

	/* RX allocations may dip into pfmemalloc reserves when memalloc
	 * sockets exist (data may be needed to make forward progress).
	 */
	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
		gfp_mask |= __GFP_MEMALLOC;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;
	prefetchw(skb);

	/* We do our best to align skb_shared_info on a separate cache
	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
	 * Both skb->head and skb_shared_info are cache line aligned.
	 */
	size = SKB_DATA_ALIGN(size);
	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
	if (!data)
		goto nodata;
	/* kmalloc(size) might give us more room than requested.
	 * Put skb_shared_info exactly at the end of allocated zone,
	 * to allow max possible filling before reallocation.
	 */
	size = SKB_WITH_OVERHEAD(ksize(data));
	prefetchw(data + size);

	/*
	 * Only clear those fields we need to clear, not those that we will
	 * actually initialise below. Hence, don't put any more fields after
	 * the tail pointer in struct sk_buff!
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	/* Account for allocated memory : skb + skb->head */
	skb->truesize = SKB_TRUESIZE(size);
	skb->pfmemalloc = pfmemalloc;
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	/* ~0U marks these headers as "not set yet" */
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);
	kmemcheck_annotate_variable(shinfo->destructor_arg);

	if (flags & SKB_ALLOC_FCLONE) {
		struct sk_buff_fclones *fclones;

		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
		skb->fclone = SKB_FCLONE_ORIG;
		atomic_set(&fclones->fclone_ref, 1);

		/* pre-mark the companion slot so skb_clone() can use it */
		fclones->skb2.fclone = SKB_FCLONE_CLONE;
		fclones->skb2.pfmemalloc = pfmemalloc;
	}
out:
	return skb;
nodata:
	/* data allocation failed: release the shell and report %NULL */
	kmem_cache_free(cache, skb);
	skb = NULL;
	goto out;
}
EXPORT_SYMBOL(__alloc_skb);
281 | ||
/**
 * __build_skb - build a network buffer
 * @data: data buffer provided by caller
 * @frag_size: size of data, or 0 if head was kmalloced
 *
 * Allocate a new &sk_buff. Caller provides space holding head and
 * skb_shared_info. @data must have been allocated by kmalloc() only if
 * @frag_size is 0, otherwise data should come from the page allocator
 * or vmalloc()
 * The return is the new skb buffer.
 * On a failure the return is %NULL, and @data is not freed.
 * Notes :
 * Before IO, driver allocates only data buffer where NIC put incoming frame
 * Driver should add room at head (NET_SKB_PAD) and
 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
 * After IO, driver calls build_skb(), to allocate sk_buff and populate it
 * before giving packet to stack.
 * RX rings only contains data buffers, not full skbs.
 */
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	/* for kmalloced heads (frag_size == 0) derive the usable size */
	unsigned int size = frag_size ? : ksize(data);

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (!skb)
		return NULL;

	/* skb_shared_info lives at the end of the buffer */
	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	/*
	 * Only clear the fields before 'tail'; the rest is initialised
	 * explicitly below (same layout rule as __alloc_skb()).
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb->truesize = SKB_TRUESIZE(size);
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	/* ~0U marks these headers as "not set yet" */
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);
	kmemcheck_annotate_variable(shinfo->destructor_arg);

	return skb;
}
331 | ||
332 | /* build_skb() is wrapper over __build_skb(), that specifically | |
333 | * takes care of skb->head and skb->pfmemalloc | |
334 | * This means that if @frag_size is not zero, then @data must be backed | |
335 | * by a page fragment, not kmalloc() or vmalloc() | |
336 | */ | |
337 | struct sk_buff *build_skb(void *data, unsigned int frag_size) | |
338 | { | |
339 | struct sk_buff *skb = __build_skb(data, frag_size); | |
340 | ||
341 | if (skb && frag_size) { | |
342 | skb->head_frag = 1; | |
343 | if (virt_to_head_page(data)->pfmemalloc) | |
344 | skb->pfmemalloc = 1; | |
345 | } | |
346 | return skb; | |
347 | } | |
348 | EXPORT_SYMBOL(build_skb); | |
349 | ||
/* Per-cpu cache of a (possibly high-order) page that is carved up into
 * fragments by __alloc_page_frag().
 */
struct netdev_alloc_cache {
	void * va;			/* kernel address of the current page */
#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE)
	__u16 offset;			/* next carve position (counts down) */
	__u16 size;			/* size of the current page */
#else
	__u32 offset;
#endif
	/* we maintain a pagecount bias, so that we dont dirty cache line
	 * containing page->_count every time we allocate a fragment.
	 */
	unsigned int pagecnt_bias;
	bool pfmemalloc;		/* page came from pfmemalloc reserves */
};
/* IRQ-protected cache for generic driver use, and a separate cache for
 * NAPI (softirq) context that needs no IRQ disabling.
 */
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
366 | ||
/* Refill @nc with a fresh page and record its address in nc->va.
 * Returns the page, or %NULL (with nc->va = NULL) on failure.
 */
static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
				       gfp_t gfp_mask)
{
	struct page *page = NULL;
	gfp_t gfp = gfp_mask;	/* keep caller's flags for the order-0 fallback */

#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE)
	/* Opportunistically try a high-order page first, but never warn,
	 * retry or tap reserves for it: we can fall back to order-0 below.
	 */
	gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
		    __GFP_NOMEMALLOC;
	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
				NETDEV_FRAG_PAGE_MAX_ORDER);
	nc->size = page ? NETDEV_FRAG_PAGE_MAX_SIZE : PAGE_SIZE;
#endif
	if (unlikely(!page))
		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);

	nc->va = page ? page_address(page) : NULL;

	return page;
}
387 | ||
/* Carve a @fragsz-byte fragment out of the per-cpu page held in @nc,
 * refilling the page when needed. Returns a kernel address, or %NULL.
 * Caller is responsible for any required IRQ protection of @nc.
 */
static void *__alloc_page_frag(struct netdev_alloc_cache *nc,
			       unsigned int fragsz, gfp_t gfp_mask)
{
	unsigned int size = PAGE_SIZE;
	struct page *page;
	int offset;

	if (unlikely(!nc->va)) {
refill:
		page = __page_frag_refill(nc, gfp_mask);
		if (!page)
			return NULL;

#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE)
		/* if size can vary use size else just use PAGE_SIZE */
		size = nc->size;
#endif
		/* Even if we own the page, we do not use atomic_set().
		 * This would break get_page_unless_zero() users.
		 */
		atomic_add(size - 1, &page->_count);

		/* reset page count bias and offset to start of new frag */
		nc->pfmemalloc = page->pfmemalloc;
		nc->pagecnt_bias = size;
		nc->offset = size;
	}

	/* fragments are carved from the top of the page downwards */
	offset = nc->offset - fragsz;
	if (unlikely(offset < 0)) {
		/* Page exhausted: drop our remaining bias. If someone else
		 * still holds a reference we must get a fresh page;
		 * otherwise we own the page again and can recycle it.
		 */
		page = virt_to_page(nc->va);

		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
			goto refill;

#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE)
		/* if size can vary use size else just use PAGE_SIZE */
		size = nc->size;
#endif
		/* OK, page count is 0, we can safely set it */
		atomic_set(&page->_count, size);

		/* reset page count bias and offset to start of new frag */
		nc->pagecnt_bias = size;
		offset = size - fragsz;
	}

	nc->pagecnt_bias--;
	nc->offset = offset;

	return nc->va + offset;
}
440 | ||
441 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
442 | { | |
443 | struct netdev_alloc_cache *nc; | |
444 | unsigned long flags; | |
445 | void *data; | |
446 | ||
447 | local_irq_save(flags); | |
448 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
449 | data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
450 | local_irq_restore(flags); | |
451 | return data; | |
452 | } | |
453 | ||
/**
 * netdev_alloc_frag - allocate a page fragment
 * @fragsz: fragment size
 *
 * Allocates a frag from a page for receive buffer.
 * Uses GFP_ATOMIC allocations.
 * Returns %NULL if the per-cpu page cannot be (re)filled.
 */
void *netdev_alloc_frag(unsigned int fragsz)
{
	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
}
EXPORT_SYMBOL(netdev_alloc_frag);
466 | ||
467 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
468 | { | |
469 | struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
470 | ||
471 | return __alloc_page_frag(nc, fragsz, gfp_mask); | |
472 | } | |
473 | ||
/**
 * napi_alloc_frag - allocate a page fragment from the NAPI cache
 * @fragsz: fragment size
 *
 * Uses GFP_ATOMIC allocations. No IRQ protection is taken, so this is
 * intended for NAPI (softirq) context. Returns %NULL on failure.
 */
void *napi_alloc_frag(unsigned int fragsz)
{
	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
}
EXPORT_SYMBOL(napi_alloc_frag);
479 | ||
/**
 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
 * @dev: network device to receive on
 * @len: length to allocate
 * @gfp_mask: get_free_pages mask, passed to alloc_skb
 *
 * Allocate a new &sk_buff and assign it a usage count of one. The
 * buffer has NET_SKB_PAD headroom built in. Users should allocate
 * the headroom they think they need without accounting for the
 * built in space. The built in space is used for optimisations.
 *
 * %NULL is returned if there is no free memory.
 */
struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
				   gfp_t gfp_mask)
{
	struct netdev_alloc_cache *nc;
	unsigned long flags;
	struct sk_buff *skb;
	bool pfmemalloc;
	void *data;

	len += NET_SKB_PAD;

	/* Requests too large for a page fragment, or ones that may sleep
	 * or need DMA memory, go through the regular kmalloc-backed path.
	 */
	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);

	/* reserve room for skb_shared_info at the end of the fragment */
	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	/* the netdev cache may be used from any context: IRQs off */
	local_irq_save(flags);

	nc = this_cpu_ptr(&netdev_alloc_cache);
	data = __alloc_page_frag(nc, len, gfp_mask);
	/* snapshot pfmemalloc while we still own the cache */
	pfmemalloc = nc->pfmemalloc;

	local_irq_restore(flags);

	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		/* give back the fragment's page reference */
		put_page(virt_to_head_page(data));
		return NULL;
	}

	/* use OR instead of assignment to avoid clearing of bits in mask */
	if (pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

	skb_reserve(skb, NET_SKB_PAD);
	skb->dev = dev;

	return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);
542 | ||
/**
 * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
 * @napi: napi instance this buffer was allocated for
 * @len: length to allocate
 * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
 *
 * Allocate a new sk_buff for use in NAPI receive. This buffer will
 * attempt to allocate the head from a special reserved region used
 * only for NAPI Rx allocation. By doing this we can save several
 * CPU cycles by avoiding having to disable and re-enable IRQs.
 *
 * %NULL is returned if there is no free memory.
 */
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
				 gfp_t gfp_mask)
{
	struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
	struct sk_buff *skb;
	void *data;

	len += NET_SKB_PAD + NET_IP_ALIGN;

	/* Requests too large for a page fragment, or ones that may sleep
	 * or need DMA memory, go through the regular kmalloc-backed path.
	 */
	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);

	/* reserve room for skb_shared_info at the end of the fragment */
	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	data = __alloc_page_frag(nc, len, gfp_mask);
	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		/* give back the fragment's page reference */
		put_page(virt_to_head_page(data));
		return NULL;
	}

	/* use OR instead of assignment to avoid clearing of bits in mask */
	if (nc->pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	skb->dev = napi->dev;

	return skb;
}
EXPORT_SYMBOL(__napi_alloc_skb);
596 | ||
597 | void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, | |
598 | int size, unsigned int truesize) | |
599 | { | |
600 | skb_fill_page_desc(skb, i, page, off, size); | |
601 | skb->len += size; | |
602 | skb->data_len += size; | |
603 | skb->truesize += truesize; | |
604 | } | |
605 | EXPORT_SYMBOL(skb_add_rx_frag); | |
606 | ||
607 | void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, | |
608 | unsigned int truesize) | |
609 | { | |
610 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | |
611 | ||
612 | skb_frag_size_add(frag, size); | |
613 | skb->len += size; | |
614 | skb->data_len += size; | |
615 | skb->truesize += truesize; | |
616 | } | |
617 | EXPORT_SYMBOL(skb_coalesce_rx_frag); | |
618 | ||
619 | static void skb_drop_list(struct sk_buff **listp) | |
620 | { | |
621 | kfree_skb_list(*listp); | |
622 | *listp = NULL; | |
623 | } | |
624 | ||
/* Free and detach @skb's entire frag_list chain. */
static inline void skb_drop_fraglist(struct sk_buff *skb)
{
	skb_drop_list(&skb_shinfo(skb)->frag_list);
}
629 | ||
/* Take an extra reference on every skb in @skb's frag_list, so a clone
 * can share the chain safely.
 */
static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	skb_walk_frags(skb, list)
		skb_get(list);
}
637 | ||
638 | static void skb_free_head(struct sk_buff *skb) | |
639 | { | |
640 | if (skb->head_frag) | |
641 | put_page(virt_to_head_page(skb->head)); | |
642 | else | |
643 | kfree(skb->head); | |
644 | } | |
645 | ||
/* Drop @skb's reference on its data area; actually free frags, the
 * frag_list and the head only when the last dataref goes away.
 */
static void skb_release_data(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	/* nohdr skbs hold an extra header reference encoded in the high
	 * bits of dataref (see SKB_DATAREF_SHIFT); drop the right amount.
	 */
	if (skb->cloned &&
	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
			      &shinfo->dataref))
		return;

	for (i = 0; i < shinfo->nr_frags; i++)
		__skb_frag_unref(&shinfo->frags[i]);

	/*
	 * If skb buf is from userspace, we need to notify the caller
	 * the lower device DMA has done;
	 */
	if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
		struct ubuf_info *uarg;

		uarg = shinfo->destructor_arg;
		if (uarg->callback)
			uarg->callback(uarg, true);
	}

	if (shinfo->frag_list)
		kfree_skb_list(shinfo->frag_list);

	skb_free_head(skb);
}
676 | ||
/*
 * Free an skbuff by memory without cleaning the state.
 */
static void kfree_skbmem(struct sk_buff *skb)
{
	struct sk_buff_fclones *fclones;

	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
		/* plain skb: just return it to the head cache */
		kmem_cache_free(skbuff_head_cache, skb);
		return;

	case SKB_FCLONE_ORIG:
		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		/* We usually free the clone (TX completion) before original skb
		 * This test would have no chance to be true for the clone,
		 * while here, branch prediction will be good.
		 */
		if (atomic_read(&fclones->fclone_ref) == 1)
			goto fastpath;
		break;

	default: /* SKB_FCLONE_CLONE */
		fclones = container_of(skb, struct sk_buff_fclones, skb2);
		break;
	}
	/* the companion skb still holds a reference; last one frees */
	if (!atomic_dec_and_test(&fclones->fclone_ref))
		return;
fastpath:
	kmem_cache_free(skbuff_fclone_cache, fclones);
}
709 | ||
/* Release everything attached to the skb head: dst entry, security
 * path, destructor, and netfilter state.
 */
static void skb_release_head_state(struct sk_buff *skb)
{
	skb_dst_drop(skb);
#ifdef CONFIG_XFRM
	secpath_put(skb->sp);
#endif
	if (skb->destructor) {
		/* destructors (e.g. socket accounting) must not run in
		 * hard-IRQ context
		 */
		WARN_ON(in_irq());
		skb->destructor(skb);
	}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	nf_conntrack_put(skb->nfct);
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	nf_bridge_put(skb->nf_bridge);
#endif
}
727 | ||
/* Free everything but the sk_buff shell. */
static void skb_release_all(struct sk_buff *skb)
{
	skb_release_head_state(skb);
	/* head-only skbs (__alloc_skb_head) have no data to release */
	if (likely(skb->head))
		skb_release_data(skb);
}
735 | ||
/**
 * __kfree_skb - private function
 * @skb: buffer
 *
 * Free an sk_buff. Release anything attached to the buffer.
 * Clean the state. This is an internal helper function. Users should
 * always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
	skb_release_all(skb);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
751 | ||
/**
 * kfree_skb - free an sk_buff
 * @skb: buffer to free
 *
 * Drop a reference to the buffer and free it if the usage count has
 * hit zero.
 */
void kfree_skb(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;
	/* Fast path: if we hold the only reference, skip the atomic
	 * decrement; a read barrier is enough to order against the prior
	 * reference drops.
	 */
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;
	trace_kfree_skb(skb, __builtin_return_address(0));
	__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb);
771 | ||
772 | void kfree_skb_list(struct sk_buff *segs) | |
773 | { | |
774 | while (segs) { | |
775 | struct sk_buff *next = segs->next; | |
776 | ||
777 | kfree_skb(segs); | |
778 | segs = next; | |
779 | } | |
780 | } | |
781 | EXPORT_SYMBOL(kfree_skb_list); | |
782 | ||
/**
 * skb_tx_error - report an sk_buff xmit error
 * @skb: buffer that triggered an error
 *
 * Report xmit error if a device callback is tracking this skb.
 * skb must be freed afterwards.
 */
void skb_tx_error(struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		struct ubuf_info *uarg;

		/* notify the zerocopy owner of the failure (success=false) */
		uarg = skb_shinfo(skb)->destructor_arg;
		if (uarg->callback)
			uarg->callback(uarg, false);
		/* prevent a second callback when the skb is freed */
		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
	}
}
EXPORT_SYMBOL(skb_tx_error);
802 | ||
/**
 * consume_skb - free an skbuff
 * @skb: buffer to free
 *
 * Drop a ref to the buffer and free it if the usage count has hit zero
 * Functions identically to kfree_skb, but kfree_skb assumes that the frame
 * is being dropped after a failure and notes that
 */
void consume_skb(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;
	/* same fast path as kfree_skb(): avoid the atomic op when we hold
	 * the last reference
	 */
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;
	/* only the tracepoint differs from kfree_skb() */
	trace_consume_skb(skb);
	__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
823 | ||
/* Make sure a field is enclosed inside headers_start/headers_end section */
#define CHECK_SKB_FIELD(field) \
	BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
		     offsetof(struct sk_buff, headers_start));	\
	BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
		     offsetof(struct sk_buff, headers_end));	\

/* Copy all header state (but not data pointers or refcounts) from @old
 * to @new. Fields between headers_start and headers_end are copied with
 * a single memcpy; the CHECK_SKB_FIELDs are compile-time proofs that
 * every expected field lives inside that span.
 */
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	new->tstamp = old->tstamp;
	/* We do not copy old->sk */
	new->dev = old->dev;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	skb_dst_copy(new, old);
#ifdef CONFIG_XFRM
	new->sp = secpath_get(old->sp);
#endif
	__nf_copy(new, old, false);

	/* Note : this field could be in headers_start/headers_end section
	 * It is not yet because we do not want to have a 16 bit hole
	 */
	new->queue_mapping = old->queue_mapping;

	memcpy(&new->headers_start, &old->headers_start,
	       offsetof(struct sk_buff, headers_end) -
	       offsetof(struct sk_buff, headers_start));
	CHECK_SKB_FIELD(protocol);
	CHECK_SKB_FIELD(csum);
	CHECK_SKB_FIELD(hash);
	CHECK_SKB_FIELD(priority);
	CHECK_SKB_FIELD(skb_iif);
	CHECK_SKB_FIELD(vlan_proto);
	CHECK_SKB_FIELD(vlan_tci);
	CHECK_SKB_FIELD(transport_header);
	CHECK_SKB_FIELD(network_header);
	CHECK_SKB_FIELD(mac_header);
	CHECK_SKB_FIELD(inner_protocol);
	CHECK_SKB_FIELD(inner_transport_header);
	CHECK_SKB_FIELD(inner_network_header);
	CHECK_SKB_FIELD(inner_mac_header);
	CHECK_SKB_FIELD(mark);
#ifdef CONFIG_NETWORK_SECMARK
	CHECK_SKB_FIELD(secmark);
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
	CHECK_SKB_FIELD(napi_id);
#endif
#ifdef CONFIG_XPS
	CHECK_SKB_FIELD(sender_cpu);
#endif
#ifdef CONFIG_NET_SCHED
	CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	CHECK_SKB_FIELD(tc_verd);
#endif
#endif

}
883 | ||
/*
 * You should not add any new code to this function. Add it to
 * __copy_skb_header above instead.
 */
static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
{
#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->sk = NULL;
	__copy_skb_header(n, skb);

	C(len);
	C(data_len);
	C(mac_len);
	/* a nohdr skb keeps its headers in the (private) headroom */
	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
	n->cloned = 1;
	n->nohdr = 0;
	n->destructor = NULL;
	C(tail);
	C(end);
	C(head);
	C(head_frag);
	C(data);
	C(truesize);
	atomic_set(&n->users, 1);

	/* both skbs now reference the same data area: bump dataref and
	 * mark the original as cloned too
	 */
	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
#undef C
}
917 | ||
/**
 *	skb_morph	-	morph one skb into another
 *	@dst: the skb to receive the contents
 *	@src: the skb to supply the contents
 *
 *	This is identical to skb_clone except that the target skb is
 *	supplied by the user.
 *
 *	The target skb is returned upon exit.
 */
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
{
	/* Drop everything @dst currently references before overwriting it */
	skb_release_all(dst);
	return __skb_clone(dst, src);
}
EXPORT_SYMBOL_GPL(skb_morph);
934 | ||
935 | /** | |
936 | * skb_copy_ubufs - copy userspace skb frags buffers to kernel | |
937 | * @skb: the skb to modify | |
938 | * @gfp_mask: allocation priority | |
939 | * | |
940 | * This must be called on SKBTX_DEV_ZEROCOPY skb. | |
941 | * It will copy all frags into kernel and drop the reference | |
942 | * to userspace pages. | |
943 | * | |
944 | * If this function is called from an interrupt gfp_mask() must be | |
945 | * %GFP_ATOMIC. | |
946 | * | |
947 | * Returns 0 on success or a negative error code on failure | |
948 | * to allocate kernel memory to copy to. | |
949 | */ | |
950 | int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) | |
951 | { | |
952 | int i; | |
953 | int num_frags = skb_shinfo(skb)->nr_frags; | |
954 | struct page *page, *head = NULL; | |
955 | struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; | |
956 | ||
957 | for (i = 0; i < num_frags; i++) { | |
958 | u8 *vaddr; | |
959 | skb_frag_t *f = &skb_shinfo(skb)->frags[i]; | |
960 | ||
961 | page = alloc_page(gfp_mask); | |
962 | if (!page) { | |
963 | while (head) { | |
964 | struct page *next = (struct page *)page_private(head); | |
965 | put_page(head); | |
966 | head = next; | |
967 | } | |
968 | return -ENOMEM; | |
969 | } | |
970 | vaddr = kmap_atomic(skb_frag_page(f)); | |
971 | memcpy(page_address(page), | |
972 | vaddr + f->page_offset, skb_frag_size(f)); | |
973 | kunmap_atomic(vaddr); | |
974 | set_page_private(page, (unsigned long)head); | |
975 | head = page; | |
976 | } | |
977 | ||
978 | /* skb frags release userspace buffers */ | |
979 | for (i = 0; i < num_frags; i++) | |
980 | skb_frag_unref(skb, i); | |
981 | ||
982 | uarg->callback(uarg, false); | |
983 | ||
984 | /* skb frags point to kernel buffers */ | |
985 | for (i = num_frags - 1; i >= 0; i--) { | |
986 | __skb_fill_page_desc(skb, i, head, 0, | |
987 | skb_shinfo(skb)->frags[i].size); | |
988 | head = (struct page *)page_private(head); | |
989 | } | |
990 | ||
991 | skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; | |
992 | return 0; | |
993 | } | |
994 | EXPORT_SYMBOL_GPL(skb_copy_ubufs); | |
995 | ||
/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt gfp_mask() must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
	/* If @skb came from the fclone cache, its companion clone sits
	 * next to it in the same sk_buff_fclones object; this pointer is
	 * only trusted after the SKB_FCLONE_ORIG check below.
	 */
	struct sk_buff_fclones *fclones = container_of(skb,
						       struct sk_buff_fclones,
						       skb1);
	struct sk_buff *n;

	/* Zerocopy frags must be copied to kernel memory before sharing */
	if (skb_orphan_frags(skb, gfp_mask))
		return NULL;

	if (skb->fclone == SKB_FCLONE_ORIG &&
	    atomic_read(&fclones->fclone_ref) == 1) {
		/* Fast path: hand out the pre-allocated companion clone */
		n = &fclones->skb2;
		atomic_set(&fclones->fclone_ref, 2);
	} else {
		if (skb_pfmemalloc(skb))
			gfp_mask |= __GFP_MEMALLOC;

		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;

		kmemcheck_annotate_bitfield(n, flags1);
		n->fclone = SKB_FCLONE_UNAVAILABLE;
	}

	return __skb_clone(n, skb);
}
EXPORT_SYMBOL(skb_clone);
1039 | ||
/* Shift every head-relative header offset in @skb by @off bytes.
 * Called after the head buffer was reallocated or the headroom changed.
 */
static void skb_headers_offset_update(struct sk_buff *skb, int off)
{
	/* Only adjust this if it actually is csum_start rather than csum */
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		skb->csum_start += off;
	/* {transport,network,mac}_header and tail are relative to skb->head */
	skb->transport_header += off;
	skb->network_header += off;
	if (skb_mac_header_was_set(skb))
		skb->mac_header += off;
	skb->inner_transport_header += off;
	skb->inner_network_header += off;
	skb->inner_mac_header += off;
}
1054 | ||
1055 | static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |
1056 | { | |
1057 | __copy_skb_header(new, old); | |
1058 | ||
1059 | skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; | |
1060 | skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; | |
1061 | skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; | |
1062 | } | |
1063 | ||
1064 | static inline int skb_alloc_rx_flag(const struct sk_buff *skb) | |
1065 | { | |
1066 | if (skb_pfmemalloc(skb)) | |
1067 | return SKB_ALLOC_RX; | |
1068 | return 0; | |
1069 | } | |
1070 | ||
/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As by-product this function converts non-linear &sk_buff to linear
 *	one, so that &sk_buff becomes completely private and caller is allowed
 *	to modify all the data of returned buffer. This means that this
 *	function is not recommended for use in circumstances when only
 *	header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
	int headerlen = skb_headroom(skb);
	/* New linear buffer must hold the old head area plus all paged data */
	unsigned int size = skb_end_offset(skb) + skb->data_len;
	struct sk_buff *n = __alloc_skb(size, gfp_mask,
					skb_alloc_rx_flag(skb), NUMA_NO_NODE);

	if (!n)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Negative offset makes skb_copy_bits() copy the headroom too */
	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
EXPORT_SYMBOL(skb_copy);
1110 | ||
/**
 *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@headroom: headroom of new skb
 *	@gfp_mask: allocation priority
 *	@fclone: if true allocate the copy of the skb from the fclone
 *	cache instead of the head cache; it is recommended to set this
 *	to true for the cases where the copy will likely be cloned
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in header. Fragmented data remain shared. This is used when
 *	the caller wishes to modify only header of &sk_buff and needs
 *	private copy of the header to alter. Returns %NULL on failure
 *	or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
				   gfp_t gfp_mask, bool fclone)
{
	unsigned int size = skb_headlen(skb) + headroom;
	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);

	if (!n)
		goto out;

	/* Set the data pointer */
	skb_reserve(n, headroom);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	skb_copy_from_linear_data(skb, n->data, n->len);

	/* The paged part remains shared; account for it in the copy */
	n->truesize += skb->data_len;
	n->data_len = skb->data_len;
	n->len = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		/* Userspace (zerocopy) frags must be copied before sharing */
		if (skb_orphan_frags(skb, gfp_mask)) {
			kfree_skb(n);
			n = NULL;
			goto out;
		}
		/* Share each page frag, taking a reference per frag */
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			skb_frag_ref(skb, i);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_has_frag_list(skb)) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);
out:
	return n;
}
EXPORT_SYMBOL(__pskb_copy_fclone);
1174 | ||
/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates identical copy, if @nhead and @ntail are zero)
 *	header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
 *	reference count of 1.  Returns zero in the case of success or error,
 *	if expansion failed. In the last case, &sk_buff is not changed.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
		     gfp_t gfp_mask)
{
	int i;
	u8 *data;
	int size = nhead + skb_end_offset(skb) + ntail;
	long off;

	BUG_ON(nhead < 0);

	/* A shared skb must never be reallocated under its other users */
	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	if (skb_pfmemalloc(skb))
		gfp_mask |= __GFP_MEMALLOC;
	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
			       gfp_mask, NUMA_NO_NODE, NULL);
	if (!data)
		goto nodata;
	/* kmalloc may round up; make use of whatever slack it gave us */
	size = SKB_WITH_OVERHEAD(ksize(data));

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void.
	 */
	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);

	/* Copy shared info up to and including the last used frag slot */
	memcpy((struct skb_shared_info *)(data + size),
	       skb_shinfo(skb),
	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));

	/*
	 * if shinfo is shared we must drop the old head gracefully, but if it
	 * is not we can just drop the old head and let the existing refcount
	 * be since all we did is relocate the values
	 */
	if (skb_cloned(skb)) {
		/* copy this zero copy skb frags */
		if (skb_orphan_frags(skb, gfp_mask))
			goto nofrags;
		/* The relocated shinfo needs its own frag/fraglist refs */
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
			skb_frag_ref(skb, i);

		if (skb_has_frag_list(skb))
			skb_clone_fraglist(skb);

		skb_release_data(skb);
	} else {
		skb_free_head(skb);
	}
	off = (data + nhead) - skb->head;

	skb->head = data;
	skb->head_frag = 0;
	skb->data += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	skb->end = size;
	/* Offset-based header fields only shift by the added headroom */
	off = nhead;
#else
	skb->end = skb->head + size;
#endif
	skb->tail += off;
	skb_headers_offset_update(skb, nhead);
	skb->cloned = 0;
	skb->hdr_len = 0;
	skb->nohdr = 0;
	/* The new head is private now: exactly one data reference */
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nofrags:
	kfree(data);
nodata:
	return -ENOMEM;
}
EXPORT_SYMBOL(pskb_expand_head);
1267 | ||
1268 | /* Make private copy of skb with writable head and some headroom */ | |
1269 | ||
1270 | struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) | |
1271 | { | |
1272 | struct sk_buff *skb2; | |
1273 | int delta = headroom - skb_headroom(skb); | |
1274 | ||
1275 | if (delta <= 0) | |
1276 | skb2 = pskb_copy(skb, GFP_ATOMIC); | |
1277 | else { | |
1278 | skb2 = skb_clone(skb, GFP_ATOMIC); | |
1279 | if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, | |
1280 | GFP_ATOMIC)) { | |
1281 | kfree_skb(skb2); | |
1282 | skb2 = NULL; | |
1283 | } | |
1284 | } | |
1285 | return skb2; | |
1286 | } | |
1287 | EXPORT_SYMBOL(skb_realloc_headroom); | |
1288 | ||
/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom, int newtailroom,
				gfp_t gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
					gfp_mask, skb_alloc_rx_flag(skb),
					NUMA_NO_NODE);
	int oldheadroom = skb_headroom(skb);
	int head_copy_len, head_copy_off;

	if (!n)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Copy at most min(oldheadroom, newheadroom) bytes of headroom;
	 * when the new headroom is larger, offset the destination so the
	 * copied header bytes still end exactly at the reserved area.
	 */
	head_copy_len = oldheadroom;
	head_copy_off = 0;
	if (newheadroom <= head_copy_len)
		head_copy_len = newheadroom;
	else
		head_copy_off = newheadroom - head_copy_len;

	/* Copy the linear header and data. */
	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
			  skb->len + head_copy_len))
		BUG();

	copy_skb_header(n, skb);

	/* Header offsets are head-relative; account for the headroom delta */
	skb_headers_offset_update(n, newheadroom - oldheadroom);

	return n;
}
EXPORT_SYMBOL(skb_copy_expand);
1347 | ||
/**
 *	skb_pad			-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return error in out of memory cases. The skb is freed on error.
 */

int skb_pad(struct sk_buff *skb, int pad)
{
	int err;
	int ntail;

	/* If the skbuff is non linear tailroom is always zero.. */
	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
		/* Fast path: room already available in the linear tail */
		memset(skb->data+skb->len, 0, pad);
		return 0;
	}

	/* Extra tail room needed once the paged data is linearized */
	ntail = skb->data_len + pad - (skb->end - skb->tail);
	if (likely(skb_cloned(skb) || ntail > 0)) {
		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
		if (unlikely(err))
			goto free_skb;
	}

	/* FIXME: The use of this function with non-linear skb's really needs
	 * to be audited.
	 */
	err = skb_linearize(skb);
	if (unlikely(err))
		goto free_skb;

	memset(skb->data + skb->len, 0, pad);
	return 0;

free_skb:
	/* Contract: the skb is consumed on error */
	kfree_skb(skb);
	return err;
}
EXPORT_SYMBOL(skb_pad);
1393 | ||
/**
 *	pskb_put - add data to the tail of a potentially fragmented buffer
 *	@skb: start of the buffer to use
 *	@tail: tail fragment of the buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the potentially
 *	fragmented buffer. @tail must be the last fragment of @skb -- or
 *	@skb itself. If this would exceed the total buffer size the kernel
 *	will panic. A pointer to the first byte of the extra data is
 *	returned.
 */

unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
{
	if (tail != skb) {
		/* Data grows in a fragment: the head skb only accounts it */
		skb->data_len += len;
		skb->len += len;
	}
	/* The actual tail pointer bump happens on @tail */
	return skb_put(tail, len);
}
EXPORT_SYMBOL_GPL(pskb_put);
1416 | ||
/**
 *	skb_put - add data to a buffer
 *	@skb: buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the buffer. If this would
 *	exceed the total buffer size the kernel will panic. A pointer to the
 *	first byte of the extra data is returned.
 */
unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
{
	/* Remember the old tail: that is where the new data starts */
	unsigned char *tmp = skb_tail_pointer(skb);
	/* Only valid on linear skbs (data_len == 0) */
	SKB_LINEAR_ASSERT(skb);
	skb->tail += len;
	skb->len  += len;
	if (unlikely(skb->tail > skb->end))
		skb_over_panic(skb, len, __builtin_return_address(0));
	return tmp;
}
EXPORT_SYMBOL(skb_put);
1437 | ||
/**
 *	skb_push - add data to the start of a buffer
 *	@skb: buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the buffer at the buffer
 *	start. If this would exceed the total buffer headroom the kernel will
 *	panic. A pointer to the first byte of the extra data is returned.
 */
unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
{
	/* Grow the data area into the headroom */
	skb->data -= len;
	skb->len  += len;
	if (unlikely(skb->data<skb->head))
		skb_under_panic(skb, len, __builtin_return_address(0));
	return skb->data;
}
EXPORT_SYMBOL(skb_push);
1456 | ||
/**
 *	skb_pull - remove data from the start of a buffer
 *	@skb: buffer to use
 *	@len: amount of data to remove
 *
 *	This function removes data from the start of a buffer, returning
 *	the memory to the headroom. A pointer to the next data in the buffer
 *	is returned. Once the data has been pulled future pushes will overwrite
 *	the old data.
 */
unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
{
	/* Out-of-line copy of the inline helper for callers via pointer */
	return skb_pull_inline(skb, len);
}
EXPORT_SYMBOL(skb_pull);
1472 | ||
/**
 *	skb_trim - remove end from a buffer
 *	@skb: buffer to alter
 *	@len: new length
 *
 *	Cut the length of a buffer down by removing data from the tail. If
 *	the buffer is already under the length specified it is not modified.
 *	The skb must be linear.
 */
void skb_trim(struct sk_buff *skb, unsigned int len)
{
	/* Only ever shrinks; growing is skb_put()'s job */
	if (skb->len > len)
		__skb_trim(skb, len);
}
EXPORT_SYMBOL(skb_trim);
1488 | ||
/* Trims skb to length len. It can change skb pointers.
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
	struct sk_buff **fragp;
	struct sk_buff *frag;
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;
	int err;

	/* A clone shares its frag array; unclone before mutating it */
	if (skb_cloned(skb) &&
	    unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
		return err;

	i = 0;
	/* Cut falls inside the linear head: every frag goes */
	if (offset >= len)
		goto drop_pages;

	for (; i < nfrags; i++) {
		int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (end < len) {
			offset = end;
			continue;
		}

		/* Frag i straddles the cut: shrink it, then fall into
		 * drop_pages (i was post-incremented, so frag i is kept).
		 */
		skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);

drop_pages:
		/* NOTE: also the entry point for the offset >= len case
		 * above with i == 0 (drop all frags).
		 */
		skb_shinfo(skb)->nr_frags = i;

		for (; i < nfrags; i++)
			skb_frag_unref(skb, i);

		if (skb_has_frag_list(skb))
			skb_drop_fraglist(skb);
		goto done;
	}

	/* Cut lies inside the frag_list: walk it, trimming in place */
	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
	     fragp = &frag->next) {
		int end = offset + frag->len;

		if (skb_shared(frag)) {
			struct sk_buff *nfrag;

			/* A shared list skb cannot be trimmed in place;
			 * substitute a private clone for it.
			 */
			nfrag = skb_clone(frag, GFP_ATOMIC);
			if (unlikely(!nfrag))
				return -ENOMEM;

			nfrag->next = frag->next;
			consume_skb(frag);
			frag = nfrag;
			*fragp = frag;
		}

		if (end < len) {
			offset = end;
			continue;
		}

		if (end > len &&
		    unlikely((err = pskb_trim(frag, len - offset))))
			return err;

		/* Everything after this fragment is cut away */
		if (frag->next)
			skb_drop_list(&frag->next);
		break;
	}

done:
	if (len > skb_headlen(skb)) {
		/* Some paged/fraglist data survives the trim */
		skb->data_len -= skb->len - len;
		skb->len = len;
	} else {
		/* Result is fully linear */
		skb->len = len;
		skb->data_len = 0;
		skb_set_tail_pointer(skb, len);
	}

	return 0;
}
EXPORT_SYMBOL(___pskb_trim);
1574 | ||
/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	The function makes a sense only on a fragmented &sk_buff,
 *	it expands header moving its tail forward and copying necessary
 *	data from fragmented part.
 *
 *	&sk_buff MUST have reference count of 1.
 *
 *	Returns %NULL (and &sk_buff does not change) if pull failed
 *	or value of new tail of skb in the case of success.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	/* If skb has not enough free space at tail, get new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	int i, k, eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				     GFP_ATOMIC))
			return NULL;
	}

	/* Copy @delta bytes of paged/fraglist data into the linear tail */
	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
		BUG();

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (!skb_has_frag_list(skb))
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size >= eat)
			goto pull_pages;
		eat -= size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but, taking into account that pulling is expected to
	 * be a very rare operation, it is worth fighting against
	 * further bloating of the skb head and crucifying ourselves
	 * here instead.  Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;	/* first list skb to keep */

		do {
			BUG_ON(!list);

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	/* Drop fully-consumed page frags, shrink the straddling one, and
	 * compact the surviving frags down to slots [0, k).
	 */
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size <= eat) {
			skb_frag_unref(skb, i);
			eat -= size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);
1716 | ||
/**
 *	skb_copy_bits - copy bits from skb to kernel buffer
 *	@skb: source skb
 *	@offset: offset in source
 *	@to: destination buffer
 *	@len: number of bytes to copy
 *
 *	Copy the specified number of bytes from the source skb to the
 *	destination buffer.
 *
 *	CAUTION ! :
 *		If its prototype is ever changed,
 *		check arch/{*}/net/{*}.S files,
 *		since it is called from BPF assembly code.
 */
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int start = skb_headlen(skb);
	struct sk_buff *frag_iter;
	int i, copy;

	/* Requested range must lie entirely within the skb */
	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_from_linear_data_offset(skb, offset, to, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
	}

	/* Then the page frags; @start/@end bracket each frag's byte range
	 * in skb coordinates.
	 */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(f);
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_atomic(skb_frag_page(f));
			memcpy(to,
			       vaddr + f->page_offset + offset - start,
			       copy);
			kunmap_atomic(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	/* Finally recurse into the frag list */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_bits(frag_iter, offset - start, to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_bits);
1805 | ||
/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	/* Drop the reference spd_fill_page() took on page @i */
	put_page(spd->pages[i]);
}
1814 | ||
/* Bounce up to *@len bytes from @page/@*offset into the socket's page
 * frag.  On success *@len is clamped to what was copied, *@offset is
 * rewritten to the location inside the returned (frag) page; returns
 * NULL if the frag allocator could not be refilled.
 */
static struct page *linear_to_page(struct page *page, unsigned int *len,
				   unsigned int *offset,
				   struct sock *sk)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return NULL;

	/* Clamp the copy to the space left in the current frag page */
	*len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);

	memcpy(page_address(pfrag->page) + pfrag->offset,
	       page_address(page) + *offset, *len);
	*offset = pfrag->offset;
	pfrag->offset += *len;

	return pfrag->page;
}
1833 | ||
1834 | static bool spd_can_coalesce(const struct splice_pipe_desc *spd, | |
1835 | struct page *page, | |
1836 | unsigned int offset) | |
1837 | { | |
1838 | return spd->nr_pages && | |
1839 | spd->pages[spd->nr_pages - 1] == page && | |
1840 | (spd->partial[spd->nr_pages - 1].offset + | |
1841 | spd->partial[spd->nr_pages - 1].len == offset); | |
1842 | } | |
1843 | ||
/*
 * Fill page/offset/length into spd, if it can hold more pages.
 * Returns true when the caller must stop (spd already holds
 * MAX_SKB_FRAGS entries, or a required copy of linear data failed);
 * false when the segment was recorded.
 */
static bool spd_fill_page(struct splice_pipe_desc *spd,
			  struct pipe_inode_info *pipe, struct page *page,
			  unsigned int *len, unsigned int offset,
			  bool linear,
			  struct sock *sk)
{
	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
		return true;

	if (linear) {
		/* Linear data can't be handed out by reference: copy it
		 * into a page we own; *len and offset are rewritten to
		 * describe the copy.
		 */
		page = linear_to_page(page, len, &offset, sk);
		if (!page)
			return true;
	}
	if (spd_can_coalesce(spd, page, offset)) {
		/* Contiguous with the previous entry: just grow it. */
		spd->partial[spd->nr_pages - 1].len += *len;
		return false;
	}
	get_page(page);	/* reference owned by the spd entry */
	spd->pages[spd->nr_pages] = page;
	spd->partial[spd->nr_pages].len = *len;
	spd->partial[spd->nr_pages].offset = offset;
	spd->nr_pages++;

	return false;
}
1873 | ||
/*
 * Splice up to *len bytes of a single segment (page + poff/plen) into
 * @spd. The first *off bytes of the segment are skipped (data already
 * consumed by earlier segments); *off and *len are updated in place.
 * Returns true when the caller should stop walking segments (request
 * satisfied or spd full), false to continue with the next segment.
 */
static bool __splice_segment(struct page *page, unsigned int poff,
			     unsigned int plen, unsigned int *off,
			     unsigned int *len,
			     struct splice_pipe_desc *spd, bool linear,
			     struct sock *sk,
			     struct pipe_inode_info *pipe)
{
	if (!*len)
		return true;

	/* skip this segment if already processed */
	if (*off >= plen) {
		*off -= plen;
		return false;
	}

	/* ignore any bits we already processed */
	poff += *off;
	plen -= *off;
	*off = 0;

	do {
		unsigned int flen = min(*len, plen);

		/* NOTE: spd_fill_page() may shrink flen when it has to
		 * copy linear data into a partially-used cache page.
		 */
		if (spd_fill_page(spd, pipe, page, &flen, poff,
				  linear, sk))
			return true;
		poff += flen;
		plen -= flen;
		*len -= flen;
	} while (*len && plen);

	return false;
}
1908 | ||
/*
 * Map linear and fragment data from the skb to spd. It reports true if the
 * pipe is full or if we already spliced the requested length.
 * *offset and *len are consumed in place as segments are mapped, so the
 * caller can continue across frag_list skbs with the same cursor.
 */
static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
			      unsigned int *offset, unsigned int *len,
			      struct splice_pipe_desc *spd, struct sock *sk)
{
	int seg;

	/* map the linear part :
	 * If skb->head_frag is set, this 'linear' part is backed by a
	 * fragment, and if the head is not shared with any clones then
	 * we can avoid a copy since we own the head portion of this page.
	 */
	if (__splice_segment(virt_to_page(skb->data),
			     (unsigned long) skb->data & (PAGE_SIZE - 1),
			     skb_headlen(skb),
			     offset, len, spd,
			     skb_head_is_locked(skb),
			     sk, pipe))
		return true;

	/*
	 * then map the fragments
	 */
	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

		if (__splice_segment(skb_frag_page(f),
				     f->page_offset, skb_frag_size(f),
				     offset, len, spd, false, sk, pipe))
			return true;
	}

	return false;
}
1946 | ||
/*
 * Map data from the skb to a pipe. Should handle both the linear part,
 * the fragments, and the frag list. It does NOT handle frag lists within
 * the frag list, if such a thing exists. We'd probably need to recurse to
 * handle that cleanly.
 *
 * Called with the socket lock held; the lock is dropped around
 * splice_to_pipe() (see the comment at the call site). Returns whatever
 * splice_to_pipe() reports, or 0 if nothing was queued.
 */
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
		    struct pipe_inode_info *pipe, unsigned int tlen,
		    unsigned int flags)
{
	struct partial_page partial[MAX_SKB_FRAGS];
	struct page *pages[MAX_SKB_FRAGS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.nr_pages_max = MAX_SKB_FRAGS,
		.flags = flags,
		.ops = &nosteal_pipe_buf_ops,
		.spd_release = sock_spd_release,
	};
	struct sk_buff *frag_iter;
	struct sock *sk = skb->sk;
	int ret = 0;

	/*
	 * __skb_splice_bits() only fails if the output has no room left,
	 * so no point in going over the frag_list for the error case.
	 */
	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
		goto done;
	else if (!tlen)
		goto done;

	/*
	 * now see if we have a frag_list to map
	 */
	skb_walk_frags(skb, frag_iter) {
		if (!tlen)
			break;
		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
			break;
	}

done:
	if (spd.nr_pages) {
		/*
		 * Drop the socket lock, otherwise we have reverse
		 * locking dependencies between sk_lock and i_mutex
		 * here as compared to sendfile(). We enter here
		 * with the socket lock held, and splice_to_pipe() will
		 * grab the pipe inode lock. For sendfile() emulation,
		 * we call into ->sendpage() with the i_mutex lock held
		 * and networking will grab the socket lock.
		 */
		release_sock(sk);
		ret = splice_to_pipe(pipe, &spd);
		lock_sock(sk);
	}

	return ret;
}
2008 | ||
/**
 *	skb_store_bits - store bits from kernel buffer to skb
 *	@skb: destination buffer
 *	@offset: offset in destination
 *	@from: source buffer
 *	@len: number of bytes to copy
 *
 *	Copy the specified number of bytes from the source buffer to the
 *	destination skb.  This function handles all the messy bits of
 *	traversing fragment lists and such.
 *
 *	Returns 0 on success, or -EFAULT if the requested range does not
 *	fit inside the skb.
 */

int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
	int start = skb_headlen(skb);
	struct sk_buff *frag_iter;
	int i, copy;

	if (offset > (int)skb->len - len)
		goto fault;

	/* Phase 1: store into the linear header area. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_to_linear_data_offset(skb, offset, from, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from += copy;
	}

	/* Phase 2: store into the page fragments.  'start'/'end' track
	 * the running byte range each fragment covers within the skb.
	 */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			/* Fragments live in highmem-capable pages: map
			 * them temporarily for the memcpy.
			 */
			vaddr = kmap_atomic(skb_frag_page(frag));
			memcpy(vaddr + frag->page_offset + offset - start,
			       from, copy);
			kunmap_atomic(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}

	/* Phase 3: recurse into frag_list skbs for whatever remains. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_store_bits(frag_iter, offset - start,
					   from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_store_bits);
2092 | ||
/* Checksum skb data.
 *
 * Walks the linear area, the page fragments and the frag_list,
 * feeding each chunk to ops->update() and folding partial results
 * together with ops->combine(). 'pos' is the byte position of the
 * current chunk within the checksummed range and is passed to
 * ->combine so it can account for chunk alignment.
 */
__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
		      __wsum csum, const struct skb_checksum_ops *ops)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Checksum header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = ops->update(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			u8 *vaddr;

			if (copy > len)
				copy = len;
			vaddr = kmap_atomic(skb_frag_page(frag));
			/* Checksum the fragment independently, then fold
			 * it in at position 'pos'.
			 */
			csum2 = ops->update(vaddr + frag->page_offset +
					    offset - start, copy, 0);
			kunmap_atomic(vaddr);
			csum = ops->combine(csum, csum2, pos, copy);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			if (copy > len)
				copy = len;
			/* Recurse for frag_list members. */
			csum2 = __skb_checksum(frag_iter, offset - start,
					       copy, 0, ops);
			csum = ops->combine(csum, csum2, pos, copy);
			if ((len -= copy) == 0)
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	BUG_ON(len);	/* caller asked for more bytes than the skb holds */

	return csum;
}
EXPORT_SYMBOL(__skb_checksum);
2164 | ||
2165 | __wsum skb_checksum(const struct sk_buff *skb, int offset, | |
2166 | int len, __wsum csum) | |
2167 | { | |
2168 | const struct skb_checksum_ops ops = { | |
2169 | .update = csum_partial_ext, | |
2170 | .combine = csum_block_add_ext, | |
2171 | }; | |
2172 | ||
2173 | return __skb_checksum(skb, offset, len, csum, &ops); | |
2174 | } | |
2175 | EXPORT_SYMBOL(skb_checksum); | |
2176 | ||
/* Both of above in one bottle: copy skb data to @to while accumulating
 * its checksum, walking the linear area, page frags and frag_list in
 * the same three-phase pattern as skb_copy_bits()/__skb_checksum().
 * 'pos' tracks how many bytes have been checksummed so that partial
 * sums are folded in at the right byte position.
 */

__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
			      u8 *to, int len, __wsum csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to,
						 copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_atomic(skb_frag_page(frag));
			/* Copy+checksum this fragment on its own, then
			 * fold the partial sum in at 'pos'.
			 */
			csum2 = csum_partial_copy_nocheck(vaddr +
							  frag->page_offset +
							  offset - start, to,
							  copy, 0);
			kunmap_atomic(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		__wsum csum2;
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			/* Recurse for frag_list members. */
			csum2 = skb_copy_and_csum_bits(frag_iter,
						       offset - start,
						       to, copy, 0);
			csum = csum_block_add(csum, csum2, pos);
			if ((len -= copy) == 0)
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}
	BUG_ON(len);	/* caller asked for more bytes than the skb holds */
	return csum;
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
2255 | ||
2256 | /** | |
2257 | * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() | |
2258 | * @from: source buffer | |
2259 | * | |
2260 | * Calculates the amount of linear headroom needed in the 'to' skb passed | |
2261 | * into skb_zerocopy(). | |
2262 | */ | |
2263 | unsigned int | |
2264 | skb_zerocopy_headlen(const struct sk_buff *from) | |
2265 | { | |
2266 | unsigned int hlen = 0; | |
2267 | ||
2268 | if (!from->head_frag || | |
2269 | skb_headlen(from) < L1_CACHE_BYTES || | |
2270 | skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) | |
2271 | hlen = skb_headlen(from); | |
2272 | ||
2273 | if (skb_has_frag_list(from)) | |
2274 | hlen = from->len; | |
2275 | ||
2276 | return hlen; | |
2277 | } | |
2278 | EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); | |
2279 | ||
/**
 * skb_zerocopy - Zero copy skb to skb
 * @to: destination buffer
 * @from: source buffer
 * @len: number of bytes to copy from source buffer
 * @hlen: size of linear headroom in destination buffer
 *
 * Copies up to `len` bytes from `from` to `to` by creating references
 * to the frags in the source buffer.
 *
 * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
 * headroom in the `to` buffer.
 *
 * Return value:
 * 0: everything is OK
 * -ENOMEM: couldn't orphan frags of @from due to lack of memory
 * -EFAULT: skb_copy_bits() found some problem with skb geometry
 */
int
skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
{
	int i, j = 0;		/* i walks source frags, j fills dest frags */
	int plen = 0; /* length of skb->head fragment */
	int ret;
	struct page *page;
	unsigned int offset;

	BUG_ON(!from->head_frag && !hlen);

	/* dont bother with small payloads */
	if (len <= skb_tailroom(to))
		return skb_copy_bits(from, 0, skb_put(to, len), len);

	if (hlen) {
		/* Caller asked for linear headroom: copy that much. */
		ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
		if (unlikely(ret))
			return ret;
		len -= hlen;
	} else {
		/* No headroom requested: reference the head page directly
		 * as destination fragment 0 instead of copying it.
		 */
		plen = min_t(int, skb_headlen(from), len);
		if (plen) {
			page = virt_to_head_page(from->head);
			offset = from->data - (unsigned char *)page_address(page);
			__skb_fill_page_desc(to, 0, page, offset, plen);
			get_page(page);
			j = 1;
			len -= plen;
		}
	}

	to->truesize += len + plen;
	to->len += len + plen;
	to->data_len += len + plen;

	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
		skb_tx_error(from);
		return -ENOMEM;
	}

	/* Share the remaining source frags by reference, trimming the
	 * last copied frag so no more than @len bytes are taken.
	 */
	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
		if (!len)
			break;
		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
		len -= skb_shinfo(to)->frags[j].size;
		skb_frag_ref(to, j);
		j++;
	}
	skb_shinfo(to)->nr_frags = j;

	return 0;
}
EXPORT_SYMBOL_GPL(skb_zerocopy);
2353 | ||
/**
 *	skb_copy_and_csum_dev - copy skb to a raw buffer, completing checksum
 *	@skb: source buffer
 *	@to: destination buffer (must hold skb->len bytes)
 *
 *	Copies the whole skb into @to.  For CHECKSUM_PARTIAL skbs the
 *	data past the checksum start is checksummed during the copy and
 *	the folded result is stored at csum_offset in the output.
 */
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	__wsum csum;
	long csstart;	/* byte where checksumming starts (headlen if none) */

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		csstart = skb_checksum_start_offset(skb);
	else
		csstart = skb_headlen(skb);

	BUG_ON(csstart > skb_headlen(skb));

	/* Everything before csstart is copied verbatim... */
	skb_copy_from_linear_data(skb, to, csstart);

	/* ...the rest is copied while accumulating its checksum. */
	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
					      skb->len - csstart, 0);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		long csstuff = csstart + skb->csum_offset;

		/* Fold the 32-bit sum and store it where the caller expects. */
		*((__sum16 *)(to + csstuff)) = csum_fold(csum);
	}
}
EXPORT_SYMBOL(skb_copy_and_csum_dev);
2380 | ||
2381 | /** | |
2382 | * skb_dequeue - remove from the head of the queue | |
2383 | * @list: list to dequeue from | |
2384 | * | |
2385 | * Remove the head of the list. The list lock is taken so the function | |
2386 | * may be used safely with other locking list functions. The head item is | |
2387 | * returned or %NULL if the list is empty. | |
2388 | */ | |
2389 | ||
2390 | struct sk_buff *skb_dequeue(struct sk_buff_head *list) | |
2391 | { | |
2392 | unsigned long flags; | |
2393 | struct sk_buff *result; | |
2394 | ||
2395 | spin_lock_irqsave(&list->lock, flags); | |
2396 | result = __skb_dequeue(list); | |
2397 | spin_unlock_irqrestore(&list->lock, flags); | |
2398 | return result; | |
2399 | } | |
2400 | EXPORT_SYMBOL(skb_dequeue); | |
2401 | ||
2402 | /** | |
2403 | * skb_dequeue_tail - remove from the tail of the queue | |
2404 | * @list: list to dequeue from | |
2405 | * | |
2406 | * Remove the tail of the list. The list lock is taken so the function | |
2407 | * may be used safely with other locking list functions. The tail item is | |
2408 | * returned or %NULL if the list is empty. | |
2409 | */ | |
2410 | struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) | |
2411 | { | |
2412 | unsigned long flags; | |
2413 | struct sk_buff *result; | |
2414 | ||
2415 | spin_lock_irqsave(&list->lock, flags); | |
2416 | result = __skb_dequeue_tail(list); | |
2417 | spin_unlock_irqrestore(&list->lock, flags); | |
2418 | return result; | |
2419 | } | |
2420 | EXPORT_SYMBOL(skb_dequeue_tail); | |
2421 | ||
/**
 *	skb_queue_purge - empty a list
 *	@list: list to empty
 *
 *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 *	the list and one reference dropped. This function takes the list
 *	lock and is atomic with respect to other list locking functions.
 */
void skb_queue_purge(struct sk_buff_head *list)
{
	for (;;) {
		struct sk_buff *skb = skb_dequeue(list);

		if (!skb)
			break;
		kfree_skb(skb);
	}
}
EXPORT_SYMBOL(skb_queue_purge);
2437 | ||
2438 | /** | |
2439 | * skb_queue_head - queue a buffer at the list head | |
2440 | * @list: list to use | |
2441 | * @newsk: buffer to queue | |
2442 | * | |
2443 | * Queue a buffer at the start of the list. This function takes the | |
2444 | * list lock and can be used safely with other locking &sk_buff functions | |
2445 | * safely. | |
2446 | * | |
2447 | * A buffer cannot be placed on two lists at the same time. | |
2448 | */ | |
2449 | void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) | |
2450 | { | |
2451 | unsigned long flags; | |
2452 | ||
2453 | spin_lock_irqsave(&list->lock, flags); | |
2454 | __skb_queue_head(list, newsk); | |
2455 | spin_unlock_irqrestore(&list->lock, flags); | |
2456 | } | |
2457 | EXPORT_SYMBOL(skb_queue_head); | |
2458 | ||
2459 | /** | |
2460 | * skb_queue_tail - queue a buffer at the list tail | |
2461 | * @list: list to use | |
2462 | * @newsk: buffer to queue | |
2463 | * | |
2464 | * Queue a buffer at the tail of the list. This function takes the | |
2465 | * list lock and can be used safely with other locking &sk_buff functions | |
2466 | * safely. | |
2467 | * | |
2468 | * A buffer cannot be placed on two lists at the same time. | |
2469 | */ | |
2470 | void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) | |
2471 | { | |
2472 | unsigned long flags; | |
2473 | ||
2474 | spin_lock_irqsave(&list->lock, flags); | |
2475 | __skb_queue_tail(list, newsk); | |
2476 | spin_unlock_irqrestore(&list->lock, flags); | |
2477 | } | |
2478 | EXPORT_SYMBOL(skb_queue_tail); | |
2479 | ||
2480 | /** | |
2481 | * skb_unlink - remove a buffer from a list | |
2482 | * @skb: buffer to remove | |
2483 | * @list: list to use | |
2484 | * | |
2485 | * Remove a packet from a list. The list locks are taken and this | |
2486 | * function is atomic with respect to other list locked calls | |
2487 | * | |
2488 | * You must know what list the SKB is on. | |
2489 | */ | |
2490 | void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) | |
2491 | { | |
2492 | unsigned long flags; | |
2493 | ||
2494 | spin_lock_irqsave(&list->lock, flags); | |
2495 | __skb_unlink(skb, list); | |
2496 | spin_unlock_irqrestore(&list->lock, flags); | |
2497 | } | |
2498 | EXPORT_SYMBOL(skb_unlink); | |
2499 | ||
2500 | /** | |
2501 | * skb_append - append a buffer | |
2502 | * @old: buffer to insert after | |
2503 | * @newsk: buffer to insert | |
2504 | * @list: list to use | |
2505 | * | |
2506 | * Place a packet after a given packet in a list. The list locks are taken | |
2507 | * and this function is atomic with respect to other list locked calls. | |
2508 | * A buffer cannot be placed on two lists at the same time. | |
2509 | */ | |
2510 | void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) | |
2511 | { | |
2512 | unsigned long flags; | |
2513 | ||
2514 | spin_lock_irqsave(&list->lock, flags); | |
2515 | __skb_queue_after(list, old, newsk); | |
2516 | spin_unlock_irqrestore(&list->lock, flags); | |
2517 | } | |
2518 | EXPORT_SYMBOL(skb_append); | |
2519 | ||
2520 | /** | |
2521 | * skb_insert - insert a buffer | |
2522 | * @old: buffer to insert before | |
2523 | * @newsk: buffer to insert | |
2524 | * @list: list to use | |
2525 | * | |
2526 | * Place a packet before a given packet in a list. The list locks are | |
2527 | * taken and this function is atomic with respect to other list locked | |
2528 | * calls. | |
2529 | * | |
2530 | * A buffer cannot be placed on two lists at the same time. | |
2531 | */ | |
2532 | void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) | |
2533 | { | |
2534 | unsigned long flags; | |
2535 | ||
2536 | spin_lock_irqsave(&list->lock, flags); | |
2537 | __skb_insert(newsk, old->prev, old, list); | |
2538 | spin_unlock_irqrestore(&list->lock, flags); | |
2539 | } | |
2540 | EXPORT_SYMBOL(skb_insert); | |
2541 | ||
/* Split point lies inside the linear header: copy the header bytes past
 * @len into @skb1 and hand over every page fragment unchanged.
 */
static inline void skb_split_inside_header(struct sk_buff *skb,
					   struct sk_buff* skb1,
					   const u32 len, const int pos)
{
	int i;

	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
					 pos - len);
	/* And move data appendix as is. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

	/* All frags now belong to skb1; fix both skbs' accounting. */
	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
	skb_shinfo(skb)->nr_frags = 0;
	skb1->data_len = skb->data_len;
	skb1->len += skb1->data_len;
	skb->data_len = 0;
	skb->len = len;
	skb_set_tail_pointer(skb, len);
}
2562 | ||
/* Split point lies in the paged data: distribute the frags between
 * @skb (everything below @len) and @skb1 (everything at or above it),
 * splitting the straddling frag in two if necessary.
 */
static inline void skb_split_no_header(struct sk_buff *skb,
				       struct sk_buff* skb1,
				       const u32 len, int pos)
{
	int i, k = 0;	/* i walks skb's frags, k fills skb1's */
	const int nfrags = skb_shinfo(skb)->nr_frags;

	skb_shinfo(skb)->nr_frags = 0;
	skb1->len = skb1->data_len = skb->len - len;
	skb->len = len;
	skb->data_len = len - pos;	/* pos == headlen here */

	for (i = 0; i < nfrags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (pos + size > len) {
			/* This frag (and all after it) goes to skb1. */
			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

			if (pos < len) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move all the frag to the second
				 *    part, if it is possible. F.e.
				 *    this approach is mandatory for TUX,
				 *    where splitting is expensive.
				 * 2. Split is accurately. We make this.
				 */
				skb_frag_ref(skb, i);
				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
				skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
				skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
				skb_shinfo(skb)->nr_frags++;
			}
			k++;
		} else
			skb_shinfo(skb)->nr_frags++;
		pos += size;
	}
	skb_shinfo(skb1)->nr_frags = k;
}
2603 | ||
2604 | /** | |
2605 | * skb_split - Split fragmented skb to two parts at length len. | |
2606 | * @skb: the buffer to split | |
2607 | * @skb1: the buffer to receive the second part | |
2608 | * @len: new length for skb | |
2609 | */ | |
2610 | void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) | |
2611 | { | |
2612 | int pos = skb_headlen(skb); | |
2613 | ||
2614 | skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; | |
2615 | if (len < pos) /* Split line is inside header. */ | |
2616 | skb_split_inside_header(skb, skb1, len, pos); | |
2617 | else /* Second chunk has no header, nothing to copy. */ | |
2618 | skb_split_no_header(skb, skb1, len, pos); | |
2619 | } | |
2620 | EXPORT_SYMBOL(skb_split); | |
2621 | ||
2622 | /* Shifting from/to a cloned skb is a no-go. | |
2623 | * | |
2624 | * Caller cannot keep skb_shinfo related pointers past calling here! | |
2625 | */ | |
2626 | static int skb_prepare_for_shift(struct sk_buff *skb) | |
2627 | { | |
2628 | return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | |
2629 | } | |
2630 | ||
/**
 * skb_shift - Shifts paged data partially from skb to another
 * @tgt: buffer into which tail data gets added
 * @skb: buffer from which the paged data comes from
 * @shiftlen: shift up to this many bytes
 *
 * Attempts to shift up to shiftlen worth of bytes, which may be less than
 * the length of the skb, from skb to tgt. Returns number bytes shifted.
 * It's up to caller to free skb if everything was shifted.
 *
 * If @tgt runs out of frags, the whole operation is aborted.
 *
 * Skb cannot include anything else but paged data while tgt is allowed
 * to have non-paged data as well.
 *
 * TODO: full sized shift could be optimized but that would need
 * specialized skb free'er to handle frags without up-to-date nr_frags.
 */
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
{
	int from, to, merge, todo;
	struct skb_frag_struct *fragfrom, *fragto;

	BUG_ON(shiftlen > skb->len);
	BUG_ON(skb_headlen(skb));	/* Would corrupt stream */

	todo = shiftlen;
	from = 0;
	to = skb_shinfo(tgt)->nr_frags;
	fragfrom = &skb_shinfo(skb)->frags[from];

	/* Actual merge is delayed until the point when we know we can
	 * commit all, so that we don't have to undo partial changes
	 */
	if (!to ||
	    !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
			      fragfrom->page_offset)) {
		merge = -1;
	} else {
		merge = to - 1;

		todo -= skb_frag_size(fragfrom);
		if (todo < 0) {
			/* The whole shift fits inside the coalescible
			 * first frag: adjust sizes/offset in place and
			 * skip the frag-moving machinery entirely.
			 */
			if (skb_prepare_for_shift(skb) ||
			    skb_prepare_for_shift(tgt))
				return 0;

			/* All previous frag pointers might be stale! */
			fragfrom = &skb_shinfo(skb)->frags[from];
			fragto = &skb_shinfo(tgt)->frags[merge];

			skb_frag_size_add(fragto, shiftlen);
			skb_frag_size_sub(fragfrom, shiftlen);
			fragfrom->page_offset += shiftlen;

			goto onlymerged;
		}

		from++;
	}

	/* Skip full, not-fitting skb to avoid expensive operations */
	if ((shiftlen == skb->len) &&
	    (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
		return 0;

	if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
		return 0;

	/* Move whole frags while todo allows; split the last one if the
	 * shift ends inside it (both skbs then reference that page).
	 */
	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
		if (to == MAX_SKB_FRAGS)
			return 0;

		fragfrom = &skb_shinfo(skb)->frags[from];
		fragto = &skb_shinfo(tgt)->frags[to];

		if (todo >= skb_frag_size(fragfrom)) {
			*fragto = *fragfrom;
			todo -= skb_frag_size(fragfrom);
			from++;
			to++;

		} else {
			__skb_frag_ref(fragfrom);
			fragto->page = fragfrom->page;
			fragto->page_offset = fragfrom->page_offset;
			skb_frag_size_set(fragto, todo);

			fragfrom->page_offset += todo;
			skb_frag_size_sub(fragfrom, todo);
			todo = 0;

			to++;
			break;
		}
	}

	/* Ready to "commit" this state change to tgt */
	skb_shinfo(tgt)->nr_frags = to;

	if (merge >= 0) {
		/* Delayed coalesce: fold skb's (former) first frag into
		 * tgt's last pre-existing frag and drop its reference.
		 */
		fragfrom = &skb_shinfo(skb)->frags[0];
		fragto = &skb_shinfo(tgt)->frags[merge];

		skb_frag_size_add(fragto, skb_frag_size(fragfrom));
		__skb_frag_unref(fragfrom);
	}

	/* Reposition in the original skb */
	to = 0;
	while (from < skb_shinfo(skb)->nr_frags)
		skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
	skb_shinfo(skb)->nr_frags = to;

	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);

onlymerged:
	/* Most likely the tgt won't ever need its checksum anymore, skb on
	 * the other hand might need it if it needs to be resent
	 */
	tgt->ip_summed = CHECKSUM_PARTIAL;
	skb->ip_summed = CHECKSUM_PARTIAL;

	/* Yak, is it really working this way? Some helper please? */
	skb->len -= shiftlen;
	skb->data_len -= shiftlen;
	skb->truesize -= shiftlen;
	tgt->len += shiftlen;
	tgt->data_len += shiftlen;
	tgt->truesize += shiftlen;

	return shiftlen;
}
2764 | ||
2765 | /** | |
2766 | * skb_prepare_seq_read - Prepare a sequential read of skb data | |
2767 | * @skb: the buffer to read | |
2768 | * @from: lower offset of data to be read | |
2769 | * @to: upper offset of data to be read | |
2770 | * @st: state variable | |
2771 | * | |
2772 | * Initializes the specified state variable. Must be called before | |
2773 | * invoking skb_seq_read() for the first time. | |
2774 | */ | |
2775 | void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, | |
2776 | unsigned int to, struct skb_seq_state *st) | |
2777 | { | |
2778 | st->lower_offset = from; | |
2779 | st->upper_offset = to; | |
2780 | st->root_skb = st->cur_skb = skb; | |
2781 | st->frag_idx = st->stepped_offset = 0; | |
2782 | st->frag_data = NULL; | |
2783 | } | |
2784 | EXPORT_SYMBOL(skb_prepare_seq_read); | |
2785 | ||
/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at @consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to @data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. @consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note 1: The size of each block of data returned can be arbitrary,
 *         this limitation is the cost for zerocopy sequential
 *         reads of potentially non linear data.
 *
 * Note 2: Fragment lists within fragments are not implemented
 *         at the moment, state->root_skb could be replaced with
 *         a stack for this purpose.
 */
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st)
{
	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
	skb_frag_t *frag;

	/* Past the requested window: drop any live atomic kmap and
	 * signal end-of-data.
	 */
	if (unlikely(abs_offset >= st->upper_offset)) {
		if (st->frag_data) {
			kunmap_atomic(st->frag_data);
			st->frag_data = NULL;
		}
		return 0;
	}

next_skb:
	/* block_limit = absolute offset of the first byte beyond the
	 * linear head of the current skb.
	 */
	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;

	/* Serve from the linear area (only valid while no frag is mapped). */
	if (abs_offset < block_limit && !st->frag_data) {
		*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
		return block_limit - abs_offset;
	}

	/* First step past the linear area: account for its length once. */
	if (st->frag_idx == 0 && !st->frag_data)
		st->stepped_offset += skb_headlen(st->cur_skb);

	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
		block_limit = skb_frag_size(frag) + st->stepped_offset;

		if (abs_offset < block_limit) {
			/* Map the frag's page lazily; the mapping stays
			 * cached in st->frag_data across calls.
			 */
			if (!st->frag_data)
				st->frag_data = kmap_atomic(skb_frag_page(frag));

			*data = (u8 *) st->frag_data + frag->page_offset +
				(abs_offset - st->stepped_offset);

			return block_limit - abs_offset;
		}

		/* Done with this frag: unmap before moving on. */
		if (st->frag_data) {
			kunmap_atomic(st->frag_data);
			st->frag_data = NULL;
		}

		st->frag_idx++;
		st->stepped_offset += skb_frag_size(frag);
	}

	if (st->frag_data) {
		kunmap_atomic(st->frag_data);
		st->frag_data = NULL;
	}

	/* Advance into the frag_list: first from the root skb into its
	 * list, then along ->next.  See Note 2 above: nested frag lists
	 * are not handled.
	 */
	if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
		st->frag_idx = 0;
		goto next_skb;
	} else if (st->cur_skb->next) {
		st->cur_skb = st->cur_skb->next;
		st->frag_idx = 0;
		goto next_skb;
	}

	return 0;
}
EXPORT_SYMBOL(skb_seq_read);
2878 | /** | |
2879 | * skb_abort_seq_read - Abort a sequential read of skb data | |
2880 | * @st: state variable | |
2881 | * | |
2882 | * Must be called if skb_seq_read() was not called until it | |
2883 | * returned 0. | |
2884 | */ | |
2885 | void skb_abort_seq_read(struct skb_seq_state *st) | |
2886 | { | |
2887 | if (st->frag_data) | |
2888 | kunmap_atomic(st->frag_data); | |
2889 | } | |
2890 | EXPORT_SYMBOL(skb_abort_seq_read); | |
2891 | ||
/* Overlay a struct skb_seq_state on the textsearch state's control
 * buffer, so skb_seq_read() can be driven by the textsearch core.
 */
#define TS_SKB_CB(state)	((struct skb_seq_state *) &((state)->cb))

/* textsearch get_next_block() callback: hand out the next chunk of skb
 * data at @offset; @conf is unused.
 */
static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
					  struct ts_config *conf,
					  struct ts_state *state)
{
	return skb_seq_read(offset, text, TS_SKB_CB(state));
}

/* textsearch finish() callback: release any mapping still cached by the
 * sequential-read state.
 */
static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
{
	skb_abort_seq_read(TS_SKB_CB(state));
}
2905 | ||
/**
 * skb_find_text - Find a text pattern in skb data
 * @skb: the buffer to look in
 * @from: search offset
 * @to: search limit
 * @config: textsearch configuration
 *
 * Finds a pattern in the skb data according to the specified
 * textsearch configuration. Use textsearch_next() to retrieve
 * subsequent occurrences of the pattern. Returns the offset
 * to the first occurrence or UINT_MAX if no match was found.
 */
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
			   unsigned int to, struct ts_config *config)
{
	struct ts_state state;
	unsigned int ret;

	/* Wire the skb walker into the textsearch engine. */
	config->get_next_block = skb_ts_get_next_block;
	config->finish = skb_ts_finish;

	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));

	ret = textsearch_find(config, &state);
	/* Unsigned compare: any offset beyond the searched window
	 * (including textsearch's own UINT_MAX) maps to "no match".
	 */
	return (ret <= to - from ? ret : UINT_MAX);
}
EXPORT_SYMBOL(skb_find_text);
2933 | ||
/**
 * skb_append_datato_frags - append the user data to a skb
 * @sk: sock structure
 * @skb: skb structure to be appended with user data.
 * @getfrag: call back function to be used for getting the user data
 * @from: pointer to user message iov
 * @length: length of the iov message
 *
 * Description: This procedure appends the user data in the fragment part
 * of the skb. If any page alloc fails, this procedure returns -ENOMEM.
 */
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
			int (*getfrag)(void *from, char *to, int offset,
					int len, int odd, struct sk_buff *skb),
			void *from, int length)
{
	int frg_cnt = skb_shinfo(skb)->nr_frags;
	int copy;
	int offset = 0;
	int ret;
	/* Per-task page fragment allocator; refilled on demand below. */
	struct page_frag *pfrag = &current->task_frag;

	do {
		/* Return error if we don't have space for new frag */
		if (frg_cnt >= MAX_SKB_FRAGS)
			return -EMSGSIZE;

		if (!sk_page_frag_refill(sk, pfrag))
			return -ENOMEM;

		/* copy the user data to page */
		copy = min_t(int, length, pfrag->size - pfrag->offset);

		ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
			      offset, copy, 0, skb);
		if (ret < 0)
			return -EFAULT;

		/* copy was successful so update the size parameters */
		skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
				   copy);
		frg_cnt++;
		pfrag->offset += copy;
		/* The skb now holds a reference to the page in addition to
		 * the one kept by the task_frag cache.
		 */
		get_page(pfrag->page);

		skb->truesize += copy;
		atomic_add(copy, &sk->sk_wmem_alloc);
		skb->len += copy;
		skb->data_len += copy;
		offset += copy;
		length -= copy;

	} while (length > 0);

	return 0;
}
EXPORT_SYMBOL(skb_append_datato_frags);
2991 | ||
/**
 * skb_pull_rcsum - pull skb and update receive checksum
 * @skb: buffer to update
 * @len: length of data pulled
 *
 * This function performs an skb_pull on the packet and updates
 * the CHECKSUM_COMPLETE checksum.  It should be used on
 * receive path processing instead of skb_pull unless you know
 * that the checksum difference is zero (e.g., a valid IP header)
 * or you are setting ip_summed to CHECKSUM_NONE.
 *
 * Returns the new skb->data (after the pull).
 */
unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
	BUG_ON(len > skb->len);
	skb->len -= len;
	BUG_ON(skb->len < skb->data_len);
	/* Fold the pulled bytes out of the checksum while skb->data still
	 * points at them; only then advance the data pointer.
	 */
	skb_postpull_rcsum(skb, skb->data, len);
	return skb->data += len;
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
3012 | ||
/**
 * skb_segment - Perform protocol segmentation on skb.
 * @head_skb: buffer to segment
 * @features: features for the output path (see dev->features)
 *
 * This function performs segmentation on the given skb.  It returns
 * a pointer to the first in a list of new skbs for the segments.
 * In case of error it returns ERR_PTR(err).
 */
struct sk_buff *skb_segment(struct sk_buff *head_skb,
			    netdev_features_t features)
{
	struct sk_buff *segs = NULL;
	struct sk_buff *tail = NULL;
	/* Walk state over head_skb's frag_list and page frags. */
	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
	unsigned int mss = skb_shinfo(head_skb)->gso_size;
	/* Bytes of protocol headers in front of the payload. */
	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
	struct sk_buff *frag_skb = head_skb;
	unsigned int offset = doffset;
	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
	unsigned int headroom;
	unsigned int len;
	__be16 proto;
	bool csum;
	int sg = !!(features & NETIF_F_SG);
	int nfrags = skb_shinfo(head_skb)->nr_frags;
	int err = -ENOMEM;
	int i = 0;
	int pos;
	int dummy;

	/* Expose the headers so they can be copied into each segment. */
	__skb_push(head_skb, doffset);
	proto = skb_network_protocol(head_skb, &dummy);
	if (unlikely(!proto))
		return ERR_PTR(-EINVAL);

	/* csum == true: hardware can checksum, skip software csum below. */
	csum = !head_skb->encap_hdr_csum &&
	    !!can_checksum_protocol(features, proto);

	headroom = skb_headroom(head_skb);
	pos = skb_headlen(head_skb);

	do {
		struct sk_buff *nskb;
		skb_frag_t *nskb_frag;
		int hsize;
		int size;

		/* Payload length of this segment (at most one MSS). */
		len = head_skb->len - offset;
		if (len > mss)
			len = mss;

		/* hsize = bytes this segment takes from the linear head;
		 * without SG support everything must be linear.
		 */
		hsize = skb_headlen(head_skb) - offset;
		if (hsize < 0)
			hsize = 0;
		if (hsize > len || !sg)
			hsize = len;

		/* Fast path: the segment maps exactly onto a frag_list
		 * member, so clone it instead of allocating + copying.
		 */
		if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
		    (skb_headlen(list_skb) == len || sg)) {
			BUG_ON(skb_headlen(list_skb) > len);

			i = 0;
			nfrags = skb_shinfo(list_skb)->nr_frags;
			frag = skb_shinfo(list_skb)->frags;
			frag_skb = list_skb;
			pos += skb_headlen(list_skb);

			/* Skip frags wholly consumed by this segment. */
			while (pos < offset + len) {
				BUG_ON(i >= nfrags);

				size = skb_frag_size(frag);
				if (pos + size > offset + len)
					break;

				i++;
				pos += size;
				frag++;
			}

			nskb = skb_clone(list_skb, GFP_ATOMIC);
			list_skb = list_skb->next;

			if (unlikely(!nskb))
				goto err;

			if (unlikely(pskb_trim(nskb, len))) {
				kfree_skb(nskb);
				goto err;
			}

			/* hsize reused: remember old buffer end to account
			 * for truesize growth from skb_cow_head().
			 */
			hsize = skb_end_offset(nskb);
			if (skb_cow_head(nskb, doffset + headroom)) {
				kfree_skb(nskb);
				goto err;
			}

			nskb->truesize += skb_end_offset(nskb) - hsize;
			skb_release_head_state(nskb);
			__skb_push(nskb, doffset);
		} else {
			/* Slow path: allocate a fresh skb for the segment. */
			nskb = __alloc_skb(hsize + doffset + headroom,
					   GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
					   NUMA_NO_NODE);

			if (unlikely(!nskb))
				goto err;

			skb_reserve(nskb, headroom);
			__skb_put(nskb, doffset);
		}

		/* Link the new segment onto the output list. */
		if (segs)
			tail->next = nskb;
		else
			segs = nskb;
		tail = nskb;

		__copy_skb_header(nskb, head_skb);

		skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
		skb_reset_mac_len(nskb);

		/* Copy tunnel + protocol headers into the segment. */
		skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
						 nskb->data - tnl_hlen,
						 doffset + tnl_hlen);

		if (nskb->len == len + doffset)
			goto perform_csum_check;

		/* No SG: copy the payload linearly and checksum on the fly. */
		if (!sg && !nskb->remcsum_offload) {
			nskb->ip_summed = CHECKSUM_NONE;
			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
							    skb_put(nskb, len),
							    len, 0);
			SKB_GSO_CB(nskb)->csum_start =
			    skb_headroom(nskb) + doffset;
			continue;
		}

		nskb_frag = skb_shinfo(nskb)->frags;

		skb_copy_from_linear_data_offset(head_skb, offset,
						 skb_put(nskb, hsize), hsize);

		skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
					     SKBTX_SHARED_FRAG;

		/* Share head_skb's (or frag_list members') page frags with
		 * the segment, adjusting offset/size at the boundaries.
		 */
		while (pos < offset + len) {
			if (i >= nfrags) {
				BUG_ON(skb_headlen(list_skb));

				i = 0;
				nfrags = skb_shinfo(list_skb)->nr_frags;
				frag = skb_shinfo(list_skb)->frags;
				frag_skb = list_skb;

				BUG_ON(!nfrags);

				list_skb = list_skb->next;
			}

			if (unlikely(skb_shinfo(nskb)->nr_frags >=
				     MAX_SKB_FRAGS)) {
				net_warn_ratelimited(
					"skb_segment: too many frags: %u %u\n",
					pos, mss);
				goto err;
			}

			/* Userspace pages must be copied before sharing. */
			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
				goto err;

			*nskb_frag = *frag;
			__skb_frag_ref(nskb_frag);
			size = skb_frag_size(nskb_frag);

			if (pos < offset) {
				nskb_frag->page_offset += offset - pos;
				skb_frag_size_sub(nskb_frag, offset - pos);
			}

			skb_shinfo(nskb)->nr_frags++;

			if (pos + size <= offset + len) {
				i++;
				frag++;
				pos += size;
			} else {
				/* Frag straddles the segment end: trim it and
				 * leave i/pos for the next segment to resume.
				 */
				skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
				goto skip_fraglist;
			}

			nskb_frag++;
		}

skip_fraglist:
		nskb->data_len = len - hsize;
		nskb->len += nskb->data_len;
		nskb->truesize += nskb->data_len;

perform_csum_check:
		/* Software checksum when the device can't do it. */
		if (!csum && !nskb->remcsum_offload) {
			nskb->csum = skb_checksum(nskb, doffset,
						  nskb->len - doffset, 0);
			nskb->ip_summed = CHECKSUM_NONE;
			SKB_GSO_CB(nskb)->csum_start =
			    skb_headroom(nskb) + doffset;
		}
	} while ((offset += len) < head_skb->len);

	/* Some callers want to get the end of the list.
	 * Put it in segs->prev to avoid walking the list.
	 * (see validate_xmit_skb_list() for example)
	 */
	segs->prev = tail;

	/* Following permits correct backpressure, for protocols
	 * using skb_set_owner_w().
	 * Idea is to tranfert ownership from head_skb to last segment.
	 */
	if (head_skb->destructor == sock_wfree) {
		swap(tail->truesize, head_skb->truesize);
		swap(tail->destructor, head_skb->destructor);
		swap(tail->sk, head_skb->sk);
	}
	return segs;

err:
	kfree_skb_list(segs);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(skb_segment);
3247 | ||
/* Merge @skb into the GRO packet *@head.  Three strategies, in order of
 * preference: move @skb's page frags onto the tail packet, steal @skb's
 * head as a new frag (head_frag case), or chain @skb onto the tail's
 * frag_list.  Returns 0 on success, -E2BIG if the merged packet would
 * reach 64KB.
 */
int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
	unsigned int offset = skb_gro_offset(skb);
	unsigned int headlen = skb_headlen(skb);
	unsigned int len = skb_gro_len(skb);
	struct sk_buff *lp, *p = *head;
	unsigned int delta_truesize;

	/* Total length must stay below the 16-bit IP limit. */
	if (unlikely(p->len + len >= 65536))
		return -E2BIG;

	/* lp: last packet in the chain; frags/len are merged into it. */
	lp = NAPI_GRO_CB(p)->last;
	pinfo = skb_shinfo(lp);

	if (headlen <= offset) {
		/* All of skb's payload lives in page frags: transfer the
		 * frag descriptors wholesale.
		 */
		skb_frag_t *frag;
		skb_frag_t *frag2;
		int i = skbinfo->nr_frags;
		int nr_frags = pinfo->nr_frags + i;

		if (nr_frags > MAX_SKB_FRAGS)
			goto merge;

		offset -= headlen;
		pinfo->nr_frags = nr_frags;
		skbinfo->nr_frags = 0;

		/* Copy frag descriptors back-to-front. */
		frag = pinfo->frags + nr_frags;
		frag2 = skbinfo->frags + i;
		do {
			*--frag = *--frag2;
		} while (--i);

		/* Skip over any header bytes still inside the first frag. */
		frag->page_offset += offset;
		skb_frag_size_sub(frag, offset);

		/* all fragments truesize : remove (head size + sk_buff) */
		delta_truesize = skb->truesize -
				 SKB_TRUESIZE(skb_end_offset(skb));

		skb->truesize -= skb->data_len;
		skb->len -= skb->data_len;
		skb->data_len = 0;

		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
		goto done;
	} else if (skb->head_frag) {
		/* skb's linear head is itself page-backed: donate that page
		 * as a new frag, then append skb's own frags after it.
		 */
		int nr_frags = pinfo->nr_frags;
		skb_frag_t *frag = pinfo->frags + nr_frags;
		struct page *page = virt_to_head_page(skb->head);
		unsigned int first_size = headlen - offset;
		unsigned int first_offset;

		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
			goto merge;

		first_offset = skb->data -
			       (unsigned char *)page_address(page) +
			       offset;

		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;

		frag->page.p	  = page;
		frag->page_offset = first_offset;
		skb_frag_size_set(frag, first_size);

		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
		/* We dont need to clear skbinfo->nr_frags here */

		delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
		goto done;
	}

merge:
	/* Fallback: keep skb alive and chain it onto p's frag_list. */
	delta_truesize = skb->truesize;
	if (offset > headlen) {
		unsigned int eat = offset - headlen;

		/* Trim header bytes that spilled into the first frag. */
		skbinfo->frags[0].page_offset += eat;
		skb_frag_size_sub(&skbinfo->frags[0], eat);
		skb->data_len -= eat;
		skb->len -= eat;
		offset = headlen;
	}

	__skb_pull(skb, offset);

	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;
	NAPI_GRO_CB(p)->last = skb;
	__skb_header_release(skb);
	lp = p;

done:
	/* Account the merged bytes on the head packet, and also on the
	 * chained tail if it is distinct.
	 */
	NAPI_GRO_CB(p)->count++;
	p->data_len += len;
	p->truesize += delta_truesize;
	p->len += len;
	if (lp != p) {
		lp->data_len += len;
		lp->truesize += delta_truesize;
		lp->len += len;
	}
	NAPI_GRO_CB(skb)->same_flow = 1;
	return 0;
}
3358 | ||
/* Boot-time setup: create the two slab caches backing sk_buff
 * allocation — one for plain skbs, one for fast-clone pairs
 * (struct sk_buff_fclones).  SLAB_PANIC: failure here is fatal.
 */
void __init skb_init(void)
{
	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					      NULL);
	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						sizeof(struct sk_buff_fclones),
						0,
						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
						NULL);
}
3372 | ||
/**
 * __skb_to_sgvec - Fill a scatter-gather list from a socket buffer
 * @skb: Socket buffer containing the buffers to be mapped
 * @sg: The scatter-gather list to map into
 * @offset: The offset into the buffer's contents to start mapping
 * @len: Length of buffer space to be mapped
 *
 * Fill the specified scatter-gather list with mappings/pointers into a
 * region of the buffer space attached to a socket buffer.  Internal
 * helper shared by skb_to_sgvec() and skb_to_sgvec_nomark(); recurses
 * into the frag_list.  Returns the number of sg entries filled.
 */
static int
__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int elt = 0;

	/* Linear head first. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		sg_set_buf(sg, skb->data + offset, copy);
		elt++;
		if ((len -= copy) == 0)
			return elt;
		offset += copy;
	}

	/* Then each page frag; start/end track absolute skb offsets. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		if ((copy = end - offset) > 0) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
					frag->page_offset+offset-start);
			elt++;
			if (!(len -= copy))
				return elt;
			offset += copy;
		}
		start = end;
	}

	/* Finally recurse into the frag_list members. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
					      copy);
			if ((len -= copy) == 0)
				return elt;
			offset += copy;
		}
		start = end;
	}
	/* The caller asked for more data than the skb holds. */
	BUG_ON(len);
	return elt;
}
3442 | ||
/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps skb to given
 * sglist without marking the sg which contains last skb data as the end.
 * So the caller can manipulate the sg list as it wishes when padding new
 * data after the first call without calling sg_unmark_end to expand the
 * sg list.
 *
 * Scenario to use skb_to_sgvec_nomark:
 * 1. sg_init_table
 * 2. skb_to_sgvec_nomark(payload1)
 * 3. skb_to_sgvec_nomark(payload2)
 *
 * This is equivalent to:
 * 1. sg_init_table
 * 2. skb_to_sgvec(payload1)
 * 3. sg_unmark_end
 * 4. skb_to_sgvec(payload2)
 *
 * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
 * is more preferable.
 */
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
			int offset, int len)
{
	return __skb_to_sgvec(skb, sg, offset, len);
}
EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
3468 | ||
/* Map skb data into @sg and mark the final entry as the scatterlist
 * end.  Returns the number of entries used.
 * NOTE(review): there is no check that @sg has room for all entries —
 * the caller must size it for the worst case; verify callers.
 */
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
	int nsg = __skb_to_sgvec(skb, sg, offset, len);

	sg_mark_end(&sg[nsg - 1]);

	return nsg;
}
EXPORT_SYMBOL_GPL(skb_to_sgvec);
3478 | ||
/**
 * skb_cow_data - Check that a socket buffer's data buffers are writable
 * @skb: The socket buffer to check.
 * @tailbits: Amount of trailing space to be added
 * @trailer: Returned pointer to the skb where the @tailbits space begins
 *
 * Make sure that the data buffers attached to a socket buffer are
 * writable. If they are not, private copies are made of the data buffers
 * and the socket buffer is set to use these instead.
 *
 * If @tailbits is given, make sure that there is space to write @tailbits
 * bytes of data beyond current end of socket buffer.  @trailer will be
 * set to point to the skb in which this space begins.
 *
 * The number of scatterlist elements required to completely map the
 * COW'd and extended socket buffer will be returned.
 */
int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
{
	int copyflag;
	int elt;
	struct sk_buff *skb1, **skb_p;

	/* If skb is cloned or its head is paged, reallocate
	 * head pulling out all the pages (pages are considered not writable
	 * at the moment even if they are anonymous).
	 */
	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
		return -ENOMEM;

	/* Easy case. Most of packets will go this way. */
	if (!skb_has_frag_list(skb)) {
		/* A little of trouble, not enough of space for trailer.
		 * This should not happen, when stack is tuned to generate
		 * good frames. OK, on miss we reallocate and reserve even more
		 * space, 128 bytes is fair. */

		if (skb_tailroom(skb) < tailbits &&
		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
			return -ENOMEM;

		/* Voila! */
		*trailer = skb;
		return 1;
	}

	/* Misery. We are in troubles, going to mincer fragments... */

	elt = 1;
	skb_p = &skb_shinfo(skb)->frag_list;
	copyflag = 0;

	/* Walk the frag_list; once one member must be copied, every later
	 * member is copied too (copyflag stays set).
	 */
	while ((skb1 = *skb_p) != NULL) {
		int ntail = 0;

		/* The fragment is partially pulled by someone,
		 * this can happen on input. Copy it and everything
		 * after it. */

		if (skb_shared(skb1))
			copyflag = 1;

		/* If the skb is the last, worry about trailer. */

		if (skb1->next == NULL && tailbits) {
			if (skb_shinfo(skb1)->nr_frags ||
			    skb_has_frag_list(skb1) ||
			    skb_tailroom(skb1) < tailbits)
				ntail = tailbits + 128;
		}

		if (copyflag ||
		    skb_cloned(skb1) ||
		    ntail ||
		    skb_shinfo(skb1)->nr_frags ||
		    skb_has_frag_list(skb1)) {
			struct sk_buff *skb2;

			/* Fuck, we are miserable poor guys... */
			if (ntail == 0)
				skb2 = skb_copy(skb1, GFP_ATOMIC);
			else
				skb2 = skb_copy_expand(skb1,
						       skb_headroom(skb1),
						       ntail,
						       GFP_ATOMIC);
			if (unlikely(skb2 == NULL))
				return -ENOMEM;

			/* Preserve socket ownership/accounting on the copy. */
			if (skb1->sk)
				skb_set_owner_w(skb2, skb1->sk);

			/* Looking around. Are we still alive?
			 * OK, link new skb, drop old one */

			skb2->next = skb1->next;
			*skb_p = skb2;
			kfree_skb(skb1);
			skb1 = skb2;
		}
		elt++;
		*trailer = skb1;
		skb_p = &skb1->next;
	}

	return elt;
}
EXPORT_SYMBOL_GPL(skb_cow_data);
3588 | ||
3589 | static void sock_rmem_free(struct sk_buff *skb) | |
3590 | { | |
3591 | struct sock *sk = skb->sk; | |
3592 | ||
3593 | atomic_sub(skb->truesize, &sk->sk_rmem_alloc); | |
3594 | } | |
3595 | ||
/*
 * Note: We dont mem charge error packets (no sk_forward_alloc changes)
 */
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
	/* Refuse if the charge would reach the receive buffer limit. */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned int)sk->sk_rcvbuf)
		return -ENOMEM;

	/* Take ownership: detach any previous owner, charge rmem and
	 * install the destructor that uncharges on free.
	 */
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = sock_rmem_free;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);

	/* before exiting rcu section, make sure dst is refcounted */
	skb_dst_force(skb);

	skb_queue_tail(&sk->sk_error_queue, skb);
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
3619 | ||
/* Dequeue the head of @sk's error queue.  While holding the queue lock,
 * peek at the next pending message so sk_err can be set consistently
 * with the queue contents; resets sk_err to 0 once drained.
 */
struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
{
	struct sk_buff_head *q = &sk->sk_error_queue;
	struct sk_buff *skb, *skb_next;
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&q->lock, flags);
	skb = __skb_dequeue(q);
	if (skb && (skb_next = skb_peek(q)))
		err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
	spin_unlock_irqrestore(&q->lock, flags);

	/* err == 0 here clears any stale sk_err. */
	sk->sk_err = err;
	if (err)
		sk->sk_error_report(sk);

	return skb;
}
EXPORT_SYMBOL(sock_dequeue_err_skb);
3640 | ||
/**
 * skb_clone_sk - create clone of skb, and take reference to socket
 * @skb: the skb to clone
 *
 * This function creates a clone of a buffer that holds a reference on
 * sk_refcnt.  Buffers created via this function are meant to be
 * returned using sock_queue_err_skb, or freed via kfree_skb.
 *
 * When passing buffers allocated with this function to sock_queue_err_skb
 * it is necessary to wrap the call with sock_hold/sock_put in order to
 * prevent the socket from being released prior to being enqueued on
 * the sk_error_queue.
 *
 * Returns NULL if the skb has no socket, the socket is already being
 * released, or the clone allocation fails.
 */
struct sk_buff *skb_clone_sk(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct sk_buff *clone;

	/* Only take a ref if the socket is still alive. */
	if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
		return NULL;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone) {
		sock_put(sk);
		return NULL;
	}

	/* sock_efree drops the socket reference when the clone is freed. */
	clone->sk = sk;
	clone->destructor = sock_efree;

	return clone;
}
EXPORT_SYMBOL(skb_clone_sk);
3674 | ||
/* Build a SO_EE_ORIGIN_TIMESTAMPING error-queue message in @skb's cb
 * and queue it on @sk's error queue; frees the skb if queueing fails.
 * @tstype is the SCM_TSTAMP_* stage being reported.
 */
static void __skb_complete_tx_timestamp(struct sk_buff *skb,
					struct sock *sk,
					int tstype)
{
	struct sock_exterr_skb *serr;
	int err;

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
	serr->ee.ee_info = tstype;
	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
		serr->ee.ee_data = skb_shinfo(skb)->tskey;
		/* TCP IDs are reported relative to the socket's key base. */
		if (sk->sk_protocol == IPPROTO_TCP)
			serr->ee.ee_data -= sk->sk_tskey;
	}

	err = sock_queue_err_skb(sk, skb);

	if (err)
		kfree_skb(skb);
}
3698 | ||
3699 | static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) | |
3700 | { | |
3701 | bool ret; | |
3702 | ||
3703 | if (likely(sysctl_tstamp_allow_data || tsonly)) | |
3704 | return true; | |
3705 | ||
3706 | read_lock_bh(&sk->sk_callback_lock); | |
3707 | ret = sk->sk_socket && sk->sk_socket->file && | |
3708 | file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); | |
3709 | read_unlock_bh(&sk->sk_callback_lock); | |
3710 | return ret; | |
3711 | } | |
3712 | ||
3713 | void skb_complete_tx_timestamp(struct sk_buff *skb, | |
3714 | struct skb_shared_hwtstamps *hwtstamps) | |
3715 | { | |
3716 | struct sock *sk = skb->sk; | |
3717 | ||
3718 | if (!skb_may_tx_timestamp(sk, false)) | |
3719 | return; | |
3720 | ||
3721 | /* take a reference to prevent skb_orphan() from freeing the socket */ | |
3722 | sock_hold(sk); | |
3723 | ||
3724 | *skb_hwtstamps(skb) = *hwtstamps; | |
3725 | __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); | |
3726 | ||
3727 | sock_put(sk); | |
3728 | } | |
3729 | EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); | |
3730 | ||
/* Generate a tx timestamp report for @orig_skb and queue it on the
 * error queue of @sk.
 *
 * With SOF_TIMESTAMPING_OPT_TSONLY an empty skb is allocated so that
 * no packet payload is exposed to userspace; otherwise the original
 * packet is cloned.  Silently returns if @sk is NULL, the socket is
 * not allowed to receive the timestamp, or allocation fails.
 * @orig_skb itself is never consumed.
 */
void __skb_tstamp_tx(struct sk_buff *orig_skb,
		     struct skb_shared_hwtstamps *hwtstamps,
		     struct sock *sk, int tstype)
{
	struct sk_buff *skb;
	bool tsonly;

	if (!sk)
		return;

	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
	if (!skb_may_tx_timestamp(sk, tsonly))
		return;

	if (tsonly)
		skb = alloc_skb(0, GFP_ATOMIC);
	else
		skb = skb_clone(orig_skb, GFP_ATOMIC);
	if (!skb)
		return;

	if (tsonly) {
		/* Carry the flags and key over to the empty report skb so
		 * userspace can still match it to the transmit event.
		 */
		skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
		skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
	}

	if (hwtstamps)
		*skb_hwtstamps(skb) = *hwtstamps;
	else
		skb->tstamp = ktime_get_real();	/* software fallback */

	__skb_complete_tx_timestamp(skb, sk, tstype);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
3765 | ||
3766 | void skb_tstamp_tx(struct sk_buff *orig_skb, | |
3767 | struct skb_shared_hwtstamps *hwtstamps) | |
3768 | { | |
3769 | return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, | |
3770 | SCM_TSTAMP_SND); | |
3771 | } | |
3772 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); | |
3773 | ||
/* Report the wifi ack status of @skb to its owning socket via the
 * error queue (SO_EE_ORIGIN_TXSTATUS).  Ownership of @skb is consumed.
 * NOTE(review): this takes sock_hold() without checking sk_refcnt, so
 * it presumably relies on callers guaranteeing the socket is still
 * referenced — confirm against the call sites.
 */
void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
{
	struct sock *sk = skb->sk;
	struct sock_exterr_skb *serr;
	int err;

	skb->wifi_acked_valid = 1;
	skb->wifi_acked = acked;

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;

	/* take a reference to prevent skb_orphan() from freeing the socket */
	sock_hold(sk);

	err = sock_queue_err_skb(sk, skb);
	if (err)
		kfree_skb(skb);

	sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
3798 | ||
/**
 * skb_partial_csum_set - set up and verify partial csum values for packet
 * @skb: the skb to set
 * @start: the number of bytes after skb->data to start checksumming.
 * @off: the offset from start to place the checksum.
 *
 * For untrusted partially-checksummed packets, we need to make sure the values
 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
 *
 * This function checks and sets those values and skb->ip_summed: if this
 * returns false you should drop the packet.
 */
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
	/* The 16-bit checksum written at start + off must fit entirely
	 * within the linear data, hence the "- 2" in the bound below.
	 */
	if (unlikely(start > skb_headlen(skb)) ||
	    unlikely((int)start + off > skb_headlen(skb) - 2)) {
		net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
				     start, off, skb_headlen(skb));
		return false;
	}
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = skb_headroom(skb) + start;
	skb->csum_offset = off;
	skb_set_transport_header(skb, start);
	return true;
}
EXPORT_SYMBOL_GPL(skb_partial_csum_set);
3826 | ||
3827 | static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, | |
3828 | unsigned int max) | |
3829 | { | |
3830 | if (skb_headlen(skb) >= len) | |
3831 | return 0; | |
3832 | ||
3833 | /* If we need to pullup then pullup to the max, so we | |
3834 | * won't need to do it again. | |
3835 | */ | |
3836 | if (max > skb->len) | |
3837 | max = skb->len; | |
3838 | ||
3839 | if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) | |
3840 | return -ENOMEM; | |
3841 | ||
3842 | if (skb_headlen(skb) < len) | |
3843 | return -EPROTO; | |
3844 | ||
3845 | return 0; | |
3846 | } | |
3847 | ||
3848 | #define MAX_TCP_HDR_LEN (15 * 4) | |
3849 | ||
/* Locate and validate the L4 checksum field of a TCP or UDP header at
 * network offset @off: make sure enough header bytes are linear and
 * set up skb->csum_start/csum_offset via skb_partial_csum_set().
 * Returns a pointer to the checksum field, or ERR_PTR() on failure
 * (including unsupported protocols).
 */
static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
				      typeof(IPPROTO_IP) proto,
				      unsigned int off)
{
	switch (proto) {
	/* err is declared in the switch scope; every case assigns it
	 * before use, so the jump over the declaration is safe.
	 */
	int err;

	case IPPROTO_TCP:
		err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
					  off + MAX_TCP_HDR_LEN);
		if (!err && !skb_partial_csum_set(skb, off,
						  offsetof(struct tcphdr,
							   check)))
			err = -EPROTO;
		return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;

	case IPPROTO_UDP:
		err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
					  off + sizeof(struct udphdr));
		if (!err && !skb_partial_csum_set(skb, off,
						  offsetof(struct udphdr,
							   check)))
			err = -EPROTO;
		return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
	}

	return ERR_PTR(-EPROTO);
}
3878 | ||
3879 | /* This value should be large enough to cover a tagged ethernet header plus | |
3880 | * maximally sized IP and TCP or UDP headers. | |
3881 | */ | |
3882 | #define MAX_IP_HDR_LEN 128 | |
3883 | ||
/* Set up checksum offload fields for an IPv4 packet.  Fragments are
 * rejected (-EPROTO) because only the first fragment carries the L4
 * header.  If @recalculate is true the pseudo-header checksum is
 * written into the L4 checksum field.
 */
static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
{
	unsigned int off;
	bool fragment;
	__sum16 *csum;
	int err;

	fragment = false;

	err = skb_maybe_pull_tail(skb,
				  sizeof(struct iphdr),
				  MAX_IP_HDR_LEN);
	if (err < 0)
		goto out;

	/* Either a non-zero fragment offset or the more-fragments bit
	 * marks this as part of a fragmented datagram.
	 */
	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
		fragment = true;

	off = ip_hdrlen(skb);

	err = -EPROTO;

	if (fragment)
		goto out;

	csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
	if (IS_ERR(csum))
		return PTR_ERR(csum);

	if (recalculate)
		*csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
					   ip_hdr(skb)->daddr,
					   skb->len - off,
					   ip_hdr(skb)->protocol, 0);
	err = 0;

out:
	return err;
}
3923 | ||
3924 | /* This value should be large enough to cover a tagged ethernet header plus | |
3925 | * an IPv6 header, all options, and a maximal TCP or UDP header. | |
3926 | */ | |
3927 | #define MAX_IPV6_HDR_LEN 256 | |
3928 | ||
3929 | #define OPT_HDR(type, skb, off) \ | |
3930 | (type *)(skb_network_header(skb) + (off)) | |
3931 | ||
/* Set up checksum offload fields for an IPv6 packet, walking the
 * extension-header chain to find the transport protocol.  Fragmented
 * packets are rejected (-EPROTO).  If @recalculate is true the
 * pseudo-header checksum is written into the L4 checksum field.
 */
static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
{
	int err;
	u8 nexthdr;
	unsigned int off;
	unsigned int len;
	bool fragment;
	bool done;
	__sum16 *csum;

	fragment = false;
	done = false;

	off = sizeof(struct ipv6hdr);

	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
	if (err < 0)
		goto out;

	nexthdr = ipv6_hdr(skb)->nexthdr;

	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
	/* Walk the extension headers; "done" is set once a header that
	 * is not an extension header (i.e. the transport header) is hit.
	 */
	while (off <= len && !done) {
		switch (nexthdr) {
		case IPPROTO_DSTOPTS:
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING: {
			struct ipv6_opt_hdr *hp;

			/* Each header must be linear before it is read. */
			err = skb_maybe_pull_tail(skb,
						  off +
						  sizeof(struct ipv6_opt_hdr),
						  MAX_IPV6_HDR_LEN);
			if (err < 0)
				goto out;

			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
			nexthdr = hp->nexthdr;
			off += ipv6_optlen(hp);
			break;
		}
		case IPPROTO_AH: {
			struct ip_auth_hdr *hp;

			err = skb_maybe_pull_tail(skb,
						  off +
						  sizeof(struct ip_auth_hdr),
						  MAX_IPV6_HDR_LEN);
			if (err < 0)
				goto out;

			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
			nexthdr = hp->nexthdr;
			off += ipv6_authlen(hp);
			break;
		}
		case IPPROTO_FRAGMENT: {
			struct frag_hdr *hp;

			err = skb_maybe_pull_tail(skb,
						  off +
						  sizeof(struct frag_hdr),
						  MAX_IPV6_HDR_LEN);
			if (err < 0)
				goto out;

			hp = OPT_HDR(struct frag_hdr, skb, off);

			/* Only the first fragment holds the L4 header;
			 * anything fragmented cannot be set up here.
			 */
			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
				fragment = true;

			nexthdr = hp->nexthdr;
			off += sizeof(struct frag_hdr);
			break;
		}
		default:
			done = true;
			break;
		}
	}

	err = -EPROTO;

	if (!done || fragment)
		goto out;

	csum = skb_checksum_setup_ip(skb, nexthdr, off);
	if (IS_ERR(csum))
		return PTR_ERR(csum);

	if (recalculate)
		*csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					 &ipv6_hdr(skb)->daddr,
					 skb->len - off, nexthdr, 0);
	err = 0;

out:
	return err;
}
4031 | ||
4032 | /** | |
4033 | * skb_checksum_setup - set up partial checksum offset | |
4034 | * @skb: the skb to set up | |
4035 | * @recalculate: if true the pseudo-header checksum will be recalculated | |
4036 | */ | |
4037 | int skb_checksum_setup(struct sk_buff *skb, bool recalculate) | |
4038 | { | |
4039 | int err; | |
4040 | ||
4041 | switch (skb->protocol) { | |
4042 | case htons(ETH_P_IP): | |
4043 | err = skb_checksum_setup_ipv4(skb, recalculate); | |
4044 | break; | |
4045 | ||
4046 | case htons(ETH_P_IPV6): | |
4047 | err = skb_checksum_setup_ipv6(skb, recalculate); | |
4048 | break; | |
4049 | ||
4050 | default: | |
4051 | err = -EPROTO; | |
4052 | break; | |
4053 | } | |
4054 | ||
4055 | return err; | |
4056 | } | |
4057 | EXPORT_SYMBOL(skb_checksum_setup); | |
4058 | ||
/**
 * skb_checksum_maybe_trim - maybe trims the given skb
 * @skb: the skb to check
 * @transport_len: the data length beyond the network header
 *
 * Checks whether the given skb has data beyond the given transport length.
 * If so, returns a cloned skb trimmed to this transport length.
 * Otherwise returns the provided skb. Returns NULL in error cases
 * (e.g. transport_len exceeds skb length or out-of-memory).
 *
 * Caller needs to set the skb transport header and release the returned skb.
 * Provided skb is consumed.
 */
static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
					       unsigned int transport_len)
{
	struct sk_buff *skb_chk;
	unsigned int len = skb_transport_offset(skb) + transport_len;
	int ret;

	if (skb->len < len) {
		/* Packet is shorter than claimed: error, consume it. */
		kfree_skb(skb);
		return NULL;
	} else if (skb->len == len) {
		/* Exact length: nothing to trim, hand back as-is. */
		return skb;
	}

	/* Clone before trimming so the original data stays intact;
	 * the original reference is dropped on every path from here.
	 */
	skb_chk = skb_clone(skb, GFP_ATOMIC);
	kfree_skb(skb);

	if (!skb_chk)
		return NULL;

	ret = pskb_trim_rcsum(skb_chk, len);
	if (ret) {
		kfree_skb(skb_chk);
		return NULL;
	}

	return skb_chk;
}
4100 | ||
/**
 * skb_checksum_trimmed - validate checksum of an skb
 * @skb: the skb to check
 * @transport_len: the data length beyond the network header
 * @skb_chkf: checksum function to use
 *
 * Applies the given checksum function skb_chkf to the provided skb.
 * Returns a checked and maybe trimmed skb. Returns NULL on error.
 *
 * If the skb has data beyond the given transport length, then a
 * trimmed & cloned skb is checked and returned.
 *
 * Caller needs to set the skb transport header and release the returned skb.
 * Provided skb is consumed.
 */
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
				     unsigned int transport_len,
				     __sum16(*skb_chkf)(struct sk_buff *skb))
{
	struct sk_buff *skb_chk;
	unsigned int offset = skb_transport_offset(skb);
	__sum16 ret;

	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
	if (!skb_chk)
		return NULL;

	/* Everything up to the transport header must be linear before
	 * the checksum function runs over the transport payload.
	 */
	if (!pskb_may_pull(skb_chk, offset)) {
		kfree_skb(skb_chk);
		return NULL;
	}

	/* Temporarily position skb->data at the transport header, run
	 * the checksum function, then restore the original data pointer.
	 */
	__skb_pull(skb_chk, offset);
	ret = skb_chkf(skb_chk);
	__skb_push(skb_chk, offset);

	if (ret) {
		/* Non-zero checksum result means verification failed. */
		kfree_skb(skb_chk);
		return NULL;
	}

	return skb_chk;
}
EXPORT_SYMBOL(skb_checksum_trimmed);
4145 | ||
4146 | void __skb_warn_lro_forwarding(const struct sk_buff *skb) | |
4147 | { | |
4148 | net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", | |
4149 | skb->dev->name); | |
4150 | } | |
4151 | EXPORT_SYMBOL(__skb_warn_lro_forwarding); | |
4152 | ||
4153 | void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) | |
4154 | { | |
4155 | if (head_stolen) { | |
4156 | skb_release_head_state(skb); | |
4157 | kmem_cache_free(skbuff_head_cache, skb); | |
4158 | } else { | |
4159 | __kfree_skb(skb); | |
4160 | } | |
4161 | } | |
4162 | EXPORT_SYMBOL(kfree_skb_partial); | |
4163 | ||
/**
 * skb_try_coalesce - try to merge skb to prior one
 * @to: prior buffer
 * @from: buffer to add
 * @fragstolen: pointer to boolean
 * @delta_truesize: how much more was allocated than was requested
 *
 * Tries to append @from's data to @to without copying payload: either
 * straight into @to's tailroom, or by transplanting @from's head and
 * page fragments.  Returns true on success; when *fragstolen is set
 * the caller must free @from with kfree_skb_partial().
 */
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
		      bool *fragstolen, int *delta_truesize)
{
	int i, delta, len = from->len;

	*fragstolen = false;

	if (skb_cloned(to))
		return false;

	if (len <= skb_tailroom(to)) {
		/* Small enough: just copy into @to's existing tailroom. */
		if (len)
			BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
		*delta_truesize = 0;
		return true;
	}

	if (skb_has_frag_list(to) || skb_has_frag_list(from))
		return false;

	if (skb_headlen(from) != 0) {
		struct page *page;
		unsigned int offset;

		/* @from's linear head will occupy one extra frag slot. */
		if (skb_shinfo(to)->nr_frags +
		    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
			return false;

		if (skb_head_is_locked(from))
			return false;

		delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));

		/* Turn @from's head into a page fragment of @to. */
		page = virt_to_head_page(from->head);
		offset = from->data - (unsigned char *)page_address(page);

		skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
				   page, offset, skb_headlen(from));
		*fragstolen = true;
	} else {
		if (skb_shinfo(to)->nr_frags +
		    skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
			return false;

		delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
	}

	WARN_ON_ONCE(delta < len);

	/* Move all of @from's fragment descriptors over to @to. */
	memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
	       skb_shinfo(from)->frags,
	       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
	skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;

	if (!skb_cloned(from))
		skb_shinfo(from)->nr_frags = 0;

	/* if the skb is not cloned this does nothing
	 * since we set nr_frags to 0.
	 */
	for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
		skb_frag_ref(from, i);

	to->truesize += delta;
	to->len += len;
	to->data_len += len;

	*delta_truesize = delta;
	return true;
}
EXPORT_SYMBOL(skb_try_coalesce);
4242 | ||
/**
 * skb_scrub_packet - scrub an skb
 *
 * @skb: buffer to clean
 * @xnet: packet is crossing netns
 *
 * skb_scrub_packet can be used after encapsulating or decapsulting a packet
 * into/from a tunnel. Some information have to be cleared during these
 * operations.
 * skb_scrub_packet can also be used to clean a skb before injecting it in
 * another namespace (@xnet == true). We have to clear all information in the
 * skb that could impact namespace isolation.
 */
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
	skb->tstamp.tv64 = 0;
	skb->pkt_type = PACKET_HOST;
	skb->skb_iif = 0;
	skb->ignore_df = 0;
	skb_dst_drop(skb);
	skb_sender_cpu_clear(skb);
	secpath_reset(skb);
	nf_reset(skb);
	nf_reset_trace(skb);

	if (!xnet)
		return;

	/* Crossing namespaces: additionally drop socket ownership and
	 * the namespace-local packet mark.
	 */
	skb_orphan(skb);
	skb->mark = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
4275 | ||
/**
 * skb_gso_transport_seglen - Return length of individual segments of a gso packet
 *
 * @skb: GSO skb
 *
 * skb_gso_transport_seglen is used to determine the real size of the
 * individual segments, including Layer4 headers (TCP/UDP).
 *
 * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
 */
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int thlen = 0;

	if (skb->encapsulation) {
		/* Tunneled: count the outer transport header, plus the
		 * inner TCP header when the inner protocol is TCP.
		 */
		thlen = skb_inner_transport_header(skb) -
			skb_transport_header(skb);

		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
			thlen += inner_tcp_hdrlen(skb);
	} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
		thlen = tcp_hdrlen(skb);
	}
	/* UFO sets gso_size to the size of the fragmentation
	 * payload, i.e. the size of the L4 (UDP) header is already
	 * accounted for.
	 */
	return thlen + shinfo->gso_size;
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
4307 | ||
/* After the VLAN tag has been pulled from the payload, move the MAC
 * addresses forward over the now-stale tag bytes so the buffer again
 * starts with a contiguous Ethernet header.  Consumes @skb and returns
 * NULL on error.
 */
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
	if (skb_cow(skb, skb_headroom(skb)) < 0) {
		kfree_skb(skb);
		return NULL;
	}

	/* Shift dest + src MAC (2 * ETH_ALEN bytes) up by VLAN_HLEN. */
	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
	skb->mac_header += VLAN_HLEN;
	return skb;
}
4319 | ||
/* Pull an in-band VLAN header out of the packet data and store the tag
 * in the skb's hardware-accelerated tag fields instead, as if the NIC
 * had stripped it.  Consumes @skb and returns NULL on error.
 */
struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
{
	struct vlan_hdr *vhdr;
	u16 vlan_tci;

	if (unlikely(skb_vlan_tag_present(skb))) {
		/* vlan_tci is already set-up so leave this for another time */
		return skb;
	}

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		goto err_free;

	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
		goto err_free;

	vhdr = (struct vlan_hdr *)skb->data;
	vlan_tci = ntohs(vhdr->h_vlan_TCI);
	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);

	/* Remove the tag from the payload, adjusting the checksum. */
	skb_pull_rcsum(skb, VLAN_HLEN);
	vlan_set_encap_proto(skb, vhdr);

	skb = skb_reorder_vlan_header(skb);
	if (unlikely(!skb))
		goto err_free;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	return skb;

err_free:
	kfree_skb(skb);
	return NULL;
}
EXPORT_SYMBOL(skb_vlan_untag);
4359 | ||
4360 | int skb_ensure_writable(struct sk_buff *skb, int write_len) | |
4361 | { | |
4362 | if (!pskb_may_pull(skb, write_len)) | |
4363 | return -ENOMEM; | |
4364 | ||
4365 | if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) | |
4366 | return 0; | |
4367 | ||
4368 | return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | |
4369 | } | |
4370 | EXPORT_SYMBOL(skb_ensure_writable); | |
4371 | ||
/* remove VLAN header from packet and update csum accordingly.
 * Stores the extracted TCI in *@vlan_tci and returns 0 or a negative
 * errno.  skb->data is restored to its original position on all paths.
 */
static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
	struct vlan_hdr *vhdr;
	unsigned int offset = skb->data - skb_mac_header(skb);
	int err;

	/* Operate with skb->data at the mac header; pulled back below. */
	__skb_push(skb, offset);
	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
	if (unlikely(err))
		goto pull;

	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);

	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
	*vlan_tci = ntohs(vhdr->h_vlan_TCI);

	/* Slide the MAC addresses over the tag, then drop the tag. */
	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	__skb_pull(skb, VLAN_HLEN);

	vlan_set_encap_proto(skb, vhdr);
	skb->mac_header += VLAN_HLEN;

	if (skb_network_offset(skb) < ETH_HLEN)
		skb_set_network_header(skb, ETH_HLEN);

	skb_reset_mac_len(skb);
pull:
	__skb_pull(skb, offset);

	return err;
}
4404 | ||
/* Pop the outermost VLAN tag from @skb: clear the hw-accelerated tag
 * if present, otherwise strip one tag from the packet data.  If a
 * further in-band tag follows, it is promoted into the hw-accel
 * fields.  Returns 0 on success or a negative errno.
 */
int skb_vlan_pop(struct sk_buff *skb)
{
	u16 vlan_tci;
	__be16 vlan_proto;
	int err;

	if (likely(skb_vlan_tag_present(skb))) {
		skb->vlan_tci = 0;
	} else {
		/* Nothing to pop for non-VLAN or too-short frames. */
		if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
			      skb->protocol != htons(ETH_P_8021AD)) ||
			     skb->len < VLAN_ETH_HLEN))
			return 0;

		err = __skb_vlan_pop(skb, &vlan_tci);
		if (err)
			return err;
	}
	/* move next vlan tag to hw accel tag */
	if (likely((skb->protocol != htons(ETH_P_8021Q) &&
		    skb->protocol != htons(ETH_P_8021AD)) ||
		   skb->len < VLAN_ETH_HLEN))
		return 0;

	vlan_proto = skb->protocol;
	err = __skb_vlan_pop(skb, &vlan_tci);
	if (unlikely(err))
		return err;

	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
	return 0;
}
EXPORT_SYMBOL(skb_vlan_pop);
4438 | ||
4439 | int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) | |
4440 | { | |
4441 | if (skb_vlan_tag_present(skb)) { | |
4442 | unsigned int offset = skb->data - skb_mac_header(skb); | |
4443 | int err; | |
4444 | ||
4445 | /* __vlan_insert_tag expect skb->data pointing to mac header. | |
4446 | * So change skb->data before calling it and change back to | |
4447 | * original position later | |
4448 | */ | |
4449 | __skb_push(skb, offset); | |
4450 | err = __vlan_insert_tag(skb, skb->vlan_proto, | |
4451 | skb_vlan_tag_get(skb)); | |
4452 | if (err) | |
4453 | return err; | |
4454 | skb->protocol = skb->vlan_proto; | |
4455 | skb->mac_len += VLAN_HLEN; | |
4456 | __skb_pull(skb, offset); | |
4457 | ||
4458 | if (skb->ip_summed == CHECKSUM_COMPLETE) | |
4459 | skb->csum = csum_add(skb->csum, csum_partial(skb->data | |
4460 | + (2 * ETH_ALEN), VLAN_HLEN, 0)); | |
4461 | } | |
4462 | __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); | |
4463 | return 0; | |
4464 | } | |
4465 | EXPORT_SYMBOL(skb_vlan_push); | |
4466 | ||
/**
 * alloc_skb_with_frags - allocate skb with page frags
 *
 * @header_len: size of linear part
 * @data_len: needed length in frags
 * @max_page_order: max page order desired.
 * @errcode: pointer to error code if any
 * @gfp_mask: allocation mask
 *
 * This can be used to allocate a paged skb, given a maximal order for frags.
 * Returns the skb, or NULL with *errcode set to -EMSGSIZE (too many
 * pages needed) or -ENOBUFS (allocation failure).
 */
struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
				     unsigned long data_len,
				     int max_page_order,
				     int *errcode,
				     gfp_t gfp_mask)
{
	/* Worst case: one fragment slot per order-0 page. */
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	unsigned long chunk;
	struct sk_buff *skb;
	struct page *page;
	gfp_t gfp_head;
	int i;

	*errcode = -EMSGSIZE;
	/* Note this test could be relaxed, if we succeed to allocate
	 * high order pages...
	 */
	if (npages > MAX_SKB_FRAGS)
		return NULL;

	gfp_head = gfp_mask;
	if (gfp_head & __GFP_WAIT)
		gfp_head |= __GFP_REPEAT;

	*errcode = -ENOBUFS;
	skb = alloc_skb(header_len, gfp_head);
	if (!skb)
		return NULL;

	skb->truesize += npages << PAGE_SHIFT;

	for (i = 0; npages > 0; i++) {
		int order = max_page_order;

		/* Try progressively smaller high-order allocations,
		 * falling back to single pages without retrying.
		 */
		while (order) {
			if (npages >= 1 << order) {
				page = alloc_pages(gfp_mask |
						   __GFP_COMP |
						   __GFP_NOWARN |
						   __GFP_NORETRY,
						   order);
				if (page)
					goto fill_page;
				/* Do not retry other high order allocations */
				order = 1;
				max_page_order = 0;
			}
			order--;
		}
		page = alloc_page(gfp_mask);
		if (!page)
			goto failure;
fill_page:
		chunk = min_t(unsigned long, data_len,
			      PAGE_SIZE << order);
		skb_fill_page_desc(skb, i, page, 0, chunk);
		data_len -= chunk;
		npages -= 1 << order;
	}
	return skb;

failure:
	kfree_skb(skb);
	return NULL;
}
EXPORT_SYMBOL(alloc_skb_with_frags);