]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/core/skbuff.c
UBUNTU: Start new release
[mirror_ubuntu-zesty-kernel.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
1/*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
113aa838 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
1da177e4
LT
7 * Fixes:
8 * Alan Cox : Fixed the worst of the load
9 * balancer bugs.
10 * Dave Platt : Interrupt stacking fix.
11 * Richard Kooijman : Timestamp fixes.
12 * Alan Cox : Changed buffer format.
13 * Alan Cox : destructor hook for AF_UNIX etc.
14 * Linus Torvalds : Better skb_clone.
15 * Alan Cox : Added skb_copy.
16 * Alan Cox : Added all the changed routines Linus
17 * only put in the headers
18 * Ray VanTassle : Fixed --skb->lock in free
19 * Alan Cox : skb_copy copy arp field
20 * Andi Kleen : slabified it.
21 * Robert Olsson : Removed skb_head_pool
22 *
23 * NOTE:
24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc).
28 *
29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version.
33 */
34
35/*
36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */
38
e005d193
JP
39#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
40
1da177e4
LT
41#include <linux/module.h>
42#include <linux/types.h>
43#include <linux/kernel.h>
fe55f6d5 44#include <linux/kmemcheck.h>
1da177e4
LT
45#include <linux/mm.h>
46#include <linux/interrupt.h>
47#include <linux/in.h>
48#include <linux/inet.h>
49#include <linux/slab.h>
de960aa9
FW
50#include <linux/tcp.h>
51#include <linux/udp.h>
90017acc 52#include <linux/sctp.h>
1da177e4
LT
53#include <linux/netdevice.h>
54#ifdef CONFIG_NET_CLS_ACT
55#include <net/pkt_sched.h>
56#endif
57#include <linux/string.h>
58#include <linux/skbuff.h>
9c55e01c 59#include <linux/splice.h>
1da177e4
LT
60#include <linux/cache.h>
61#include <linux/rtnetlink.h>
62#include <linux/init.h>
716ea3a7 63#include <linux/scatterlist.h>
ac45f602 64#include <linux/errqueue.h>
268bb0ce 65#include <linux/prefetch.h>
0d5501c1 66#include <linux/if_vlan.h>
1da177e4
LT
67
68#include <net/protocol.h>
69#include <net/dst.h>
70#include <net/sock.h>
71#include <net/checksum.h>
ed1f50c3 72#include <net/ip6_checksum.h>
1da177e4
LT
73#include <net/xfrm.h>
74
7c0f6ba6 75#include <linux/uaccess.h>
ad8d75ff 76#include <trace/events/skb.h>
51c56b00 77#include <linux/highmem.h>
b245be1f
WB
78#include <linux/capability.h>
79#include <linux/user_namespace.h>
a1f8e7f7 80
d7e8883c 81struct kmem_cache *skbuff_head_cache __read_mostly;
e18b890b 82static struct kmem_cache *skbuff_fclone_cache __read_mostly;
5f74f82e
HWR
83int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
84EXPORT_SYMBOL(sysctl_max_skb_frags);
1da177e4 85
1da177e4 86/**
f05de73b
JS
87 * skb_panic - private function for out-of-line support
88 * @skb: buffer
89 * @sz: size
90 * @addr: address
99d5851e 91 * @msg: skb_over_panic or skb_under_panic
1da177e4 92 *
f05de73b
JS
93 * Out-of-line support for skb_put() and skb_push().
94 * Called via the wrapper skb_over_panic() or skb_under_panic().
95 * Keep out of line to prevent kernel bloat.
96 * __builtin_return_address is not used because it is not always reliable.
1da177e4 97 */
f05de73b 98static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
99d5851e 99 const char msg[])
1da177e4 100{
e005d193 101 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
99d5851e 102 msg, addr, skb->len, sz, skb->head, skb->data,
e005d193
JP
103 (unsigned long)skb->tail, (unsigned long)skb->end,
104 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
105 BUG();
106}
107
/* Hand off to skb_panic(), tagging the report with this function's name. */
static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	const char *tag = __func__;

	skb_panic(skb, sz, addr, tag);
}
112
f05de73b
JS
/* Hand off to skb_panic(), tagging the report with this function's name. */
static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	const char *tag = __func__;

	skb_panic(skb, sz, addr, tag);
}
c93bdd0e
MG
117
118/*
119 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
120 * the caller if emergency pfmemalloc reserves are being used. If it is and
121 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
122 * may be used. Otherwise, the packet data may be discarded until enough
123 * memory is free
124 */
125#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
126 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
61c5e88a 127
128static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
129 unsigned long ip, bool *pfmemalloc)
c93bdd0e
MG
130{
131 void *obj;
132 bool ret_pfmemalloc = false;
133
134 /*
135 * Try a regular allocation, when that fails and we're not entitled
136 * to the reserves, fail.
137 */
138 obj = kmalloc_node_track_caller(size,
139 flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
140 node);
141 if (obj || !(gfp_pfmemalloc_allowed(flags)))
142 goto out;
143
144 /* Try again but now we are using pfmemalloc reserves */
145 ret_pfmemalloc = true;
146 obj = kmalloc_node_track_caller(size, flags, node);
147
148out:
149 if (pfmemalloc)
150 *pfmemalloc = ret_pfmemalloc;
151
152 return obj;
153}
154
1da177e4
LT
155/* Allocate a new skbuff. We do this ourselves so we can fill in a few
156 * 'private' fields and also do memory statistics to find all the
157 * [BEEP] leaks.
158 *
159 */
160
0ebd0ac5
PM
161struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
162{
163 struct sk_buff *skb;
164
165 /* Get the HEAD */
166 skb = kmem_cache_alloc_node(skbuff_head_cache,
167 gfp_mask & ~__GFP_DMA, node);
168 if (!skb)
169 goto out;
170
171 /*
172 * Only clear those fields we need to clear, not those that we will
173 * actually initialise below. Hence, don't put any more fields after
174 * the tail pointer in struct sk_buff!
175 */
176 memset(skb, 0, offsetof(struct sk_buff, tail));
5e71d9d7 177 skb->head = NULL;
0ebd0ac5
PM
178 skb->truesize = sizeof(struct sk_buff);
179 atomic_set(&skb->users, 1);
180
35d04610 181 skb->mac_header = (typeof(skb->mac_header))~0U;
0ebd0ac5
PM
182out:
183 return skb;
184}
185
1da177e4 186/**
d179cd12 187 * __alloc_skb - allocate a network buffer
1da177e4
LT
188 * @size: size to allocate
189 * @gfp_mask: allocation mask
c93bdd0e
MG
190 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
191 * instead of head cache and allocate a cloned (child) skb.
192 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
193 * allocations in case the data is required for writeback
b30973f8 194 * @node: numa node to allocate memory on
1da177e4
LT
195 *
196 * Allocate a new &sk_buff. The returned buffer has no headroom and a
94b6042c
BH
197 * tail room of at least size bytes. The object has a reference count
198 * of one. The return is the buffer. On a failure the return is %NULL.
1da177e4
LT
199 *
200 * Buffers may only be allocated from interrupts using a @gfp_mask of
201 * %GFP_ATOMIC.
202 */
dd0fc66f 203struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
c93bdd0e 204 int flags, int node)
1da177e4 205{
e18b890b 206 struct kmem_cache *cache;
4947d3ef 207 struct skb_shared_info *shinfo;
1da177e4
LT
208 struct sk_buff *skb;
209 u8 *data;
c93bdd0e 210 bool pfmemalloc;
1da177e4 211
c93bdd0e
MG
212 cache = (flags & SKB_ALLOC_FCLONE)
213 ? skbuff_fclone_cache : skbuff_head_cache;
214
215 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
216 gfp_mask |= __GFP_MEMALLOC;
8798b3fb 217
1da177e4 218 /* Get the HEAD */
b30973f8 219 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
220 if (!skb)
221 goto out;
ec7d2f2c 222 prefetchw(skb);
1da177e4 223
87fb4b7b
ED
224 /* We do our best to align skb_shared_info on a separate cache
225 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
226 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
227 * Both skb->head and skb_shared_info are cache line aligned.
228 */
bc417e30 229 size = SKB_DATA_ALIGN(size);
87fb4b7b 230 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
c93bdd0e 231 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
1da177e4
LT
232 if (!data)
233 goto nodata;
87fb4b7b
ED
234 /* kmalloc(size) might give us more room than requested.
235 * Put skb_shared_info exactly at the end of allocated zone,
236 * to allow max possible filling before reallocation.
237 */
238 size = SKB_WITH_OVERHEAD(ksize(data));
ec7d2f2c 239 prefetchw(data + size);
1da177e4 240
ca0605a7 241 /*
c8005785
JB
242 * Only clear those fields we need to clear, not those that we will
243 * actually initialise below. Hence, don't put any more fields after
244 * the tail pointer in struct sk_buff!
ca0605a7
ACM
245 */
246 memset(skb, 0, offsetof(struct sk_buff, tail));
87fb4b7b
ED
247 /* Account for allocated memory : skb + skb->head */
248 skb->truesize = SKB_TRUESIZE(size);
c93bdd0e 249 skb->pfmemalloc = pfmemalloc;
1da177e4
LT
250 atomic_set(&skb->users, 1);
251 skb->head = data;
252 skb->data = data;
27a884dc 253 skb_reset_tail_pointer(skb);
4305b541 254 skb->end = skb->tail + size;
35d04610
CW
255 skb->mac_header = (typeof(skb->mac_header))~0U;
256 skb->transport_header = (typeof(skb->transport_header))~0U;
19633e12 257
4947d3ef
BL
258 /* make sure we initialize shinfo sequentially */
259 shinfo = skb_shinfo(skb);
ec7d2f2c 260 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
4947d3ef 261 atomic_set(&shinfo->dataref, 1);
c2aa3665 262 kmemcheck_annotate_variable(shinfo->destructor_arg);
4947d3ef 263
c93bdd0e 264 if (flags & SKB_ALLOC_FCLONE) {
d0bf4a9e 265 struct sk_buff_fclones *fclones;
1da177e4 266
d0bf4a9e
ED
267 fclones = container_of(skb, struct sk_buff_fclones, skb1);
268
269 kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
d179cd12 270 skb->fclone = SKB_FCLONE_ORIG;
d0bf4a9e 271 atomic_set(&fclones->fclone_ref, 1);
d179cd12 272
6ffe75eb 273 fclones->skb2.fclone = SKB_FCLONE_CLONE;
d0bf4a9e 274 fclones->skb2.pfmemalloc = pfmemalloc;
d179cd12 275 }
1da177e4
LT
276out:
277 return skb;
278nodata:
8798b3fb 279 kmem_cache_free(cache, skb);
1da177e4
LT
280 skb = NULL;
281 goto out;
1da177e4 282}
b4ac530f 283EXPORT_SYMBOL(__alloc_skb);
1da177e4 284
b2b5ce9d 285/**
2ea2f62c 286 * __build_skb - build a network buffer
b2b5ce9d 287 * @data: data buffer provided by caller
2ea2f62c 288 * @frag_size: size of data, or 0 if head was kmalloced
b2b5ce9d
ED
289 *
290 * Allocate a new &sk_buff. Caller provides space holding head and
deceb4c0 291 * skb_shared_info. @data must have been allocated by kmalloc() only if
2ea2f62c
ED
292 * @frag_size is 0, otherwise data should come from the page allocator
293 * or vmalloc()
b2b5ce9d
ED
294 * The return is the new skb buffer.
295 * On a failure the return is %NULL, and @data is not freed.
296 * Notes :
297 * Before IO, driver allocates only data buffer where NIC put incoming frame
298 * Driver should add room at head (NET_SKB_PAD) and
299 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
300 * After IO, driver calls build_skb(), to allocate sk_buff and populate it
301 * before giving packet to stack.
302 * RX rings only contains data buffers, not full skbs.
303 */
2ea2f62c 304struct sk_buff *__build_skb(void *data, unsigned int frag_size)
b2b5ce9d
ED
305{
306 struct skb_shared_info *shinfo;
307 struct sk_buff *skb;
d3836f21 308 unsigned int size = frag_size ? : ksize(data);
b2b5ce9d
ED
309
310 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
311 if (!skb)
312 return NULL;
313
d3836f21 314 size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
b2b5ce9d
ED
315
316 memset(skb, 0, offsetof(struct sk_buff, tail));
317 skb->truesize = SKB_TRUESIZE(size);
318 atomic_set(&skb->users, 1);
319 skb->head = data;
320 skb->data = data;
321 skb_reset_tail_pointer(skb);
322 skb->end = skb->tail + size;
35d04610
CW
323 skb->mac_header = (typeof(skb->mac_header))~0U;
324 skb->transport_header = (typeof(skb->transport_header))~0U;
b2b5ce9d
ED
325
326 /* make sure we initialize shinfo sequentially */
327 shinfo = skb_shinfo(skb);
328 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
329 atomic_set(&shinfo->dataref, 1);
330 kmemcheck_annotate_variable(shinfo->destructor_arg);
331
332 return skb;
333}
2ea2f62c
ED
334
335/* build_skb() is wrapper over __build_skb(), that specifically
336 * takes care of skb->head and skb->pfmemalloc
337 * This means that if @frag_size is not zero, then @data must be backed
338 * by a page fragment, not kmalloc() or vmalloc()
339 */
340struct sk_buff *build_skb(void *data, unsigned int frag_size)
341{
342 struct sk_buff *skb = __build_skb(data, frag_size);
343
344 if (skb && frag_size) {
345 skb->head_frag = 1;
2f064f34 346 if (page_is_pfmemalloc(virt_to_head_page(data)))
2ea2f62c
ED
347 skb->pfmemalloc = 1;
348 }
349 return skb;
350}
b2b5ce9d
ED
351EXPORT_SYMBOL(build_skb);
352
795bb1c0
JDB
353#define NAPI_SKB_CACHE_SIZE 64
354
355struct napi_alloc_cache {
356 struct page_frag_cache page;
e0d7924a 357 unsigned int skb_count;
795bb1c0
JDB
358 void *skb_cache[NAPI_SKB_CACHE_SIZE];
359};
360
b63ae8ca 361static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
795bb1c0 362static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
ffde7328
AD
363
364static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
365{
b63ae8ca 366 struct page_frag_cache *nc;
ffde7328
AD
367 unsigned long flags;
368 void *data;
369
370 local_irq_save(flags);
9451980a 371 nc = this_cpu_ptr(&netdev_alloc_cache);
8c2dd3e4 372 data = page_frag_alloc(nc, fragsz, gfp_mask);
6f532612
ED
373 local_irq_restore(flags);
374 return data;
375}
c93bdd0e
MG
376
377/**
378 * netdev_alloc_frag - allocate a page fragment
379 * @fragsz: fragment size
380 *
381 * Allocates a frag from a page for receive buffer.
382 * Uses GFP_ATOMIC allocations.
383 */
384void *netdev_alloc_frag(unsigned int fragsz)
385{
386 return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
387}
6f532612
ED
388EXPORT_SYMBOL(netdev_alloc_frag);
389
ffde7328
AD
390static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
391{
795bb1c0 392 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
9451980a 393
8c2dd3e4 394 return page_frag_alloc(&nc->page, fragsz, gfp_mask);
ffde7328
AD
395}
396
397void *napi_alloc_frag(unsigned int fragsz)
398{
399 return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
400}
401EXPORT_SYMBOL(napi_alloc_frag);
402
fd11a83d
AD
403/**
404 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
405 * @dev: network device to receive on
d7499160 406 * @len: length to allocate
fd11a83d
AD
407 * @gfp_mask: get_free_pages mask, passed to alloc_skb
408 *
409 * Allocate a new &sk_buff and assign it a usage count of one. The
410 * buffer has NET_SKB_PAD headroom built in. Users should allocate
411 * the headroom they think they need without accounting for the
412 * built in space. The built in space is used for optimisations.
413 *
414 * %NULL is returned if there is no free memory.
415 */
9451980a
AD
416struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
417 gfp_t gfp_mask)
fd11a83d 418{
b63ae8ca 419 struct page_frag_cache *nc;
9451980a 420 unsigned long flags;
fd11a83d 421 struct sk_buff *skb;
9451980a
AD
422 bool pfmemalloc;
423 void *data;
424
425 len += NET_SKB_PAD;
fd11a83d 426
9451980a 427 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
d0164adc 428 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
a080e7bd
AD
429 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
430 if (!skb)
431 goto skb_fail;
432 goto skb_success;
433 }
fd11a83d 434
9451980a
AD
435 len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
436 len = SKB_DATA_ALIGN(len);
437
438 if (sk_memalloc_socks())
439 gfp_mask |= __GFP_MEMALLOC;
440
441 local_irq_save(flags);
442
443 nc = this_cpu_ptr(&netdev_alloc_cache);
8c2dd3e4 444 data = page_frag_alloc(nc, len, gfp_mask);
9451980a
AD
445 pfmemalloc = nc->pfmemalloc;
446
447 local_irq_restore(flags);
448
449 if (unlikely(!data))
450 return NULL;
451
452 skb = __build_skb(data, len);
453 if (unlikely(!skb)) {
181edb2b 454 skb_free_frag(data);
9451980a 455 return NULL;
7b2e497a 456 }
fd11a83d 457
9451980a
AD
458 /* use OR instead of assignment to avoid clearing of bits in mask */
459 if (pfmemalloc)
460 skb->pfmemalloc = 1;
461 skb->head_frag = 1;
462
a080e7bd 463skb_success:
9451980a
AD
464 skb_reserve(skb, NET_SKB_PAD);
465 skb->dev = dev;
466
a080e7bd 467skb_fail:
8af27456
CH
468 return skb;
469}
b4ac530f 470EXPORT_SYMBOL(__netdev_alloc_skb);
1da177e4 471
fd11a83d
AD
472/**
473 * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
474 * @napi: napi instance this buffer was allocated for
d7499160 475 * @len: length to allocate
fd11a83d
AD
476 * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
477 *
478 * Allocate a new sk_buff for use in NAPI receive. This buffer will
479 * attempt to allocate the head from a special reserved region used
480 * only for NAPI Rx allocation. By doing this we can save several
481 * CPU cycles by avoiding having to disable and re-enable IRQs.
482 *
483 * %NULL is returned if there is no free memory.
484 */
9451980a
AD
485struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
486 gfp_t gfp_mask)
fd11a83d 487{
795bb1c0 488 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
fd11a83d 489 struct sk_buff *skb;
9451980a
AD
490 void *data;
491
492 len += NET_SKB_PAD + NET_IP_ALIGN;
fd11a83d 493
9451980a 494 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
d0164adc 495 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
a080e7bd
AD
496 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
497 if (!skb)
498 goto skb_fail;
499 goto skb_success;
500 }
9451980a
AD
501
502 len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
503 len = SKB_DATA_ALIGN(len);
504
505 if (sk_memalloc_socks())
506 gfp_mask |= __GFP_MEMALLOC;
fd11a83d 507
8c2dd3e4 508 data = page_frag_alloc(&nc->page, len, gfp_mask);
9451980a
AD
509 if (unlikely(!data))
510 return NULL;
511
512 skb = __build_skb(data, len);
513 if (unlikely(!skb)) {
181edb2b 514 skb_free_frag(data);
9451980a 515 return NULL;
fd11a83d
AD
516 }
517
9451980a 518 /* use OR instead of assignment to avoid clearing of bits in mask */
795bb1c0 519 if (nc->page.pfmemalloc)
9451980a
AD
520 skb->pfmemalloc = 1;
521 skb->head_frag = 1;
522
a080e7bd 523skb_success:
9451980a
AD
524 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
525 skb->dev = napi->dev;
526
a080e7bd 527skb_fail:
fd11a83d
AD
528 return skb;
529}
530EXPORT_SYMBOL(__napi_alloc_skb);
531
654bed16 532void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
50269e19 533 int size, unsigned int truesize)
654bed16
PZ
534{
535 skb_fill_page_desc(skb, i, page, off, size);
536 skb->len += size;
537 skb->data_len += size;
50269e19 538 skb->truesize += truesize;
654bed16
PZ
539}
540EXPORT_SYMBOL(skb_add_rx_frag);
541
f8e617e1
JW
542void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
543 unsigned int truesize)
544{
545 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
546
547 skb_frag_size_add(frag, size);
548 skb->len += size;
549 skb->data_len += size;
550 skb->truesize += truesize;
551}
552EXPORT_SYMBOL(skb_coalesce_rx_frag);
553
27b437c8 554static void skb_drop_list(struct sk_buff **listp)
1da177e4 555{
bd8a7036 556 kfree_skb_list(*listp);
27b437c8 557 *listp = NULL;
1da177e4
LT
558}
559
27b437c8
HX
560static inline void skb_drop_fraglist(struct sk_buff *skb)
561{
562 skb_drop_list(&skb_shinfo(skb)->frag_list);
563}
564
1da177e4
LT
565static void skb_clone_fraglist(struct sk_buff *skb)
566{
567 struct sk_buff *list;
568
fbb398a8 569 skb_walk_frags(skb, list)
1da177e4
LT
570 skb_get(list);
571}
572
d3836f21
ED
573static void skb_free_head(struct sk_buff *skb)
574{
181edb2b
AD
575 unsigned char *head = skb->head;
576
d3836f21 577 if (skb->head_frag)
181edb2b 578 skb_free_frag(head);
d3836f21 579 else
181edb2b 580 kfree(head);
d3836f21
ED
581}
582
5bba1712 583static void skb_release_data(struct sk_buff *skb)
1da177e4 584{
ff04a771
ED
585 struct skb_shared_info *shinfo = skb_shinfo(skb);
586 int i;
1da177e4 587
ff04a771
ED
588 if (skb->cloned &&
589 atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
590 &shinfo->dataref))
591 return;
a6686f2f 592
ff04a771
ED
593 for (i = 0; i < shinfo->nr_frags; i++)
594 __skb_frag_unref(&shinfo->frags[i]);
a6686f2f 595
ff04a771
ED
596 /*
597 * If skb buf is from userspace, we need to notify the caller
598 * the lower device DMA has done;
599 */
600 if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
601 struct ubuf_info *uarg;
1da177e4 602
ff04a771
ED
603 uarg = shinfo->destructor_arg;
604 if (uarg->callback)
605 uarg->callback(uarg, true);
1da177e4 606 }
ff04a771
ED
607
608 if (shinfo->frag_list)
609 kfree_skb_list(shinfo->frag_list);
610
611 skb_free_head(skb);
1da177e4
LT
612}
613
614/*
615 * Free an skbuff by memory without cleaning the state.
616 */
2d4baff8 617static void kfree_skbmem(struct sk_buff *skb)
1da177e4 618{
d0bf4a9e 619 struct sk_buff_fclones *fclones;
d179cd12 620
d179cd12
DM
621 switch (skb->fclone) {
622 case SKB_FCLONE_UNAVAILABLE:
623 kmem_cache_free(skbuff_head_cache, skb);
6ffe75eb 624 return;
d179cd12
DM
625
626 case SKB_FCLONE_ORIG:
d0bf4a9e 627 fclones = container_of(skb, struct sk_buff_fclones, skb1);
d179cd12 628
6ffe75eb
ED
629 /* We usually free the clone (TX completion) before original skb
630 * This test would have no chance to be true for the clone,
631 * while here, branch prediction will be good.
d179cd12 632 */
6ffe75eb
ED
633 if (atomic_read(&fclones->fclone_ref) == 1)
634 goto fastpath;
635 break;
e7820e39 636
6ffe75eb
ED
637 default: /* SKB_FCLONE_CLONE */
638 fclones = container_of(skb, struct sk_buff_fclones, skb2);
d179cd12 639 break;
3ff50b79 640 }
6ffe75eb
ED
641 if (!atomic_dec_and_test(&fclones->fclone_ref))
642 return;
643fastpath:
644 kmem_cache_free(skbuff_fclone_cache, fclones);
1da177e4
LT
645}
646
04a4bb55 647static void skb_release_head_state(struct sk_buff *skb)
1da177e4 648{
adf30907 649 skb_dst_drop(skb);
1da177e4
LT
650#ifdef CONFIG_XFRM
651 secpath_put(skb->sp);
652#endif
9c2b3328
SH
653 if (skb->destructor) {
654 WARN_ON(in_irq());
1da177e4
LT
655 skb->destructor(skb);
656 }
a3bf7ae9 657#if IS_ENABLED(CONFIG_NF_CONNTRACK)
5f79e0f9 658 nf_conntrack_put(skb->nfct);
2fc72c7b 659#endif
1109a90c 660#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1da177e4
LT
661 nf_bridge_put(skb->nf_bridge);
662#endif
04a4bb55
LB
663}
664
665/* Free everything but the sk_buff shell. */
666static void skb_release_all(struct sk_buff *skb)
667{
668 skb_release_head_state(skb);
5e71d9d7 669 if (likely(skb->head))
0ebd0ac5 670 skb_release_data(skb);
2d4baff8
HX
671}
672
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer via
 *	skb_release_all(), then hand the sk_buff memory itself to
 *	kfree_skbmem(). This is an internal helper function. Users should
 *	always call kfree_skb
 */

void __kfree_skb(struct sk_buff *skb)
{
	/* Attached state and data first, the shell last. */
	skb_release_all(skb);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
1da177e4 688