]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/core/skbuff.c
skbuff: back tiny skbs with kmalloc() in __netdev_alloc_skb() too
[mirror_ubuntu-hirsute-kernel.git] / net / core / skbuff.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * Routines having to do with the 'struct sk_buff' memory handlers.
4 *
113aa838 5 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
6 * Florian La Roche <rzsfl@rz.uni-sb.de>
7 *
1da177e4
LT
8 * Fixes:
9 * Alan Cox : Fixed the worst of the load
10 * balancer bugs.
11 * Dave Platt : Interrupt stacking fix.
12 * Richard Kooijman : Timestamp fixes.
13 * Alan Cox : Changed buffer format.
14 * Alan Cox : destructor hook for AF_UNIX etc.
15 * Linus Torvalds : Better skb_clone.
16 * Alan Cox : Added skb_copy.
17 * Alan Cox : Added all the changed routines Linus
18 * only put in the headers
19 * Ray VanTassle : Fixed --skb->lock in free
20 * Alan Cox : skb_copy copy arp field
21 * Andi Kleen : slabified it.
22 * Robert Olsson : Removed skb_head_pool
23 *
24 * NOTE:
25 * The __skb_ routines should be called with interrupts
26 * disabled, or you better be *real* sure that the operation is atomic
27 * with respect to whatever list is being frobbed (e.g. via lock_sock()
28 * or via disabling bottom half handlers, etc).
1da177e4
LT
29 */
30
31/*
32 * The functions in this file will not compile correctly with gcc 2.4.x
33 */
34
e005d193
JP
35#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
36
1da177e4
LT
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/kernel.h>
1da177e4
LT
40#include <linux/mm.h>
41#include <linux/interrupt.h>
42#include <linux/in.h>
43#include <linux/inet.h>
44#include <linux/slab.h>
de960aa9
FW
45#include <linux/tcp.h>
46#include <linux/udp.h>
90017acc 47#include <linux/sctp.h>
1da177e4
LT
48#include <linux/netdevice.h>
49#ifdef CONFIG_NET_CLS_ACT
50#include <net/pkt_sched.h>
51#endif
52#include <linux/string.h>
53#include <linux/skbuff.h>
9c55e01c 54#include <linux/splice.h>
1da177e4
LT
55#include <linux/cache.h>
56#include <linux/rtnetlink.h>
57#include <linux/init.h>
716ea3a7 58#include <linux/scatterlist.h>
ac45f602 59#include <linux/errqueue.h>
268bb0ce 60#include <linux/prefetch.h>
0d5501c1 61#include <linux/if_vlan.h>
2a2ea508 62#include <linux/mpls.h>
1da177e4
LT
63
64#include <net/protocol.h>
65#include <net/dst.h>
66#include <net/sock.h>
67#include <net/checksum.h>
ed1f50c3 68#include <net/ip6_checksum.h>
1da177e4 69#include <net/xfrm.h>
8822e270 70#include <net/mpls.h>
3ee17bc7 71#include <net/mptcp.h>
1da177e4 72
7c0f6ba6 73#include <linux/uaccess.h>
ad8d75ff 74#include <trace/events/skb.h>
51c56b00 75#include <linux/highmem.h>
b245be1f
WB
76#include <linux/capability.h>
77#include <linux/user_namespace.h>
2544af03 78#include <linux/indirect_call_wrapper.h>
a1f8e7f7 79
7b7ed885
BVA
80#include "datagram.h"
81
08009a76
AD
82struct kmem_cache *skbuff_head_cache __ro_after_init;
83static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
df5042f4
FW
84#ifdef CONFIG_SKB_EXTENSIONS
85static struct kmem_cache *skbuff_ext_cache __ro_after_init;
86#endif
5f74f82e
HWR
87int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
88EXPORT_SYMBOL(sysctl_max_skb_frags);
1da177e4 89
1da177e4 90/**
f05de73b
JS
91 * skb_panic - private function for out-of-line support
92 * @skb: buffer
93 * @sz: size
94 * @addr: address
99d5851e 95 * @msg: skb_over_panic or skb_under_panic
1da177e4 96 *
f05de73b
JS
97 * Out-of-line support for skb_put() and skb_push().
98 * Called via the wrapper skb_over_panic() or skb_under_panic().
99 * Keep out of line to prevent kernel bloat.
100 * __builtin_return_address is not used because it is not always reliable.
1da177e4 101 */
f05de73b 102static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
99d5851e 103 const char msg[])
1da177e4 104{
41a46913 105 pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
99d5851e 106 msg, addr, skb->len, sz, skb->head, skb->data,
e005d193
JP
107 (unsigned long)skb->tail, (unsigned long)skb->end,
108 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
109 BUG();
110}
111
/* Wrapper around skb_panic() that reports under this function's own name. */
static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}
116
f05de73b
JS
/* Wrapper around skb_panic() that reports under this function's own name. */
static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}
c93bdd0e
MG
121
122/*
123 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
124 * the caller if emergency pfmemalloc reserves are being used. If it is and
125 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
126 * may be used. Otherwise, the packet data may be discarded until enough
127 * memory is free
128 */
129#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
130 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
61c5e88a 131
132static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
133 unsigned long ip, bool *pfmemalloc)
c93bdd0e
MG
134{
135 void *obj;
136 bool ret_pfmemalloc = false;
137
138 /*
139 * Try a regular allocation, when that fails and we're not entitled
140 * to the reserves, fail.
141 */
142 obj = kmalloc_node_track_caller(size,
143 flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
144 node);
145 if (obj || !(gfp_pfmemalloc_allowed(flags)))
146 goto out;
147
148 /* Try again but now we are using pfmemalloc reserves */
149 ret_pfmemalloc = true;
150 obj = kmalloc_node_track_caller(size, flags, node);
151
152out:
153 if (pfmemalloc)
154 *pfmemalloc = ret_pfmemalloc;
155
156 return obj;
157}
158
1da177e4
LT
159/* Allocate a new skbuff. We do this ourselves so we can fill in a few
160 * 'private' fields and also do memory statistics to find all the
161 * [BEEP] leaks.
162 *
163 */
164
165/**
d179cd12 166 * __alloc_skb - allocate a network buffer
1da177e4
LT
167 * @size: size to allocate
168 * @gfp_mask: allocation mask
c93bdd0e
MG
169 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
170 * instead of head cache and allocate a cloned (child) skb.
171 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
172 * allocations in case the data is required for writeback
b30973f8 173 * @node: numa node to allocate memory on
1da177e4
LT
174 *
175 * Allocate a new &sk_buff. The returned buffer has no headroom and a
94b6042c
BH
176 * tail room of at least size bytes. The object has a reference count
177 * of one. The return is the buffer. On a failure the return is %NULL.
1da177e4
LT
178 *
179 * Buffers may only be allocated from interrupts using a @gfp_mask of
180 * %GFP_ATOMIC.
181 */
dd0fc66f 182struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
c93bdd0e 183 int flags, int node)
1da177e4 184{
e18b890b 185 struct kmem_cache *cache;
4947d3ef 186 struct skb_shared_info *shinfo;
1da177e4
LT
187 struct sk_buff *skb;
188 u8 *data;
c93bdd0e 189 bool pfmemalloc;
1da177e4 190
c93bdd0e
MG
191 cache = (flags & SKB_ALLOC_FCLONE)
192 ? skbuff_fclone_cache : skbuff_head_cache;
193
194 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
195 gfp_mask |= __GFP_MEMALLOC;
8798b3fb 196
1da177e4 197 /* Get the HEAD */
b30973f8 198 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
199 if (!skb)
200 goto out;
ec7d2f2c 201 prefetchw(skb);
1da177e4 202
87fb4b7b
ED
203 /* We do our best to align skb_shared_info on a separate cache
204 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
205 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
206 * Both skb->head and skb_shared_info are cache line aligned.
207 */
bc417e30 208 size = SKB_DATA_ALIGN(size);
87fb4b7b 209 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
c93bdd0e 210 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
1da177e4
LT
211 if (!data)
212 goto nodata;
87fb4b7b
ED
213 /* kmalloc(size) might give us more room than requested.
214 * Put skb_shared_info exactly at the end of allocated zone,
215 * to allow max possible filling before reallocation.
216 */
217 size = SKB_WITH_OVERHEAD(ksize(data));
ec7d2f2c 218 prefetchw(data + size);
1da177e4 219
ca0605a7 220 /*
c8005785
JB
221 * Only clear those fields we need to clear, not those that we will
222 * actually initialise below. Hence, don't put any more fields after
223 * the tail pointer in struct sk_buff!
ca0605a7
ACM
224 */
225 memset(skb, 0, offsetof(struct sk_buff, tail));
87fb4b7b
ED
226 /* Account for allocated memory : skb + skb->head */
227 skb->truesize = SKB_TRUESIZE(size);
c93bdd0e 228 skb->pfmemalloc = pfmemalloc;
63354797 229 refcount_set(&skb->users, 1);
1da177e4
LT
230 skb->head = data;
231 skb->data = data;
27a884dc 232 skb_reset_tail_pointer(skb);
4305b541 233 skb->end = skb->tail + size;
35d04610
CW
234 skb->mac_header = (typeof(skb->mac_header))~0U;
235 skb->transport_header = (typeof(skb->transport_header))~0U;
19633e12 236
4947d3ef
BL
237 /* make sure we initialize shinfo sequentially */
238 shinfo = skb_shinfo(skb);
ec7d2f2c 239 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
4947d3ef 240 atomic_set(&shinfo->dataref, 1);
4947d3ef 241
c93bdd0e 242 if (flags & SKB_ALLOC_FCLONE) {
d0bf4a9e 243 struct sk_buff_fclones *fclones;
1da177e4 244
d0bf4a9e
ED
245 fclones = container_of(skb, struct sk_buff_fclones, skb1);
246
d179cd12 247 skb->fclone = SKB_FCLONE_ORIG;
2638595a 248 refcount_set(&fclones->fclone_ref, 1);
d179cd12 249
6ffe75eb 250 fclones->skb2.fclone = SKB_FCLONE_CLONE;
d179cd12 251 }
6370cc3b
AN
252
253 skb_set_kcov_handle(skb, kcov_common_handle());
254
1da177e4
LT
255out:
256 return skb;
257nodata:
8798b3fb 258 kmem_cache_free(cache, skb);
1da177e4
LT
259 skb = NULL;
260 goto out;
1da177e4 261}
b4ac530f 262EXPORT_SYMBOL(__alloc_skb);
1da177e4 263
ba0509b6
JDB
264/* Caller must provide SKB that is memset cleared */
265static struct sk_buff *__build_skb_around(struct sk_buff *skb,
266 void *data, unsigned int frag_size)
267{
268 struct skb_shared_info *shinfo;
269 unsigned int size = frag_size ? : ksize(data);
270
271 size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
272
273 /* Assumes caller memset cleared SKB */
274 skb->truesize = SKB_TRUESIZE(size);
275 refcount_set(&skb->users, 1);
276 skb->head = data;
277 skb->data = data;
278 skb_reset_tail_pointer(skb);
279 skb->end = skb->tail + size;
280 skb->mac_header = (typeof(skb->mac_header))~0U;
281 skb->transport_header = (typeof(skb->transport_header))~0U;
282
283 /* make sure we initialize shinfo sequentially */
284 shinfo = skb_shinfo(skb);
285 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
286 atomic_set(&shinfo->dataref, 1);
287
6370cc3b
AN
288 skb_set_kcov_handle(skb, kcov_common_handle());
289
ba0509b6
JDB
290 return skb;
291}
292
b2b5ce9d 293/**
2ea2f62c 294 * __build_skb - build a network buffer
b2b5ce9d 295 * @data: data buffer provided by caller
2ea2f62c 296 * @frag_size: size of data, or 0 if head was kmalloced
b2b5ce9d
ED
297 *
298 * Allocate a new &sk_buff. Caller provides space holding head and
deceb4c0 299 * skb_shared_info. @data must have been allocated by kmalloc() only if
2ea2f62c
ED
300 * @frag_size is 0, otherwise data should come from the page allocator
301 * or vmalloc()
b2b5ce9d
ED
302 * The return is the new skb buffer.
303 * On a failure the return is %NULL, and @data is not freed.
304 * Notes :
305 * Before IO, driver allocates only data buffer where NIC put incoming frame
306 * Driver should add room at head (NET_SKB_PAD) and
307 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
308 * After IO, driver calls build_skb(), to allocate sk_buff and populate it
309 * before giving packet to stack.
310 * RX rings only contains data buffers, not full skbs.
311 */
2ea2f62c 312struct sk_buff *__build_skb(void *data, unsigned int frag_size)
b2b5ce9d 313{
b2b5ce9d 314 struct sk_buff *skb;
b2b5ce9d
ED
315
316 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
ba0509b6 317 if (unlikely(!skb))
b2b5ce9d
ED
318 return NULL;
319
b2b5ce9d 320 memset(skb, 0, offsetof(struct sk_buff, tail));
b2b5ce9d 321
ba0509b6 322 return __build_skb_around(skb, data, frag_size);
b2b5ce9d 323}
2ea2f62c
ED
324
325/* build_skb() is wrapper over __build_skb(), that specifically
326 * takes care of skb->head and skb->pfmemalloc
327 * This means that if @frag_size is not zero, then @data must be backed
328 * by a page fragment, not kmalloc() or vmalloc()
329 */
330struct sk_buff *build_skb(void *data, unsigned int frag_size)
331{
332 struct sk_buff *skb = __build_skb(data, frag_size);
333
334 if (skb && frag_size) {
335 skb->head_frag = 1;
2f064f34 336 if (page_is_pfmemalloc(virt_to_head_page(data)))
2ea2f62c
ED
337 skb->pfmemalloc = 1;
338 }
339 return skb;
340}
b2b5ce9d
ED
341EXPORT_SYMBOL(build_skb);
342
ba0509b6
JDB
343/**
344 * build_skb_around - build a network buffer around provided skb
345 * @skb: sk_buff provide by caller, must be memset cleared
346 * @data: data buffer provided by caller
347 * @frag_size: size of data, or 0 if head was kmalloced
348 */
349struct sk_buff *build_skb_around(struct sk_buff *skb,
350 void *data, unsigned int frag_size)
351{
352 if (unlikely(!skb))
353 return NULL;
354
355 skb = __build_skb_around(skb, data, frag_size);
356
357 if (skb && frag_size) {
358 skb->head_frag = 1;
359 if (page_is_pfmemalloc(virt_to_head_page(data)))
360 skb->pfmemalloc = 1;
361 }
362 return skb;
363}
364EXPORT_SYMBOL(build_skb_around);
365
795bb1c0
JDB
366#define NAPI_SKB_CACHE_SIZE 64
367
368struct napi_alloc_cache {
369 struct page_frag_cache page;
e0d7924a 370 unsigned int skb_count;
795bb1c0
JDB
371 void *skb_cache[NAPI_SKB_CACHE_SIZE];
372};
373
b63ae8ca 374static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
795bb1c0 375static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
ffde7328 376
7ba7aeab 377static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
ffde7328 378{
7ba7aeab 379 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
ffde7328 380
7ba7aeab
SAS
381 return page_frag_alloc(&nc->page, fragsz, gfp_mask);
382}
383
384void *napi_alloc_frag(unsigned int fragsz)
385{
386 fragsz = SKB_DATA_ALIGN(fragsz);
387
388 return __napi_alloc_frag(fragsz, GFP_ATOMIC);
6f532612 389}
7ba7aeab 390EXPORT_SYMBOL(napi_alloc_frag);
c93bdd0e
MG
391
392/**
393 * netdev_alloc_frag - allocate a page fragment
394 * @fragsz: fragment size
395 *
396 * Allocates a frag from a page for receive buffer.
397 * Uses GFP_ATOMIC allocations.
398 */
399void *netdev_alloc_frag(unsigned int fragsz)
400{
7ba7aeab
SAS
401 struct page_frag_cache *nc;
402 void *data;
ffde7328 403
3bed3cc4 404 fragsz = SKB_DATA_ALIGN(fragsz);
7ba7aeab
SAS
405 if (in_irq() || irqs_disabled()) {
406 nc = this_cpu_ptr(&netdev_alloc_cache);
407 data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
408 } else {
409 local_bh_disable();
410 data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
411 local_bh_enable();
412 }
413 return data;
ffde7328 414}
7ba7aeab 415EXPORT_SYMBOL(netdev_alloc_frag);
ffde7328 416
fd11a83d
AD
417/**
418 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
419 * @dev: network device to receive on
d7499160 420 * @len: length to allocate
fd11a83d
AD
421 * @gfp_mask: get_free_pages mask, passed to alloc_skb
422 *
423 * Allocate a new &sk_buff and assign it a usage count of one. The
424 * buffer has NET_SKB_PAD headroom built in. Users should allocate
425 * the headroom they think they need without accounting for the
426 * built in space. The built in space is used for optimisations.
427 *
428 * %NULL is returned if there is no free memory.
429 */
9451980a
AD
430struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
431 gfp_t gfp_mask)
fd11a83d 432{
b63ae8ca 433 struct page_frag_cache *nc;
fd11a83d 434 struct sk_buff *skb;
9451980a
AD
435 bool pfmemalloc;
436 void *data;
437
438 len += NET_SKB_PAD;
fd11a83d 439
66c55602
AL
440 /* If requested length is either too small or too big,
441 * we use kmalloc() for skb->head allocation.
442 */
443 if (len <= SKB_WITH_OVERHEAD(1024) ||
444 len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
d0164adc 445 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
a080e7bd
AD
446 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
447 if (!skb)
448 goto skb_fail;
449 goto skb_success;
450 }
fd11a83d 451
9451980a
AD
452 len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
453 len = SKB_DATA_ALIGN(len);
454
455 if (sk_memalloc_socks())
456 gfp_mask |= __GFP_MEMALLOC;
457
92dcabd7
SAS
458 if (in_irq() || irqs_disabled()) {
459 nc = this_cpu_ptr(&netdev_alloc_cache);
460 data = page_frag_alloc(nc, len, gfp_mask);
461 pfmemalloc = nc->pfmemalloc;
462 } else {
463 local_bh_disable();
464 nc = this_cpu_ptr(&napi_alloc_cache.page);
465 data = page_frag_alloc(nc, len, gfp_mask);
466 pfmemalloc = nc->pfmemalloc;
467 local_bh_enable();
468 }
9451980a
AD
469
470 if (unlikely(!data))
471 return NULL;
472
473 skb = __build_skb(data, len);
474 if (unlikely(!skb)) {
181edb2b 475 skb_free_frag(data);
9451980a 476 return NULL;
7b2e497a 477 }
fd11a83d 478
9451980a
AD
479 if (pfmemalloc)
480 skb->pfmemalloc = 1;
481 skb->head_frag = 1;
482
a080e7bd 483skb_success:
9451980a
AD
484 skb_reserve(skb, NET_SKB_PAD);
485 skb->dev = dev;
486
a080e7bd 487skb_fail:
8af27456
CH
488 return skb;
489}
b4ac530f 490EXPORT_SYMBOL(__netdev_alloc_skb);
1da177e4 491
fd11a83d
AD
492/**
493 * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
494 * @napi: napi instance this buffer was allocated for
d7499160 495 * @len: length to allocate
fd11a83d
AD
496 * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
497 *
498 * Allocate a new sk_buff for use in NAPI receive. This buffer will
499 * attempt to allocate the head from a special reserved region used
500 * only for NAPI Rx allocation. By doing this we can save several
501 * CPU cycles by avoiding having to disable and re-enable IRQs.
502 *
503 * %NULL is returned if there is no free memory.
504 */
9451980a
AD
505struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
506 gfp_t gfp_mask)
fd11a83d 507{
3226b158 508 struct napi_alloc_cache *nc;
fd11a83d 509 struct sk_buff *skb;
9451980a
AD
510 void *data;
511
512 len += NET_SKB_PAD + NET_IP_ALIGN;
fd11a83d 513
3226b158
ED
514 /* If requested length is either too small or too big,
515 * we use kmalloc() for skb->head allocation.
516 */
517 if (len <= SKB_WITH_OVERHEAD(1024) ||
518 len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
d0164adc 519 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
a080e7bd
AD
520 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
521 if (!skb)
522 goto skb_fail;
523 goto skb_success;
524 }
9451980a 525
3226b158 526 nc = this_cpu_ptr(&napi_alloc_cache);
9451980a
AD
527 len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
528 len = SKB_DATA_ALIGN(len);
529
530 if (sk_memalloc_socks())
531 gfp_mask |= __GFP_MEMALLOC;
fd11a83d 532
8c2dd3e4 533 data = page_frag_alloc(&nc->page, len, gfp_mask);
9451980a
AD
534 if (unlikely(!data))
535 return NULL;
536
537 skb = __build_skb(data, len);
538 if (unlikely(!skb)) {
181edb2b 539 skb_free_frag(data);
9451980a 540 return NULL;
fd11a83d
AD
541 }
542
795bb1c0 543 if (nc->page.pfmemalloc)
9451980a
AD
544 skb->pfmemalloc = 1;
545 skb->head_frag = 1;
546
a080e7bd 547skb_success:
9451980a
AD
548 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
549 skb->dev = napi->dev;
550
a080e7bd 551skb_fail:
fd11a83d
AD
552 return skb;
553}
554EXPORT_SYMBOL(__napi_alloc_skb);
555
654bed16 556void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
50269e19 557 int size, unsigned int truesize)
654bed16
PZ
558{
559 skb_fill_page_desc(skb, i, page, off, size);
560 skb->len += size;
561 skb->data_len += size;
50269e19 562 skb->truesize += truesize;
654bed16
PZ
563}
564EXPORT_SYMBOL(skb_add_rx_frag);
565
f8e617e1
JW
566void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
567 unsigned int truesize)
568{
569 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
570
571 skb_frag_size_add(frag, size);
572 skb->len += size;
573 skb->data_len += size;
574 skb->truesize += truesize;
575}
576EXPORT_SYMBOL(skb_coalesce_rx_frag);
577
27b437c8 578static void skb_drop_list(struct sk_buff **listp)
1da177e4 579{
bd8a7036 580 kfree_skb_list(*listp);
27b437c8 581 *listp = NULL;
1da177e4
LT
582}
583
27b437c8
HX
584static inline void skb_drop_fraglist(struct sk_buff *skb)
585{
586 skb_drop_list(&skb_shinfo(skb)->frag_list);
587}
588
1da177e4
LT
589static void skb_clone_fraglist(struct sk_buff *skb)
590{
591 struct sk_buff *list;
592
fbb398a8 593 skb_walk_frags(skb, list)
1da177e4
LT
594 skb_get(list);
595}
596
d3836f21
ED
597static void skb_free_head(struct sk_buff *skb)
598{
181edb2b
AD
599 unsigned char *head = skb->head;
600
d3836f21 601 if (skb->head_frag)
181edb2b 602 skb_free_frag(head);
d3836f21 603 else
181edb2b 604 kfree(head);
d3836f21
ED
605}
606
5bba1712 607static void skb_release_data(struct sk_buff *skb)
1da177e4 608{
ff04a771
ED
609 struct skb_shared_info *shinfo = skb_shinfo(skb);
610 int i;
1da177e4 611
ff04a771
ED
612 if (skb->cloned &&
613 atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
614 &shinfo->dataref))
615 return;
a6686f2f 616
ff04a771
ED
617 for (i = 0; i < shinfo->nr_frags; i++)
618 __skb_frag_unref(&shinfo->frags[i]);
a6686f2f 619
ff04a771
ED
620 if (shinfo->frag_list)
621 kfree_skb_list(shinfo->frag_list);
622
1f8b977a 623 skb_zcopy_clear(skb, true);
ff04a771 624 skb_free_head(skb);
1da177e4
LT
625}
626
627/*
628 * Free an skbuff by memory without cleaning the state.
629 */
2d4baff8 630static void kfree_skbmem(struct sk_buff *skb)
1da177e4 631{
d0bf4a9e 632 struct sk_buff_fclones *fclones;
d179cd12 633
d179cd12
DM
634 switch (skb->fclone) {
635 case SKB_FCLONE_UNAVAILABLE:
636 kmem_cache_free(skbuff_head_cache, skb);
6ffe75eb 637 return;
d179cd12
DM
638
639 case SKB_FCLONE_ORIG:
d0bf4a9e 640 fclones = container_of(skb, struct sk_buff_fclones, skb1);
d179cd12 641
6ffe75eb
ED
642 /* We usually free the clone (TX completion) before original skb
643 * This test would have no chance to be true for the clone,
644 * while here, branch prediction will be good.
d179cd12 645 */
2638595a 646 if (refcount_read(&fclones->fclone_ref) == 1)
6ffe75eb
ED
647 goto fastpath;
648 break;
e7820e39 649
6ffe75eb
ED
650 default: /* SKB_FCLONE_CLONE */
651 fclones = container_of(skb, struct sk_buff_fclones, skb2);
d179cd12 652 break;
3ff50b79 653 }
2638595a 654 if (!refcount_dec_and_test(&fclones->fclone_ref))
6ffe75eb
ED
655 return;
656fastpath:
657 kmem_cache_free(skbuff_fclone_cache, fclones);
1da177e4
LT
658}
659
0a463c78 660void skb_release_head_state(struct sk_buff *skb)
1da177e4 661{
adf30907 662 skb_dst_drop(skb);
9c2b3328
SH
663 if (skb->destructor) {
664 WARN_ON(in_irq());
1da177e4
LT
665 skb->destructor(skb);
666 }
a3bf7ae9 667#if IS_ENABLED(CONFIG_NF_CONNTRACK)
cb9c6836 668 nf_conntrack_put(skb_nfct(skb));
1da177e4 669#endif
df5042f4 670 skb_ext_put(skb);
04a4bb55
LB
671}
672
673/* Free everything but the sk_buff shell. */
674static void skb_release_all(struct sk_buff *skb)
675{
676 skb_release_head_state(skb);
a28b1b90
FW
677 if (likely(skb->head))
678 skb_release_data(skb);
2d4baff8
HX
679}
680
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
	/* Release the attached state and data first, then the shell. */
	skb_release_all(skb);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
1da177e4 696