]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/core/skbuff.c
Merge commit 'origin/HEAD' into test-merge
[mirror_ubuntu-artful-kernel.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
1/*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
8 *
9 * Fixes:
10 * Alan Cox : Fixed the worst of the load
11 * balancer bugs.
12 * Dave Platt : Interrupt stacking fix.
13 * Richard Kooijman : Timestamp fixes.
14 * Alan Cox : Changed buffer format.
15 * Alan Cox : destructor hook for AF_UNIX etc.
16 * Linus Torvalds : Better skb_clone.
17 * Alan Cox : Added skb_copy.
18 * Alan Cox : Added all the changed routines Linus
19 * only put in the headers
20 * Ray VanTassle : Fixed --skb->lock in free
21 * Alan Cox : skb_copy copy arp field
22 * Andi Kleen : slabified it.
23 * Robert Olsson : Removed skb_head_pool
24 *
25 * NOTE:
26 * The __skb_ routines should be called with interrupts
27 * disabled, or you better be *real* sure that the operation is atomic
28 * with respect to whatever list is being frobbed (e.g. via lock_sock()
29 * or via disabling bottom half handlers, etc).
30 *
31 * This program is free software; you can redistribute it and/or
32 * modify it under the terms of the GNU General Public License
33 * as published by the Free Software Foundation; either version
34 * 2 of the License, or (at your option) any later version.
35 */
36
37/*
38 * The functions in this file will not compile correctly with gcc 2.4.x
39 */
40
1da177e4
LT
41#include <linux/module.h>
42#include <linux/types.h>
43#include <linux/kernel.h>
1da177e4
LT
44#include <linux/mm.h>
45#include <linux/interrupt.h>
46#include <linux/in.h>
47#include <linux/inet.h>
48#include <linux/slab.h>
49#include <linux/netdevice.h>
50#ifdef CONFIG_NET_CLS_ACT
51#include <net/pkt_sched.h>
52#endif
53#include <linux/string.h>
54#include <linux/skbuff.h>
9c55e01c 55#include <linux/splice.h>
1da177e4
LT
56#include <linux/cache.h>
57#include <linux/rtnetlink.h>
58#include <linux/init.h>
716ea3a7 59#include <linux/scatterlist.h>
1da177e4
LT
60
61#include <net/protocol.h>
62#include <net/dst.h>
63#include <net/sock.h>
64#include <net/checksum.h>
65#include <net/xfrm.h>
66
67#include <asm/uaccess.h>
68#include <asm/system.h>
69
a1f8e7f7
AV
70#include "kmap_skb.h"
71
e18b890b
CL
72static struct kmem_cache *skbuff_head_cache __read_mostly;
73static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1da177e4 74
9c55e01c
JA
75static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
76 struct pipe_buffer *buf)
77{
78 struct sk_buff *skb = (struct sk_buff *) buf->private;
79
80 kfree_skb(skb);
81}
82
83static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
84 struct pipe_buffer *buf)
85{
86 struct sk_buff *skb = (struct sk_buff *) buf->private;
87
88 skb_get(skb);
89}
90
/*
 * Pipe buffer ->steal hook.  Always returns 1 and touches neither argument.
 * NOTE(review): presumably a non-zero return tells the pipe layer the
 * buffer cannot be stolen - confirm against struct pipe_buf_operations.
 */
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	return 1;
}
96
97
98/* Pipe buffer operations for a socket. */
99static struct pipe_buf_operations sock_pipe_buf_ops = {
100 .can_merge = 0,
101 .map = generic_pipe_buf_map,
102 .unmap = generic_pipe_buf_unmap,
103 .confirm = generic_pipe_buf_confirm,
104 .release = sock_pipe_buf_release,
105 .steal = sock_pipe_buf_steal,
106 .get = sock_pipe_buf_get,
107};
108
1da177e4
LT
109/*
110 * Keep out-of-line to prevent kernel bloat.
111 * __builtin_return_address is not used because it is not always
112 * reliable.
113 */
114
115/**
116 * skb_over_panic - private function
117 * @skb: buffer
118 * @sz: size
119 * @here: address
120 *
121 * Out of line support code for skb_put(). Not user callable.
122 */
123void skb_over_panic(struct sk_buff *skb, int sz, void *here)
124{
26095455 125 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
4305b541 126 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 127 here, skb->len, sz, skb->head, skb->data,
4305b541 128 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 129 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
130 BUG();
131}
132
133/**
134 * skb_under_panic - private function
135 * @skb: buffer
136 * @sz: size
137 * @here: address
138 *
139 * Out of line support code for skb_push(). Not user callable.
140 */
141
142void skb_under_panic(struct sk_buff *skb, int sz, void *here)
143{
26095455 144 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
4305b541 145 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 146 here, skb->len, sz, skb->head, skb->data,
4305b541 147 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 148 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
149 BUG();
150}
151
dc6de336
DM
152void skb_truesize_bug(struct sk_buff *skb)
153{
154 printk(KERN_ERR "SKB BUG: Invalid truesize (%u) "
155 "len=%u, sizeof(sk_buff)=%Zd\n",
156 skb->truesize, skb->len, sizeof(struct sk_buff));
157}
158EXPORT_SYMBOL(skb_truesize_bug);
159
1da177e4
LT
160/* Allocate a new skbuff. We do this ourselves so we can fill in a few
161 * 'private' fields and also do memory statistics to find all the
162 * [BEEP] leaks.
163 *
164 */
165
166/**
d179cd12 167 * __alloc_skb - allocate a network buffer
1da177e4
LT
168 * @size: size to allocate
169 * @gfp_mask: allocation mask
c83c2486
RD
170 * @fclone: allocate from fclone cache instead of head cache
171 * and allocate a cloned (child) skb
b30973f8 172 * @node: numa node to allocate memory on
1da177e4
LT
173 *
174 * Allocate a new &sk_buff. The returned buffer has no headroom and a
175 * tail room of size bytes. The object has a reference count of one.
176 * The return is the buffer. On a failure the return is %NULL.
177 *
178 * Buffers may only be allocated from interrupts using a @gfp_mask of
179 * %GFP_ATOMIC.
180 */
dd0fc66f 181struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
b30973f8 182 int fclone, int node)
1da177e4 183{
e18b890b 184 struct kmem_cache *cache;
4947d3ef 185 struct skb_shared_info *shinfo;
1da177e4
LT
186 struct sk_buff *skb;
187 u8 *data;
188
8798b3fb
HX
189 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
190
1da177e4 191 /* Get the HEAD */
b30973f8 192 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
193 if (!skb)
194 goto out;
195
1da177e4 196 size = SKB_DATA_ALIGN(size);
b30973f8
CH
197 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
198 gfp_mask, node);
1da177e4
LT
199 if (!data)
200 goto nodata;
201
ca0605a7 202 /*
c8005785
JB
203 * Only clear those fields we need to clear, not those that we will
204 * actually initialise below. Hence, don't put any more fields after
205 * the tail pointer in struct sk_buff!
ca0605a7
ACM
206 */
207 memset(skb, 0, offsetof(struct sk_buff, tail));
1da177e4
LT
208 skb->truesize = size + sizeof(struct sk_buff);
209 atomic_set(&skb->users, 1);
210 skb->head = data;
211 skb->data = data;
27a884dc 212 skb_reset_tail_pointer(skb);
4305b541 213 skb->end = skb->tail + size;
4947d3ef
BL
214 /* make sure we initialize shinfo sequentially */
215 shinfo = skb_shinfo(skb);
216 atomic_set(&shinfo->dataref, 1);
217 shinfo->nr_frags = 0;
7967168c
HX
218 shinfo->gso_size = 0;
219 shinfo->gso_segs = 0;
220 shinfo->gso_type = 0;
4947d3ef
BL
221 shinfo->ip6_frag_id = 0;
222 shinfo->frag_list = NULL;
223
d179cd12
DM
224 if (fclone) {
225 struct sk_buff *child = skb + 1;
226 atomic_t *fclone_ref = (atomic_t *) (child + 1);
1da177e4 227
d179cd12
DM
228 skb->fclone = SKB_FCLONE_ORIG;
229 atomic_set(fclone_ref, 1);
230
231 child->fclone = SKB_FCLONE_UNAVAILABLE;
232 }
1da177e4
LT
233out:
234 return skb;
235nodata:
8798b3fb 236 kmem_cache_free(cache, skb);
1da177e4
LT
237 skb = NULL;
238 goto out;
1da177e4
LT
239}
240
8af27456
CH
241/**
242 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
243 * @dev: network device to receive on
244 * @length: length to allocate
245 * @gfp_mask: get_free_pages mask, passed to alloc_skb
246 *
247 * Allocate a new &sk_buff and assign it a usage count of one. The
248 * buffer has unspecified headroom built in. Users should allocate
249 * the headroom they think they need without accounting for the
250 * built in space. The built in space is used for optimisations.
251 *
252 * %NULL is returned if there is no free memory.
253 */
254struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
255 unsigned int length, gfp_t gfp_mask)
256{
43cb76d9 257 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
8af27456
CH
258 struct sk_buff *skb;
259
4ec93edb 260 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
7b2e497a 261 if (likely(skb)) {
8af27456 262 skb_reserve(skb, NET_SKB_PAD);
7b2e497a
CH
263 skb->dev = dev;
264 }
8af27456
CH
265 return skb;
266}
1da177e4 267
f58518e6
IJ
268/**
269 * dev_alloc_skb - allocate an skbuff for receiving
270 * @length: length to allocate
271 *
272 * Allocate a new &sk_buff and assign it a usage count of one. The
273 * buffer has unspecified headroom built in. Users should allocate
274 * the headroom they think they need without accounting for the
275 * built in space. The built in space is used for optimisations.
276 *
277 * %NULL is returned if there is no free memory. Although this function
278 * allocates memory it can be called from an interrupt.
279 */
280struct sk_buff *dev_alloc_skb(unsigned int length)
281{
1483b874
DV
282 /*
283 * There is more code here than it seems:
a0f55e0e 284 * __dev_alloc_skb is an inline
1483b874 285 */
f58518e6
IJ
286 return __dev_alloc_skb(length, GFP_ATOMIC);
287}
288EXPORT_SYMBOL(dev_alloc_skb);
289
27b437c8 290static void skb_drop_list(struct sk_buff **listp)
1da177e4 291{
27b437c8 292 struct sk_buff *list = *listp;
1da177e4 293
27b437c8 294 *listp = NULL;
1da177e4
LT
295
296 do {
297 struct sk_buff *this = list;
298 list = list->next;
299 kfree_skb(this);
300 } while (list);
301}
302
27b437c8
HX
303static inline void skb_drop_fraglist(struct sk_buff *skb)
304{
305 skb_drop_list(&skb_shinfo(skb)->frag_list);
306}
307
1da177e4
LT
308static void skb_clone_fraglist(struct sk_buff *skb)
309{
310 struct sk_buff *list;
311
312 for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
313 skb_get(list);
314}
315
5bba1712 316static void skb_release_data(struct sk_buff *skb)
1da177e4
LT
317{
318 if (!skb->cloned ||
319 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
320 &skb_shinfo(skb)->dataref)) {
321 if (skb_shinfo(skb)->nr_frags) {
322 int i;
323 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
324 put_page(skb_shinfo(skb)->frags[i].page);
325 }
326
327 if (skb_shinfo(skb)->frag_list)
328 skb_drop_fraglist(skb);
329
330 kfree(skb->head);
331 }
332}
333
334/*
335 * Free an skbuff by memory without cleaning the state.
336 */
2d4baff8 337static void kfree_skbmem(struct sk_buff *skb)
1da177e4 338{
d179cd12
DM
339 struct sk_buff *other;
340 atomic_t *fclone_ref;
341
d179cd12
DM
342 switch (skb->fclone) {
343 case SKB_FCLONE_UNAVAILABLE:
344 kmem_cache_free(skbuff_head_cache, skb);
345 break;
346
347 case SKB_FCLONE_ORIG:
348 fclone_ref = (atomic_t *) (skb + 2);
349 if (atomic_dec_and_test(fclone_ref))
350 kmem_cache_free(skbuff_fclone_cache, skb);
351 break;
352
353 case SKB_FCLONE_CLONE:
354 fclone_ref = (atomic_t *) (skb + 1);
355 other = skb - 1;
356
357 /* The clone portion is available for
358 * fast-cloning again.
359 */
360 skb->fclone = SKB_FCLONE_UNAVAILABLE;
361
362 if (atomic_dec_and_test(fclone_ref))
363 kmem_cache_free(skbuff_fclone_cache, other);
364 break;
3ff50b79 365 }
1da177e4
LT
366}
367
2d4baff8
HX
368/* Free everything but the sk_buff shell. */
369static void skb_release_all(struct sk_buff *skb)
1da177e4 370{
1da177e4
LT
371 dst_release(skb->dst);
372#ifdef CONFIG_XFRM
373 secpath_put(skb->sp);
374#endif
9c2b3328
SH
375 if (skb->destructor) {
376 WARN_ON(in_irq());
1da177e4
LT
377 skb->destructor(skb);
378 }
9fb9cbb1 379#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
5f79e0f9 380 nf_conntrack_put(skb->nfct);
9fb9cbb1
YK
381 nf_conntrack_put_reasm(skb->nfct_reasm);
382#endif
1da177e4
LT
383#ifdef CONFIG_BRIDGE_NETFILTER
384 nf_bridge_put(skb->nf_bridge);
385#endif
1da177e4
LT
386/* XXX: IS this still necessary? - JHS */
387#ifdef CONFIG_NET_SCHED
388 skb->tc_index = 0;
389#ifdef CONFIG_NET_CLS_ACT
390 skb->tc_verd = 0;
1da177e4
LT
391#endif
392#endif
2d4baff8
HX
393 skb_release_data(skb);
394}
395
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Everything attached to the buffer is released
 *	first (skb_release_all()), then the sk_buff memory itself is
 *	returned (kfree_skbmem()). This is an internal helper function.
 *	Users should always call kfree_skb.
 */
void __kfree_skb(struct sk_buff *skb)
{
	/* Attached state and data go first ... */
	skb_release_all(skb);

	/* ... then the shell itself. */
	kfree_skbmem(skb);
}
410