git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - net/core/skbuff.c
net: Convert net_ratelimit uses to net_<level>_ratelimited
[mirror_ubuntu-zesty-kernel.git] / net / core / skbuff.c
index e59840010d45c9bc25f521fe1ef0717d2de984af..2a187194231708a228ae0fd7282a4efde0934f42 100644 (file)
 
 #include <asm/uaccess.h>
 #include <trace/events/skb.h>
+#include <linux/highmem.h>
 
-#include "kmap_skb.h"
-
-static struct kmem_cache *skbuff_head_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -246,6 +245,7 @@ EXPORT_SYMBOL(__alloc_skb);
 /**
  * build_skb - build a network buffer
  * @data: data buffer provided by caller
+ * @frag_size: size of fragment, or 0 if head was kmalloced
  *
  * Allocate a new &sk_buff. Caller provides space holding head and
  * skb_shared_info. @data must have been allocated by kmalloc()
@@ -259,20 +259,21 @@ EXPORT_SYMBOL(__alloc_skb);
  *  before giving packet to stack.
 *  RX rings only contain data buffers, not full skbs.
  */
-struct sk_buff *build_skb(void *data)
+struct sk_buff *build_skb(void *data, unsigned int frag_size)
 {
        struct skb_shared_info *shinfo;
        struct sk_buff *skb;
-       unsigned int size;
+       unsigned int size = frag_size ? : ksize(data);
 
        skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
        if (!skb)
                return NULL;
 
-       size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
        memset(skb, 0, offsetof(struct sk_buff, tail));
        skb->truesize = SKB_TRUESIZE(size);
+       skb->head_frag = frag_size != 0;
        atomic_set(&skb->users, 1);
        skb->head = data;
        skb->data = data;
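
As context for the new @frag_size argument, a minimal sketch of the two call styles it distinguishes (the helper names below are hypothetical, not part of this diff):

	/* Hypothetical RX helpers illustrating both build_skb() styles. */
	static struct sk_buff *rx_build_from_frag(void *frag, unsigned int truesize)
	{
		/* frag_size != 0: skb->head_frag is set, so the head is
		 * later released with put_page(), not kfree().
		 */
		return build_skb(frag, truesize);
	}

	static struct sk_buff *rx_build_from_kmalloc(void *kbuf)
	{
		/* frag_size == 0: the size comes from ksize() and head_frag
		 * stays clear, so the head is kfree()d as before.
		 */
		return build_skb(kbuf, 0);
	}
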
@@ -377,6 +378,14 @@ static void skb_clone_fraglist(struct sk_buff *skb)
                skb_get(list);
 }
 
+static void skb_free_head(struct sk_buff *skb)
+{
+       if (skb->head_frag)
+               put_page(virt_to_head_page(skb->head));
+       else
+               kfree(skb->head);
+}
+
 static void skb_release_data(struct sk_buff *skb)
 {
        if (!skb->cloned ||
@@ -403,7 +412,7 @@ static void skb_release_data(struct sk_buff *skb)
                if (skb_has_frag_list(skb))
                        skb_drop_fraglist(skb);
 
-               kfree(skb->head);
+               skb_free_head(skb);
        }
 }
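
skb_free_head() is the consumer side of the page-fragment head model. For context, a sketch of the matching producer side, assuming a page-fragment allocator along the lines of netdev_alloc_frag() (an assumption, not part of this diff):

	static struct sk_buff *alloc_frag_skb(unsigned int len)
	{
		unsigned int truesize = SKB_DATA_ALIGN(len) +
					SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		void *buf = netdev_alloc_frag(truesize);	/* assumed allocator */

		/* head_frag gets set; skb_free_head() later recovers the
		 * page via virt_to_head_page(skb->head) and put_page()s it.
		 */
		return buf ? build_skb(buf, truesize) : NULL;
	}
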
 
@@ -645,6 +654,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
        C(tail);
        C(end);
        C(head);
+       C(head_frag);
        C(data);
        C(truesize);
        atomic_set(&n->users, 1);
@@ -707,10 +717,10 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
                        }
                        return -ENOMEM;
                }
-               vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+               vaddr = kmap_atomic(skb_frag_page(f));
                memcpy(page_address(page),
                       vaddr + f->page_offset, skb_frag_size(f));
-               kunmap_skb_frag(vaddr);
+               kunmap_atomic(vaddr);
                page->private = (unsigned long)head;
                head = page;
        }
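
The kmap_skb_frag()/kunmap_skb_frag() helpers from the now-deleted kmap_skb.h were thin wrappers over kmap_atomic(); every conversion in this diff follows the same pattern (sketch):

	static void frag_peek(const skb_frag_t *f)
	{
		u8 *vaddr = kmap_atomic(skb_frag_page(f));

		/* ... touch vaddr + f->page_offset for up to
		 * skb_frag_size(f) bytes; no sleeping while mapped ...
		 */
		kunmap_atomic(vaddr);
	}
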
@@ -819,7 +829,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
        int headerlen = skb_headroom(skb);
-       unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+       unsigned int size = skb_end_offset(skb) + skb->data_len;
        struct sk_buff *n = alloc_skb(size, gfp_mask);
 
        if (!n)
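
skb_end_offset() replaces the open-coded skb_end_pointer(skb) - skb->head; its definition depends on the config, roughly (sketch of the skbuff.h helper introduced alongside this change):

	#ifdef NET_SKBUFF_DATA_USES_OFFSET
	static inline unsigned int skb_end_offset(const struct sk_buff *skb)
	{
		return skb->end;		/* end stored as an offset */
	}
	#else
	static inline unsigned int skb_end_offset(const struct sk_buff *skb)
	{
		return skb->end - skb->head;	/* end stored as a pointer */
	}
	#endif
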
@@ -920,9 +930,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
        int i;
        u8 *data;
-       int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
+       int size = nhead + skb_end_offset(skb) + ntail;
        long off;
-       bool fastpath;
 
        BUG_ON(nhead < 0);
 
@@ -931,27 +940,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
        size = SKB_DATA_ALIGN(size);
 
-       /* Check if we can avoid taking references on fragments if we own
-        * the last reference on skb->head. (see skb_release_data())
-        */
-       if (!skb->cloned)
-               fastpath = true;
-       else {
-               int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-               fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
-       }
-
-       if (fastpath &&
-           size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
-               memmove(skb->head + size, skb_shinfo(skb),
-                       offsetof(struct skb_shared_info,
-                                frags[skb_shinfo(skb)->nr_frags]));
-               memmove(skb->head + nhead, skb->head,
-                       skb_tail_pointer(skb) - skb->head);
-               off = nhead;
-               goto adjust_others;
-       }
-
        data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
                       gfp_mask);
        if (!data)
@@ -967,9 +955,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
               skb_shinfo(skb),
               offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
-       if (fastpath) {
-               kfree(skb->head);
-       } else {
+       /*
+        * If shinfo is shared we must drop the old head gracefully, but if
+        * it is not we can just free the old head and leave the existing
+        * refcount alone, since all we did was relocate the values.
+        */
+       if (skb_cloned(skb)) {
                /* copy this zero copy skb frags */
                if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
                        if (skb_copy_ubufs(skb, gfp_mask))
@@ -982,11 +973,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                        skb_clone_fraglist(skb);
 
                skb_release_data(skb);
+       } else {
+               skb_free_head(skb);
        }
        off = (data + nhead) - skb->head;
 
        skb->head     = data;
-adjust_others:
+       skb->head_frag = 0;
        skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
        skb->end      = size;
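
The deleted fastpath relied on ksize(skb->head), which is only meaningful for kmalloc()ed memory and is therefore unusable once the head may be a page fragment; and since the replacement head always comes from kmalloc(), the function now clears head_frag. The surviving branch keys off skb_cloned(), which reads roughly as (sketch of the existing skbuff.h helper):

	static inline int skb_cloned(const struct sk_buff *skb)
	{
		return skb->cloned &&
		       (atomic_read(&skb_shinfo(skb)->dataref) &
			SKB_DATAREF_MASK) != 1;
	}
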
@@ -1275,7 +1268,7 @@ drop_pages:
                                return -ENOMEM;
 
                        nfrag->next = frag->next;
-                       kfree_skb(frag);
+                       consume_skb(frag);
                        frag = nfrag;
                        *fragp = frag;
                }
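
consume_skb() and kfree_skb() free the skb identically and differ only in the tracepoint they fire, so drop-monitoring tools stop counting this normal trim operation as a packet drop. The idiom (sketch):

	static void finish_skb(struct sk_buff *skb, int err)
	{
		if (err)
			kfree_skb(skb);		/* traced as a packet drop */
		else
			consume_skb(skb);	/* traced as normal consumption */
	}
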
@@ -1487,21 +1480,22 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;
+               skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 
                WARN_ON(start > offset + len);
 
-               end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+               end = start + skb_frag_size(f);
                if ((copy = end - offset) > 0) {
                        u8 *vaddr;
 
                        if (copy > len)
                                copy = len;
 
-                       vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+                       vaddr = kmap_atomic(skb_frag_page(f));
                        memcpy(to,
-                              vaddr + skb_shinfo(skb)->frags[i].page_offset+
-                              offset - start, copy);
-                       kunmap_skb_frag(vaddr);
+                              vaddr + f->page_offset + offset - start,
+                              copy);
+                       kunmap_atomic(vaddr);
 
                        if ((len -= copy) == 0)
                                return 0;
@@ -1547,9 +1541,9 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
        put_page(spd->pages[i]);
 }
 
-static inline struct page *linear_to_page(struct page *page, unsigned int *len,
-                                         unsigned int *offset,
-                                         struct sk_buff *skb, struct sock *sk)
+static struct page *linear_to_page(struct page *page, unsigned int *len,
+                                  unsigned int *offset,
+                                  struct sk_buff *skb, struct sock *sk)
 {
        struct page *p = sk->sk_sndmsg_page;
        unsigned int off;
@@ -1565,6 +1559,9 @@ new_page:
        } else {
                unsigned int mlen;
 
+               /* If we are the only user of the page, we can reset offset */
+               if (page_count(p) == 1)
+                       sk->sk_sndmsg_off = 0;
                off = sk->sk_sndmsg_off;
                mlen = PAGE_SIZE - off;
                if (mlen < 64 && mlen < *len) {
@@ -1578,36 +1575,48 @@ new_page:
        memcpy(page_address(p) + off, page_address(page) + *offset, *len);
        sk->sk_sndmsg_off += *len;
        *offset = off;
-       get_page(p);
 
        return p;
 }
 
+static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
+                            struct page *page,
+                            unsigned int offset)
+{
+       return  spd->nr_pages &&
+               spd->pages[spd->nr_pages - 1] == page &&
+               (spd->partial[spd->nr_pages - 1].offset +
+                spd->partial[spd->nr_pages - 1].len == offset);
+}
+
 /*
  * Fill page/offset/length into spd, if it can hold more pages.
  */
-static inline int spd_fill_page(struct splice_pipe_desc *spd,
-                               struct pipe_inode_info *pipe, struct page *page,
-                               unsigned int *len, unsigned int offset,
-                               struct sk_buff *skb, int linear,
-                               struct sock *sk)
+static bool spd_fill_page(struct splice_pipe_desc *spd,
+                         struct pipe_inode_info *pipe, struct page *page,
+                         unsigned int *len, unsigned int offset,
+                         struct sk_buff *skb, bool linear,
+                         struct sock *sk)
 {
-       if (unlikely(spd->nr_pages == pipe->buffers))
-               return 1;
+       if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
+               return true;
 
        if (linear) {
                page = linear_to_page(page, len, &offset, skb, sk);
                if (!page)
-                       return 1;
-       } else
-               get_page(page);
-
+                       return true;
+       }
+       if (spd_can_coalesce(spd, page, offset)) {
+               spd->partial[spd->nr_pages - 1].len += *len;
+               return false;
+       }
+       get_page(page);
        spd->pages[spd->nr_pages] = page;
        spd->partial[spd->nr_pages].len = *len;
        spd->partial[spd->nr_pages].offset = offset;
        spd->nr_pages++;
 
-       return 0;
+       return false;
 }
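
A worked example of the coalescing above: splicing one page in two adjacent 2048-byte chunks, the first call records partial[0] = {page, offset 0, len 2048}; the second finds 0 + 2048 == 2048 on the same page, so it only bumps partial[0].len to 4096 and returns false with nr_pages still 1: one pipe buffer and one page reference instead of two.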
 
 static inline void __segment_seek(struct page **page, unsigned int *poff,
@@ -1624,20 +1633,20 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
        *plen -= off;
 }
 
-static inline int __splice_segment(struct page *page, unsigned int poff,
-                                  unsigned int plen, unsigned int *off,
-                                  unsigned int *len, struct sk_buff *skb,
-                                  struct splice_pipe_desc *spd, int linear,
-                                  struct sock *sk,
-                                  struct pipe_inode_info *pipe)
+static bool __splice_segment(struct page *page, unsigned int poff,
+                            unsigned int plen, unsigned int *off,
+                            unsigned int *len, struct sk_buff *skb,
+                            struct splice_pipe_desc *spd, bool linear,
+                            struct sock *sk,
+                            struct pipe_inode_info *pipe)
 {
        if (!*len)
-               return 1;
+               return true;
 
        /* skip this segment if already processed */
        if (*off >= plen) {
                *off -= plen;
-               return 0;
+               return false;
        }
 
        /* ignore any bits we already processed */
@@ -1653,34 +1662,38 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
                flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
                if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
-                       return 1;
+                       return true;
 
                __segment_seek(&page, &poff, &plen, flen);
                *len -= flen;
 
        } while (*len && plen);
 
-       return 0;
+       return false;
 }
 
 /*
- * Map linear and fragment data from the skb to spd. It reports failure if the
+ * Map linear and fragment data from the skb to spd. It reports true if the
  * pipe is full or if we already spliced the requested length.
  */
-static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
-                            unsigned int *offset, unsigned int *len,
-                            struct splice_pipe_desc *spd, struct sock *sk)
+static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+                             unsigned int *offset, unsigned int *len,
+                             struct splice_pipe_desc *spd, struct sock *sk)
 {
        int seg;
 
-       /*
-        * map the linear part
+       /* map the linear part:
+        * If skb->head_frag is set, this 'linear' part is backed by a
+        * fragment, and if the head is not shared with any clones then
+        * we can avoid a copy since we own the head portion of this page.
         */
        if (__splice_segment(virt_to_page(skb->data),
                             (unsigned long) skb->data & (PAGE_SIZE - 1),
                             skb_headlen(skb),
-                            offset, len, skb, spd, 1, sk, pipe))
-               return 1;
+                            offset, len, skb, spd,
+                            skb_head_is_locked(skb),
+                            sk, pipe))
+               return true;
 
        /*
         * then map the fragments
@@ -1690,11 +1703,11 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 
                if (__splice_segment(skb_frag_page(f),
                                     f->page_offset, skb_frag_size(f),
-                                    offset, len, skb, spd, 0, sk, pipe))
-                       return 1;
+                                    offset, len, skb, spd, false, sk, pipe))
+                       return true;
        }
 
-       return 0;
+       return false;
 }
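
skb_head_is_locked() is the new predicate deciding whether the linear area must be copied (true) or may be handed to the pipe by reference (false); sketch of the helper introduced with this series:

	static inline bool skb_head_is_locked(const struct sk_buff *skb)
	{
		/* a kmalloc()ed or cloned head cannot be given away */
		return !skb->head_frag || skb_cloned(skb);
	}
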
 
 /*
@@ -1707,8 +1720,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int tlen,
                    unsigned int flags)
 {
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct page *pages[PIPE_DEF_BUFFERS];
+       struct partial_page partial[MAX_SKB_FRAGS];
+       struct page *pages[MAX_SKB_FRAGS];
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
@@ -1720,9 +1733,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
        struct sock *sk = skb->sk;
        int ret = 0;
 
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
-
        /*
         * __skb_splice_bits() only fails if the output has no room left,
         * so no point in going over the frag_list for the error case.
@@ -1758,7 +1768,6 @@ done:
                lock_sock(sk);
        }
 
-       splice_shrink_spd(pipe, &spd);
        return ret;
 }
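
Because spd_fill_page() now stops at MAX_SKB_FRAGS entries, the on-stack arrays can be sized to match and the splice_grow_spd()/splice_shrink_spd() calls for pipes larger than PIPE_DEF_BUFFERS (16) become unnecessary. For reference:

	/* include/linux/skbuff.h: 18 entries with 4 KiB pages */
	#define MAX_SKB_FRAGS (65536 / PAGE_SIZE + 2)
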
 
@@ -1806,10 +1815,10 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
                        if (copy > len)
                                copy = len;
 
-                       vaddr = kmap_skb_frag(frag);
+                       vaddr = kmap_atomic(skb_frag_page(frag));
                        memcpy(vaddr + frag->page_offset + offset - start,
                               from, copy);
-                       kunmap_skb_frag(vaddr);
+                       kunmap_atomic(vaddr);
 
                        if ((len -= copy) == 0)
                                return 0;
@@ -1869,21 +1878,21 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
                WARN_ON(start > offset + len);
 
-               end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+               end = start + skb_frag_size(frag);
                if ((copy = end - offset) > 0) {
                        __wsum csum2;
                        u8 *vaddr;
-                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
                        if (copy > len)
                                copy = len;
-                       vaddr = kmap_skb_frag(frag);
+                       vaddr = kmap_atomic(skb_frag_page(frag));
                        csum2 = csum_partial(vaddr + frag->page_offset +
                                             offset - start, copy, 0);
-                       kunmap_skb_frag(vaddr);
+                       kunmap_atomic(vaddr);
                        csum = csum_block_add(csum, csum2, pos);
                        if (!(len -= copy))
                                return csum;
@@ -1955,12 +1964,12 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 
                        if (copy > len)
                                copy = len;
-                       vaddr = kmap_skb_frag(frag);
+                       vaddr = kmap_atomic(skb_frag_page(frag));
                        csum2 = csum_partial_copy_nocheck(vaddr +
                                                          frag->page_offset +
                                                          offset - start, to,
                                                          copy, 0);
-                       kunmap_skb_frag(vaddr);
+                       kunmap_atomic(vaddr);
                        csum = csum_block_add(csum, csum2, pos);
                        if (!(len -= copy))
                                return csum;
@@ -2480,7 +2489,7 @@ next_skb:
 
                if (abs_offset < block_limit) {
                        if (!st->frag_data)
-                               st->frag_data = kmap_skb_frag(frag);
+                               st->frag_data = kmap_atomic(skb_frag_page(frag));
 
                        *data = (u8 *) st->frag_data + frag->page_offset +
                                (abs_offset - st->stepped_offset);
@@ -2489,7 +2498,7 @@ next_skb:
                }
 
                if (st->frag_data) {
-                       kunmap_skb_frag(st->frag_data);
+                       kunmap_atomic(st->frag_data);
                        st->frag_data = NULL;
                }
 
@@ -2498,7 +2507,7 @@ next_skb:
        }
 
        if (st->frag_data) {
-               kunmap_skb_frag(st->frag_data);
+               kunmap_atomic(st->frag_data);
                st->frag_data = NULL;
        }
 
@@ -2526,7 +2535,7 @@ EXPORT_SYMBOL(skb_seq_read);
 void skb_abort_seq_read(struct skb_seq_state *st)
 {
        if (st->frag_data)
-               kunmap_skb_frag(st->frag_data);
+               kunmap_atomic(st->frag_data);
 }
 EXPORT_SYMBOL(skb_abort_seq_read);
 
@@ -2718,14 +2727,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
                        if (unlikely(!nskb))
                                goto err;
 
-                       hsize = skb_end_pointer(nskb) - nskb->head;
+                       hsize = skb_end_offset(nskb);
                        if (skb_cow_head(nskb, doffset + headroom)) {
                                kfree_skb(nskb);
                                goto err;
                        }
 
-                       nskb->truesize += skb_end_pointer(nskb) - nskb->head -
-                                         hsize;
+                       nskb->truesize += skb_end_offset(nskb) - hsize;
                        skb_release_head_state(nskb);
                        __skb_push(nskb, doffset);
                } else {
@@ -2843,6 +2851,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        unsigned int len = skb_gro_len(skb);
        unsigned int offset = skb_gro_offset(skb);
        unsigned int headlen = skb_headlen(skb);
+       unsigned int delta_truesize;
 
        if (p->len + len >= 65536)
                return -E2BIG;
@@ -2872,11 +2881,41 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
                frag->page_offset += offset;
                skb_frag_size_sub(frag, offset);
 
+               /* all fragments' truesize: remove (head size + sk_buff) */
+               delta_truesize = skb->truesize -
+                                SKB_TRUESIZE(skb_end_offset(skb));
+
                skb->truesize -= skb->data_len;
                skb->len -= skb->data_len;
                skb->data_len = 0;
 
-               NAPI_GRO_CB(skb)->free = 1;
+               NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
+               goto done;
+       } else if (skb->head_frag) {
+               int nr_frags = pinfo->nr_frags;
+               skb_frag_t *frag = pinfo->frags + nr_frags;
+               struct page *page = virt_to_head_page(skb->head);
+               unsigned int first_size = headlen - offset;
+               unsigned int first_offset;
+
+               if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
+                       return -E2BIG;
+
+               first_offset = skb->data -
+                              (unsigned char *)page_address(page) +
+                              offset;
+
+               pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
+
+               frag->page.p      = page;
+               frag->page_offset = first_offset;
+               skb_frag_size_set(frag, first_size);
+
+               memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
+               /* We don't need to clear skbinfo->nr_frags here */
+
+               delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+               NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
                goto done;
        } else if (skb_gro_len(p) != pinfo->gso_size)
                return -E2BIG;
@@ -2918,7 +2957,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        p = nskb;
 
 merge:
-       p->truesize += skb->truesize - len;
+       delta_truesize = skb->truesize;
        if (offset > headlen) {
                unsigned int eat = offset - headlen;
 
@@ -2938,7 +2977,7 @@ merge:
 done:
        NAPI_GRO_CB(p)->count++;
        p->data_len += len;
-       p->truesize += len;
+       p->truesize += delta_truesize;
        p->len += len;
 
        NAPI_GRO_CB(skb)->same_flow = 1;
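
The delta_truesize plumbing keeps p->truesize honest about how much memory actually migrated. A worked example for the new head-stealing branch, assuming 4 KiB pages and a 2 KiB fragment head:

	/* skb->truesize for a 2 KiB fragment head is
	 * 2048 + SKB_DATA_ALIGN(sizeof(struct sk_buff)).  The fragment
	 * migrates into p's frags[], so only the sk_buff itself is freed:
	 *
	 *	delta_truesize = skb->truesize
	 *		       - SKB_DATA_ALIGN(sizeof(struct sk_buff));
	 *
	 * i.e. exactly the 2048 bytes p now owns.  NAPI_GRO_FREE_STOLEN_HEAD
	 * then lets the GRO layer recycle the sk_buff without freeing the
	 * stolen page.
	 */
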
@@ -3166,7 +3205,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
        int len = skb->len;
 
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-           (unsigned)sk->sk_rcvbuf)
+           (unsigned int)sk->sk_rcvbuf)
                return -ENOMEM;
 
        skb_orphan(skb);
@@ -3260,10 +3299,8 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
 {
        if (unlikely(start > skb_headlen(skb)) ||
            unlikely((int)start + off > skb_headlen(skb) - 2)) {
-               if (net_ratelimit())
-                       printk(KERN_WARNING
-                              "bad partial csum: csum=%u/%u len=%u\n",
-                              start, off, skb_headlen(skb));
+               net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
+                                    start, off, skb_headlen(skb));
                return false;
        }
        skb->ip_summed = CHECKSUM_PARTIAL;
@@ -3275,8 +3312,7 @@ EXPORT_SYMBOL_GPL(skb_partial_csum_set);
 
 void __skb_warn_lro_forwarding(const struct sk_buff *skb)
 {
-       if (net_ratelimit())
-               pr_warning("%s: received packets cannot be forwarded"
-                          " while LRO is enabled\n", skb->dev->name);
+       net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
+                            skb->dev->name);
 }
 EXPORT_SYMBOL(__skb_warn_lro_forwarding);
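
For reference, the net_<level>_ratelimited() helpers this commit converts to are built on the same net_ratelimit() test; their include/linux/net.h definition is essentially:

	#define net_ratelimited_function(function, ...)			\
	do {								\
		if (net_ratelimit())					\
			function(__VA_ARGS__);				\
	} while (0)

	#define net_warn_ratelimited(fmt, ...)				\
		net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)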