/* net/packet/af_packet.c */

/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *	Alexey Kuznetsov	:	Untied from IPv4 stack.
 *	Cyrus Durgin		:	Fixed kerneld for kmod.
 *	Michal Ostrowski	:	Module initialization cleanup.
 *	Ulises Alonso		:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if the device has no dev->hard_header routine, it adds and removes the
     ll header inside itself. In this case the ll header is invisible outside
     of the device, but higher levels still should reserve
     dev->hard_header_len. Some devices are clever enough to reallocate the
     skb when the header will not fit into the reserved space (tunnels);
     others are silly (PPP).
   - a packet socket receives packets with the ll header pulled,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the
		 ll header. PPP does this, which is wrong because it
		 introduces asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Resume
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */

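/*
 * Userspace illustration of the header visibility described above (a
 * minimal sketch, assuming an Ethernet device and CAP_NET_RAW): with
 * SOCK_RAW the link-layer header is delivered to the caller, while
 * SOCK_DGRAM strips it.
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	unsigned char buf[2048];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	// buf[0..13] now holds the Ethernet header (dst MAC, src MAC, type)
 */
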
/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);

#define PGV_FROM_VMALLOC 1
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

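/*
 * Ring buffer memory may come from vmalloc (PGV_FROM_VMALLOC) or from the
 * page allocator; map a buffer address back to its struct page accordingly.
 */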
static inline struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

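/*
 * tp_status is the kernel<->user handshake word of each ring frame: the
 * smp_wmb()/smp_rmb() pairing above orders the status update against the
 * frame payload. A minimal userspace consumer sketch (assuming a mapped
 * TPACKET_V2 RX ring; frame() and handle() are hypothetical helpers):
 *
 *	struct tpacket2_hdr *hdr = frame(ring, i);
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);
 *	handle((char *)hdr + hdr->tp_mac, hdr->tp_snaplen);
 *	hdr->tp_status = TP_STATUS_KERNEL;	// hand the frame back
 */
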
static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head + 1 : 0;
}

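/*
 * Worked example of the lookup arithmetic above: with frame_size = 2048 and
 * 16 KiB blocks, frames_per_block = 8, so frame #11 lives in block
 * 11 / 8 = 1 at byte offset (11 % 8) * 2048 = 6144 within that block.
 */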
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}

static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 * When we registered the protocol we saved the socket in the data
	 * field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 * Yank back the headers [hope the device set this
	 * right or kerboom...]
	 *
	 * Incoming packets have the ll header pulled,
	 * push it back.
	 *
	 * For outgoing ones skb->data == skb_mac_header(skb)
	 * so that this procedure is a noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 * The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 * Charge the memory to the socket. This is done specifically
	 * to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

/*
 * Output a raw packet to a device layer. This bypasses all the other
 * protocol layers and you must therefore supply it with a complete frame.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

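/*
 * Userspace sketch of the above (legacy SOCK_PACKET: a complete frame is
 * supplied and the device is named in sockaddr_pkt; "eth0" is an assumed
 * interface name):
 *
 *	struct sockaddr_pkt sp = { .spkt_family = AF_PACKET };
 *	strncpy((char *)sp.spkt_device, "eth0", sizeof(sp.spkt_device));
 *	sp.spkt_protocol = htons(ETH_P_802_2);
 *	sendto(fd, frame, frame_len, 0, (struct sockaddr *)&sp, sizeof(sp));
 */
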
static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock_bh();

	return res;
}
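
/*
 * The filter run here is a classic BPF program attached by userspace with
 * SO_ATTACH_FILTER; a return of 0 drops the packet, a larger value caps the
 * number of bytes captured. Minimal sketch (an accept-all program):
 *
 *	struct sock_filter code[] = {
 *		{ 0x06, 0, 0, 0x0000ffff },	// BPF_RET|BPF_K: ret #0xffff
 *	};
 *	struct sock_fprog prog = { .len = 1, .filter = code };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */
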
/*
 * This function does lazy skb cloning in the hope that most packets
 * are discarded by BPF.
 *
 * Note the tricky part: we DO mangle shared skbs! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by the current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return the skb to its original state on
 * exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that the corresponding packet head is
		 * never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

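/*
 * tpacket_rcv() fills frames of a ring that userspace set up roughly like
 * this (a sketch for TPACKET_V2; error handling omitted, sizes are example
 * values):
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,	// (block_size / frame_size) * block_nr
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */
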
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceeds the number of skb frags (%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

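/*
 * The TX-side counterpart in userspace marks frames and kicks the kernel
 * with a plain send (a sketch assuming a TPACKET_V1 TX ring; tx_frame() is
 * a hypothetical helper that indexes the mapped ring):
 *
 *	struct tpacket_hdr *hdr = tx_frame(ring, i);
 *	memcpy((char *)hdr + TPACKET_HDRLEN - sizeof(struct sockaddr_ll),
 *	       frame, frame_len);
 *	hdr->tp_len = frame_len;
 *	hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *	send(fd, NULL, 0, 0);	// drives tpacket_snd() above
 */
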
static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu + reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
		struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 * Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 * Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

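/*
 * Typical userspace bind to a single interface (sketch; the ifindex
 * normally comes from if_nametoindex(), "eth0" is an assumed name):
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */
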
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred, so return it. Because skb_recv_datagram()
	 *	handles the blocking, we don't need to see or worry about
	 *	blocking retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb->csum_start -
							skb_headroom(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk	= sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

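/*
 * Userspace drives the membership code above via setsockopt, e.g. to put
 * the bound device into promiscuous mode (sketch; "eth0" is an assumed
 * interface name):
 *
 *	struct packet_mreq mr = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr));
 */
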
1916static int
b7058842 1917packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
1da177e4
LT
1918{
1919 struct sock *sk = sock->sk;
8dc41944 1920 struct packet_sock *po = pkt_sk(sk);
1da177e4
LT
1921 int ret;
1922
1923 if (level != SOL_PACKET)
1924 return -ENOPROTOOPT;
1925
69e3c75f 1926 switch (optname) {
1ce4f28b 1927 case PACKET_ADD_MEMBERSHIP:
1da177e4
LT
1928 case PACKET_DROP_MEMBERSHIP:
1929 {
0fb375fb
EB
1930 struct packet_mreq_max mreq;
1931 int len = optlen;
1932 memset(&mreq, 0, sizeof(mreq));
1933 if (len < sizeof(struct packet_mreq))
1da177e4 1934 return -EINVAL;
0fb375fb
EB
1935 if (len > sizeof(mreq))
1936 len = sizeof(mreq);
40d4e3df 1937 if (copy_from_user(&mreq, optval, len))
1da177e4 1938 return -EFAULT;
0fb375fb
EB
1939 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1940 return -EINVAL;
1da177e4
LT
1941 if (optname == PACKET_ADD_MEMBERSHIP)
1942 ret = packet_mc_add(sk, &mreq);
1943 else
1944 ret = packet_mc_drop(sk, &mreq);
1945 return ret;
1946 }
a2efcfa0 1947
1da177e4 1948 case PACKET_RX_RING:
69e3c75f 1949 case PACKET_TX_RING:
1da177e4
LT
1950 {
1951 struct tpacket_req req;
1952
40d4e3df 1953 if (optlen < sizeof(req))
1da177e4 1954 return -EINVAL;
bfd5f4a3
SS
1955 if (pkt_sk(sk)->has_vnet_hdr)
1956 return -EINVAL;
40d4e3df 1957 if (copy_from_user(&req, optval, sizeof(req)))
1da177e4 1958 return -EFAULT;
69e3c75f 1959 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1da177e4
LT
1960 }
1961 case PACKET_COPY_THRESH:
1962 {
1963 int val;
1964
40d4e3df 1965 if (optlen != sizeof(val))
1da177e4 1966 return -EINVAL;
40d4e3df 1967 if (copy_from_user(&val, optval, sizeof(val)))
1da177e4
LT
1968 return -EFAULT;
1969
1970 pkt_sk(sk)->copy_thresh = val;
1971 return 0;
1972 }
bbd6ef87
PM
1973 case PACKET_VERSION:
1974 {
1975 int val;
1976
1977 if (optlen != sizeof(val))
1978 return -EINVAL;
69e3c75f 1979 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
bbd6ef87
PM
1980 return -EBUSY;
1981 if (copy_from_user(&val, optval, sizeof(val)))
1982 return -EFAULT;
1983 switch (val) {
1984 case TPACKET_V1:
1985 case TPACKET_V2:
1986 po->tp_version = val;
1987 return 0;
1988 default:
1989 return -EINVAL;
1990 }
1991 }
8913336a
PM
1992 case PACKET_RESERVE:
1993 {
1994 unsigned int val;
1995
1996 if (optlen != sizeof(val))
1997 return -EINVAL;
69e3c75f 1998 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
8913336a
PM
1999 return -EBUSY;
2000 if (copy_from_user(&val, optval, sizeof(val)))
2001 return -EFAULT;
2002 po->tp_reserve = val;
2003 return 0;
2004 }
69e3c75f
JB
2005 case PACKET_LOSS:
2006 {
2007 unsigned int val;
2008
2009 if (optlen != sizeof(val))
2010 return -EINVAL;
2011 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2012 return -EBUSY;
2013 if (copy_from_user(&val, optval, sizeof(val)))
2014 return -EFAULT;
2015 po->tp_loss = !!val;
2016 return 0;
2017 }
8dc41944
HX
2018 case PACKET_AUXDATA:
2019 {
2020 int val;
2021
2022 if (optlen < sizeof(val))
2023 return -EINVAL;
2024 if (copy_from_user(&val, optval, sizeof(val)))
2025 return -EFAULT;
2026
2027 po->auxdata = !!val;
2028 return 0;
2029 }
80feaacb
PWJ
2030 case PACKET_ORIGDEV:
2031 {
2032 int val;
2033
2034 if (optlen < sizeof(val))
2035 return -EINVAL;
2036 if (copy_from_user(&val, optval, sizeof(val)))
2037 return -EFAULT;
2038
2039 po->origdev = !!val;
2040 return 0;
2041 }
bfd5f4a3
SS
2042 case PACKET_VNET_HDR:
2043 {
2044 int val;
2045
2046 if (sock->type != SOCK_RAW)
2047 return -EINVAL;
2048 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2049 return -EBUSY;
2050 if (optlen < sizeof(val))
2051 return -EINVAL;
2052 if (copy_from_user(&val, optval, sizeof(val)))
2053 return -EFAULT;
2054
2055 po->has_vnet_hdr = !!val;
2056 return 0;
2057 }
614f60fa
SM
2058 case PACKET_TIMESTAMP:
2059 {
2060 int val;
2061
2062 if (optlen != sizeof(val))
2063 return -EINVAL;
2064 if (copy_from_user(&val, optval, sizeof(val)))
2065 return -EFAULT;
2066
2067 po->tp_tstamp = val;
2068 return 0;
2069 }
1da177e4
LT
2070 default:
2071 return -ENOPROTOOPT;
2072 }
2073}
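
packet_setsockopt() above rejects PACKET_VERSION once a ring exists (-EBUSY), so the version must be selected before PACKET_RX_RING. A minimal userspace sketch under that ordering; the geometry numbers are illustrative and assume 4 KiB pages:

/* hypothetical example: select TPACKET_V2, then create an RX ring */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

int setup_rx_ring(int fd)
{
	int version = TPACKET_V2;
	struct tpacket_req req = {
		.tp_block_size = 4096,	/* must be a multiple of PAGE_SIZE */
		.tp_block_nr   = 64,
		.tp_frame_size = 2048,	/* must be TPACKET_ALIGNMENT-aligned */
		.tp_frame_nr   = 64 * (4096 / 2048), /* blocks * frames_per_block */
	};

	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
		       &version, sizeof(version)) < 0) {
		perror("PACKET_VERSION");
		return -1;
	}
	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
		       &req, sizeof(req)) < 0) {
		perror("PACKET_RX_RING");
		return -1;
	}
	return 0;
}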
2074
2075static int packet_getsockopt(struct socket *sock, int level, int optname,
2076 char __user *optval, int __user *optlen)
2077{
2078 int len;
8dc41944 2079 int val;
1da177e4
LT
2080 struct sock *sk = sock->sk;
2081 struct packet_sock *po = pkt_sk(sk);
8dc41944
HX
2082 void *data;
2083 struct tpacket_stats st;
1da177e4
LT
2084
2085 if (level != SOL_PACKET)
2086 return -ENOPROTOOPT;
2087
8ae55f04
KK
2088 if (get_user(len, optlen))
2089 return -EFAULT;
1da177e4
LT
2090
2091 if (len < 0)
2092 return -EINVAL;
1ce4f28b 2093
69e3c75f 2094 switch (optname) {
1da177e4 2095 case PACKET_STATISTICS:
1da177e4
LT
2096 if (len > sizeof(struct tpacket_stats))
2097 len = sizeof(struct tpacket_stats);
2098 spin_lock_bh(&sk->sk_receive_queue.lock);
2099 st = po->stats;
2100 memset(&po->stats, 0, sizeof(st));
2101 spin_unlock_bh(&sk->sk_receive_queue.lock);
2102 st.tp_packets += st.tp_drops;
2103
8dc41944
HX
2104 data = &st;
2105 break;
2106 case PACKET_AUXDATA:
2107 if (len > sizeof(int))
2108 len = sizeof(int);
2109 val = po->auxdata;
2110
80feaacb
PWJ
2111 data = &val;
2112 break;
2113 case PACKET_ORIGDEV:
2114 if (len > sizeof(int))
2115 len = sizeof(int);
2116 val = po->origdev;
2117
bfd5f4a3
SS
2118 data = &val;
2119 break;
2120 case PACKET_VNET_HDR:
2121 if (len > sizeof(int))
2122 len = sizeof(int);
2123 val = po->has_vnet_hdr;
2124
8dc41944 2125 data = &val;
1da177e4 2126 break;
bbd6ef87
PM
2127 case PACKET_VERSION:
2128 if (len > sizeof(int))
2129 len = sizeof(int);
2130 val = po->tp_version;
2131 data = &val;
2132 break;
2133 case PACKET_HDRLEN:
2134 if (len > sizeof(int))
2135 len = sizeof(int);
2136 if (copy_from_user(&val, optval, len))
2137 return -EFAULT;
2138 switch (val) {
2139 case TPACKET_V1:
2140 val = sizeof(struct tpacket_hdr);
2141 break;
2142 case TPACKET_V2:
2143 val = sizeof(struct tpacket2_hdr);
2144 break;
2145 default:
2146 return -EINVAL;
2147 }
2148 data = &val;
2149 break;
8913336a
PM
2150 case PACKET_RESERVE:
2151 if (len > sizeof(unsigned int))
2152 len = sizeof(unsigned int);
2153 val = po->tp_reserve;
2154 data = &val;
2155 break;
69e3c75f
JB
2156 case PACKET_LOSS:
2157 if (len > sizeof(unsigned int))
2158 len = sizeof(unsigned int);
2159 val = po->tp_loss;
2160 data = &val;
2161 break;
614f60fa
SM
2162 case PACKET_TIMESTAMP:
2163 if (len > sizeof(int))
2164 len = sizeof(int);
2165 val = po->tp_tstamp;
2166 data = &val;
2167 break;
1da177e4
LT
2168 default:
2169 return -ENOPROTOOPT;
2170 }
2171
8ae55f04
KK
2172 if (put_user(len, optlen))
2173 return -EFAULT;
8dc41944
HX
2174 if (copy_to_user(optval, data, len))
2175 return -EFAULT;
8ae55f04 2176 return 0;
1da177e4
LT
2177}
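
As the PACKET_STATISTICS branch above shows, the counters are zeroed under the receive-queue lock while being read, so each getsockopt() call returns the deltas since the previous call. A minimal sketch:

/* hypothetical example: read-and-clear the packet/drop counters */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

void print_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("packets: %u  drops: %u\n", st.tp_packets, st.tp_drops);
}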
2178
2179
2180static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
2181{
2182 struct sock *sk;
2183 struct hlist_node *node;
ad930650 2184 struct net_device *dev = data;
c346dca1 2185 struct net *net = dev_net(dev);
1da177e4 2186
808f5114 2187 rcu_read_lock();
2188 sk_for_each_rcu(sk, node, &net->packet.sklist) {
1da177e4
LT
2189 struct packet_sock *po = pkt_sk(sk);
2190
2191 switch (msg) {
2192 case NETDEV_UNREGISTER:
1da177e4
LT
2193 if (po->mclist)
2194 packet_dev_mclist(dev, po->mclist, -1);
a2efcfa0
DM
2195 /* fallthrough */
2196
1da177e4
LT
2197 case NETDEV_DOWN:
2198 if (dev->ifindex == po->ifindex) {
2199 spin_lock(&po->bind_lock);
2200 if (po->running) {
2201 __dev_remove_pack(&po->prot_hook);
2202 __sock_put(sk);
2203 po->running = 0;
2204 sk->sk_err = ENETDOWN;
2205 if (!sock_flag(sk, SOCK_DEAD))
2206 sk->sk_error_report(sk);
2207 }
2208 if (msg == NETDEV_UNREGISTER) {
2209 po->ifindex = -1;
2210 po->prot_hook.dev = NULL;
2211 }
2212 spin_unlock(&po->bind_lock);
2213 }
2214 break;
2215 case NETDEV_UP:
808f5114 2216 if (dev->ifindex == po->ifindex) {
2217 spin_lock(&po->bind_lock);
2218 if (po->num && !po->running) {
2219 dev_add_pack(&po->prot_hook);
2220 sock_hold(sk);
2221 po->running = 1;
2222 }
2223 spin_unlock(&po->bind_lock);
1da177e4 2224 }
1da177e4
LT
2225 break;
2226 }
2227 }
808f5114 2228 rcu_read_unlock();
1da177e4
LT
2229 return NOTIFY_DONE;
2230}
2231
2232
2233static int packet_ioctl(struct socket *sock, unsigned int cmd,
2234 unsigned long arg)
2235{
2236 struct sock *sk = sock->sk;
2237
69e3c75f 2238 switch (cmd) {
40d4e3df
ED
2239 case SIOCOUTQ:
2240 {
2241 int amount = sk_wmem_alloc_get(sk);
31e6d363 2242
40d4e3df
ED
2243 return put_user(amount, (int __user *)arg);
2244 }
2245 case SIOCINQ:
2246 {
2247 struct sk_buff *skb;
2248 int amount = 0;
2249
2250 spin_lock_bh(&sk->sk_receive_queue.lock);
2251 skb = skb_peek(&sk->sk_receive_queue);
2252 if (skb)
2253 amount = skb->len;
2254 spin_unlock_bh(&sk->sk_receive_queue.lock);
2255 return put_user(amount, (int __user *)arg);
2256 }
2257 case SIOCGSTAMP:
2258 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2259 case SIOCGSTAMPNS:
2260 return sock_get_timestampns(sk, (struct timespec __user *)arg);
1ce4f28b 2261
1da177e4 2262#ifdef CONFIG_INET
40d4e3df
ED
2263 case SIOCADDRT:
2264 case SIOCDELRT:
2265 case SIOCDARP:
2266 case SIOCGARP:
2267 case SIOCSARP:
2268 case SIOCGIFADDR:
2269 case SIOCSIFADDR:
2270 case SIOCGIFBRDADDR:
2271 case SIOCSIFBRDADDR:
2272 case SIOCGIFNETMASK:
2273 case SIOCSIFNETMASK:
2274 case SIOCGIFDSTADDR:
2275 case SIOCSIFDSTADDR:
2276 case SIOCSIFFLAGS:
40d4e3df 2277 return inet_dgram_ops.ioctl(sock, cmd, arg);
1da177e4
LT
2278#endif
2279
40d4e3df
ED
2280 default:
2281 return -ENOIOCTLCMD;
1da177e4
LT
2282 }
2283 return 0;
2284}
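
A usage note for the two packet-specific ioctls above: SIOCINQ reports the length of the next queued packet (note the skb_peek() in the handler), not the total receive backlog, while SIOCOUTQ reports bytes still committed to the write queue. A minimal sketch:

/* hypothetical example: query queued byte counts */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

void show_queues(int fd)
{
	int inq = 0, outq = 0;

	if (ioctl(fd, SIOCINQ, &inq) == 0)
		printf("next packet: %d bytes\n", inq);
	if (ioctl(fd, SIOCOUTQ, &outq) == 0)
		printf("unsent: %d bytes\n", outq);
}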
2285
40d4e3df 2286static unsigned int packet_poll(struct file *file, struct socket *sock,
1da177e4
LT
2287 poll_table *wait)
2288{
2289 struct sock *sk = sock->sk;
2290 struct packet_sock *po = pkt_sk(sk);
2291 unsigned int mask = datagram_poll(file, sock, wait);
2292
2293 spin_lock_bh(&sk->sk_receive_queue.lock);
69e3c75f
JB
2294 if (po->rx_ring.pg_vec) {
2295 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
1da177e4
LT
2296 mask |= POLLIN | POLLRDNORM;
2297 }
2298 spin_unlock_bh(&sk->sk_receive_queue.lock);
69e3c75f
JB
2299 spin_lock_bh(&sk->sk_write_queue.lock);
2300 if (po->tx_ring.pg_vec) {
2301 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2302 mask |= POLLOUT | POLLWRNORM;
2303 }
2304 spin_unlock_bh(&sk->sk_write_queue.lock);
1da177e4
LT
2305 return mask;
2306}
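
packet_poll() above asserts POLLIN based on ring-slot status rather than queued skbs, so a ring consumer should drain every TP_STATUS_USER frame after each wakeup. A minimal sketch of the wait, assuming a TPACKET_V2 ring already mapped, where hdr points at the current slot's header:

/* hypothetical example: block until the current RX slot is owned by
 * userspace */
#include <poll.h>
#include <linux/if_packet.h>

static void wait_for_frame(int fd, volatile struct tpacket2_hdr *hdr)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	while (!(hdr->tp_status & TP_STATUS_USER))
		poll(&pfd, 1, -1);
}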
2307
2308
 2309/* Dirty? Well, I still have not learned a better way to account
 2310 * for user mmaps.
2311 */
2312
2313static void packet_mm_open(struct vm_area_struct *vma)
2314{
2315 struct file *file = vma->vm_file;
40d4e3df 2316 struct socket *sock = file->private_data;
1da177e4 2317 struct sock *sk = sock->sk;
1ce4f28b 2318
1da177e4
LT
2319 if (sk)
2320 atomic_inc(&pkt_sk(sk)->mapped);
2321}
2322
2323static void packet_mm_close(struct vm_area_struct *vma)
2324{
2325 struct file *file = vma->vm_file;
40d4e3df 2326 struct socket *sock = file->private_data;
1da177e4 2327 struct sock *sk = sock->sk;
1ce4f28b 2328
1da177e4
LT
2329 if (sk)
2330 atomic_dec(&pkt_sk(sk)->mapped);
2331}
2332
f0f37e2f 2333static const struct vm_operations_struct packet_mmap_ops = {
40d4e3df
ED
2334 .open = packet_mm_open,
2335 .close = packet_mm_close,
1da177e4
LT
2336};
2337
0e3125c7
NH
2338static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
2339 unsigned int len)
1da177e4
LT
2340{
2341 int i;
2342
4ebf0ae2 2343 for (i = 0; i < len; i++) {
0e3125c7 2344 if (likely(pg_vec[i].buffer)) {
c56b4d90 2345 if (is_vmalloc_addr(pg_vec[i].buffer))
0e3125c7
NH
2346 vfree(pg_vec[i].buffer);
2347 else
2348 free_pages((unsigned long)pg_vec[i].buffer,
2349 order);
2350 pg_vec[i].buffer = NULL;
2351 }
1da177e4
LT
2352 }
2353 kfree(pg_vec);
2354}
2355
c56b4d90 2356static inline char *alloc_one_pg_vec_page(unsigned long order)
4ebf0ae2 2357{
0e3125c7
NH
2358 char *buffer = NULL;
2359 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
2360 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
2361
2362 buffer = (char *) __get_free_pages(gfp_flags, order);
2363
2364 if (buffer)
2365 return buffer;
2366
2367 /*
2368 * __get_free_pages failed, fall back to vmalloc
2369 */
bbce5a59 2370 buffer = vzalloc((1 << order) * PAGE_SIZE);
719bfeaa 2371
0e3125c7
NH
2372 if (buffer)
2373 return buffer;
2374
2375 /*
 2376 * vmalloc failed, let's dig into swap here
2377 */
0e3125c7
NH
2378 gfp_flags &= ~__GFP_NORETRY;
2379 buffer = (char *)__get_free_pages(gfp_flags, order);
2380 if (buffer)
2381 return buffer;
2382
2383 /*
2384 * complete and utter failure
2385 */
2386 return NULL;
4ebf0ae2
DM
2387}
2388
0e3125c7 2389static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4ebf0ae2
DM
2390{
2391 unsigned int block_nr = req->tp_block_nr;
0e3125c7 2392 struct pgv *pg_vec;
4ebf0ae2
DM
2393 int i;
2394
0e3125c7 2395 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
4ebf0ae2
DM
2396 if (unlikely(!pg_vec))
2397 goto out;
2398
2399 for (i = 0; i < block_nr; i++) {
c56b4d90 2400 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
0e3125c7 2401 if (unlikely(!pg_vec[i].buffer))
4ebf0ae2
DM
2402 goto out_free_pgvec;
2403 }
2404
2405out:
2406 return pg_vec;
2407
2408out_free_pgvec:
 2409 free_pg_vec(pg_vec, order, block_nr);
	/* free_pg_vec() already kfree()s the vector itself, so calling
	 * kfree(pg_vec) again here would be a double free */
4ebf0ae2
DM
2411 pg_vec = NULL;
2412 goto out;
2413}
1da177e4 2414
69e3c75f
JB
2415static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2416 int closing, int tx_ring)
1da177e4 2417{
0e3125c7 2418 struct pgv *pg_vec = NULL;
1da177e4 2419 struct packet_sock *po = pkt_sk(sk);
0e11c91e 2420 int was_running, order = 0;
69e3c75f
JB
2421 struct packet_ring_buffer *rb;
2422 struct sk_buff_head *rb_queue;
0e11c91e 2423 __be16 num;
69e3c75f 2424 int err;
1ce4f28b 2425
69e3c75f
JB
2426 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2427 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
1da177e4 2428
69e3c75f
JB
2429 err = -EBUSY;
2430 if (!closing) {
2431 if (atomic_read(&po->mapped))
2432 goto out;
2433 if (atomic_read(&rb->pending))
2434 goto out;
2435 }
1da177e4 2436
69e3c75f
JB
2437 if (req->tp_block_nr) {
2438 /* Sanity tests and some calculations */
2439 err = -EBUSY;
2440 if (unlikely(rb->pg_vec))
2441 goto out;
1da177e4 2442
bbd6ef87
PM
2443 switch (po->tp_version) {
2444 case TPACKET_V1:
2445 po->tp_hdrlen = TPACKET_HDRLEN;
2446 break;
2447 case TPACKET_V2:
2448 po->tp_hdrlen = TPACKET2_HDRLEN;
2449 break;
2450 }
2451
69e3c75f 2452 err = -EINVAL;
4ebf0ae2 2453 if (unlikely((int)req->tp_block_size <= 0))
69e3c75f 2454 goto out;
4ebf0ae2 2455 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
69e3c75f 2456 goto out;
8913336a 2457 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
69e3c75f
JB
2458 po->tp_reserve))
2459 goto out;
4ebf0ae2 2460 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
69e3c75f 2461 goto out;
1da177e4 2462
69e3c75f
JB
2463 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2464 if (unlikely(rb->frames_per_block <= 0))
2465 goto out;
2466 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2467 req->tp_frame_nr))
2468 goto out;
1da177e4
LT
2469
2470 err = -ENOMEM;
4ebf0ae2
DM
2471 order = get_order(req->tp_block_size);
2472 pg_vec = alloc_pg_vec(req, order);
2473 if (unlikely(!pg_vec))
1da177e4 2474 goto out;
69e3c75f
JB
2475 }
2476 /* Done */
2477 else {
2478 err = -EINVAL;
4ebf0ae2 2479 if (unlikely(req->tp_frame_nr))
69e3c75f 2480 goto out;
1da177e4
LT
2481 }
2482
2483 lock_sock(sk);
2484
2485 /* Detach socket from network */
2486 spin_lock(&po->bind_lock);
2487 was_running = po->running;
2488 num = po->num;
2489 if (was_running) {
2490 __dev_remove_pack(&po->prot_hook);
2491 po->num = 0;
2492 po->running = 0;
2493 __sock_put(sk);
2494 }
2495 spin_unlock(&po->bind_lock);
1ce4f28b 2496
1da177e4
LT
2497 synchronize_net();
2498
2499 err = -EBUSY;
905db440 2500 mutex_lock(&po->pg_vec_lock);
1da177e4
LT
2501 if (closing || atomic_read(&po->mapped) == 0) {
2502 err = 0;
2503#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
69e3c75f
JB
2504 spin_lock_bh(&rb_queue->lock);
2505 pg_vec = XC(rb->pg_vec, pg_vec);
2506 rb->frame_max = (req->tp_frame_nr - 1);
2507 rb->head = 0;
2508 rb->frame_size = req->tp_frame_size;
2509 spin_unlock_bh(&rb_queue->lock);
2510
2511 order = XC(rb->pg_vec_order, order);
2512 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2513
2514 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2515 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2516 tpacket_rcv : packet_rcv;
2517 skb_queue_purge(rb_queue);
1da177e4
LT
2518#undef XC
2519 if (atomic_read(&po->mapped))
40d4e3df
ED
2520 pr_err("packet_mmap: vma is busy: %d\n",
2521 atomic_read(&po->mapped));
1da177e4 2522 }
905db440 2523 mutex_unlock(&po->pg_vec_lock);
1da177e4
LT
2524
2525 spin_lock(&po->bind_lock);
2526 if (was_running && !po->running) {
2527 sock_hold(sk);
2528 po->running = 1;
2529 po->num = num;
2530 dev_add_pack(&po->prot_hook);
2531 }
2532 spin_unlock(&po->bind_lock);
2533
2534 release_sock(sk);
2535
1da177e4
LT
2536 if (pg_vec)
2537 free_pg_vec(pg_vec, order, req->tp_block_nr);
2538out:
2539 return err;
2540}
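
The sanity tests in packet_set_ring() above pin down the ring geometry: blocks must be positive, page-aligned multiples of PAGE_SIZE; frames must be TPACKET_ALIGNMENT-aligned and large enough for the header plus tp_reserve; and tp_frame_nr must equal frames_per_block * tp_block_nr (e.g. tp_block_size 8192 with tp_frame_size 2048 gives 4 frames per block). A hypothetical userspace pre-check mirroring those tests, assuming 4 KiB pages:

/* hypothetical helper: validate ring geometry before PACKET_RX_RING;
 * hdrlen is TPACKET_HDRLEN or TPACKET2_HDRLEN depending on the version,
 * reserve is the PACKET_RESERVE value (0 by default) */
#include <linux/if_packet.h>

static int ring_geometry_ok(const struct tpacket_req *req,
			    unsigned int hdrlen, unsigned int reserve)
{
	unsigned int fpb;

	if ((int)req->tp_block_size <= 0 ||
	    (req->tp_block_size & 4095) ||	/* assumes 4 KiB pages */
	    req->tp_frame_size < hdrlen + reserve ||
	    (req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
		return 0;
	fpb = req->tp_block_size / req->tp_frame_size;
	return fpb > 0 && fpb * req->tp_block_nr == req->tp_frame_nr;
}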
2541
69e3c75f
JB
2542static int packet_mmap(struct file *file, struct socket *sock,
2543 struct vm_area_struct *vma)
1da177e4
LT
2544{
2545 struct sock *sk = sock->sk;
2546 struct packet_sock *po = pkt_sk(sk);
69e3c75f
JB
2547 unsigned long size, expected_size;
2548 struct packet_ring_buffer *rb;
1da177e4
LT
2549 unsigned long start;
2550 int err = -EINVAL;
2551 int i;
2552
2553 if (vma->vm_pgoff)
2554 return -EINVAL;
2555
905db440 2556 mutex_lock(&po->pg_vec_lock);
69e3c75f
JB
2557
2558 expected_size = 0;
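	/* walking rb from &po->rx_ring to &po->tx_ring relies on the two
	 * rings being adjacent members of struct packet_sock */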
2559 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2560 if (rb->pg_vec) {
2561 expected_size += rb->pg_vec_len
2562 * rb->pg_vec_pages
2563 * PAGE_SIZE;
2564 }
2565 }
2566
2567 if (expected_size == 0)
1da177e4 2568 goto out;
69e3c75f
JB
2569
2570 size = vma->vm_end - vma->vm_start;
2571 if (size != expected_size)
1da177e4
LT
2572 goto out;
2573
1da177e4 2574 start = vma->vm_start;
69e3c75f
JB
2575 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2576 if (rb->pg_vec == NULL)
2577 continue;
2578
2579 for (i = 0; i < rb->pg_vec_len; i++) {
0e3125c7
NH
2580 struct page *page;
2581 void *kaddr = rb->pg_vec[i].buffer;
69e3c75f
JB
2582 int pg_num;
2583
c56b4d90
CG
2584 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
2585 page = pgv_to_page(kaddr);
69e3c75f
JB
2586 err = vm_insert_page(vma, start, page);
2587 if (unlikely(err))
2588 goto out;
2589 start += PAGE_SIZE;
0e3125c7 2590 kaddr += PAGE_SIZE;
69e3c75f 2591 }
4ebf0ae2 2592 }
1da177e4 2593 }
69e3c75f 2594
4ebf0ae2 2595 atomic_inc(&po->mapped);
1da177e4
LT
2596 vma->vm_ops = &packet_mmap_ops;
2597 err = 0;
2598
2599out:
905db440 2600 mutex_unlock(&po->pg_vec_lock);
1da177e4
LT
2601 return err;
2602}
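
packet_mmap() above requires vm_pgoff == 0 and a mapping length exactly equal to the combined RX and TX ring sizes, with the RX blocks laid out before the TX blocks. A minimal sketch:

/* hypothetical example: map both rings in one call; pass zeroed
 * tpacket_req structs for rings that were not configured */
#include <stdio.h>
#include <sys/mman.h>
#include <linux/if_packet.h>

void *map_rings(int fd, const struct tpacket_req *rx,
		const struct tpacket_req *tx)
{
	size_t len = (size_t)rx->tp_block_size * rx->tp_block_nr +
		     (size_t)tx->tp_block_size * tx->tp_block_nr;
	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);

	if (ring == MAP_FAILED) {
		perror("mmap");
		return NULL;
	}
	return ring;	/* RX blocks first, then TX blocks */
}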
1da177e4 2603
90ddc4f0 2604static const struct proto_ops packet_ops_spkt = {
1da177e4
LT
2605 .family = PF_PACKET,
2606 .owner = THIS_MODULE,
2607 .release = packet_release,
2608 .bind = packet_bind_spkt,
2609 .connect = sock_no_connect,
2610 .socketpair = sock_no_socketpair,
2611 .accept = sock_no_accept,
2612 .getname = packet_getname_spkt,
2613 .poll = datagram_poll,
2614 .ioctl = packet_ioctl,
2615 .listen = sock_no_listen,
2616 .shutdown = sock_no_shutdown,
2617 .setsockopt = sock_no_setsockopt,
2618 .getsockopt = sock_no_getsockopt,
2619 .sendmsg = packet_sendmsg_spkt,
2620 .recvmsg = packet_recvmsg,
2621 .mmap = sock_no_mmap,
2622 .sendpage = sock_no_sendpage,
2623};
1da177e4 2624
90ddc4f0 2625static const struct proto_ops packet_ops = {
1da177e4
LT
2626 .family = PF_PACKET,
2627 .owner = THIS_MODULE,
2628 .release = packet_release,
2629 .bind = packet_bind,
2630 .connect = sock_no_connect,
2631 .socketpair = sock_no_socketpair,
2632 .accept = sock_no_accept,
1ce4f28b 2633 .getname = packet_getname,
1da177e4
LT
2634 .poll = packet_poll,
2635 .ioctl = packet_ioctl,
2636 .listen = sock_no_listen,
2637 .shutdown = sock_no_shutdown,
2638 .setsockopt = packet_setsockopt,
2639 .getsockopt = packet_getsockopt,
2640 .sendmsg = packet_sendmsg,
2641 .recvmsg = packet_recvmsg,
2642 .mmap = packet_mmap,
2643 .sendpage = sock_no_sendpage,
2644};
2645
ec1b4cf7 2646static const struct net_proto_family packet_family_ops = {
1da177e4
LT
2647 .family = PF_PACKET,
2648 .create = packet_create,
2649 .owner = THIS_MODULE,
2650};
2651
2652static struct notifier_block packet_netdev_notifier = {
40d4e3df 2653 .notifier_call = packet_notifier,
1da177e4
LT
2654};
2655
2656#ifdef CONFIG_PROC_FS
1da177e4
LT
2657
2658static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
808f5114 2659 __acquires(RCU)
1da177e4 2660{
e372c414 2661 struct net *net = seq_file_net(seq);
808f5114 2662
2663 rcu_read_lock();
2664 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
1da177e4
LT
2665}
2666
2667static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2668{
1bf40954 2669 struct net *net = seq_file_net(seq);
808f5114 2670 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
1da177e4
LT
2671}
2672
2673static void packet_seq_stop(struct seq_file *seq, void *v)
808f5114 2674 __releases(RCU)
1da177e4 2675{
808f5114 2676 rcu_read_unlock();
1da177e4
LT
2677}
2678
1ce4f28b 2679static int packet_seq_show(struct seq_file *seq, void *v)
1da177e4
LT
2680{
2681 if (v == SEQ_START_TOKEN)
2682 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2683 else {
b7ceabd9 2684 struct sock *s = sk_entry(v);
1da177e4
LT
2685 const struct packet_sock *po = pkt_sk(s);
2686
2687 seq_printf(seq,
2688 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2689 s,
2690 atomic_read(&s->sk_refcnt),
2691 s->sk_type,
2692 ntohs(po->num),
2693 po->ifindex,
2694 po->running,
2695 atomic_read(&s->sk_rmem_alloc),
2696 sock_i_uid(s),
40d4e3df 2697 sock_i_ino(s));
1da177e4
LT
2698 }
2699
2700 return 0;
2701}
2702
56b3d975 2703static const struct seq_operations packet_seq_ops = {
1da177e4
LT
2704 .start = packet_seq_start,
2705 .next = packet_seq_next,
2706 .stop = packet_seq_stop,
2707 .show = packet_seq_show,
2708};
2709
2710static int packet_seq_open(struct inode *inode, struct file *file)
2711{
e372c414
DL
2712 return seq_open_net(inode, file, &packet_seq_ops,
2713 sizeof(struct seq_net_private));
1da177e4
LT
2714}
2715
da7071d7 2716static const struct file_operations packet_seq_fops = {
1da177e4
LT
2717 .owner = THIS_MODULE,
2718 .open = packet_seq_open,
2719 .read = seq_read,
2720 .llseek = seq_lseek,
e372c414 2721 .release = seq_release_net,
1da177e4
LT
2722};
2723
2724#endif
2725
2c8c1e72 2726static int __net_init packet_net_init(struct net *net)
d12d01d6 2727{
808f5114 2728 spin_lock_init(&net->packet.sklist_lock);
2aaef4e4 2729 INIT_HLIST_HEAD(&net->packet.sklist);
d12d01d6
DL
2730
2731 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2732 return -ENOMEM;
2733
2734 return 0;
2735}
2736
2c8c1e72 2737static void __net_exit packet_net_exit(struct net *net)
d12d01d6
DL
2738{
2739 proc_net_remove(net, "packet");
2740}
2741
2742static struct pernet_operations packet_net_ops = {
2743 .init = packet_net_init,
2744 .exit = packet_net_exit,
2745};
2746
2747
1da177e4
LT
2748static void __exit packet_exit(void)
2749{
1da177e4 2750 unregister_netdevice_notifier(&packet_netdev_notifier);
d12d01d6 2751 unregister_pernet_subsys(&packet_net_ops);
1da177e4
LT
2752 sock_unregister(PF_PACKET);
2753 proto_unregister(&packet_proto);
2754}
2755
2756static int __init packet_init(void)
2757{
2758 int rc = proto_register(&packet_proto, 0);
2759
2760 if (rc != 0)
2761 goto out;
2762
2763 sock_register(&packet_family_ops);
d12d01d6 2764 register_pernet_subsys(&packet_net_ops);
1da177e4 2765 register_netdevice_notifier(&packet_netdev_notifier);
1da177e4
LT
2766out:
2767 return rc;
2768}
2769
2770module_init(packet_init);
2771module_exit(packet_exit);
2772MODULE_LICENSE("GPL");
2773MODULE_ALIAS_NETPROTO(PF_PACKET);