/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit in the reserved space (tunnels); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to ll
		 header.  PPP makes it so, which is wrong, because it
		 introduces asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);

struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached*/
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device		*/
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

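/* Ring buffer memory may come from kmalloc or vmalloc, so translate a
 * buffer address to its backing struct page accordingly.
 */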
static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

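/* tp_status is the ownership handshake on a ring frame between kernel
 * and user space.  flush_dcache_page() keeps the user space mapping
 * coherent on architectures with aliasing caches, and the memory
 * barriers order the status word against the frame payload.
 */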
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

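/* Map a frame index onto its slot in the page vector and return the
 * frame, but only if its status word matches what the caller expects
 * (e.g. TP_STATUS_KERNEL for a free rx frame); otherwise NULL.
 */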
static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 * Output a raw packet to a device layer. This bypasses all the other
 * protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

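/* Run the socket's attached BPF filter, if any, over the skb and
 * return the number of bytes to keep; 0 means drop the packet.
 */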
static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock();

	return res;
}

/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return skb to original state on exit,
 * we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

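/* Like packet_rcv() above, but for sockets with a PACKET_RX_RING: the
 * frame is copied into the next TP_STATUS_KERNEL ring slot and handed
 * to user space by flipping the slot's status word.
 */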
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
#endif

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

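/* TX ring skb destructor: runs once the driver has consumed the skb,
 * and returns the ring frame to user space as TP_STATUS_AVAILABLE.
 */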
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

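/* Build an skb from one TX ring frame: the link-layer header is copied
 * into the linear area, the rest of the frame is attached as page
 * fragments referencing the ring pages themselves, so the payload is
 * not copied.
 */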
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

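/* Service loop for PACKET_TX_RING: walk the ring, turn each frame in
 * TP_STATUS_SEND_REQUEST state into an skb and hand it to
 * dev_queue_xmit(); unless MSG_DONTWAIT is set, keep going until all
 * pending frames have been transmitted.
 */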
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

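/* Allocate an skb with at most @linear bytes of linear data; the
 * remainder of @len is left to be filled in as paged data.
 */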
static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page?  Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
		struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 * Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 * Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

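/* Dequeue one entry from the socket error queue for an MSG_ERRQUEUE
 * read (used to deliver PACKET_TX_TIMESTAMP completions) and copy it
 * to user space.
 */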
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk	= sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

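/* Apply one membership entry to the device: add (what > 0) or remove
 * (what < 0) a multicast/unicast address, or adjust the promiscuous
 * or allmulti reference counts, depending on the entry type.
 */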
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

1916static int
b7058842 1917packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
1da177e4
LT
1918{
1919 struct sock *sk = sock->sk;
8dc41944 1920 struct packet_sock *po = pkt_sk(sk);
1da177e4
LT
1921 int ret;
1922
1923 if (level != SOL_PACKET)
1924 return -ENOPROTOOPT;
1925
69e3c75f 1926 switch (optname) {
1ce4f28b 1927 case PACKET_ADD_MEMBERSHIP:
1da177e4
LT
1928 case PACKET_DROP_MEMBERSHIP:
1929 {
0fb375fb
EB
1930 struct packet_mreq_max mreq;
1931 int len = optlen;
1932 memset(&mreq, 0, sizeof(mreq));
1933 if (len < sizeof(struct packet_mreq))
1da177e4 1934 return -EINVAL;
0fb375fb
EB
1935 if (len > sizeof(mreq))
1936 len = sizeof(mreq);
40d4e3df 1937 if (copy_from_user(&mreq, optval, len))
1da177e4 1938 return -EFAULT;
0fb375fb
EB
1939 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1940 return -EINVAL;
1da177e4
LT
1941 if (optname == PACKET_ADD_MEMBERSHIP)
1942 ret = packet_mc_add(sk, &mreq);
1943 else
1944 ret = packet_mc_drop(sk, &mreq);
1945 return ret;
1946 }
a2efcfa0 1947
1da177e4 1948 case PACKET_RX_RING:
69e3c75f 1949 case PACKET_TX_RING:
1da177e4
LT
1950 {
1951 struct tpacket_req req;
1952
40d4e3df 1953 if (optlen < sizeof(req))
1da177e4 1954 return -EINVAL;
bfd5f4a3
SS
1955 if (pkt_sk(sk)->has_vnet_hdr)
1956 return -EINVAL;
40d4e3df 1957 if (copy_from_user(&req, optval, sizeof(req)))
1da177e4 1958 return -EFAULT;
69e3c75f 1959 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1da177e4
LT
1960 }
1961 case PACKET_COPY_THRESH:
1962 {
1963 int val;
1964
40d4e3df 1965 if (optlen != sizeof(val))
1da177e4 1966 return -EINVAL;
40d4e3df 1967 if (copy_from_user(&val, optval, sizeof(val)))
1da177e4
LT
1968 return -EFAULT;
1969
1970 pkt_sk(sk)->copy_thresh = val;
1971 return 0;
1972 }
bbd6ef87
PM
1973 case PACKET_VERSION:
1974 {
1975 int val;
1976
1977 if (optlen != sizeof(val))
1978 return -EINVAL;
69e3c75f 1979 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
bbd6ef87
PM
1980 return -EBUSY;
1981 if (copy_from_user(&val, optval, sizeof(val)))
1982 return -EFAULT;
1983 switch (val) {
1984 case TPACKET_V1:
1985 case TPACKET_V2:
1986 po->tp_version = val;
1987 return 0;
1988 default:
1989 return -EINVAL;
1990 }
1991 }
8913336a
PM
1992 case PACKET_RESERVE:
1993 {
1994 unsigned int val;
1995
1996 if (optlen != sizeof(val))
1997 return -EINVAL;
69e3c75f 1998 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
8913336a
PM
1999 return -EBUSY;
2000 if (copy_from_user(&val, optval, sizeof(val)))
2001 return -EFAULT;
2002 po->tp_reserve = val;
2003 return 0;
2004 }
69e3c75f
JB
2005 case PACKET_LOSS:
2006 {
2007 unsigned int val;
2008
2009 if (optlen != sizeof(val))
2010 return -EINVAL;
2011 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2012 return -EBUSY;
2013 if (copy_from_user(&val, optval, sizeof(val)))
2014 return -EFAULT;
2015 po->tp_loss = !!val;
2016 return 0;
2017 }
8dc41944
HX
2018 case PACKET_AUXDATA:
2019 {
2020 int val;
2021
2022 if (optlen < sizeof(val))
2023 return -EINVAL;
2024 if (copy_from_user(&val, optval, sizeof(val)))
2025 return -EFAULT;
2026
2027 po->auxdata = !!val;
2028 return 0;
2029 }
80feaacb
PWJ
2030 case PACKET_ORIGDEV:
2031 {
2032 int val;
2033
2034 if (optlen < sizeof(val))
2035 return -EINVAL;
2036 if (copy_from_user(&val, optval, sizeof(val)))
2037 return -EFAULT;
2038
2039 po->origdev = !!val;
2040 return 0;
2041 }
bfd5f4a3
SS
2042 case PACKET_VNET_HDR:
2043 {
2044 int val;
2045
2046 if (sock->type != SOCK_RAW)
2047 return -EINVAL;
2048 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2049 return -EBUSY;
2050 if (optlen < sizeof(val))
2051 return -EINVAL;
2052 if (copy_from_user(&val, optval, sizeof(val)))
2053 return -EFAULT;
2054
2055 po->has_vnet_hdr = !!val;
2056 return 0;
2057 }
614f60fa
SM
2058 case PACKET_TIMESTAMP:
2059 {
2060 int val;
2061
2062 if (optlen != sizeof(val))
2063 return -EINVAL;
2064 if (copy_from_user(&val, optval, sizeof(val)))
2065 return -EFAULT;
2066
2067 po->tp_tstamp = val;
2068 return 0;
2069 }
1da177e4
LT
2070 default:
2071 return -ENOPROTOOPT;
2072 }
2073}
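
/*
 * Usage sketch (not part of this source file): selecting the TPACKET_V2
 * header format and requesting an RX ring from userspace. The geometry
 * is an illustrative assumption; as enforced in packet_set_ring() below,
 * tp_block_size must be a multiple of PAGE_SIZE and tp_frame_nr must
 * equal (tp_block_size / tp_frame_size) * tp_block_nr.
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	int version = TPACKET_V2;
 *	struct tpacket_req req = {
 *		.tp_block_size	= 4096,
 *		.tp_block_nr	= 64,
 *		.tp_frame_size	= 2048,
 *		.tp_frame_nr	= 128,	// (4096 / 2048) * 64
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version));
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */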

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
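
/*
 * Usage sketch (not part of this source file): reading, and thereby
 * resetting, the counters. Note that tp_packets comes back as
 * received + dropped, per the += above. "fd" is assumed to be a
 * PF_PACKET socket.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
 *		printf("%u packets, %u drops\n", st.tp_packets, st.tp_drops);
 */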


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
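
/*
 * Usage sketch (not part of this source file): SIOCINQ reports the
 * length of the packet at the head of the receive queue, SIOCOUTQ the
 * bytes still queued for transmit.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int pending;
 *	ioctl(fd, SIOCINQ, &pending);
 */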

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
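
/*
 * Usage sketch (not part of this source file): with an RX ring mapped,
 * userspace typically poll()s for POLLIN, then walks frames whose
 * tp_status carries TP_STATUS_USER and hands each back by storing
 * TP_STATUS_KERNEL when done.
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDNORM };
 *	poll(&pfd, 1, -1);
 */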


/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, let's dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}

static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
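
/*
 * Usage sketch (not part of this source file): the rings are exposed by
 * a single mmap() of the socket at offset 0; when both RX and TX rings
 * are configured, the mapping must cover both (RX pages first, then
 * TX), matching the expected_size check above. Reusing the tpacket_req
 * from the earlier sketch:
 *
 *	#include <sys/mman.h>
 *
 *	size_t size = req.tp_block_nr * (size_t)req.tp_block_size;
 *	void *ring = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */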

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);