/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside
     of the device, but higher levels still should reserve
     dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel); others are silly (PPP).
   - packet socket receives packets with the ll header pulled,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the
		 ll header. PPP does this, which is wrong, because it
		 introduces asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */

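/*
 * Illustrative userspace sketch (not part of this file): a minimal
 * AF_PACKET receiver, to make the assumptions above concrete.
 *
 *	#include <arpa/inet.h>		// htons()
 *	#include <net/if.h>		// if_nametoindex()
 *	#include <sys/socket.h>
 *	#include <linux/if_ether.h>	// ETH_P_ALL
 *	#include <linux/if_packet.h>	// struct sockaddr_ll
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family	= AF_PACKET,
 *		.sll_protocol	= htons(ETH_P_ALL),
 *		.sll_ifindex	= if_nametoindex("eth0"),  // hypothetical device
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 *	// With SOCK_RAW the ll header is part of every frame read from fd,
 *	// matching the "SOCK_RAW should push it back" assumption above.
 */
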
/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);

#define PGV_FROM_VMALLOC 1
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

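/*
 * Worked example of the frame lookup above (illustrative numbers, not
 * a kernel invariant): with frames_per_block = 4 and frame_size = 2048,
 * position 9 resolves to pg_vec[9 / 4] = pg_vec[2] at byte offset
 * (9 % 4) * 2048 = 2048 within that block's buffer. head walks
 * 0..frame_max and then wraps to 0, so frames are always visited in
 * ring order.
 */
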
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 * When we registered the protocol we saved the socket in the data
	 * field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 * Yank back the headers [hope the device set this
	 * right or kerboom...]
	 *
	 * Incoming packets have ll header pulled,
	 * push it back.
	 *
	 * For outgoing ones skb->data == skb_mac_header(skb)
	 * so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 * The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 * Charge the memory to the socket. This is done specifically
	 * to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 * Output a raw packet to a device layer. This bypasses all the other
 * protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

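/*
 * Illustrative userspace counterpart (not part of this file): sending
 * one complete frame through the legacy SOCK_PACKET path above.
 *
 *	struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };
 *	strncpy((char *)spkt.spkt_device, "eth0", 13);	// hypothetical device
 *	spkt.spkt_protocol = htons(ETH_P_IP);
 *	// frame[] must already hold the full ll header plus payload and
 *	// may not exceed dev->mtu + hard_header_len (see -EMSGSIZE above).
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */
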
static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock_bh();

	return res;
}

/*
 * This function does lazy skb cloning in the hope that most packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return skb to original state on exit,
 * we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
#endif

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

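/*
 * Illustrative userspace sketch of the TX ring protocol driven by
 * tpacket_snd() above (not part of this file; assumes TPACKET_V1 and a
 * ring already mapped via PACKET_TX_RING + mmap()):
 *
 *	struct tpacket_hdr *hdr = frame;	// next ring slot
 *	if (hdr->tp_status == TP_STATUS_AVAILABLE) {
 *		void *data = (u8 *)frame + TPACKET_HDRLEN -
 *			     sizeof(struct sockaddr_ll);
 *		memcpy(data, pkt, pkt_len);
 *		hdr->tp_len = pkt_len;
 *		hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *		send(fd, NULL, 0, 0);	// kicks the do/while loop above
 *	}
 *	// tpacket_destruct_skb() flips the slot back to
 *	// TP_STATUS_AVAILABLE once the skb has really left the stack.
 */
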
static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 * Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 * Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
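
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * if a non-default TPACKET version is wanted, it has to be selected
 * before PACKET_RX_RING/PACKET_TX_RING, since the PACKET_VERSION
 * handler above returns -EBUSY once a ring exists:
 *
 *	int val = TPACKET_V2;
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
 */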

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
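
/*
 * Illustrative userspace sketch (not part of this file): reading
 * PACKET_STATISTICS. Note from the code above that the counters are
 * zeroed on every read and that tp_packets includes tp_drops:
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 */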


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
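
/*
 * Illustrative userspace sketch (not part of this file): per the
 * skb_peek() above, SIOCINQ on a packet socket reports the length of
 * the next queued frame rather than the total queue occupancy:
 *
 *	int pending = 0;
 *
 *	ioctl(fd, SIOCINQ, &pending);
 */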

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
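
/*
 * Illustrative userspace sketch (not part of this file): with an RX
 * ring mapped, a reader typically polls for POLLIN and then consumes
 * frames whose status is no longer TP_STATUS_KERNEL:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	poll(&pfd, 1, -1);
 */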


/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, let's dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}

static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
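
/*
 * Illustrative userspace sketch (not part of this file; assumes a
 * 4 KiB PAGE_SIZE): a tpacket_req that satisfies the sanity checks
 * above, i.e. block size a multiple of PAGE_SIZE, frame size
 * TPACKET_ALIGNMENT-aligned and at least tp_hdrlen + tp_reserve, and
 * tp_frame_nr equal to frames_per_block * tp_block_nr:
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_frame_size = 2048,
 *		.tp_block_nr   = 64,
 *		.tp_frame_nr   = 128,
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */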

static int packet_mmap(struct file *file, struct socket *sock,
		       struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
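
/*
 * Illustrative userspace sketch (not part of this file): both rings are
 * mapped with a single mmap() of exactly the expected total size, RX
 * ring first, starting at offset 0:
 *
 *	size_t len = (size_t)req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 * With tp_frame_size dividing tp_block_size evenly, frame i of an RX
 * ring then starts at ring + i * req.tp_frame_size.
 */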

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);