1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
14 * Fixes:
15 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
37 * Ulises Alonso : Frame number limit removal and
38 * packet_set_ring memory leak.
39 * Eric Biederman : Allow for > 8 byte hardware addresses.
40 * The convention is that longer addresses
41 * will simply extend the hardware address
42 * byte arrays at the end of sockaddr_ll
43 * and packet_mreq.
44 *
45 * This program is free software; you can redistribute it and/or
46 * modify it under the terms of the GNU General Public License
47 * as published by the Free Software Foundation; either version
48 * 2 of the License, or (at your option) any later version.
49 *
50 */
51
52 #include <linux/types.h>
53 #include <linux/mm.h>
54 #include <linux/capability.h>
55 #include <linux/fcntl.h>
56 #include <linux/socket.h>
57 #include <linux/in.h>
58 #include <linux/inet.h>
59 #include <linux/netdevice.h>
60 #include <linux/if_packet.h>
61 #include <linux/wireless.h>
62 #include <linux/kernel.h>
63 #include <linux/kmod.h>
64 #include <net/ip.h>
65 #include <net/protocol.h>
66 #include <linux/skbuff.h>
67 #include <net/sock.h>
68 #include <linux/errno.h>
69 #include <linux/timer.h>
70 #include <asm/system.h>
71 #include <asm/uaccess.h>
72 #include <asm/ioctls.h>
73 #include <asm/page.h>
74 #include <asm/cacheflush.h>
75 #include <asm/io.h>
76 #include <linux/proc_fs.h>
77 #include <linux/seq_file.h>
78 #include <linux/poll.h>
79 #include <linux/module.h>
80 #include <linux/init.h>
81
82 #ifdef CONFIG_INET
83 #include <net/inet_common.h>
84 #endif
85
86 #define CONFIG_SOCK_PACKET 1
87
88 /*
89 Assumptions:
90    - If a device has no dev->hard_header routine, it adds and removes the ll
91      header itself. In this case the ll header is invisible outside of the
92      device, but higher levels should still reserve dev->hard_header_len.
93      Some devices are clever enough to reallocate the skb when the header
94      will not fit into the reserved space (tunnels); others are not
95      (PPP).
96    - A packet socket receives packets with the ll header already pulled off,
97      so SOCK_RAW should push it back.
98
99 On receive:
100 -----------
101
102 Incoming, dev->hard_header!=NULL
103 mac_header -> ll header
104 data -> data
105
106 Outgoing, dev->hard_header!=NULL
107 mac_header -> ll header
108 data -> ll header
109
110 Incoming, dev->hard_header==NULL
111   mac_header -> UNKNOWN position. It is very likely that it points to the
112                 ll header. PPP does this, which is wrong, because it
113                 introduces asymmetry between the rx and tx paths.
114 data -> data
115
116 Outgoing, dev->hard_header==NULL
117 mac_header -> data. ll header is still not built!
118 data -> data
119
120 Summary:
121   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
122
123
124 On transmit:
125 ------------
126
127 dev->hard_header != NULL
128 mac_header -> ll header
129 data -> ll header
130
131 dev->hard_header == NULL (ll header is added by device, we cannot control it)
132 mac_header -> data
133 data -> data
134
135    We should set the network header (nh.raw) to the correct position on output;
136    the packet classifier depends on it.
137 */
138
139 /* List of all packet sockets. */
140 static HLIST_HEAD(packet_sklist);
141 static DEFINE_RWLOCK(packet_sklist_lock);
142
143 static atomic_t packet_socks_nr;
144
145
146 /* Private packet socket structures. */
147
148 struct packet_mclist
149 {
150 struct packet_mclist *next;
151 int ifindex;
152 int count;
153 unsigned short type;
154 unsigned short alen;
155 unsigned char addr[MAX_ADDR_LEN];
156 };
157 /* identical to struct packet_mreq except it has
158 * a longer address field.
159 */
160 struct packet_mreq_max
161 {
162 int mr_ifindex;
163 unsigned short mr_type;
164 unsigned short mr_alen;
165 unsigned char mr_address[MAX_ADDR_LEN];
166 };
167
168 #ifdef CONFIG_PACKET_MMAP
169 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
170 #endif
171
172 static void packet_flush_mclist(struct sock *sk);
173
174 struct packet_sock {
175 /* struct sock has to be the first member of packet_sock */
176 struct sock sk;
177 struct tpacket_stats stats;
178 #ifdef CONFIG_PACKET_MMAP
179 char * *pg_vec;
180 unsigned int head;
181 unsigned int frames_per_block;
182 unsigned int frame_size;
183 unsigned int frame_max;
184 int copy_thresh;
185 #endif
186 struct packet_type prot_hook;
187 spinlock_t bind_lock;
188 unsigned int running:1, /* prot_hook is attached*/
189 auxdata:1,
190 origdev:1;
191 int ifindex; /* bound device */
192 __be16 num;
193 struct packet_mclist *mclist;
194 #ifdef CONFIG_PACKET_MMAP
195 atomic_t mapped;
196 unsigned int pg_vec_order;
197 unsigned int pg_vec_pages;
198 unsigned int pg_vec_len;
199 #endif
200 };
201
202 struct packet_skb_cb {
203 unsigned int origlen;
204 union {
205 struct sockaddr_pkt pkt;
206 struct sockaddr_ll ll;
207 } sa;
208 };
209
210 #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
211
212 #ifdef CONFIG_PACKET_MMAP
213
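/* Translate a ring-frame index into its kernel address: frames are packed
 * back to back inside each block of the pg_vec page vector, so the index
 * splits into a block number and a frame offset within that block.
 */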
214 static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
215 {
216 unsigned int pg_vec_pos, frame_offset;
217
218 pg_vec_pos = position / po->frames_per_block;
219 frame_offset = position % po->frames_per_block;
220
221 return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
222 }
223 #endif
224
225 static inline struct packet_sock *pkt_sk(struct sock *sk)
226 {
227 return (struct packet_sock *)sk;
228 }
229
230 static void packet_sock_destruct(struct sock *sk)
231 {
232 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
233 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
234
235 if (!sock_flag(sk, SOCK_DEAD)) {
236 printk("Attempt to release alive packet socket: %p\n", sk);
237 return;
238 }
239
240 atomic_dec(&packet_socks_nr);
241 #ifdef PACKET_REFCNT_DEBUG
242 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
243 #endif
244 }
245
246
247 static const struct proto_ops packet_ops;
248
249 #ifdef CONFIG_SOCK_PACKET
250 static const struct proto_ops packet_ops_spkt;
251
252 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
253 {
254 struct sock *sk;
255 struct sockaddr_pkt *spkt;
256
257 /*
258 * When we registered the protocol we saved the socket in the data
259 * field for just this event.
260 */
261
262 sk = pt->af_packet_priv;
263
264 /*
265 * Yank back the headers [hope the device set this
266 * right or kerboom...]
267 *
268 * Incoming packets have ll header pulled,
269 * push it back.
270 *
271 	 *	For outgoing ones skb->data == skb_mac_header(skb),
272 	 *	so this procedure is a no-op.
273 */
274
275 if (skb->pkt_type == PACKET_LOOPBACK)
276 goto out;
277
278 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
279 goto oom;
280
281 /* drop any routing info */
282 dst_release(skb->dst);
283 skb->dst = NULL;
284
285 /* drop conntrack reference */
286 nf_reset(skb);
287
288 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
289
290 skb_push(skb, skb->data - skb_mac_header(skb));
291
292 /*
293 * The SOCK_PACKET socket receives _all_ frames.
294 */
295
296 spkt->spkt_family = dev->type;
297 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
298 spkt->spkt_protocol = skb->protocol;
299
300 /*
301 * Charge the memory to the socket. This is done specifically
302 	 *	to prevent sockets from using up all the memory.
303 */
304
305 if (sock_queue_rcv_skb(sk,skb) == 0)
306 return 0;
307
308 out:
309 kfree_skb(skb);
310 oom:
311 return 0;
312 }
313
314
315 /*
316 * Output a raw packet to a device layer. This bypasses all the other
317 * protocol layers and you must therefore supply it with a complete frame
318 */
319
320 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
321 struct msghdr *msg, size_t len)
322 {
323 struct sock *sk = sock->sk;
324 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
325 struct sk_buff *skb;
326 struct net_device *dev;
327 __be16 proto=0;
328 int err;
329
330 /*
331 * Get and verify the address.
332 */
333
334 if (saddr)
335 {
336 if (msg->msg_namelen < sizeof(struct sockaddr))
337 return(-EINVAL);
338 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
339 proto=saddr->spkt_protocol;
340 }
341 else
342 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
343
344 /*
345 * Find the device first to size check it
346 */
347
348 saddr->spkt_device[13] = 0;
349 dev = dev_get_by_name(saddr->spkt_device);
350 err = -ENODEV;
351 if (dev == NULL)
352 goto out_unlock;
353
354 err = -ENETDOWN;
355 if (!(dev->flags & IFF_UP))
356 goto out_unlock;
357
358 /*
359 * You may not queue a frame bigger than the mtu. This is the lowest level
360 * raw protocol and you must do your own fragmentation at this level.
361 */
362
363 err = -EMSGSIZE;
364 if (len > dev->mtu + dev->hard_header_len)
365 goto out_unlock;
366
367 err = -ENOBUFS;
368 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
369
370 /*
371 * If the write buffer is full, then tough. At this level the user gets to
372 * deal with the problem - do your own algorithmic backoffs. That's far
373 * more flexible.
374 */
375
376 if (skb == NULL)
377 goto out_unlock;
378
379 /*
380 * Fill it in
381 */
382
383 /* FIXME: Save some space for broken drivers that write a
384 * hard header at transmission time by themselves. PPP is the
385 * notable one here. This should really be fixed at the driver level.
386 */
387 skb_reserve(skb, LL_RESERVED_SPACE(dev));
388 skb_reset_network_header(skb);
389
390 /* Try to align data part correctly */
391 if (dev->hard_header) {
392 skb->data -= dev->hard_header_len;
393 skb->tail -= dev->hard_header_len;
394 if (len < dev->hard_header_len)
395 skb_reset_network_header(skb);
396 }
397
398 /* Returns -EFAULT on error */
399 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
400 skb->protocol = proto;
401 skb->dev = dev;
402 skb->priority = sk->sk_priority;
403 if (err)
404 goto out_free;
405
406 /*
407 * Now send it
408 */
409
410 dev_queue_xmit(skb);
411 dev_put(dev);
412 return(len);
413
414 out_free:
415 kfree_skb(skb);
416 out_unlock:
417 if (dev)
418 dev_put(dev);
419 return err;
420 }
421 #endif
422
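/* Run the socket's attached BPF filter, if any. The filter's return value
 * is the number of bytes of the packet to keep (the snap length); zero
 * means drop the packet entirely.
 */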
423 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
424 unsigned int res)
425 {
426 struct sk_filter *filter;
427
428 rcu_read_lock_bh();
429 filter = rcu_dereference(sk->sk_filter);
430 if (filter != NULL)
431 res = sk_run_filter(skb, filter->insns, filter->len);
432 rcu_read_unlock_bh();
433
434 return res;
435 }
436
437 /*
438    This function performs lazy skb cloning in the hope that most packets
439    are discarded by BPF.
440
441    Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
442    and skb->cb are mangled. It works because (and as long as) packets
443    arriving here are owned by the current CPU. Output packets are cloned
444    by dev_queue_xmit_nit(), and input packets are processed by net_bh
445    sequentially, so if we return the skb to its original state on exit,
446    we will not harm anyone.
447 */
448
449 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
450 {
451 struct sock *sk;
452 struct sockaddr_ll *sll;
453 struct packet_sock *po;
454 u8 * skb_head = skb->data;
455 int skb_len = skb->len;
456 unsigned int snaplen, res;
457
458 if (skb->pkt_type == PACKET_LOOPBACK)
459 goto drop;
460
461 sk = pt->af_packet_priv;
462 po = pkt_sk(sk);
463
464 skb->dev = dev;
465
466 if (dev->hard_header) {
467 /* The device has an explicit notion of ll header,
468 exported to higher levels.
469
470 		   Otherwise, the device hides the details of its frame
471 		   structure, so that the corresponding packet head is
472 		   never delivered to the user.
473 */
474 if (sk->sk_type != SOCK_DGRAM)
475 skb_push(skb, skb->data - skb_mac_header(skb));
476 else if (skb->pkt_type == PACKET_OUTGOING) {
477 /* Special case: outgoing packets have ll header at head */
478 skb_pull(skb, skb_network_offset(skb));
479 }
480 }
481
482 snaplen = skb->len;
483
484 res = run_filter(skb, sk, snaplen);
485 if (!res)
486 goto drop_n_restore;
487 if (snaplen > res)
488 snaplen = res;
489
490 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
491 (unsigned)sk->sk_rcvbuf)
492 goto drop_n_acct;
493
494 if (skb_shared(skb)) {
495 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
496 if (nskb == NULL)
497 goto drop_n_acct;
498
499 if (skb_head != skb->data) {
500 skb->data = skb_head;
501 skb->len = skb_len;
502 }
503 kfree_skb(skb);
504 skb = nskb;
505 }
506
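	/* The sockaddr_ll stashed in the skb control block may carry a hardware
	 * address of up to MAX_ADDR_LEN bytes (only 8 of which are declared in
	 * sockaddr_ll itself), so make sure the whole thing still fits in
	 * skb->cb.
	 */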
507 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
508 sizeof(skb->cb));
509
510 sll = &PACKET_SKB_CB(skb)->sa.ll;
511 sll->sll_family = AF_PACKET;
512 sll->sll_hatype = dev->type;
513 sll->sll_protocol = skb->protocol;
514 sll->sll_pkttype = skb->pkt_type;
515 if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
516 sll->sll_ifindex = orig_dev->ifindex;
517 else
518 sll->sll_ifindex = dev->ifindex;
519 sll->sll_halen = 0;
520
521 if (dev->hard_header_parse)
522 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
523
524 PACKET_SKB_CB(skb)->origlen = skb->len;
525
526 if (pskb_trim(skb, snaplen))
527 goto drop_n_acct;
528
529 skb_set_owner_r(skb, sk);
530 skb->dev = NULL;
531 dst_release(skb->dst);
532 skb->dst = NULL;
533
534 /* drop conntrack reference */
535 nf_reset(skb);
536
537 spin_lock(&sk->sk_receive_queue.lock);
538 po->stats.tp_packets++;
539 __skb_queue_tail(&sk->sk_receive_queue, skb);
540 spin_unlock(&sk->sk_receive_queue.lock);
541 sk->sk_data_ready(sk, skb->len);
542 return 0;
543
544 drop_n_acct:
545 spin_lock(&sk->sk_receive_queue.lock);
546 po->stats.tp_drops++;
547 spin_unlock(&sk->sk_receive_queue.lock);
548
549 drop_n_restore:
550 if (skb_head != skb->data && skb_shared(skb)) {
551 skb->data = skb_head;
552 skb->len = skb_len;
553 }
554 drop:
555 kfree_skb(skb);
556 return 0;
557 }
558
559 #ifdef CONFIG_PACKET_MMAP
560 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
561 {
562 struct sock *sk;
563 struct packet_sock *po;
564 struct sockaddr_ll *sll;
565 struct tpacket_hdr *h;
566 u8 * skb_head = skb->data;
567 int skb_len = skb->len;
568 unsigned int snaplen, res;
569 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
570 unsigned short macoff, netoff;
571 struct sk_buff *copy_skb = NULL;
572 struct timeval tv;
573
574 if (skb->pkt_type == PACKET_LOOPBACK)
575 goto drop;
576
577 sk = pt->af_packet_priv;
578 po = pkt_sk(sk);
579
580 if (dev->hard_header) {
581 if (sk->sk_type != SOCK_DGRAM)
582 skb_push(skb, skb->data - skb_mac_header(skb));
583 else if (skb->pkt_type == PACKET_OUTGOING) {
584 /* Special case: outgoing packets have ll header at head */
585 skb_pull(skb, skb_network_offset(skb));
586 }
587 }
588
589 if (skb->ip_summed == CHECKSUM_PARTIAL)
590 status |= TP_STATUS_CSUMNOTREADY;
591
592 snaplen = skb->len;
593
594 res = run_filter(skb, sk, snaplen);
595 if (!res)
596 goto drop_n_restore;
597 if (snaplen > res)
598 snaplen = res;
599
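	/* Work out where the link-layer and network headers will start within
	 * the ring frame: SOCK_DGRAM frames carry no ll header, otherwise
	 * reserve room for it in front of the network header.
	 */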
600 if (sk->sk_type == SOCK_DGRAM) {
601 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
602 } else {
603 unsigned maclen = skb_network_offset(skb);
604 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
605 macoff = netoff - maclen;
606 }
607
608 if (macoff + snaplen > po->frame_size) {
609 if (po->copy_thresh &&
610 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
611 (unsigned)sk->sk_rcvbuf) {
612 if (skb_shared(skb)) {
613 copy_skb = skb_clone(skb, GFP_ATOMIC);
614 } else {
615 copy_skb = skb_get(skb);
616 skb_head = skb->data;
617 }
618 if (copy_skb)
619 skb_set_owner_r(copy_skb, sk);
620 }
621 snaplen = po->frame_size - macoff;
622 if ((int)snaplen < 0)
623 snaplen = 0;
624 }
625
626 spin_lock(&sk->sk_receive_queue.lock);
627 h = packet_lookup_frame(po, po->head);
628
629 if (h->tp_status)
630 goto ring_is_full;
631 po->head = po->head != po->frame_max ? po->head+1 : 0;
632 po->stats.tp_packets++;
633 if (copy_skb) {
634 status |= TP_STATUS_COPY;
635 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
636 }
637 if (!po->stats.tp_drops)
638 status &= ~TP_STATUS_LOSING;
639 spin_unlock(&sk->sk_receive_queue.lock);
640
641 skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
642
643 h->tp_len = skb->len;
644 h->tp_snaplen = snaplen;
645 h->tp_mac = macoff;
646 h->tp_net = netoff;
647 if (skb->tstamp.tv64 == 0) {
648 __net_timestamp(skb);
649 sock_enable_timestamp(sk);
650 }
651 tv = ktime_to_timeval(skb->tstamp);
652 h->tp_sec = tv.tv_sec;
653 h->tp_usec = tv.tv_usec;
654
655 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
656 sll->sll_halen = 0;
657 if (dev->hard_header_parse)
658 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
659 sll->sll_family = AF_PACKET;
660 sll->sll_hatype = dev->type;
661 sll->sll_protocol = skb->protocol;
662 sll->sll_pkttype = skb->pkt_type;
663 if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
664 sll->sll_ifindex = orig_dev->ifindex;
665 else
666 sll->sll_ifindex = dev->ifindex;
667
668 h->tp_status = status;
669 smp_mb();
670
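	/* User space reads the frame through the shared mapping, so flush the
	 * data cache over the pages just written; this matters on architectures
	 * whose caches can alias between kernel and user mappings.
	 */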
671 {
672 struct page *p_start, *p_end;
673 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
674
675 p_start = virt_to_page(h);
676 p_end = virt_to_page(h_end);
677 while (p_start <= p_end) {
678 flush_dcache_page(p_start);
679 p_start++;
680 }
681 }
682
683 sk->sk_data_ready(sk, 0);
684
685 drop_n_restore:
686 if (skb_head != skb->data && skb_shared(skb)) {
687 skb->data = skb_head;
688 skb->len = skb_len;
689 }
690 drop:
691 kfree_skb(skb);
692 return 0;
693
694 ring_is_full:
695 po->stats.tp_drops++;
696 spin_unlock(&sk->sk_receive_queue.lock);
697
698 sk->sk_data_ready(sk, 0);
699 if (copy_skb)
700 kfree_skb(copy_skb);
701 goto drop_n_restore;
702 }
703
704 #endif
705
706
707 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
708 struct msghdr *msg, size_t len)
709 {
710 struct sock *sk = sock->sk;
711 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
712 struct sk_buff *skb;
713 struct net_device *dev;
714 __be16 proto;
715 unsigned char *addr;
716 int ifindex, err, reserve = 0;
717
718 /*
719 * Get and verify the address.
720 */
721
722 if (saddr == NULL) {
723 struct packet_sock *po = pkt_sk(sk);
724
725 ifindex = po->ifindex;
726 proto = po->num;
727 addr = NULL;
728 } else {
729 err = -EINVAL;
730 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
731 goto out;
732 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
733 goto out;
734 ifindex = saddr->sll_ifindex;
735 proto = saddr->sll_protocol;
736 addr = saddr->sll_addr;
737 }
738
739
740 dev = dev_get_by_index(ifindex);
741 err = -ENXIO;
742 if (dev == NULL)
743 goto out_unlock;
744 if (sock->type == SOCK_RAW)
745 reserve = dev->hard_header_len;
746
747 err = -ENETDOWN;
748 if (!(dev->flags & IFF_UP))
749 goto out_unlock;
750
751 err = -EMSGSIZE;
752 if (len > dev->mtu+reserve)
753 goto out_unlock;
754
755 skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
756 msg->msg_flags & MSG_DONTWAIT, &err);
757 if (skb==NULL)
758 goto out_unlock;
759
760 skb_reserve(skb, LL_RESERVED_SPACE(dev));
761 skb_reset_network_header(skb);
762
763 if (dev->hard_header) {
764 int res;
765 err = -EINVAL;
766 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
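		/* For SOCK_RAW the caller supplies the full frame including the
		 * link-layer header, so discard the header just built and let the
		 * copy below start at the link-layer header position; for
		 * SOCK_DGRAM keep it and fail if it could not be built.
		 */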
767 if (sock->type != SOCK_DGRAM) {
768 skb_reset_tail_pointer(skb);
769 skb->len = 0;
770 } else if (res < 0)
771 goto out_free;
772 }
773
774 /* Returns -EFAULT on error */
775 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
776 if (err)
777 goto out_free;
778
779 skb->protocol = proto;
780 skb->dev = dev;
781 skb->priority = sk->sk_priority;
782
783 /*
784 * Now send it
785 */
786
787 err = dev_queue_xmit(skb);
788 if (err > 0 && (err = net_xmit_errno(err)) != 0)
789 goto out_unlock;
790
791 dev_put(dev);
792
793 return(len);
794
795 out_free:
796 kfree_skb(skb);
797 out_unlock:
798 if (dev)
799 dev_put(dev);
800 out:
801 return err;
802 }
803
804 /*
805 * Close a PACKET socket. This is fairly simple. We immediately go
806 * to 'closed' state and remove our protocol entry in the device list.
807 */
808
809 static int packet_release(struct socket *sock)
810 {
811 struct sock *sk = sock->sk;
812 struct packet_sock *po;
813
814 if (!sk)
815 return 0;
816
817 po = pkt_sk(sk);
818
819 write_lock_bh(&packet_sklist_lock);
820 sk_del_node_init(sk);
821 write_unlock_bh(&packet_sklist_lock);
822
823 /*
824 * Unhook packet receive handler.
825 */
826
827 if (po->running) {
828 /*
829 * Remove the protocol hook
830 */
831 dev_remove_pack(&po->prot_hook);
832 po->running = 0;
833 po->num = 0;
834 __sock_put(sk);
835 }
836
837 packet_flush_mclist(sk);
838
839 #ifdef CONFIG_PACKET_MMAP
840 if (po->pg_vec) {
841 struct tpacket_req req;
842 memset(&req, 0, sizeof(req));
843 packet_set_ring(sk, &req, 1);
844 }
845 #endif
846
847 /*
848 * Now the socket is dead. No more input will appear.
849 */
850
851 sock_orphan(sk);
852 sock->sk = NULL;
853
854 /* Purge queues */
855
856 skb_queue_purge(&sk->sk_receive_queue);
857
858 sock_put(sk);
859 return 0;
860 }
861
862 /*
863 * Attach a packet hook.
864 */
865
866 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
867 {
868 struct packet_sock *po = pkt_sk(sk);
869 /*
870 * Detach an existing hook if present.
871 */
872
873 lock_sock(sk);
874
875 spin_lock(&po->bind_lock);
876 if (po->running) {
877 __sock_put(sk);
878 po->running = 0;
879 po->num = 0;
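		/* dev_remove_pack() calls synchronize_net() and may sleep, so the
		 * bind_lock spinlock must be dropped around it.
		 */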
880 spin_unlock(&po->bind_lock);
881 dev_remove_pack(&po->prot_hook);
882 spin_lock(&po->bind_lock);
883 }
884
885 po->num = protocol;
886 po->prot_hook.type = protocol;
887 po->prot_hook.dev = dev;
888
889 po->ifindex = dev ? dev->ifindex : 0;
890
891 if (protocol == 0)
892 goto out_unlock;
893
894 if (dev) {
895 if (dev->flags&IFF_UP) {
896 dev_add_pack(&po->prot_hook);
897 sock_hold(sk);
898 po->running = 1;
899 } else {
900 sk->sk_err = ENETDOWN;
901 if (!sock_flag(sk, SOCK_DEAD))
902 sk->sk_error_report(sk);
903 }
904 } else {
905 dev_add_pack(&po->prot_hook);
906 sock_hold(sk);
907 po->running = 1;
908 }
909
910 out_unlock:
911 spin_unlock(&po->bind_lock);
912 release_sock(sk);
913 return 0;
914 }
915
916 /*
917 * Bind a packet socket to a device
918 */
919
920 #ifdef CONFIG_SOCK_PACKET
921
922 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
923 {
924 struct sock *sk=sock->sk;
925 char name[15];
926 struct net_device *dev;
927 int err = -ENODEV;
928
929 /*
930 * Check legality
931 */
932
933 if (addr_len != sizeof(struct sockaddr))
934 return -EINVAL;
935 strlcpy(name,uaddr->sa_data,sizeof(name));
936
937 dev = dev_get_by_name(name);
938 if (dev) {
939 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
940 dev_put(dev);
941 }
942 return err;
943 }
944 #endif
945
946 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
947 {
948 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
949 struct sock *sk=sock->sk;
950 struct net_device *dev = NULL;
951 int err;
952
953
954 /*
955 * Check legality
956 */
957
958 if (addr_len < sizeof(struct sockaddr_ll))
959 return -EINVAL;
960 if (sll->sll_family != AF_PACKET)
961 return -EINVAL;
962
963 if (sll->sll_ifindex) {
964 err = -ENODEV;
965 dev = dev_get_by_index(sll->sll_ifindex);
966 if (dev == NULL)
967 goto out;
968 }
969 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
970 if (dev)
971 dev_put(dev);
972
973 out:
974 return err;
975 }
976
977 static struct proto packet_proto = {
978 .name = "PACKET",
979 .owner = THIS_MODULE,
980 .obj_size = sizeof(struct packet_sock),
981 };
982
983 /*
984 * Create a packet of type SOCK_PACKET.
985  *	Create a packet socket.
986
987 static int packet_create(struct socket *sock, int protocol)
988 {
989 struct sock *sk;
990 struct packet_sock *po;
991 __be16 proto = (__force __be16)protocol; /* weird, but documented */
992 int err;
993
994 if (!capable(CAP_NET_RAW))
995 return -EPERM;
996 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
997 #ifdef CONFIG_SOCK_PACKET
998 && sock->type != SOCK_PACKET
999 #endif
1000 )
1001 return -ESOCKTNOSUPPORT;
1002
1003 sock->state = SS_UNCONNECTED;
1004
1005 err = -ENOBUFS;
1006 sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
1007 if (sk == NULL)
1008 goto out;
1009
1010 sock->ops = &packet_ops;
1011 #ifdef CONFIG_SOCK_PACKET
1012 if (sock->type == SOCK_PACKET)
1013 sock->ops = &packet_ops_spkt;
1014 #endif
1015 sock_init_data(sock, sk);
1016
1017 po = pkt_sk(sk);
1018 sk->sk_family = PF_PACKET;
1019 po->num = proto;
1020
1021 sk->sk_destruct = packet_sock_destruct;
1022 atomic_inc(&packet_socks_nr);
1023
1024 /*
1025 * Attach a protocol block
1026 */
1027
1028 spin_lock_init(&po->bind_lock);
1029 po->prot_hook.func = packet_rcv;
1030 #ifdef CONFIG_SOCK_PACKET
1031 if (sock->type == SOCK_PACKET)
1032 po->prot_hook.func = packet_rcv_spkt;
1033 #endif
1034 po->prot_hook.af_packet_priv = sk;
1035
1036 if (proto) {
1037 po->prot_hook.type = proto;
1038 dev_add_pack(&po->prot_hook);
1039 sock_hold(sk);
1040 po->running = 1;
1041 }
1042
1043 write_lock_bh(&packet_sklist_lock);
1044 sk_add_node(sk, &packet_sklist);
1045 write_unlock_bh(&packet_sklist_lock);
1046 return(0);
1047 out:
1048 return err;
1049 }
1050
1051 /*
1052 * Pull a packet from our receive queue and hand it to the user.
1053 * If necessary we block.
1054 */
1055
1056 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1057 struct msghdr *msg, size_t len, int flags)
1058 {
1059 struct sock *sk = sock->sk;
1060 struct sk_buff *skb;
1061 int copied, err;
1062 struct sockaddr_ll *sll;
1063
1064 err = -EINVAL;
1065 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1066 goto out;
1067
1068 #if 0
1069 /* What error should we return now? EUNATTACH? */
1070 if (pkt_sk(sk)->ifindex < 0)
1071 return -ENODEV;
1072 #endif
1073
1074 /*
1075 * Call the generic datagram receiver. This handles all sorts
1076 * of horrible races and re-entrancy so we can forget about it
1077 * in the protocol layers.
1078 *
1079 	 *	Now it will return ENETDOWN if the device has just gone down,
1080 	 *	but then it will block.
1081 */
1082
1083 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1084
1085 /*
1086 	 *	An error occurred, so return it. Because skb_recv_datagram()
1087 	 *	handles the blocking, we don't need to see or worry about blocking
1088 	 *	retries.
1089 */
1090
1091 if (skb == NULL)
1092 goto out;
1093
1094 /*
1095 * If the address length field is there to be filled in, we fill
1096 * it in now.
1097 */
1098
1099 sll = &PACKET_SKB_CB(skb)->sa.ll;
1100 if (sock->type == SOCK_PACKET)
1101 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1102 else
1103 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1104
1105 /*
1106 	 *	You lose any data beyond the buffer you gave. If this worries a
1107 	 *	user program, it can ask the device for its MTU anyway.
1108 */
1109
1110 copied = skb->len;
1111 if (copied > len)
1112 {
1113 copied=len;
1114 msg->msg_flags|=MSG_TRUNC;
1115 }
1116
1117 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1118 if (err)
1119 goto out_free;
1120
1121 sock_recv_timestamp(msg, sk, skb);
1122
1123 if (msg->msg_name)
1124 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1125 msg->msg_namelen);
1126
1127 if (pkt_sk(sk)->auxdata) {
1128 struct tpacket_auxdata aux;
1129
1130 aux.tp_status = TP_STATUS_USER;
1131 if (skb->ip_summed == CHECKSUM_PARTIAL)
1132 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1133 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1134 aux.tp_snaplen = skb->len;
1135 aux.tp_mac = 0;
1136 aux.tp_net = skb_network_offset(skb);
1137
1138 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1139 }
1140
1141 /*
1142 * Free or return the buffer as appropriate. Again this
1143 * hides all the races and re-entrancy issues from us.
1144 */
1145 err = (flags&MSG_TRUNC) ? skb->len : copied;
1146
1147 out_free:
1148 skb_free_datagram(sk, skb);
1149 out:
1150 return err;
1151 }
1152
1153 #ifdef CONFIG_SOCK_PACKET
1154 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1155 int *uaddr_len, int peer)
1156 {
1157 struct net_device *dev;
1158 struct sock *sk = sock->sk;
1159
1160 if (peer)
1161 return -EOPNOTSUPP;
1162
1163 uaddr->sa_family = AF_PACKET;
1164 dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1165 if (dev) {
1166 strlcpy(uaddr->sa_data, dev->name, 15);
1167 dev_put(dev);
1168 } else
1169 memset(uaddr->sa_data, 0, 14);
1170 *uaddr_len = sizeof(*uaddr);
1171
1172 return 0;
1173 }
1174 #endif
1175
1176 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1177 int *uaddr_len, int peer)
1178 {
1179 struct net_device *dev;
1180 struct sock *sk = sock->sk;
1181 struct packet_sock *po = pkt_sk(sk);
1182 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1183
1184 if (peer)
1185 return -EOPNOTSUPP;
1186
1187 sll->sll_family = AF_PACKET;
1188 sll->sll_ifindex = po->ifindex;
1189 sll->sll_protocol = po->num;
1190 dev = dev_get_by_index(po->ifindex);
1191 if (dev) {
1192 sll->sll_hatype = dev->type;
1193 sll->sll_halen = dev->addr_len;
1194 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1195 dev_put(dev);
1196 } else {
1197 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1198 sll->sll_halen = 0;
1199 }
1200 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1201
1202 return 0;
1203 }
1204
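/* Apply (what = +1) or withdraw (what = -1) a membership request on the
 * device: a specific multicast address, promiscuous mode, or all-multicast.
 */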
1205 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1206 {
1207 switch (i->type) {
1208 case PACKET_MR_MULTICAST:
1209 if (what > 0)
1210 dev_mc_add(dev, i->addr, i->alen, 0);
1211 else
1212 dev_mc_delete(dev, i->addr, i->alen, 0);
1213 break;
1214 case PACKET_MR_PROMISC:
1215 dev_set_promiscuity(dev, what);
1216 break;
1217 case PACKET_MR_ALLMULTI:
1218 dev_set_allmulti(dev, what);
1219 break;
1220 default:;
1221 }
1222 }
1223
1224 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1225 {
1226 for ( ; i; i=i->next) {
1227 if (i->ifindex == dev->ifindex)
1228 packet_dev_mc(dev, i, what);
1229 }
1230 }
1231
1232 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1233 {
1234 struct packet_sock *po = pkt_sk(sk);
1235 struct packet_mclist *ml, *i;
1236 struct net_device *dev;
1237 int err;
1238
1239 rtnl_lock();
1240
1241 err = -ENODEV;
1242 dev = __dev_get_by_index(mreq->mr_ifindex);
1243 if (!dev)
1244 goto done;
1245
1246 err = -EINVAL;
1247 if (mreq->mr_alen > dev->addr_len)
1248 goto done;
1249
1250 err = -ENOBUFS;
1251 i = kmalloc(sizeof(*i), GFP_KERNEL);
1252 if (i == NULL)
1253 goto done;
1254
1255 err = 0;
1256 for (ml = po->mclist; ml; ml = ml->next) {
1257 if (ml->ifindex == mreq->mr_ifindex &&
1258 ml->type == mreq->mr_type &&
1259 ml->alen == mreq->mr_alen &&
1260 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1261 ml->count++;
1262 /* Free the new element ... */
1263 kfree(i);
1264 goto done;
1265 }
1266 }
1267
1268 i->type = mreq->mr_type;
1269 i->ifindex = mreq->mr_ifindex;
1270 i->alen = mreq->mr_alen;
1271 memcpy(i->addr, mreq->mr_address, i->alen);
1272 i->count = 1;
1273 i->next = po->mclist;
1274 po->mclist = i;
1275 packet_dev_mc(dev, i, +1);
1276
1277 done:
1278 rtnl_unlock();
1279 return err;
1280 }
1281
1282 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1283 {
1284 struct packet_mclist *ml, **mlp;
1285
1286 rtnl_lock();
1287
1288 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1289 if (ml->ifindex == mreq->mr_ifindex &&
1290 ml->type == mreq->mr_type &&
1291 ml->alen == mreq->mr_alen &&
1292 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1293 if (--ml->count == 0) {
1294 struct net_device *dev;
1295 *mlp = ml->next;
1296 dev = dev_get_by_index(ml->ifindex);
1297 if (dev) {
1298 packet_dev_mc(dev, ml, -1);
1299 dev_put(dev);
1300 }
1301 kfree(ml);
1302 }
1303 rtnl_unlock();
1304 return 0;
1305 }
1306 }
1307 rtnl_unlock();
1308 return -EADDRNOTAVAIL;
1309 }
1310
1311 static void packet_flush_mclist(struct sock *sk)
1312 {
1313 struct packet_sock *po = pkt_sk(sk);
1314 struct packet_mclist *ml;
1315
1316 if (!po->mclist)
1317 return;
1318
1319 rtnl_lock();
1320 while ((ml = po->mclist) != NULL) {
1321 struct net_device *dev;
1322
1323 po->mclist = ml->next;
1324 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1325 packet_dev_mc(dev, ml, -1);
1326 dev_put(dev);
1327 }
1328 kfree(ml);
1329 }
1330 rtnl_unlock();
1331 }
1332
1333 static int
1334 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1335 {
1336 struct sock *sk = sock->sk;
1337 struct packet_sock *po = pkt_sk(sk);
1338 int ret;
1339
1340 if (level != SOL_PACKET)
1341 return -ENOPROTOOPT;
1342
1343 switch(optname) {
1344 case PACKET_ADD_MEMBERSHIP:
1345 case PACKET_DROP_MEMBERSHIP:
1346 {
1347 struct packet_mreq_max mreq;
1348 int len = optlen;
1349 memset(&mreq, 0, sizeof(mreq));
1350 if (len < sizeof(struct packet_mreq))
1351 return -EINVAL;
1352 if (len > sizeof(mreq))
1353 len = sizeof(mreq);
1354 if (copy_from_user(&mreq,optval,len))
1355 return -EFAULT;
1356 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1357 return -EINVAL;
1358 if (optname == PACKET_ADD_MEMBERSHIP)
1359 ret = packet_mc_add(sk, &mreq);
1360 else
1361 ret = packet_mc_drop(sk, &mreq);
1362 return ret;
1363 }
1364
1365 #ifdef CONFIG_PACKET_MMAP
1366 case PACKET_RX_RING:
1367 {
1368 struct tpacket_req req;
1369
1370 if (optlen<sizeof(req))
1371 return -EINVAL;
1372 if (copy_from_user(&req,optval,sizeof(req)))
1373 return -EFAULT;
1374 return packet_set_ring(sk, &req, 0);
1375 }
1376 case PACKET_COPY_THRESH:
1377 {
1378 int val;
1379
1380 if (optlen!=sizeof(val))
1381 return -EINVAL;
1382 if (copy_from_user(&val,optval,sizeof(val)))
1383 return -EFAULT;
1384
1385 pkt_sk(sk)->copy_thresh = val;
1386 return 0;
1387 }
1388 #endif
1389 case PACKET_AUXDATA:
1390 {
1391 int val;
1392
1393 if (optlen < sizeof(val))
1394 return -EINVAL;
1395 if (copy_from_user(&val, optval, sizeof(val)))
1396 return -EFAULT;
1397
1398 po->auxdata = !!val;
1399 return 0;
1400 }
1401 case PACKET_ORIGDEV:
1402 {
1403 int val;
1404
1405 if (optlen < sizeof(val))
1406 return -EINVAL;
1407 if (copy_from_user(&val, optval, sizeof(val)))
1408 return -EFAULT;
1409
1410 po->origdev = !!val;
1411 return 0;
1412 }
1413 default:
1414 return -ENOPROTOOPT;
1415 }
1416 }
1417
1418 static int packet_getsockopt(struct socket *sock, int level, int optname,
1419 char __user *optval, int __user *optlen)
1420 {
1421 int len;
1422 int val;
1423 struct sock *sk = sock->sk;
1424 struct packet_sock *po = pkt_sk(sk);
1425 void *data;
1426 struct tpacket_stats st;
1427
1428 if (level != SOL_PACKET)
1429 return -ENOPROTOOPT;
1430
1431 if (get_user(len, optlen))
1432 return -EFAULT;
1433
1434 if (len < 0)
1435 return -EINVAL;
1436
1437 switch(optname) {
1438 case PACKET_STATISTICS:
1439 if (len > sizeof(struct tpacket_stats))
1440 len = sizeof(struct tpacket_stats);
1441 spin_lock_bh(&sk->sk_receive_queue.lock);
1442 st = po->stats;
1443 memset(&po->stats, 0, sizeof(st));
1444 spin_unlock_bh(&sk->sk_receive_queue.lock);
1445 st.tp_packets += st.tp_drops;
1446
1447 data = &st;
1448 break;
1449 case PACKET_AUXDATA:
1450 if (len > sizeof(int))
1451 len = sizeof(int);
1452 val = po->auxdata;
1453
1454 data = &val;
1455 break;
1456 case PACKET_ORIGDEV:
1457 if (len > sizeof(int))
1458 len = sizeof(int);
1459 val = po->origdev;
1460
1461 data = &val;
1462 break;
1463 default:
1464 return -ENOPROTOOPT;
1465 }
1466
1467 if (put_user(len, optlen))
1468 return -EFAULT;
1469 if (copy_to_user(optval, data, len))
1470 return -EFAULT;
1471 return 0;
1472 }
1473
1474
1475 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1476 {
1477 struct sock *sk;
1478 struct hlist_node *node;
1479 struct net_device *dev = data;
1480
1481 read_lock(&packet_sklist_lock);
1482 sk_for_each(sk, node, &packet_sklist) {
1483 struct packet_sock *po = pkt_sk(sk);
1484
1485 switch (msg) {
1486 case NETDEV_UNREGISTER:
1487 if (po->mclist)
1488 packet_dev_mclist(dev, po->mclist, -1);
1489 /* fallthrough */
1490
1491 case NETDEV_DOWN:
1492 if (dev->ifindex == po->ifindex) {
1493 spin_lock(&po->bind_lock);
1494 if (po->running) {
1495 __dev_remove_pack(&po->prot_hook);
1496 __sock_put(sk);
1497 po->running = 0;
1498 sk->sk_err = ENETDOWN;
1499 if (!sock_flag(sk, SOCK_DEAD))
1500 sk->sk_error_report(sk);
1501 }
1502 if (msg == NETDEV_UNREGISTER) {
1503 po->ifindex = -1;
1504 po->prot_hook.dev = NULL;
1505 }
1506 spin_unlock(&po->bind_lock);
1507 }
1508 break;
1509 case NETDEV_UP:
1510 spin_lock(&po->bind_lock);
1511 if (dev->ifindex == po->ifindex && po->num &&
1512 !po->running) {
1513 dev_add_pack(&po->prot_hook);
1514 sock_hold(sk);
1515 po->running = 1;
1516 }
1517 spin_unlock(&po->bind_lock);
1518 break;
1519 }
1520 }
1521 read_unlock(&packet_sklist_lock);
1522 return NOTIFY_DONE;
1523 }
1524
1525
1526 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1527 unsigned long arg)
1528 {
1529 struct sock *sk = sock->sk;
1530
1531 switch(cmd) {
1532 case SIOCOUTQ:
1533 {
1534 int amount = atomic_read(&sk->sk_wmem_alloc);
1535 return put_user(amount, (int __user *)arg);
1536 }
1537 case SIOCINQ:
1538 {
1539 struct sk_buff *skb;
1540 int amount = 0;
1541
1542 spin_lock_bh(&sk->sk_receive_queue.lock);
1543 skb = skb_peek(&sk->sk_receive_queue);
1544 if (skb)
1545 amount = skb->len;
1546 spin_unlock_bh(&sk->sk_receive_queue.lock);
1547 return put_user(amount, (int __user *)arg);
1548 }
1549 case SIOCGSTAMP:
1550 return sock_get_timestamp(sk, (struct timeval __user *)arg);
1551 case SIOCGSTAMPNS:
1552 return sock_get_timestampns(sk, (struct timespec __user *)arg);
1553
1554 #ifdef CONFIG_INET
1555 case SIOCADDRT:
1556 case SIOCDELRT:
1557 case SIOCDARP:
1558 case SIOCGARP:
1559 case SIOCSARP:
1560 case SIOCGIFADDR:
1561 case SIOCSIFADDR:
1562 case SIOCGIFBRDADDR:
1563 case SIOCSIFBRDADDR:
1564 case SIOCGIFNETMASK:
1565 case SIOCSIFNETMASK:
1566 case SIOCGIFDSTADDR:
1567 case SIOCSIFDSTADDR:
1568 case SIOCSIFFLAGS:
1569 return inet_dgram_ops.ioctl(sock, cmd, arg);
1570 #endif
1571
1572 default:
1573 return -ENOIOCTLCMD;
1574 }
1575 return 0;
1576 }
1577
1578 #ifndef CONFIG_PACKET_MMAP
1579 #define packet_mmap sock_no_mmap
1580 #define packet_poll datagram_poll
1581 #else
1582
1583 static unsigned int packet_poll(struct file * file, struct socket *sock,
1584 poll_table *wait)
1585 {
1586 struct sock *sk = sock->sk;
1587 struct packet_sock *po = pkt_sk(sk);
1588 unsigned int mask = datagram_poll(file, sock, wait);
1589
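	/* With a receive ring mapped, also report readability when the most
	 * recently filled frame (the slot just before head) has been handed to
	 * user space, i.e. its tp_status is no longer TP_STATUS_KERNEL.
	 */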
1590 spin_lock_bh(&sk->sk_receive_queue.lock);
1591 if (po->pg_vec) {
1592 unsigned last = po->head ? po->head-1 : po->frame_max;
1593 struct tpacket_hdr *h;
1594
1595 h = packet_lookup_frame(po, last);
1596
1597 if (h->tp_status)
1598 mask |= POLLIN | POLLRDNORM;
1599 }
1600 spin_unlock_bh(&sk->sk_receive_queue.lock);
1601 return mask;
1602 }
1603
1604
1605 /* Dirty? Well, I still have not learned a better way to account
1606 * for user mmaps.
1607 */
1608
1609 static void packet_mm_open(struct vm_area_struct *vma)
1610 {
1611 struct file *file = vma->vm_file;
1612 struct socket * sock = file->private_data;
1613 struct sock *sk = sock->sk;
1614
1615 if (sk)
1616 atomic_inc(&pkt_sk(sk)->mapped);
1617 }
1618
1619 static void packet_mm_close(struct vm_area_struct *vma)
1620 {
1621 struct file *file = vma->vm_file;
1622 struct socket * sock = file->private_data;
1623 struct sock *sk = sock->sk;
1624
1625 if (sk)
1626 atomic_dec(&pkt_sk(sk)->mapped);
1627 }
1628
1629 static struct vm_operations_struct packet_mmap_ops = {
1630 .open = packet_mm_open,
1631 .close =packet_mm_close,
1632 };
1633
1634 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1635 {
1636 return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1637 }
1638
1639 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1640 {
1641 int i;
1642
1643 for (i = 0; i < len; i++) {
1644 if (likely(pg_vec[i]))
1645 free_pages((unsigned long) pg_vec[i], order);
1646 }
1647 kfree(pg_vec);
1648 }
1649
1650 static inline char *alloc_one_pg_vec_page(unsigned long order)
1651 {
1652 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1653 order);
1654 }
1655
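/* Allocate the ring's page vector: one zeroed, physically contiguous
 * high-order allocation per requested block.
 */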
1656 static char **alloc_pg_vec(struct tpacket_req *req, int order)
1657 {
1658 unsigned int block_nr = req->tp_block_nr;
1659 char **pg_vec;
1660 int i;
1661
1662 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1663 if (unlikely(!pg_vec))
1664 goto out;
1665
1666 for (i = 0; i < block_nr; i++) {
1667 pg_vec[i] = alloc_one_pg_vec_page(order);
1668 if (unlikely(!pg_vec[i]))
1669 goto out_free_pgvec;
1670 }
1671
1672 out:
1673 return pg_vec;
1674
1675 out_free_pgvec:
1676 free_pg_vec(pg_vec, order, block_nr);
1677 pg_vec = NULL;
1678 goto out;
1679 }
1680
1681 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1682 {
1683 char **pg_vec = NULL;
1684 struct packet_sock *po = pkt_sk(sk);
1685 int was_running, order = 0;
1686 __be16 num;
1687 int err = 0;
1688
1689 if (req->tp_block_nr) {
1690 int i, l;
1691
1692 /* Sanity tests and some calculations */
1693
1694 if (unlikely(po->pg_vec))
1695 return -EBUSY;
1696
1697 if (unlikely((int)req->tp_block_size <= 0))
1698 return -EINVAL;
1699 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1700 return -EINVAL;
1701 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1702 return -EINVAL;
1703 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1704 return -EINVAL;
1705
1706 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1707 if (unlikely(po->frames_per_block <= 0))
1708 return -EINVAL;
1709 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1710 req->tp_frame_nr))
1711 return -EINVAL;
1712
1713 err = -ENOMEM;
1714 order = get_order(req->tp_block_size);
1715 pg_vec = alloc_pg_vec(req, order);
1716 if (unlikely(!pg_vec))
1717 goto out;
1718
1719 l = 0;
1720 for (i = 0; i < req->tp_block_nr; i++) {
1721 char *ptr = pg_vec[i];
1722 struct tpacket_hdr *header;
1723 int k;
1724
1725 for (k = 0; k < po->frames_per_block; k++) {
1726 header = (struct tpacket_hdr *) ptr;
1727 header->tp_status = TP_STATUS_KERNEL;
1728 ptr += req->tp_frame_size;
1729 }
1730 }
1731 /* Done */
1732 } else {
1733 if (unlikely(req->tp_frame_nr))
1734 return -EINVAL;
1735 }
1736
1737 lock_sock(sk);
1738
1739 /* Detach socket from network */
1740 spin_lock(&po->bind_lock);
1741 was_running = po->running;
1742 num = po->num;
1743 if (was_running) {
1744 __dev_remove_pack(&po->prot_hook);
1745 po->num = 0;
1746 po->running = 0;
1747 __sock_put(sk);
1748 }
1749 spin_unlock(&po->bind_lock);
1750
1751 synchronize_net();
1752
1753 err = -EBUSY;
1754 if (closing || atomic_read(&po->mapped) == 0) {
1755 err = 0;
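		/* Swap the new ring (or none, when closing) into place under the
		 * receive queue lock; the old page vector, if any, ends up in the
		 * local pg_vec/order variables and is freed once the socket has
		 * been rebound below.
		 */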
1756 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1757
1758 spin_lock_bh(&sk->sk_receive_queue.lock);
1759 pg_vec = XC(po->pg_vec, pg_vec);
1760 po->frame_max = (req->tp_frame_nr - 1);
1761 po->head = 0;
1762 po->frame_size = req->tp_frame_size;
1763 spin_unlock_bh(&sk->sk_receive_queue.lock);
1764
1765 order = XC(po->pg_vec_order, order);
1766 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1767
1768 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1769 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1770 skb_queue_purge(&sk->sk_receive_queue);
1771 #undef XC
1772 if (atomic_read(&po->mapped))
1773 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1774 }
1775
1776 spin_lock(&po->bind_lock);
1777 if (was_running && !po->running) {
1778 sock_hold(sk);
1779 po->running = 1;
1780 po->num = num;
1781 dev_add_pack(&po->prot_hook);
1782 }
1783 spin_unlock(&po->bind_lock);
1784
1785 release_sock(sk);
1786
1787 if (pg_vec)
1788 free_pg_vec(pg_vec, order, req->tp_block_nr);
1789 out:
1790 return err;
1791 }
1792
1793 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1794 {
1795 struct sock *sk = sock->sk;
1796 struct packet_sock *po = pkt_sk(sk);
1797 unsigned long size;
1798 unsigned long start;
1799 int err = -EINVAL;
1800 int i;
1801
1802 if (vma->vm_pgoff)
1803 return -EINVAL;
1804
1805 size = vma->vm_end - vma->vm_start;
1806
1807 lock_sock(sk);
1808 if (po->pg_vec == NULL)
1809 goto out;
1810 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1811 goto out;
1812
1813 start = vma->vm_start;
1814 for (i = 0; i < po->pg_vec_len; i++) {
1815 struct page *page = virt_to_page(po->pg_vec[i]);
1816 int pg_num;
1817
1818 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1819 err = vm_insert_page(vma, start, page);
1820 if (unlikely(err))
1821 goto out;
1822 start += PAGE_SIZE;
1823 }
1824 }
1825 atomic_inc(&po->mapped);
1826 vma->vm_ops = &packet_mmap_ops;
1827 err = 0;
1828
1829 out:
1830 release_sock(sk);
1831 return err;
1832 }
1833 #endif
1834
1835
1836 #ifdef CONFIG_SOCK_PACKET
1837 static const struct proto_ops packet_ops_spkt = {
1838 .family = PF_PACKET,
1839 .owner = THIS_MODULE,
1840 .release = packet_release,
1841 .bind = packet_bind_spkt,
1842 .connect = sock_no_connect,
1843 .socketpair = sock_no_socketpair,
1844 .accept = sock_no_accept,
1845 .getname = packet_getname_spkt,
1846 .poll = datagram_poll,
1847 .ioctl = packet_ioctl,
1848 .listen = sock_no_listen,
1849 .shutdown = sock_no_shutdown,
1850 .setsockopt = sock_no_setsockopt,
1851 .getsockopt = sock_no_getsockopt,
1852 .sendmsg = packet_sendmsg_spkt,
1853 .recvmsg = packet_recvmsg,
1854 .mmap = sock_no_mmap,
1855 .sendpage = sock_no_sendpage,
1856 };
1857 #endif
1858
1859 static const struct proto_ops packet_ops = {
1860 .family = PF_PACKET,
1861 .owner = THIS_MODULE,
1862 .release = packet_release,
1863 .bind = packet_bind,
1864 .connect = sock_no_connect,
1865 .socketpair = sock_no_socketpair,
1866 .accept = sock_no_accept,
1867 .getname = packet_getname,
1868 .poll = packet_poll,
1869 .ioctl = packet_ioctl,
1870 .listen = sock_no_listen,
1871 .shutdown = sock_no_shutdown,
1872 .setsockopt = packet_setsockopt,
1873 .getsockopt = packet_getsockopt,
1874 .sendmsg = packet_sendmsg,
1875 .recvmsg = packet_recvmsg,
1876 .mmap = packet_mmap,
1877 .sendpage = sock_no_sendpage,
1878 };
1879
1880 static struct net_proto_family packet_family_ops = {
1881 .family = PF_PACKET,
1882 .create = packet_create,
1883 .owner = THIS_MODULE,
1884 };
1885
1886 static struct notifier_block packet_netdev_notifier = {
1887 .notifier_call =packet_notifier,
1888 };
1889
1890 #ifdef CONFIG_PROC_FS
1891 static inline struct sock *packet_seq_idx(loff_t off)
1892 {
1893 struct sock *s;
1894 struct hlist_node *node;
1895
1896 sk_for_each(s, node, &packet_sklist) {
1897 if (!off--)
1898 return s;
1899 }
1900 return NULL;
1901 }
1902
1903 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1904 {
1905 read_lock(&packet_sklist_lock);
1906 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1907 }
1908
1909 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1910 {
1911 ++*pos;
1912 return (v == SEQ_START_TOKEN)
1913 ? sk_head(&packet_sklist)
1914 : sk_next((struct sock*)v) ;
1915 }
1916
1917 static void packet_seq_stop(struct seq_file *seq, void *v)
1918 {
1919 read_unlock(&packet_sklist_lock);
1920 }
1921
1922 static int packet_seq_show(struct seq_file *seq, void *v)
1923 {
1924 if (v == SEQ_START_TOKEN)
1925 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
1926 else {
1927 struct sock *s = v;
1928 const struct packet_sock *po = pkt_sk(s);
1929
1930 seq_printf(seq,
1931 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
1932 s,
1933 atomic_read(&s->sk_refcnt),
1934 s->sk_type,
1935 ntohs(po->num),
1936 po->ifindex,
1937 po->running,
1938 atomic_read(&s->sk_rmem_alloc),
1939 sock_i_uid(s),
1940 sock_i_ino(s) );
1941 }
1942
1943 return 0;
1944 }
1945
1946 static struct seq_operations packet_seq_ops = {
1947 .start = packet_seq_start,
1948 .next = packet_seq_next,
1949 .stop = packet_seq_stop,
1950 .show = packet_seq_show,
1951 };
1952
1953 static int packet_seq_open(struct inode *inode, struct file *file)
1954 {
1955 return seq_open(file, &packet_seq_ops);
1956 }
1957
1958 static const struct file_operations packet_seq_fops = {
1959 .owner = THIS_MODULE,
1960 .open = packet_seq_open,
1961 .read = seq_read,
1962 .llseek = seq_lseek,
1963 .release = seq_release,
1964 };
1965
1966 #endif
1967
1968 static void __exit packet_exit(void)
1969 {
1970 proc_net_remove("packet");
1971 unregister_netdevice_notifier(&packet_netdev_notifier);
1972 sock_unregister(PF_PACKET);
1973 proto_unregister(&packet_proto);
1974 }
1975
1976 static int __init packet_init(void)
1977 {
1978 int rc = proto_register(&packet_proto, 0);
1979
1980 if (rc != 0)
1981 goto out;
1982
1983 sock_register(&packet_family_ops);
1984 register_netdevice_notifier(&packet_netdev_notifier);
1985 proc_net_fops_create("packet", 0, &packet_seq_fops);
1986 out:
1987 return rc;
1988 }
1989
1990 module_init(packet_init);
1991 module_exit(packet_exit);
1992 MODULE_LICENSE("GPL");
1993 MODULE_ALIAS_NETPROTO(PF_PACKET);