/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

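/*
 * Connection-hash bookkeeping shared with the inet layer. Only the
 * listening-hash fields are initialised statically here; the ehash and
 * bhash tables are sized and allocated at module load in dccp_init().
 */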
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock  = RW_LOCK_UNLOCKED,
        .lhash_users = ATOMIC_INIT(0),
        .lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* The maximum TX queue length, in packets. 0 means no limit. */
int sysctl_dccp_tx_qlen __read_mostly = 5;

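/*
 * dccp_set_state - move a socket to a new DCCP state
 *
 * Updates the CURRESTAB/ESTABRESETS SNMP counters on transitions into and
 * out of DCCP_OPEN, and unhashes the socket (releasing its bound port,
 * unless the user locked it) before the state is set to DCCP_CLOSED, so
 * that a closed socket never sits in the lookup tables.
 */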
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
                      dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type < 0 || type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
                [DCCP_OPEN]       = "OPEN",
                [DCCP_REQUESTING] = "REQUESTING",
                [DCCP_PARTOPEN]   = "PARTOPEN",
                [DCCP_LISTEN]     = "LISTEN",
                [DCCP_RESPOND]    = "RESPOND",
                [DCCP_CLOSING]    = "CLOSING",
                [DCCP_TIME_WAIT]  = "TIME_WAIT",
                [DCCP_CLOSED]     = "CLOSED",
        };

        if (state < 0 || state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

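/*
 * dccp_init_sock - set up a freshly allocated DCCP socket
 *
 * When @ctl_sock_initialized is zero (i.e. while the DCCP control socket
 * itself is being created), feature negotiation and CCID allocation are
 * skipped. Returns 0 on success or a negative errno if feature or CCID
 * initialisation fails.
 */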
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * let's leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* The control socket doesn't need feature negotiation. */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        dccp_init_xmit_timers(sk);
        icsk->icsk_rto = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries = sysctl_dccp_request_retries;
        sk->sk_state = DCCP_CLOSED;
        sk->sk_write_space = dccp_write_space;
        icsk->icsk_sync_mss = dccp_sync_mss;
        dp->dccps_mss_cache = 536;
        dp->dccps_rate_last = jiffies;
        dp->dccps_role = DCCP_ROLE_UNDEFINED;
        dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* Clean up feature negotiation state. */
        dccp_feat_clean(dmsk);

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

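/*
 * dccp_disconnect - abort the connection and reset the socket for reuse
 *
 * Implements the ABORT function of RFC 793 as applied to DCCP: queued
 * data is dropped, timers are cleared, and the socket returns to
 * DCCP_CLOSED; the source address is reset unless the user has locked
 * the local binding.
 */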
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC 793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
                /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else { /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet
                         * since that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
                break;
        }
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}
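
/*
 * Hedged usage sketch (not part of this file's API): a userspace server
 * could install a primary service code plus two alternatives like this;
 * the fd, values and variable names below are illustrative only.
 *
 *	uint32_t codes[3] = { htonl(42), htonl(43), htonl(44) };
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		       codes, sizeof(codes)) < 0)
 *		perror("DCCP_SOCKOPT_SERVICE");
 *
 * The first word becomes dccps_service above; the remaining words are
 * stored in dccps_service_list.
 */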

/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;
        /*
         * A Change option must carry at least one preference value
         * (RFC 4340, 6.1), so reject empty lists up front.
         */
        if (opt.dccpsf_len < 1)
                return -EINVAL;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

        return 0;

out_free_val:
        kfree(val);
        return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);
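
/*
 * Hedged usage sketch: setting partial checksum coverage from userspace.
 * The call below is illustrative only; it restricts the sender's checksum
 * to the headers plus an initial portion of the payload (valid values are
 * 0..15 as checked above; see RFC 4340, 9.2 for the exact encoding).
 *
 *	int cov = 10;
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		       &cov, sizeof(cov)) < 0)
 *		perror("DCCP_SOCKOPT_SEND_CSCOV");
 */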

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                len = sizeof(val);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                len = sizeof(val);
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                len = sizeof(val);
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);
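
/*
 * Hedged usage sketch: querying the current maximum packet size. Names
 * and error handling are illustrative only.
 *
 *	int mps;
 *	socklen_t len = sizeof(mps);
 *
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS,
 *		       &mps, &len) == 0)
 *		printf("current MPS: %d bytes\n", mps);
 *
 * This reads back dccps_mss_cache, the same value that bounds a single
 * dccp_sendmsg() payload below.
 */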

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

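/*
 * dccp_sendmsg - queue exactly one DCCP-Data packet per call
 *
 * DCCP preserves datagram boundaries: each sendmsg() produces one packet,
 * so payloads larger than the current maximum packet size are rejected
 * with -EMSGSIZE rather than fragmented, and a full TX queue (bounded by
 * sysctl_dccp_tx_qlen) yields -EAGAIN.
 */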
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

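/*
 * dccp_recvmsg - receive at most one queued datagram
 *
 * Only DCCP-Data and DCCP-DataAck packets carry payload; a queued Reset
 * or Close is treated like an EOF marker and returns 0. If the supplied
 * buffer is smaller than the datagram, the tail is discarded and
 * MSG_TRUNC is set, matching the usual datagram-socket semantics.
 */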
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb, 0);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

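/*
 * State-transition table for dccp_close(): each entry packs the next
 * state (extracted with DCCP_STATE_MASK in dccp_close_state() below)
 * together with an optional DCCP_ACTION_FIN flag that tells the caller a
 * Close/CloseReq must be sent on the wire.
 */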
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action: */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in this socket's life. It will
         * remove the backlog.
         */
        release_sock(sk);
        /*
         * Now the socket is owned by the kernel and we acquire the BH lock
         * to finish the close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving the sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer
                 * arrives. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, the socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

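/*
 * Note: shutdown() is currently a no-op for DCCP apart from the debug
 * trace below; half-close semantics are not implemented here.
 */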
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

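        /*
         * Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
         * with 512 MiB of memory, num_physpages == 131072 >= 128 * 1024,
         * so goal = 131072 >> (21 - 12) = 256 pages, giving ehash_order = 8
         * below. A thash_entries module parameter overrides this heuristic.
         */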
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                           sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                           sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");