/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock  = RW_LOCK_UNLOCKED,
        .lhash_users = ATOMIC_INIT(0),
        .lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
                      dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
                [DCCP_OPEN]       = "OPEN",
                [DCCP_REQUESTING] = "REQUESTING",
                [DCCP_PARTOPEN]   = "PARTOPEN",
                [DCCP_LISTEN]     = "LISTEN",
                [DCCP_RESPOND]    = "RESPOND",
                [DCCP_CLOSING]    = "CLOSING",
                [DCCP_TIME_WAIT]  = "TIME_WAIT",
                [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);
        do_gettimeofday(&dp->dccps_epoch);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * let's leave it here; later, the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* The control socket doesn't need feature negotiation. */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        dccp_init_xmit_timers(sk);
        icsk->icsk_rto        = DCCP_TIMEOUT_INIT;
        sk->sk_state          = DCCP_CLOSED;
        sk->sk_write_space    = dccp_write_space;
        icsk->icsk_sync_mss   = dccp_sync_mss;
        dp->dccps_mss_cache   = 536;
        dp->dccps_role        = DCCP_ROLE_UNDEFINED;
        dp->dccps_service     = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_clean(dmsk);

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
                /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else { /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

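/*
 * A minimal userspace sketch (not part of this file) of how the poll()
 * semantics above look from the other side of the syscall, assuming a
 * connected DCCP socket fd. Only standard POSIX <poll.h> names are used.
 */
#if 0
#include <poll.h>

static int wait_readable(int fd, int timeout_ms)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        int rc = poll(&pfd, 1, timeout_ms);

        if (rc <= 0)                            /* error or timeout */
                return rc;
        if (pfd.revents & (POLLHUP | POLLERR))
                return -1;                      /* peer closed or socket error */
        return pfd.revents & POLLIN;            /* data (or RCV_SHUTDOWN) pending */
}
#endif
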
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

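/*
 * A minimal userspace sketch (not part of this file) of setting the DCCP
 * service code before connect()/listen(), which lands in
 * dccp_setsockopt_service() above. SOL_DCCP and DCCP_SOCKOPT_SERVICE come
 * from <sys/socket.h>/<linux/dccp.h>; the service value 42 is an arbitrary
 * example. Appending further __be32 values to the buffer would populate
 * dccps_service_list as handled above.
 */
#if 0
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/dccp.h>

static int set_service(int fd)
{
        uint32_t service = htonl(42);   /* single service code, network order */

        return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
                          &service, sizeof(service));
}
#endif
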
/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                kfree(val);
                return -EFAULT;
        }

        /* On success, dccp_feat_change() takes ownership of val. */
        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                kfree(val);

        return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp;
        int err;
        int val;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        dp = dccp_sk(sk);
        err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                dp->dccps_packet_size = val;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

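/*
 * A minimal userspace sketch (not part of this file) of the partial
 * checksum coverage options handled above (RFC 4340, sec. 9.2/9.2.1).
 * DCCP_SOCKOPT_SEND_CSCOV / DCCP_SOCKOPT_RECV_CSCOV come from
 * <linux/dccp.h>; valid coverage values are 0..15 as checked above.
 */
#if 0
#include <sys/socket.h>
#include <linux/dccp.h>

static int set_checksum_coverage(int fd)
{
        int cscov = 3;  /* checksum covers headers plus 2 words of payload */

        if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
                       &cscov, sizeof(cscov)) < 0)
                return -1;
        return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV,
                          &cscov, sizeof(cscov));
}
#endif
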
int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                val = dp->dccps_packet_size;
                break;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        /* Never copy more than sizeof(val) back to userspace. */
        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb, 0);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

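/*
 * A minimal userspace sketch (not part of this file) of a DCCP client
 * exercising dccp_sendmsg()/dccp_recvmsg() above. SOCK_DCCP and
 * IPPROTO_DCCP come from <sys/socket.h>/<netinet/in.h>; address and port
 * are arbitrary examples. Note that, as enforced in dccp_sendmsg(), a
 * message larger than the MSS fails with EMSGSIZE instead of being
 * fragmented.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

static int dccp_client(void)
{
        struct sockaddr_in sa = {
                .sin_family = AF_INET,
                .sin_port   = htons(4242),
        };
        char buf[128];
        int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);

        if (fd < 0)
                return -1;
        inet_pton(AF_INET, "127.0.0.1", &sa.sin_addr);
        if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                goto err;
        if (send(fd, "ping", 4, 0) < 0)                 /* one datagram out */
                goto err;
        if (recv(fd, buf, sizeof(buf), 0) < 0)          /* one datagram back */
                goto err;
        return fd;
err:
        close(fd);
        return -1;
}
#endif
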
int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

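/*
 * A minimal userspace sketch (not part of this file) of the server-side
 * calls that reach inet_dccp_listen() above. As with TCP, calling
 * listen() again on an already-listening socket only adjusts the backlog.
 * The port is an arbitrary example.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static int dccp_server(void)
{
        struct sockaddr_in sa = {
                .sin_family      = AF_INET,
                .sin_port        = htons(4242),
                .sin_addr.s_addr = INADDR_ANY,
        };
        int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);

        if (fd < 0 ||
            bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0 ||
            listen(fd, 5) < 0)
                return -1;
        return accept(fd, NULL, NULL);
}
#endif
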
static const unsigned char dccp_new_state[] = {
        /* current state:        new state:      action: */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

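/*
 * The table above packs two facts into each byte: the low bits
 * (DCCP_STATE_MASK) give the state to move to, and DCCP_ACTION_FIN says
 * whether a CLOSE must be sent first. A worked example of the decoding
 * done by dccp_close_state():
 */
#if 0
static int example_close_decode(void)
{
        int next = dccp_new_state[DCCP_OPEN];   /* DCCP_CLOSING | DCCP_ACTION_FIN */
        int ns   = next & DCCP_STATE_MASK;      /* new state: DCCP_CLOSING */

        return next & DCCP_ACTION_FIN;          /* non-zero: send a CLOSE first */
}
#endif
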
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the receive buffers. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL)
                __kfree_skb(skb);

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                           sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                       "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                           sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

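/*
 * A worked example of the ehash sizing above, assuming 4 KiB pages
 * (PAGE_SHIFT = 12) and num_physpages = 128 * 1024 (512 MiB of RAM):
 * goal = 131072 >> (21 - 12) = 256 pages, so the for-loop stops at
 * ehash_order = 8. The table is then sized to fill those 256 pages,
 * halved once, and rounded down to a power of two, which is what the
 * (ehash_size - 1) style bucket masking expects.
 */
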
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");