/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock = RW_LOCK_UNLOCKED,
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

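/*
 * dccp_init_sock  -  initialise the DCCP-specific state of a socket
 *
 * Sets up feature negotiation, the RX/TX CCID control blocks and the
 * transmit timers.  The very first call is taken to come from the
 * control socket, which skips feature negotiation and CCID setup.
 */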
int dccp_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);
	do_gettimeofday(&dp->dccps_epoch);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dp->dccps_options.dccpo_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid =
				ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
					       sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid =
				ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
					       sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dp->dccps_options.dccpo_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
		dccp_ctl_socket_init = 0;
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto = DCCP_TIMEOUT_INIT;
	sk->sk_state = DCCP_CLOSED;
	sk->sk_write_space = dccp_write_space;
	icsk->icsk_sync_mss = dccp_sync_mss;
	dp->dccps_mss_cache = 536;
	dp->dccps_role = DCCP_ROLE_UNDEFINED;
	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
	dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

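/*
 * dccp_destroy_sock  -  release the resources held by a DCCP socket
 *
 * Frees the pending retransmit skb, the bind bucket, the service list,
 * the ack vector and both CCID control blocks, and cleans up any
 * in-flight feature negotiation state.
 */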
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_options.dccpo_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(sk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/*
	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
	 * before calling listen()
	 */
	if (dccp_service_not_initialized(sk))
		return -EPROTO;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

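/*
 * dccp_disconnect  -  abort the connection and return the socket to
 * the CLOSED state, purging queued data, stopping the transmit timers
 * and dropping the cached route (the ABORT function of RFC 793).
 */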
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC 793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
		/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

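/*
 * dccp_setsockopt_service  -  handle setsockopt(DCCP_SOCKOPT_SERVICE)
 *
 * The first 32-bit word of optval is the service code proper; any
 * remaining words form the list of additional service codes accepted
 * on this socket.  DCCP_SERVICE_INVALID_VALUE is rejected both as the
 * service code and inside the list.
 */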
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
			      GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

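/*
 * dccp_setsockopt  -  DCCP-level setsockopt() entry point
 *
 * Levels other than SOL_DCCP are passed down to the address-family
 * layer; DCCP_SOCKOPT_SERVICE and the CHANGE_L/CHANGE_R feature
 * negotiation options are handled by the helpers above.
 */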
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	struct dccp_sock *dp;
	int err;
	int val;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	dp = dccp_sk(sk);
	err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		dp->dccps_packet_size = val;
		break;

	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;

	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

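/*
 * dccp_getsockopt  -  DCCP-level getsockopt() entry point
 *
 * Option names 128..191 are reserved for the RX CCID and 192..255 for
 * the TX CCID, so those ranges are forwarded to the respective CCID
 * getsockopt handlers.
 */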
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	if (get_user(len, optlen))
		return -EFAULT;

	if (len < sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		val = dp->dccps_packet_size;
		len = sizeof(dp->dccps_packet_size);
		break;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

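/*
 * dccp_sendmsg  -  queue one datagram for transmission
 *
 * Unlike TCP, DCCP preserves packet boundaries, so a message larger
 * than the cached MSS is rejected with -EMSGSIZE rather than being
 * split across packets.
 */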
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorithm similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

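/*
 * dccp_recvmsg  -  receive one datagram
 *
 * Only DATA and DATAACK packets carry payload; other packet types are
 * eaten with just a debug message, while RESET and CLOSE end the read
 * like an EOF.  Packet boundaries are preserved: a message longer than
 * the supplied buffer is truncated and MSG_TRUNC is set.
 */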
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

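/*
 * inet_dccp_listen  -  DCCP's listen() entry point
 *
 * Moves a CLOSED socket into the LISTEN state; on a socket that is
 * already listening only the backlog is adjusted.
 */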
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static const unsigned char dccp_new_state[] = {
	/* current state:	 new state:	  action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

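/*
 * dccp_close_state  -  move the socket to its on-close state
 *
 * Looks up the next state in dccp_new_state and returns non-zero
 * (the DCCP_ACTION_FIN bit) if a CLOSE needs to be sent.
 */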
static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}

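/*
 * dccp_close  -  close() entry point, modelled on tcp_close()
 *
 * Flushes the receive queue, sends a CLOSE if the state machine asks
 * for one, then orphans the socket and lets the CLOSE/CLOSEREQ
 * retransmission timer finish the termination handshake.
 */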
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

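/*
 * dccp_mib_init  -  allocate the per-CPU SNMP counters
 *
 * Two per-cpu tables are allocated, matching the SNMP_INC_STATS*
 * macros, which update one table from BH context and the other from
 * user context.
 */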
static int __init dccp_mib_init(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

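/*
 * dccp_init  -  module initialisation
 *
 * Sizes the established and bind hash tables from the amount of
 * physical memory (or from the thash_entries parameter), rounding the
 * established table down to a power of two, then brings up the MIB,
 * ack vector and sysctl subsystems.
 */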
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
		       "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");