/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
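
/*
 * Tuning note (not part of the original file): this limit is runtime
 * tunable. Assuming the standard DCCP sysctl table registered in
 * net/dccp/sysctl.c, it should be reachable as net.dccp.default.tx_qlen:
 *
 *	# sysctl -w net.dccp.default.tx_qlen=10
 */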

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static const char *dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC 793, sec. 3.8.
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* The socket is not locked. We are protected from async events
	 * by the poll logic, and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else { /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
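
/*
 * Illustration (not part of the original file): from userspace, the option
 * value for DCCP_SOCKOPT_SERVICE is one __be32 service code, optionally
 * followed by further __be32 alternatives. A hypothetical server offering
 * service code 42 plus two alternatives might do:
 *
 *	__be32 codes[3] = { htonl(42), htonl(43), htonl(44) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, codes, sizeof(codes));
 *
 * The first code becomes dccps_service; the remaining two end up in
 * dccps_service_list (dccpsl_nr == 2), matching the parsing above.
 */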
466
/* The first byte is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;
	/*
	 * RFC 4340, 6.1. Change Options
	 */
	if (opt.dccpsf_len < 1)
		return -EINVAL;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	/* On success the feature list takes ownership of val. */
	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

	return 0;

out_free_val:
	kfree(val);
	return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340, sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
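
/*
 * Illustration (not part of the original file): partial checksum coverage,
 * per RFC 4340, sec. 9.2, is in multiples of 4 bytes: the value 1 covers
 * headers only, while 15 covers the headers plus the first 56 bytes of
 * payload. A hypothetical sender willing to checksum only its headers
 * would do:
 *
 *	int cscov = 1;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV, &cscov,
 *		   sizeof(cscov));
 */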

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
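
/*
 * Illustration (not part of the original file): option names 128..191 are
 * reserved for the RX CCID and 192..255 for the TX CCID, so CCID-specific
 * state is queried through the same getsockopt() entry point. For example,
 * assuming CCID-3 and its DCCP_SOCKOPT_CCID_TX_INFO option, a caller might
 * retrieve the TX half-connection state with:
 *
 *	struct tfrc_tx_info info;
 *	socklen_t sz = sizeof(info);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID_TX_INFO, &info, &sz);
 */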

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
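
/*
 * Illustration (not part of the original file): unlike TCP, each
 * dccp_sendmsg() call above maps to exactly one DCCP-Data packet, and a
 * payload larger than the current maximum packet size is rejected with
 * -EMSGSIZE rather than segmented. A hypothetical sender would therefore
 * size its writes against DCCP_SOCKOPT_GET_CUR_MPS:
 *
 *	int mps;
 *	socklen_t sz = sizeof(mps);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &sz);
 *	if (payload_len <= mps)
 *		send(fd, payload, payload_len, 0);
 */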

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
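
/*
 * Illustration (not part of the original file): a minimal passive DCCP
 * endpoint exercising this path might be set up from userspace roughly as
 * follows (port number hypothetical):
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	struct sockaddr_in sa = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(4242),
 *	};
 *
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(fd, 5);
 *
 * The listen() call lands in inet_dccp_listen() above, which flips the
 * socket into DCCP_LISTEN via dccp_listen_start().
 */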

static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the receive buffers. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
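
/*
 * Illustration (not part of the original file): assuming DCCP is built as
 * the "dccp" module, the established-hash sizing can be pinned at load
 * time instead of being derived from memory size in dccp_init() below:
 *
 *	# modprobe dccp thash_entries=8192
 */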

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
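
	/*
	 * Worked example (not part of the original file, assuming 4 KiB
	 * pages, i.e. PAGE_SHIFT == 12): with 512 MiB of memory,
	 * num_physpages == 131072 >= 128 * 1024, so
	 * goal = 131072 >> (21 - 12) = 256 pages, and the loop above picks
	 * ehash_order = 8, since 1 << 8 == 256 is the first power of two
	 * that is not below goal.
	 */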
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");