]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/dccp/proto.c
[DCCP]: Dedicated auxiliary states to support passive-close
[mirror_ubuntu-artful-kernel.git] / net / dccp / proto.c
CommitLineData
7c657876
ACM
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
7c657876
ACM
12#include <linux/dccp.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/skbuff.h>
18#include <linux/netdevice.h>
19#include <linux/in.h>
20#include <linux/if_arp.h>
21#include <linux/init.h>
22#include <linux/random.h>
23#include <net/checksum.h>
24
14c85021 25#include <net/inet_sock.h>
7c657876
ACM
26#include <net/sock.h>
27#include <net/xfrm.h>
28
6273172e 29#include <asm/ioctls.h>
7c657876
ACM
30#include <asm/semaphore.h>
31#include <linux/spinlock.h>
32#include <linux/timer.h>
33#include <linux/delay.h>
34#include <linux/poll.h>
7c657876
ACM
35
36#include "ccid.h"
37#include "dccp.h"
afe00251 38#include "feat.h"
7c657876 39
ba89966c 40DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
7c657876 41
f21e68ca
ACM
42EXPORT_SYMBOL_GPL(dccp_statistics);
43
7c657876
ACM
44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
f21e68ca
ACM
46EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
075ae866
ACM
48struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49 .lhash_lock = RW_LOCK_UNLOCKED,
50 .lhash_users = ATOMIC_INIT(0),
51 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52};
53
54EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
b1308dc0
IM
56/* the maximum queue length for tx in packets. 0 is no limit */
57int sysctl_dccp_tx_qlen __read_mostly = 5;
58
c25a18ba
ACM
59void dccp_set_state(struct sock *sk, const int state)
60{
61 const int oldstate = sk->sk_state;
62
f11135a3 63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
c25a18ba
ACM
64 dccp_state_name(oldstate), dccp_state_name(state));
65 WARN_ON(state == oldstate);
66
67 switch (state) {
68 case DCCP_OPEN:
69 if (oldstate != DCCP_OPEN)
70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71 break;
72
73 case DCCP_CLOSED:
74 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
75 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77 sk->sk_prot->unhash(sk);
78 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80 inet_put_port(&dccp_hashinfo, sk);
81 /* fall through */
82 default:
83 if (oldstate == DCCP_OPEN)
84 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85 }
86
87 /* Change state AFTER socket is unhashed to avoid closed
88 * socket sitting in hash tables.
89 */
90 sk->sk_state = state;
91}
92
93EXPORT_SYMBOL_GPL(dccp_set_state);
94
95void dccp_done(struct sock *sk)
96{
97 dccp_set_state(sk, DCCP_CLOSED);
98 dccp_clear_xmit_timers(sk);
99
100 sk->sk_shutdown = SHUTDOWN_MASK;
101
102 if (!sock_flag(sk, SOCK_DEAD))
103 sk->sk_state_change(sk);
104 else
105 inet_csk_destroy_sock(sk);
106}
107
108EXPORT_SYMBOL_GPL(dccp_done);
109
7c657876
ACM
110const char *dccp_packet_name(const int type)
111{
112 static const char *dccp_packet_names[] = {
113 [DCCP_PKT_REQUEST] = "REQUEST",
114 [DCCP_PKT_RESPONSE] = "RESPONSE",
115 [DCCP_PKT_DATA] = "DATA",
116 [DCCP_PKT_ACK] = "ACK",
117 [DCCP_PKT_DATAACK] = "DATAACK",
118 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
119 [DCCP_PKT_CLOSE] = "CLOSE",
120 [DCCP_PKT_RESET] = "RESET",
121 [DCCP_PKT_SYNC] = "SYNC",
122 [DCCP_PKT_SYNCACK] = "SYNCACK",
123 };
124
125 if (type >= DCCP_NR_PKT_TYPES)
126 return "INVALID";
127 else
128 return dccp_packet_names[type];
129}
130
131EXPORT_SYMBOL_GPL(dccp_packet_name);
132
133const char *dccp_state_name(const int state)
134{
135 static char *dccp_state_names[] = {
f11135a3
GR
136 [DCCP_OPEN] = "OPEN",
137 [DCCP_REQUESTING] = "REQUESTING",
138 [DCCP_PARTOPEN] = "PARTOPEN",
139 [DCCP_LISTEN] = "LISTEN",
140 [DCCP_RESPOND] = "RESPOND",
141 [DCCP_CLOSING] = "CLOSING",
142 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
143 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
144 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
145 [DCCP_TIME_WAIT] = "TIME_WAIT",
146 [DCCP_CLOSED] = "CLOSED",
7c657876
ACM
147 };
148
149 if (state >= DCCP_MAX_STATES)
150 return "INVALID STATE!";
151 else
152 return dccp_state_names[state];
153}
154
155EXPORT_SYMBOL_GPL(dccp_state_name);
156
c985ed70
ACM
157void dccp_hash(struct sock *sk)
158{
159 inet_hash(&dccp_hashinfo, sk);
160}
161
162EXPORT_SYMBOL_GPL(dccp_hash);
163
164void dccp_unhash(struct sock *sk)
165{
166 inet_unhash(&dccp_hashinfo, sk);
167}
168
169EXPORT_SYMBOL_GPL(dccp_unhash);
170
72478873 171int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
3e0fadc5
ACM
172{
173 struct dccp_sock *dp = dccp_sk(sk);
a4bf3902 174 struct dccp_minisock *dmsk = dccp_msk(sk);
3e0fadc5 175 struct inet_connection_sock *icsk = inet_csk(sk);
3e0fadc5 176
a4bf3902 177 dccp_minisock_init(&dp->dccps_minisock);
3e0fadc5 178
e18d7a98
ACM
179 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
180 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
181 sk->sk_state = DCCP_CLOSED;
182 sk->sk_write_space = dccp_write_space;
183 icsk->icsk_sync_mss = dccp_sync_mss;
184 dp->dccps_mss_cache = 536;
185 dp->dccps_rate_last = jiffies;
186 dp->dccps_role = DCCP_ROLE_UNDEFINED;
187 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
188 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
189
190 dccp_init_xmit_timers(sk);
191
3e0fadc5
ACM
192 /*
193 * FIXME: We're hardcoding the CCID, and doing this at this point makes
194 * the listening (master) sock get CCID control blocks, which is not
195 * necessary, but for now, to not mess with the test userspace apps,
196 * lets leave it here, later the real solution is to do this in a
197 * setsockopt(CCIDs-I-want/accept). -acme
198 */
72478873 199 if (likely(ctl_sock_initialized)) {
8ca0d17b 200 int rc = dccp_feat_init(dmsk);
3e0fadc5
ACM
201
202 if (rc)
203 return rc;
204
a4bf3902 205 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
206 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
207 if (dp->dccps_hc_rx_ackvec == NULL)
208 return -ENOMEM;
209 }
a4bf3902
ACM
210 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
211 sk, GFP_KERNEL);
212 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
213 sk, GFP_KERNEL);
8109b02b 214 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
3e0fadc5
ACM
215 dp->dccps_hc_tx_ccid == NULL)) {
216 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
217 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
a4bf3902 218 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
219 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
220 dp->dccps_hc_rx_ackvec = NULL;
221 }
222 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
223 return -ENOMEM;
224 }
225 } else {
226 /* control socket doesn't need feat nego */
a4bf3902
ACM
227 INIT_LIST_HEAD(&dmsk->dccpms_pending);
228 INIT_LIST_HEAD(&dmsk->dccpms_conf);
3e0fadc5
ACM
229 }
230
3e0fadc5
ACM
231 return 0;
232}
233
234EXPORT_SYMBOL_GPL(dccp_init_sock);
235
236int dccp_destroy_sock(struct sock *sk)
237{
238 struct dccp_sock *dp = dccp_sk(sk);
8ca0d17b 239 struct dccp_minisock *dmsk = dccp_msk(sk);
3e0fadc5
ACM
240
241 /*
242 * DCCP doesn't use sk_write_queue, just sk_send_head
243 * for retransmissions
244 */
245 if (sk->sk_send_head != NULL) {
246 kfree_skb(sk->sk_send_head);
247 sk->sk_send_head = NULL;
248 }
249
250 /* Clean up a referenced DCCP bind bucket. */
251 if (inet_csk(sk)->icsk_bind_hash != NULL)
252 inet_put_port(&dccp_hashinfo, sk);
253
254 kfree(dp->dccps_service_list);
255 dp->dccps_service_list = NULL;
256
8ca0d17b 257 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
258 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
259 dp->dccps_hc_rx_ackvec = NULL;
260 }
261 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
262 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
263 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
264
265 /* clean up feature negotiation state */
8ca0d17b 266 dccp_feat_clean(dmsk);
3e0fadc5
ACM
267
268 return 0;
269}
270
271EXPORT_SYMBOL_GPL(dccp_destroy_sock);
272
72a3effa 273static inline int dccp_listen_start(struct sock *sk, int backlog)
7c657876 274{
67e6b629
ACM
275 struct dccp_sock *dp = dccp_sk(sk);
276
277 dp->dccps_role = DCCP_ROLE_LISTEN;
72a3effa 278 return inet_csk_listen_start(sk, backlog);
7c657876
ACM
279}
280
ce865a61
GR
281static inline int dccp_need_reset(int state)
282{
283 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
284 state != DCCP_REQUESTING;
285}
286
7c657876
ACM
287int dccp_disconnect(struct sock *sk, int flags)
288{
289 struct inet_connection_sock *icsk = inet_csk(sk);
290 struct inet_sock *inet = inet_sk(sk);
291 int err = 0;
292 const int old_state = sk->sk_state;
293
294 if (old_state != DCCP_CLOSED)
295 dccp_set_state(sk, DCCP_CLOSED);
296
ce865a61
GR
297 /*
298 * This corresponds to the ABORT function of RFC793, sec. 3.8
299 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
300 */
7c657876
ACM
301 if (old_state == DCCP_LISTEN) {
302 inet_csk_listen_stop(sk);
ce865a61
GR
303 } else if (dccp_need_reset(old_state)) {
304 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
305 sk->sk_err = ECONNRESET;
7c657876
ACM
306 } else if (old_state == DCCP_REQUESTING)
307 sk->sk_err = ECONNRESET;
308
309 dccp_clear_xmit_timers(sk);
310 __skb_queue_purge(&sk->sk_receive_queue);
311 if (sk->sk_send_head != NULL) {
312 __kfree_skb(sk->sk_send_head);
313 sk->sk_send_head = NULL;
314 }
315
316 inet->dport = 0;
317
318 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
319 inet_reset_saddr(sk);
320
321 sk->sk_shutdown = 0;
322 sock_reset_flag(sk, SOCK_DONE);
323
324 icsk->icsk_backoff = 0;
325 inet_csk_delack_init(sk);
326 __sk_dst_reset(sk);
327
328 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
329
330 sk->sk_error_report(sk);
331 return err;
332}
333
f21e68ca
ACM
334EXPORT_SYMBOL_GPL(dccp_disconnect);
335
331968bd
ACM
336/*
337 * Wait for a DCCP event.
338 *
339 * Note that we don't need to lock the socket, as the upper poll layers
340 * take care of normal races (between the test and the event) and we don't
341 * go look at any of the socket buffers directly.
342 */
f21e68ca
ACM
343unsigned int dccp_poll(struct file *file, struct socket *sock,
344 poll_table *wait)
331968bd
ACM
345{
346 unsigned int mask;
347 struct sock *sk = sock->sk;
348
349 poll_wait(file, sk->sk_sleep, wait);
350 if (sk->sk_state == DCCP_LISTEN)
351 return inet_csk_listen_poll(sk);
352
353 /* Socket is not locked. We are protected from async events
354 by poll logic and correct handling of state changes
355 made by another threads is impossible in any case.
356 */
357
358 mask = 0;
359 if (sk->sk_err)
360 mask = POLLERR;
361
362 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
363 mask |= POLLHUP;
364 if (sk->sk_shutdown & RCV_SHUTDOWN)
f348d70a 365 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
331968bd
ACM
366
367 /* Connected? */
368 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
369 if (atomic_read(&sk->sk_rmem_alloc) > 0)
370 mask |= POLLIN | POLLRDNORM;
371
372 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
373 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
374 mask |= POLLOUT | POLLWRNORM;
375 } else { /* send SIGIO later */
376 set_bit(SOCK_ASYNC_NOSPACE,
377 &sk->sk_socket->flags);
378 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
379
380 /* Race breaker. If space is freed after
381 * wspace test but before the flags are set,
382 * IO signal will be lost.
383 */
384 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
385 mask |= POLLOUT | POLLWRNORM;
386 }
387 }
388 }
389 return mask;
390}
391
f21e68ca
ACM
392EXPORT_SYMBOL_GPL(dccp_poll);
393
7c657876
ACM
394int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
395{
6273172e
ACM
396 int rc = -ENOTCONN;
397
398 lock_sock(sk);
399
400 if (sk->sk_state == DCCP_LISTEN)
401 goto out;
402
403 switch (cmd) {
404 case SIOCINQ: {
405 struct sk_buff *skb;
406 unsigned long amount = 0;
407
408 skb = skb_peek(&sk->sk_receive_queue);
409 if (skb != NULL) {
410 /*
411 * We will only return the amount of this packet since
412 * that is all that will be read.
413 */
414 amount = skb->len;
415 }
416 rc = put_user(amount, (int __user *)arg);
417 }
418 break;
419 default:
420 rc = -ENOIOCTLCMD;
421 break;
422 }
423out:
424 release_sock(sk);
425 return rc;
7c657876
ACM
426}
427
f21e68ca
ACM
428EXPORT_SYMBOL_GPL(dccp_ioctl);
429
60fe62e7 430static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
67e6b629
ACM
431 char __user *optval, int optlen)
432{
433 struct dccp_sock *dp = dccp_sk(sk);
434 struct dccp_service_list *sl = NULL;
435
8109b02b 436 if (service == DCCP_SERVICE_INVALID_VALUE ||
67e6b629
ACM
437 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
438 return -EINVAL;
439
440 if (optlen > sizeof(service)) {
441 sl = kmalloc(optlen, GFP_KERNEL);
442 if (sl == NULL)
443 return -ENOMEM;
444
445 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
446 if (copy_from_user(sl->dccpsl_list,
447 optval + sizeof(service),
448 optlen - sizeof(service)) ||
449 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
450 kfree(sl);
451 return -EFAULT;
452 }
453 }
454
455 lock_sock(sk);
456 dp->dccps_service = service;
457
a51482bd 458 kfree(dp->dccps_service_list);
67e6b629
ACM
459
460 dp->dccps_service_list = sl;
461 release_sock(sk);
462 return 0;
463}
464
afe00251
AB
465/* byte 1 is feature. the rest is the preference list */
466static int dccp_setsockopt_change(struct sock *sk, int type,
467 struct dccp_so_feat __user *optval)
468{
469 struct dccp_so_feat opt;
470 u8 *val;
471 int rc;
472
473 if (copy_from_user(&opt, optval, sizeof(opt)))
474 return -EFAULT;
475
476 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
477 if (!val)
478 return -ENOMEM;
479
480 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
481 rc = -EFAULT;
482 goto out_free_val;
483 }
484
8ca0d17b
ACM
485 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
486 val, opt.dccpsf_len, GFP_KERNEL);
afe00251
AB
487 if (rc)
488 goto out_free_val;
489
490out:
491 return rc;
492
493out_free_val:
494 kfree(val);
495 goto out;
496}
497
3fdadf7d
DM
498static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
499 char __user *optval, int optlen)
7c657876 500{
09dbc389
GR
501 struct dccp_sock *dp = dccp_sk(sk);
502 int val, err = 0;
7c657876 503
a84ffe43
ACM
504 if (optlen < sizeof(int))
505 return -EINVAL;
506
507 if (get_user(val, (int __user *)optval))
508 return -EFAULT;
509
67e6b629
ACM
510 if (optname == DCCP_SOCKOPT_SERVICE)
511 return dccp_setsockopt_service(sk, val, optval, optlen);
a84ffe43 512
67e6b629 513 lock_sock(sk);
a84ffe43
ACM
514 switch (optname) {
515 case DCCP_SOCKOPT_PACKET_SIZE:
5aed3243 516 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
841bac1d 517 err = 0;
a84ffe43 518 break;
afe00251
AB
519 case DCCP_SOCKOPT_CHANGE_L:
520 if (optlen != sizeof(struct dccp_so_feat))
521 err = -EINVAL;
522 else
523 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
c9eaf173 524 (struct dccp_so_feat __user *)
afe00251
AB
525 optval);
526 break;
afe00251
AB
527 case DCCP_SOCKOPT_CHANGE_R:
528 if (optlen != sizeof(struct dccp_so_feat))
529 err = -EINVAL;
530 else
531 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
9faefb6d 532 (struct dccp_so_feat __user *)
afe00251
AB
533 optval);
534 break;
6f4e5fff
GR
535 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
536 if (val < 0 || val > 15)
537 err = -EINVAL;
538 else
539 dp->dccps_pcslen = val;
540 break;
541 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
542 if (val < 0 || val > 15)
543 err = -EINVAL;
544 else {
545 dp->dccps_pcrlen = val;
546 /* FIXME: add feature negotiation,
547 * ChangeL(MinimumChecksumCoverage, val) */
548 }
549 break;
a84ffe43
ACM
550 default:
551 err = -ENOPROTOOPT;
552 break;
553 }
6f4e5fff 554
a84ffe43
ACM
555 release_sock(sk);
556 return err;
7c657876
ACM
557}
558
3fdadf7d
DM
559int dccp_setsockopt(struct sock *sk, int level, int optname,
560 char __user *optval, int optlen)
561{
562 if (level != SOL_DCCP)
563 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
564 optname, optval,
565 optlen);
566 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
567}
543d9cfe 568
f21e68ca
ACM
569EXPORT_SYMBOL_GPL(dccp_setsockopt);
570
3fdadf7d
DM
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt entry point: SOL_DCCP options go through the same worker
 * as the native path, other levels use inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
583
67e6b629 584static int dccp_getsockopt_service(struct sock *sk, int len,
60fe62e7 585 __be32 __user *optval,
67e6b629
ACM
586 int __user *optlen)
587{
588 const struct dccp_sock *dp = dccp_sk(sk);
589 const struct dccp_service_list *sl;
590 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
591
592 lock_sock(sk);
67e6b629
ACM
593 if ((sl = dp->dccps_service_list) != NULL) {
594 slen = sl->dccpsl_nr * sizeof(u32);
595 total_len += slen;
596 }
597
598 err = -EINVAL;
599 if (total_len > len)
600 goto out;
601
602 err = 0;
603 if (put_user(total_len, optlen) ||
604 put_user(dp->dccps_service, optval) ||
605 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
606 err = -EFAULT;
607out:
608 release_sock(sk);
609 return err;
610}
611
3fdadf7d 612static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
a1d3a355 613 char __user *optval, int __user *optlen)
7c657876 614{
a84ffe43
ACM
615 struct dccp_sock *dp;
616 int val, len;
7c657876 617
a84ffe43
ACM
618 if (get_user(len, optlen))
619 return -EFAULT;
620
39ebc027 621 if (len < (int)sizeof(int))
a84ffe43
ACM
622 return -EINVAL;
623
624 dp = dccp_sk(sk);
625
626 switch (optname) {
627 case DCCP_SOCKOPT_PACKET_SIZE:
5aed3243 628 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
841bac1d 629 return 0;
88f964db
ACM
630 case DCCP_SOCKOPT_SERVICE:
631 return dccp_getsockopt_service(sk, len,
60fe62e7 632 (__be32 __user *)optval, optlen);
7c559a9e
GR
633 case DCCP_SOCKOPT_GET_CUR_MPS:
634 val = dp->dccps_mss_cache;
635 len = sizeof(val);
636 break;
6f4e5fff
GR
637 case DCCP_SOCKOPT_SEND_CSCOV:
638 val = dp->dccps_pcslen;
39ebc027 639 len = sizeof(val);
6f4e5fff
GR
640 break;
641 case DCCP_SOCKOPT_RECV_CSCOV:
642 val = dp->dccps_pcrlen;
39ebc027 643 len = sizeof(val);
6f4e5fff 644 break;
88f964db
ACM
645 case 128 ... 191:
646 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
647 len, (u32 __user *)optval, optlen);
648 case 192 ... 255:
649 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
650 len, (u32 __user *)optval, optlen);
a84ffe43
ACM
651 default:
652 return -ENOPROTOOPT;
653 }
654
655 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
656 return -EFAULT;
657
658 return 0;
7c657876
ACM
659}
660
3fdadf7d
DM
661int dccp_getsockopt(struct sock *sk, int level, int optname,
662 char __user *optval, int __user *optlen)
663{
664 if (level != SOL_DCCP)
665 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
666 optname, optval,
667 optlen);
668 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
669}
543d9cfe 670
f21e68ca
ACM
671EXPORT_SYMBOL_GPL(dccp_getsockopt);
672
3fdadf7d
DM
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt entry point: SOL_DCCP options go through the same worker
 * as the native path, other levels use inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
685
7c657876
ACM
686int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
687 size_t len)
688{
689 const struct dccp_sock *dp = dccp_sk(sk);
690 const int flags = msg->msg_flags;
691 const int noblock = flags & MSG_DONTWAIT;
692 struct sk_buff *skb;
693 int rc, size;
694 long timeo;
695
696 if (len > dp->dccps_mss_cache)
697 return -EMSGSIZE;
698
699 lock_sock(sk);
b1308dc0
IM
700
701 if (sysctl_dccp_tx_qlen &&
702 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
703 rc = -EAGAIN;
704 goto out_release;
705 }
706
27258ee5 707 timeo = sock_sndtimeo(sk, noblock);
7c657876
ACM
708
709 /*
710 * We have to use sk_stream_wait_connect here to set sk_write_pending,
711 * so that the trick in dccp_rcv_request_sent_state_process.
712 */
713 /* Wait for a connection to finish. */
cecd8d0e 714 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
7c657876 715 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
27258ee5 716 goto out_release;
7c657876
ACM
717
718 size = sk->sk_prot->max_header + len;
719 release_sock(sk);
720 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
721 lock_sock(sk);
7c657876
ACM
722 if (skb == NULL)
723 goto out_release;
724
725 skb_reserve(skb, sk->sk_prot->max_header);
726 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
27258ee5
ACM
727 if (rc != 0)
728 goto out_discard;
729
97e5848d
IM
730 skb_queue_tail(&sk->sk_write_queue, skb);
731 dccp_write_xmit(sk,0);
7c657876
ACM
732out_release:
733 release_sock(sk);
734 return rc ? : len;
27258ee5
ACM
735out_discard:
736 kfree_skb(skb);
7c657876 737 goto out_release;
7c657876
ACM
738}
739
f21e68ca
ACM
740EXPORT_SYMBOL_GPL(dccp_sendmsg);
741
7c657876
ACM
742int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
743 size_t len, int nonblock, int flags, int *addr_len)
744{
745 const struct dccp_hdr *dh;
7c657876
ACM
746 long timeo;
747
748 lock_sock(sk);
749
531669a0
ACM
750 if (sk->sk_state == DCCP_LISTEN) {
751 len = -ENOTCONN;
7c657876 752 goto out;
7c657876 753 }
7c657876 754
531669a0 755 timeo = sock_rcvtimeo(sk, nonblock);
7c657876
ACM
756
757 do {
531669a0 758 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
7c657876 759
531669a0
ACM
760 if (skb == NULL)
761 goto verify_sock_status;
7c657876 762
531669a0 763 dh = dccp_hdr(skb);
7c657876 764
531669a0
ACM
765 if (dh->dccph_type == DCCP_PKT_DATA ||
766 dh->dccph_type == DCCP_PKT_DATAACK)
767 goto found_ok_skb;
7c657876 768
531669a0
ACM
769 if (dh->dccph_type == DCCP_PKT_RESET ||
770 dh->dccph_type == DCCP_PKT_CLOSE) {
771 dccp_pr_debug("found fin ok!\n");
772 len = 0;
773 goto found_fin_ok;
774 }
775 dccp_pr_debug("packet_type=%s\n",
776 dccp_packet_name(dh->dccph_type));
624d1164 777 sk_eat_skb(sk, skb, 0);
531669a0
ACM
778verify_sock_status:
779 if (sock_flag(sk, SOCK_DONE)) {
780 len = 0;
7c657876 781 break;
531669a0 782 }
7c657876 783
531669a0
ACM
784 if (sk->sk_err) {
785 len = sock_error(sk);
786 break;
787 }
7c657876 788
531669a0
ACM
789 if (sk->sk_shutdown & RCV_SHUTDOWN) {
790 len = 0;
791 break;
792 }
7c657876 793
531669a0
ACM
794 if (sk->sk_state == DCCP_CLOSED) {
795 if (!sock_flag(sk, SOCK_DONE)) {
796 /* This occurs when user tries to read
797 * from never connected socket.
798 */
799 len = -ENOTCONN;
7c657876
ACM
800 break;
801 }
531669a0
ACM
802 len = 0;
803 break;
7c657876
ACM
804 }
805
531669a0
ACM
806 if (!timeo) {
807 len = -EAGAIN;
808 break;
809 }
7c657876 810
531669a0
ACM
811 if (signal_pending(current)) {
812 len = sock_intr_errno(timeo);
813 break;
814 }
7c657876 815
531669a0 816 sk_wait_data(sk, &timeo);
7c657876 817 continue;
7c657876 818 found_ok_skb:
531669a0
ACM
819 if (len > skb->len)
820 len = skb->len;
821 else if (len < skb->len)
822 msg->msg_flags |= MSG_TRUNC;
823
824 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
825 /* Exception. Bailout! */
826 len = -EFAULT;
827 break;
7c657876 828 }
7c657876
ACM
829 found_fin_ok:
830 if (!(flags & MSG_PEEK))
624d1164 831 sk_eat_skb(sk, skb, 0);
7c657876 832 break;
531669a0 833 } while (1);
7c657876
ACM
834out:
835 release_sock(sk);
531669a0 836 return len;
7c657876
ACM
837}
838
f21e68ca
ACM
839EXPORT_SYMBOL_GPL(dccp_recvmsg);
840
841int inet_dccp_listen(struct socket *sock, int backlog)
7c657876
ACM
842{
843 struct sock *sk = sock->sk;
844 unsigned char old_state;
845 int err;
846
847 lock_sock(sk);
848
849 err = -EINVAL;
850 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
851 goto out;
852
853 old_state = sk->sk_state;
854 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
855 goto out;
856
857 /* Really, if the socket is already in listen state
858 * we can only allow the backlog to be adjusted.
859 */
860 if (old_state != DCCP_LISTEN) {
861 /*
862 * FIXME: here it probably should be sk->sk_prot->listen_start
863 * see tcp_listen_start
864 */
72a3effa 865 err = dccp_listen_start(sk, backlog);
7c657876
ACM
866 if (err)
867 goto out;
868 }
869 sk->sk_max_ack_backlog = backlog;
870 err = 0;
871
872out:
873 release_sock(sk);
874 return err;
875}
876
f21e68ca
ACM
877EXPORT_SYMBOL_GPL(inet_dccp_listen);
878
7c657876 879static const unsigned char dccp_new_state[] = {
7690af3f
ACM
880 /* current state: new state: action: */
881 [0] = DCCP_CLOSED,
8109b02b 882 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
7690af3f
ACM
883 [DCCP_REQUESTING] = DCCP_CLOSED,
884 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
885 [DCCP_LISTEN] = DCCP_CLOSED,
886 [DCCP_RESPOND] = DCCP_CLOSED,
887 [DCCP_CLOSING] = DCCP_CLOSED,
888 [DCCP_TIME_WAIT] = DCCP_CLOSED,
889 [DCCP_CLOSED] = DCCP_CLOSED,
7c657876
ACM
890};
891
892static int dccp_close_state(struct sock *sk)
893{
894 const int next = dccp_new_state[sk->sk_state];
895 const int ns = next & DCCP_STATE_MASK;
896
897 if (ns != sk->sk_state)
898 dccp_set_state(sk, ns);
899
900 return next & DCCP_ACTION_FIN;
901}
902
903void dccp_close(struct sock *sk, long timeout)
904{
97e5848d 905 struct dccp_sock *dp = dccp_sk(sk);
7c657876 906 struct sk_buff *skb;
d83bd95b 907 u32 data_was_unread = 0;
134af346 908 int state;
7c657876
ACM
909
910 lock_sock(sk);
911
912 sk->sk_shutdown = SHUTDOWN_MASK;
913
914 if (sk->sk_state == DCCP_LISTEN) {
915 dccp_set_state(sk, DCCP_CLOSED);
916
917 /* Special case. */
918 inet_csk_listen_stop(sk);
919
920 goto adjudge_to_death;
921 }
922
97e5848d
IM
923 sk_stop_timer(sk, &dp->dccps_xmit_timer);
924
7c657876
ACM
925 /*
926 * We need to flush the recv. buffs. We do this only on the
927 * descriptor close, not protocol-sourced closes, because the
928 *reader process may not have drained the data yet!
929 */
7c657876 930 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
d83bd95b 931 data_was_unread += skb->len;
7c657876
ACM
932 __kfree_skb(skb);
933 }
934
d83bd95b
GR
935 if (data_was_unread) {
936 /* Unread data was tossed, send an appropriate Reset Code */
937 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
938 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
939 dccp_set_state(sk, DCCP_CLOSED);
940 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
7c657876
ACM
941 /* Check zero linger _after_ checking for unread data. */
942 sk->sk_prot->disconnect(sk, 0);
943 } else if (dccp_close_state(sk)) {
7ad07e7c 944 dccp_send_close(sk, 1);
7c657876
ACM
945 }
946
947 sk_stream_wait_close(sk, timeout);
948
949adjudge_to_death:
134af346
HX
950 state = sk->sk_state;
951 sock_hold(sk);
952 sock_orphan(sk);
953 atomic_inc(sk->sk_prot->orphan_count);
954
7ad07e7c
ACM
955 /*
956 * It is the last release_sock in its life. It will remove backlog.
957 */
7c657876
ACM
958 release_sock(sk);
959 /*
960 * Now socket is owned by kernel and we acquire BH lock
961 * to finish close. No need to check for user refs.
962 */
963 local_bh_disable();
964 bh_lock_sock(sk);
965 BUG_TRAP(!sock_owned_by_user(sk));
966
134af346
HX
967 /* Have we already been destroyed by a softirq or backlog? */
968 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
969 goto out;
7ad07e7c
ACM
970
971 /*
972 * The last release_sock may have processed the CLOSE or RESET
973 * packet moving sock to CLOSED state, if not we have to fire
974 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
975 * in draft-ietf-dccp-spec-11. -acme
976 */
977 if (sk->sk_state == DCCP_CLOSING) {
978 /* FIXME: should start at 2 * RTT */
979 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
980 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
981 inet_csk(sk)->icsk_rto,
982 DCCP_RTO_MAX);
983#if 0
984 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
7c657876 985 dccp_set_state(sk, DCCP_CLOSED);
7ad07e7c
ACM
986#endif
987 }
7c657876 988
7c657876
ACM
989 if (sk->sk_state == DCCP_CLOSED)
990 inet_csk_destroy_sock(sk);
991
992 /* Otherwise, socket is reprieved until protocol close. */
993
134af346 994out:
7c657876
ACM
995 bh_unlock_sock(sk);
996 local_bh_enable();
997 sock_put(sk);
998}
999
f21e68ca
ACM
1000EXPORT_SYMBOL_GPL(dccp_close);
1001
7c657876
ACM
/* shutdown() handler: currently only logs the request, nothing else. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1008
46f09ffa 1009static int __init dccp_mib_init(void)
7c657876
ACM
1010{
1011 int rc = -ENOMEM;
1012
1013 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1014 if (dccp_statistics[0] == NULL)
1015 goto out;
1016
1017 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1018 if (dccp_statistics[1] == NULL)
1019 goto out_free_one;
1020
1021 rc = 0;
1022out:
1023 return rc;
1024out_free_one:
1025 free_percpu(dccp_statistics[0]);
1026 dccp_statistics[0] = NULL;
1027 goto out;
1028
1029}
1030
b61fafc4 1031static void dccp_mib_exit(void)
46f09ffa
ACM
1032{
1033 free_percpu(dccp_statistics[0]);
1034 free_percpu(dccp_statistics[1]);
1035 dccp_statistics[0] = dccp_statistics[1] = NULL;
1036}
1037
7c657876
ACM
1038static int thash_entries;
1039module_param(thash_entries, int, 0444);
1040MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1041
a1d3a355 1042#ifdef CONFIG_IP_DCCP_DEBUG
7c657876 1043int dccp_debug;
042d18f9 1044module_param(dccp_debug, bool, 0444);
7c657876 1045MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
f21e68ca
ACM
1046
1047EXPORT_SYMBOL_GPL(dccp_debug);
a1d3a355 1048#endif
7c657876
ACM
1049
1050static int __init dccp_init(void)
1051{
1052 unsigned long goal;
1053 int ehash_order, bhash_order, i;
b61fafc4 1054 int rc = -ENOBUFS;
7c657876 1055
7690af3f
ACM
1056 dccp_hashinfo.bind_bucket_cachep =
1057 kmem_cache_create("dccp_bind_bucket",
1058 sizeof(struct inet_bind_bucket), 0,
20c2df83 1059 SLAB_HWCACHE_ALIGN, NULL);
7c657876 1060 if (!dccp_hashinfo.bind_bucket_cachep)
b61fafc4 1061 goto out;
7c657876
ACM
1062
1063 /*
1064 * Size and allocate the main established and bind bucket
1065 * hash tables.
1066 *
1067 * The methodology is similar to that of the buffer cache.
1068 */
1069 if (num_physpages >= (128 * 1024))
1070 goal = num_physpages >> (21 - PAGE_SHIFT);
1071 else
1072 goal = num_physpages >> (23 - PAGE_SHIFT);
1073
1074 if (thash_entries)
7690af3f
ACM
1075 goal = (thash_entries *
1076 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
7c657876
ACM
1077 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1078 ;
1079 do {
1080 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1081 sizeof(struct inet_ehash_bucket);
7690af3f
ACM
1082 while (dccp_hashinfo.ehash_size &
1083 (dccp_hashinfo.ehash_size - 1))
7c657876
ACM
1084 dccp_hashinfo.ehash_size--;
1085 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1086 __get_free_pages(GFP_ATOMIC, ehash_order);
1087 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1088
1089 if (!dccp_hashinfo.ehash) {
59348b19 1090 DCCP_CRIT("Failed to allocate DCCP established hash table");
7c657876
ACM
1091 goto out_free_bind_bucket_cachep;
1092 }
1093
dbca9b27 1094 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
7c657876 1095 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
dbca9b27 1096 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
7c657876
ACM
1097 }
1098
230140cf
ED
1099 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1100 goto out_free_dccp_ehash;
1101
7c657876
ACM
1102 bhash_order = ehash_order;
1103
1104 do {
1105 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1106 sizeof(struct inet_bind_hashbucket);
7690af3f
ACM
1107 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1108 bhash_order > 0)
7c657876
ACM
1109 continue;
1110 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1111 __get_free_pages(GFP_ATOMIC, bhash_order);
1112 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1113
1114 if (!dccp_hashinfo.bhash) {
59348b19 1115 DCCP_CRIT("Failed to allocate DCCP bind hash table");
230140cf 1116 goto out_free_dccp_locks;
7c657876
ACM
1117 }
1118
1119 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1120 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1121 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1122 }
1123
46f09ffa 1124 rc = dccp_mib_init();
fa23e2ec 1125 if (rc)
7c657876
ACM
1126 goto out_free_dccp_bhash;
1127
9b07ef5d 1128 rc = dccp_ackvec_init();
7c657876 1129 if (rc)
b61fafc4 1130 goto out_free_dccp_mib;
9b07ef5d 1131
e55d912f 1132 rc = dccp_sysctl_init();
9b07ef5d
ACM
1133 if (rc)
1134 goto out_ackvec_exit;
4c70f383
GR
1135
1136 dccp_timestamping_init();
7c657876
ACM
1137out:
1138 return rc;
9b07ef5d
ACM
1139out_ackvec_exit:
1140 dccp_ackvec_exit();
b61fafc4 1141out_free_dccp_mib:
46f09ffa 1142 dccp_mib_exit();
7c657876
ACM
1143out_free_dccp_bhash:
1144 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1145 dccp_hashinfo.bhash = NULL;
230140cf
ED
1146out_free_dccp_locks:
1147 inet_ehash_locks_free(&dccp_hashinfo);
7c657876
ACM
1148out_free_dccp_ehash:
1149 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1150 dccp_hashinfo.ehash = NULL;
1151out_free_bind_bucket_cachep:
1152 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1153 dccp_hashinfo.bind_bucket_cachep = NULL;
7c657876
ACM
1154 goto out;
1155}
1156
7c657876
ACM
1157static void __exit dccp_fini(void)
1158{
46f09ffa 1159 dccp_mib_exit();
725ba8ee
ACM
1160 free_pages((unsigned long)dccp_hashinfo.bhash,
1161 get_order(dccp_hashinfo.bhash_size *
1162 sizeof(struct inet_bind_hashbucket)));
1163 free_pages((unsigned long)dccp_hashinfo.ehash,
1164 get_order(dccp_hashinfo.ehash_size *
1165 sizeof(struct inet_ehash_bucket)));
230140cf 1166 inet_ehash_locks_free(&dccp_hashinfo);
7c657876 1167 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
9b07ef5d 1168 dccp_ackvec_exit();
e55d912f 1169 dccp_sysctl_exit();
7c657876
ACM
1170}
1171
1172module_init(dccp_init);
1173module_exit(dccp_fini);
1174
7c657876
ACM
1175MODULE_LICENSE("GPL");
1176MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1177MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");