/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
		[DCCP_OPEN]		= "OPEN",
		[DCCP_REQUESTING]	= "REQUESTING",
		[DCCP_PARTOPEN]		= "PARTOPEN",
		[DCCP_LISTEN]		= "LISTEN",
		[DCCP_RESPOND]		= "RESPOND",
		[DCCP_CLOSING]		= "CLOSING",
		[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
		[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
		[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
		[DCCP_TIME_WAIT]	= "TIME_WAIT",
		[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC 793, sec. 3.8.
	 * TCP uses an RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic, and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
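
/*
 * Usage sketch (illustrative, not taken from this file): user space passes
 * one or more service codes as __be32 values, the primary code first.  The
 * values below are hypothetical; since the codes are stored as __be32 and
 * compared against the wire format, they are assumed to be in network byte
 * order:
 *
 *	__be32 codes[2] = { htonl(42), htonl(4242) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, codes, sizeof(codes));
 *
 * Passing a single code (optlen == sizeof(__be32)) skips the list
 * allocation above.
 */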

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
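
/*
 * Worked example: for cscov = 12 the loop above builds the preference list
 * {12, 13, 14, 15}, so negotiation settles on the smallest value both ends
 * share.  A user-space sketch (illustrative) requesting sender coverage:
 *
 *	int cscov = 12;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV, &cscov, sizeof(cscov));
 */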

static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
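
/*
 * Usage sketch (illustrative): the option value is an array of u8 CCID
 * numbers, interpreted as a preference list (assumed most-preferred first):
 *
 *	__u8 ccids[2] = { 3, 2 };	- try CCID-3, fall back to CCID-2
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID, ccids, sizeof(ccids));
 *
 * DCCP_SOCKOPT_TX_CCID and DCCP_SOCKOPT_RX_CCID take the same format but,
 * as the code above shows, register the list for one half-connection only.
 */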

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);
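
/*
 * Usage sketch (illustrative): dccp_sendmsg() below rejects messages larger
 * than the current maximum packet size with -EMSGSIZE, so an application
 * can query DCCP_SOCKOPT_GET_CUR_MPS to size its writes:
 *
 *	int mps;
 *	socklen_t len = sizeof(mps);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
 */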

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subsystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
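
/*
 * Usage sketch (illustrative): a sender attaches a queueing priority as a
 * DCCP_SCM_PRIORITY control message; per dccp_qpolicy_param_ok() above it
 * is only accepted if the selected qpolicy (DCCP_SOCKOPT_QPOLICY_ID) takes
 * a priority parameter.  The priority value below is hypothetical:
 *
 *	char cbuf[CMSG_SPACE(sizeof(__u32))] = { 0 };
 *	struct msghdr mh = { .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
 *	__u32 prio = 7;
 *
 *	cm->cmsg_level = SOL_DCCP;
 *	cm->cmsg_type  = DCCP_SCM_PRIORITY;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(prio));
 *	memcpy(CMSG_DATA(cm), &prio, sizeof(prio));
 *	- then point mh.msg_iov at the payload and call sendmsg(fd, &mh, 0)
 */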

int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits releasing further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
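
/*
 * Usage sketch (illustrative): DCCP is datagram-oriented, so each send()
 * produces at most one packet, and anything larger than the current MPS is
 * rejected with -EMSGSIZE above.  A minimal client might do:
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, buf, len, 0);		- len must not exceed the MPS
 */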

int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
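
/*
 * Usage sketch (illustrative): a passive (server) socket reaches the
 * function above through the normal listen() path:
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *	cfd = accept(fd, NULL, NULL);
 */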

static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time);
	 * - normal termination but the queue could not be flushed within the
	 *   time limit.
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int __init dccp_mib_init(void)
{
	dccp_statistics = alloc_percpu(struct dccp_mib);
	if (!dccp_statistics)
		return -ENOMEM;
	return 0;
}

static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);
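	/*
	 * Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12): with
	 * 1 GiB of RAM, totalram_pages == 262144 >= 128 * 1024, so the first
	 * branch gives goal = 262144 >> 9 = 512 pages, i.e. the established
	 * hash is budgeted at roughly 1/512th of memory (2 MiB here).
	 */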

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					  sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					   sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");