/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);
	do_gettimeofday(&dp->dccps_epoch);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dp->dccps_options.dccpo_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid =
				ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
					       sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid =
				ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
					       sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dp->dccps_options.dccpo_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* The control socket doesn't need feature negotiation. */
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
		dccp_ctl_socket_init = 0;
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_INVALID_VALUE;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

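/*
 * Editor's sketch, purely hypothetical: what the "setsockopt(CCIDs-I-want/
 * accept)" interface alluded to in the FIXME above might look like from
 * userspace, expressed via the feature-negotiation path that does exist in
 * this file (struct dccp_so_feat, DCCP_SOCKOPT_CHANGE_L, DCCPF_CCID from
 * <linux/dccp.h>):
 *
 *	__u8 ccids[] = { 3, 2 };	... most preferred CCID first ...
 *	struct dccp_so_feat f = {
 *		.dccpsf_feat = DCCPF_CCID,
 *		.dccpsf_val  = ccids,
 *		.dccpsf_len  = sizeof(ccids),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &f, sizeof(f));
 */
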
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_options.dccpo_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(sk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/*
	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
	 * before calling listen()
	 */
	if (dccp_service_not_initialized(sk))
		return -EPROTO;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
		/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

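/*
 * Illustration only: from userspace this is plain poll(2), nothing
 * DCCP-specific is assumed:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, -1) > 0) {
 *		if (pfd.revents & POLLIN)
 *			... a packet, shutdown or error is pending ...
 *		if (pfd.revents & POLLOUT)
 *			... another packet currently fits in wmem ...
 *	}
 */
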
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

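/*
 * Userspace sketch (hypothetical values, error handling omitted): the
 * first __be32 becomes dccps_service, any further words become the
 * service list a listener will also accept; this must happen before
 * connect()/listen(), see dccp_listen_start():
 *
 *	const __be32 svc[2] = { htonl(42), htonl(4242) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, svc, sizeof(svc));
 */
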
/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
			      GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

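/*
 * Userspace sketch for the CHANGE_L/CHANGE_R path (assumed values,
 * illustration only), using struct dccp_so_feat and DCCPF_ACK_RATIO from
 * <linux/dccp.h>:
 *
 *	__u8 ratio = 2;
 *	struct dccp_so_feat f = {
 *		.dccpsf_feat = DCCPF_ACK_RATIO,
 *		.dccpsf_val  = &ratio,
 *		.dccpsf_len  = sizeof(ratio),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &f, sizeof(f));
 */
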
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	struct dccp_sock *dp;
	int err;
	int val;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	dp = dccp_sk(sk);
	err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		dp->dccps_packet_size = val;
		break;

	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat *)
						     optval);
		break;

	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat *)
						     optval);
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	if (get_user(len, optlen))
		return -EFAULT;

	if (len < sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		val = dp->dccps_packet_size;
		len = sizeof(dp->dccps_packet_size);
		break;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

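/*
 * Userspace sketch of reading the service code(s) back: as
 * dccp_getsockopt_service() above shows, the buffer must hold the
 * service code plus the whole list or the call fails with EINVAL
 * (the 32 below mirrors the kernel-internal DCCP_SERVICE_LIST_MAX_LEN):
 *
 *	__be32 svc[32 + 1];
 *	socklen_t len = sizeof(svc);
 *
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, svc, &len) == 0)
 *		... len now holds the number of bytes filled in ...
 */
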
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorithm similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

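/*
 * Userspace view, for illustration: each send(2) maps onto exactly one
 * DCCP-Data packet, and oversized writes are rejected rather than
 * fragmented, so a sender is expected to shrink and retry:
 *
 *	if (send(fd, buf, len, 0) < 0 && errno == EMSGSIZE)
 *		... len exceeded dccps_mss_cache ...
 */
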
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

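/*
 * Userspace view, for illustration: reads are packet-oriented as well.
 * If the supplied buffer is shorter than the queued packet, the tail is
 * discarded and MSG_TRUNC is set; it is not saved for the next read:
 *
 *	ssize_t n = recvmsg(fd, &msg, 0);
 *
 *	if (n >= 0 && (msg.msg_flags & MSG_TRUNC))
 *		... the datagram was longer than the iovec supplied ...
 */
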
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

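/*
 * Typical server-side sequence implied by the checks above (sketch with
 * assumed values, error handling omitted); the service code must be set
 * before listen() or dccp_listen_start() returns -EPROTO:
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	const __be32 service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 */
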
static const unsigned char dccp_new_state[] = {
	/* current state:	 new state:	  action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
}

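/*
 * The two percpu copies allocated above back the SNMP counter macros
 * defined elsewhere in this era's tree; illustrative use only (macro and
 * field names assumed from dccp.h / <linux/dccp.h>):
 *
 *	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);		... process context ...
 *	DCCP_INC_STATS_BH(DCCP_MIB_ESTABRESETS);	... softirq context ...
 */
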
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

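	/*
	 * Worked example with assumed numbers: on a machine with 4 KiB
	 * pages (PAGE_SHIFT == 12) and num_physpages == 262144 (1 GiB of
	 * RAM), the first branch is taken and
	 * goal = 262144 >> (21 - 12) = 512 pages, i.e. 2 MiB for the
	 * established hash; below the 128 * 1024 page threshold the
	 * 23-bit shift allots a quarter of that proportion.
	 */
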
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");