/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
40 DEFINE_SNMP_STAT(struct dccp_mib
, dccp_statistics
) __read_mostly
;
42 EXPORT_SYMBOL_GPL(dccp_statistics
);
44 atomic_t dccp_orphan_count
= ATOMIC_INIT(0);
46 EXPORT_SYMBOL_GPL(dccp_orphan_count
);
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo
= {
49 .lhash_lock
= RW_LOCK_UNLOCKED
,
50 .lhash_users
= ATOMIC_INIT(0),
51 .lhash_wait
= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo
.lhash_wait
),
54 EXPORT_SYMBOL_GPL(dccp_hashinfo
);
56 /* the maximum queue length for tx in packets. 0 is no limit */
57 int sysctl_dccp_tx_qlen __read_mostly
= 5;
59 void dccp_set_state(struct sock
*sk
, const int state
)
61 const int oldstate
= sk
->sk_state
;
63 dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
65 dccp_state_name(oldstate
), dccp_state_name(state
));
66 WARN_ON(state
== oldstate
);
70 if (oldstate
!= DCCP_OPEN
)
71 DCCP_INC_STATS(DCCP_MIB_CURRESTAB
);
75 if (oldstate
== DCCP_CLOSING
|| oldstate
== DCCP_OPEN
)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS
);
78 sk
->sk_prot
->unhash(sk
);
79 if (inet_csk(sk
)->icsk_bind_hash
!= NULL
&&
80 !(sk
->sk_userlocks
& SOCK_BINDPORT_LOCK
))
81 inet_put_port(&dccp_hashinfo
, sk
);
84 if (oldstate
== DCCP_OPEN
)
85 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB
);
88 /* Change state AFTER socket is unhashed to avoid closed
89 * socket sitting in hash tables.
94 EXPORT_SYMBOL_GPL(dccp_set_state
);
96 void dccp_done(struct sock
*sk
)
98 dccp_set_state(sk
, DCCP_CLOSED
);
99 dccp_clear_xmit_timers(sk
);
101 sk
->sk_shutdown
= SHUTDOWN_MASK
;
103 if (!sock_flag(sk
, SOCK_DEAD
))
104 sk
->sk_state_change(sk
);
106 inet_csk_destroy_sock(sk
);
109 EXPORT_SYMBOL_GPL(dccp_done
);
111 const char *dccp_packet_name(const int type
)
113 static const char *dccp_packet_names
[] = {
114 [DCCP_PKT_REQUEST
] = "REQUEST",
115 [DCCP_PKT_RESPONSE
] = "RESPONSE",
116 [DCCP_PKT_DATA
] = "DATA",
117 [DCCP_PKT_ACK
] = "ACK",
118 [DCCP_PKT_DATAACK
] = "DATAACK",
119 [DCCP_PKT_CLOSEREQ
] = "CLOSEREQ",
120 [DCCP_PKT_CLOSE
] = "CLOSE",
121 [DCCP_PKT_RESET
] = "RESET",
122 [DCCP_PKT_SYNC
] = "SYNC",
123 [DCCP_PKT_SYNCACK
] = "SYNCACK",
126 if (type
>= DCCP_NR_PKT_TYPES
)
129 return dccp_packet_names
[type
];
132 EXPORT_SYMBOL_GPL(dccp_packet_name
);
134 const char *dccp_state_name(const int state
)
136 static char *dccp_state_names
[] = {
137 [DCCP_OPEN
] = "OPEN",
138 [DCCP_REQUESTING
] = "REQUESTING",
139 [DCCP_PARTOPEN
] = "PARTOPEN",
140 [DCCP_LISTEN
] = "LISTEN",
141 [DCCP_RESPOND
] = "RESPOND",
142 [DCCP_CLOSING
] = "CLOSING",
143 [DCCP_TIME_WAIT
] = "TIME_WAIT",
144 [DCCP_CLOSED
] = "CLOSED",
147 if (state
>= DCCP_MAX_STATES
)
148 return "INVALID STATE!";
150 return dccp_state_names
[state
];
153 EXPORT_SYMBOL_GPL(dccp_state_name
);
155 void dccp_hash(struct sock
*sk
)
157 inet_hash(&dccp_hashinfo
, sk
);
160 EXPORT_SYMBOL_GPL(dccp_hash
);
162 void dccp_unhash(struct sock
*sk
)
164 inet_unhash(&dccp_hashinfo
, sk
);
167 EXPORT_SYMBOL_GPL(dccp_unhash
);
169 int dccp_init_sock(struct sock
*sk
, const __u8 ctl_sock_initialized
)
171 struct dccp_sock
*dp
= dccp_sk(sk
);
172 struct dccp_minisock
*dmsk
= dccp_msk(sk
);
173 struct inet_connection_sock
*icsk
= inet_csk(sk
);
175 dccp_minisock_init(&dp
->dccps_minisock
);
177 icsk
->icsk_rto
= DCCP_TIMEOUT_INIT
;
178 icsk
->icsk_syn_retries
= sysctl_dccp_request_retries
;
179 sk
->sk_state
= DCCP_CLOSED
;
180 sk
->sk_write_space
= dccp_write_space
;
181 icsk
->icsk_sync_mss
= dccp_sync_mss
;
182 dp
->dccps_mss_cache
= 536;
183 dp
->dccps_rate_last
= jiffies
;
184 dp
->dccps_role
= DCCP_ROLE_UNDEFINED
;
185 dp
->dccps_service
= DCCP_SERVICE_CODE_IS_ABSENT
;
186 dp
->dccps_l_ack_ratio
= dp
->dccps_r_ack_ratio
= 1;
188 dccp_init_xmit_timers(sk
);
191 * FIXME: We're hardcoding the CCID, and doing this at this point makes
192 * the listening (master) sock get CCID control blocks, which is not
193 * necessary, but for now, to not mess with the test userspace apps,
194 * lets leave it here, later the real solution is to do this in a
195 * setsockopt(CCIDs-I-want/accept). -acme
197 if (likely(ctl_sock_initialized
)) {
198 int rc
= dccp_feat_init(dmsk
);
203 if (dmsk
->dccpms_send_ack_vector
) {
204 dp
->dccps_hc_rx_ackvec
= dccp_ackvec_alloc(GFP_KERNEL
);
205 if (dp
->dccps_hc_rx_ackvec
== NULL
)
208 dp
->dccps_hc_rx_ccid
= ccid_hc_rx_new(dmsk
->dccpms_rx_ccid
,
210 dp
->dccps_hc_tx_ccid
= ccid_hc_tx_new(dmsk
->dccpms_tx_ccid
,
212 if (unlikely(dp
->dccps_hc_rx_ccid
== NULL
||
213 dp
->dccps_hc_tx_ccid
== NULL
)) {
214 ccid_hc_rx_delete(dp
->dccps_hc_rx_ccid
, sk
);
215 ccid_hc_tx_delete(dp
->dccps_hc_tx_ccid
, sk
);
216 if (dmsk
->dccpms_send_ack_vector
) {
217 dccp_ackvec_free(dp
->dccps_hc_rx_ackvec
);
218 dp
->dccps_hc_rx_ackvec
= NULL
;
220 dp
->dccps_hc_rx_ccid
= dp
->dccps_hc_tx_ccid
= NULL
;
224 /* control socket doesn't need feat nego */
225 INIT_LIST_HEAD(&dmsk
->dccpms_pending
);
226 INIT_LIST_HEAD(&dmsk
->dccpms_conf
);
232 EXPORT_SYMBOL_GPL(dccp_init_sock
);
234 int dccp_destroy_sock(struct sock
*sk
)
236 struct dccp_sock
*dp
= dccp_sk(sk
);
237 struct dccp_minisock
*dmsk
= dccp_msk(sk
);
240 * DCCP doesn't use sk_write_queue, just sk_send_head
241 * for retransmissions
243 if (sk
->sk_send_head
!= NULL
) {
244 kfree_skb(sk
->sk_send_head
);
245 sk
->sk_send_head
= NULL
;
248 /* Clean up a referenced DCCP bind bucket. */
249 if (inet_csk(sk
)->icsk_bind_hash
!= NULL
)
250 inet_put_port(&dccp_hashinfo
, sk
);
252 kfree(dp
->dccps_service_list
);
253 dp
->dccps_service_list
= NULL
;
255 if (dmsk
->dccpms_send_ack_vector
) {
256 dccp_ackvec_free(dp
->dccps_hc_rx_ackvec
);
257 dp
->dccps_hc_rx_ackvec
= NULL
;
259 ccid_hc_rx_delete(dp
->dccps_hc_rx_ccid
, sk
);
260 ccid_hc_tx_delete(dp
->dccps_hc_tx_ccid
, sk
);
261 dp
->dccps_hc_rx_ccid
= dp
->dccps_hc_tx_ccid
= NULL
;
263 /* clean up feature negotiation state */
264 dccp_feat_clean(dmsk
);
269 EXPORT_SYMBOL_GPL(dccp_destroy_sock
);
271 static inline int dccp_listen_start(struct sock
*sk
, int backlog
)
273 struct dccp_sock
*dp
= dccp_sk(sk
);
275 dp
->dccps_role
= DCCP_ROLE_LISTEN
;
276 return inet_csk_listen_start(sk
, backlog
);
279 int dccp_disconnect(struct sock
*sk
, int flags
)
281 struct inet_connection_sock
*icsk
= inet_csk(sk
);
282 struct inet_sock
*inet
= inet_sk(sk
);
284 const int old_state
= sk
->sk_state
;
286 if (old_state
!= DCCP_CLOSED
)
287 dccp_set_state(sk
, DCCP_CLOSED
);
289 /* ABORT function of RFC793 */
290 if (old_state
== DCCP_LISTEN
) {
291 inet_csk_listen_stop(sk
);
292 /* FIXME: do the active reset thing */
293 } else if (old_state
== DCCP_REQUESTING
)
294 sk
->sk_err
= ECONNRESET
;
296 dccp_clear_xmit_timers(sk
);
297 __skb_queue_purge(&sk
->sk_receive_queue
);
298 if (sk
->sk_send_head
!= NULL
) {
299 __kfree_skb(sk
->sk_send_head
);
300 sk
->sk_send_head
= NULL
;
305 if (!(sk
->sk_userlocks
& SOCK_BINDADDR_LOCK
))
306 inet_reset_saddr(sk
);
309 sock_reset_flag(sk
, SOCK_DONE
);
311 icsk
->icsk_backoff
= 0;
312 inet_csk_delack_init(sk
);
315 BUG_TRAP(!inet
->num
|| icsk
->icsk_bind_hash
);
317 sk
->sk_error_report(sk
);
321 EXPORT_SYMBOL_GPL(dccp_disconnect
);
324 * Wait for a DCCP event.
326 * Note that we don't need to lock the socket, as the upper poll layers
327 * take care of normal races (between the test and the event) and we don't
328 * go look at any of the socket buffers directly.
330 unsigned int dccp_poll(struct file
*file
, struct socket
*sock
,
334 struct sock
*sk
= sock
->sk
;
336 poll_wait(file
, sk
->sk_sleep
, wait
);
337 if (sk
->sk_state
== DCCP_LISTEN
)
338 return inet_csk_listen_poll(sk
);
340 /* Socket is not locked. We are protected from async events
341 by poll logic and correct handling of state changes
342 made by another threads is impossible in any case.
349 if (sk
->sk_shutdown
== SHUTDOWN_MASK
|| sk
->sk_state
== DCCP_CLOSED
)
351 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
352 mask
|= POLLIN
| POLLRDNORM
| POLLRDHUP
;
355 if ((1 << sk
->sk_state
) & ~(DCCPF_REQUESTING
| DCCPF_RESPOND
)) {
356 if (atomic_read(&sk
->sk_rmem_alloc
) > 0)
357 mask
|= POLLIN
| POLLRDNORM
;
359 if (!(sk
->sk_shutdown
& SEND_SHUTDOWN
)) {
360 if (sk_stream_wspace(sk
) >= sk_stream_min_wspace(sk
)) {
361 mask
|= POLLOUT
| POLLWRNORM
;
362 } else { /* send SIGIO later */
363 set_bit(SOCK_ASYNC_NOSPACE
,
364 &sk
->sk_socket
->flags
);
365 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
367 /* Race breaker. If space is freed after
368 * wspace test but before the flags are set,
369 * IO signal will be lost.
371 if (sk_stream_wspace(sk
) >= sk_stream_min_wspace(sk
))
372 mask
|= POLLOUT
| POLLWRNORM
;
379 EXPORT_SYMBOL_GPL(dccp_poll
);
381 int dccp_ioctl(struct sock
*sk
, int cmd
, unsigned long arg
)
387 if (sk
->sk_state
== DCCP_LISTEN
)
393 unsigned long amount
= 0;
395 skb
= skb_peek(&sk
->sk_receive_queue
);
398 * We will only return the amount of this packet since
399 * that is all that will be read.
403 rc
= put_user(amount
, (int __user
*)arg
);
415 EXPORT_SYMBOL_GPL(dccp_ioctl
);
417 static int dccp_setsockopt_service(struct sock
*sk
, const __be32 service
,
418 char __user
*optval
, int optlen
)
420 struct dccp_sock
*dp
= dccp_sk(sk
);
421 struct dccp_service_list
*sl
= NULL
;
423 if (service
== DCCP_SERVICE_INVALID_VALUE
||
424 optlen
> DCCP_SERVICE_LIST_MAX_LEN
* sizeof(u32
))
427 if (optlen
> sizeof(service
)) {
428 sl
= kmalloc(optlen
, GFP_KERNEL
);
432 sl
->dccpsl_nr
= optlen
/ sizeof(u32
) - 1;
433 if (copy_from_user(sl
->dccpsl_list
,
434 optval
+ sizeof(service
),
435 optlen
- sizeof(service
)) ||
436 dccp_list_has_service(sl
, DCCP_SERVICE_INVALID_VALUE
)) {
443 dp
->dccps_service
= service
;
445 kfree(dp
->dccps_service_list
);
447 dp
->dccps_service_list
= sl
;
452 /* byte 1 is feature. the rest is the preference list */
453 static int dccp_setsockopt_change(struct sock
*sk
, int type
,
454 struct dccp_so_feat __user
*optval
)
456 struct dccp_so_feat opt
;
460 if (copy_from_user(&opt
, optval
, sizeof(opt
)))
463 val
= kmalloc(opt
.dccpsf_len
, GFP_KERNEL
);
467 if (copy_from_user(val
, opt
.dccpsf_val
, opt
.dccpsf_len
)) {
472 rc
= dccp_feat_change(dccp_msk(sk
), type
, opt
.dccpsf_feat
,
473 val
, opt
.dccpsf_len
, GFP_KERNEL
);
485 static int do_dccp_setsockopt(struct sock
*sk
, int level
, int optname
,
486 char __user
*optval
, int optlen
)
488 struct dccp_sock
*dp
= dccp_sk(sk
);
491 if (optlen
< sizeof(int))
494 if (get_user(val
, (int __user
*)optval
))
497 if (optname
== DCCP_SOCKOPT_SERVICE
)
498 return dccp_setsockopt_service(sk
, val
, optval
, optlen
);
502 case DCCP_SOCKOPT_PACKET_SIZE
:
503 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
506 case DCCP_SOCKOPT_CHANGE_L
:
507 if (optlen
!= sizeof(struct dccp_so_feat
))
510 err
= dccp_setsockopt_change(sk
, DCCPO_CHANGE_L
,
511 (struct dccp_so_feat __user
*)
514 case DCCP_SOCKOPT_CHANGE_R
:
515 if (optlen
!= sizeof(struct dccp_so_feat
))
518 err
= dccp_setsockopt_change(sk
, DCCPO_CHANGE_R
,
519 (struct dccp_so_feat __user
*)
522 case DCCP_SOCKOPT_SEND_CSCOV
: /* sender side, RFC 4340, sec. 9.2 */
523 if (val
< 0 || val
> 15)
526 dp
->dccps_pcslen
= val
;
528 case DCCP_SOCKOPT_RECV_CSCOV
: /* receiver side, RFC 4340 sec. 9.2.1 */
529 if (val
< 0 || val
> 15)
532 dp
->dccps_pcrlen
= val
;
533 /* FIXME: add feature negotiation,
534 * ChangeL(MinimumChecksumCoverage, val) */
546 int dccp_setsockopt(struct sock
*sk
, int level
, int optname
,
547 char __user
*optval
, int optlen
)
549 if (level
!= SOL_DCCP
)
550 return inet_csk(sk
)->icsk_af_ops
->setsockopt(sk
, level
,
553 return do_dccp_setsockopt(sk
, level
, optname
, optval
, optlen
);
556 EXPORT_SYMBOL_GPL(dccp_setsockopt
);
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt - compat-syscall variant of dccp_setsockopt()
 *
 * Non-SOL_DCCP levels go through inet_csk_compat_setsockopt(); SOL_DCCP
 * options share do_dccp_setsockopt() with the native path.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
571 static int dccp_getsockopt_service(struct sock
*sk
, int len
,
572 __be32 __user
*optval
,
575 const struct dccp_sock
*dp
= dccp_sk(sk
);
576 const struct dccp_service_list
*sl
;
577 int err
= -ENOENT
, slen
= 0, total_len
= sizeof(u32
);
580 if ((sl
= dp
->dccps_service_list
) != NULL
) {
581 slen
= sl
->dccpsl_nr
* sizeof(u32
);
590 if (put_user(total_len
, optlen
) ||
591 put_user(dp
->dccps_service
, optval
) ||
592 (sl
!= NULL
&& copy_to_user(optval
+ 1, sl
->dccpsl_list
, slen
)))
599 static int do_dccp_getsockopt(struct sock
*sk
, int level
, int optname
,
600 char __user
*optval
, int __user
*optlen
)
602 struct dccp_sock
*dp
;
605 if (get_user(len
, optlen
))
608 if (len
< (int)sizeof(int))
614 case DCCP_SOCKOPT_PACKET_SIZE
:
615 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
617 case DCCP_SOCKOPT_SERVICE
:
618 return dccp_getsockopt_service(sk
, len
,
619 (__be32 __user
*)optval
, optlen
);
620 case DCCP_SOCKOPT_GET_CUR_MPS
:
621 val
= dp
->dccps_mss_cache
;
624 case DCCP_SOCKOPT_SEND_CSCOV
:
625 val
= dp
->dccps_pcslen
;
628 case DCCP_SOCKOPT_RECV_CSCOV
:
629 val
= dp
->dccps_pcrlen
;
633 return ccid_hc_rx_getsockopt(dp
->dccps_hc_rx_ccid
, sk
, optname
,
634 len
, (u32 __user
*)optval
, optlen
);
636 return ccid_hc_tx_getsockopt(dp
->dccps_hc_tx_ccid
, sk
, optname
,
637 len
, (u32 __user
*)optval
, optlen
);
642 if (put_user(len
, optlen
) || copy_to_user(optval
, &val
, len
))
648 int dccp_getsockopt(struct sock
*sk
, int level
, int optname
,
649 char __user
*optval
, int __user
*optlen
)
651 if (level
!= SOL_DCCP
)
652 return inet_csk(sk
)->icsk_af_ops
->getsockopt(sk
, level
,
655 return do_dccp_getsockopt(sk
, level
, optname
, optval
, optlen
);
658 EXPORT_SYMBOL_GPL(dccp_getsockopt
);
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt - compat-syscall variant of dccp_getsockopt()
 *
 * Non-SOL_DCCP levels go through inet_csk_compat_getsockopt(); SOL_DCCP
 * options share do_dccp_getsockopt() with the native path.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
673 int dccp_sendmsg(struct kiocb
*iocb
, struct sock
*sk
, struct msghdr
*msg
,
676 const struct dccp_sock
*dp
= dccp_sk(sk
);
677 const int flags
= msg
->msg_flags
;
678 const int noblock
= flags
& MSG_DONTWAIT
;
683 if (len
> dp
->dccps_mss_cache
)
688 if (sysctl_dccp_tx_qlen
&&
689 (sk
->sk_write_queue
.qlen
>= sysctl_dccp_tx_qlen
)) {
694 timeo
= sock_sndtimeo(sk
, noblock
);
697 * We have to use sk_stream_wait_connect here to set sk_write_pending,
698 * so that the trick in dccp_rcv_request_sent_state_process.
700 /* Wait for a connection to finish. */
701 if ((1 << sk
->sk_state
) & ~(DCCPF_OPEN
| DCCPF_PARTOPEN
))
702 if ((rc
= sk_stream_wait_connect(sk
, &timeo
)) != 0)
705 size
= sk
->sk_prot
->max_header
+ len
;
707 skb
= sock_alloc_send_skb(sk
, size
, noblock
, &rc
);
712 skb_reserve(skb
, sk
->sk_prot
->max_header
);
713 rc
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
717 skb_queue_tail(&sk
->sk_write_queue
, skb
);
718 dccp_write_xmit(sk
,0);
727 EXPORT_SYMBOL_GPL(dccp_sendmsg
);
729 int dccp_recvmsg(struct kiocb
*iocb
, struct sock
*sk
, struct msghdr
*msg
,
730 size_t len
, int nonblock
, int flags
, int *addr_len
)
732 const struct dccp_hdr
*dh
;
737 if (sk
->sk_state
== DCCP_LISTEN
) {
742 timeo
= sock_rcvtimeo(sk
, nonblock
);
745 struct sk_buff
*skb
= skb_peek(&sk
->sk_receive_queue
);
748 goto verify_sock_status
;
752 if (dh
->dccph_type
== DCCP_PKT_DATA
||
753 dh
->dccph_type
== DCCP_PKT_DATAACK
)
756 if (dh
->dccph_type
== DCCP_PKT_RESET
||
757 dh
->dccph_type
== DCCP_PKT_CLOSE
) {
758 dccp_pr_debug("found fin ok!\n");
762 dccp_pr_debug("packet_type=%s\n",
763 dccp_packet_name(dh
->dccph_type
));
764 sk_eat_skb(sk
, skb
, 0);
766 if (sock_flag(sk
, SOCK_DONE
)) {
772 len
= sock_error(sk
);
776 if (sk
->sk_shutdown
& RCV_SHUTDOWN
) {
781 if (sk
->sk_state
== DCCP_CLOSED
) {
782 if (!sock_flag(sk
, SOCK_DONE
)) {
783 /* This occurs when user tries to read
784 * from never connected socket.
798 if (signal_pending(current
)) {
799 len
= sock_intr_errno(timeo
);
803 sk_wait_data(sk
, &timeo
);
808 else if (len
< skb
->len
)
809 msg
->msg_flags
|= MSG_TRUNC
;
811 if (skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, len
)) {
812 /* Exception. Bailout! */
817 if (!(flags
& MSG_PEEK
))
818 sk_eat_skb(sk
, skb
, 0);
826 EXPORT_SYMBOL_GPL(dccp_recvmsg
);
828 int inet_dccp_listen(struct socket
*sock
, int backlog
)
830 struct sock
*sk
= sock
->sk
;
831 unsigned char old_state
;
837 if (sock
->state
!= SS_UNCONNECTED
|| sock
->type
!= SOCK_DCCP
)
840 old_state
= sk
->sk_state
;
841 if (!((1 << old_state
) & (DCCPF_CLOSED
| DCCPF_LISTEN
)))
844 /* Really, if the socket is already in listen state
845 * we can only allow the backlog to be adjusted.
847 if (old_state
!= DCCP_LISTEN
) {
849 * FIXME: here it probably should be sk->sk_prot->listen_start
850 * see tcp_listen_start
852 err
= dccp_listen_start(sk
, backlog
);
856 sk
->sk_max_ack_backlog
= backlog
;
864 EXPORT_SYMBOL_GPL(inet_dccp_listen
);
866 static const unsigned char dccp_new_state
[] = {
867 /* current state: new state: action: */
869 [DCCP_OPEN
] = DCCP_CLOSING
| DCCP_ACTION_FIN
,
870 [DCCP_REQUESTING
] = DCCP_CLOSED
,
871 [DCCP_PARTOPEN
] = DCCP_CLOSING
| DCCP_ACTION_FIN
,
872 [DCCP_LISTEN
] = DCCP_CLOSED
,
873 [DCCP_RESPOND
] = DCCP_CLOSED
,
874 [DCCP_CLOSING
] = DCCP_CLOSED
,
875 [DCCP_TIME_WAIT
] = DCCP_CLOSED
,
876 [DCCP_CLOSED
] = DCCP_CLOSED
,
879 static int dccp_close_state(struct sock
*sk
)
881 const int next
= dccp_new_state
[sk
->sk_state
];
882 const int ns
= next
& DCCP_STATE_MASK
;
884 if (ns
!= sk
->sk_state
)
885 dccp_set_state(sk
, ns
);
887 return next
& DCCP_ACTION_FIN
;
890 void dccp_close(struct sock
*sk
, long timeout
)
892 struct dccp_sock
*dp
= dccp_sk(sk
);
898 sk
->sk_shutdown
= SHUTDOWN_MASK
;
900 if (sk
->sk_state
== DCCP_LISTEN
) {
901 dccp_set_state(sk
, DCCP_CLOSED
);
904 inet_csk_listen_stop(sk
);
906 goto adjudge_to_death
;
909 sk_stop_timer(sk
, &dp
->dccps_xmit_timer
);
912 * We need to flush the recv. buffs. We do this only on the
913 * descriptor close, not protocol-sourced closes, because the
914 *reader process may not have drained the data yet!
916 /* FIXME: check for unread data */
917 while ((skb
= __skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
921 if (sock_flag(sk
, SOCK_LINGER
) && !sk
->sk_lingertime
) {
922 /* Check zero linger _after_ checking for unread data. */
923 sk
->sk_prot
->disconnect(sk
, 0);
924 } else if (dccp_close_state(sk
)) {
925 dccp_send_close(sk
, 1);
928 sk_stream_wait_close(sk
, timeout
);
931 state
= sk
->sk_state
;
934 atomic_inc(sk
->sk_prot
->orphan_count
);
937 * It is the last release_sock in its life. It will remove backlog.
941 * Now socket is owned by kernel and we acquire BH lock
942 * to finish close. No need to check for user refs.
946 BUG_TRAP(!sock_owned_by_user(sk
));
948 /* Have we already been destroyed by a softirq or backlog? */
949 if (state
!= DCCP_CLOSED
&& sk
->sk_state
== DCCP_CLOSED
)
953 * The last release_sock may have processed the CLOSE or RESET
954 * packet moving sock to CLOSED state, if not we have to fire
955 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
956 * in draft-ietf-dccp-spec-11. -acme
958 if (sk
->sk_state
== DCCP_CLOSING
) {
959 /* FIXME: should start at 2 * RTT */
960 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
961 inet_csk_reset_xmit_timer(sk
, ICSK_TIME_RETRANS
,
962 inet_csk(sk
)->icsk_rto
,
965 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
966 dccp_set_state(sk
, DCCP_CLOSED
);
970 if (sk
->sk_state
== DCCP_CLOSED
)
971 inet_csk_destroy_sock(sk
);
973 /* Otherwise, socket is reprieved until protocol close. */
981 EXPORT_SYMBOL_GPL(dccp_close
);
/*
 * dccp_shutdown - shutdown() hook for DCCP sockets
 *
 * Currently a stub: it only logs the requested @how value.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
990 static int __init
dccp_mib_init(void)
994 dccp_statistics
[0] = alloc_percpu(struct dccp_mib
);
995 if (dccp_statistics
[0] == NULL
)
998 dccp_statistics
[1] = alloc_percpu(struct dccp_mib
);
999 if (dccp_statistics
[1] == NULL
)
1006 free_percpu(dccp_statistics
[0]);
1007 dccp_statistics
[0] = NULL
;
1012 static void dccp_mib_exit(void)
1014 free_percpu(dccp_statistics
[0]);
1015 free_percpu(dccp_statistics
[1]);
1016 dccp_statistics
[0] = dccp_statistics
[1] = NULL
;
1019 static int thash_entries
;
1020 module_param(thash_entries
, int, 0444);
1021 MODULE_PARM_DESC(thash_entries
, "Number of ehash buckets");
#ifdef CONFIG_IP_DCCP_DEBUG
/* Read-only module parameter, exported for the other DCCP modules. */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1031 static int __init
dccp_init(void)
1034 int ehash_order
, bhash_order
, i
;
1037 dccp_hashinfo
.bind_bucket_cachep
=
1038 kmem_cache_create("dccp_bind_bucket",
1039 sizeof(struct inet_bind_bucket
), 0,
1040 SLAB_HWCACHE_ALIGN
, NULL
);
1041 if (!dccp_hashinfo
.bind_bucket_cachep
)
1045 * Size and allocate the main established and bind bucket
1048 * The methodology is similar to that of the buffer cache.
1050 if (num_physpages
>= (128 * 1024))
1051 goal
= num_physpages
>> (21 - PAGE_SHIFT
);
1053 goal
= num_physpages
>> (23 - PAGE_SHIFT
);
1056 goal
= (thash_entries
*
1057 sizeof(struct inet_ehash_bucket
)) >> PAGE_SHIFT
;
1058 for (ehash_order
= 0; (1UL << ehash_order
) < goal
; ehash_order
++)
1061 dccp_hashinfo
.ehash_size
= (1UL << ehash_order
) * PAGE_SIZE
/
1062 sizeof(struct inet_ehash_bucket
);
1063 while (dccp_hashinfo
.ehash_size
&
1064 (dccp_hashinfo
.ehash_size
- 1))
1065 dccp_hashinfo
.ehash_size
--;
1066 dccp_hashinfo
.ehash
= (struct inet_ehash_bucket
*)
1067 __get_free_pages(GFP_ATOMIC
, ehash_order
);
1068 } while (!dccp_hashinfo
.ehash
&& --ehash_order
> 0);
1070 if (!dccp_hashinfo
.ehash
) {
1071 DCCP_CRIT("Failed to allocate DCCP established hash table");
1072 goto out_free_bind_bucket_cachep
;
1075 for (i
= 0; i
< dccp_hashinfo
.ehash_size
; i
++) {
1076 INIT_HLIST_HEAD(&dccp_hashinfo
.ehash
[i
].chain
);
1077 INIT_HLIST_HEAD(&dccp_hashinfo
.ehash
[i
].twchain
);
1080 if (inet_ehash_locks_alloc(&dccp_hashinfo
))
1081 goto out_free_dccp_ehash
;
1083 bhash_order
= ehash_order
;
1086 dccp_hashinfo
.bhash_size
= (1UL << bhash_order
) * PAGE_SIZE
/
1087 sizeof(struct inet_bind_hashbucket
);
1088 if ((dccp_hashinfo
.bhash_size
> (64 * 1024)) &&
1091 dccp_hashinfo
.bhash
= (struct inet_bind_hashbucket
*)
1092 __get_free_pages(GFP_ATOMIC
, bhash_order
);
1093 } while (!dccp_hashinfo
.bhash
&& --bhash_order
>= 0);
1095 if (!dccp_hashinfo
.bhash
) {
1096 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1097 goto out_free_dccp_locks
;
1100 for (i
= 0; i
< dccp_hashinfo
.bhash_size
; i
++) {
1101 spin_lock_init(&dccp_hashinfo
.bhash
[i
].lock
);
1102 INIT_HLIST_HEAD(&dccp_hashinfo
.bhash
[i
].chain
);
1105 rc
= dccp_mib_init();
1107 goto out_free_dccp_bhash
;
1109 rc
= dccp_ackvec_init();
1111 goto out_free_dccp_mib
;
1113 rc
= dccp_sysctl_init();
1115 goto out_ackvec_exit
;
1117 dccp_timestamping_init();
1124 out_free_dccp_bhash
:
1125 free_pages((unsigned long)dccp_hashinfo
.bhash
, bhash_order
);
1126 dccp_hashinfo
.bhash
= NULL
;
1127 out_free_dccp_locks
:
1128 inet_ehash_locks_free(&dccp_hashinfo
);
1129 out_free_dccp_ehash
:
1130 free_pages((unsigned long)dccp_hashinfo
.ehash
, ehash_order
);
1131 dccp_hashinfo
.ehash
= NULL
;
1132 out_free_bind_bucket_cachep
:
1133 kmem_cache_destroy(dccp_hashinfo
.bind_bucket_cachep
);
1134 dccp_hashinfo
.bind_bucket_cachep
= NULL
;
1138 static void __exit
dccp_fini(void)
1141 free_pages((unsigned long)dccp_hashinfo
.bhash
,
1142 get_order(dccp_hashinfo
.bhash_size
*
1143 sizeof(struct inet_bind_hashbucket
)));
1144 free_pages((unsigned long)dccp_hashinfo
.ehash
,
1145 get_order(dccp_hashinfo
.ehash_size
*
1146 sizeof(struct inet_ehash_bucket
)));
1147 inet_ehash_locks_free(&dccp_hashinfo
);
1148 kmem_cache_destroy(dccp_hashinfo
.bind_bucket_cachep
);
1153 module_init(dccp_init
);
1154 module_exit(dccp_fini
);
1156 MODULE_LICENSE("GPL");
1157 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1158 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");