]>
git.proxmox.com Git - mirror_iproute2.git/blob - Modules/tcp_diag.c
2 * tcp_diag.c Module for monitoring TCP sockets.
6 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/config.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/fcntl.h>
18 #include <linux/random.h>
19 #include <linux/cache.h>
20 #include <linux/init.h>
25 #include <net/inet_common.h>
27 #include <linux/inet.h>
28 #include <linux/stddef.h>
32 static struct sock
*tcpnl
;
35 #define TCPDIAG_PUT(skb, attrtype, attrlen) \
36 ({ int rtalen = RTA_LENGTH(attrlen); \
38 if (skb_tailroom(skb) < RTA_ALIGN(rtalen)) goto nlmsg_failure; \
39 rta = (void*)__skb_put(skb, RTA_ALIGN(rtalen)); \
40 rta->rta_type = attrtype; \
41 rta->rta_len = rtalen; \
44 static int tcpdiag_fill(struct sk_buff
*skb
, struct sock
*sk
,
45 int ext
, u32 pid
, u32 seq
)
47 struct tcp_opt
*tp
= &sk
->tp_pinfo
.af_tcp
;
50 struct tcp_info
*info
= NULL
;
51 struct tcpdiag_meminfo
*minfo
= NULL
;
52 unsigned char *b
= skb
->tail
;
54 nlh
= NLMSG_PUT(skb
, pid
, seq
, TCPDIAG_GETSOCK
, sizeof(*r
));
56 if (sk
->state
!= TCP_TIME_WAIT
) {
57 if (ext
& (1<<(TCPDIAG_MEMINFO
-1)))
58 minfo
= TCPDIAG_PUT(skb
, TCPDIAG_MEMINFO
, sizeof(*minfo
));
59 if (ext
& (1<<(TCPDIAG_INFO
-1)))
60 info
= TCPDIAG_PUT(skb
, TCPDIAG_INFO
, sizeof(*info
));
62 r
->tcpdiag_family
= sk
->family
;
63 r
->tcpdiag_state
= sk
->state
;
65 r
->tcpdiag_retrans
= 0;
67 r
->id
.tcpdiag_sport
= sk
->sport
;
68 r
->id
.tcpdiag_dport
= sk
->dport
;
69 r
->id
.tcpdiag_src
[0] = sk
->rcv_saddr
;
70 r
->id
.tcpdiag_dst
[0] = sk
->daddr
;
71 r
->id
.tcpdiag_if
= sk
->bound_dev_if
;
72 *((struct sock
**)&r
->id
.tcpdiag_cookie
) = sk
;
74 if (r
->tcpdiag_state
== TCP_TIME_WAIT
) {
75 struct tcp_tw_bucket
*tw
= (struct tcp_tw_bucket
*)sk
;
76 long tmo
= tw
->ttd
- jiffies
;
80 r
->tcpdiag_state
= tw
->substate
;
82 r
->tcpdiag_expires
= (tmo
*1000+HZ
-1)/HZ
;
83 r
->tcpdiag_rqueue
= 0;
84 r
->tcpdiag_wqueue
= 0;
88 if (r
->tcpdiag_family
== AF_INET6
) {
89 memcpy(r
->id
.tcpdiag_src
, &tw
->v6_rcv_saddr
, 16);
90 memcpy(r
->id
.tcpdiag_dst
, &tw
->v6_daddr
, 16);
93 nlh
->nlmsg_len
= skb
->tail
- b
;
98 if (r
->tcpdiag_family
== AF_INET6
) {
99 memcpy(r
->id
.tcpdiag_src
, &sk
->net_pinfo
.af_inet6
.rcv_saddr
, 16);
100 memcpy(r
->id
.tcpdiag_dst
, &sk
->net_pinfo
.af_inet6
.daddr
, 16);
104 #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ
106 if (tp
->pending
== TCP_TIME_RETRANS
) {
107 r
->tcpdiag_timer
= 1;
108 r
->tcpdiag_retrans
= tp
->retransmits
;
109 r
->tcpdiag_expires
= EXPIRES_IN_MS(tp
->timeout
);
110 } else if (tp
->pending
== TCP_TIME_PROBE0
) {
111 r
->tcpdiag_timer
= 4;
112 r
->tcpdiag_retrans
= tp
->probes_out
;
113 r
->tcpdiag_expires
= EXPIRES_IN_MS(tp
->timeout
);
114 } else if (timer_pending(&sk
->timer
)) {
115 r
->tcpdiag_timer
= 2;
116 r
->tcpdiag_retrans
= tp
->probes_out
;
117 r
->tcpdiag_expires
= EXPIRES_IN_MS(sk
->timer
.expires
);
119 r
->tcpdiag_timer
= 0;
120 r
->tcpdiag_expires
= 0;
124 r
->tcpdiag_rqueue
= tp
->rcv_nxt
- tp
->copied_seq
;
125 r
->tcpdiag_wqueue
= tp
->write_seq
- tp
->snd_una
;
126 r
->tcpdiag_uid
= sock_i_uid(sk
);
127 r
->tcpdiag_inode
= sock_i_ino(sk
);
130 minfo
->tcpdiag_rmem
= atomic_read(&sk
->rmem_alloc
);
131 minfo
->tcpdiag_wmem
= sk
->wmem_queued
;
132 minfo
->tcpdiag_fmem
= sk
->forward_alloc
;
133 minfo
->tcpdiag_tmem
= atomic_read(&sk
->wmem_alloc
);
137 u32 now
= tcp_time_stamp
;
139 info
->tcpi_state
= sk
->state
;
140 info
->tcpi_ca_state
= tp
->ca_state
;
141 info
->tcpi_retransmits
= tp
->retransmits
;
142 info
->tcpi_probes
= tp
->probes_out
;
143 info
->tcpi_backoff
= tp
->backoff
;
144 info
->tcpi_options
= 0;
146 info
->tcpi_options
|= TCPI_OPT_TIMESTAMPS
;
148 info
->tcpi_options
|= TCPI_OPT_SACK
;
150 info
->tcpi_options
|= TCPI_OPT_WSCALE
;
151 info
->tcpi_snd_wscale
= tp
->snd_wscale
;
152 info
->tcpi_rcv_wscale
= tp
->rcv_wscale
;
154 info
->tcpi_snd_wscale
= 0;
155 info
->tcpi_rcv_wscale
= 0;
157 #ifdef CONFIG_INET_ECN
158 if (tp
->ecn_flags
&TCP_ECN_OK
)
159 info
->tcpi_options
|= TCPI_OPT_ECN
;
162 info
->tcpi_rto
= (1000000*tp
->rto
)/HZ
;
163 info
->tcpi_ato
= (1000000*tp
->ack
.ato
)/HZ
;
164 info
->tcpi_snd_mss
= tp
->mss_cache
;
165 info
->tcpi_rcv_mss
= tp
->ack
.rcv_mss
;
167 info
->tcpi_unacked
= tp
->packets_out
;
168 info
->tcpi_sacked
= tp
->sacked_out
;
169 info
->tcpi_lost
= tp
->lost_out
;
170 info
->tcpi_retrans
= tp
->retrans_out
;
171 info
->tcpi_fackets
= tp
->fackets_out
;
173 info
->tcpi_last_data_sent
= ((now
- tp
->lsndtime
)*1000)/HZ
;
174 info
->tcpi_last_ack_sent
= 0;
175 info
->tcpi_last_data_recv
= ((now
- tp
->ack
.lrcvtime
)*1000)/HZ
;
176 info
->tcpi_last_ack_recv
= ((now
- tp
->rcv_tstamp
)*1000)/HZ
;
178 info
->tcpi_pmtu
= tp
->pmtu_cookie
;
179 info
->tcpi_rcv_ssthresh
= tp
->rcv_ssthresh
;
180 info
->tcpi_rtt
= ((1000000*tp
->srtt
)/HZ
)>>3;
181 info
->tcpi_rttvar
= ((1000000*tp
->mdev
)/HZ
)>>2;
182 info
->tcpi_snd_ssthresh
= tp
->snd_ssthresh
;
183 info
->tcpi_snd_cwnd
= tp
->snd_cwnd
;
184 info
->tcpi_advmss
= tp
->advmss
;
185 info
->tcpi_reordering
= tp
->reordering
;
188 nlh
->nlmsg_len
= skb
->tail
- b
;
192 skb_trim(skb
, b
- skb
->data
);
196 extern struct sock
*tcp_v4_lookup(u32 saddr
, u16 sport
, u32 daddr
, u16 dport
, int dif
);
198 extern struct sock
*tcp_v6_lookup(struct in6_addr
*saddr
, u16 sport
,
199 struct in6_addr
*daddr
, u16 dport
,
203 static int tcpdiag_get_exact(struct sk_buff
*in_skb
, struct nlmsghdr
*nlh
)
207 struct tcpdiagreq
*req
= NLMSG_DATA(nlh
);
210 if (req
->tcpdiag_family
== AF_INET
) {
211 sk
= tcp_v4_lookup(req
->id
.tcpdiag_dst
[0], req
->id
.tcpdiag_dport
,
212 req
->id
.tcpdiag_src
[0], req
->id
.tcpdiag_sport
,
216 else if (req
->tcpdiag_family
== AF_INET6
) {
217 sk
= tcp_v6_lookup((struct in6_addr
*)req
->id
.tcpdiag_dst
, req
->id
.tcpdiag_dport
,
218 (struct in6_addr
*)req
->id
.tcpdiag_src
, req
->id
.tcpdiag_sport
,
230 if ((req
->id
.tcpdiag_cookie
[0] != TCPDIAG_NOCOOKIE
||
231 req
->id
.tcpdiag_cookie
[1] != TCPDIAG_NOCOOKIE
) &&
232 sk
!= *((struct sock
**)&req
->id
.tcpdiag_cookie
[0]))
236 rep
= alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg
)+
237 sizeof(struct tcpdiag_meminfo
)+
238 sizeof(struct tcp_info
)+64), GFP_KERNEL
);
242 if (tcpdiag_fill(rep
, sk
, req
->tcpdiag_ext
,
243 NETLINK_CB(in_skb
).pid
,
244 nlh
->nlmsg_seq
) <= 0)
247 err
= netlink_unicast(tcpnl
, rep
, NETLINK_CB(in_skb
).pid
, MSG_DONTWAIT
);
253 if (sk
->state
== TCP_TIME_WAIT
)
254 tcp_tw_put((struct tcp_tw_bucket
*)sk
);
261 int bitstring_match(u32
*a1
, u32
*a2
, int bits
)
263 int words
= bits
>> 5;
268 if (memcmp(a1
, a2
, words
<< 2))
278 mask
= htonl((0xffffffff) << (32 - bits
));
280 if ((w1
^ w2
) & mask
)
288 int tcpdiag_bc_run(char *bc
, int len
, struct sock
*sk
)
292 struct tcpdiag_bc_op
*op
= (struct tcpdiag_bc_op
*)bc
;
300 case TCPDIAG_BC_S_GE
:
301 yes
= (sk
->num
>= op
[1].no
);
303 case TCPDIAG_BC_S_LE
:
304 yes
= (sk
->num
<= op
[1].no
);
306 case TCPDIAG_BC_D_GE
:
307 yes
= (ntohs(sk
->dport
) >= op
[1].no
);
309 case TCPDIAG_BC_D_LE
:
310 yes
= (ntohs(sk
->dport
) <= op
[1].no
);
312 case TCPDIAG_BC_AUTO
:
313 yes
= !(sk
->userlocks
&SOCK_BINDPORT_LOCK
);
315 case TCPDIAG_BC_S_COND
:
316 case TCPDIAG_BC_D_COND
:
318 struct tcpdiag_hostcond
*cond
= (struct tcpdiag_hostcond
*)(op
+1);
321 if (cond
->port
!= -1 &&
322 cond
->port
!= (op
->code
== TCPDIAG_BC_S_COND
? sk
->num
: ntohs(sk
->dport
))) {
327 if (cond
->prefix_len
== 0)
330 if (sk
->family
== AF_INET6
) {
331 if (op
->code
== TCPDIAG_BC_S_COND
)
332 addr
= (u32
*)&sk
->net_pinfo
.af_inet6
.rcv_saddr
;
334 addr
= (u32
*)&sk
->net_pinfo
.af_inet6
.daddr
;
336 if (op
->code
== TCPDIAG_BC_S_COND
)
337 addr
= &sk
->rcv_saddr
;
342 if (bitstring_match(addr
, cond
->addr
, cond
->prefix_len
))
344 if (sk
->family
== AF_INET6
&& cond
->family
== AF_INET
) {
345 if (addr
[0] == 0 && addr
[1] == 0 &&
346 addr
[2] == __constant_htonl(0xffff) &&
347 bitstring_match(addr
+3, cond
->addr
, cond
->prefix_len
))
366 int valid_cc(char *bc
, int len
, int cc
)
369 struct tcpdiag_bc_op
*op
= (struct tcpdiag_bc_op
*)bc
;
383 int tcpdiag_bc_audit(char *bytecode
, int bytecode_len
)
386 int len
= bytecode_len
;
389 struct tcpdiag_bc_op
*op
= (struct tcpdiag_bc_op
*)bc
;
391 //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
393 case TCPDIAG_BC_AUTO
:
394 case TCPDIAG_BC_S_COND
:
395 case TCPDIAG_BC_D_COND
:
396 case TCPDIAG_BC_S_GE
:
397 case TCPDIAG_BC_S_LE
:
398 case TCPDIAG_BC_D_GE
:
399 case TCPDIAG_BC_D_LE
:
400 if (op
->yes
< 4 || op
->yes
> len
+4)
403 if (op
->no
< 4 || op
->no
> len
+4)
406 !valid_cc(bytecode
, bytecode_len
, len
-op
->no
))
410 if (op
->yes
< 4 || op
->yes
> len
+4)
419 return len
== 0 ? 0 : -EINVAL
;
423 int tcpdiag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
427 struct tcpdiagreq
*r
= NLMSG_DATA(cb
->nlh
);
428 struct rtattr
*bc
= NULL
;
430 if (cb
->nlh
->nlmsg_len
> 4+NLMSG_SPACE(sizeof(struct tcpdiagreq
)))
431 bc
= (struct rtattr
*)(r
+1);
434 s_num
= num
= cb
->args
[2];
436 if (cb
->args
[0] == 0) {
437 if (!(r
->tcpdiag_states
&(TCPF_LISTEN
|TCPF_SYN_RECV
)))
440 for (i
= s_i
; i
< TCP_LHTABLE_SIZE
; i
++) {
441 struct sock
*sk
= tcp_listening_hash
[i
];
446 for (sk
= tcp_listening_hash
[i
], num
= 0;
448 sk
= sk
->next
, num
++) {
451 if (!(r
->tcpdiag_states
&TCPF_LISTEN
) ||
454 if (r
->id
.tcpdiag_sport
!= sk
->sport
&& r
->id
.tcpdiag_sport
)
456 if (bc
&& !tcpdiag_bc_run(RTA_DATA(bc
), RTA_PAYLOAD(bc
), sk
))
458 if (tcpdiag_fill(skb
, sk
, r
->tcpdiag_ext
,
459 NETLINK_CB(cb
->skb
).pid
,
460 cb
->nlh
->nlmsg_seq
) <= 0) {
469 s_i
= num
= s_num
= 0;
472 if (!(r
->tcpdiag_states
&~(TCPF_LISTEN
|TCPF_SYN_RECV
)))
475 for (i
= s_i
; i
< tcp_ehash_size
; i
++) {
476 struct tcp_ehash_bucket
*head
= &tcp_ehash
[i
];
482 read_lock_bh(&head
->lock
);
484 for (sk
= head
->chain
, num
= 0;
486 sk
= sk
->next
, num
++) {
489 if (!(r
->tcpdiag_states
&(1<<sk
->state
)))
491 if (r
->id
.tcpdiag_sport
!= sk
->sport
&& r
->id
.tcpdiag_sport
)
493 if (r
->id
.tcpdiag_dport
!= sk
->dport
&& r
->id
.tcpdiag_dport
)
495 if (bc
&& !tcpdiag_bc_run(RTA_DATA(bc
), RTA_PAYLOAD(bc
), sk
))
497 if (tcpdiag_fill(skb
, sk
, r
->tcpdiag_ext
,
498 NETLINK_CB(cb
->skb
).pid
,
499 cb
->nlh
->nlmsg_seq
) <= 0) {
500 read_unlock_bh(&head
->lock
);
505 if (r
->tcpdiag_states
&TCPF_TIME_WAIT
) {
506 for (sk
= tcp_ehash
[i
+tcp_ehash_size
].chain
;
508 sk
= sk
->next
, num
++) {
511 if (!(r
->tcpdiag_states
&(1<<sk
->zapped
)))
513 if (r
->id
.tcpdiag_sport
!= sk
->sport
&& r
->id
.tcpdiag_sport
)
515 if (r
->id
.tcpdiag_dport
!= sk
->dport
&& r
->id
.tcpdiag_dport
)
517 if (bc
&& !tcpdiag_bc_run(RTA_DATA(bc
), RTA_PAYLOAD(bc
), sk
))
519 if (tcpdiag_fill(skb
, sk
, r
->tcpdiag_ext
,
520 NETLINK_CB(cb
->skb
).pid
,
521 cb
->nlh
->nlmsg_seq
) <= 0) {
522 read_unlock_bh(&head
->lock
);
527 read_unlock_bh(&head
->lock
);
536 static int tcpdiag_dump_done(struct netlink_callback
*cb
)
542 static __inline__
int
543 tcpdiag_rcv_msg(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
545 if (!(nlh
->nlmsg_flags
&NLM_F_REQUEST
))
548 if (nlh
->nlmsg_type
!= TCPDIAG_GETSOCK
)
551 if (NLMSG_LENGTH(sizeof(struct tcpdiagreq
)) > skb
->len
)
554 if (nlh
->nlmsg_flags
&NLM_F_DUMP
) {
555 if (nlh
->nlmsg_len
> 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq
))) {
556 struct rtattr
*rta
= (struct rtattr
*)(NLMSG_DATA(nlh
) + sizeof(struct tcpdiagreq
));
557 if (rta
->rta_type
!= TCPDIAG_REQ_BYTECODE
||
559 rta
->rta_len
> nlh
->nlmsg_len
- NLMSG_SPACE(sizeof(struct tcpdiagreq
)))
561 if (tcpdiag_bc_audit(RTA_DATA(rta
), RTA_PAYLOAD(rta
)))
564 return netlink_dump_start(tcpnl
, skb
, nlh
,
568 return tcpdiag_get_exact(skb
, nlh
);
576 extern __inline__
void tcpdiag_rcv_skb(struct sk_buff
*skb
)
579 struct nlmsghdr
* nlh
;
581 if (skb
->len
>= NLMSG_SPACE(0)) {
582 nlh
= (struct nlmsghdr
*)skb
->data
;
583 if (nlh
->nlmsg_len
< sizeof(*nlh
) || skb
->len
< nlh
->nlmsg_len
)
585 err
= tcpdiag_rcv_msg(skb
, nlh
);
587 netlink_ack(skb
, nlh
, err
);
591 static void tcpdiag_rcv(struct sock
*sk
, int len
)
595 while ((skb
= skb_dequeue(&sk
->receive_queue
)) != NULL
) {
596 tcpdiag_rcv_skb(skb
);
601 static int __init
tcpdiag_init(void)
603 tcpnl
= netlink_kernel_create(NETLINK_TCPDIAG
, tcpdiag_rcv
);
609 static void __exit
tcpdiag_exit(void)
611 printk(KERN_INFO
"Caution: unloading tcp_diag is not very well supported. Nothing to worry, but yet.\n");
613 sock_release(tcpnl
->socket
);
616 module_init(tcpdiag_init
);
617 module_exit(tcpdiag_exit
);
621 * compile-command: "gcc -DMOPS -DMODULE -D__KERNEL__ -I../include -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -c tcp_diag.c"