1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * inet_diag.c Module for monitoring INET transport protocols sockets.
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/fcntl.h>
12 #include <linux/random.h>
13 #include <linux/slab.h>
14 #include <linux/cache.h>
15 #include <linux/init.h>
16 #include <linux/time.h>
21 #include <net/inet_common.h>
22 #include <net/inet_connection_sock.h>
23 #include <net/inet_hashtables.h>
24 #include <net/inet_timewait_sock.h>
25 #include <net/inet6_hashtables.h>
26 #include <net/bpf_sk_storage.h>
27 #include <net/netlink.h>
29 #include <linux/inet.h>
30 #include <linux/stddef.h>
32 #include <linux/inet_diag.h>
33 #include <linux/sock_diag.h>
35 static const struct inet_diag_handler
**inet_diag_table
;
37 struct inet_diag_entry
{
46 #ifdef CONFIG_SOCK_CGROUP_DATA
51 static DEFINE_MUTEX(inet_diag_table_mutex
);
53 static const struct inet_diag_handler
*inet_diag_lock_handler(int proto
)
55 if (proto
< 0 || proto
>= IPPROTO_MAX
) {
56 mutex_lock(&inet_diag_table_mutex
);
57 return ERR_PTR(-ENOENT
);
60 if (!inet_diag_table
[proto
])
61 sock_load_diag_module(AF_INET
, proto
);
63 mutex_lock(&inet_diag_table_mutex
);
64 if (!inet_diag_table
[proto
])
65 return ERR_PTR(-ENOENT
);
67 return inet_diag_table
[proto
];
70 static void inet_diag_unlock_handler(const struct inet_diag_handler
*handler
)
72 mutex_unlock(&inet_diag_table_mutex
);
75 void inet_diag_msg_common_fill(struct inet_diag_msg
*r
, struct sock
*sk
)
77 r
->idiag_family
= sk
->sk_family
;
79 r
->id
.idiag_sport
= htons(sk
->sk_num
);
80 r
->id
.idiag_dport
= sk
->sk_dport
;
81 r
->id
.idiag_if
= sk
->sk_bound_dev_if
;
82 sock_diag_save_cookie(sk
, r
->id
.idiag_cookie
);
84 #if IS_ENABLED(CONFIG_IPV6)
85 if (sk
->sk_family
== AF_INET6
) {
86 *(struct in6_addr
*)r
->id
.idiag_src
= sk
->sk_v6_rcv_saddr
;
87 *(struct in6_addr
*)r
->id
.idiag_dst
= sk
->sk_v6_daddr
;
91 memset(&r
->id
.idiag_src
, 0, sizeof(r
->id
.idiag_src
));
92 memset(&r
->id
.idiag_dst
, 0, sizeof(r
->id
.idiag_dst
));
94 r
->id
.idiag_src
[0] = sk
->sk_rcv_saddr
;
95 r
->id
.idiag_dst
[0] = sk
->sk_daddr
;
98 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill
);
100 static size_t inet_sk_attr_size(struct sock
*sk
,
101 const struct inet_diag_req_v2
*req
,
104 const struct inet_diag_handler
*handler
;
107 handler
= inet_diag_table
[req
->sdiag_protocol
];
108 if (handler
&& handler
->idiag_get_aux_size
)
109 aux
= handler
->idiag_get_aux_size(sk
, net_admin
);
111 return nla_total_size(sizeof(struct tcp_info
))
112 + nla_total_size(sizeof(struct inet_diag_msg
))
113 + inet_diag_msg_attrs_size()
114 + nla_total_size(sizeof(struct inet_diag_meminfo
))
115 + nla_total_size(SK_MEMINFO_VARS
* sizeof(u32
))
116 + nla_total_size(TCP_CA_NAME_MAX
)
117 + nla_total_size(sizeof(struct tcpvegas_info
))
122 int inet_diag_msg_attrs_fill(struct sock
*sk
, struct sk_buff
*skb
,
123 struct inet_diag_msg
*r
, int ext
,
124 struct user_namespace
*user_ns
,
127 const struct inet_sock
*inet
= inet_sk(sk
);
128 struct inet_diag_sockopt inet_sockopt
;
130 if (nla_put_u8(skb
, INET_DIAG_SHUTDOWN
, sk
->sk_shutdown
))
133 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
134 * hence this needs to be included regardless of socket family.
136 if (ext
& (1 << (INET_DIAG_TOS
- 1)))
137 if (nla_put_u8(skb
, INET_DIAG_TOS
, inet
->tos
) < 0)
140 #if IS_ENABLED(CONFIG_IPV6)
141 if (r
->idiag_family
== AF_INET6
) {
142 if (ext
& (1 << (INET_DIAG_TCLASS
- 1)))
143 if (nla_put_u8(skb
, INET_DIAG_TCLASS
,
144 inet6_sk(sk
)->tclass
) < 0)
147 if (((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
)) &&
148 nla_put_u8(skb
, INET_DIAG_SKV6ONLY
, ipv6_only_sock(sk
)))
153 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
, sk
->sk_mark
))
156 if (ext
& (1 << (INET_DIAG_CLASS_ID
- 1)) ||
157 ext
& (1 << (INET_DIAG_TCLASS
- 1))) {
160 #ifdef CONFIG_SOCK_CGROUP_DATA
161 classid
= sock_cgroup_classid(&sk
->sk_cgrp_data
);
163 /* Fallback to socket priority if class id isn't set.
164 * Classful qdiscs use it as direct reference to class.
165 * For cgroup2 classid is always zero.
168 classid
= sk
->sk_priority
;
170 if (nla_put_u32(skb
, INET_DIAG_CLASS_ID
, classid
))
174 #ifdef CONFIG_SOCK_CGROUP_DATA
175 if (nla_put_u64_64bit(skb
, INET_DIAG_CGROUP_ID
,
176 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)),
181 r
->idiag_uid
= from_kuid_munged(user_ns
, sock_i_uid(sk
));
182 r
->idiag_inode
= sock_i_ino(sk
);
184 memset(&inet_sockopt
, 0, sizeof(inet_sockopt
));
185 inet_sockopt
.recverr
= inet
->recverr
;
186 inet_sockopt
.is_icsk
= inet
->is_icsk
;
187 inet_sockopt
.freebind
= inet
->freebind
;
188 inet_sockopt
.hdrincl
= inet
->hdrincl
;
189 inet_sockopt
.mc_loop
= inet
->mc_loop
;
190 inet_sockopt
.transparent
= inet
->transparent
;
191 inet_sockopt
.mc_all
= inet
->mc_all
;
192 inet_sockopt
.nodefrag
= inet
->nodefrag
;
193 inet_sockopt
.bind_address_no_port
= inet
->bind_address_no_port
;
194 inet_sockopt
.recverr_rfc4884
= inet
->recverr_rfc4884
;
195 inet_sockopt
.defer_connect
= inet
->defer_connect
;
196 if (nla_put(skb
, INET_DIAG_SOCKOPT
, sizeof(inet_sockopt
),
204 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill
);
206 static int inet_diag_parse_attrs(const struct nlmsghdr
*nlh
, int hdrlen
,
207 struct nlattr
**req_nlas
)
212 nlmsg_for_each_attr(nla
, nlh
, hdrlen
, remaining
) {
213 int type
= nla_type(nla
);
215 if (type
== INET_DIAG_REQ_PROTOCOL
&& nla_len(nla
) != sizeof(u32
))
218 if (type
< __INET_DIAG_REQ_MAX
)
219 req_nlas
[type
] = nla
;
224 static int inet_diag_get_protocol(const struct inet_diag_req_v2
*req
,
225 const struct inet_diag_dump_data
*data
)
227 if (data
->req_nlas
[INET_DIAG_REQ_PROTOCOL
])
228 return nla_get_u32(data
->req_nlas
[INET_DIAG_REQ_PROTOCOL
]);
229 return req
->sdiag_protocol
;
232 #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
234 int inet_sk_diag_fill(struct sock
*sk
, struct inet_connection_sock
*icsk
,
235 struct sk_buff
*skb
, struct netlink_callback
*cb
,
236 const struct inet_diag_req_v2
*req
,
237 u16 nlmsg_flags
, bool net_admin
)
239 const struct tcp_congestion_ops
*ca_ops
;
240 const struct inet_diag_handler
*handler
;
241 struct inet_diag_dump_data
*cb_data
;
242 int ext
= req
->idiag_ext
;
243 struct inet_diag_msg
*r
;
244 struct nlmsghdr
*nlh
;
249 handler
= inet_diag_table
[inet_diag_get_protocol(req
, cb_data
)];
252 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
253 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
258 BUG_ON(!sk_fullsock(sk
));
260 inet_diag_msg_common_fill(r
, sk
);
261 r
->idiag_state
= sk
->sk_state
;
263 r
->idiag_retrans
= 0;
265 if (inet_diag_msg_attrs_fill(sk
, skb
, r
, ext
,
266 sk_user_ns(NETLINK_CB(cb
->skb
).sk
),
270 if (ext
& (1 << (INET_DIAG_MEMINFO
- 1))) {
271 struct inet_diag_meminfo minfo
= {
272 .idiag_rmem
= sk_rmem_alloc_get(sk
),
273 .idiag_wmem
= READ_ONCE(sk
->sk_wmem_queued
),
274 .idiag_fmem
= sk
->sk_forward_alloc
,
275 .idiag_tmem
= sk_wmem_alloc_get(sk
),
278 if (nla_put(skb
, INET_DIAG_MEMINFO
, sizeof(minfo
), &minfo
) < 0)
282 if (ext
& (1 << (INET_DIAG_SKMEMINFO
- 1)))
283 if (sock_diag_put_meminfo(sk
, skb
, INET_DIAG_SKMEMINFO
))
287 * RAW sockets might have user-defined protocols assigned,
288 * so report the one supplied on socket creation.
290 if (sk
->sk_type
== SOCK_RAW
) {
291 if (nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))
296 handler
->idiag_get_info(sk
, r
, NULL
);
300 if (icsk
->icsk_pending
== ICSK_TIME_RETRANS
||
301 icsk
->icsk_pending
== ICSK_TIME_REO_TIMEOUT
||
302 icsk
->icsk_pending
== ICSK_TIME_LOSS_PROBE
) {
304 r
->idiag_retrans
= icsk
->icsk_retransmits
;
306 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
307 } else if (icsk
->icsk_pending
== ICSK_TIME_PROBE0
) {
309 r
->idiag_retrans
= icsk
->icsk_probes_out
;
311 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
312 } else if (timer_pending(&sk
->sk_timer
)) {
314 r
->idiag_retrans
= icsk
->icsk_probes_out
;
316 jiffies_delta_to_msecs(sk
->sk_timer
.expires
- jiffies
);
319 r
->idiag_expires
= 0;
322 if ((ext
& (1 << (INET_DIAG_INFO
- 1))) && handler
->idiag_info_size
) {
323 attr
= nla_reserve_64bit(skb
, INET_DIAG_INFO
,
324 handler
->idiag_info_size
,
329 info
= nla_data(attr
);
332 if (ext
& (1 << (INET_DIAG_CONG
- 1))) {
336 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
338 err
= nla_put_string(skb
, INET_DIAG_CONG
, ca_ops
->name
);
344 handler
->idiag_get_info(sk
, r
, info
);
346 if (ext
& (1 << (INET_DIAG_INFO
- 1)) && handler
->idiag_get_aux
)
347 if (handler
->idiag_get_aux(sk
, net_admin
, skb
) < 0)
350 if (sk
->sk_state
< TCP_TIME_WAIT
) {
351 union tcp_cc_info info
;
356 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
357 if (ca_ops
&& ca_ops
->get_info
)
358 sz
= ca_ops
->get_info(sk
, ext
, &attr
, &info
);
360 if (sz
&& nla_put(skb
, attr
, sz
, &info
) < 0)
364 /* Keep it at the end for potential retry with a larger skb,
365 * or else do best-effort fitting, which is only done for the
368 if (cb_data
->bpf_stg_diag
) {
369 bool first_nlmsg
= ((unsigned char *)nlh
== skb
->data
);
370 unsigned int prev_min_dump_alloc
;
371 unsigned int total_nla_size
= 0;
372 unsigned int msg_len
;
375 msg_len
= skb_tail_pointer(skb
) - (unsigned char *)nlh
;
376 err
= bpf_sk_storage_diag_put(cb_data
->bpf_stg_diag
, sk
, skb
,
377 INET_DIAG_SK_BPF_STORAGES
,
383 total_nla_size
+= msg_len
;
384 prev_min_dump_alloc
= cb
->min_dump_alloc
;
385 if (total_nla_size
> prev_min_dump_alloc
)
386 cb
->min_dump_alloc
= min_t(u32
, total_nla_size
,
387 MAX_DUMP_ALLOC_SIZE
);
392 if (cb
->min_dump_alloc
> prev_min_dump_alloc
)
393 /* Retry with pskb_expand_head() with
394 * __GFP_DIRECT_RECLAIM
398 WARN_ON_ONCE(total_nla_size
<= prev_min_dump_alloc
);
400 /* Send what we have for this sk
401 * and move on to the next sk in the following
411 nlmsg_cancel(skb
, nlh
);
414 EXPORT_SYMBOL_GPL(inet_sk_diag_fill
);
416 static int inet_twsk_diag_fill(struct sock
*sk
,
418 struct netlink_callback
*cb
,
421 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
422 struct inet_diag_msg
*r
;
423 struct nlmsghdr
*nlh
;
426 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
427 cb
->nlh
->nlmsg_seq
, cb
->nlh
->nlmsg_type
,
428 sizeof(*r
), nlmsg_flags
);
433 BUG_ON(tw
->tw_state
!= TCP_TIME_WAIT
);
435 inet_diag_msg_common_fill(r
, sk
);
436 r
->idiag_retrans
= 0;
438 r
->idiag_state
= tw
->tw_substate
;
440 tmo
= tw
->tw_timer
.expires
- jiffies
;
441 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
451 static int inet_req_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
452 struct netlink_callback
*cb
,
453 u16 nlmsg_flags
, bool net_admin
)
455 struct request_sock
*reqsk
= inet_reqsk(sk
);
456 struct inet_diag_msg
*r
;
457 struct nlmsghdr
*nlh
;
460 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
461 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
466 inet_diag_msg_common_fill(r
, sk
);
467 r
->idiag_state
= TCP_SYN_RECV
;
469 r
->idiag_retrans
= reqsk
->num_retrans
;
471 BUILD_BUG_ON(offsetof(struct inet_request_sock
, ir_cookie
) !=
472 offsetof(struct sock
, sk_cookie
));
474 tmo
= inet_reqsk(sk
)->rsk_timer
.expires
- jiffies
;
475 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
481 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
,
482 inet_rsk(reqsk
)->ir_mark
))
489 static int sk_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
490 struct netlink_callback
*cb
,
491 const struct inet_diag_req_v2
*r
,
492 u16 nlmsg_flags
, bool net_admin
)
494 if (sk
->sk_state
== TCP_TIME_WAIT
)
495 return inet_twsk_diag_fill(sk
, skb
, cb
, nlmsg_flags
);
497 if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
498 return inet_req_diag_fill(sk
, skb
, cb
, nlmsg_flags
, net_admin
);
500 return inet_sk_diag_fill(sk
, inet_csk(sk
), skb
, cb
, r
, nlmsg_flags
,
504 struct sock
*inet_diag_find_one_icsk(struct net
*net
,
505 struct inet_hashinfo
*hashinfo
,
506 const struct inet_diag_req_v2
*req
)
511 if (req
->sdiag_family
== AF_INET
)
512 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[0],
513 req
->id
.idiag_dport
, req
->id
.idiag_src
[0],
514 req
->id
.idiag_sport
, req
->id
.idiag_if
);
515 #if IS_ENABLED(CONFIG_IPV6)
516 else if (req
->sdiag_family
== AF_INET6
) {
517 if (ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_dst
) &&
518 ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_src
))
519 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[3],
520 req
->id
.idiag_dport
, req
->id
.idiag_src
[3],
521 req
->id
.idiag_sport
, req
->id
.idiag_if
);
523 sk
= inet6_lookup(net
, hashinfo
, NULL
, 0,
524 (struct in6_addr
*)req
->id
.idiag_dst
,
526 (struct in6_addr
*)req
->id
.idiag_src
,
533 return ERR_PTR(-EINVAL
);
537 return ERR_PTR(-ENOENT
);
539 if (sock_diag_check_cookie(sk
, req
->id
.idiag_cookie
)) {
541 return ERR_PTR(-ENOENT
);
546 EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk
);
548 int inet_diag_dump_one_icsk(struct inet_hashinfo
*hashinfo
,
549 struct netlink_callback
*cb
,
550 const struct inet_diag_req_v2
*req
)
552 struct sk_buff
*in_skb
= cb
->skb
;
553 bool net_admin
= netlink_net_capable(in_skb
, CAP_NET_ADMIN
);
554 struct net
*net
= sock_net(in_skb
->sk
);
559 sk
= inet_diag_find_one_icsk(net
, hashinfo
, req
);
563 rep
= nlmsg_new(inet_sk_attr_size(sk
, req
, net_admin
), GFP_KERNEL
);
569 err
= sk_diag_fill(sk
, rep
, cb
, req
, 0, net_admin
);
571 WARN_ON(err
== -EMSGSIZE
);
575 err
= netlink_unicast(net
->diag_nlsk
, rep
, NETLINK_CB(in_skb
).portid
,
586 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk
);
588 static int inet_diag_cmd_exact(int cmd
, struct sk_buff
*in_skb
,
589 const struct nlmsghdr
*nlh
,
591 const struct inet_diag_req_v2
*req
)
593 const struct inet_diag_handler
*handler
;
594 struct inet_diag_dump_data dump_data
;
597 memset(&dump_data
, 0, sizeof(dump_data
));
598 err
= inet_diag_parse_attrs(nlh
, hdrlen
, dump_data
.req_nlas
);
602 protocol
= inet_diag_get_protocol(req
, &dump_data
);
604 handler
= inet_diag_lock_handler(protocol
);
605 if (IS_ERR(handler
)) {
606 err
= PTR_ERR(handler
);
607 } else if (cmd
== SOCK_DIAG_BY_FAMILY
) {
608 struct netlink_callback cb
= {
613 err
= handler
->dump_one(&cb
, req
);
614 } else if (cmd
== SOCK_DESTROY
&& handler
->destroy
) {
615 err
= handler
->destroy(in_skb
, req
);
619 inet_diag_unlock_handler(handler
);
624 static int bitstring_match(const __be32
*a1
, const __be32
*a2
, int bits
)
626 int words
= bits
>> 5;
631 if (memcmp(a1
, a2
, words
<< 2))
641 mask
= htonl((0xffffffff) << (32 - bits
));
643 if ((w1
^ w2
) & mask
)
650 static int inet_diag_bc_run(const struct nlattr
*_bc
,
651 const struct inet_diag_entry
*entry
)
653 const void *bc
= nla_data(_bc
);
654 int len
= nla_len(_bc
);
658 const struct inet_diag_bc_op
*op
= bc
;
661 case INET_DIAG_BC_NOP
:
663 case INET_DIAG_BC_JMP
:
666 case INET_DIAG_BC_S_EQ
:
667 yes
= entry
->sport
== op
[1].no
;
669 case INET_DIAG_BC_S_GE
:
670 yes
= entry
->sport
>= op
[1].no
;
672 case INET_DIAG_BC_S_LE
:
673 yes
= entry
->sport
<= op
[1].no
;
675 case INET_DIAG_BC_D_EQ
:
676 yes
= entry
->dport
== op
[1].no
;
678 case INET_DIAG_BC_D_GE
:
679 yes
= entry
->dport
>= op
[1].no
;
681 case INET_DIAG_BC_D_LE
:
682 yes
= entry
->dport
<= op
[1].no
;
684 case INET_DIAG_BC_AUTO
:
685 yes
= !(entry
->userlocks
& SOCK_BINDPORT_LOCK
);
687 case INET_DIAG_BC_S_COND
:
688 case INET_DIAG_BC_D_COND
: {
689 const struct inet_diag_hostcond
*cond
;
692 cond
= (const struct inet_diag_hostcond
*)(op
+ 1);
693 if (cond
->port
!= -1 &&
694 cond
->port
!= (op
->code
== INET_DIAG_BC_S_COND
?
695 entry
->sport
: entry
->dport
)) {
700 if (op
->code
== INET_DIAG_BC_S_COND
)
705 if (cond
->family
!= AF_UNSPEC
&&
706 cond
->family
!= entry
->family
) {
707 if (entry
->family
== AF_INET6
&&
708 cond
->family
== AF_INET
) {
709 if (addr
[0] == 0 && addr
[1] == 0 &&
710 addr
[2] == htonl(0xffff) &&
711 bitstring_match(addr
+ 3,
720 if (cond
->prefix_len
== 0)
722 if (bitstring_match(addr
, cond
->addr
,
728 case INET_DIAG_BC_DEV_COND
: {
731 ifindex
= *((const u32
*)(op
+ 1));
732 if (ifindex
!= entry
->ifindex
)
736 case INET_DIAG_BC_MARK_COND
: {
737 struct inet_diag_markcond
*cond
;
739 cond
= (struct inet_diag_markcond
*)(op
+ 1);
740 if ((entry
->mark
& cond
->mask
) != cond
->mark
)
744 #ifdef CONFIG_SOCK_CGROUP_DATA
745 case INET_DIAG_BC_CGROUP_COND
: {
748 cgroup_id
= get_unaligned((const u64
*)(op
+ 1));
749 if (cgroup_id
!= entry
->cgroup_id
)
767 /* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
769 static void entry_fill_addrs(struct inet_diag_entry
*entry
,
770 const struct sock
*sk
)
772 #if IS_ENABLED(CONFIG_IPV6)
773 if (sk
->sk_family
== AF_INET6
) {
774 entry
->saddr
= sk
->sk_v6_rcv_saddr
.s6_addr32
;
775 entry
->daddr
= sk
->sk_v6_daddr
.s6_addr32
;
779 entry
->saddr
= &sk
->sk_rcv_saddr
;
780 entry
->daddr
= &sk
->sk_daddr
;
784 int inet_diag_bc_sk(const struct nlattr
*bc
, struct sock
*sk
)
786 struct inet_sock
*inet
= inet_sk(sk
);
787 struct inet_diag_entry entry
;
792 entry
.family
= sk
->sk_family
;
793 entry_fill_addrs(&entry
, sk
);
794 entry
.sport
= inet
->inet_num
;
795 entry
.dport
= ntohs(inet
->inet_dport
);
796 entry
.ifindex
= sk
->sk_bound_dev_if
;
797 entry
.userlocks
= sk_fullsock(sk
) ? sk
->sk_userlocks
: 0;
799 entry
.mark
= sk
->sk_mark
;
800 else if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
801 entry
.mark
= inet_rsk(inet_reqsk(sk
))->ir_mark
;
804 #ifdef CONFIG_SOCK_CGROUP_DATA
805 entry
.cgroup_id
= sk_fullsock(sk
) ?
806 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)) : 0;
809 return inet_diag_bc_run(bc
, &entry
);
811 EXPORT_SYMBOL_GPL(inet_diag_bc_sk
);
813 static int valid_cc(const void *bc
, int len
, int cc
)
816 const struct inet_diag_bc_op
*op
= bc
;
822 if (op
->yes
< 4 || op
->yes
& 3)
830 /* data is u32 ifindex */
831 static bool valid_devcond(const struct inet_diag_bc_op
*op
, int len
,
834 /* Check ifindex space. */
835 *min_len
+= sizeof(u32
);
841 /* Validate an inet_diag_hostcond. */
842 static bool valid_hostcond(const struct inet_diag_bc_op
*op
, int len
,
845 struct inet_diag_hostcond
*cond
;
848 /* Check hostcond space. */
849 *min_len
+= sizeof(struct inet_diag_hostcond
);
852 cond
= (struct inet_diag_hostcond
*)(op
+ 1);
854 /* Check address family and address length. */
855 switch (cond
->family
) {
860 addr_len
= sizeof(struct in_addr
);
863 addr_len
= sizeof(struct in6_addr
);
868 *min_len
+= addr_len
;
872 /* Check prefix length (in bits) vs address length (in bytes). */
873 if (cond
->prefix_len
> 8 * addr_len
)
879 /* Validate a port comparison operator. */
880 static bool valid_port_comparison(const struct inet_diag_bc_op
*op
,
881 int len
, int *min_len
)
883 /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
884 *min_len
+= sizeof(struct inet_diag_bc_op
);
890 static bool valid_markcond(const struct inet_diag_bc_op
*op
, int len
,
893 *min_len
+= sizeof(struct inet_diag_markcond
);
894 return len
>= *min_len
;
897 #ifdef CONFIG_SOCK_CGROUP_DATA
898 static bool valid_cgroupcond(const struct inet_diag_bc_op
*op
, int len
,
901 *min_len
+= sizeof(u64
);
902 return len
>= *min_len
;
906 static int inet_diag_bc_audit(const struct nlattr
*attr
,
907 const struct sk_buff
*skb
)
909 bool net_admin
= netlink_net_capable(skb
, CAP_NET_ADMIN
);
910 const void *bytecode
, *bc
;
911 int bytecode_len
, len
;
913 if (!attr
|| nla_len(attr
) < sizeof(struct inet_diag_bc_op
))
916 bytecode
= bc
= nla_data(attr
);
917 len
= bytecode_len
= nla_len(attr
);
920 int min_len
= sizeof(struct inet_diag_bc_op
);
921 const struct inet_diag_bc_op
*op
= bc
;
924 case INET_DIAG_BC_S_COND
:
925 case INET_DIAG_BC_D_COND
:
926 if (!valid_hostcond(bc
, len
, &min_len
))
929 case INET_DIAG_BC_DEV_COND
:
930 if (!valid_devcond(bc
, len
, &min_len
))
933 case INET_DIAG_BC_S_EQ
:
934 case INET_DIAG_BC_S_GE
:
935 case INET_DIAG_BC_S_LE
:
936 case INET_DIAG_BC_D_EQ
:
937 case INET_DIAG_BC_D_GE
:
938 case INET_DIAG_BC_D_LE
:
939 if (!valid_port_comparison(bc
, len
, &min_len
))
942 case INET_DIAG_BC_MARK_COND
:
945 if (!valid_markcond(bc
, len
, &min_len
))
948 #ifdef CONFIG_SOCK_CGROUP_DATA
949 case INET_DIAG_BC_CGROUP_COND
:
950 if (!valid_cgroupcond(bc
, len
, &min_len
))
954 case INET_DIAG_BC_AUTO
:
955 case INET_DIAG_BC_JMP
:
956 case INET_DIAG_BC_NOP
:
962 if (op
->code
!= INET_DIAG_BC_NOP
) {
963 if (op
->no
< min_len
|| op
->no
> len
+ 4 || op
->no
& 3)
966 !valid_cc(bytecode
, bytecode_len
, len
- op
->no
))
970 if (op
->yes
< min_len
|| op
->yes
> len
+ 4 || op
->yes
& 3)
975 return len
== 0 ? 0 : -EINVAL
;
978 static void twsk_build_assert(void)
980 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_family
) !=
981 offsetof(struct sock
, sk_family
));
983 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_num
) !=
984 offsetof(struct inet_sock
, inet_num
));
986 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_dport
) !=
987 offsetof(struct inet_sock
, inet_dport
));
989 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_rcv_saddr
) !=
990 offsetof(struct inet_sock
, inet_rcv_saddr
));
992 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_daddr
) !=
993 offsetof(struct inet_sock
, inet_daddr
));
995 #if IS_ENABLED(CONFIG_IPV6)
996 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_rcv_saddr
) !=
997 offsetof(struct sock
, sk_v6_rcv_saddr
));
999 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_daddr
) !=
1000 offsetof(struct sock
, sk_v6_daddr
));
1004 void inet_diag_dump_icsk(struct inet_hashinfo
*hashinfo
, struct sk_buff
*skb
,
1005 struct netlink_callback
*cb
,
1006 const struct inet_diag_req_v2
*r
)
1008 bool net_admin
= netlink_net_capable(cb
->skb
, CAP_NET_ADMIN
);
1009 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1010 struct net
*net
= sock_net(skb
->sk
);
1011 u32 idiag_states
= r
->idiag_states
;
1012 int i
, num
, s_i
, s_num
;
1016 bc
= cb_data
->inet_diag_nla_bc
;
1017 if (idiag_states
& TCPF_SYN_RECV
)
1018 idiag_states
|= TCPF_NEW_SYN_RECV
;
1020 s_num
= num
= cb
->args
[2];
1022 if (cb
->args
[0] == 0) {
1023 if (!(idiag_states
& TCPF_LISTEN
) || r
->id
.idiag_dport
)
1024 goto skip_listen_ht
;
1026 for (i
= s_i
; i
< INET_LHTABLE_SIZE
; i
++) {
1027 struct inet_listen_hashbucket
*ilb
;
1028 struct hlist_nulls_node
*node
;
1031 ilb
= &hashinfo
->listening_hash
[i
];
1032 spin_lock(&ilb
->lock
);
1033 sk_nulls_for_each(sk
, node
, &ilb
->nulls_head
) {
1034 struct inet_sock
*inet
= inet_sk(sk
);
1036 if (!net_eq(sock_net(sk
), net
))
1044 if (r
->sdiag_family
!= AF_UNSPEC
&&
1045 sk
->sk_family
!= r
->sdiag_family
)
1048 if (r
->id
.idiag_sport
!= inet
->inet_sport
&&
1052 if (!inet_diag_bc_sk(bc
, sk
))
1055 if (inet_sk_diag_fill(sk
, inet_csk(sk
), skb
,
1058 spin_unlock(&ilb
->lock
);
1065 spin_unlock(&ilb
->lock
);
1071 s_i
= num
= s_num
= 0;
1074 if (!(idiag_states
& ~TCPF_LISTEN
))
1078 for (i
= s_i
; i
<= hashinfo
->ehash_mask
; i
++) {
1079 struct inet_ehash_bucket
*head
= &hashinfo
->ehash
[i
];
1080 spinlock_t
*lock
= inet_ehash_lockp(hashinfo
, i
);
1081 struct hlist_nulls_node
*node
;
1082 struct sock
*sk_arr
[SKARR_SZ
];
1083 int num_arr
[SKARR_SZ
];
1084 int idx
, accum
, res
;
1086 if (hlist_nulls_empty(&head
->chain
))
1096 sk_nulls_for_each(sk
, node
, &head
->chain
) {
1099 if (!net_eq(sock_net(sk
), net
))
1103 state
= (sk
->sk_state
== TCP_TIME_WAIT
) ?
1104 inet_twsk(sk
)->tw_substate
: sk
->sk_state
;
1105 if (!(idiag_states
& (1 << state
)))
1107 if (r
->sdiag_family
!= AF_UNSPEC
&&
1108 sk
->sk_family
!= r
->sdiag_family
)
1110 if (r
->id
.idiag_sport
!= htons(sk
->sk_num
) &&
1113 if (r
->id
.idiag_dport
!= sk
->sk_dport
&&
1116 twsk_build_assert();
1118 if (!inet_diag_bc_sk(bc
, sk
))
1121 if (!refcount_inc_not_zero(&sk
->sk_refcnt
))
1124 num_arr
[accum
] = num
;
1126 if (++accum
== SKARR_SZ
)
1131 spin_unlock_bh(lock
);
1133 for (idx
= 0; idx
< accum
; idx
++) {
1135 res
= sk_diag_fill(sk_arr
[idx
], skb
, cb
, r
,
1136 NLM_F_MULTI
, net_admin
);
1140 sock_gen_put(sk_arr
[idx
]);
1145 if (accum
== SKARR_SZ
) {
1157 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk
);
1159 static int __inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
,
1160 const struct inet_diag_req_v2
*r
)
1162 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1163 const struct inet_diag_handler
*handler
;
1164 u32 prev_min_dump_alloc
;
1165 int protocol
, err
= 0;
1167 protocol
= inet_diag_get_protocol(r
, cb_data
);
1170 prev_min_dump_alloc
= cb
->min_dump_alloc
;
1171 handler
= inet_diag_lock_handler(protocol
);
1172 if (!IS_ERR(handler
))
1173 handler
->dump(skb
, cb
, r
);
1175 err
= PTR_ERR(handler
);
1176 inet_diag_unlock_handler(handler
);
1178 /* The skb is not large enough to fit one sk info and
1179 * inet_sk_diag_fill() has requested for a larger skb.
1181 if (!skb
->len
&& cb
->min_dump_alloc
> prev_min_dump_alloc
) {
1182 err
= pskb_expand_head(skb
, 0, cb
->min_dump_alloc
, GFP_KERNEL
);
1187 return err
? : skb
->len
;
1190 static int inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1192 return __inet_diag_dump(skb
, cb
, nlmsg_data(cb
->nlh
));
1195 static int __inet_diag_dump_start(struct netlink_callback
*cb
, int hdrlen
)
1197 const struct nlmsghdr
*nlh
= cb
->nlh
;
1198 struct inet_diag_dump_data
*cb_data
;
1199 struct sk_buff
*skb
= cb
->skb
;
1203 cb_data
= kzalloc(sizeof(*cb_data
), GFP_KERNEL
);
1207 err
= inet_diag_parse_attrs(nlh
, hdrlen
, cb_data
->req_nlas
);
1212 nla
= cb_data
->inet_diag_nla_bc
;
1214 err
= inet_diag_bc_audit(nla
, skb
);
1221 nla
= cb_data
->inet_diag_nla_bpf_stgs
;
1223 struct bpf_sk_storage_diag
*bpf_stg_diag
;
1225 bpf_stg_diag
= bpf_sk_storage_diag_alloc(nla
);
1226 if (IS_ERR(bpf_stg_diag
)) {
1228 return PTR_ERR(bpf_stg_diag
);
1230 cb_data
->bpf_stg_diag
= bpf_stg_diag
;
1237 static int inet_diag_dump_start(struct netlink_callback
*cb
)
1239 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req_v2
));
1242 static int inet_diag_dump_start_compat(struct netlink_callback
*cb
)
1244 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req
));
1247 static int inet_diag_dump_done(struct netlink_callback
*cb
)
1249 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1251 bpf_sk_storage_diag_free(cb_data
->bpf_stg_diag
);
1257 static int inet_diag_type2proto(int type
)
1260 case TCPDIAG_GETSOCK
:
1262 case DCCPDIAG_GETSOCK
:
1263 return IPPROTO_DCCP
;
1269 static int inet_diag_dump_compat(struct sk_buff
*skb
,
1270 struct netlink_callback
*cb
)
1272 struct inet_diag_req
*rc
= nlmsg_data(cb
->nlh
);
1273 struct inet_diag_req_v2 req
;
1275 req
.sdiag_family
= AF_UNSPEC
; /* compatibility */
1276 req
.sdiag_protocol
= inet_diag_type2proto(cb
->nlh
->nlmsg_type
);
1277 req
.idiag_ext
= rc
->idiag_ext
;
1278 req
.idiag_states
= rc
->idiag_states
;
1281 return __inet_diag_dump(skb
, cb
, &req
);
1284 static int inet_diag_get_exact_compat(struct sk_buff
*in_skb
,
1285 const struct nlmsghdr
*nlh
)
1287 struct inet_diag_req
*rc
= nlmsg_data(nlh
);
1288 struct inet_diag_req_v2 req
;
1290 req
.sdiag_family
= rc
->idiag_family
;
1291 req
.sdiag_protocol
= inet_diag_type2proto(nlh
->nlmsg_type
);
1292 req
.idiag_ext
= rc
->idiag_ext
;
1293 req
.idiag_states
= rc
->idiag_states
;
1296 return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY
, in_skb
, nlh
,
1297 sizeof(struct inet_diag_req
), &req
);
1300 static int inet_diag_rcv_msg_compat(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
1302 int hdrlen
= sizeof(struct inet_diag_req
);
1303 struct net
*net
= sock_net(skb
->sk
);
1305 if (nlh
->nlmsg_type
>= INET_DIAG_GETSOCK_MAX
||
1306 nlmsg_len(nlh
) < hdrlen
)
1309 if (nlh
->nlmsg_flags
& NLM_F_DUMP
) {
1310 struct netlink_dump_control c
= {
1311 .start
= inet_diag_dump_start_compat
,
1312 .done
= inet_diag_dump_done
,
1313 .dump
= inet_diag_dump_compat
,
1315 return netlink_dump_start(net
->diag_nlsk
, skb
, nlh
, &c
);
1318 return inet_diag_get_exact_compat(skb
, nlh
);
1321 static int inet_diag_handler_cmd(struct sk_buff
*skb
, struct nlmsghdr
*h
)
1323 int hdrlen
= sizeof(struct inet_diag_req_v2
);
1324 struct net
*net
= sock_net(skb
->sk
);
1326 if (nlmsg_len(h
) < hdrlen
)
1329 if (h
->nlmsg_type
== SOCK_DIAG_BY_FAMILY
&&
1330 h
->nlmsg_flags
& NLM_F_DUMP
) {
1331 struct netlink_dump_control c
= {
1332 .start
= inet_diag_dump_start
,
1333 .done
= inet_diag_dump_done
,
1334 .dump
= inet_diag_dump
,
1336 return netlink_dump_start(net
->diag_nlsk
, skb
, h
, &c
);
1339 return inet_diag_cmd_exact(h
->nlmsg_type
, skb
, h
, hdrlen
,
1344 int inet_diag_handler_get_info(struct sk_buff
*skb
, struct sock
*sk
)
1346 const struct inet_diag_handler
*handler
;
1347 struct nlmsghdr
*nlh
;
1348 struct nlattr
*attr
;
1349 struct inet_diag_msg
*r
;
1353 nlh
= nlmsg_put(skb
, 0, 0, SOCK_DIAG_BY_FAMILY
, sizeof(*r
), 0);
1357 r
= nlmsg_data(nlh
);
1358 memset(r
, 0, sizeof(*r
));
1359 inet_diag_msg_common_fill(r
, sk
);
1360 if (sk
->sk_type
== SOCK_DGRAM
|| sk
->sk_type
== SOCK_STREAM
)
1361 r
->id
.idiag_sport
= inet_sk(sk
)->inet_sport
;
1362 r
->idiag_state
= sk
->sk_state
;
1364 if ((err
= nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))) {
1365 nlmsg_cancel(skb
, nlh
);
1369 handler
= inet_diag_lock_handler(sk
->sk_protocol
);
1370 if (IS_ERR(handler
)) {
1371 inet_diag_unlock_handler(handler
);
1372 nlmsg_cancel(skb
, nlh
);
1373 return PTR_ERR(handler
);
1376 attr
= handler
->idiag_info_size
1377 ? nla_reserve_64bit(skb
, INET_DIAG_INFO
,
1378 handler
->idiag_info_size
,
1382 info
= nla_data(attr
);
1384 handler
->idiag_get_info(sk
, r
, info
);
1385 inet_diag_unlock_handler(handler
);
1387 nlmsg_end(skb
, nlh
);
1391 static const struct sock_diag_handler inet_diag_handler
= {
1393 .dump
= inet_diag_handler_cmd
,
1394 .get_info
= inet_diag_handler_get_info
,
1395 .destroy
= inet_diag_handler_cmd
,
1398 static const struct sock_diag_handler inet6_diag_handler
= {
1400 .dump
= inet_diag_handler_cmd
,
1401 .get_info
= inet_diag_handler_get_info
,
1402 .destroy
= inet_diag_handler_cmd
,
1405 int inet_diag_register(const struct inet_diag_handler
*h
)
1407 const __u16 type
= h
->idiag_type
;
1410 if (type
>= IPPROTO_MAX
)
1413 mutex_lock(&inet_diag_table_mutex
);
1415 if (!inet_diag_table
[type
]) {
1416 inet_diag_table
[type
] = h
;
1419 mutex_unlock(&inet_diag_table_mutex
);
1423 EXPORT_SYMBOL_GPL(inet_diag_register
);
1425 void inet_diag_unregister(const struct inet_diag_handler
*h
)
1427 const __u16 type
= h
->idiag_type
;
1429 if (type
>= IPPROTO_MAX
)
1432 mutex_lock(&inet_diag_table_mutex
);
1433 inet_diag_table
[type
] = NULL
;
1434 mutex_unlock(&inet_diag_table_mutex
);
1436 EXPORT_SYMBOL_GPL(inet_diag_unregister
);
1438 static int __init
inet_diag_init(void)
1440 const int inet_diag_table_size
= (IPPROTO_MAX
*
1441 sizeof(struct inet_diag_handler
*));
1444 inet_diag_table
= kzalloc(inet_diag_table_size
, GFP_KERNEL
);
1445 if (!inet_diag_table
)
1448 err
= sock_diag_register(&inet_diag_handler
);
1452 err
= sock_diag_register(&inet6_diag_handler
);
1456 sock_diag_register_inet_compat(inet_diag_rcv_msg_compat
);
1461 sock_diag_unregister(&inet_diag_handler
);
1463 kfree(inet_diag_table
);
1467 static void __exit
inet_diag_exit(void)
1469 sock_diag_unregister(&inet6_diag_handler
);
1470 sock_diag_unregister(&inet_diag_handler
);
1471 sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat
);
1472 kfree(inet_diag_table
);
1475 module_init(inet_diag_init
);
1476 module_exit(inet_diag_exit
);
1477 MODULE_LICENSE("GPL");
1478 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 2 /* AF_INET */);
1479 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 10 /* AF_INET6 */);