1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/mutex.h>
8 #include <linux/vmalloc.h>
9 #include <linux/stddef.h>
10 #include <linux/err.h>
11 #include <linux/percpu.h>
12 #include <linux/notifier.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19 #include <net/netfilter/nf_conntrack_bridge.h>
20 #include <net/netfilter/nf_log.h>
23 #include <linux/icmp.h>
24 #include <linux/sysctl.h>
25 #include <net/route.h>
28 #include <linux/netfilter_ipv4.h>
29 #include <linux/netfilter_ipv6.h>
30 #include <linux/netfilter_ipv6/ip6_tables.h>
31 #include <net/netfilter/nf_conntrack_helper.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33 #include <net/netfilter/nf_conntrack_seqadj.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36 #include <net/netfilter/nf_nat_helper.h>
37 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
38 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
40 #include <linux/ipv6.h>
41 #include <linux/in6.h>
43 #include <net/inet_frag.h>
/* Key passed to net_generic() in this file to fetch the per-netns
 * struct nf_conntrack_net; defined outside this file.
 */
extern unsigned int nf_conntrack_net_id;

/* Held while the per-family user counters are updated and while the
 * nf_ct_bridge_info pointer is assigned or cleared.
 */
static DEFINE_MUTEX(nf_ct_proto_mutex);
51 void nf_l4proto_log_invalid(const struct sk_buff
*skb
,
59 if (net
->ct
.sysctl_log_invalid
!= protonum
&&
60 net
->ct
.sysctl_log_invalid
!= IPPROTO_RAW
)
67 nf_log_packet(net
, pf
, 0, skb
, NULL
, NULL
, NULL
,
68 "nf_ct_proto_%d: %pV ", protonum
, &vaf
);
71 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid
);
74 void nf_ct_l4proto_log_invalid(const struct sk_buff
*skb
,
75 const struct nf_conn
*ct
,
83 if (likely(net
->ct
.sysctl_log_invalid
== 0))
90 nf_l4proto_log_invalid(skb
, net
, nf_ct_l3num(ct
),
91 nf_ct_protonum(ct
), "%pV", &vaf
);
94 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid
);
97 const struct nf_conntrack_l4proto
*nf_ct_l4proto_find(u8 l4proto
)
100 case IPPROTO_UDP
: return &nf_conntrack_l4proto_udp
;
101 case IPPROTO_TCP
: return &nf_conntrack_l4proto_tcp
;
102 case IPPROTO_ICMP
: return &nf_conntrack_l4proto_icmp
;
103 #ifdef CONFIG_NF_CT_PROTO_DCCP
104 case IPPROTO_DCCP
: return &nf_conntrack_l4proto_dccp
;
106 #ifdef CONFIG_NF_CT_PROTO_SCTP
107 case IPPROTO_SCTP
: return &nf_conntrack_l4proto_sctp
;
109 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
110 case IPPROTO_UDPLITE
: return &nf_conntrack_l4proto_udplite
;
112 #ifdef CONFIG_NF_CT_PROTO_GRE
113 case IPPROTO_GRE
: return &nf_conntrack_l4proto_gre
;
115 #if IS_ENABLED(CONFIG_IPV6)
116 case IPPROTO_ICMPV6
: return &nf_conntrack_l4proto_icmpv6
;
117 #endif /* CONFIG_IPV6 */
120 return &nf_conntrack_l4proto_generic
;
122 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find
);
124 unsigned int nf_confirm(struct sk_buff
*skb
, unsigned int protoff
,
125 struct nf_conn
*ct
, enum ip_conntrack_info ctinfo
)
127 const struct nf_conn_help
*help
;
129 help
= nfct_help(ct
);
131 const struct nf_conntrack_helper
*helper
;
134 /* rcu_read_lock()ed by nf_hook_thresh */
135 helper
= rcu_dereference(help
->helper
);
137 ret
= helper
->help(skb
,
140 if (ret
!= NF_ACCEPT
)
145 if (test_bit(IPS_SEQ_ADJUST_BIT
, &ct
->status
) &&
146 !nf_is_loopback_packet(skb
)) {
147 if (!nf_ct_seq_adjust(skb
, ct
, ctinfo
, protoff
)) {
148 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct
), drop
);
153 /* We've seen it coming out the other side: confirm it */
154 return nf_conntrack_confirm(skb
);
156 EXPORT_SYMBOL_GPL(nf_confirm
);
158 static unsigned int ipv4_confirm(void *priv
,
160 const struct nf_hook_state
*state
)
162 enum ip_conntrack_info ctinfo
;
165 ct
= nf_ct_get(skb
, &ctinfo
);
166 if (!ct
|| ctinfo
== IP_CT_RELATED_REPLY
)
167 return nf_conntrack_confirm(skb
);
169 return nf_confirm(skb
,
170 skb_network_offset(skb
) + ip_hdrlen(skb
),
/*
 * IPv4 hook entry point registered for NF_INET_PRE_ROUTING in
 * ipv4_conntrack_ops.  @priv is unused; the packet and hook state are
 * handed straight to nf_conntrack_in() and its verdict is returned.
 */
static unsigned int ipv4_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	return nf_conntrack_in(skb, state);
}
181 static unsigned int ipv4_conntrack_local(void *priv
,
183 const struct nf_hook_state
*state
)
185 if (ip_is_fragment(ip_hdr(skb
))) { /* IP_NODEFRAG setsockopt set */
186 enum ip_conntrack_info ctinfo
;
187 struct nf_conn
*tmpl
;
189 tmpl
= nf_ct_get(skb
, &ctinfo
);
190 if (tmpl
&& nf_ct_is_template(tmpl
)) {
191 /* when skipping ct, clear templates to avoid fooling
192 * later targets/matches
200 return nf_conntrack_in(skb
, state
);
203 /* Connection tracking may drop packets, but never alters them, so
204 * make it the first hook.
206 static const struct nf_hook_ops ipv4_conntrack_ops
[] = {
208 .hook
= ipv4_conntrack_in
,
210 .hooknum
= NF_INET_PRE_ROUTING
,
211 .priority
= NF_IP_PRI_CONNTRACK
,
214 .hook
= ipv4_conntrack_local
,
216 .hooknum
= NF_INET_LOCAL_OUT
,
217 .priority
= NF_IP_PRI_CONNTRACK
,
220 .hook
= ipv4_confirm
,
222 .hooknum
= NF_INET_POST_ROUTING
,
223 .priority
= NF_IP_PRI_CONNTRACK_CONFIRM
,
226 .hook
= ipv4_confirm
,
228 .hooknum
= NF_INET_LOCAL_IN
,
229 .priority
= NF_IP_PRI_CONNTRACK_CONFIRM
,
233 /* Fast function for those who don't want to parse /proc (and I don't
235 * Reversing the socket's dst/src point of view gives us the reply
239 getorigdst(struct sock
*sk
, int optval
, void __user
*user
, int *len
)
241 const struct inet_sock
*inet
= inet_sk(sk
);
242 const struct nf_conntrack_tuple_hash
*h
;
243 struct nf_conntrack_tuple tuple
;
245 memset(&tuple
, 0, sizeof(tuple
));
248 tuple
.src
.u3
.ip
= inet
->inet_rcv_saddr
;
249 tuple
.src
.u
.tcp
.port
= inet
->inet_sport
;
250 tuple
.dst
.u3
.ip
= inet
->inet_daddr
;
251 tuple
.dst
.u
.tcp
.port
= inet
->inet_dport
;
252 tuple
.src
.l3num
= PF_INET
;
253 tuple
.dst
.protonum
= sk
->sk_protocol
;
256 /* We only do TCP and SCTP at the moment: is there a better way? */
257 if (tuple
.dst
.protonum
!= IPPROTO_TCP
&&
258 tuple
.dst
.protonum
!= IPPROTO_SCTP
) {
259 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
263 if ((unsigned int)*len
< sizeof(struct sockaddr_in
)) {
264 pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
265 *len
, sizeof(struct sockaddr_in
));
269 h
= nf_conntrack_find_get(sock_net(sk
), &nf_ct_zone_dflt
, &tuple
);
271 struct sockaddr_in sin
;
272 struct nf_conn
*ct
= nf_ct_tuplehash_to_ctrack(h
);
274 sin
.sin_family
= AF_INET
;
275 sin
.sin_port
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
]
276 .tuple
.dst
.u
.tcp
.port
;
277 sin
.sin_addr
.s_addr
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
]
279 memset(sin
.sin_zero
, 0, sizeof(sin
.sin_zero
));
281 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
282 &sin
.sin_addr
.s_addr
, ntohs(sin
.sin_port
));
284 if (copy_to_user(user
, &sin
, sizeof(sin
)) != 0)
289 pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
290 &tuple
.src
.u3
.ip
, ntohs(tuple
.src
.u
.tcp
.port
),
291 &tuple
.dst
.u3
.ip
, ntohs(tuple
.dst
.u
.tcp
.port
));
295 static struct nf_sockopt_ops so_getorigdst
= {
297 .get_optmin
= SO_ORIGINAL_DST
,
298 .get_optmax
= SO_ORIGINAL_DST
+ 1,
300 .owner
= THIS_MODULE
,
303 #if IS_ENABLED(CONFIG_IPV6)
305 ipv6_getorigdst(struct sock
*sk
, int optval
, void __user
*user
, int *len
)
307 struct nf_conntrack_tuple tuple
= { .src
.l3num
= NFPROTO_IPV6
};
308 const struct ipv6_pinfo
*inet6
= inet6_sk(sk
);
309 const struct inet_sock
*inet
= inet_sk(sk
);
310 const struct nf_conntrack_tuple_hash
*h
;
311 struct sockaddr_in6 sin6
;
317 tuple
.src
.u3
.in6
= sk
->sk_v6_rcv_saddr
;
318 tuple
.src
.u
.tcp
.port
= inet
->inet_sport
;
319 tuple
.dst
.u3
.in6
= sk
->sk_v6_daddr
;
320 tuple
.dst
.u
.tcp
.port
= inet
->inet_dport
;
321 tuple
.dst
.protonum
= sk
->sk_protocol
;
322 bound_dev_if
= sk
->sk_bound_dev_if
;
323 flow_label
= inet6
->flow_label
;
326 if (tuple
.dst
.protonum
!= IPPROTO_TCP
&&
327 tuple
.dst
.protonum
!= IPPROTO_SCTP
)
330 if (*len
< 0 || (unsigned int)*len
< sizeof(sin6
))
333 h
= nf_conntrack_find_get(sock_net(sk
), &nf_ct_zone_dflt
, &tuple
);
335 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
336 &tuple
.src
.u3
.ip6
, ntohs(tuple
.src
.u
.tcp
.port
),
337 &tuple
.dst
.u3
.ip6
, ntohs(tuple
.dst
.u
.tcp
.port
));
341 ct
= nf_ct_tuplehash_to_ctrack(h
);
343 sin6
.sin6_family
= AF_INET6
;
344 sin6
.sin6_port
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.u
.tcp
.port
;
345 sin6
.sin6_flowinfo
= flow_label
& IPV6_FLOWINFO_MASK
;
346 memcpy(&sin6
.sin6_addr
,
347 &ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.u3
.in6
,
348 sizeof(sin6
.sin6_addr
));
351 sin6
.sin6_scope_id
= ipv6_iface_scope_id(&sin6
.sin6_addr
, bound_dev_if
);
352 return copy_to_user(user
, &sin6
, sizeof(sin6
)) ? -EFAULT
: 0;
355 static struct nf_sockopt_ops so_getorigdst6
= {
357 .get_optmin
= IP6T_SO_ORIGINAL_DST
,
358 .get_optmax
= IP6T_SO_ORIGINAL_DST
+ 1,
359 .get
= ipv6_getorigdst
,
360 .owner
= THIS_MODULE
,
363 static unsigned int ipv6_confirm(void *priv
,
365 const struct nf_hook_state
*state
)
368 enum ip_conntrack_info ctinfo
;
369 unsigned char pnum
= ipv6_hdr(skb
)->nexthdr
;
373 ct
= nf_ct_get(skb
, &ctinfo
);
374 if (!ct
|| ctinfo
== IP_CT_RELATED_REPLY
)
375 return nf_conntrack_confirm(skb
);
377 protoff
= ipv6_skip_exthdr(skb
, sizeof(struct ipv6hdr
), &pnum
,
379 if (protoff
< 0 || (frag_off
& htons(~0x7)) != 0) {
380 pr_debug("proto header not found\n");
381 return nf_conntrack_confirm(skb
);
384 return nf_confirm(skb
, protoff
, ct
, ctinfo
);
/*
 * IPv6 hook entry point registered for NF_INET_PRE_ROUTING in
 * ipv6_conntrack_ops.  @priv is unused; the packet and hook state are
 * handed straight to nf_conntrack_in() and its verdict is returned.
 */
static unsigned int ipv6_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	return nf_conntrack_in(skb, state);
}
/*
 * IPv6 hook entry point registered for NF_INET_LOCAL_OUT in
 * ipv6_conntrack_ops.  @priv is unused; the packet and hook state are
 * handed straight to nf_conntrack_in() and its verdict is returned.
 */
static unsigned int ipv6_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	return nf_conntrack_in(skb, state);
}
401 static const struct nf_hook_ops ipv6_conntrack_ops
[] = {
403 .hook
= ipv6_conntrack_in
,
405 .hooknum
= NF_INET_PRE_ROUTING
,
406 .priority
= NF_IP6_PRI_CONNTRACK
,
409 .hook
= ipv6_conntrack_local
,
411 .hooknum
= NF_INET_LOCAL_OUT
,
412 .priority
= NF_IP6_PRI_CONNTRACK
,
415 .hook
= ipv6_confirm
,
417 .hooknum
= NF_INET_POST_ROUTING
,
418 .priority
= NF_IP6_PRI_LAST
,
421 .hook
= ipv6_confirm
,
423 .hooknum
= NF_INET_LOCAL_IN
,
424 .priority
= NF_IP6_PRI_LAST
- 1,
429 static int nf_ct_tcp_fixup(struct nf_conn
*ct
, void *_nfproto
)
431 u8 nfproto
= (unsigned long)_nfproto
;
433 if (nf_ct_l3num(ct
) != nfproto
)
436 if (nf_ct_protonum(ct
) == IPPROTO_TCP
&&
437 ct
->proto
.tcp
.state
== TCP_CONNTRACK_ESTABLISHED
) {
438 ct
->proto
.tcp
.seen
[0].td_maxwin
= 0;
439 ct
->proto
.tcp
.seen
[1].td_maxwin
= 0;
445 static struct nf_ct_bridge_info
*nf_ct_bridge_info
;
447 static int nf_ct_netns_do_get(struct net
*net
, u8 nfproto
)
449 struct nf_conntrack_net
*cnet
= net_generic(net
, nf_conntrack_net_id
);
450 bool fixup_needed
= false, retry
= true;
453 mutex_lock(&nf_ct_proto_mutex
);
458 if (cnet
->users4
> 1)
460 err
= nf_defrag_ipv4_enable(net
);
466 err
= nf_register_net_hooks(net
, ipv4_conntrack_ops
,
467 ARRAY_SIZE(ipv4_conntrack_ops
));
473 #if IS_ENABLED(CONFIG_IPV6)
476 if (cnet
->users6
> 1)
478 err
= nf_defrag_ipv6_enable(net
);
484 err
= nf_register_net_hooks(net
, ipv6_conntrack_ops
,
485 ARRAY_SIZE(ipv6_conntrack_ops
));
493 if (!nf_ct_bridge_info
) {
498 mutex_unlock(&nf_ct_proto_mutex
);
499 request_module("nf_conntrack_bridge");
503 if (!try_module_get(nf_ct_bridge_info
->me
)) {
507 cnet
->users_bridge
++;
508 if (cnet
->users_bridge
> 1)
511 err
= nf_register_net_hooks(net
, nf_ct_bridge_info
->ops
,
512 nf_ct_bridge_info
->ops_size
);
514 cnet
->users_bridge
= 0;
523 mutex_unlock(&nf_ct_proto_mutex
);
526 nf_ct_iterate_cleanup_net(net
, nf_ct_tcp_fixup
,
527 (void *)(unsigned long)nfproto
, 0, 0);
532 static void nf_ct_netns_do_put(struct net
*net
, u8 nfproto
)
534 struct nf_conntrack_net
*cnet
= net_generic(net
, nf_conntrack_net_id
);
536 mutex_lock(&nf_ct_proto_mutex
);
539 if (cnet
->users4
&& (--cnet
->users4
== 0))
540 nf_unregister_net_hooks(net
, ipv4_conntrack_ops
,
541 ARRAY_SIZE(ipv4_conntrack_ops
));
543 #if IS_ENABLED(CONFIG_IPV6)
545 if (cnet
->users6
&& (--cnet
->users6
== 0))
546 nf_unregister_net_hooks(net
, ipv6_conntrack_ops
,
547 ARRAY_SIZE(ipv6_conntrack_ops
));
551 if (!nf_ct_bridge_info
)
553 if (cnet
->users_bridge
&& (--cnet
->users_bridge
== 0))
554 nf_unregister_net_hooks(net
, nf_ct_bridge_info
->ops
,
555 nf_ct_bridge_info
->ops_size
);
557 module_put(nf_ct_bridge_info
->me
);
560 mutex_unlock(&nf_ct_proto_mutex
);
563 static int nf_ct_netns_inet_get(struct net
*net
)
567 err
= nf_ct_netns_do_get(net
, NFPROTO_IPV4
);
568 #if IS_ENABLED(CONFIG_IPV6)
571 err
= nf_ct_netns_do_get(net
, NFPROTO_IPV6
);
577 nf_ct_netns_put(net
, NFPROTO_IPV4
);
583 int nf_ct_netns_get(struct net
*net
, u8 nfproto
)
589 err
= nf_ct_netns_inet_get(net
);
592 err
= nf_ct_netns_do_get(net
, NFPROTO_BRIDGE
);
596 err
= nf_ct_netns_inet_get(net
);
598 nf_ct_netns_put(net
, NFPROTO_BRIDGE
);
603 err
= nf_ct_netns_do_get(net
, nfproto
);
608 EXPORT_SYMBOL_GPL(nf_ct_netns_get
);
610 void nf_ct_netns_put(struct net
*net
, uint8_t nfproto
)
614 nf_ct_netns_do_put(net
, NFPROTO_BRIDGE
);
617 nf_ct_netns_do_put(net
, NFPROTO_IPV4
);
618 nf_ct_netns_do_put(net
, NFPROTO_IPV6
);
621 nf_ct_netns_do_put(net
, nfproto
);
625 EXPORT_SYMBOL_GPL(nf_ct_netns_put
);
627 void nf_ct_bridge_register(struct nf_ct_bridge_info
*info
)
629 WARN_ON(nf_ct_bridge_info
);
630 mutex_lock(&nf_ct_proto_mutex
);
631 nf_ct_bridge_info
= info
;
632 mutex_unlock(&nf_ct_proto_mutex
);
634 EXPORT_SYMBOL_GPL(nf_ct_bridge_register
);
636 void nf_ct_bridge_unregister(struct nf_ct_bridge_info
*info
)
638 WARN_ON(!nf_ct_bridge_info
);
639 mutex_lock(&nf_ct_proto_mutex
);
640 nf_ct_bridge_info
= NULL
;
641 mutex_unlock(&nf_ct_proto_mutex
);
643 EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister
);
645 int nf_conntrack_proto_init(void)
649 ret
= nf_register_sockopt(&so_getorigdst
);
653 #if IS_ENABLED(CONFIG_IPV6)
654 ret
= nf_register_sockopt(&so_getorigdst6
);
656 goto cleanup_sockopt
;
661 #if IS_ENABLED(CONFIG_IPV6)
663 nf_unregister_sockopt(&so_getorigdst6
);
668 void nf_conntrack_proto_fini(void)
670 nf_unregister_sockopt(&so_getorigdst
);
671 #if IS_ENABLED(CONFIG_IPV6)
672 nf_unregister_sockopt(&so_getorigdst6
);
676 void nf_conntrack_proto_pernet_init(struct net
*net
)
678 nf_conntrack_generic_init_net(net
);
679 nf_conntrack_udp_init_net(net
);
680 nf_conntrack_tcp_init_net(net
);
681 nf_conntrack_icmp_init_net(net
);
682 #if IS_ENABLED(CONFIG_IPV6)
683 nf_conntrack_icmpv6_init_net(net
);
685 #ifdef CONFIG_NF_CT_PROTO_DCCP
686 nf_conntrack_dccp_init_net(net
);
688 #ifdef CONFIG_NF_CT_PROTO_SCTP
689 nf_conntrack_sctp_init_net(net
);
691 #ifdef CONFIG_NF_CT_PROTO_GRE
692 nf_conntrack_gre_init_net(net
);
/*
 * nf_conntrack_proto_pernet_fini - per-netns teardown of l4 trackers
 * @net: network namespace being torn down
 *
 * Only GRE needs work here: nf_ct_gre_keymap_flush() is called when GRE
 * tracking is compiled in.  Otherwise this is a no-op.
 */
void nf_conntrack_proto_pernet_fini(struct net *net)
{
#ifdef CONFIG_NF_CT_PROTO_GRE
	nf_ct_gre_keymap_flush(net);
#endif
}
703 module_param_call(hashsize
, nf_conntrack_set_hashsize
, param_get_uint
,
704 &nf_conntrack_htable_size
, 0600);
706 MODULE_ALIAS("ip_conntrack");
707 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET
));
708 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6
));
709 MODULE_LICENSE("GPL");