/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
27 enum nft_ct_keys key
:8;
28 enum ip_conntrack_dir dir
:8;
30 enum nft_registers dreg
:8;
31 enum nft_registers sreg
:8;
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * One template conntrack per possible CPU, read with this_cpu_read() by the
 * zone setter and populated/released by nft_ct_tmpl_alloc_pcpu() and
 * nft_ct_tmpl_put_pcpu().
 */
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);

/*
 * Count of users of the per-CPU templates: bumped in nft_ct_set_init(),
 * dropped in __nft_ct_set_destroy(); the templates are released when it
 * reaches zero.
 */
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
#endif
40 static u64
nft_ct_get_eval_counter(const struct nf_conn_counter
*c
,
42 enum ip_conntrack_dir d
)
44 if (d
< IP_CT_DIR_MAX
)
45 return k
== NFT_CT_BYTES
? atomic64_read(&c
[d
].bytes
) :
46 atomic64_read(&c
[d
].packets
);
48 return nft_ct_get_eval_counter(c
, k
, IP_CT_DIR_ORIGINAL
) +
49 nft_ct_get_eval_counter(c
, k
, IP_CT_DIR_REPLY
);
52 static void nft_ct_get_eval(const struct nft_expr
*expr
,
53 struct nft_regs
*regs
,
54 const struct nft_pktinfo
*pkt
)
56 const struct nft_ct
*priv
= nft_expr_priv(expr
);
57 u32
*dest
= ®s
->data
[priv
->dreg
];
58 enum ip_conntrack_info ctinfo
;
59 const struct nf_conn
*ct
;
60 const struct nf_conn_help
*help
;
61 const struct nf_conntrack_tuple
*tuple
;
62 const struct nf_conntrack_helper
*helper
;
65 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
70 state
= NF_CT_STATE_INVALID_BIT
;
71 else if (nf_ct_is_untracked(ct
))
72 state
= NF_CT_STATE_UNTRACKED_BIT
;
74 state
= NF_CT_STATE_BIT(ctinfo
);
85 case NFT_CT_DIRECTION
:
86 nft_reg_store8(dest
, CTINFO2DIR(ctinfo
));
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
101 case NFT_CT_EXPIRATION
:
102 *dest
= jiffies_to_msecs(nf_ct_expires(ct
));
105 if (ct
->master
== NULL
)
107 help
= nfct_help(ct
->master
);
110 helper
= rcu_dereference(help
->helper
);
113 strncpy((char *)dest
, helper
->name
, NF_CT_HELPER_NAME_LEN
);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
116 case NFT_CT_LABELS
: {
117 struct nf_conn_labels
*labels
= nf_ct_labels_find(ct
);
120 memcpy(dest
, labels
->bits
, NF_CT_LABELS_MAX_SIZE
);
122 memset(dest
, 0, NF_CT_LABELS_MAX_SIZE
);
126 case NFT_CT_BYTES
: /* fallthrough */
128 const struct nf_conn_acct
*acct
= nf_conn_acct_find(ct
);
132 count
= nft_ct_get_eval_counter(acct
->counter
,
133 priv
->key
, priv
->dir
);
134 memcpy(dest
, &count
, sizeof(count
));
137 case NFT_CT_AVGPKT
: {
138 const struct nf_conn_acct
*acct
= nf_conn_acct_find(ct
);
139 u64 avgcnt
= 0, bcnt
= 0, pcnt
= 0;
142 pcnt
= nft_ct_get_eval_counter(acct
->counter
,
143 NFT_CT_PKTS
, priv
->dir
);
144 bcnt
= nft_ct_get_eval_counter(acct
->counter
,
145 NFT_CT_BYTES
, priv
->dir
);
147 avgcnt
= div64_u64(bcnt
, pcnt
);
150 memcpy(dest
, &avgcnt
, sizeof(avgcnt
));
153 case NFT_CT_L3PROTOCOL
:
154 nft_reg_store8(dest
, nf_ct_l3num(ct
));
156 case NFT_CT_PROTOCOL
:
157 nft_reg_store8(dest
, nf_ct_protonum(ct
));
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
161 const struct nf_conntrack_zone
*zone
= nf_ct_zone(ct
);
164 if (priv
->dir
< IP_CT_DIR_MAX
)
165 zoneid
= nf_ct_zone_id(zone
, priv
->dir
);
169 nft_reg_store16(dest
, zoneid
);
177 tuple
= &ct
->tuplehash
[priv
->dir
].tuple
;
180 memcpy(dest
, tuple
->src
.u3
.all
,
181 nf_ct_l3num(ct
) == NFPROTO_IPV4
? 4 : 16);
184 memcpy(dest
, tuple
->dst
.u3
.all
,
185 nf_ct_l3num(ct
) == NFPROTO_IPV4
? 4 : 16);
187 case NFT_CT_PROTO_SRC
:
188 nft_reg_store16(dest
, (__force u16
)tuple
->src
.u
.all
);
190 case NFT_CT_PROTO_DST
:
191 nft_reg_store16(dest
, (__force u16
)tuple
->dst
.u
.all
);
198 regs
->verdict
.code
= NFT_BREAK
;
201 #ifdef CONFIG_NF_CONNTRACK_ZONES
202 static void nft_ct_set_zone_eval(const struct nft_expr
*expr
,
203 struct nft_regs
*regs
,
204 const struct nft_pktinfo
*pkt
)
206 struct nf_conntrack_zone zone
= { .dir
= NF_CT_DEFAULT_ZONE_DIR
};
207 const struct nft_ct
*priv
= nft_expr_priv(expr
);
208 struct sk_buff
*skb
= pkt
->skb
;
209 enum ip_conntrack_info ctinfo
;
210 u16 value
= nft_reg_load16(®s
->data
[priv
->sreg
]);
213 ct
= nf_ct_get(skb
, &ctinfo
);
214 if (ct
) /* already tracked */
220 case IP_CT_DIR_ORIGINAL
:
221 zone
.dir
= NF_CT_ZONE_DIR_ORIG
;
223 case IP_CT_DIR_REPLY
:
224 zone
.dir
= NF_CT_ZONE_DIR_REPL
;
230 ct
= this_cpu_read(nft_ct_pcpu_template
);
232 if (likely(atomic_read(&ct
->ct_general
.use
) == 1)) {
233 nf_ct_zone_add(ct
, &zone
);
235 /* previous skb got queued to userspace */
236 ct
= nf_ct_tmpl_alloc(nft_net(pkt
), &zone
, GFP_ATOMIC
);
238 regs
->verdict
.code
= NF_DROP
;
243 atomic_inc(&ct
->ct_general
.use
);
244 nf_ct_set(skb
, ct
, IP_CT_NEW
);
248 static void nft_ct_set_eval(const struct nft_expr
*expr
,
249 struct nft_regs
*regs
,
250 const struct nft_pktinfo
*pkt
)
252 const struct nft_ct
*priv
= nft_expr_priv(expr
);
253 struct sk_buff
*skb
= pkt
->skb
;
254 #ifdef CONFIG_NF_CONNTRACK_MARK
255 u32 value
= regs
->data
[priv
->sreg
];
257 enum ip_conntrack_info ctinfo
;
260 ct
= nf_ct_get(skb
, &ctinfo
);
265 #ifdef CONFIG_NF_CONNTRACK_MARK
267 if (ct
->mark
!= value
) {
269 nf_conntrack_event_cache(IPCT_MARK
, ct
);
273 #ifdef CONFIG_NF_CONNTRACK_LABELS
275 nf_connlabels_replace(ct
,
276 ®s
->data
[priv
->sreg
],
277 ®s
->data
[priv
->sreg
],
278 NF_CT_LABELS_MAX_SIZE
/ sizeof(u32
));
286 static const struct nla_policy nft_ct_policy
[NFTA_CT_MAX
+ 1] = {
287 [NFTA_CT_DREG
] = { .type
= NLA_U32
},
288 [NFTA_CT_KEY
] = { .type
= NLA_U32
},
289 [NFTA_CT_DIRECTION
] = { .type
= NLA_U8
},
290 [NFTA_CT_SREG
] = { .type
= NLA_U32
},
293 static int nft_ct_netns_get(struct net
*net
, uint8_t family
)
297 if (family
== NFPROTO_INET
) {
298 err
= nf_ct_netns_get(net
, NFPROTO_IPV4
);
301 err
= nf_ct_netns_get(net
, NFPROTO_IPV6
);
305 err
= nf_ct_netns_get(net
, family
);
312 nf_ct_netns_put(net
, NFPROTO_IPV4
);
317 static void nft_ct_netns_put(struct net
*net
, uint8_t family
)
319 if (family
== NFPROTO_INET
) {
320 nf_ct_netns_put(net
, NFPROTO_IPV4
);
321 nf_ct_netns_put(net
, NFPROTO_IPV6
);
323 nf_ct_netns_put(net
, family
);
326 #ifdef CONFIG_NF_CONNTRACK_ZONES
327 static void nft_ct_tmpl_put_pcpu(void)
332 for_each_possible_cpu(cpu
) {
333 ct
= per_cpu(nft_ct_pcpu_template
, cpu
);
337 per_cpu(nft_ct_pcpu_template
, cpu
) = NULL
;
341 static bool nft_ct_tmpl_alloc_pcpu(void)
343 struct nf_conntrack_zone zone
= { .id
= 0 };
347 if (nft_ct_pcpu_template_refcnt
)
350 for_each_possible_cpu(cpu
) {
351 tmp
= nf_ct_tmpl_alloc(&init_net
, &zone
, GFP_KERNEL
);
353 nft_ct_tmpl_put_pcpu();
357 atomic_set(&tmp
->ct_general
.use
, 1);
358 per_cpu(nft_ct_pcpu_template
, cpu
) = tmp
;
365 static int nft_ct_get_init(const struct nft_ctx
*ctx
,
366 const struct nft_expr
*expr
,
367 const struct nlattr
* const tb
[])
369 struct nft_ct
*priv
= nft_expr_priv(expr
);
373 priv
->key
= ntohl(nla_get_be32(tb
[NFTA_CT_KEY
]));
374 priv
->dir
= IP_CT_DIR_MAX
;
376 case NFT_CT_DIRECTION
:
377 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
383 #ifdef CONFIG_NF_CONNTRACK_MARK
386 #ifdef CONFIG_NF_CONNTRACK_SECMARK
389 case NFT_CT_EXPIRATION
:
390 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
394 #ifdef CONFIG_NF_CONNTRACK_LABELS
396 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
398 len
= NF_CT_LABELS_MAX_SIZE
;
402 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
404 len
= NF_CT_HELPER_NAME_LEN
;
407 case NFT_CT_L3PROTOCOL
:
408 case NFT_CT_PROTOCOL
:
409 /* For compatibility, do not report error if NFTA_CT_DIRECTION
410 * attribute is specified.
416 if (tb
[NFTA_CT_DIRECTION
] == NULL
)
419 switch (ctx
->afi
->family
) {
421 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
,
426 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
,
430 return -EAFNOSUPPORT
;
433 case NFT_CT_PROTO_SRC
:
434 case NFT_CT_PROTO_DST
:
435 if (tb
[NFTA_CT_DIRECTION
] == NULL
)
437 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
, src
.u
.all
);
444 #ifdef CONFIG_NF_CONNTRACK_ZONES
453 if (tb
[NFTA_CT_DIRECTION
] != NULL
) {
454 priv
->dir
= nla_get_u8(tb
[NFTA_CT_DIRECTION
]);
456 case IP_CT_DIR_ORIGINAL
:
457 case IP_CT_DIR_REPLY
:
464 priv
->dreg
= nft_parse_register(tb
[NFTA_CT_DREG
]);
465 err
= nft_validate_register_store(ctx
, priv
->dreg
, NULL
,
466 NFT_DATA_VALUE
, len
);
470 err
= nft_ct_netns_get(ctx
->net
, ctx
->afi
->family
);
474 if (priv
->key
== NFT_CT_BYTES
||
475 priv
->key
== NFT_CT_PKTS
||
476 priv
->key
== NFT_CT_AVGPKT
)
477 nf_ct_set_acct(ctx
->net
, true);
482 static void __nft_ct_set_destroy(const struct nft_ctx
*ctx
, struct nft_ct
*priv
)
485 #ifdef CONFIG_NF_CONNTRACK_LABELS
487 nf_connlabels_put(ctx
->net
);
490 #ifdef CONFIG_NF_CONNTRACK_ZONES
492 if (--nft_ct_pcpu_template_refcnt
== 0)
493 nft_ct_tmpl_put_pcpu();
500 static int nft_ct_set_init(const struct nft_ctx
*ctx
,
501 const struct nft_expr
*expr
,
502 const struct nlattr
* const tb
[])
504 struct nft_ct
*priv
= nft_expr_priv(expr
);
508 priv
->dir
= IP_CT_DIR_MAX
;
509 priv
->key
= ntohl(nla_get_be32(tb
[NFTA_CT_KEY
]));
511 #ifdef CONFIG_NF_CONNTRACK_MARK
513 if (tb
[NFTA_CT_DIRECTION
])
515 len
= FIELD_SIZEOF(struct nf_conn
, mark
);
518 #ifdef CONFIG_NF_CONNTRACK_LABELS
520 if (tb
[NFTA_CT_DIRECTION
])
522 len
= NF_CT_LABELS_MAX_SIZE
;
523 err
= nf_connlabels_get(ctx
->net
, (len
* BITS_PER_BYTE
) - 1);
528 #ifdef CONFIG_NF_CONNTRACK_ZONES
530 if (!nft_ct_tmpl_alloc_pcpu())
532 nft_ct_pcpu_template_refcnt
++;
540 if (tb
[NFTA_CT_DIRECTION
]) {
541 priv
->dir
= nla_get_u8(tb
[NFTA_CT_DIRECTION
]);
543 case IP_CT_DIR_ORIGINAL
:
544 case IP_CT_DIR_REPLY
:
552 priv
->sreg
= nft_parse_register(tb
[NFTA_CT_SREG
]);
553 err
= nft_validate_register_load(priv
->sreg
, len
);
557 err
= nft_ct_netns_get(ctx
->net
, ctx
->afi
->family
);
564 __nft_ct_set_destroy(ctx
, priv
);
568 static void nft_ct_get_destroy(const struct nft_ctx
*ctx
,
569 const struct nft_expr
*expr
)
571 nf_ct_netns_put(ctx
->net
, ctx
->afi
->family
);
574 static void nft_ct_set_destroy(const struct nft_ctx
*ctx
,
575 const struct nft_expr
*expr
)
577 struct nft_ct
*priv
= nft_expr_priv(expr
);
579 __nft_ct_set_destroy(ctx
, priv
);
580 nft_ct_netns_put(ctx
->net
, ctx
->afi
->family
);
583 static int nft_ct_get_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
585 const struct nft_ct
*priv
= nft_expr_priv(expr
);
587 if (nft_dump_register(skb
, NFTA_CT_DREG
, priv
->dreg
))
588 goto nla_put_failure
;
589 if (nla_put_be32(skb
, NFTA_CT_KEY
, htonl(priv
->key
)))
590 goto nla_put_failure
;
595 case NFT_CT_PROTO_SRC
:
596 case NFT_CT_PROTO_DST
:
597 if (nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
598 goto nla_put_failure
;
604 if (priv
->dir
< IP_CT_DIR_MAX
&&
605 nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
606 goto nla_put_failure
;
618 static int nft_ct_set_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
620 const struct nft_ct
*priv
= nft_expr_priv(expr
);
622 if (nft_dump_register(skb
, NFTA_CT_SREG
, priv
->sreg
))
623 goto nla_put_failure
;
624 if (nla_put_be32(skb
, NFTA_CT_KEY
, htonl(priv
->key
)))
625 goto nla_put_failure
;
629 if (priv
->dir
< IP_CT_DIR_MAX
&&
630 nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
631 goto nla_put_failure
;
643 static struct nft_expr_type nft_ct_type
;
644 static const struct nft_expr_ops nft_ct_get_ops
= {
645 .type
= &nft_ct_type
,
646 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
647 .eval
= nft_ct_get_eval
,
648 .init
= nft_ct_get_init
,
649 .destroy
= nft_ct_get_destroy
,
650 .dump
= nft_ct_get_dump
,
653 static const struct nft_expr_ops nft_ct_set_ops
= {
654 .type
= &nft_ct_type
,
655 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
656 .eval
= nft_ct_set_eval
,
657 .init
= nft_ct_set_init
,
658 .destroy
= nft_ct_set_destroy
,
659 .dump
= nft_ct_set_dump
,
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Operations used for setting NFT_CT_ZONE: identical to nft_ct_set_ops
 * except for the dedicated zone evaluation function.
 */
static const struct nft_expr_ops nft_ct_set_zone_ops = {
	.type		= &nft_ct_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
	.init		= nft_ct_set_init,
	.eval		= nft_ct_set_zone_eval,
	.dump		= nft_ct_set_dump,
	.destroy	= nft_ct_set_destroy,
};
#endif
673 static const struct nft_expr_ops
*
674 nft_ct_select_ops(const struct nft_ctx
*ctx
,
675 const struct nlattr
* const tb
[])
677 if (tb
[NFTA_CT_KEY
] == NULL
)
678 return ERR_PTR(-EINVAL
);
680 if (tb
[NFTA_CT_DREG
] && tb
[NFTA_CT_SREG
])
681 return ERR_PTR(-EINVAL
);
683 if (tb
[NFTA_CT_DREG
])
684 return &nft_ct_get_ops
;
686 if (tb
[NFTA_CT_SREG
]) {
687 #ifdef CONFIG_NF_CONNTRACK_ZONES
688 if (nla_get_be32(tb
[NFTA_CT_KEY
]) == htonl(NFT_CT_ZONE
))
689 return &nft_ct_set_zone_ops
;
691 return &nft_ct_set_ops
;
694 return ERR_PTR(-EINVAL
);
697 static struct nft_expr_type nft_ct_type __read_mostly
= {
699 .select_ops
= &nft_ct_select_ops
,
700 .policy
= nft_ct_policy
,
701 .maxattr
= NFTA_CT_MAX
,
702 .owner
= THIS_MODULE
,
705 static void nft_notrack_eval(const struct nft_expr
*expr
,
706 struct nft_regs
*regs
,
707 const struct nft_pktinfo
*pkt
)
709 struct sk_buff
*skb
= pkt
->skb
;
710 enum ip_conntrack_info ctinfo
;
713 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
714 /* Previously seen (loopback or untracked)? Ignore. */
718 ct
= nf_ct_untracked_get();
719 atomic_inc(&ct
->ct_general
.use
);
720 nf_ct_set(skb
, ct
, IP_CT_NEW
);
723 static struct nft_expr_type nft_notrack_type
;
724 static const struct nft_expr_ops nft_notrack_ops
= {
725 .type
= &nft_notrack_type
,
726 .size
= NFT_EXPR_SIZE(0),
727 .eval
= nft_notrack_eval
,
730 static struct nft_expr_type nft_notrack_type __read_mostly
= {
732 .ops
= &nft_notrack_ops
,
733 .owner
= THIS_MODULE
,
736 static int __init
nft_ct_module_init(void)
740 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE
> NFT_REG_SIZE
);
742 err
= nft_register_expr(&nft_ct_type
);
746 err
= nft_register_expr(&nft_notrack_type
);
752 nft_unregister_expr(&nft_ct_type
);
756 static void __exit
nft_ct_module_exit(void)
758 nft_unregister_expr(&nft_notrack_type
);
759 nft_unregister_expr(&nft_ct_type
);
762 module_init(nft_ct_module_init
);
763 module_exit(nft_ct_module_exit
);
765 MODULE_LICENSE("GPL");
766 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
767 MODULE_ALIAS_NFT_EXPR("ct");
768 MODULE_ALIAS_NFT_EXPR("notrack");