]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
netfilter: add nftables
author: Patrick McHardy <kaber@trash.net>
Mon, 14 Oct 2013 09:00:02 +0000 (11:00 +0200)
committer: Pablo Neira Ayuso <pablo@netfilter.org>
Mon, 14 Oct 2013 15:15:48 +0000 (17:15 +0200)
This patch adds nftables which is the intended successor of iptables.
This packet filtering framework reuses the existing netfilter hooks,
the connection tracking system, the NAT subsystem, the transparent
proxying engine, the logging infrastructure and the userspace packet
queueing facilities.

In a nutshell, nftables provides a pseudo-state machine with 4 general
purpose registers of 128 bits and 1 specific purpose register to store
verdicts. This pseudo-machine comes with an extensible instruction set,
a.k.a. "expressions" in the nftables jargon. The expressions included
in this patch provide the basic functionality, they are:

* bitwise: to perform bitwise operations.
* byteorder: to change from host/network endianness.
* cmp: to compare data with the content of the registers.
* counter: to enable counters on rules.
* ct: to store conntrack keys into register.
* exthdr: to match IPv6 extension headers.
* immediate: to load data into registers.
* limit: to limit matching based on packet rate.
* log: to log packets.
* meta: to match metainformation that usually comes with the skbuff.
* nat: to perform Network Address Translation.
* payload: to fetch data from the packet payload and store it into
  registers.
* reject (IPv4 only): to explicitly close connection, e.g. TCP RST.

Using this instruction-set, the userspace utility 'nft' can transform
the rules expressed in human-readable text representation (using a
new syntax, inspired by tcpdump) to nftables bytecode.

nftables also inherits the table, chain and rule objects from
iptables, but in a more configurable way, and it also includes the
original datatype-agnostic set infrastructure with mapping support.
This set infrastructure is enhanced in the follow up patch (netfilter:
nf_tables: add netlink set API).

This patch includes the following components:

* the netlink API: net/netfilter/nf_tables_api.c and
  include/uapi/netfilter/nf_tables.h
* the packet filter core: net/netfilter/nf_tables_core.c
* the expressions (described above): net/netfilter/nft_*.c
* the filter tables: arp, IPv4, IPv6 and bridge:
  net/ipv4/netfilter/nf_tables_ipv4.c
  net/ipv6/netfilter/nf_tables_ipv6.c
  net/ipv4/netfilter/nf_tables_arp.c
  net/bridge/netfilter/nf_tables_bridge.c
* the NAT table (IPv4 only):
  net/ipv4/netfilter/nf_table_nat_ipv4.c
* the route table (similar to mangle):
  net/ipv4/netfilter/nf_table_route_ipv4.c
  net/ipv6/netfilter/nf_table_route_ipv6.c
* internal definitions under:
  include/net/netfilter/nf_tables.h
  include/net/netfilter/nf_tables_core.h
* It also includes a skeleton expression:
  net/netfilter/nft_expr_template.c
  and the preliminary implementation of the meta target
  net/netfilter/nft_meta_target.c

It also includes a change in struct nf_hook_ops to add a new
pointer to store private data to the hook, that is used to store
the rule list per chain.

This patch is based on the patch from Patrick McHardy, plus merged
accumulated cleanups, fixes and small enhancements to the nftables
code that has been done since 2009, which are:

From Patrick McHardy:
* nf_tables: adjust netlink handler function signatures
* nf_tables: only retry table lookup after successful table module load
* nf_tables: fix event notification echo and avoid unnecessary messages
* nft_ct: add l3proto support
* nf_tables: pass expression context to nft_validate_data_load()
* nf_tables: remove redundant definition
* nft_ct: fix maxattr initialization
* nf_tables: fix invalid event type in nf_tables_getrule()
* nf_tables: simplify nft_data_init() usage
* nf_tables: build in more core modules
* nf_tables: fix double lookup expression unregistation
* nf_tables: move expression initialization to nf_tables_core.c
* nf_tables: build in payload module
* nf_tables: use NFPROTO constants
* nf_tables: rename pid variables to portid
* nf_tables: save 48 bits per rule
* nf_tables: introduce chain rename
* nf_tables: check for duplicate names on chain rename
* nf_tables: remove ability to specify handles for new rules
* nf_tables: return error for rule change request
* nf_tables: return error for NLM_F_REPLACE without rule handle
* nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification
* nf_tables: fix NLM_F_MULTI usage in netlink notifications
* nf_tables: include NLM_F_APPEND in rule dumps

From Pablo Neira Ayuso:
* nf_tables: fix stack overflow in nf_tables_newrule
* nf_tables: nft_ct: fix compilation warning
* nf_tables: nft_ct: fix crash with invalid packets
* nft_log: group and qthreshold are 2^16
* nf_tables: nft_meta: fix socket uid,gid handling
* nft_counter: allow to restore counters
* nf_tables: fix module autoload
* nf_tables: allow to remove all rules placed in one chain
* nf_tables: use 64-bits rule handle instead of 16-bits
* nf_tables: fix chain after rule deletion
* nf_tables: improve deletion performance
* nf_tables: add missing code in route chain type
* nf_tables: rise maximum number of expressions from 12 to 128
* nf_tables: don't delete table if in use
* nf_tables: fix basechain release

From Tomasz Bursztyka:
* nf_tables: Add support for changing users chain's name
* nf_tables: Change chain's name to be fixed sized
* nf_tables: Add support for replacing a rule by another one
* nf_tables: Update uapi nftables netlink header documentation

From Florian Westphal:
* nft_log: group is u16, snaplen u32

From Phil Oester:
* nf_tables: operational limit match

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
39 files changed:
include/linux/netfilter.h
include/net/netfilter/nf_tables.h [new file with mode: 0644]
include/net/netfilter/nf_tables_core.h [new file with mode: 0644]
include/uapi/linux/netfilter/Kbuild
include/uapi/linux/netfilter/nf_conntrack_common.h
include/uapi/linux/netfilter/nf_tables.h [new file with mode: 0644]
include/uapi/linux/netfilter/nfnetlink.h
net/bridge/netfilter/Kconfig
net/bridge/netfilter/Makefile
net/bridge/netfilter/nf_tables_bridge.c [new file with mode: 0644]
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/nf_table_nat_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nf_table_route_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nf_tables_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nft_reject_ipv4.c [new file with mode: 0644]
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/nf_table_route_ipv6.c [new file with mode: 0644]
net/ipv6/netfilter/nf_tables_ipv6.c [new file with mode: 0644]
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_tables_api.c [new file with mode: 0644]
net/netfilter/nf_tables_core.c [new file with mode: 0644]
net/netfilter/nft_bitwise.c [new file with mode: 0644]
net/netfilter/nft_byteorder.c [new file with mode: 0644]
net/netfilter/nft_cmp.c [new file with mode: 0644]
net/netfilter/nft_counter.c [new file with mode: 0644]
net/netfilter/nft_ct.c [new file with mode: 0644]
net/netfilter/nft_expr_template.c [new file with mode: 0644]
net/netfilter/nft_exthdr.c [new file with mode: 0644]
net/netfilter/nft_hash.c [new file with mode: 0644]
net/netfilter/nft_immediate.c [new file with mode: 0644]
net/netfilter/nft_limit.c [new file with mode: 0644]
net/netfilter/nft_log.c [new file with mode: 0644]
net/netfilter/nft_meta.c [new file with mode: 0644]
net/netfilter/nft_meta_target.c [new file with mode: 0644]
net/netfilter/nft_payload.c [new file with mode: 0644]
net/netfilter/nft_set.c [new file with mode: 0644]

index fef7e67f71016b67967d052cc42e6ed5b129f733..2077489f98873bcbe4ca083cd1f0eb1b99eeab5e 100644 (file)
@@ -53,12 +53,13 @@ struct nf_hook_ops {
        struct list_head list;
 
        /* User fills in from here down. */
-       nf_hookfn *hook;
-       struct module *owner;
-       u_int8_t pf;
-       unsigned int hooknum;
+       nf_hookfn       *hook;
+       struct module   *owner;
+       void            *priv;
+       u_int8_t        pf;
+       unsigned int    hooknum;
        /* Hooks are ordered in ascending priority. */
-       int priority;
+       int             priority;
 };
 
 struct nf_sockopt_ops {
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
new file mode 100644 (file)
index 0000000..d26dfa3
--- /dev/null
@@ -0,0 +1,301 @@
+#ifndef _NET_NF_TABLES_H
+#define _NET_NF_TABLES_H
+
+#include <linux/list.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netlink.h>
+
+struct nft_pktinfo {
+       struct sk_buff                  *skb;
+       const struct net_device         *in;
+       const struct net_device         *out;
+       u8                              hooknum;
+       u8                              nhoff;
+       u8                              thoff;
+};
+
+struct nft_data {
+       union {
+               u32                             data[4];
+               struct {
+                       u32                     verdict;
+                       struct nft_chain        *chain;
+               };
+       };
+} __attribute__((aligned(__alignof__(u64))));
+
+static inline int nft_data_cmp(const struct nft_data *d1,
+                              const struct nft_data *d2,
+                              unsigned int len)
+{
+       return memcmp(d1->data, d2->data, len);
+}
+
+static inline void nft_data_copy(struct nft_data *dst,
+                                const struct nft_data *src)
+{
+       BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64));
+       *(u64 *)&dst->data[0] = *(u64 *)&src->data[0];
+       *(u64 *)&dst->data[2] = *(u64 *)&src->data[2];
+}
+
+static inline void nft_data_debug(const struct nft_data *data)
+{
+       pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
+                data->data[0], data->data[1],
+                data->data[2], data->data[3]);
+}
+
+/**
+ *     struct nft_ctx - nf_tables rule context
+ *
+ *     @afi: address family info
+ *     @table: the table the chain is contained in
+ *     @chain: the chain the rule is contained in
+ */
+struct nft_ctx {
+       const struct nft_af_info        *afi;
+       const struct nft_table          *table;
+       const struct nft_chain          *chain;
+};
+
+enum nft_data_types {
+       NFT_DATA_VALUE,
+       NFT_DATA_VERDICT,
+};
+
+struct nft_data_desc {
+       enum nft_data_types             type;
+       unsigned int                    len;
+};
+
+extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+                        struct nft_data_desc *desc, const struct nlattr *nla);
+extern void nft_data_uninit(const struct nft_data *data,
+                           enum nft_data_types type);
+extern int nft_data_dump(struct sk_buff *skb, int attr,
+                        const struct nft_data *data,
+                        enum nft_data_types type, unsigned int len);
+
+static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
+{
+       return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
+}
+
+extern int nft_validate_input_register(enum nft_registers reg);
+extern int nft_validate_output_register(enum nft_registers reg);
+extern int nft_validate_data_load(const struct nft_ctx *ctx,
+                                 enum nft_registers reg,
+                                 const struct nft_data *data,
+                                 enum nft_data_types type);
+
+/**
+ *     struct nft_expr_ops - nf_tables expression operations
+ *
+ *     @eval: Expression evaluation function
+ *     @init: initialization function
+ *     @destroy: destruction function
+ *     @dump: function to dump parameters
+ *     @list: used internally
+ *     @name: Identifier
+ *     @owner: module reference
+ *     @policy: netlink attribute policy
+ *     @maxattr: highest netlink attribute number
+ *     @size: full expression size, including private data size
+ */
+struct nft_expr;
+struct nft_expr_ops {
+       void                            (*eval)(const struct nft_expr *expr,
+                                               struct nft_data data[NFT_REG_MAX + 1],
+                                               const struct nft_pktinfo *pkt);
+       int                             (*init)(const struct nft_ctx *ctx,
+                                               const struct nft_expr *expr,
+                                               const struct nlattr * const tb[]);
+       void                            (*destroy)(const struct nft_expr *expr);
+       int                             (*dump)(struct sk_buff *skb,
+                                               const struct nft_expr *expr);
+
+       struct list_head                list;
+       const char                      *name;
+       struct module                   *owner;
+       const struct nla_policy         *policy;
+       unsigned int                    maxattr;
+       unsigned int                    size;
+};
+
+#define NFT_EXPR_SIZE(size)            (sizeof(struct nft_expr) + \
+                                        ALIGN(size, __alignof__(struct nft_expr)))
+
+/**
+ *     struct nft_expr - nf_tables expression
+ *
+ *     @ops: expression ops
+ *     @data: expression private data
+ */
+struct nft_expr {
+       const struct nft_expr_ops       *ops;
+       unsigned char                   data[];
+};
+
+static inline void *nft_expr_priv(const struct nft_expr *expr)
+{
+       return (void *)expr->data;
+}
+
+/**
+ *     struct nft_rule - nf_tables rule
+ *
+ *     @list: used internally
+ *     @rcu_head: used internally for rcu
+ *     @handle: rule handle
+ *     @dlen: length of expression data
+ *     @data: expression data
+ */
+struct nft_rule {
+       struct list_head                list;
+       struct rcu_head                 rcu_head;
+       u64                             handle:48,
+                                       dlen:16;
+       unsigned char                   data[]
+               __attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
+{
+       return (struct nft_expr *)&rule->data[0];
+}
+
+static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr)
+{
+       return ((void *)expr) + expr->ops->size;
+}
+
+static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
+{
+       return (struct nft_expr *)&rule->data[rule->dlen];
+}
+
+/*
+ * The last pointer isn't really necessary, but the compiler isn't able to
+ * determine that the result of nft_expr_last() is always the same since it
+ * can't assume that the dlen value wasn't changed within calls in the loop.
+ */
+#define nft_rule_for_each_expr(expr, last, rule) \
+       for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \
+            (expr) != (last); \
+            (expr) = nft_expr_next(expr))
+
+enum nft_chain_flags {
+       NFT_BASE_CHAIN                  = 0x1,
+       NFT_CHAIN_BUILTIN               = 0x2,
+};
+
+/**
+ *     struct nft_chain - nf_tables chain
+ *
+ *     @rules: list of rules in the chain
+ *     @list: used internally
+ *     @rcu_head: used internally
+ *     @handle: chain handle
+ *     @flags: bitmask of enum nft_chain_flags
+ *     @use: number of jump references to this chain
+ *     @level: length of longest path to this chain
+ *     @name: name of the chain
+ */
+struct nft_chain {
+       struct list_head                rules;
+       struct list_head                list;
+       struct rcu_head                 rcu_head;
+       u64                             handle;
+       u8                              flags;
+       u16                             use;
+       u16                             level;
+       char                            name[NFT_CHAIN_MAXNAMELEN];
+};
+
+/**
+ *     struct nft_base_chain - nf_tables base chain
+ *
+ *     @ops: netfilter hook ops
+ *     @chain: the chain
+ */
+struct nft_base_chain {
+       struct nf_hook_ops              ops;
+       struct nft_chain                chain;
+};
+
+static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
+{
+       return container_of(chain, struct nft_base_chain, chain);
+}
+
+extern unsigned int nft_do_chain(const struct nf_hook_ops *ops,
+                                struct sk_buff *skb,
+                                const struct net_device *in,
+                                const struct net_device *out,
+                                int (*okfn)(struct sk_buff *));
+
+enum nft_table_flags {
+       NFT_TABLE_BUILTIN               = 0x1,
+};
+
+/**
+ *     struct nft_table - nf_tables table
+ *
+ *     @list: used internally
+ *     @chains: chains in the table
+ *     @sets: sets in the table
+ *     @hgenerator: handle generator state
+ *     @use: number of chain references to this table
+ *     @flags: table flag (see enum nft_table_flags)
+ *     @name: name of the table
+ */
+struct nft_table {
+       struct list_head                list;
+       struct list_head                chains;
+       struct list_head                sets;
+       u64                             hgenerator;
+       u32                             use;
+       u16                             flags;
+       char                            name[];
+};
+
+/**
+ *     struct nft_af_info - nf_tables address family info
+ *
+ *     @list: used internally
+ *     @family: address family
+ *     @nhooks: number of hooks in this family
+ *     @owner: module owner
+ *     @tables: used internally
+ *     @hooks: hookfn overrides for packet validation
+ */
+struct nft_af_info {
+       struct list_head                list;
+       int                             family;
+       unsigned int                    nhooks;
+       struct module                   *owner;
+       struct list_head                tables;
+       nf_hookfn                       *hooks[NF_MAX_HOOKS];
+};
+
+extern int nft_register_afinfo(struct nft_af_info *);
+extern void nft_unregister_afinfo(struct nft_af_info *);
+
+extern int nft_register_table(struct nft_table *, int family);
+extern void nft_unregister_table(struct nft_table *, int family);
+
+extern int nft_register_expr(struct nft_expr_ops *);
+extern void nft_unregister_expr(struct nft_expr_ops *);
+
+#define MODULE_ALIAS_NFT_FAMILY(family)        \
+       MODULE_ALIAS("nft-afinfo-" __stringify(family))
+
+#define MODULE_ALIAS_NFT_TABLE(family, name) \
+       MODULE_ALIAS("nft-table-" __stringify(family) "-" name)
+
+#define MODULE_ALIAS_NFT_EXPR(name) \
+       MODULE_ALIAS("nft-expr-" name)
+
+#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
new file mode 100644 (file)
index 0000000..283396c
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _NET_NF_TABLES_CORE_H
+#define _NET_NF_TABLES_CORE_H
+
+extern int nf_tables_core_module_init(void);
+extern void nf_tables_core_module_exit(void);
+
+extern int nft_immediate_module_init(void);
+extern void nft_immediate_module_exit(void);
+
+extern int nft_cmp_module_init(void);
+extern void nft_cmp_module_exit(void);
+
+extern int nft_lookup_module_init(void);
+extern void nft_lookup_module_exit(void);
+
+extern int nft_bitwise_module_init(void);
+extern void nft_bitwise_module_exit(void);
+
+extern int nft_byteorder_module_init(void);
+extern void nft_byteorder_module_exit(void);
+
+extern int nft_payload_module_init(void);
+extern void nft_payload_module_exit(void);
+
+#endif /* _NET_NF_TABLES_CORE_H */
index 174915420d3fe8231dc151519ceac551aceb148e..6ce0b7f566a7e139fabf01dd1f04f7220630ebd4 100644 (file)
@@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
 header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
+header-y += nf_tables.h
 header-y += nf_nat.h
 header-y += nfnetlink.h
 header-y += nfnetlink_acct.h
index 8dd803818ebe34c63db7792dadf4614632bdd93e..319f47128db8c117563efa26d4cd54e1c3ac2cc2 100644 (file)
@@ -25,6 +25,10 @@ enum ip_conntrack_info {
        IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
 };
 
+#define NF_CT_STATE_INVALID_BIT                        (1 << 0)
+#define NF_CT_STATE_BIT(ctinfo)                        (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
+#define NF_CT_STATE_UNTRACKED_BIT              (1 << (IP_CT_NUMBER + 1))
+
 /* Bitset representing status of connection. */
 enum ip_conntrack_status {
        /* It's an expected connection: bit 0 set.  This bit never changed */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
new file mode 100644 (file)
index 0000000..ec6d84a
--- /dev/null
@@ -0,0 +1,582 @@
+#ifndef _LINUX_NF_TABLES_H
+#define _LINUX_NF_TABLES_H
+
+#define NFT_CHAIN_MAXNAMELEN 32
+
+enum nft_registers {
+       NFT_REG_VERDICT,
+       NFT_REG_1,
+       NFT_REG_2,
+       NFT_REG_3,
+       NFT_REG_4,
+       __NFT_REG_MAX
+};
+#define NFT_REG_MAX    (__NFT_REG_MAX - 1)
+
+/**
+ * enum nft_verdicts - nf_tables internal verdicts
+ *
+ * @NFT_CONTINUE: continue evaluation of the current rule
+ * @NFT_BREAK: terminate evaluation of the current rule
+ * @NFT_JUMP: push the current chain on the jump stack and jump to a chain
+ * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack
+ * @NFT_RETURN: return to the topmost chain on the jump stack
+ *
+ * The nf_tables verdicts share their numeric space with the netfilter verdicts.
+ */
+enum nft_verdicts {
+       NFT_CONTINUE    = -1,
+       NFT_BREAK       = -2,
+       NFT_JUMP        = -3,
+       NFT_GOTO        = -4,
+       NFT_RETURN      = -5,
+};
+
+/**
+ * enum nf_tables_msg_types - nf_tables netlink message types
+ *
+ * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes)
+ * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes)
+ * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes)
+ * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes)
+ * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes)
+ * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes)
+ * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes)
+ * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes)
+ * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes)
+ */
+enum nf_tables_msg_types {
+       NFT_MSG_NEWTABLE,
+       NFT_MSG_GETTABLE,
+       NFT_MSG_DELTABLE,
+       NFT_MSG_NEWCHAIN,
+       NFT_MSG_GETCHAIN,
+       NFT_MSG_DELCHAIN,
+       NFT_MSG_NEWRULE,
+       NFT_MSG_GETRULE,
+       NFT_MSG_DELRULE,
+       NFT_MSG_MAX,
+};
+
+enum nft_list_attributes {
+       NFTA_LIST_UNPEC,
+       NFTA_LIST_ELEM,
+       __NFTA_LIST_MAX
+};
+#define NFTA_LIST_MAX          (__NFTA_LIST_MAX - 1)
+
+/**
+ * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes
+ *
+ * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
+ * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ */
+enum nft_hook_attributes {
+       NFTA_HOOK_UNSPEC,
+       NFTA_HOOK_HOOKNUM,
+       NFTA_HOOK_PRIORITY,
+       __NFTA_HOOK_MAX
+};
+#define NFTA_HOOK_MAX          (__NFTA_HOOK_MAX - 1)
+
+/**
+ * enum nft_table_attributes - nf_tables table netlink attributes
+ *
+ * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
+ */
+enum nft_table_attributes {
+       NFTA_TABLE_UNSPEC,
+       NFTA_TABLE_NAME,
+       __NFTA_TABLE_MAX
+};
+#define NFTA_TABLE_MAX         (__NFTA_TABLE_MAX - 1)
+
+/**
+ * enum nft_chain_attributes - nf_tables chain netlink attributes
+ *
+ * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING)
+ * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64)
+ * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING)
+ * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes)
+ */
+enum nft_chain_attributes {
+       NFTA_CHAIN_UNSPEC,
+       NFTA_CHAIN_TABLE,
+       NFTA_CHAIN_HANDLE,
+       NFTA_CHAIN_NAME,
+       NFTA_CHAIN_HOOK,
+       __NFTA_CHAIN_MAX
+};
+#define NFTA_CHAIN_MAX         (__NFTA_CHAIN_MAX - 1)
+
+/**
+ * enum nft_rule_attributes - nf_tables rule netlink attributes
+ *
+ * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING)
+ * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING)
+ * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64)
+ * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
+ */
+enum nft_rule_attributes {
+       NFTA_RULE_UNSPEC,
+       NFTA_RULE_TABLE,
+       NFTA_RULE_CHAIN,
+       NFTA_RULE_HANDLE,
+       NFTA_RULE_EXPRESSIONS,
+       __NFTA_RULE_MAX
+};
+#define NFTA_RULE_MAX          (__NFTA_RULE_MAX - 1)
+
+enum nft_data_attributes {
+       NFTA_DATA_UNSPEC,
+       NFTA_DATA_VALUE,
+       NFTA_DATA_VERDICT,
+       __NFTA_DATA_MAX
+};
+#define NFTA_DATA_MAX          (__NFTA_DATA_MAX - 1)
+
+/**
+ * enum nft_verdict_attributes - nf_tables verdict netlink attributes
+ *
+ * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts)
+ * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING)
+ */
+enum nft_verdict_attributes {
+       NFTA_VERDICT_UNSPEC,
+       NFTA_VERDICT_CODE,
+       NFTA_VERDICT_CHAIN,
+       __NFTA_VERDICT_MAX
+};
+#define NFTA_VERDICT_MAX       (__NFTA_VERDICT_MAX - 1)
+
+/**
+ * enum nft_expr_attributes - nf_tables expression netlink attributes
+ *
+ * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING)
+ * @NFTA_EXPR_DATA: type specific data (NLA_NESTED)
+ */
+enum nft_expr_attributes {
+       NFTA_EXPR_UNSPEC,
+       NFTA_EXPR_NAME,
+       NFTA_EXPR_DATA,
+       __NFTA_EXPR_MAX
+};
+#define NFTA_EXPR_MAX          (__NFTA_EXPR_MAX - 1)
+
+/**
+ * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes
+ *
+ * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32)
+ * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_immediate_attributes {
+       NFTA_IMMEDIATE_UNSPEC,
+       NFTA_IMMEDIATE_DREG,
+       NFTA_IMMEDIATE_DATA,
+       __NFTA_IMMEDIATE_MAX
+};
+#define NFTA_IMMEDIATE_MAX     (__NFTA_IMMEDIATE_MAX - 1)
+
+/**
+ * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
+ *
+ * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_LEN: length of operands (NLA_U32)
+ * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes)
+ * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes)
+ *
+ * The bitwise expression performs the following operation:
+ *
+ * dreg = (sreg & mask) ^ xor
+ *
+ * which allows expressing all bitwise operations:
+ *
+ *             mask    xor
+ * NOT:        1       1
+ * OR:         ~x      x
+ * XOR:        1       x
+ * AND:        x       0
+ */
+enum nft_bitwise_attributes {
+       NFTA_BITWISE_UNSPEC,
+       NFTA_BITWISE_SREG,
+       NFTA_BITWISE_DREG,
+       NFTA_BITWISE_LEN,
+       NFTA_BITWISE_MASK,
+       NFTA_BITWISE_XOR,
+       __NFTA_BITWISE_MAX
+};
+#define NFTA_BITWISE_MAX       (__NFTA_BITWISE_MAX - 1)
+
+/**
+ * enum nft_byteorder_ops - nf_tables byteorder operators
+ *
+ * @NFT_BYTEORDER_NTOH: network to host operator
+ * @NFT_BYTEORDER_HTON: host to network operator
+ */
+enum nft_byteorder_ops {
+       NFT_BYTEORDER_NTOH,
+       NFT_BYTEORDER_HTON,
+};
+
+/**
+ * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes
+ *
+ * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops)
+ * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32)
+ * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4)
+ */
+enum nft_byteorder_attributes {
+       NFTA_BYTEORDER_UNSPEC,
+       NFTA_BYTEORDER_SREG,
+       NFTA_BYTEORDER_DREG,
+       NFTA_BYTEORDER_OP,
+       NFTA_BYTEORDER_LEN,
+       NFTA_BYTEORDER_SIZE,
+       __NFTA_BYTEORDER_MAX
+};
+#define NFTA_BYTEORDER_MAX     (__NFTA_BYTEORDER_MAX - 1)
+
+/**
+ * enum nft_cmp_ops - nf_tables relational operator
+ *
+ * @NFT_CMP_EQ: equal
+ * @NFT_CMP_NEQ: not equal
+ * @NFT_CMP_LT: less than
+ * @NFT_CMP_LTE: less than or equal to
+ * @NFT_CMP_GT: greater than
+ * @NFT_CMP_GTE: greater than or equal to
+ */
+enum nft_cmp_ops {
+       NFT_CMP_EQ,
+       NFT_CMP_NEQ,
+       NFT_CMP_LT,
+       NFT_CMP_LTE,
+       NFT_CMP_GT,
+       NFT_CMP_GTE,
+};
+
+/**
+ * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes
+ *
+ * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_cmp_attributes {
+       NFTA_CMP_UNSPEC,
+       NFTA_CMP_SREG,
+       NFTA_CMP_OP,
+       NFTA_CMP_DATA,
+       __NFTA_CMP_MAX
+};
+#define NFTA_CMP_MAX           (__NFTA_CMP_MAX - 1)
+
+enum nft_set_elem_flags {
+       NFT_SE_INTERVAL_END     = 0x1,
+};
+
+enum nft_set_elem_attributes {
+       NFTA_SE_UNSPEC,
+       NFTA_SE_KEY,
+       NFTA_SE_DATA,
+       NFTA_SE_FLAGS,
+       __NFTA_SE_MAX
+};
+#define NFTA_SE_MAX            (__NFTA_SE_MAX - 1)
+
+enum nft_set_flags {
+       NFT_SET_INTERVAL        = 0x1,
+       NFT_SET_MAP             = 0x2,
+};
+
+enum nft_set_attributes {
+       NFTA_SET_UNSPEC,
+       NFTA_SET_FLAGS,
+       NFTA_SET_SREG,
+       NFTA_SET_DREG,
+       NFTA_SET_KLEN,
+       NFTA_SET_DLEN,
+       NFTA_SET_ELEMENTS,
+       __NFTA_SET_MAX
+};
+#define NFTA_SET_MAX           (__NFTA_SET_MAX - 1)
+
+enum nft_hash_flags {
+       NFT_HASH_MAP            = 0x1,
+};
+
+enum nft_hash_elem_attributes {
+       NFTA_HE_UNSPEC,
+       NFTA_HE_KEY,
+       NFTA_HE_DATA,
+       __NFTA_HE_MAX
+};
+#define NFTA_HE_MAX            (__NFTA_HE_MAX - 1)
+
+enum nft_hash_attributes {
+       NFTA_HASH_UNSPEC,
+       NFTA_HASH_FLAGS,
+       NFTA_HASH_SREG,
+       NFTA_HASH_DREG,
+       NFTA_HASH_KLEN,
+       NFTA_HASH_ELEMENTS,
+       __NFTA_HASH_MAX
+};
+#define NFTA_HASH_MAX          (__NFTA_HASH_MAX - 1)
+
+/**
+ * enum nft_payload_bases - nf_tables payload expression offset bases
+ *
+ * @NFT_PAYLOAD_LL_HEADER: link layer header
+ * @NFT_PAYLOAD_NETWORK_HEADER: network header
+ * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
+ */
+enum nft_payload_bases {
+       NFT_PAYLOAD_LL_HEADER,
+       NFT_PAYLOAD_NETWORK_HEADER,
+       NFT_PAYLOAD_TRANSPORT_HEADER,
+};
+
+/**
+ * enum nft_payload_attributes - nf_tables payload expression netlink attributes
+ *
+ * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers)
+ * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
+ * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
+ * @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
+ */
+enum nft_payload_attributes {
+       NFTA_PAYLOAD_UNSPEC,
+       NFTA_PAYLOAD_DREG,
+       NFTA_PAYLOAD_BASE,
+       NFTA_PAYLOAD_OFFSET,
+       NFTA_PAYLOAD_LEN,
+       __NFTA_PAYLOAD_MAX
+};
+#define NFTA_PAYLOAD_MAX       (__NFTA_PAYLOAD_MAX - 1)
+
+/**
+ * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes
+ *
+ * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
+ * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
+ * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ */
+enum nft_exthdr_attributes {
+       NFTA_EXTHDR_UNSPEC,
+       NFTA_EXTHDR_DREG,
+       NFTA_EXTHDR_TYPE,
+       NFTA_EXTHDR_OFFSET,
+       NFTA_EXTHDR_LEN,
+       __NFTA_EXTHDR_MAX
+};
+#define NFTA_EXTHDR_MAX                (__NFTA_EXTHDR_MAX - 1)
+
+/**
+ * enum nft_meta_keys - nf_tables meta expression keys
+ *
+ * @NFT_META_LEN: packet length (skb->len)
+ * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT
+ * @NFT_META_PRIORITY: packet priority (skb->priority)
+ * @NFT_META_MARK: packet mark (skb->mark)
+ * @NFT_META_IIF: packet input interface index (dev->ifindex)
+ * @NFT_META_OIF: packet output interface index (dev->ifindex)
+ * @NFT_META_IIFNAME: packet input interface name (dev->name)
+ * @NFT_META_OIFNAME: packet output interface name (dev->name)
+ * @NFT_META_IIFTYPE: packet input interface type (dev->type)
+ * @NFT_META_OIFTYPE: packet output interface type (dev->type)
+ * @NFT_META_SKUID: originating socket UID (fsuid)
+ * @NFT_META_SKGID: originating socket GID (fsgid)
+ * @NFT_META_NFTRACE: packet nftrace bit
+ * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid)
+ * @NFT_META_SECMARK: packet secmark (skb->secmark)
+ *
+ * These values are carried in NFTA_META_KEY and are numbered implicitly
+ * from 0 in declaration order, so new keys must be appended at the end to
+ * keep the existing numbers stable.
+ */
+enum nft_meta_keys {
+       NFT_META_LEN,
+       NFT_META_PROTOCOL,
+       NFT_META_PRIORITY,
+       NFT_META_MARK,
+       NFT_META_IIF,
+       NFT_META_OIF,
+       NFT_META_IIFNAME,
+       NFT_META_OIFNAME,
+       NFT_META_IIFTYPE,
+       NFT_META_OIFTYPE,
+       NFT_META_SKUID,
+       NFT_META_SKGID,
+       NFT_META_NFTRACE,
+       NFT_META_RTCLASSID,
+       NFT_META_SECMARK,
+};
+
+/**
+ * enum nft_meta_attributes - nf_tables meta expression netlink attributes
+ *
+ * @NFTA_META_DREG: destination register (NLA_U32)
+ * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
+ */
+enum nft_meta_attributes {
+       NFTA_META_UNSPEC,
+       NFTA_META_DREG,
+       NFTA_META_KEY,
+       __NFTA_META_MAX
+};
+/* Highest attribute number in use; currently equal to NFTA_META_KEY. */
+#define NFTA_META_MAX          (__NFTA_META_MAX - 1)
+
+/**
+ * enum nft_ct_keys - nf_tables ct expression keys
+ *
+ * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
+ * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir)
+ * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status)
+ * @NFT_CT_MARK: conntrack mark value
+ * @NFT_CT_SECMARK: conntrack secmark value
+ * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
+ * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
+ * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
+ * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
+ * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
+ *
+ * These values are carried in NFTA_CT_KEY and are numbered implicitly from
+ * 0 in declaration order, so new keys must be appended at the end to keep
+ * the existing numbers stable.
+ */
+enum nft_ct_keys {
+       NFT_CT_STATE,
+       NFT_CT_DIRECTION,
+       NFT_CT_STATUS,
+       NFT_CT_MARK,
+       NFT_CT_SECMARK,
+       NFT_CT_EXPIRATION,
+       NFT_CT_HELPER,
+       NFT_CT_L3PROTOCOL,
+       NFT_CT_SRC,
+       NFT_CT_DST,
+       NFT_CT_PROTOCOL,
+       NFT_CT_PROTO_SRC,
+       NFT_CT_PROTO_DST,
+};
+
+/**
+ * enum nft_ct_attributes - nf_tables ct expression netlink attributes
+ *
+ * @NFTA_CT_DREG: destination register (NLA_U32)
+ * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys)
+ * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8)
+ */
+enum nft_ct_attributes {
+       NFTA_CT_UNSPEC,
+       NFTA_CT_DREG,
+       NFTA_CT_KEY,
+       NFTA_CT_DIRECTION,
+       __NFTA_CT_MAX
+};
+/* Highest attribute number in use; currently equal to NFTA_CT_DIRECTION. */
+#define NFTA_CT_MAX            (__NFTA_CT_MAX - 1)
+
+/**
+ * enum nft_limit_attributes - nf_tables limit expression netlink attributes
+ *
+ * @NFTA_LIMIT_RATE: refill rate (NLA_U64)
+ * @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
+ */
+enum nft_limit_attributes {
+       NFTA_LIMIT_UNSPEC,
+       NFTA_LIMIT_RATE,
+       NFTA_LIMIT_UNIT,
+       __NFTA_LIMIT_MAX
+};
+/* Highest attribute number in use; currently equal to NFTA_LIMIT_UNIT. */
+#define NFTA_LIMIT_MAX         (__NFTA_LIMIT_MAX - 1)
+
+/**
+ * enum nft_counter_attributes - nf_tables counter expression netlink attributes
+ *
+ * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64)
+ * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64)
+ */
+enum nft_counter_attributes {
+       NFTA_COUNTER_UNSPEC,
+       NFTA_COUNTER_BYTES,
+       NFTA_COUNTER_PACKETS,
+       __NFTA_COUNTER_MAX
+};
+/* Highest attribute number in use; currently equal to NFTA_COUNTER_PACKETS. */
+#define NFTA_COUNTER_MAX       (__NFTA_COUNTER_MAX - 1)
+
+/**
+ * enum nft_log_attributes - nf_tables log expression netlink attributes
+ *
+ * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32)
+ * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
+ * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
+ * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
+ */
+enum nft_log_attributes {
+       NFTA_LOG_UNSPEC,
+       NFTA_LOG_GROUP,
+       NFTA_LOG_PREFIX,
+       NFTA_LOG_SNAPLEN,
+       NFTA_LOG_QTHRESHOLD,
+       __NFTA_LOG_MAX
+};
+/* Highest attribute number in use; currently equal to NFTA_LOG_QTHRESHOLD. */
+#define NFTA_LOG_MAX           (__NFTA_LOG_MAX - 1)
+
+/**
+ * enum nft_reject_types - nf_tables reject expression reject types
+ *
+ * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
+ * @NFT_REJECT_TCP_RST: reject using TCP RST
+ *
+ * These values are carried in NFTA_REJECT_TYPE and are numbered implicitly
+ * from 0 in declaration order.
+ *
+ * NOTE(review): nft_reject_eval() in nft_reject_ipv4.c sends nothing for
+ * NFT_REJECT_TCP_RST and only sets an NF_DROP verdict; confirm whether the
+ * RST is meant to be implemented later.
+ */
+enum nft_reject_types {
+       NFT_REJECT_ICMP_UNREACH,
+       NFT_REJECT_TCP_RST,
+};
+
+/**
+ * enum nft_reject_attributes - nf_tables reject expression netlink attributes
+ *
+ * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types)
+ * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8)
+ */
+enum nft_reject_attributes {
+       NFTA_REJECT_UNSPEC,
+       NFTA_REJECT_TYPE,
+       NFTA_REJECT_ICMP_CODE,
+       __NFTA_REJECT_MAX
+};
+/*
+ * Highest attribute number in use; currently equal to NFTA_REJECT_ICMP_CODE.
+ * Tables indexed by attribute number are sized NFTA_REJECT_MAX + 1.
+ */
+#define NFTA_REJECT_MAX                (__NFTA_REJECT_MAX - 1)
+
+/**
+ * enum nft_nat_types - nf_tables nat expression NAT types
+ *
+ * @NFT_NAT_SNAT: source NAT
+ * @NFT_NAT_DNAT: destination NAT
+ *
+ * These values are carried in NFTA_NAT_TYPE and are numbered implicitly
+ * from 0 in declaration order.  nft_nat_init() maps NFT_NAT_SNAT to
+ * NF_NAT_MANIP_SRC and NFT_NAT_DNAT to NF_NAT_MANIP_DST.
+ */
+enum nft_nat_types {
+       NFT_NAT_SNAT,
+       NFT_NAT_DNAT,
+};
+
+/**
+ * enum nft_nat_attributes - nf_tables nat expression netlink attributes
+ *
+ * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types)
+ * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
+ * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
+ */
+enum nft_nat_attributes {
+       NFTA_NAT_UNSPEC,
+       NFTA_NAT_TYPE,
+       NFTA_NAT_ADDR_MIN,
+       NFTA_NAT_ADDR_MAX,
+       NFTA_NAT_PROTO_MIN,
+       NFTA_NAT_PROTO_MAX,
+       __NFTA_NAT_MAX
+};
+/*
+ * Highest attribute number in use; currently equal to NFTA_NAT_PROTO_MAX.
+ * Tables indexed by attribute number are sized NFTA_NAT_MAX + 1.
+ */
+#define NFTA_NAT_MAX           (__NFTA_NAT_MAX - 1)
+
+#endif /* _LINUX_NF_TABLES_H */
index 4a4efafad5f46664e1f60914eb52d8eb00805b84..d276c3bd55b802ecfedb0d7c18ebde34940afcc6 100644 (file)
@@ -18,6 +18,8 @@ enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_UPDATE   NFNLGRP_CONNTRACK_EXP_UPDATE
        NFNLGRP_CONNTRACK_EXP_DESTROY,
 #define NFNLGRP_CONNTRACK_EXP_DESTROY  NFNLGRP_CONNTRACK_EXP_DESTROY
+       NFNLGRP_NFTABLES,
+#define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
        __NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX    (__NFNLGRP_MAX - 1)
@@ -51,6 +53,7 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_ACCT               7
 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT  8
 #define NFNL_SUBSYS_CTHELPER           9
-#define NFNL_SUBSYS_COUNT              10
+#define NFNL_SUBSYS_NFTABLES           10
+#define NFNL_SUBSYS_COUNT              11
 
 #endif /* _UAPI_NFNETLINK_H */
index a9aff9c7d0273b2a41ef9d122ac38e769956a640..68f8128147be7c9eb2ae6ab9593d999308eea755 100644 (file)
@@ -1,6 +1,9 @@
 #
 # Bridge netfilter configuration
 #
+# nf_tables bridge family.  Like NF_TABLES_IPV4 it needs the nf_tables core:
+# the module registers itself through nft_register_afinfo().
+config NF_TABLES_BRIDGE
+       depends on NF_TABLES
+       tristate "Ethernet Bridge nf_tables support"
 
 menuconfig BRIDGE_NF_EBTABLES
        tristate "Ethernet Bridge tables (ebtables) support"
index 0718699540b023fd6d95be217b79e4bfe5029821..ea7629f58b3d1c44e28524df8a0937de3a18546b 100644 (file)
@@ -2,6 +2,8 @@
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
 
+obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
 
 # tables
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
new file mode 100644 (file)
index 0000000..bc5c21c
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Address-family descriptor for the bridge family: NFPROTO_BRIDGE with
+ * NF_BR_NUMHOOKS hooks.  No per-hook functions are set here.
+ */
+static struct nft_af_info nft_af_bridge __read_mostly = {
+       .family         = NFPROTO_BRIDGE,
+       .nhooks         = NF_BR_NUMHOOKS,
+       .owner          = THIS_MODULE,
+};
+
+/*
+ * Module init: register the bridge family descriptor with nf_tables.
+ * Returns whatever nft_register_afinfo() returns.
+ */
+static int __init nf_tables_bridge_init(void)
+{
+       return nft_register_afinfo(&nft_af_bridge);
+}
+
+/* Module exit: undo the registration done in nf_tables_bridge_init(). */
+static void __exit nf_tables_bridge_exit(void)
+{
+       nft_unregister_afinfo(&nft_af_bridge);
+}
+
+module_init(nf_tables_bridge_init);
+module_exit(nf_tables_bridge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
index 1657e39b291f2ae8747e21e944627def23ebfcbb..eb1d56ece361d7c2d1056c6afc19447b6a0d4740 100644 (file)
@@ -36,6 +36,22 @@ config NF_CONNTRACK_PROC_COMPAT
 
          If unsure, say Y.
 
+# IPv4 family support for nf_tables; built as nf_tables_ipv4.o.
+config NF_TABLES_IPV4
+       depends on NF_TABLES
+       tristate "IPv4 nf_tables support"
+
+# The three options below are layered on NF_TABLES_IPV4.
+config NFT_REJECT_IPV4
+       depends on NF_TABLES_IPV4
+       tristate "nf_tables IPv4 reject support"
+
+config NF_TABLE_ROUTE_IPV4
+       depends on NF_TABLES_IPV4
+       tristate "IPv4 nf_tables route table support"
+
+# NOTE(review): nf_table_nat_ipv4.c calls nf_ct_get(), nf_nat_setup_info(),
+# nf_nat_packet() and nf_nat_icmp_reply_translation(), yet no conntrack/NAT
+# dependency is declared here -- confirm which symbols must be required.
+config NF_TABLE_NAT_IPV4
+       depends on NF_TABLES_IPV4
+       tristate "IPv4 nf_tables nat table support"
+
 config IP_NF_IPTABLES
        tristate "IP tables support (required for filtering/masq/NAT)"
        default m if NETFILTER_ADVANCED=n
index 3622b248b6dd7ad78aa4411243b4b507731d7e2b..b2f01cd2cd65af25c8bef2ffa43168df4728557b 100644 (file)
@@ -27,6 +27,11 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 # NAT protocols (nf_nat)
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o
+obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o
+
 # generic IP tables 
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 
diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c
new file mode 100644 (file)
index 0000000..2a6f184
--- /dev/null
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * Private data of the "nat" expression.
+ *
+ * The sreg_* members hold source register numbers narrowed to 8 bits.
+ * nft_nat_eval() treats a zero sreg_addr_min / sreg_proto_min as "no
+ * address / proto range configured".  type is the manipulation type
+ * (NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST) chosen in nft_nat_init().
+ */
+struct nft_nat {
+       enum nft_registers      sreg_addr_min:8;
+       enum nft_registers      sreg_addr_max:8;
+       enum nft_registers      sreg_proto_min:8;
+       enum nft_registers      sreg_proto_max:8;
+       enum nf_nat_manip_type  type;
+};
+
+/*
+ * Evaluate the "nat" expression for one packet.
+ *
+ * Builds an nf_nat_range from the registers named in the expression's
+ * private data and passes it to nf_nat_setup_info() for the packet's
+ * conntrack entry; that call's return value is stored as the verdict.
+ *
+ * NOTE(review): ct is taken from nf_ct_get() without a NULL check.  The
+ * nf_nat_fn() hook in this file only runs the chain after checking ct, but
+ * confirm the expression cannot be evaluated from any other chain.
+ * NOTE(review): only data[0] of each register is read; confirm that this
+ * width and byte order match how addresses and ports are loaded.
+ */
+static void nft_nat_eval(const struct nft_expr *expr,
+                        struct nft_data data[NFT_REG_MAX + 1],
+                        const struct nft_pktinfo *pkt)
+{
+       const struct nft_nat *priv = nft_expr_priv(expr);
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+       struct nf_nat_range range;
+
+       memset(&range, 0, sizeof(range));
+       /* A zero register number means no address range was configured. */
+       if (priv->sreg_addr_min) {
+               range.min_addr.ip = data[priv->sreg_addr_min].data[0];
+               range.max_addr.ip = data[priv->sreg_addr_max].data[0];
+               range.flags |= NF_NAT_RANGE_MAP_IPS;
+       }
+
+       /* Likewise for the proto range. */
+       if (priv->sreg_proto_min) {
+               range.min_proto.all = data[priv->sreg_proto_min].data[0];
+               range.max_proto.all = data[priv->sreg_proto_max].data[0];
+               range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+       }
+
+       data[NFT_REG_VERDICT].verdict =
+               nf_nat_setup_info(ct, &range, priv->type);
+}
+
+/*
+ * Netlink attribute policy for the "nat" expression: every attribute is a
+ * 32-bit value.  Indexed by attribute number, hence NFTA_NAT_MAX + 1 slots.
+ */
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+       [NFTA_NAT_ADDR_MIN]     = { .type = NLA_U32 },
+       [NFTA_NAT_ADDR_MAX]     = { .type = NLA_U32 },
+       [NFTA_NAT_PROTO_MIN]    = { .type = NLA_U32 },
+       [NFTA_NAT_PROTO_MAX]    = { .type = NLA_U32 },
+       [NFTA_NAT_TYPE]         = { .type = NLA_U32 },
+};
+
+/*
+ * Read a source register number from @attr into @reg and validate it.
+ *
+ * The full 32-bit value is validated before the caller narrows it into an
+ * 8-bit bitfield, so a number that does not fit in 8 bits can no longer be
+ * truncated into one that looks like a valid register.
+ *
+ * Returns the result of nft_validate_input_register().
+ */
+static int nft_nat_parse_sreg(const struct nlattr *attr, u32 *reg)
+{
+       *reg = ntohl(nla_get_be32(attr));
+       return nft_validate_input_register(*reg);
+}
+
+/*
+ * Initialise a "nat" expression from its netlink attributes.
+ *
+ * NFTA_NAT_TYPE is mandatory and selects source or destination NAT.  The
+ * four register attributes are optional; a missing *_MAX register defaults
+ * to the corresponding *_MIN register.
+ *
+ * Returns 0 on success, -EINVAL for a missing or unknown NAT type, or the
+ * error reported by nft_validate_input_register() for a bad register.
+ *
+ * NOTE(review): when a *_MIN attribute is absent the field keeps whatever
+ * value the private area already had; this assumes the caller hands in
+ * zeroed memory -- confirm.
+ */
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                       const struct nlattr * const tb[])
+{
+       struct nft_nat *priv = nft_expr_priv(expr);
+       u32 reg;
+       int err;
+
+       if (tb[NFTA_NAT_TYPE] == NULL)
+               return -EINVAL;
+
+       switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+       case NFT_NAT_SNAT:
+               priv->type = NF_NAT_MANIP_SRC;
+               break;
+       case NFT_NAT_DNAT:
+               priv->type = NF_NAT_MANIP_DST;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (tb[NFTA_NAT_ADDR_MIN]) {
+               err = nft_nat_parse_sreg(tb[NFTA_NAT_ADDR_MIN], &reg);
+               if (err < 0)
+                       return err;
+               priv->sreg_addr_min = reg;
+       }
+
+       if (tb[NFTA_NAT_ADDR_MAX]) {
+               err = nft_nat_parse_sreg(tb[NFTA_NAT_ADDR_MAX], &reg);
+               if (err < 0)
+                       return err;
+               priv->sreg_addr_max = reg;
+       } else {
+               /* Single address: the range end is the range start. */
+               priv->sreg_addr_max = priv->sreg_addr_min;
+       }
+
+       if (tb[NFTA_NAT_PROTO_MIN]) {
+               err = nft_nat_parse_sreg(tb[NFTA_NAT_PROTO_MIN], &reg);
+               if (err < 0)
+                       return err;
+               priv->sreg_proto_min = reg;
+       }
+
+       if (tb[NFTA_NAT_PROTO_MAX]) {
+               err = nft_nat_parse_sreg(tb[NFTA_NAT_PROTO_MAX], &reg);
+               if (err < 0)
+                       return err;
+               priv->sreg_proto_max = reg;
+       } else {
+               /* Single proto value: the range end is the range start. */
+               priv->sreg_proto_max = priv->sreg_proto_min;
+       }
+
+       return 0;
+}
+
+/*
+ * Dump a "nat" expression back to netlink.
+ *
+ * The NAT type is always emitted.  The address and proto register pairs
+ * are emitted only when their *_MIN register is non-zero, which is the same
+ * "range configured" test nft_nat_eval() uses; dumping a zero register
+ * would report a range that was never set.
+ *
+ * Returns 0 on success or -1 if the message buffer has no room left.
+ */
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_nat *priv = nft_expr_priv(expr);
+
+       switch (priv->type) {
+       case NF_NAT_MANIP_SRC:
+               if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+                       goto nla_put_failure;
+               break;
+       case NF_NAT_MANIP_DST:
+               if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+                       goto nla_put_failure;
+               break;
+       }
+
+       if (priv->sreg_addr_min) {
+               if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN,
+                                htonl(priv->sreg_addr_min)) ||
+                   nla_put_be32(skb, NFTA_NAT_ADDR_MAX,
+                                htonl(priv->sreg_addr_max)))
+                       goto nla_put_failure;
+       }
+
+       if (priv->sreg_proto_min) {
+               if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN,
+                                htonl(priv->sreg_proto_min)) ||
+                   nla_put_be32(skb, NFTA_NAT_PROTO_MAX,
+                                htonl(priv->sreg_proto_max)))
+                       goto nla_put_failure;
+       }
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/*
+ * Operations table for the "nat" expression, registered from
+ * nf_table_nat_init().  .size reserves room for struct nft_nat as the
+ * expression's private data.
+ */
+static struct nft_expr_ops nft_nat_ops __read_mostly = {
+       .name           = "nat",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_nat_eval,
+       .init           = nft_nat_init,
+       .dump           = nft_nat_dump,
+       .policy         = nft_nat_policy,
+       .maxattr        = NFTA_NAT_MAX,
+};
+
+/*
+ * NAT table
+ */
+
+/*
+ * Common NAT hook body.  It is the INPUT chain's hook directly and is
+ * wrapped by nf_nat_prerouting(), nf_nat_postrouting() and nf_nat_output().
+ *
+ * Packets with no conntrack entry, or an untracked one, are accepted
+ * untouched.  For the remaining packets the chain's rules are run only
+ * while the connection has no NAT setup yet for this hook's manip type;
+ * nf_nat_packet() then produces the final verdict.
+ *
+ * Returns a netfilter verdict.
+ */
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+                             struct sk_buff *skb,
+                             const struct net_device *in,
+                             const struct net_device *out,
+                             int (*okfn)(struct sk_buff *))
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+       struct nf_conn_nat *nat;
+       enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+       unsigned int ret;
+
+       if (ct == NULL || nf_ct_is_untracked(ct))
+               return NF_ACCEPT;
+
+       /* Fragments are not expected here. */
+       NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+       nat = nfct_nat(ct);
+       if (nat == NULL) {
+               /* Conntrack module was loaded late, can't add extension. */
+               if (nf_ct_is_confirmed(ct))
+                       return NF_ACCEPT;
+               nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+               if (nat == NULL)
+                       return NF_ACCEPT;
+       }
+
+       switch (ctinfo) {
+       case IP_CT_RELATED:
+       case IP_CT_RELATED + IP_CT_IS_REPLY:
+               /* Related ICMP is translated here and never reaches the rules. */
+               if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+                       if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+                                                          ops->hooknum))
+                               return NF_DROP;
+                       else
+                               return NF_ACCEPT;
+               }
+               /* Fall through */
+       case IP_CT_NEW:
+               /* Setup already done for this manip type: skip the rules. */
+               if (nf_nat_initialized(ct, maniptype))
+                       break;
+
+               ret = nft_do_chain(ops, skb, in, out, okfn);
+               if (ret != NF_ACCEPT)
+                       return ret;
+               /* No rule set up NAT: install a null binding instead. */
+               if (!nf_nat_initialized(ct, maniptype)) {
+                       ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+                       if (ret != NF_ACCEPT)
+                               return ret;
+               }
+               /* Fall through */
+       default:
+               break;
+       }
+
+       return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+/*
+ * PRE_ROUTING hook: run nf_nat_fn() and, unless the packet was dropped or
+ * stolen, drop the cached dst when the destination address was changed.
+ *
+ * Returns the verdict produced by nf_nat_fn().
+ */
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+                                     struct sk_buff *skb,
+                                     const struct net_device *in,
+                                     const struct net_device *out,
+                                     int (*okfn)(struct sk_buff *))
+{
+       const __be32 orig_daddr = ip_hdr(skb)->daddr;
+       unsigned int verdict = nf_nat_fn(ops, skb, in, out, okfn);
+
+       /* The skb must not be looked at once it is dropped or stolen. */
+       if (verdict == NF_DROP || verdict == NF_STOLEN)
+               return verdict;
+
+       if (ip_hdr(skb)->daddr != orig_daddr)
+               skb_dst_drop(skb);
+
+       return verdict;
+}
+
+/*
+ * POST_ROUTING hook: run nf_nat_fn().  With CONFIG_XFRM, and unless the
+ * packet was dropped or stolen, nf_xfrm_me_harder() is called when this
+ * direction's source address or source proto differs from the opposite
+ * direction's destination; if that call fails the verdict becomes NF_DROP.
+ *
+ * Without CONFIG_XFRM this is a plain wrapper around nf_nat_fn(), which is
+ * why ctinfo and ct are marked __maybe_unused.
+ */
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+                                      struct sk_buff *skb,
+                                      const struct net_device *in,
+                                      const struct net_device *out,
+                                      int (*okfn)(struct sk_buff *))
+{
+       enum ip_conntrack_info ctinfo __maybe_unused;
+       const struct nf_conn *ct __maybe_unused;
+       unsigned int ret;
+
+       ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+       if (ret != NF_DROP && ret != NF_STOLEN &&
+           (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+               /* Source of this direction vs. destination of the other. */
+               if (ct->tuplehash[dir].tuple.src.u3.ip !=
+                   ct->tuplehash[!dir].tuple.dst.u3.ip ||
+                   ct->tuplehash[dir].tuple.src.u.all !=
+                   ct->tuplehash[!dir].tuple.dst.u.all)
+                       return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+                                                               ret : NF_DROP;
+       }
+#endif
+       return ret;
+}
+
+/*
+ * LOCAL_OUT hook: run nf_nat_fn() and, unless the packet was dropped or
+ * stolen, compare this direction's destination with the opposite
+ * direction's source.  If the addresses differ the packet is re-routed with
+ * ip_route_me_harder().  Otherwise, with CONFIG_XFRM only, a differing
+ * proto value leads to nf_xfrm_me_harder().  A failure of either helper
+ * turns the verdict into NF_DROP.
+ */
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+                                 struct sk_buff *skb,
+                                 const struct net_device *in,
+                                 const struct net_device *out,
+                                 int (*okfn)(struct sk_buff *))
+{
+       enum ip_conntrack_info ctinfo;
+       const struct nf_conn *ct;
+       unsigned int ret;
+
+       ret = nf_nat_fn(ops, skb, in, out, okfn);
+       if (ret != NF_DROP && ret != NF_STOLEN &&
+           (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+               if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+                   ct->tuplehash[!dir].tuple.src.u3.ip) {
+                       if (ip_route_me_harder(skb, RTN_UNSPEC))
+                               ret = NF_DROP;
+               }
+#ifdef CONFIG_XFRM
+               /* This else-if pairs with the address comparison above. */
+               else if (ct->tuplehash[dir].tuple.dst.u.all !=
+                        ct->tuplehash[!dir].tuple.src.u.all)
+                       if (nf_xfrm_me_harder(skb, AF_INET))
+                               ret = NF_DROP;
+#endif
+       }
+       return ret;
+}
+
+/*
+ * Built-in "PREROUTING" chain of the nat table: nf_nat_prerouting() at
+ * NF_INET_PRE_ROUTING with priority NF_IP_PRI_NAT_DST.  .priv points back
+ * at the chain itself.
+ */
+static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = {
+       .chain  = {
+               .name           = "PREROUTING",
+               .rules          = LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules),
+               .flags          = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+       },
+       .ops    = {
+               .hook           = nf_nat_prerouting,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_PRE_ROUTING,
+               .priority       = NF_IP_PRI_NAT_DST,
+               .priv           = &nf_chain_nat_prerouting.chain,
+       },
+};
+
+/*
+ * Built-in "POSTROUTING" chain of the nat table: nf_nat_postrouting() at
+ * NF_INET_POST_ROUTING with priority NF_IP_PRI_NAT_SRC.  .priv points back
+ * at the chain itself.
+ */
+static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = {
+       .chain  = {
+               .name           = "POSTROUTING",
+               .rules          = LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules),
+               .flags          = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+       },
+       .ops    = {
+               .hook           = nf_nat_postrouting,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP_PRI_NAT_SRC,
+               .priv           = &nf_chain_nat_postrouting.chain,
+       },
+};
+
+/*
+ * Built-in "OUTPUT" chain of the nat table: nf_nat_output() at
+ * NF_INET_LOCAL_OUT with priority NF_IP_PRI_NAT_DST.  .priv points back at
+ * the chain itself.
+ */
+static struct nft_base_chain nf_chain_nat_output __read_mostly = {
+       .chain  = {
+               .name           = "OUTPUT",
+               .rules          = LIST_HEAD_INIT(nf_chain_nat_output.chain.rules),
+               .flags          = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+       },
+       .ops    = {
+               .hook           = nf_nat_output,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP_PRI_NAT_DST,
+               .priv           = &nf_chain_nat_output.chain,
+       },
+};
+
+/*
+ * Built-in "INPUT" chain of the nat table: uses nf_nat_fn() directly, with
+ * no wrapper, at NF_INET_LOCAL_IN with priority NF_IP_PRI_NAT_SRC.  .priv
+ * points back at the chain itself.
+ */
+static struct nft_base_chain nf_chain_nat_input __read_mostly = {
+       .chain  = {
+               .name           = "INPUT",
+               .rules          = LIST_HEAD_INIT(nf_chain_nat_input.chain.rules),
+               .flags          = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+       },
+       .ops    = {
+               .hook           = nf_nat_fn,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP_PRI_NAT_SRC,
+               .priv           = &nf_chain_nat_input.chain,
+       },
+};
+
+
+/*
+ * The IPv4 "nat" table.  Its chain list starts out empty and is filled in
+ * by nf_table_nat_init() before the table is registered.
+ */
+static struct nft_table nf_table_nat_ipv4 __read_mostly = {
+       .name   = "nat",
+       .chains = LIST_HEAD_INIT(nf_table_nat_ipv4.chains),
+};
+
+/*
+ * Module init: link the four built-in chains into the nat table in
+ * PREROUTING, POSTROUTING, OUTPUT, INPUT order, register the table for
+ * IPv4, then register the "nat" expression.  If the expression cannot be
+ * registered the table is unregistered again.
+ *
+ * Returns 0 on success or the negative error of the failing registration.
+ */
+static int __init nf_table_nat_init(void)
+{
+       struct list_head *chains = &nf_table_nat_ipv4.chains;
+       int ret;
+
+       list_add_tail(&nf_chain_nat_prerouting.chain.list, chains);
+       list_add_tail(&nf_chain_nat_postrouting.chain.list, chains);
+       list_add_tail(&nf_chain_nat_output.chain.list, chains);
+       list_add_tail(&nf_chain_nat_input.chain.list, chains);
+
+       ret = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+       if (ret < 0)
+               return ret;
+
+       ret = nft_register_expr(&nft_nat_ops);
+       if (ret < 0) {
+               /* Roll back the table registration done just above. */
+               nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+               return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * Module exit: unregister the "nat" expression, then the nat table.  The
+ * table is unregistered with NFPROTO_IPV4, the same family constant used
+ * when it was registered in nf_table_nat_init().
+ */
+static void __exit nf_table_nat_exit(void)
+{
+       nft_unregister_expr(&nft_nat_ops);
+       nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+}
+
+module_init(nf_table_nat_init);
+module_exit(nf_table_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET, "nat");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c
new file mode 100644 (file)
index 0000000..4f257a1
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+/*
+ * Hook of the route table's OUTPUT chain.
+ *
+ * Remembers the packet's mark, addresses and TOS, runs the chain, and
+ * re-routes the packet with ip_route_me_harder() if any of them changed.
+ * Nothing is compared when the chain's verdict is NF_DROP or NF_QUEUE.
+ *
+ * Returns the chain's verdict, or NF_DROP if re-routing fails.  Packets too
+ * short to carry an IPv4 header are accepted without running the chain.
+ */
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+                                       struct sk_buff *skb,
+                                       const struct net_device *in,
+                                       const struct net_device *out,
+                                       int (*okfn)(struct sk_buff *))
+{
+       const struct iphdr *hdr;
+       __be32 old_saddr, old_daddr;
+       u_int8_t old_tos;
+       u32 old_mark;
+       unsigned int verdict;
+
+       /* root is playing with raw sockets. */
+       if (skb->len < sizeof(struct iphdr) ||
+           ip_hdrlen(skb) < sizeof(struct iphdr))
+               return NF_ACCEPT;
+
+       /* Snapshot the fields a rule may rewrite. */
+       old_mark = skb->mark;
+       hdr = ip_hdr(skb);
+       old_saddr = hdr->saddr;
+       old_daddr = hdr->daddr;
+       old_tos = hdr->tos;
+
+       verdict = nft_do_chain(ops, skb, in, out, okfn);
+       if (verdict == NF_DROP || verdict == NF_QUEUE)
+               return verdict;
+
+       /* Fetch the header pointer again after the chain has run. */
+       hdr = ip_hdr(skb);
+       if (hdr->saddr == old_saddr &&
+           hdr->daddr == old_daddr &&
+           skb->mark == old_mark &&
+           hdr->tos == old_tos)
+               return verdict;
+
+       if (ip_route_me_harder(skb, RTN_UNSPEC))
+               verdict = NF_DROP;
+
+       return verdict;
+}
+
+/*
+ * Built-in "OUTPUT" chain of the route table: nf_route_table_hook() at
+ * NF_INET_LOCAL_OUT with priority NF_IP_PRI_MANGLE.  .priv points back at
+ * the chain itself.
+ */
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+       .chain  = {
+               .name           = "OUTPUT",
+               .rules          = LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+               .flags          = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+       },
+       .ops    = {
+               .hook           = nf_route_table_hook,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP_PRI_MANGLE,
+               .priv           = &nf_chain_route_output.chain,
+       },
+};
+
+/*
+ * The IPv4 "route" table.  Its chain list starts out empty and is filled
+ * in by nf_table_route_init() before the table is registered.
+ */
+static struct nft_table nf_table_route_ipv4 __read_mostly = {
+       .name   = "route",
+       .chains = LIST_HEAD_INIT(nf_table_route_ipv4.chains),
+};
+
+/*
+ * Module init: link the single OUTPUT chain into the route table and
+ * register the table for IPv4.  Returns nft_register_table()'s result.
+ */
+static int __init nf_table_route_init(void)
+{
+       list_add_tail(&nf_chain_route_output.chain.list,
+                     &nf_table_route_ipv4.chains);
+       return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4);
+}
+
+/* Module exit: unregister the route table registered at init time. */
+static void __exit nf_table_route_exit(void)
+{
+       nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644 (file)
index 0000000..63d0a3b
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/ip.h>
+
+/*
+ * LOCAL_OUT hook for the IPv4 family.
+ *
+ * Runs the chain for packets that are long enough to hold an IPv4 header
+ * and whose ihl field covers at least that header.  Anything shorter is
+ * accepted without evaluating rules, with a rate-limited log message.
+ */
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+                                   struct sk_buff *skb,
+                                   const struct net_device *in,
+                                   const struct net_device *out,
+                                   int (*okfn)(struct sk_buff *))
+{
+       /* Common case first: a complete header is present. */
+       if (likely(skb->len >= sizeof(struct iphdr) &&
+                  ip_hdr(skb)->ihl >= sizeof(struct iphdr) / 4))
+               return nft_do_chain(ops, skb, in, out, okfn);
+
+       if (net_ratelimit())
+               pr_info("nf_tables_ipv4: ignoring short SOCK_RAW packet\n");
+
+       return NF_ACCEPT;
+}
+
+/*
+ * Address-family descriptor for IPv4: NFPROTO_IPV4 with NF_INET_NUMHOOKS
+ * hooks.  Only NF_INET_LOCAL_OUT gets a family-specific hook function,
+ * nft_ipv4_output().
+ */
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
+       .family         = NFPROTO_IPV4,
+       .nhooks         = NF_INET_NUMHOOKS,
+       .owner          = THIS_MODULE,
+       .hooks          = {
+               [NF_INET_LOCAL_OUT]     = nft_ipv4_output,
+       },
+};
+
+/*
+ * Module init: register the IPv4 family descriptor with nf_tables.
+ * Returns whatever nft_register_afinfo() returns.
+ */
+static int __init nf_tables_ipv4_init(void)
+{
+       return nft_register_afinfo(&nft_af_ipv4);
+}
+
+/* Module exit: undo the registration done in nf_tables_ipv4_init(). */
+static void __exit nf_tables_ipv4_exit(void)
+{
+       nft_unregister_afinfo(&nft_af_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644 (file)
index 0000000..b4ee8d3
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+
+/* Private data of a "reject" expression, filled in by nft_reject_init(). */
+struct nft_reject {
+       /* kind of rejection, stored in 8 bits */
+       enum nft_reject_types   type:8;
+       /* ICMP code, only set for NFT_REJECT_ICMP_UNREACH */
+       u8                      icmp_code;
+};
+
+/*
+ * Evaluate a reject expression: for NFT_REJECT_ICMP_UNREACH send an ICMP
+ * destination unreachable with the configured code, then set the verdict
+ * to NF_DROP. NFT_REJECT_TCP_RST sends nothing here; the packet is only
+ * dropped.
+ */
+static void nft_reject_eval(const struct nft_expr *expr,
+                             struct nft_data data[NFT_REG_MAX + 1],
+                             const struct nft_pktinfo *pkt)
+{
+       struct nft_reject *reject = nft_expr_priv(expr);
+
+       if (reject->type == NFT_REJECT_ICMP_UNREACH)
+               icmp_send(pkt->skb, ICMP_DEST_UNREACH, reject->icmp_code, 0);
+
+       /* the verdict is the same for every reject type */
+       data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+/* Netlink attribute types accepted for a reject expression. */
+static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+       [NFTA_REJECT_TYPE]              = { .type = NLA_U32 },
+       [NFTA_REJECT_ICMP_CODE]         = { .type = NLA_U8 },
+};
+
+/*
+ * Initialise a reject expression from its netlink attributes.
+ *
+ * NFTA_REJECT_TYPE is mandatory; NFTA_REJECT_ICMP_CODE is additionally
+ * required for NFT_REJECT_ICMP_UNREACH. Returns 0 on success or -EINVAL
+ * when an attribute is missing or the type is unknown.
+ */
+static int nft_reject_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
+{
+       struct nft_reject *priv = nft_expr_priv(expr);
+       u32 type;
+
+       if (tb[NFTA_REJECT_TYPE] == NULL)
+               return -EINVAL;
+
+       /*
+        * Validate the full 32 bit value before storing it: priv->type is
+        * only 8 bits wide, so checking after the assignment would let an
+        * out-of-range value be truncated into a valid looking type.
+        */
+       type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+       switch (type) {
+       case NFT_REJECT_ICMP_UNREACH:
+               if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+                       return -EINVAL;
+               priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+               break;
+       case NFT_REJECT_TCP_RST:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       priv->type = type;
+       return 0;
+}
+
+/*
+ * Dump the configuration of a reject expression into a netlink message.
+ *
+ * Emits NFTA_REJECT_TYPE and, for NFT_REJECT_ICMP_UNREACH, the ICMP code.
+ * Returns 0 on success or -1 if an attribute could not be added.
+ */
+static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_reject *priv = nft_expr_priv(expr);
+
+       /*
+        * The attribute is big endian on the wire: nft_reject_init() reads
+        * it with ntohl(), so it has to be converted back with htonl() here.
+        */
+       if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+               goto nla_put_failure;
+
+       switch (priv->type) {
+       case NFT_REJECT_ICMP_UNREACH:
+               if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+                       goto nla_put_failure;
+               break;
+       }
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/*
+ * Operations of the "reject" expression: ties the eval/init/dump callbacks
+ * and the attribute policy defined in this file to the expression name.
+ */
+static struct nft_expr_ops reject_ops __read_mostly = {
+       .name           = "reject",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_reject_eval,
+       .init           = nft_reject_init,
+       .dump           = nft_reject_dump,
+       .policy         = nft_reject_policy,
+       .maxattr        = NFTA_REJECT_MAX,
+};
+
+/* Module entry point: register the "reject" expression. */
+static int __init nft_reject_module_init(void)
+{
+       return nft_register_expr(&reject_ops);
+}
+
+/* Module exit point: unregister the "reject" expression. */
+static void __exit nft_reject_module_exit(void)
+{
+       nft_unregister_expr(&reject_ops);
+}
+
+module_init(nft_reject_module_init);
+module_exit(nft_reject_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("reject");
index a7f842b29b67b2ca5ff2c2cbe13588e3098ce44b..5677e38eeca3c8ecefe8bd3c14427d59a7aef35c 100644 (file)
@@ -25,6 +25,14 @@ config NF_CONNTRACK_IPV6
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_TABLES_IPV6
+       depends on NF_TABLES
+       tristate "IPv6 nf_tables support"
+
+config NF_TABLE_ROUTE_IPV6
+       depends on NF_TABLES_IPV6
+       tristate "IPv6 nf_tables route table support"
+
 config IP6_NF_IPTABLES
        tristate "IP6 tables support (required for filtering)"
        depends on INET && IPV6
index 2b53738f798cd3a9898d89a2a1a76b48ffe983b8..956af4492d101b9f1826cbff010237f8bf13b078 100644 (file)
@@ -23,6 +23,10 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o
+
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c
new file mode 100644 (file)
index 0000000..48ac65c
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/route.h>
+
+/*
+ * Hook function of the IPv6 "route" table.
+ *
+ * Runs the chain and, when the verdict is neither NF_DROP nor NF_QUEUE and
+ * the addresses, mark, hop limit or first header word differ from what was
+ * saved beforehand, re-routes the packet with ip6_route_me_harder(). A
+ * failing re-route turns the verdict into NF_DROP.
+ */
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+                                       struct sk_buff *skb,
+                                       const struct net_device *in,
+                                       const struct net_device *out,
+                                       int (*okfn)(struct sk_buff *))
+{
+       struct in6_addr old_saddr, old_daddr;
+       u32 old_mark, old_word0;
+       u_int8_t old_hop_limit;
+       unsigned int verdict;
+
+       /* snapshot everything that is compared once the chain has run */
+       memcpy(&old_saddr, &ipv6_hdr(skb)->saddr, sizeof(old_saddr));
+       memcpy(&old_daddr, &ipv6_hdr(skb)->daddr, sizeof(old_daddr));
+       old_mark = skb->mark;
+       old_hop_limit = ipv6_hdr(skb)->hop_limit;
+
+       /* flowlabel and prio (includes version, which shouldn't change either) */
+       old_word0 = *((u32 *)ipv6_hdr(skb));
+
+       verdict = nft_do_chain(ops, skb, in, out, okfn);
+       if (verdict == NF_DROP || verdict == NF_QUEUE)
+               return verdict;
+
+       /* nothing that was saved has changed: keep the current route */
+       if (!memcmp(&ipv6_hdr(skb)->saddr, &old_saddr, sizeof(old_saddr)) &&
+           !memcmp(&ipv6_hdr(skb)->daddr, &old_daddr, sizeof(old_daddr)) &&
+           skb->mark == old_mark &&
+           ipv6_hdr(skb)->hop_limit == old_hop_limit &&
+           old_word0 == *((u_int32_t *)ipv6_hdr(skb)))
+               return verdict;
+
+       if (ip6_route_me_harder(skb) != 0)
+               return NF_DROP;
+       return verdict;
+}
+
+/*
+ * Built-in "OUTPUT" base chain of the IPv6 route table, hooked at
+ * NF_INET_LOCAL_OUT with mangle priority and served by
+ * nf_route_table_hook().
+ */
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+       .chain  = {
+               .name           = "OUTPUT",
+               .rules          = LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+               .flags          = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+       },
+       .ops    = {
+               .hook           = nf_route_table_hook,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP6_PRI_MANGLE,
+               /* the hook's private data points back at the chain itself */
+               .priv           = &nf_chain_route_output.chain,
+       },
+};
+
+/* Built-in IPv6 "route" table; its chain list starts out empty. */
+static struct nft_table nf_table_route_ipv6 __read_mostly = {
+       .name   = "route",
+       .chains = LIST_HEAD_INIT(nf_table_route_ipv6.chains),
+};
+
+/*
+ * Module entry point: link the OUTPUT chain into the route table and
+ * register the table for NFPROTO_IPV6.
+ */
+static int __init nf_table_route_init(void)
+{
+       list_add_tail(&nf_chain_route_output.chain.list,
+                     &nf_table_route_ipv6.chains);
+       return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+/* Module exit point: unregister the IPv6 route table. */
+static void __exit nf_table_route_exit(void)
+{
+       nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644 (file)
index 0000000..e0717ce
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * IPv6 LOCAL_OUT hook: run the chain only on packets that are at least as
+ * long as an IPv6 header; shorter ones are logged (rate limited) and
+ * accepted without evaluation.
+ */
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+                                   struct sk_buff *skb,
+                                   const struct net_device *in,
+                                   const struct net_device *out,
+                                   int (*okfn)(struct sk_buff *))
+{
+       /* common case first: header is complete, evaluate the rules */
+       if (likely(skb->len >= sizeof(struct ipv6hdr)))
+               return nft_do_chain(ops, skb, in, out, okfn);
+
+       if (net_ratelimit())
+               pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+                       "packet\n");
+       return NF_ACCEPT;
+}
+
+/* IPv6 address family descriptor handed to nft_register_afinfo(). */
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
+       .owner          = THIS_MODULE,
+       .family         = NFPROTO_IPV6,
+       .nhooks         = NF_INET_NUMHOOKS,
+       .hooks          = {
+               /* only LOCAL_OUT has a family specific hook function */
+               [NF_INET_LOCAL_OUT]     = nft_ipv6_output,
+       },
+};
+
+/* Module entry point: register the IPv6 family with nf_tables. */
+static int __init nf_tables_ipv6_init(void)
+{
+       return nft_register_afinfo(&nft_af_ipv6);
+}
+
+/* Module exit point: remove the IPv6 family registration again. */
+static void __exit nf_tables_ipv6_exit(void)
+{
+       nft_unregister_afinfo(&nft_af_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
index 6e839b6dff2b1349f86d87837e0a91bb486cc4bf..c271e1af93b59fa6a4af4bb6fa0d4978dc37b324 100644 (file)
@@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY
 
 endif # NF_CONNTRACK
 
+config NF_TABLES
+       depends on NETFILTER_NETLINK
+       tristate "Netfilter nf_tables support"
+
+config NFT_EXTHDR
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+       depends on NF_TABLES
+       depends on NF_CONNTRACK
+       tristate "Netfilter nf_tables conntrack module"
+
+config NFT_SET
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables set module"
+
+config NFT_HASH
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables hash module"
+
+config NFT_COUNTER
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables counter module"
+
+config NFT_LOG
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables log module"
+
+config NFT_LIMIT
+       depends on NF_TABLES
+       tristate "Netfilter nf_tables limit module"
+
 config NETFILTER_XTABLES
        tristate "Netfilter Xtables support (required for ip_tables)"
        default m if NETFILTER_ADVANCED=n
index c3a0a12907f693630b841400d3e47babfd338bde..1ca3f3932826f3383f217b2984b9c0efef7f4e86 100644 (file)
@@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 # SYNPROXY
 obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 
+# nf_tables
+nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+
+obj-$(CONFIG_NF_TABLES)                += nf_tables.o
+obj-$(CONFIG_NFT_EXTHDR)       += nft_exthdr.o
+obj-$(CONFIG_NFT_META)         += nft_meta.o
+obj-$(CONFIG_NFT_CT)           += nft_ct.o
+obj-$(CONFIG_NFT_LIMIT)                += nft_limit.o
+#nf_tables-objs                        += nft_meta_target.o
+obj-$(CONFIG_NFT_SET)          += nft_set.o
+obj-$(CONFIG_NFT_HASH)         += nft_hash.o
+obj-$(CONFIG_NFT_COUNTER)      += nft_counter.o
+obj-$(CONFIG_NFT_LOG)          += nft_log.o
+
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644 (file)
index 0000000..7d59c89
--- /dev/null
@@ -0,0 +1,1760 @@
+/*
+ * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/sock.h>
+
+/* registered address families (struct nft_af_info, linked via ->list) */
+static LIST_HEAD(nf_tables_afinfo);
+/* list head for expressions; not used in the part of the file shown here */
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ *     nft_register_afinfo - register nf_tables address family info
+ *
+ *     @afi: address family info to register
+ *
+ *     Register the address family for use with nf_tables. Returns zero on
+ *     success or a negative errno code otherwise.
+ *
+ *     The current implementation cannot fail and always returns zero.
+ */
+int nft_register_afinfo(struct nft_af_info *afi)
+{
+       /* start with an empty table list before the family becomes visible */
+       INIT_LIST_HEAD(&afi->tables);
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_add_tail(&afi->list, &nf_tables_afinfo);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ *     nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ *     @afi: address family info to unregister
+ *
+ *     Unregister the address family for use with nf_tables.
+ *
+ *     Only unlinks @afi from the family list under the nfnl lock; the
+ *     family's table list is not touched here.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_del(&afi->list);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+
+/* Return the registered address family matching @family, or NULL. */
+static struct nft_af_info *nft_afinfo_lookup(int family)
+{
+       struct nft_af_info *cur;
+
+       list_for_each_entry(cur, &nf_tables_afinfo, list) {
+               if (cur->family != family)
+                       continue;
+               return cur;
+       }
+       return NULL;
+}
+
+/*
+ * Look up an address family, optionally requesting its module.
+ *
+ * Returns the family, ERR_PTR(-EAGAIN) when the family only showed up after
+ * request_module() (the nfnl lock was dropped in between), or
+ * ERR_PTR(-EAFNOSUPPORT) when it is not available.
+ */
+static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload)
+{
+       struct nft_af_info *found = nft_afinfo_lookup(family);
+
+       if (found == NULL) {
+#ifdef CONFIG_MODULES
+               if (autoload) {
+                       /* module loading must not run under the nfnl lock */
+                       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+                       request_module("nft-afinfo-%u", family);
+                       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+                       found = nft_afinfo_lookup(family);
+                       if (found != NULL)
+                               return ERR_PTR(-EAGAIN);
+               }
+#endif
+               return ERR_PTR(-EAFNOSUPPORT);
+       }
+       return found;
+}
+
+/*
+ * Tables
+ */
+
+/* Return the table of @afi whose name equals the string in @nla, or NULL. */
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+                                         const struct nlattr *nla)
+{
+       struct nft_table *cur;
+
+       list_for_each_entry(cur, &afi->tables, list) {
+               if (nla_strcmp(nla, cur->name) != 0)
+                       continue;
+               return cur;
+       }
+       return NULL;
+}
+
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+                                               const struct nlattr *nla,
+                                               bool autoload)
+{
+       struct nft_table *table;
+
+       if (nla == NULL)
+               return ERR_PTR(-EINVAL);
+
+       table = nft_table_lookup(afi, nla);
+       if (table != NULL)
+               return table;
+
+#ifdef CONFIG_MODULES
+       if (autoload) {
+               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+               request_module("nft-table-%u-%*.s", afi->family,
+                              nla_len(nla)-1, (const char *)nla_data(nla));
+               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               if (nft_table_lookup(afi, nla))
+                       return ERR_PTR(-EAGAIN);
+       }
+#endif
+       return ERR_PTR(-ENOENT);
+}
+
+/* Advance the table's handle generator and return the new value. */
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+       table->hgenerator += 1;
+       return table->hgenerator;
+}
+
+/* Netlink attribute types accepted for table messages. */
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+       [NFTA_TABLE_NAME]       = { .type = NLA_STRING },
+};
+
+/*
+ * Append a table message (nfgenmsg header plus NFTA_TABLE_NAME) to @skb.
+ *
+ * Returns the result of nlmsg_end() on success; on failure the partial
+ * message is trimmed and -1 is returned.
+ */
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+                                    int event, u32 flags, int family,
+                                    const struct nft_table *table)
+{
+       struct nfgenmsg *hdr;
+       struct nlmsghdr *nlh;
+
+       /* the message type carries the subsystem id in its upper byte */
+       nlh = nlmsg_put(skb, portid, seq,
+                       event | (NFNL_SUBSYS_NFTABLES << 8),
+                       sizeof(struct nfgenmsg), flags);
+       if (!nlh)
+               goto nla_put_failure;
+
+       hdr = nlmsg_data(nlh);
+       hdr->res_id             = 0;
+       hdr->version            = NFNETLINK_V0;
+       hdr->nfgen_family       = family;
+
+       if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) != 0)
+               goto nla_put_failure;
+
+       return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+       nlmsg_trim(skb, nlh);
+       return -1;
+}
+
+/*
+ * Send a table event notification to the NFNLGRP_NFTABLES group.
+ *
+ * @oskb and @nlh describe the request that triggered the event and may both
+ * be NULL; in that case portid and sequence number are 0 and init_net is
+ * used. Returns 0 when nobody needs to be notified, otherwise the result of
+ * building/sending the message.
+ */
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+                                 const struct nlmsghdr *nlh,
+                                 const struct nft_table *table,
+                                 int event, int family)
+{
+       struct sk_buff *skb;
+       u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+       u32 seq = nlh ? nlh->nlmsg_seq : 0;
+       struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+       bool report;
+       int err;
+
+       /* nothing to do unless a report was requested or someone listens */
+       report = nlh ? nlmsg_report(nlh) : false;
+       if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+               return 0;
+
+       err = -ENOBUFS;
+       skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (skb == NULL)
+               goto err;
+
+       err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+                                       family, table);
+       if (err < 0) {
+               kfree_skb(skb);
+               goto err;
+       }
+
+       err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+                            GFP_KERNEL);
+err:
+       /* record the failure on the group via nfnetlink_set_err() */
+       if (err < 0)
+               nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+       return err;
+}
+
+/*
+ * Netlink dump callback for tables.
+ *
+ * Walks all tables of all matching address families (every family when the
+ * request says NFPROTO_UNSPEC). cb->args[0] holds the index of the first
+ * entry that still has to be emitted, so a dump can be continued across
+ * several calls. Returns skb->len.
+ */
+static int nf_tables_dump_tables(struct sk_buff *skb,
+                                struct netlink_callback *cb)
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       unsigned int idx = 0, s_idx = cb->args[0];
+       int family = nfmsg->nfgen_family;
+
+       list_for_each_entry(afi, &nf_tables_afinfo, list) {
+               if (family != NFPROTO_UNSPEC && family != afi->family)
+                       continue;
+
+               list_for_each_entry(table, &afi->tables, list) {
+                       /* skip entries before the resume index */
+                       if (idx < s_idx)
+                               goto cont;
+                       /* past the resume point: clear the remaining args */
+                       if (idx > s_idx)
+                               memset(&cb->args[1], 0,
+                                      sizeof(cb->args) - sizeof(cb->args[0]));
+                       /* stop at the first entry that could not be added */
+                       if (nf_tables_fill_table_info(skb,
+                                                     NETLINK_CB(cb->skb).portid,
+                                                     cb->nlh->nlmsg_seq,
+                                                     NFT_MSG_NEWTABLE,
+                                                     NLM_F_MULTI,
+                                                     afi->family, table) < 0)
+                               goto done;
+cont:
+                       idx++;
+               }
+       }
+done:
+       /* remember where to continue on the next call */
+       cb->args[0] = idx;
+       return skb->len;
+}
+
+/*
+ * Handle a table GET request: either start a dump of all tables
+ * (NLM_F_DUMP) or look up the named table and unicast its description back
+ * to the requesting port.
+ */
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+                             const struct nlmsghdr *nlh,
+                             const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       int family = nfmsg->nfgen_family;
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       struct sk_buff *reply;
+       int err;
+
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+                       .dump = nf_tables_dump_tables,
+               };
+               return netlink_dump_start(nlsk, skb, nlh, &c);
+       }
+
+       afi = nf_tables_afinfo_lookup(family, false);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+
+       reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (reply == NULL)
+               return -ENOMEM;
+
+       err = nf_tables_fill_table_info(reply, NETLINK_CB(skb).portid,
+                                       nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+                                       family, table);
+       if (err < 0) {
+               /* the reply could not be built, release it again */
+               kfree_skb(reply);
+               return err;
+       }
+
+       return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+
+/*
+ * Handle a table NEW request.
+ *
+ * An already existing table yields -EEXIST with NLM_F_EXCL, -EOPNOTSUPP
+ * with NLM_F_REPLACE and 0 otherwise. A missing table is allocated, linked
+ * into the family's table list and announced with NFT_MSG_NEWTABLE.
+ */
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+                             const struct nlmsghdr *nlh,
+                             const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       int family = nfmsg->nfgen_family;
+       const struct nlattr *name;
+       struct nft_af_info *afi;
+       struct nft_table *table;
+
+       afi = nf_tables_afinfo_lookup(family, true);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       name = nla[NFTA_TABLE_NAME];
+       table = nf_tables_table_lookup(afi, name, false);
+       if (IS_ERR(table)) {
+               /* -ENOENT just means the table has to be created below */
+               if (PTR_ERR(table) != -ENOENT)
+                       return PTR_ERR(table);
+       } else if (table != NULL) {
+               if (nlh->nlmsg_flags & NLM_F_EXCL)
+                       return -EEXIST;
+               if (nlh->nlmsg_flags & NLM_F_REPLACE)
+                       return -EOPNOTSUPP;
+               return 0;
+       }
+
+       /* the name is stored directly behind the table structure */
+       table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+       if (!table)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&table->chains);
+       nla_strlcpy(table->name, name, nla_len(name));
+
+       list_add_tail(&table->list, &afi->tables);
+       nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+       return 0;
+}
+
+/*
+ * Handle a table DELETE request.
+ *
+ * Built-in tables are refused with -EOPNOTSUPP and tables with a non-zero
+ * use count with -EBUSY. Otherwise the table is unlinked, announced with
+ * NFT_MSG_DELTABLE and freed.
+ */
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+                             const struct nlmsghdr *nlh,
+                             const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       int family = nfmsg->nfgen_family;
+       struct nft_af_info *afi;
+       struct nft_table *victim;
+
+       afi = nf_tables_afinfo_lookup(family, false);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       victim = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+       if (IS_ERR(victim))
+               return PTR_ERR(victim);
+
+       if (victim->flags & NFT_TABLE_BUILTIN)
+               return -EOPNOTSUPP;
+       if (victim->use)
+               return -EBUSY;
+
+       /* unlink first, then notify, and free only after the notification */
+       list_del(&victim->list);
+       nf_tables_table_notify(skb, nlh, victim, NFT_MSG_DELTABLE, family);
+       kfree(victim);
+       return 0;
+}
+
+/*
+ * Look up a table of @afi by C string name. Returns the table or
+ * ERR_PTR(-ENOENT) when no table carries that name.
+ */
+static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi,
+                                                 const char *name)
+{
+       struct nft_table *cur;
+
+       list_for_each_entry(cur, &afi->tables, list) {
+               if (strcmp(name, cur->name) != 0)
+                       continue;
+               return cur;
+       }
+
+       return ERR_PTR(-ENOENT);
+}
+
+/* forward declaration: used by the table (un)registration code below */
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+                                 const struct nlmsghdr *nlh,
+                                 const struct nft_table *table,
+                                 const struct nft_chain *chain,
+                                 int event, int family);
+
+/**
+ *     nft_register_table - register a built-in table
+ *
+ *     @table: the table to register
+ *     @family: protocol family to register table with
+ *
+ *     Register a built-in table for use with nf_tables. Returns zero on
+ *     success or a negative errno code otherwise: the error of the address
+ *     family lookup, or -EEXIST if a table of that name is already
+ *     registered for the family.
+ */
+int nft_register_table(struct nft_table *table, int family)
+{
+       struct nft_af_info *afi;
+       struct nft_table *existing;
+       struct nft_chain *chain;
+       int err;
+
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+
+       /* -EAGAIN means the family module was just loaded: look again */
+       do {
+               afi = nf_tables_afinfo_lookup(family, true);
+       } while (IS_ERR(afi) && PTR_ERR(afi) == -EAGAIN);
+
+       if (IS_ERR(afi)) {
+               err = PTR_ERR(afi);
+               goto err;
+       }
+
+       existing = __nf_tables_table_lookup(afi, table->name);
+       if (IS_ERR(existing)) {
+               err = PTR_ERR(existing);
+               if (err != -ENOENT)
+                       goto err;
+       } else if (existing != NULL) {
+               err = -EEXIST;
+               goto err;
+       }
+
+       table->flags |= NFT_TABLE_BUILTIN;
+       list_add_tail(&table->list, &afi->tables);
+
+       /* announce the table first, then each chain it already contains */
+       nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family);
+       list_for_each_entry(chain, &table->chains, list)
+               nf_tables_chain_notify(NULL, NULL, table, chain,
+                                      NFT_MSG_NEWCHAIN, family);
+       err = 0;
+err:
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+       return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_table);
+
+/**
+ *     nft_unregister_table - unregister a built-in table
+ *
+ *     @table: the table to unregister
+ *     @family: protocol family to unregister table with
+ *
+ *     Unregister a built-in table for use with nf_tables.
+ *
+ *     The table is unlinked under the nfnl lock; its chains are announced
+ *     as deleted before the table itself. Nothing is freed here.
+ */
+void nft_unregister_table(struct nft_table *table, int family)
+{
+       struct nft_chain *chain;
+
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_del(&table->list);
+       list_for_each_entry(chain, &table->chains, list)
+               nf_tables_chain_notify(NULL, NULL, table, chain,
+                                      NFT_MSG_DELCHAIN, family);
+       nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_table);
+
+/*
+ * Chains
+ */
+
+/*
+ * Look up a chain of @table by its handle. Returns the chain or
+ * ERR_PTR(-ENOENT) when no chain carries that handle.
+ */
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+       struct nft_chain *cur;
+
+       list_for_each_entry(cur, &table->chains, list) {
+               if (cur->handle != handle)
+                       continue;
+               return cur;
+       }
+
+       return ERR_PTR(-ENOENT);
+}
+
+/*
+ * Look up a chain of @table by the name in @nla. Returns the chain,
+ * ERR_PTR(-EINVAL) when no name attribute was supplied, or
+ * ERR_PTR(-ENOENT) when no chain carries that name.
+ */
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+                                               const struct nlattr *nla)
+{
+       struct nft_chain *cur;
+
+       if (!nla)
+               return ERR_PTR(-EINVAL);
+
+       list_for_each_entry(cur, &table->chains, list) {
+               if (nla_strcmp(nla, cur->name) != 0)
+                       continue;
+               return cur;
+       }
+
+       return ERR_PTR(-ENOENT);
+}
+
+/* Netlink attribute types accepted for chain messages. */
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+       [NFTA_CHAIN_TABLE]      = { .type = NLA_STRING },
+       [NFTA_CHAIN_HANDLE]     = { .type = NLA_U64 },
+       /* chain names are length limited via NFT_CHAIN_MAXNAMELEN */
+       [NFTA_CHAIN_NAME]       = { .type = NLA_STRING,
+                                   .len = NFT_CHAIN_MAXNAMELEN - 1 },
+       [NFTA_CHAIN_HOOK]       = { .type = NLA_NESTED },
+};
+
+/* Netlink attribute types accepted inside the nested NFTA_CHAIN_HOOK. */
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+       [NFTA_HOOK_HOOKNUM]     = { .type = NLA_U32 },
+       [NFTA_HOOK_PRIORITY]    = { .type = NLA_U32 },
+};
+
+/*
+ * Append a chain message to @skb: nfgenmsg header, table name, chain handle
+ * and chain name, plus a nested NFTA_CHAIN_HOOK (hook number and priority)
+ * for base chains.
+ *
+ * Returns the result of nlmsg_end() on success; on failure the partial
+ * message is trimmed and -1 is returned.
+ */
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+                                    int event, u32 flags, int family,
+                                    const struct nft_table *table,
+                                    const struct nft_chain *chain)
+{
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+
+       /* the message type carries the subsystem id in its upper byte */
+       event |= NFNL_SUBSYS_NFTABLES << 8;
+       nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+       if (nlh == NULL)
+               goto nla_put_failure;
+
+       nfmsg = nlmsg_data(nlh);
+       nfmsg->nfgen_family     = family;
+       nfmsg->version          = NFNETLINK_V0;
+       nfmsg->res_id           = 0;
+
+       if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+               goto nla_put_failure;
+       if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+               goto nla_put_failure;
+       if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+               goto nla_put_failure;
+
+       /* only base chains carry hook information */
+       if (chain->flags & NFT_BASE_CHAIN) {
+               const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops;
+               struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+               if (nest == NULL)
+                       goto nla_put_failure;
+               if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+                       goto nla_put_failure;
+               if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+                       goto nla_put_failure;
+               nla_nest_end(skb, nest);
+       }
+
+       return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+       nlmsg_trim(skb, nlh);
+       return -1;
+}
+
+/*
+ * Send a chain event notification to the NFNLGRP_NFTABLES group.
+ *
+ * @oskb and @nlh describe the request that triggered the event and may both
+ * be NULL; in that case portid and sequence number are 0 and init_net is
+ * used. Returns 0 when nobody needs to be notified, otherwise the result of
+ * building/sending the message.
+ */
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+                                 const struct nlmsghdr *nlh,
+                                 const struct nft_table *table,
+                                 const struct nft_chain *chain,
+                                 int event, int family)
+{
+       struct sk_buff *skb;
+       u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+       struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+       u32 seq = nlh ? nlh->nlmsg_seq : 0;
+       bool report;
+       int err;
+
+       /* nothing to do unless a report was requested or someone listens */
+       report = nlh ? nlmsg_report(nlh) : false;
+       if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+               return 0;
+
+       err = -ENOBUFS;
+       skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (skb == NULL)
+               goto err;
+
+       err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
+                                       table, chain);
+       if (err < 0) {
+               kfree_skb(skb);
+               goto err;
+       }
+
+       err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+                            GFP_KERNEL);
+err:
+       /* record the failure on the group via nfnetlink_set_err() */
+       if (err < 0)
+               nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+       return err;
+}
+
+/*
+ * Netlink dump callback for chains.
+ *
+ * Walks all chains of all tables of all matching address families (every
+ * family when the request says NFPROTO_UNSPEC). cb->args[0] holds the index
+ * of the first entry that still has to be emitted, so a dump can be
+ * continued across several calls. Returns skb->len.
+ */
+static int nf_tables_dump_chains(struct sk_buff *skb,
+                                struct netlink_callback *cb)
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       unsigned int idx = 0, s_idx = cb->args[0];
+       int family = nfmsg->nfgen_family;
+
+       list_for_each_entry(afi, &nf_tables_afinfo, list) {
+               if (family != NFPROTO_UNSPEC && family != afi->family)
+                       continue;
+
+               list_for_each_entry(table, &afi->tables, list) {
+                       list_for_each_entry(chain, &table->chains, list) {
+                               /* skip entries before the resume index */
+                               if (idx < s_idx)
+                                       goto cont;
+                               /* past the resume point: clear remaining args */
+                               if (idx > s_idx)
+                                       memset(&cb->args[1], 0,
+                                              sizeof(cb->args) - sizeof(cb->args[0]));
+                               /* stop at the first entry that could not be added */
+                               if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+                                                             cb->nlh->nlmsg_seq,
+                                                             NFT_MSG_NEWCHAIN,
+                                                             NLM_F_MULTI,
+                                                             afi->family, table, chain) < 0)
+                                       goto done;
+cont:
+                               idx++;
+                       }
+               }
+       }
+done:
+       /* remember where to continue on the next call */
+       cb->args[0] = idx;
+       return skb->len;
+}
+
+
+/*
+ * NFT_MSG_GETCHAIN handler. With NLM_F_DUMP a dump over all chains is
+ * started; otherwise the chain named by NFTA_CHAIN_TABLE/NFTA_CHAIN_NAME is
+ * looked up and sent back to the requester as a single NFT_MSG_NEWCHAIN
+ * message.
+ */
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+                             const struct nlmsghdr *nlh,
+                             const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       int family = nfmsg->nfgen_family;
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       struct sk_buff *reply;
+       int ret;
+
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+                       .dump = nf_tables_dump_chains,
+               };
+               return netlink_dump_start(nlsk, skb, nlh, &c);
+       }
+
+       afi = nf_tables_afinfo_lookup(family, false);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+
+       chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+       if (IS_ERR(chain))
+               return PTR_ERR(chain);
+
+       reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       ret = nf_tables_fill_chain_info(reply, NETLINK_CB(skb).portid,
+                                       nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+                                       family, table, chain);
+       if (ret < 0) {
+               /* The reply was never handed over, so release it here */
+               kfree_skb(reply);
+               return ret;
+       }
+
+       return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+/*
+ * NFT_MSG_NEWCHAIN handler: create a chain in the table named by
+ * NFTA_CHAIN_TABLE or, if the chain already exists, optionally rename it.
+ *
+ * An existing chain is located by NFTA_CHAIN_HANDLE when present, otherwise
+ * by NFTA_CHAIN_NAME. For an existing chain NLM_F_EXCL yields -EEXIST and
+ * NLM_F_REPLACE yields -EOPNOTSUPP. A new chain becomes a base chain when
+ * NFTA_CHAIN_HOOK is supplied, and a regular chain otherwise.
+ *
+ * This function only fills in the hook ops of a base chain; it does not
+ * call nf_register_hook(). In this file that is done by nf_tables_newrule()
+ * when the first rule is added.
+ *
+ * Returns 0 on success or a negative errno code. The result of the
+ * notification is not propagated.
+ */
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+                             const struct nlmsghdr *nlh,
+                             const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       const struct nlattr * uninitialized_var(name);
+       const struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain;
+       struct nft_base_chain *basechain;
+       struct nlattr *ha[NFTA_HOOK_MAX + 1];
+       int family = nfmsg->nfgen_family;
+       u64 handle = 0;
+       int err;
+       bool create;
+       /* NLM_F_CREATE is passed on to the table lookup below */
+       create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+       afi = nf_tables_afinfo_lookup(family, true);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+       /* table->use is incremented below when a new chain is linked in */
+       if (table->use == UINT_MAX)
+               return -EOVERFLOW;
+
+       chain = NULL;
+       name = nla[NFTA_CHAIN_NAME];
+       /* Find an existing chain: by handle if given, otherwise by name */
+       if (nla[NFTA_CHAIN_HANDLE]) {
+               handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+               chain = nf_tables_chain_lookup_byhandle(table, handle);
+               if (IS_ERR(chain))
+                       return PTR_ERR(chain);
+       } else {
+               chain = nf_tables_chain_lookup(table, name);
+               if (IS_ERR(chain)) {
+                       if (PTR_ERR(chain) != -ENOENT)
+                               return PTR_ERR(chain);
+                       chain = NULL; /* not found by name: create it below */
+               }
+       }
+       /* Existing chain: the only supported update is a rename */
+       if (chain != NULL) {
+               if (nlh->nlmsg_flags & NLM_F_EXCL)
+                       return -EEXIST;
+               if (nlh->nlmsg_flags & NLM_F_REPLACE)
+                       return -EOPNOTSUPP;
+               /* Refuse a rename onto a name already present in the table */
+               if (nla[NFTA_CHAIN_HANDLE] && name &&
+                   !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+                       return -EEXIST;
+
+               if (nla[NFTA_CHAIN_HANDLE] && name)
+                       nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+               goto notify;
+       }
+       /* New chain: a hook specification makes it a base chain */
+       if (nla[NFTA_CHAIN_HOOK]) {
+               struct nf_hook_ops *ops;
+
+               err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+                                      nft_hook_policy);
+               if (err < 0)
+                       return err;
+               if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+                   ha[NFTA_HOOK_PRIORITY] == NULL)
+                       return -EINVAL;
+               if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks)
+                       return -EINVAL;
+               /* The generic chain is embedded in the base chain allocation */
+               basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+               if (basechain == NULL)
+                       return -ENOMEM;
+               chain = &basechain->chain;
+
+               ops = &basechain->ops;
+               ops->pf         = family;
+               ops->owner      = afi->owner;
+               ops->hooknum    = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+               ops->priority   = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+               ops->priv       = chain;
+               ops->hook       = nft_do_chain; /* default, may be overridden per hook */
+               if (afi->hooks[ops->hooknum])
+                       ops->hook = afi->hooks[ops->hooknum];
+
+               chain->flags |= NFT_BASE_CHAIN;
+       } else {
+               chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+               if (chain == NULL)
+                       return -ENOMEM;
+       }
+       /* Common initialisation, then link the chain into the table */
+       INIT_LIST_HEAD(&chain->rules);
+       chain->handle = nf_tables_alloc_handle(table);
+       nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+       list_add_tail(&chain->list, &table->chains);
+       table->use++;
+notify:
+       nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+                              family);
+       return 0;
+}
+
+/*
+ * RCU callback that frees a chain. A chain that still has users at this
+ * point is a bug.
+ */
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+       struct nft_chain *chain;
+
+       chain = container_of(head, struct nft_chain, rcu_head);
+       BUG_ON(chain->use > 0);
+
+       if (!(chain->flags & NFT_BASE_CHAIN)) {
+               kfree(chain);
+               return;
+       }
+
+       /* Base chains live inside a struct nft_base_chain allocation */
+       kfree(nft_base_chain(chain));
+}
+
+/*
+ * NFT_MSG_DELCHAIN handler: remove the chain named by NFTA_CHAIN_TABLE and
+ * NFTA_CHAIN_NAME.
+ *
+ * Built-in chains cannot be deleted (-EOPNOTSUPP). A chain that still
+ * contains rules, or that is still the target of a jump/goto verdict
+ * (chain->use > 0), is busy and yields -EBUSY. Without the latter check the
+ * RCU destroy callback would hit its BUG_ON(chain->use > 0).
+ *
+ * Returns 0 on success or a negative errno code.
+ */
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+                             const struct nlmsghdr *nlh,
+                             const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       const struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain;
+       int family = nfmsg->nfgen_family;
+
+       afi = nf_tables_afinfo_lookup(family, false);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+
+       chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+       if (IS_ERR(chain))
+               return PTR_ERR(chain);
+
+       if (chain->flags & NFT_CHAIN_BUILTIN)
+               return -EOPNOTSUPP;
+
+       /* Neither rules nor verdict references may remain */
+       if (!list_empty(&chain->rules) || chain->use > 0)
+               return -EBUSY;
+
+       list_del(&chain->list);
+       table->use--;
+
+       /*
+        * NOTE(review): nf_tables_newrule() registers the hook with the
+        * first rule and nf_tables_delrule() unregisters it with the last,
+        * so for an empty chain the hook looks unregistered already -
+        * confirm whether this call is still wanted.
+        */
+       if (chain->flags & NFT_BASE_CHAIN)
+               nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+       nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+                              family);
+
+       /* Make sure all rule references are gone before this is released */
+       call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+       return 0;
+}
+
+/* Fill @ctx with the address family, table and chain an operation acts on */
+static void nft_ctx_init(struct nft_ctx *ctx,
+                        const struct nft_af_info *afi,
+                        const struct nft_table *table,
+                        const struct nft_chain *chain)
+{
+       ctx->chain = chain;
+       ctx->table = table;
+       ctx->afi   = afi;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ *     nft_register_expr - make an expression type available to nf_tables
+ *     @ops: operations of the expression type
+ *
+ *     Appends @ops to the list of known expressions while holding the
+ *     nfnetlink lock of the nf_tables subsystem. Always returns zero.
+ */
+int nft_register_expr(struct nft_expr_ops *ops)
+{
+       struct list_head *entry = &ops->list;
+
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_add_tail(entry, &nf_tables_expressions);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ *     nft_unregister_expr - withdraw an expression type from nf_tables
+ *     @ops: operations of the expression type
+ *
+ *     Removes @ops from the list of known expressions while holding the
+ *     nfnetlink lock of the nf_tables subsystem.
+ */
+void nft_unregister_expr(struct nft_expr_ops *ops)
+{
+       struct list_head *entry = &ops->list;
+
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_del(entry);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+
+/*
+ * Find the registered expression whose name equals the string carried in
+ * @nla. Returns NULL when there is no such expression.
+ */
+static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla)
+{
+       const struct nft_expr_ops *cur;
+
+       list_for_each_entry(cur, &nf_tables_expressions, list) {
+               if (nla_strcmp(nla, cur->name) != 0)
+                       continue;
+               return cur;
+       }
+
+       return NULL;
+}
+
+static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla)
+{
+       const struct nft_expr_ops *ops;
+
+       if (nla == NULL)
+               return ERR_PTR(-EINVAL);
+
+       ops = __nft_expr_ops_get(nla);
+       if (ops != NULL && try_module_get(ops->owner))
+               return ops;
+
+#ifdef CONFIG_MODULES
+       if (ops == NULL) {
+               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+               request_module("nft-expr-%.*s",
+                              nla_len(nla), (char *)nla_data(nla));
+               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               if (__nft_expr_ops_get(nla))
+                       return ERR_PTR(-EAGAIN);
+       }
+#endif
+       return ERR_PTR(-ENOENT);
+}
+/*
+ * Netlink attribute policy for a single expression, used by
+ * nf_tables_expr_parse().
+ */
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+       [NFTA_EXPR_NAME]        = { .type = NLA_STRING }, /* matched against ops->name */
+       [NFTA_EXPR_DATA]        = { .type = NLA_NESTED }, /* parsed with the expression's own policy */
+};
+
+/*
+ * Append the description of one expression to @skb: its name and, when the
+ * expression type provides a dump callback, a nested NFTA_EXPR_DATA
+ * attribute filled in by that callback.
+ *
+ * Returns skb->len on success and -1 when an attribute could not be added
+ * or the dump callback failed. Nothing is trimmed here on failure.
+ */
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+                                   const struct nft_expr *expr)
+{
+       if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name))
+               goto nla_put_failure;
+
+       if (expr->ops->dump) {
+               struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+               if (data == NULL)
+                       goto nla_put_failure;
+               if (expr->ops->dump(skb, expr) < 0)
+                       goto nla_put_failure;
+               nla_nest_end(skb, data);
+       }
+
+       return skb->len;
+
+nla_put_failure:
+       return -1;
+}
+/*
+ * Parse state of one expression of a rule under construction: the resolved
+ * expression type and the parsed top-level NFTA_EXPR_* attributes.
+ */
+struct nft_expr_info {
+       const struct nft_expr_ops       *ops;
+       struct nlattr                   *tb[NFTA_EXPR_MAX + 1];
+};
+
+/*
+ * Parse the nested attributes of one expression into @info->tb and resolve
+ * the expression type named by NFTA_EXPR_NAME. On success @info->ops holds
+ * the type returned by nft_expr_ops_get(); on failure it is left untouched
+ * and a negative errno code is returned.
+ */
+static int nf_tables_expr_parse(const struct nlattr *nla,
+                               struct nft_expr_info *info)
+{
+       const struct nft_expr_ops *type;
+       int ret;
+
+       ret = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+       if (ret < 0)
+               return ret;
+
+       type = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]);
+       if (IS_ERR(type))
+               return PTR_ERR(type);
+
+       info->ops = type;
+       return 0;
+}
+
+/*
+ * Initialise @expr from the parse state in @info.
+ *
+ * When the expression type has an init callback, the NFTA_EXPR_DATA payload
+ * is parsed with the type's own policy (or an all-NULL attribute table is
+ * passed when no payload was supplied) and handed to that callback.
+ *
+ * On success @info->ops is cleared, so @expr is the only place that still
+ * records the type. On failure @expr->ops is reset to NULL and @info->ops
+ * is left set for the caller.
+ */
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+                            struct nft_expr_info *info,
+                            struct nft_expr *expr)
+{
+       const struct nft_expr_ops *ops = info->ops;
+       int ret;
+
+       expr->ops = ops;
+       if (ops->init) {
+               struct nlattr *attrs[ops->maxattr + 1];
+
+               if (info->tb[NFTA_EXPR_DATA] == NULL) {
+                       memset(attrs, 0, sizeof(attrs[0]) * (ops->maxattr + 1));
+               } else {
+                       ret = nla_parse_nested(attrs, ops->maxattr,
+                                              info->tb[NFTA_EXPR_DATA],
+                                              ops->policy);
+                       if (ret < 0)
+                               goto fail;
+               }
+
+               ret = ops->init(ctx, expr, (const struct nlattr **)attrs);
+               if (ret < 0)
+                       goto fail;
+       }
+
+       info->ops = NULL;
+       return 0;
+
+fail:
+       expr->ops = NULL;
+       return ret;
+}
+
+/*
+ * Tear down one expression: run the type's destroy callback, if any, and
+ * drop the reference on the module that owns the type.
+ */
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+       const struct nft_expr_ops *ops = expr->ops;
+
+       if (ops->destroy)
+               ops->destroy(expr);
+
+       module_put(ops->owner);
+}
+
+/*
+ * Rules
+ */
+
+/*
+ * Find the rule with the given handle in @chain. Returns the rule or
+ * ERR_PTR(-ENOENT).
+ */
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+                                               u64 handle)
+{
+       struct nft_rule *cur;
+
+       /* FIXME: linear scan over all rules of the chain */
+       list_for_each_entry(cur, &chain->rules, list) {
+               if (cur->handle != handle)
+                       continue;
+               return cur;
+       }
+
+       return ERR_PTR(-ENOENT);
+}
+
+/*
+ * Find a rule in @chain by the big-endian 64 bit handle carried in @nla.
+ * Returns ERR_PTR(-EINVAL) when the attribute is missing.
+ */
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+                                             const struct nlattr *nla)
+{
+       u64 handle;
+
+       if (nla == NULL)
+               return ERR_PTR(-EINVAL);
+
+       handle = be64_to_cpu(nla_get_be64(nla));
+       return __nf_tables_rule_lookup(chain, handle);
+}
+/*
+ * Netlink attribute policy for the rule messages (NFT_MSG_NEWRULE,
+ * NFT_MSG_GETRULE and NFT_MSG_DELRULE in nf_tables_cb).
+ */
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+       [NFTA_RULE_TABLE]       = { .type = NLA_STRING },
+       [NFTA_RULE_CHAIN]       = { .type = NLA_STRING,
+                                   .len = NFT_CHAIN_MAXNAMELEN - 1 },
+       [NFTA_RULE_HANDLE]      = { .type = NLA_U64 }, /* read with nla_get_be64() */
+       [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, /* list of NFTA_LIST_ELEM entries */
+};
+
+/*
+ * Append one rule message to @skb: the nfgenmsg header, table name, chain
+ * name, rule handle and a nested NFTA_RULE_EXPRESSIONS list with one
+ * NFTA_LIST_ELEM per expression of the rule.
+ *
+ * Returns the result of nlmsg_end() on success. On failure the partially
+ * built message is trimmed from @skb and -1 is returned.
+ */
+static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
+                                   int event, u32 flags, int family,
+                                   const struct nft_table *table,
+                                   const struct nft_chain *chain,
+                                   const struct nft_rule *rule)
+{
+       const struct nft_expr *expr, *next;
+       struct nlattr *exprs, *elem;
+       struct nfgenmsg *hdr;
+       struct nlmsghdr *nlh;
+
+       /* The subsystem id lives in the upper byte of the message type */
+       nlh = nlmsg_put(skb, portid, seq,
+                       event | (NFNL_SUBSYS_NFTABLES << 8),
+                       sizeof(struct nfgenmsg), flags);
+       if (nlh == NULL)
+               goto nla_put_failure;
+
+       hdr = nlmsg_data(nlh);
+       hdr->nfgen_family = family;
+       hdr->version      = NFNETLINK_V0;
+       hdr->res_id       = 0;
+
+       if (nla_put_string(skb, NFTA_RULE_TABLE, table->name) ||
+           nla_put_string(skb, NFTA_RULE_CHAIN, chain->name) ||
+           nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
+               goto nla_put_failure;
+
+       exprs = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+       if (exprs == NULL)
+               goto nla_put_failure;
+
+       nft_rule_for_each_expr(expr, next, rule) {
+               elem = nla_nest_start(skb, NFTA_LIST_ELEM);
+               if (elem == NULL)
+                       goto nla_put_failure;
+               if (nf_tables_fill_expr_info(skb, expr) < 0)
+                       goto nla_put_failure;
+               nla_nest_end(skb, elem);
+       }
+       nla_nest_end(skb, exprs);
+
+       return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+       nlmsg_trim(skb, nlh);
+       return -1;
+}
+
+/*
+ * Send a rule event to the NFNLGRP_NFTABLES multicast group.
+ *
+ * @oskb and @nlh may be NULL, exactly as for nf_tables_chain_notify():
+ * portid and seq then default to 0, the namespace to init_net and no report
+ * is requested. Previously @oskb and @nlh were dereferenced
+ * unconditionally, although the namespace lookup already allowed for a NULL
+ * @oskb.
+ *
+ * Returns 0 when nobody is listening, otherwise the result of
+ * nfnetlink_send() or a negative error code.
+ */
+static int nf_tables_rule_notify(const struct sk_buff *oskb,
+                                const struct nlmsghdr *nlh,
+                                const struct nft_table *table,
+                                const struct nft_chain *chain,
+                                const struct nft_rule *rule,
+                                int event, u32 flags, int family)
+{
+       struct sk_buff *skb;
+       u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+       struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+       u32 seq = nlh ? nlh->nlmsg_seq : 0;
+       bool report;
+       int err;
+
+       report = nlh ? nlmsg_report(nlh) : false;
+       /* Skip the work when no report is wanted and nobody listens */
+       if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+               return 0;
+
+       err = -ENOBUFS;
+       skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (skb == NULL)
+               goto err;
+
+       err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
+                                      family, table, chain, rule);
+       if (err < 0) {
+               kfree_skb(skb);
+               goto err;
+       }
+
+       err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+                            GFP_KERNEL);
+err:
+       /* Record the failure on the group via nfnetlink_set_err() */
+       if (err < 0)
+               nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+       return err;
+}
+
+/*
+ * Netlink dump callback: emit one NFT_MSG_NEWRULE message per rule of every
+ * chain of every table of the requested family (all families for
+ * NFPROTO_UNSPEC).
+ *
+ * cb->args[0] holds the number of rules already handled, so that a dump
+ * which did not fit into @skb resumes at the right position.
+ */
+static int nf_tables_dump_rules(struct sk_buff *skb,
+                               struct netlink_callback *cb)
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       unsigned int start = cb->args[0];
+       int family = nfmsg->nfgen_family;
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       const struct nft_rule *rule;
+       unsigned int pos = 0;
+
+       list_for_each_entry(afi, &nf_tables_afinfo, list) {
+               if (family != NFPROTO_UNSPEC && family != afi->family)
+                       continue;
+
+               list_for_each_entry(table, &afi->tables, list) {
+                       list_for_each_entry(chain, &table->chains, list) {
+                               list_for_each_entry(rule, &chain->rules, list) {
+                                       /* Entries before 'start' were already dumped */
+                                       if (pos >= start) {
+                                               if (pos > start)
+                                                       memset(&cb->args[1], 0,
+                                                              sizeof(cb->args) - sizeof(cb->args[0]));
+                                               if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+                                                                             cb->nlh->nlmsg_seq,
+                                                                             NFT_MSG_NEWRULE,
+                                                                             NLM_F_MULTI | NLM_F_APPEND,
+                                                                             afi->family, table, chain, rule) < 0)
+                                                       goto done;
+                                       }
+                                       pos++;
+                               }
+                       }
+               }
+       }
+done:
+       cb->args[0] = pos;
+       return skb->len;
+}
+
+/*
+ * NFT_MSG_GETRULE handler. With NLM_F_DUMP a dump over all rules is
+ * started; otherwise the rule identified by NFTA_RULE_TABLE,
+ * NFTA_RULE_CHAIN and NFTA_RULE_HANDLE is looked up and sent back to the
+ * requester as a single NFT_MSG_NEWRULE message.
+ */
+static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
+                            const struct nlmsghdr *nlh,
+                            const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       int family = nfmsg->nfgen_family;
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       const struct nft_rule *rule;
+       struct sk_buff *reply;
+       int ret;
+
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+                       .dump = nf_tables_dump_rules,
+               };
+               return netlink_dump_start(nlsk, skb, nlh, &c);
+       }
+
+       afi = nf_tables_afinfo_lookup(family, false);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+
+       chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+       if (IS_ERR(chain))
+               return PTR_ERR(chain);
+
+       rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+       if (IS_ERR(rule))
+               return PTR_ERR(rule);
+
+       reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       ret = nf_tables_fill_rule_info(reply, NETLINK_CB(skb).portid,
+                                      nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+                                      family, table, chain, rule);
+       if (ret < 0) {
+               /* The reply was never handed over, so release it here */
+               kfree_skb(reply);
+               return ret;
+       }
+
+       return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+
+/*
+ * RCU callback that destroys the expressions of a rule and frees the rule.
+ */
+static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+{
+       struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
+       struct nft_expr *expr;
+
+       /*
+        * Careful: some expressions might not be initialized in case this
+        * is called on error from nf_tables_newrule().
+        *
+        * The end-of-rule test must come first: once expr has reached
+        * nft_expr_last(rule) it no longer points at an expression of this
+        * rule, so reading expr->ops there would be an out-of-bounds access.
+        */
+       expr = nft_expr_first(rule);
+       while (expr != nft_expr_last(rule) && expr->ops) {
+               nf_tables_expr_destroy(expr);
+               expr = nft_expr_next(expr);
+       }
+       kfree(rule);
+}
+
+/* Queue @rule for destruction via the nf_tables_rcu_rule_destroy() callback */
+static void nf_tables_rule_destroy(struct nft_rule *rule)
+{
+       struct rcu_head *head = &rule->rcu_head;
+
+       call_rcu(head, nf_tables_rcu_rule_destroy);
+}
+/* Upper bound on the expressions of one rule, enforced in nf_tables_newrule() */
+#define NFT_RULE_MAXEXPRS      128
+/*
+ * Scratch array used by nf_tables_newrule(), indexed by expression position.
+ * NOTE(review): the allocation is not visible here - presumably
+ * NFT_RULE_MAXEXPRS entries set up at module init; confirm.
+ */
+static struct nft_expr_info *info;
+/*
+ * NFT_MSG_NEWRULE handler: add a rule to a chain or replace an existing one.
+ *
+ * With NFTA_RULE_HANDLE the rule must exist; NLM_F_EXCL then yields -EEXIST,
+ * NLM_F_REPLACE replaces that rule and anything else yields -EOPNOTSUPP.
+ * Without a handle NLM_F_CREATE is required, NLM_F_REPLACE is rejected with
+ * -EINVAL, and a fresh handle is allocated.
+ *
+ * The expressions are processed in two passes. The first pass resolves each
+ * expression type into the global info[] array and sums up the sizes. The
+ * second pass initialises the expressions inside the allocated rule.
+ *
+ * Returns 0 on success or a negative errno code.
+ */
+static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
+                            const struct nlmsghdr *nlh,
+                            const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       const struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain;
+       struct nft_rule *rule, *old_rule = NULL;
+       struct nft_expr *expr;
+       struct nft_ctx ctx;
+       struct nlattr *tmp;
+       unsigned int size, i, n;
+       int err, rem;
+       bool create;
+       u64 handle;
+
+       create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+       afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+
+       chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+       if (IS_ERR(chain))
+               return PTR_ERR(chain);
+       /* A handle addresses an existing rule; only replacing it is supported */
+       if (nla[NFTA_RULE_HANDLE]) {
+               handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+               rule = __nf_tables_rule_lookup(chain, handle);
+               if (IS_ERR(rule))
+                       return PTR_ERR(rule);
+
+               if (nlh->nlmsg_flags & NLM_F_EXCL)
+                       return -EEXIST;
+               if (nlh->nlmsg_flags & NLM_F_REPLACE)
+                       old_rule = rule;
+               else
+                       return -EOPNOTSUPP;
+       } else {
+               if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
+                       return -EINVAL;
+               handle = nf_tables_alloc_handle(table);
+       }
+       /* Pass 1: resolve expression types and sum up their sizes */
+       n = 0;
+       size = 0;
+       if (nla[NFTA_RULE_EXPRESSIONS]) {
+               nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+                       err = -EINVAL;
+                       if (nla_type(tmp) != NFTA_LIST_ELEM)
+                               goto err1;
+                       if (n == NFT_RULE_MAXEXPRS)
+                               goto err1;
+                       err = nf_tables_expr_parse(tmp, &info[n]);
+                       if (err < 0)
+                               goto err1;
+                       size += info[n].ops->size;
+                       n++;
+               }
+       }
+       /* The expression area is allocated right behind the rule itself */
+       err = -ENOMEM;
+       rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+       if (rule == NULL)
+               goto err1;
+
+       rule->handle = handle;
+       rule->dlen   = size;
+       /* Pass 2: initialise each expression in place */
+       nft_ctx_init(&ctx, afi, table, chain);
+       expr = nft_expr_first(rule);
+       for (i = 0; i < n; i++) {
+               err = nf_tables_newexpr(&ctx, &info[i], expr);
+               if (err < 0)
+                       goto err2;
+               expr = nft_expr_next(expr);
+       }
+
+       /* Register hook when first rule is inserted into a base chain */
+       if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) {
+               err = nf_register_hook(&nft_base_chain(chain)->ops);
+               if (err < 0)
+                       goto err2;
+       }
+       /* Publish the rule: replace in place, append, or insert at the head */
+       if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+               list_replace_rcu(&old_rule->list, &rule->list);
+               nf_tables_rule_destroy(old_rule);
+       } else if (nlh->nlmsg_flags & NLM_F_APPEND)
+               list_add_tail_rcu(&rule->list, &chain->rules);
+       else
+               list_add_rcu(&rule->list, &chain->rules);
+
+       nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE,
+                             nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE),
+                             nfmsg->nfgen_family);
+       return 0;
+       /*
+        * err2: the rule is destroyed via RCU, which also tears down the
+        * expressions that were initialised. err1: nf_tables_newexpr() clears
+        * info[i].ops on success, so only module references that were not
+        * handed over to an expression are dropped here.
+        */
+err2:
+       nf_tables_rule_destroy(rule);
+err1:
+       for (i = 0; i < n; i++) {
+               if (info[i].ops != NULL)
+                       module_put(info[i].ops->owner);
+       }
+       return err;
+}
+
+/*
+ * NFT_MSG_DELRULE handler: delete the rule identified by NFTA_RULE_HANDLE,
+ * or every rule of the chain when no handle is given. A NFT_MSG_DELRULE
+ * notification is sent for each removed rule.
+ *
+ * nf_tables_newrule() registers the hook of a base chain when the first
+ * rule is inserted, so it is unregistered here only when this call removed
+ * the last rule. Flushing a chain that was already empty must not
+ * unregister a hook that was never registered.
+ *
+ * Returns 0 on success or a negative errno code.
+ */
+static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
+                            const struct nlmsghdr *nlh,
+                            const struct nlattr * const nla[])
+{
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       const struct nft_af_info *afi;
+       const struct nft_table *table;
+       struct nft_chain *chain;
+       struct nft_rule *rule, *tmp;
+       int family = nfmsg->nfgen_family;
+       bool had_rules;
+
+       afi = nf_tables_afinfo_lookup(family, false);
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+
+       table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+
+       chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+       if (IS_ERR(chain))
+               return PTR_ERR(chain);
+
+       /* Remember whether there was anything to remove in the first place */
+       had_rules = !list_empty(&chain->rules);
+
+       if (nla[NFTA_RULE_HANDLE]) {
+               rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+               if (IS_ERR(rule))
+                       return PTR_ERR(rule);
+
+               /* List removal must be visible before destroying expressions */
+               list_del_rcu(&rule->list);
+
+               nf_tables_rule_notify(skb, nlh, table, chain, rule,
+                                     NFT_MSG_DELRULE, 0, family);
+               nf_tables_rule_destroy(rule);
+       } else {
+               /* Remove all rules in this chain */
+               list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
+                       list_del_rcu(&rule->list);
+
+                       nf_tables_rule_notify(skb, nlh, table, chain, rule,
+                                             NFT_MSG_DELRULE, 0, family);
+                       nf_tables_rule_destroy(rule);
+               }
+       }
+
+       /* Unregister hook when last rule from base chain is deleted */
+       if (had_rules && list_empty(&chain->rules) &&
+           chain->flags & NFT_BASE_CHAIN)
+               nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+       return 0;
+}
+/*
+ * Message dispatch table, indexed by NFT_MSG_* type. Each entry names the
+ * handler together with the attribute count and attribute policy of that
+ * message family (table, chain or rule).
+ */
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+       [NFT_MSG_NEWTABLE] = {
+               .call           = nf_tables_newtable,
+               .attr_count     = NFTA_TABLE_MAX,
+               .policy         = nft_table_policy,
+       },
+       [NFT_MSG_GETTABLE] = {
+               .call           = nf_tables_gettable,
+               .attr_count     = NFTA_TABLE_MAX,
+               .policy         = nft_table_policy,
+       },
+       [NFT_MSG_DELTABLE] = {
+               .call           = nf_tables_deltable,
+               .attr_count     = NFTA_TABLE_MAX,
+               .policy         = nft_table_policy,
+       },
+       [NFT_MSG_NEWCHAIN] = {
+               .call           = nf_tables_newchain,
+               .attr_count     = NFTA_CHAIN_MAX,
+               .policy         = nft_chain_policy,
+       },
+       [NFT_MSG_GETCHAIN] = {
+               .call           = nf_tables_getchain,
+               .attr_count     = NFTA_CHAIN_MAX,
+               .policy         = nft_chain_policy,
+       },
+       [NFT_MSG_DELCHAIN] = {
+               .call           = nf_tables_delchain,
+               .attr_count     = NFTA_CHAIN_MAX,
+               .policy         = nft_chain_policy,
+       },
+       [NFT_MSG_NEWRULE] = {
+               .call           = nf_tables_newrule,
+               .attr_count     = NFTA_RULE_MAX,
+               .policy         = nft_rule_policy,
+       },
+       [NFT_MSG_GETRULE] = {
+               .call           = nf_tables_getrule,
+               .attr_count     = NFTA_RULE_MAX,
+               .policy         = nft_rule_policy,
+       },
+       [NFT_MSG_DELRULE] = {
+               .call           = nf_tables_delrule,
+               .attr_count     = NFTA_RULE_MAX,
+               .policy         = nft_rule_policy,
+       },
+};
+/*
+ * nfnetlink subsystem descriptor: binds the NFNL_SUBSYS_NFTABLES id to the
+ * nf_tables_cb dispatch table.
+ */
+static const struct nfnetlink_subsystem nf_tables_subsys = {
+       .name           = "nf_tables",
+       .subsys_id      = NFNL_SUBSYS_NFTABLES,
+       .cb_count       = NFT_MSG_MAX,
+       .cb             = nf_tables_cb,
+};
+
+/**
+ *     nft_validate_input_register - check an expression's source register
+ *
+ *     @reg: the register number
+ *
+ *     An input register must lie above NFT_REG_VERDICT and must not exceed
+ *     NFT_REG_MAX. Returns 0 for a valid register, -EINVAL for the verdict
+ *     register or anything below it, and -ERANGE for numbers above
+ *     NFT_REG_MAX.
+ */
+int nft_validate_input_register(enum nft_registers reg)
+{
+       if (reg > NFT_REG_VERDICT && reg <= NFT_REG_MAX)
+               return 0;
+
+       return reg <= NFT_REG_VERDICT ? -EINVAL : -ERANGE;
+}
+EXPORT_SYMBOL_GPL(nft_validate_input_register);
+
+/**
+ *     nft_validate_output_register - check an expression's destination register
+ *
+ *     @reg: the register number
+ *
+ *     An output register may be the verdict register or any register up to
+ *     NFT_REG_MAX. Returns 0 for a valid register, -EINVAL for numbers below
+ *     NFT_REG_VERDICT and -ERANGE for numbers above NFT_REG_MAX.
+ */
+int nft_validate_output_register(enum nft_registers reg)
+{
+       if (reg >= NFT_REG_VERDICT && reg <= NFT_REG_MAX)
+               return 0;
+
+       return reg < NFT_REG_VERDICT ? -EINVAL : -ERANGE;
+}
+EXPORT_SYMBOL_GPL(nft_validate_output_register);
+
+/**
+ *     nft_validate_data_load - check the data type of a register load
+ *
+ *     @ctx: context of the expression performing the load
+ *     @reg: the destination register number
+ *     @data: the data to load
+ *     @type: the data type
+ *
+ *     The verdict register only accepts explicit data of type
+ *     NFT_DATA_VERDICT. Every other register accepts either runtime
+ *     gathered data, signalled by a NULL @data, or explicit data of type
+ *     NFT_DATA_VALUE. Returns 0 when the load is acceptable and -EINVAL
+ *     otherwise.
+ */
+int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
+                          const struct nft_data *data,
+                          enum nft_data_types type)
+{
+       if (reg == NFT_REG_VERDICT) {
+               if (data == NULL || type != NFT_DATA_VERDICT)
+                       return -EINVAL;
+               /* FIXME: do loop detection */
+               return 0;
+       }
+
+       if (data != NULL && type != NFT_DATA_VALUE)
+               return -EINVAL;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_data_load);
+
+/*
+ * Netlink policy for the attributes nested inside a verdict: a 32-bit
+ * verdict code and a chain name string of at most
+ * NFT_CHAIN_MAXNAMELEN - 1 bytes.
+ */
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+       [NFTA_VERDICT_CODE]     = { .type = NLA_U32 },
+       [NFTA_VERDICT_CHAIN]    = { .type = NLA_STRING,
+                                   .len = NFT_CHAIN_MAXNAMELEN - 1 },
+};
+
+/*
+ * Parse a nested verdict attribute into @data and describe it in @desc.
+ *
+ * NFTA_VERDICT_CODE is mandatory. NFT_JUMP and NFT_GOTO additionally
+ * require NFTA_VERDICT_CHAIN, which is looked up in ctx->table; chains
+ * flagged NFT_BASE_CHAIN are refused as targets. For jump/goto the target
+ * chain's use counter is incremented, so a successful call must later be
+ * balanced by nft_verdict_uninit().
+ *
+ * Returns 0 on success (desc->type is then NFT_DATA_VERDICT) or a
+ * negative errno.
+ */
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+                           struct nft_data_desc *desc, const struct nlattr *nla)
+{
+       struct nlattr *tb[NFTA_VERDICT_MAX + 1];
+       struct nft_chain *chain;
+       int err;
+
+       err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+       if (err < 0)
+               return err;
+
+       if (!tb[NFTA_VERDICT_CODE])
+               return -EINVAL;
+       data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+       switch (data->verdict) {
+       case NF_ACCEPT:
+       case NF_DROP:
+       case NF_QUEUE:
+       case NFT_CONTINUE:
+       case NFT_BREAK:
+       case NFT_RETURN:
+               /* Plain verdicts carry no chain reference. */
+               desc->len = sizeof(data->verdict);
+               break;
+       case NFT_JUMP:
+       case NFT_GOTO:
+               if (!tb[NFTA_VERDICT_CHAIN])
+                       return -EINVAL;
+               chain = nf_tables_chain_lookup(ctx->table,
+                                              tb[NFTA_VERDICT_CHAIN]);
+               if (IS_ERR(chain))
+                       return PTR_ERR(chain);
+               if (chain->flags & NFT_BASE_CHAIN)
+                       return -EOPNOTSUPP;
+
+               /*
+                * Raise the target chain's level to one more than the
+                * current chain's if it is not already that deep, refusing
+                * the rule when that would reach 16.
+                * NOTE(review): 16 presumably mirrors NFT_JUMP_STACK_SIZE
+                * in nf_tables_core.c - confirm and share the constant.
+                */
+               if (ctx->chain->level + 1 > chain->level) {
+                       if (ctx->chain->level + 1 == 16)
+                               return -EMLINK;
+                       chain->level = ctx->chain->level + 1;
+               }
+               chain->use++;
+               data->chain = chain;
+               /*
+                * NOTE(review): data is a pointer, so this is the size of a
+                * pointer rather than of the verdict payload - confirm that
+                * this is the intended length.
+                */
+               desc->len = sizeof(data);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       desc->type = NFT_DATA_VERDICT;
+       return 0;
+}
+
+/*
+ * Release a verdict: jump and goto verdicts hold a reference on their
+ * target chain, which is dropped here. Other verdicts need no cleanup.
+ */
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+       if (data->verdict == NFT_JUMP || data->verdict == NFT_GOTO)
+               data->chain->use--;
+}
+
+/*
+ * Emit a verdict as a nested NFTA_DATA_VERDICT attribute containing the
+ * verdict code and, for jump/goto verdicts, the name of the target chain.
+ *
+ * Returns 0 on success and -1 if the attributes could not be added.
+ */
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+{
+       struct nlattr *nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+
+       if (nest == NULL)
+               return -1;
+
+       if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+               return -1;
+
+       /* Only jump and goto verdicts reference a chain. */
+       if (data->verdict == NFT_JUMP || data->verdict == NFT_GOTO) {
+               if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+                       return -1;
+       }
+
+       nla_nest_end(skb, nest);
+       return 0;
+}
+
+/*
+ * Copy a plain value attribute into @data and record its type and length
+ * in @desc.
+ *
+ * Returns 0 on success, -EINVAL for an empty attribute and -EOVERFLOW if
+ * the payload is larger than data->data.
+ */
+static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+                         struct nft_data_desc *desc, const struct nlattr *nla)
+{
+       const unsigned int payload = nla_len(nla);
+
+       if (payload == 0)
+               return -EINVAL;
+       if (payload > sizeof(data->data))
+               return -EOVERFLOW;
+
+       desc->type = NFT_DATA_VALUE;
+       desc->len  = payload;
+       nla_memcpy(data->data, nla, sizeof(data->data));
+       return 0;
+}
+
+/*
+ * Emit @len bytes of data->data as an NFTA_DATA_VALUE attribute.
+ * Returns the result of nla_put().
+ */
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+                         unsigned int len)
+{
+       return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+/*
+ * Netlink policy for data attributes: either a binary value no larger
+ * than the data member of struct nft_data, or a nested verdict.
+ */
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+       [NFTA_DATA_VALUE]       = { .type = NLA_BINARY,
+                                   .len  = FIELD_SIZEOF(struct nft_data, data) },
+       [NFTA_DATA_VERDICT]     = { .type = NLA_NESTED },
+};
+
+/**
+ *     nft_data_init - parse nf_tables data netlink attributes
+ *
+ *     @ctx: context of the expression using the data
+ *     @data: destination struct nft_data
+ *     @desc: data description
+ *     @nla: netlink attribute containing data
+ *
+ *     Parse the nested data attribute and fill in a struct nft_data. The
+ *     type and length of the parsed data are reported through @desc.
+ *
+ *     A caller that only wants to accept NFT_DATA_VALUE data passes NULL
+ *     for @ctx; verdict data is then rejected with -EINVAL.
+ *
+ *     Returns 0 on success or a negative errno.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+                 struct nft_data_desc *desc, const struct nlattr *nla)
+{
+       struct nlattr *tb[NFTA_DATA_MAX + 1];
+       int ret = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+
+       if (ret < 0)
+               return ret;
+
+       /* A plain value takes precedence if both attributes are present. */
+       if (tb[NFTA_DATA_VALUE] != NULL)
+               return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+
+       /* Verdicts need a context to resolve the target chain. */
+       if (tb[NFTA_DATA_VERDICT] == NULL || ctx == NULL)
+               return -EINVAL;
+
+       return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ *     nft_data_uninit - release a nft_data item
+ *
+ *     @data: struct nft_data to release
+ *     @type: type of data
+ *
+ *     Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *     all others need to be released by calling this function. An unknown
+ *     @type triggers a warning and releases nothing.
+ */
+void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+{
+       switch (type) {
+       case NFT_DATA_VALUE:
+               return;
+       case NFT_DATA_VERDICT:
+               /*
+                * Call and return separately: returning a void expression
+                * from a void function is not valid ISO C.
+                */
+               nft_verdict_uninit(data);
+               return;
+       default:
+               WARN_ON(1);
+       }
+}
+EXPORT_SYMBOL_GPL(nft_data_uninit);
+
+/**
+ *     nft_data_dump - dump a nft_data item as a nested netlink attribute
+ *
+ *     @skb: message buffer to append to
+ *     @attr: type of the enclosing nested attribute
+ *     @data: the data to dump
+ *     @type: type of data
+ *     @len: length of the data, used for NFT_DATA_VALUE only
+ *
+ *     Returns -1 if the nest could not be opened, -EINVAL (after a
+ *     warning) for an unknown @type, and otherwise the result of the
+ *     type specific dump helper.
+ */
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+                 enum nft_data_types type, unsigned int len)
+{
+       struct nlattr *nest = nla_nest_start(skb, attr);
+       int ret;
+
+       if (nest == NULL)
+               return -1;
+
+       if (type == NFT_DATA_VALUE) {
+               ret = nft_value_dump(skb, data, len);
+       } else if (type == NFT_DATA_VERDICT) {
+               ret = nft_verdict_dump(skb, data);
+       } else {
+               ret = -EINVAL;
+               WARN_ON(1);
+       }
+
+       /* The nest is closed regardless of the helper's result. */
+       nla_nest_end(skb, nest);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+/*
+ * Module initialisation: allocate the info array (NFT_RULE_MAXEXPRS
+ * entries of struct nft_expr_info), initialise the core expressions and
+ * register the nf_tables nfnetlink subsystem. On failure, everything set
+ * up so far is undone in reverse order and the error is returned.
+ */
+static int __init nf_tables_module_init(void)
+{
+       int err;
+
+       info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
+                      GFP_KERNEL);
+       if (info == NULL) {
+               err = -ENOMEM;
+               goto err1;
+       }
+
+       err = nf_tables_core_module_init();
+       if (err < 0)
+               goto err2;
+
+       err = nfnetlink_subsys_register(&nf_tables_subsys);
+       if (err < 0)
+               goto err3;
+
+       pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+       return 0;
+       /* Error unwinding: each label undoes the step before the failure. */
+err3:
+       nf_tables_core_module_exit();
+err2:
+       kfree(info);
+err1:
+       return err;
+}
+
+/*
+ * Module teardown: undo nf_tables_module_init() in reverse order -
+ * unregister the nfnetlink subsystem, tear down the core expressions and
+ * free the info array.
+ */
+static void __exit nf_tables_module_exit(void)
+{
+       nfnetlink_subsys_unregister(&nf_tables_subsys);
+       nf_tables_core_module_exit();
+       kfree(info);
+}
+
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644 (file)
index 0000000..bc7fb85
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_JUMP_STACK_SIZE    16
+
+/*
+ * Evaluate the chain attached to a netfilter hook (ops->priv) against a
+ * packet and return the resulting netfilter verdict.
+ *
+ * Rules are walked in list order; within a rule, expressions are
+ * evaluated until one of them changes the verdict register away from
+ * NFT_CONTINUE. NFT_BREAK abandons the current rule and moves on to the
+ * next one. NFT_JUMP records the current position on a local stack of
+ * NFT_JUMP_STACK_SIZE entries before switching chains, NFT_GOTO switches
+ * chains without recording anything. When a chain ends (or NFT_RETURN is
+ * seen) evaluation resumes after the most recently recorded position; if
+ * none is left, NF_ACCEPT is returned.
+ */
+unsigned int nft_do_chain(const struct nf_hook_ops *ops,
+                         struct sk_buff *skb,
+                         const struct net_device *in,
+                         const struct net_device *out,
+                         int (*okfn)(struct sk_buff *))
+{
+       const struct nft_chain *chain = ops->priv;
+       const struct nft_rule *rule;
+       const struct nft_expr *expr, *last;
+       struct nft_data data[NFT_REG_MAX + 1];
+       const struct nft_pktinfo pkt = {
+               .skb            = skb,
+               .in             = in,
+               .out            = out,
+               .hooknum        = ops->hooknum,
+       };
+       unsigned int stackptr = 0;
+       struct {
+               const struct nft_chain  *chain;
+               const struct nft_rule   *rule;
+       } jumpstack[NFT_JUMP_STACK_SIZE];
+
+do_chain:
+       /*
+        * Use the list head itself as the starting cursor so that the
+        * "continue" iterator below begins with the first rule of the chain.
+        */
+       rule = list_entry(&chain->rules, struct nft_rule, list);
+next_rule:
+       data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+       list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+               nft_rule_for_each_expr(expr, last, rule) {
+                       expr->ops->eval(expr, data, &pkt);
+                       if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+                               break;
+               }
+
+               switch (data[NFT_REG_VERDICT].verdict) {
+               case NFT_BREAK:
+                       /* Rule did not match: reset and try the next rule. */
+                       data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+                       /* fall through */
+               case NFT_CONTINUE:
+                       continue;
+               }
+               /* Any other verdict ends the walk of this chain. */
+               break;
+       }
+
+       switch (data[NFT_REG_VERDICT].verdict) {
+       case NF_ACCEPT:
+       case NF_DROP:
+       case NF_QUEUE:
+               return data[NFT_REG_VERDICT].verdict;
+       case NFT_JUMP:
+               /* Remember where to resume once the target chain is done. */
+               BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+               jumpstack[stackptr].chain = chain;
+               jumpstack[stackptr].rule  = rule;
+               stackptr++;
+               /* fall through */
+       case NFT_GOTO:
+               chain = data[NFT_REG_VERDICT].chain;
+               goto do_chain;
+       case NFT_RETURN:
+       case NFT_CONTINUE:
+               break;
+       default:
+               /* Unexpected verdict: warn, then treat it like a return. */
+               WARN_ON(1);
+       }
+
+       /* End of chain or explicit return: resume after the calling rule. */
+       if (stackptr > 0) {
+               stackptr--;
+               chain = jumpstack[stackptr].chain;
+               rule  = jumpstack[stackptr].rule;
+               goto next_rule;
+       }
+
+       return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain);
+
+/*
+ * Initialise the built-in expression modules (immediate, cmp, lookup,
+ * bitwise, byteorder, payload) in that order. If one of them fails, the
+ * ones already initialised are torn down in reverse order and the error
+ * is returned; otherwise 0 is returned.
+ */
+int __init nf_tables_core_module_init(void)
+{
+       int err;
+
+       err = nft_immediate_module_init();
+       if (err < 0)
+               goto err1;
+
+       err = nft_cmp_module_init();
+       if (err < 0)
+               goto err2;
+
+       err = nft_lookup_module_init();
+       if (err < 0)
+               goto err3;
+
+       err = nft_bitwise_module_init();
+       if (err < 0)
+               goto err4;
+
+       err = nft_byteorder_module_init();
+       if (err < 0)
+               goto err5;
+
+       err = nft_payload_module_init();
+       if (err < 0)
+               goto err6;
+
+       return 0;
+
+       /* Unwind in reverse order of initialisation. */
+err6:
+       nft_byteorder_module_exit();
+err5:
+       nft_bitwise_module_exit();
+err4:
+       nft_lookup_module_exit();
+err3:
+       nft_cmp_module_exit();
+err2:
+       nft_immediate_module_exit();
+err1:
+       return err;
+}
+
+/*
+ * Tear down the built-in expression modules in the reverse order of
+ * nf_tables_core_module_init().
+ */
+void nf_tables_core_module_exit(void)
+{
+       nft_payload_module_exit();
+       nft_byteorder_module_exit();
+       nft_bitwise_module_exit();
+       nft_lookup_module_exit();
+       nft_cmp_module_exit();
+       nft_immediate_module_exit();
+}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644 (file)
index 0000000..0f75015
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of a bitwise expression, which computes
+ * dreg = (sreg & mask) ^ xor.
+ *
+ * sreg/dreg: source and destination register numbers
+ * len:       length in bytes of the operands
+ * mask, xor: constant operands
+ */
+struct nft_bitwise {
+       enum nft_registers      sreg:8;
+       enum nft_registers      dreg:8;
+       u8                      len;
+       struct nft_data         mask;
+       struct nft_data         xor;
+};
+
+/*
+ * Evaluate a bitwise expression: for every data element covered by
+ * priv->len (rounded up to a multiple of 4 bytes), store
+ * (source & mask) ^ xor in the destination register.
+ */
+static void nft_bitwise_eval(const struct nft_expr *expr,
+                            struct nft_data data[NFT_REG_MAX + 1],
+                            const struct nft_pktinfo *pkt)
+{
+       const struct nft_bitwise *bw = nft_expr_priv(expr);
+       const struct nft_data *in = &data[bw->sreg];
+       struct nft_data *out = &data[bw->dreg];
+       const unsigned int words = DIV_ROUND_UP(bw->len, 4);
+       unsigned int w;
+
+       for (w = 0; w < words; w++) {
+               out->data[w] = in->data[w] & bw->mask.data[w];
+               out->data[w] ^= bw->xor.data[w];
+       }
+}
+
+/*
+ * Netlink policy for bitwise expression attributes: three 32-bit
+ * integers (source register, destination register, length) and two
+ * nested data attributes (mask and xor).
+ */
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+       [NFTA_BITWISE_SREG]     = { .type = NLA_U32 },
+       [NFTA_BITWISE_DREG]     = { .type = NLA_U32 },
+       [NFTA_BITWISE_LEN]      = { .type = NLA_U32 },
+       [NFTA_BITWISE_MASK]     = { .type = NLA_NESTED },
+       [NFTA_BITWISE_XOR]      = { .type = NLA_NESTED },
+};
+
+/*
+ * Initialise a bitwise expression from its netlink attributes.
+ *
+ * All of SREG, DREG, LEN, MASK and XOR are mandatory. The registers are
+ * validated, and both the mask and the xor operand must be plain values
+ * whose length equals LEN.
+ *
+ * The 32-bit attribute values are validated before being stored in the
+ * 8-bit fields of struct nft_bitwise, so that an out-of-range value can
+ * not be truncated into a seemingly valid one.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+                           const struct nft_expr *expr,
+                           const struct nlattr * const tb[])
+{
+       struct nft_bitwise *priv = nft_expr_priv(expr);
+       struct nft_data_desc d1, d2;
+       u32 sreg, dreg, len;
+       int err;
+
+       if (tb[NFTA_BITWISE_SREG] == NULL ||
+           tb[NFTA_BITWISE_DREG] == NULL ||
+           tb[NFTA_BITWISE_LEN] == NULL ||
+           tb[NFTA_BITWISE_MASK] == NULL ||
+           tb[NFTA_BITWISE_XOR] == NULL)
+               return -EINVAL;
+
+       sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
+       err = nft_validate_input_register(sreg);
+       if (err < 0)
+               return err;
+       priv->sreg = sreg;
+
+       dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
+       err = nft_validate_output_register(dreg);
+       if (err < 0)
+               return err;
+       err = nft_validate_data_load(ctx, dreg, NULL, NFT_DATA_VALUE);
+       if (err < 0)
+               return err;
+       priv->dreg = dreg;
+
+       len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+
+       /* NULL context: only plain values are accepted as operands. */
+       err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+       if (err < 0)
+               return err;
+       /* Compare against the full 32-bit length, not a truncated copy. */
+       if (d1.len != len)
+               return -EINVAL;
+
+       err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+       if (err < 0)
+               return err;
+       if (d2.len != len)
+               return -EINVAL;
+
+       /* len equals the operand length here, so it fits in a u8. */
+       priv->len = len;
+       return 0;
+}
+
+/*
+ * Dump a bitwise expression: registers and length as big-endian 32-bit
+ * attributes, mask and xor as nested value data.
+ *
+ * Returns 0 on success and -1 if an attribute could not be added.
+ */
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_bitwise *bw = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(bw->sreg)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(bw->dreg)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(bw->len)))
+               return -1;
+
+       if (nft_data_dump(skb, NFTA_BITWISE_MASK, &bw->mask,
+                         NFT_DATA_VALUE, bw->len) < 0)
+               return -1;
+
+       if (nft_data_dump(skb, NFTA_BITWISE_XOR, &bw->xor,
+                         NFT_DATA_VALUE, bw->len) < 0)
+               return -1;
+
+       return 0;
+}
+
+/* Expression operations and netlink description of the "bitwise" type. */
+static struct nft_expr_ops nft_bitwise_ops __read_mostly = {
+       .name           = "bitwise",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_bitwise_eval,
+       .init           = nft_bitwise_init,
+       .dump           = nft_bitwise_dump,
+       .policy         = nft_bitwise_policy,
+       .maxattr        = NFTA_BITWISE_MAX,
+};
+
+/* Register nft_bitwise_ops; returns the result of nft_register_expr(). */
+int __init nft_bitwise_module_init(void)
+{
+       return nft_register_expr(&nft_bitwise_ops);
+}
+
+/* Unregister nft_bitwise_ops again. */
+void nft_bitwise_module_exit(void)
+{
+       nft_unregister_expr(&nft_bitwise_ops);
+}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644 (file)
index 0000000..8b0657a
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of a byteorder expression.
+ *
+ * sreg/dreg: source and destination register numbers
+ * op:        conversion direction (NFT_BYTEORDER_NTOH or NFT_BYTEORDER_HTON)
+ * len:       number of bytes to convert
+ * size:      size in bytes of each converted element (2 or 4)
+ */
+struct nft_byteorder {
+       enum nft_registers      sreg:8;
+       enum nft_registers      dreg:8;
+       enum nft_byteorder_ops  op:8;
+       u8                      len;
+       u8                      size;
+};
+
+/*
+ * Convert the byte order of the source register's contents and store the
+ * result in the destination register.
+ *
+ * The first priv->len bytes are treated as a packed array of elements of
+ * priv->size bytes (2 or 4); each element is converted between network
+ * and host byte order according to priv->op. Trailing bytes that do not
+ * make up a whole element are not touched.
+ *
+ * The 16-bit case walks the data through u16 pointers. Indexing an array
+ * of 4-byte unions, as was done before, advances 4 bytes per 16-bit
+ * element: it skips every other halfword and, for len > 8, accesses
+ * memory beyond the register.
+ */
+static void nft_byteorder_eval(const struct nft_expr *expr,
+                              struct nft_data data[NFT_REG_MAX + 1],
+                              const struct nft_pktinfo *pkt)
+{
+       const struct nft_byteorder *priv = nft_expr_priv(expr);
+       struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+       u32 *s32 = (void *)src->data, *d32 = (void *)dst->data;
+       u16 *s16 = (void *)src->data, *d16 = (void *)dst->data;
+       unsigned int i;
+
+       switch (priv->size) {
+       case 4:
+               switch (priv->op) {
+               case NFT_BYTEORDER_NTOH:
+                       for (i = 0; i < priv->len / 4; i++)
+                               d32[i] = ntohl((__force __be32)s32[i]);
+                       break;
+               case NFT_BYTEORDER_HTON:
+                       for (i = 0; i < priv->len / 4; i++)
+                               d32[i] = (__force __u32)htonl(s32[i]);
+                       break;
+               }
+               break;
+       case 2:
+               switch (priv->op) {
+               case NFT_BYTEORDER_NTOH:
+                       for (i = 0; i < priv->len / 2; i++)
+                               d16[i] = ntohs((__force __be16)s16[i]);
+                       break;
+               case NFT_BYTEORDER_HTON:
+                       for (i = 0; i < priv->len / 2; i++)
+                               d16[i] = (__force __u16)htons(s16[i]);
+                       break;
+               }
+               break;
+       }
+}
+
+/* Netlink policy for byteorder attributes: all are 32-bit integers. */
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+       [NFTA_BYTEORDER_SREG]   = { .type = NLA_U32 },
+       [NFTA_BYTEORDER_DREG]   = { .type = NLA_U32 },
+       [NFTA_BYTEORDER_OP]     = { .type = NLA_U32 },
+       [NFTA_BYTEORDER_LEN]    = { .type = NLA_U32 },
+       [NFTA_BYTEORDER_SIZE]   = { .type = NLA_U32 },
+};
+
+/*
+ * Initialise a byteorder expression from its netlink attributes.
+ *
+ * All of SREG, DREG, LEN, SIZE and OP are mandatory. The registers are
+ * validated, OP must be NFT_BYTEORDER_NTOH or NFT_BYTEORDER_HTON, LEN
+ * must be non-zero and no larger than the data member of struct nft_data,
+ * and SIZE must be 2 or 4.
+ *
+ * Every 32-bit attribute value is validated before it is stored in the
+ * 8-bit fields of struct nft_byteorder, so that an out-of-range value
+ * (e.g. a length of 0x104) can not be truncated into a valid one.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+                             const struct nft_expr *expr,
+                             const struct nlattr * const tb[])
+{
+       struct nft_byteorder *priv = nft_expr_priv(expr);
+       u32 sreg, dreg, op, len, size;
+       int err;
+
+       if (tb[NFTA_BYTEORDER_SREG] == NULL ||
+           tb[NFTA_BYTEORDER_DREG] == NULL ||
+           tb[NFTA_BYTEORDER_LEN] == NULL ||
+           tb[NFTA_BYTEORDER_SIZE] == NULL ||
+           tb[NFTA_BYTEORDER_OP] == NULL)
+               return -EINVAL;
+
+       sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
+       err = nft_validate_input_register(sreg);
+       if (err < 0)
+               return err;
+       priv->sreg = sreg;
+
+       dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
+       err = nft_validate_output_register(dreg);
+       if (err < 0)
+               return err;
+       err = nft_validate_data_load(ctx, dreg, NULL, NFT_DATA_VALUE);
+       if (err < 0)
+               return err;
+       priv->dreg = dreg;
+
+       op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+       switch (op) {
+       case NFT_BYTEORDER_NTOH:
+       case NFT_BYTEORDER_HTON:
+               break;
+       default:
+               return -EINVAL;
+       }
+       priv->op = op;
+
+       len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+       if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
+               return -EINVAL;
+       priv->len = len;
+
+       size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+       switch (size) {
+       case 2:
+       case 4:
+               break;
+       default:
+               return -EINVAL;
+       }
+       priv->size = size;
+
+       return 0;
+}
+
+/*
+ * Dump a byteorder expression: every field is emitted as a big-endian
+ * 32-bit attribute.
+ *
+ * Returns 0 on success and -1 if an attribute could not be added.
+ */
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_byteorder *bo = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(bo->sreg)) ||
+           nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(bo->dreg)) ||
+           nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(bo->op)) ||
+           nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(bo->len)) ||
+           nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(bo->size)))
+               return -1;
+
+       return 0;
+}
+
+/* Expression operations and netlink description of the "byteorder" type. */
+static struct nft_expr_ops nft_byteorder_ops __read_mostly = {
+       .name           = "byteorder",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_byteorder_eval,
+       .init           = nft_byteorder_init,
+       .dump           = nft_byteorder_dump,
+       .policy         = nft_byteorder_policy,
+       .maxattr        = NFTA_BYTEORDER_MAX,
+};
+
+/* Register nft_byteorder_ops; returns the result of nft_register_expr(). */
+int __init nft_byteorder_module_init(void)
+{
+       return nft_register_expr(&nft_byteorder_ops);
+}
+
+/* Unregister nft_byteorder_ops again. */
+void nft_byteorder_module_exit(void)
+{
+       nft_unregister_expr(&nft_byteorder_ops);
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644 (file)
index 0000000..e734d67
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of a cmp expression.
+ *
+ * data: constant the source register is compared against
+ * sreg: source register number
+ * len:  length of the constant, passed to nft_data_cmp()
+ * op:   comparison operator (NFT_CMP_*)
+ */
+struct nft_cmp_expr {
+       struct nft_data         data;
+       enum nft_registers      sreg:8;
+       u8                      len;
+       enum nft_cmp_ops        op:8;
+};
+
+/*
+ * Evaluate a cmp expression: compare the source register against the
+ * stored constant with nft_data_cmp() and, if the configured relation
+ * does not hold, set the verdict register to NFT_BREAK. An operator not
+ * listed below never causes a mismatch.
+ */
+static void nft_cmp_eval(const struct nft_expr *expr,
+                        struct nft_data data[NFT_REG_MAX + 1],
+                        const struct nft_pktinfo *pkt)
+{
+       const struct nft_cmp_expr *cmp = nft_expr_priv(expr);
+       const int d = nft_data_cmp(&data[cmp->sreg], &cmp->data, cmp->len);
+       bool match;
+
+       switch (cmp->op) {
+       case NFT_CMP_EQ:
+               match = d == 0;
+               break;
+       case NFT_CMP_NEQ:
+               match = d != 0;
+               break;
+       case NFT_CMP_LT:
+               match = d < 0;
+               break;
+       case NFT_CMP_LTE:
+               match = d <= 0;
+               break;
+       case NFT_CMP_GT:
+               match = d > 0;
+               break;
+       case NFT_CMP_GTE:
+               match = d >= 0;
+               break;
+       default:
+               match = true;
+               break;
+       }
+
+       if (!match)
+               data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Netlink policy for cmp attributes: source register and operator as
+ * 32-bit integers, the comparison constant as nested data.
+ */
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+       [NFTA_CMP_SREG]         = { .type = NLA_U32 },
+       [NFTA_CMP_OP]           = { .type = NLA_U32 },
+       [NFTA_CMP_DATA]         = { .type = NLA_NESTED },
+};
+
+/*
+ * Initialise a cmp expression from its netlink attributes.
+ *
+ * SREG, OP and DATA are mandatory. The source register is validated, OP
+ * must be one of the NFT_CMP_* operators and DATA must be a plain value,
+ * whose length becomes the comparison length.
+ *
+ * The 32-bit register and operator values are validated before being
+ * stored in the 8-bit fields of struct nft_cmp_expr, so that an
+ * out-of-range value can not be truncated into a valid one.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                       const struct nlattr * const tb[])
+{
+       struct nft_cmp_expr *priv = nft_expr_priv(expr);
+       struct nft_data_desc desc;
+       u32 sreg, op;
+       int err;
+
+       if (tb[NFTA_CMP_SREG] == NULL ||
+           tb[NFTA_CMP_OP] == NULL ||
+           tb[NFTA_CMP_DATA] == NULL)
+               return -EINVAL;
+
+       sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+       err = nft_validate_input_register(sreg);
+       if (err < 0)
+               return err;
+       priv->sreg = sreg;
+
+       op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+       switch (op) {
+       case NFT_CMP_EQ:
+       case NFT_CMP_NEQ:
+       case NFT_CMP_LT:
+       case NFT_CMP_LTE:
+       case NFT_CMP_GT:
+       case NFT_CMP_GTE:
+               break;
+       default:
+               return -EINVAL;
+       }
+       priv->op = op;
+
+       /* NULL context: only plain values are accepted as constant. */
+       err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+       if (err < 0)
+               return err;
+
+       priv->len = desc.len;
+       return 0;
+}
+
+/*
+ * Dump a cmp expression: source register and operator as big-endian
+ * 32-bit attributes, the constant as nested value data.
+ *
+ * Returns 0 on success and -1 if an attribute could not be added.
+ */
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_cmp_expr *cmp = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(cmp->sreg)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_CMP_OP, htonl(cmp->op)))
+               return -1;
+
+       if (nft_data_dump(skb, NFTA_CMP_DATA, &cmp->data,
+                         NFT_DATA_VALUE, cmp->len) < 0)
+               return -1;
+
+       return 0;
+}
+
+/* Expression operations and netlink description of the "cmp" type. */
+static struct nft_expr_ops nft_cmp_ops __read_mostly = {
+       .name           = "cmp",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_cmp_eval,
+       .init           = nft_cmp_init,
+       .dump           = nft_cmp_dump,
+       .policy         = nft_cmp_policy,
+       .maxattr        = NFTA_CMP_MAX,
+};
+
+/* Register nft_cmp_ops; returns the result of nft_register_expr(). */
+int __init nft_cmp_module_init(void)
+{
+       return nft_register_expr(&nft_cmp_ops);
+}
+
+/* Unregister nft_cmp_ops again. */
+void nft_cmp_module_exit(void)
+{
+       nft_unregister_expr(&nft_cmp_ops);
+}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644 (file)
index 0000000..33c5d36
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of a counter expression: byte and packet totals, updated
+ * and read under the sequence lock.
+ */
+struct nft_counter {
+       seqlock_t       lock;
+       u64             bytes;
+       u64             packets;
+};
+
+/*
+ * Account one packet: add the skb length to the byte total and bump the
+ * packet total, both under the counter's write seqlock.
+ */
+static void nft_counter_eval(const struct nft_expr *expr,
+                            struct nft_data data[NFT_REG_MAX + 1],
+                            const struct nft_pktinfo *pkt)
+{
+       struct nft_counter *ctr = nft_expr_priv(expr);
+
+       write_seqlock_bh(&ctr->lock);
+       ctr->packets++;
+       ctr->bytes += pkt->skb->len;
+       write_sequnlock_bh(&ctr->lock);
+}
+
+/*
+ * Dump a counter expression. Both totals are sampled together under the
+ * read side of the seqlock, retrying if a writer interfered, and then
+ * emitted as big-endian 64-bit attributes.
+ *
+ * Returns 0 on success and -1 if an attribute could not be added.
+ */
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       struct nft_counter *ctr = nft_expr_priv(expr);
+       u64 nbytes, npkts;
+       unsigned int seq;
+
+       do {
+               seq = read_seqbegin(&ctr->lock);
+               nbytes = ctr->bytes;
+               npkts  = ctr->packets;
+       } while (read_seqretry(&ctr->lock, seq));
+
+       if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(nbytes)))
+               return -1;
+       if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(npkts)))
+               return -1;
+
+       return 0;
+}
+
+/* Netlink policy for counter attributes: both are 64-bit integers. */
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+       [NFTA_COUNTER_PACKETS]  = { .type = NLA_U64 },
+       [NFTA_COUNTER_BYTES]    = { .type = NLA_U64 },
+};
+
+/*
+ * Initialise a counter expression. Both attributes are optional; a total
+ * is only written when its attribute is present. Always returns 0.
+ */
+static int nft_counter_init(const struct nft_ctx *ctx,
+                           const struct nft_expr *expr,
+                           const struct nlattr * const tb[])
+{
+       struct nft_counter *ctr = nft_expr_priv(expr);
+       const struct nlattr *pkts = tb[NFTA_COUNTER_PACKETS];
+       const struct nlattr *bytes = tb[NFTA_COUNTER_BYTES];
+
+       seqlock_init(&ctr->lock);
+
+       if (pkts != NULL)
+               ctr->packets = be64_to_cpu(nla_get_be64(pkts));
+       if (bytes != NULL)
+               ctr->bytes = be64_to_cpu(nla_get_be64(bytes));
+
+       return 0;
+}
+
+/* Expression operations and netlink description of the "counter" type. */
+static struct nft_expr_ops nft_counter_ops __read_mostly = {
+       .name           = "counter",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+       .policy         = nft_counter_policy,
+       .maxattr        = NFTA_COUNTER_MAX,
+       .owner          = THIS_MODULE,
+       .eval           = nft_counter_eval,
+       .init           = nft_counter_init,
+       .dump           = nft_counter_dump,
+};
+
+/* Module entry point: register nft_counter_ops with nft_register_expr(). */
+static int __init nft_counter_module_init(void)
+{
+       return nft_register_expr(&nft_counter_ops);
+}
+
+/* Module exit point: unregister nft_counter_ops again. */
+static void __exit nft_counter_module_exit(void)
+{
+       nft_unregister_expr(&nft_counter_ops);
+}
+
+module_init(nft_counter_module_init);
+module_exit(nft_counter_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644 (file)
index 0000000..a1756d6
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+
+/* Private state of one "ct" expression, filled in by nft_ct_init(). */
+struct nft_ct {
+       /* which conntrack property nft_ct_eval() loads */
+       enum nft_ct_keys        key:8;
+       /* index into ct->tuplehash[] for the tuple-based keys */
+       enum ip_conntrack_dir   dir:8;
+       /* destination register written by nft_ct_eval() */
+       enum nft_registers      dreg:8;
+       /* family whose l3proto module reference is dropped in nft_ct_destroy() */
+       uint8_t                 family;
+};
+
+static void nft_ct_eval(const struct nft_expr *expr,
+                       struct nft_data data[NFT_REG_MAX + 1],
+                       const struct nft_pktinfo *pkt)
+{
+       const struct nft_ct *priv = nft_expr_priv(expr);
+       struct nft_data *dest = &data[priv->dreg];
+       enum ip_conntrack_info ctinfo;
+       const struct nf_conn *ct;
+       const struct nf_conn_help *help;
+       const struct nf_conntrack_tuple *tuple;
+       const struct nf_conntrack_helper *helper;
+       long diff;
+       unsigned int state;
+
+       ct = nf_ct_get(pkt->skb, &ctinfo);
+
+       switch (priv->key) {
+       case NFT_CT_STATE:
+               if (ct == NULL)
+                       state = NF_CT_STATE_INVALID_BIT;
+               else if (nf_ct_is_untracked(ct))
+                       state = NF_CT_STATE_UNTRACKED_BIT;
+               else
+                       state = NF_CT_STATE_BIT(ctinfo);
+               dest->data[0] = state;
+               return;
+       }
+
+       if (ct == NULL)
+               goto err;
+
+       switch (priv->key) {
+       case NFT_CT_DIRECTION:
+               dest->data[0] = CTINFO2DIR(ctinfo);
+               return;
+       case NFT_CT_STATUS:
+               dest->data[0] = ct->status;
+               return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       case NFT_CT_MARK:
+               dest->data[0] = ct->mark;
+               return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+       case NFT_CT_SECMARK:
+               dest->data[0] = ct->secmark;
+               return;
+#endif
+       case NFT_CT_EXPIRATION:
+               diff = (long)jiffies - (long)ct->timeout.expires;
+               if (diff < 0)
+                       diff = 0;
+               dest->data[0] = jiffies_to_msecs(diff);
+               return;
+       case NFT_CT_HELPER:
+               if (ct->master == NULL)
+                       goto err;
+               help = nfct_help(ct->master);
+               if (help == NULL)
+                       goto err;
+               helper = rcu_dereference(help->helper);
+               if (helper == NULL)
+                       goto err;
+               if (strlen(helper->name) >= sizeof(dest->data))
+                       goto err;
+               strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+               return;
+       }
+
+       tuple = &ct->tuplehash[priv->dir].tuple;
+       switch (priv->key) {
+       case NFT_CT_L3PROTOCOL:
+               dest->data[0] = nf_ct_l3num(ct);
+               return;
+       case NFT_CT_SRC:
+               memcpy(dest->data, tuple->src.u3.all,
+                      nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+               return;
+       case NFT_CT_DST:
+               memcpy(dest->data, tuple->dst.u3.all,
+                      nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+               return;
+       case NFT_CT_PROTOCOL:
+               dest->data[0] = nf_ct_protonum(ct);
+               return;
+       case NFT_CT_PROTO_SRC:
+               dest->data[0] = (__force __u16)tuple->src.u.all;
+               return;
+       case NFT_CT_PROTO_DST:
+               dest->data[0] = (__force __u16)tuple->dst.u.all;
+               return;
+       }
+       return;
+err:
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy for "ct": register and key are u32, direction is u8. */
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+       [NFTA_CT_DREG]          = { .type = NLA_U32 },
+       [NFTA_CT_KEY]           = { .type = NLA_U32 },
+       [NFTA_CT_DIRECTION]     = { .type = NLA_U8 },
+};
+
+static int nft_ct_init(const struct nft_ctx *ctx,
+                      const struct nft_expr *expr,
+                      const struct nlattr * const tb[])
+{
+       struct nft_ct *priv = nft_expr_priv(expr);
+       int err;
+
+       if (tb[NFTA_CT_DREG] == NULL ||
+           tb[NFTA_CT_KEY] == NULL)
+               return -EINVAL;
+
+       priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+       if (tb[NFTA_CT_DIRECTION] != NULL) {
+               priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+               switch (priv->dir) {
+               case IP_CT_DIR_ORIGINAL:
+               case IP_CT_DIR_REPLY:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       switch (priv->key) {
+       case NFT_CT_STATE:
+       case NFT_CT_DIRECTION:
+       case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+       case NFT_CT_SECMARK:
+#endif
+       case NFT_CT_EXPIRATION:
+       case NFT_CT_HELPER:
+               if (tb[NFTA_CT_DIRECTION] != NULL)
+                       return -EINVAL;
+               break;
+       case NFT_CT_PROTOCOL:
+       case NFT_CT_SRC:
+       case NFT_CT_DST:
+       case NFT_CT_PROTO_SRC:
+       case NFT_CT_PROTO_DST:
+               if (tb[NFTA_CT_DIRECTION] == NULL)
+                       return -EINVAL;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       err = nf_ct_l3proto_try_module_get(ctx->afi->family);
+       if (err < 0)
+               return err;
+       priv->family = ctx->afi->family;
+
+       priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+       err = nft_validate_output_register(priv->dreg);
+       if (err < 0)
+               goto err1;
+
+       err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+       if (err < 0)
+               goto err1;
+       return 0;
+
+err1:
+       nf_ct_l3proto_module_put(ctx->afi->family);
+       return err;
+}
+
+/* Release the l3proto module reference recorded in priv->family by nft_ct_init(). */
+static void nft_ct_destroy(const struct nft_expr *expr)
+{
+       const struct nft_ct *ct_expr = nft_expr_priv(expr);
+       const uint8_t family = ct_expr->family;
+
+       nf_ct_l3proto_module_put(family);
+}
+
+/*
+ * Dump the expression's configuration as netlink attributes.
+ *
+ * NFTA_CT_DIRECTION is emitted only for the tuple-based keys:
+ * nft_ct_init() rejects a direction attribute for every other key, so
+ * dumping it unconditionally would produce output that cannot be loaded
+ * back.
+ *
+ * Returns 0 on success, -1 if an attribute could not be added to skb.
+ */
+static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_ct *priv = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+               goto nla_put_failure;
+
+       switch (priv->key) {
+       case NFT_CT_L3PROTOCOL:
+       case NFT_CT_PROTOCOL:
+       case NFT_CT_SRC:
+       case NFT_CT_DST:
+       case NFT_CT_PROTO_SRC:
+       case NFT_CT_PROTO_DST:
+               if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+                       goto nla_put_failure;
+               break;
+       default:
+               break;
+       }
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/* Expression type descriptor for "ct": callbacks plus attribute policy. */
+static struct nft_expr_ops nft_ct_ops __read_mostly = {
+       .name           = "ct",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_ct_eval,
+       .init           = nft_ct_init,
+       .destroy        = nft_ct_destroy,
+       .dump           = nft_ct_dump,
+       .policy         = nft_ct_policy,
+       .maxattr        = NFTA_CT_MAX,
+};
+
+/* Module load: register the "ct" expression; returns the registration result. */
+static int __init nft_ct_module_init(void)
+{
+       return nft_register_expr(&nft_ct_ops);
+}
+
+/* Module unload: unregister the "ct" expression. */
+static void __exit nft_ct_module_exit(void)
+{
+       nft_unregister_expr(&nft_ct_ops);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644 (file)
index 0000000..9fc8eb3
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private per-expression state of the skeleton expression.  No members are
+ * declared here.
+ * NOTE(review): nft_template_dump() reads priv->field, which this struct
+ * does not declare -- a real expression must add its members here.
+ */
+struct nft_template {
+
+};
+
+/*
+ * Evaluation callback of the skeleton expression.  The body only fetches
+ * the private area and otherwise does nothing.
+ */
+static void nft_template_eval(const struct nft_expr *expr,
+                             struct nft_data data[NFT_REG_MAX + 1],
+                             const struct nft_pktinfo *pkt)
+{
+       struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+/* Netlink attribute policy of the skeleton: a single u32 attribute. */
+static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
+       [NFTA_TEMPLATE_ATTR]            = { .type = NLA_U32 },
+};
+
+/*
+ * Init callback of the skeleton expression: fetches the private area and
+ * reports success without parsing any attribute.
+ *
+ * The attribute table is declared "const struct nlattr * const tb[]" so
+ * the prototype matches the init callbacks of the other expressions in
+ * this patch.
+ */
+static int nft_template_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr * const tb[])
+{
+       struct nft_template *priv = nft_expr_priv(expr);
+
+       return 0;
+}
+
+/*
+ * Destroy callback of the skeleton expression; nothing is released.
+ * NOTE(review): this takes a ctx argument while nft_ct_destroy() and
+ * nft_immediate_destroy() take only the expression -- confirm which
+ * prototype the destroy hook expects.
+ */
+static void nft_template_destroy(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr)
+{
+       struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+/*
+ * Dump callback of the skeleton expression.
+ *
+ * Uses nla_put_be32() with an explicit failure check and htonl(), like
+ * the dump callbacks of the other expressions in this patch, instead of
+ * the NLA_PUT_BE32() macro.  priv->field is a placeholder for whatever
+ * member a real expression stores.
+ *
+ * Returns 0 on success, -1 if the attribute could not be added to skb.
+ */
+static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_template *priv = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_TEMPLATE_ATTR, htonl(priv->field)))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/* Expression type descriptor for the skeleton "template" expression. */
+static struct nft_expr_ops template_ops __read_mostly = {
+       .name           = "template",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_template)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_template_eval,
+       .init           = nft_template_init,
+       .destroy        = nft_template_destroy,
+       .dump           = nft_template_dump,
+       .policy         = nft_template_policy,
+       .maxattr        = NFTA_TEMPLATE_MAX,
+};
+
+/* Module load: register the "template" expression; returns the registration result. */
+static int __init nft_template_module_init(void)
+{
+       return nft_register_expr(&template_ops);
+}
+
+/* Module unload: unregister the "template" expression. */
+static void __exit nft_template_module_exit(void)
+{
+       nft_unregister_expr(&template_ops);
+}
+
+module_init(nft_template_module_init);
+module_exit(nft_template_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644 (file)
index 0000000..21c6a6b
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+// FIXME:
+#include <net/ipv6.h>
+
+/* Private state of one "exthdr" expression, filled in by nft_exthdr_init(). */
+struct nft_exthdr {
+       /* header type passed to ipv6_find_hdr() */
+       u8                      type;
+       /* byte offset added to the start of the located header */
+       u8                      offset;
+       /* number of bytes copied into the destination register */
+       u8                      len;
+       /* destination register written by nft_exthdr_eval() */
+       enum nft_registers      dreg:8;
+};
+
+/*
+ * Locate the configured IPv6 extension header in the packet and copy
+ * priv->len bytes, starting priv->offset bytes into it, to the destination
+ * register.  If the header cannot be found or the bytes cannot be copied,
+ * the verdict register is set to NFT_BREAK.
+ */
+static void nft_exthdr_eval(const struct nft_expr *expr,
+                           struct nft_data data[NFT_REG_MAX + 1],
+                           const struct nft_pktinfo *pkt)
+{
+       struct nft_exthdr *hdr = nft_expr_priv(expr);
+       unsigned int hdr_start;
+
+       /* Short-circuit: hdr_start is only read once the lookup succeeded. */
+       if (ipv6_find_hdr(pkt->skb, &hdr_start, hdr->type, NULL, NULL) >= 0 &&
+           skb_copy_bits(pkt->skb, hdr_start + hdr->offset,
+                         data[hdr->dreg].data, hdr->len) >= 0)
+               return;
+
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy for "exthdr": type is u8, the rest are u32. */
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+       [NFTA_EXTHDR_DREG]              = { .type = NLA_U32 },
+       [NFTA_EXTHDR_TYPE]              = { .type = NLA_U8 },
+       [NFTA_EXTHDR_OFFSET]            = { .type = NLA_U32 },
+       [NFTA_EXTHDR_LEN]               = { .type = NLA_U32 },
+};
+
+/*
+ * Parse and validate the attributes of an "exthdr" expression.
+ *
+ * All four attributes are mandatory.  Offset and length arrive as 32-bit
+ * values but are stored in u8 members, so they are range-checked in full
+ * width before being stored; otherwise an oversized value would be
+ * silently truncated (a length of 0x110 would be accepted as 0x10).
+ *
+ * Returns 0 on success, -EINVAL for missing or out-of-range attributes,
+ * or the error reported by the register validation helpers.
+ */
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
+{
+       struct nft_exthdr *priv = nft_expr_priv(expr);
+       u32 offset, len;
+       int err;
+
+       if (tb[NFTA_EXTHDR_DREG] == NULL ||
+           tb[NFTA_EXTHDR_TYPE] == NULL ||
+           tb[NFTA_EXTHDR_OFFSET] == NULL ||
+           tb[NFTA_EXTHDR_LEN] == NULL)
+               return -EINVAL;
+
+       offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+       len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+
+       /* priv->offset is a u8: anything larger cannot be represented. */
+       if (offset > 0xff)
+               return -EINVAL;
+       /* The copy must be non-empty and fit into one register. */
+       if (len == 0 ||
+           len > FIELD_SIZEOF(struct nft_data, data))
+               return -EINVAL;
+
+       priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+       priv->offset = offset;
+       priv->len    = len;
+
+       priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
+       err = nft_validate_output_register(priv->dreg);
+       if (err < 0)
+               return err;
+       return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+/*
+ * Dump register, header type, offset and length as netlink attributes,
+ * in that order.  Returns 0 on success, -1 as soon as one attribute
+ * cannot be added to skb.
+ */
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_exthdr *hdr = nft_expr_priv(expr);
+
+       /* || stops at the first failing put, like the goto chain it replaces. */
+       if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(hdr->dreg)) ||
+           nla_put_u8(skb, NFTA_EXTHDR_TYPE, hdr->type) ||
+           nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(hdr->offset)) ||
+           nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(hdr->len)))
+               return -1;
+
+       return 0;
+}
+
+/* Expression type descriptor for "exthdr"; no .destroy callback is set. */
+static struct nft_expr_ops exthdr_ops __read_mostly = {
+       .name           = "exthdr",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_exthdr_eval,
+       .init           = nft_exthdr_init,
+       .dump           = nft_exthdr_dump,
+       .policy         = nft_exthdr_policy,
+       .maxattr        = NFTA_EXTHDR_MAX,
+};
+
+/* Module load: register the "exthdr" expression; returns the registration result. */
+static int __init nft_exthdr_module_init(void)
+{
+       return nft_register_expr(&exthdr_ops);
+}
+
+/* Module unload: unregister the "exthdr" expression. */
+static void __exit nft_exthdr_module_exit(void)
+{
+       nft_unregister_expr(&exthdr_ops);
+}
+
+module_init(nft_exthdr_module_init);
+module_exit(nft_exthdr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644 (file)
index 0000000..67cc502
--- /dev/null
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private state of one "hash" expression: a fixed-size chained hash table. */
+struct nft_hash {
+       /* bucket array of hsize list heads, allocated in nft_hash_init() */
+       struct hlist_head       *hash;
+       /* number of buckets in hash[] */
+       unsigned int            hsize;
+       /* register holding the lookup key */
+       enum nft_registers      sreg:8;
+       /* register receiving the element data when NFT_HASH_MAP is set */
+       enum nft_registers      dreg:8;
+       /* key length in bytes used for hashing and comparison */
+       u8                      klen;
+       /* data length passed to nft_data_dump() for element data */
+       u8                      dlen;
+       /* NFT_HASH_MAP is the only flag accepted by nft_hash_init() */
+       u16                     flags;
+};
+
+/*
+ * One hash table entry.  data[] is a flexible array: nft_hash_elem_init()
+ * allocates room for one element of it only when NFT_HASH_MAP is set.
+ */
+struct nft_hash_elem {
+       struct hlist_node       hnode;
+       struct nft_data         key;
+       struct nft_data         data[];
+};
+
+/* Random jhash seed, filled in on the first nft_hash_init() call. */
+static u32 nft_hash_rnd __read_mostly;
+/* Set once nft_hash_rnd has been seeded. */
+static bool nft_hash_rnd_initted __read_mostly;
+
+/*
+ * Map a key to a bucket index in [0, hsize).
+ *
+ * Only whole 32-bit words of the key (len / 4) are fed to jhash2().  The
+ * 32-bit hash is then scaled to the table size with a multiply and shift
+ * rather than a modulo.
+ */
+static unsigned int nft_hash_data(const struct nft_data *data,
+                                 unsigned int hsize, unsigned int len)
+{
+       /* FIXME: can we reasonably guarantee the upper bits are fixed? */
+       const u32 nwords = len / sizeof(u32);
+       const u64 scaled = (u64)jhash2(data->data, nwords, nft_hash_rnd) * hsize;
+
+       return scaled >> 32;
+}
+
+/*
+ * Look the source register's value up in the hash table.
+ *
+ * On a hit the rule continues; for maps (NFT_HASH_MAP) the element's data
+ * is first copied to the destination register.  On a miss the verdict
+ * register is set to NFT_BREAK.
+ */
+static void nft_hash_eval(const struct nft_expr *expr,
+                         struct nft_data data[NFT_REG_MAX + 1],
+                         const struct nft_pktinfo *pkt)
+{
+       const struct nft_hash *tbl = nft_expr_priv(expr);
+       const struct nft_data *lookup = &data[tbl->sreg];
+       const struct nft_hash_elem *cur;
+       unsigned int bucket;
+
+       bucket = nft_hash_data(lookup, tbl->hsize, tbl->klen);
+       hlist_for_each_entry(cur, &tbl->hash[bucket], hnode) {
+               /* nft_data_cmp() returns 0 when the keys are equal */
+               if (nft_data_cmp(&cur->key, lookup, tbl->klen) == 0) {
+                       if (tbl->flags & NFT_HASH_MAP)
+                               nft_data_copy(&data[tbl->dreg], cur->data);
+                       return;
+               }
+       }
+
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Release one element: uninit its key, uninit its data when the table is
+ * a map (using the type implied by the destination register), then free
+ * the element itself.  The caller must already have unlinked it.
+ */
+static void nft_hash_elem_destroy(const struct nft_expr *expr,
+                                 struct nft_hash_elem *elem)
+{
+       const struct nft_hash *tbl = nft_expr_priv(expr);
+       const bool is_map = tbl->flags & NFT_HASH_MAP;
+
+       nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+       if (is_map)
+               nft_data_uninit(elem->data, nft_dreg_to_type(tbl->dreg));
+
+       kfree(elem);
+}
+
+/* Netlink attribute policy for one hash element: key and data are nested. */
+static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = {
+       [NFTA_HE_KEY]           = { .type = NLA_NESTED },
+       [NFTA_HE_DATA]          = { .type = NLA_NESTED },
+};
+
+/*
+ * Build one hash element from a nested NFTA_LIST_ELEM attribute.
+ *
+ * The key is mandatory and must be a plain value of exactly priv->klen
+ * bytes.  Data is mandatory for maps (NFT_HASH_MAP) and forbidden
+ * otherwise; it must be loadable into the destination register.
+ *
+ * The data length of the first map element is recorded in priv->dlen,
+ * which the dump path passes to nft_data_dump(), and every later element
+ * must use the same length.  Nothing else in this file assigns
+ * priv->dlen.
+ *
+ * On success *new points to the allocated element and 0 is returned; on
+ * failure everything allocated here is released and a negative errno is
+ * returned.
+ */
+static int nft_hash_elem_init(const struct nft_ctx *ctx,
+                             const struct nft_expr *expr,
+                             const struct nlattr *nla,
+                             struct nft_hash_elem **new)
+{
+       struct nft_hash *priv = nft_expr_priv(expr);
+       struct nlattr *tb[NFTA_HE_MAX + 1];
+       struct nft_hash_elem *elem;
+       struct nft_data_desc d1, d2;
+       unsigned int size;
+       int err;
+
+       err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy);
+       if (err < 0)
+               return err;
+
+       if (tb[NFTA_HE_KEY] == NULL)
+               return -EINVAL;
+       size = sizeof(*elem);
+
+       if (priv->flags & NFT_HASH_MAP) {
+               if (tb[NFTA_HE_DATA] == NULL)
+                       return -EINVAL;
+               /* room for the single trailing data[] entry */
+               size += sizeof(elem->data[0]);
+       } else {
+               if (tb[NFTA_HE_DATA] != NULL)
+                       return -EINVAL;
+       }
+
+       elem = kzalloc(size, GFP_KERNEL);
+       if (elem == NULL)
+               return -ENOMEM;
+
+       err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]);
+       if (err < 0)
+               goto err1;
+       err = -EINVAL;
+       if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+               goto err2;
+
+       if (tb[NFTA_HE_DATA] != NULL) {
+               err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]);
+               if (err < 0)
+                       goto err2;
+               err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+               if (err < 0)
+                       goto err3;
+
+               /* One table-wide data length: set by the first element. */
+               err = -EINVAL;
+               if (priv->dlen != 0 && priv->dlen != d2.len)
+                       goto err3;
+               priv->dlen = d2.len;
+       }
+
+       *new = elem;
+       return 0;
+
+err3:
+       nft_data_uninit(elem->data, d2.type);
+err2:
+       nft_data_uninit(&elem->key, d1.type);
+err1:
+       kfree(elem);
+       return err;
+}
+
+/*
+ * Dump one element as a nested NFTA_LIST_ELEM attribute holding the key
+ * and, for maps, the data.
+ *
+ * The data type is derived from the destination register with
+ * nft_dreg_to_type(), as nft_hash_elem_destroy() does, rather than being
+ * hard-coded to NFT_DATA_VALUE.
+ *
+ * Returns 0 on success, -1 if skb has no room.
+ */
+static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+                             const struct nft_hash_elem *elem)
+
+{
+       const struct nft_hash *priv = nft_expr_priv(expr);
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+       if (nest == NULL)
+               goto nla_put_failure;
+
+       if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key,
+                         NFT_DATA_VALUE, priv->klen) < 0)
+               goto nla_put_failure;
+
+       if (priv->flags & NFT_HASH_MAP) {
+               if (nft_data_dump(skb, NFTA_HE_DATA, elem->data,
+                                 nft_dreg_to_type(priv->dreg),
+                                 priv->dlen) < 0)
+                       goto nla_put_failure;
+       }
+
+       nla_nest_end(skb, nest);
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/*
+ * Tear the hash table down: unlink and destroy every element of every
+ * bucket, then free the bucket array.
+ */
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr)
+{
+       const struct nft_hash *tbl = nft_expr_priv(expr);
+       unsigned int bucket;
+
+       for (bucket = 0; bucket < tbl->hsize; bucket++) {
+               struct hlist_head *head = &tbl->hash[bucket];
+               struct nft_hash_elem *victim;
+               struct hlist_node *tmp;
+
+               /* _safe variant: the current entry is freed inside the loop */
+               hlist_for_each_entry_safe(victim, tmp, head, hnode) {
+                       hlist_del(&victim->hnode);
+                       nft_hash_elem_destroy(expr, victim);
+               }
+       }
+
+       kfree(tbl->hash);
+}
+
+/* Netlink attribute policy for "hash": scalars are u32, elements are nested. */
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+       [NFTA_HASH_FLAGS]       = { .type = NLA_U32 },
+       [NFTA_HASH_SREG]        = { .type = NLA_U32 },
+       [NFTA_HASH_DREG]        = { .type = NLA_U32 },
+       [NFTA_HASH_KLEN]        = { .type = NLA_U32 },
+       [NFTA_HASH_ELEMENTS]    = { .type = NLA_NESTED },
+};
+
+/*
+ * Parse the attributes of a "hash" expression and build its table.
+ *
+ * NFTA_HASH_SREG, NFTA_HASH_KLEN and NFTA_HASH_ELEMENTS are mandatory.
+ * NFTA_HASH_DREG is only accepted together with the NFT_HASH_MAP flag.
+ * The key length is checked in full 32-bit width against the register
+ * size before it is stored in the u8 member, so oversized values are
+ * rejected instead of being truncated.
+ *
+ * The table is sized for a load factor of about 0.75 and always has at
+ * least one bucket: an empty element list would otherwise give a
+ * zero-length bucket array that nft_hash_eval() still indexes.
+ *
+ * Returns 0 on success; -EINVAL for bad attributes, -ENOMEM on allocation
+ * failure, -EEXIST for a duplicate key, or the error of a helper.  On
+ * failure after the table was allocated, everything inserted so far is
+ * released through nft_hash_destroy().
+ */
+static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                        const struct nlattr * const tb[])
+{
+       struct nft_hash *priv = nft_expr_priv(expr);
+       struct nft_hash_elem *elem, *uninitialized_var(new);
+       const struct nlattr *nla;
+       unsigned int cnt, i;
+       unsigned int h;
+       u32 klen;
+       int err, rem;
+
+       /* Seed the hash once, on first use. */
+       if (unlikely(!nft_hash_rnd_initted)) {
+               get_random_bytes(&nft_hash_rnd, 4);
+               nft_hash_rnd_initted = true;
+       }
+
+       if (tb[NFTA_HASH_SREG] == NULL ||
+           tb[NFTA_HASH_KLEN] == NULL ||
+           tb[NFTA_HASH_ELEMENTS] == NULL)
+               return -EINVAL;
+
+       if (tb[NFTA_HASH_FLAGS] != NULL) {
+               priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS]));
+               if (priv->flags & ~NFT_HASH_MAP)
+                       return -EINVAL;
+       }
+
+       priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG]));
+       err = nft_validate_input_register(priv->sreg);
+       if (err < 0)
+               return err;
+
+       if (tb[NFTA_HASH_DREG] != NULL) {
+               if (!(priv->flags & NFT_HASH_MAP))
+                       return -EINVAL;
+               priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG]));
+               err = nft_validate_output_register(priv->dreg);
+               if (err < 0)
+                       return err;
+       }
+
+       /* Validate before narrowing to u8; a key must fit one register. */
+       klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN]));
+       if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+               return -EINVAL;
+       priv->klen = klen;
+
+       cnt = 0;
+       nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+               if (nla_type(nla) != NFTA_LIST_ELEM)
+                       return -EINVAL;
+               cnt++;
+       }
+
+       /* Aim for a load factor of 0.75 */
+       cnt = cnt * 4 / 3;
+       /* Never build a table without buckets: lookups index hash[]. */
+       if (cnt == 0)
+               cnt = 1;
+
+       priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
+       if (priv->hash == NULL)
+               return -ENOMEM;
+       priv->hsize = cnt;
+
+       for (i = 0; i < cnt; i++)
+               INIT_HLIST_HEAD(&priv->hash[i]);
+
+       nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+               err = nft_hash_elem_init(ctx, expr, nla, &new);
+               if (err < 0)
+                       goto err1;
+
+               /* Reject duplicate keys before linking the new element. */
+               h = nft_hash_data(&new->key, priv->hsize, priv->klen);
+               hlist_for_each_entry(elem, &priv->hash[h], hnode) {
+                       if (nft_data_cmp(&elem->key, &new->key, priv->klen))
+                               continue;
+                       nft_hash_elem_destroy(expr, new);
+                       err = -EEXIST;
+                       goto err1;
+               }
+               hlist_add_head(&new->hnode, &priv->hash[h]);
+       }
+       return 0;
+
+err1:
+       nft_hash_destroy(ctx, expr);
+       return err;
+}
+
+/*
+ * Dump the table configuration followed by every element, nested inside
+ * NFTA_HASH_ELEMENTS.  The flags are emitted only when non-zero and the
+ * destination register only for maps.
+ *
+ * Returns 0 on success, -1 as soon as skb runs out of room.
+ */
+static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_hash *tbl = nft_expr_priv(expr);
+       const struct nft_hash_elem *cur;
+       struct nlattr *nest;
+       unsigned int bucket;
+
+       if (tbl->flags &&
+           nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(tbl->flags)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(tbl->sreg)))
+               return -1;
+       if ((tbl->flags & NFT_HASH_MAP) &&
+           nla_put_be32(skb, NFTA_HASH_DREG, htonl(tbl->dreg)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(tbl->klen)))
+               return -1;
+
+       nest = nla_nest_start(skb, NFTA_HASH_ELEMENTS);
+       if (nest == NULL)
+               return -1;
+
+       for (bucket = 0; bucket < tbl->hsize; bucket++) {
+               hlist_for_each_entry(cur, &tbl->hash[bucket], hnode) {
+                       if (nft_hash_elem_dump(skb, expr, cur) < 0)
+                               return -1;
+               }
+       }
+
+       nla_nest_end(skb, nest);
+       return 0;
+}
+
+/* Expression type descriptor for "hash": callbacks plus attribute policy. */
+static struct nft_expr_ops nft_hash_ops __read_mostly = {
+       .name           = "hash",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_hash_eval,
+       .init           = nft_hash_init,
+       .destroy        = nft_hash_destroy,
+       .dump           = nft_hash_dump,
+       .policy         = nft_hash_policy,
+       .maxattr        = NFTA_HASH_MAX,
+};
+
+/* Module load: register the "hash" expression; returns the registration result. */
+static int __init nft_hash_module_init(void)
+{
+       return nft_register_expr(&nft_hash_ops);
+}
+
+/* Module unload: unregister the "hash" expression. */
+static void __exit nft_hash_module_exit(void)
+{
+       nft_unregister_expr(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644 (file)
index 0000000..3bf42c3
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private state of one "immediate" expression. */
+struct nft_immediate_expr {
+       /* constant copied into the destination register on evaluation */
+       struct nft_data         data;
+       /* destination register */
+       enum nft_registers      dreg:8;
+       /* length of data as reported by nft_data_init(); used when dumping */
+       u8                      dlen;
+};
+
+/* Copy the expression's stored constant into its destination register. */
+static void nft_immediate_eval(const struct nft_expr *expr,
+                              struct nft_data data[NFT_REG_MAX + 1],
+                              const struct nft_pktinfo *pkt)
+{
+       const struct nft_immediate_expr *imm = nft_expr_priv(expr);
+       struct nft_data *dst = &data[imm->dreg];
+
+       nft_data_copy(dst, &imm->data);
+}
+
+/* Netlink attribute policy for "immediate": u32 register, nested data. */
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+       [NFTA_IMMEDIATE_DREG]   = { .type = NLA_U32 },
+       [NFTA_IMMEDIATE_DATA]   = { .type = NLA_NESTED },
+};
+
+/*
+ * Parse and validate the attributes of an "immediate" expression.
+ *
+ * Both the destination register and the data are mandatory.  The data is
+ * parsed into the private area, its length is remembered for dumping, and
+ * the load into the register is validated; if that validation fails the
+ * parsed data is released again.
+ *
+ * Returns 0 on success, -EINVAL for missing attributes, or the error
+ * reported by one of the helpers.
+ */
+static int nft_immediate_init(const struct nft_ctx *ctx,
+                             const struct nft_expr *expr,
+                             const struct nlattr * const tb[])
+{
+       struct nft_immediate_expr *imm = nft_expr_priv(expr);
+       struct nft_data_desc desc;
+       int ret;
+
+       if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+           tb[NFTA_IMMEDIATE_DATA] == NULL)
+               return -EINVAL;
+
+       imm->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
+       ret = nft_validate_output_register(imm->dreg);
+       if (ret < 0)
+               return ret;
+
+       ret = nft_data_init(ctx, &imm->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+       if (ret < 0)
+               return ret;
+       imm->dlen = desc.len;
+
+       ret = nft_validate_data_load(ctx, imm->dreg, &imm->data, desc.type);
+       if (ret < 0) {
+               /* the data was initialised above, so undo that */
+               nft_data_uninit(&imm->data, desc.type);
+               return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * Release the stored constant.  Its type is derived from the destination
+ * register with nft_dreg_to_type(), as in nft_immediate_dump().
+ *
+ * The call is a plain statement: a "return <expr>;" is not allowed in a
+ * function returning void.
+ */
+static void nft_immediate_destroy(const struct nft_expr *expr)
+{
+       const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+       nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+/*
+ * Dump the destination register and the stored constant.  Returns -1 if
+ * the register attribute cannot be added, otherwise whatever
+ * nft_data_dump() returns for the data.
+ */
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_immediate_expr *imm = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(imm->dreg)))
+               return -1;
+
+       return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &imm->data,
+                            nft_dreg_to_type(imm->dreg), imm->dlen);
+}
+
+/* Expression type descriptor for "immediate": callbacks plus attribute policy. */
+static struct nft_expr_ops nft_imm_ops __read_mostly = {
+       .name           = "immediate",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_immediate_eval,
+       .init           = nft_immediate_init,
+       .destroy        = nft_immediate_destroy,
+       .dump           = nft_immediate_dump,
+       .policy         = nft_immediate_policy,
+       .maxattr        = NFTA_IMMEDIATE_MAX,
+};
+
+/*
+ * Register the "immediate" expression; returns the registration result.
+ * NOTE(review): non-static and not hooked up with module_init() in this
+ * file, so it is presumably called from elsewhere -- confirm the caller.
+ */
+int __init nft_immediate_module_init(void)
+{
+       return nft_register_expr(&nft_imm_ops);
+}
+
+/* Unregister the "immediate" expression; counterpart of the init function. */
+void nft_immediate_module_exit(void)
+{
+       nft_unregister_expr(&nft_imm_ops);
+}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644 (file)
index 0000000..e0e3fc8
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * File-wide lock taken by nft_limit_eval() around every update of a
+ * struct nft_limit; it is shared by all limit expressions.
+ */
+static DEFINE_SPINLOCK(limit_lock);
+
+/*
+ * Private state of a limit expression.
+ *
+ * tokens: packets that may still pass before the next refill
+ * rate:   value @tokens is reset to on every refill
+ * unit:   refill period; multiplied by HZ when computing @stamp
+ * stamp:  jiffies value from which the next refill takes place
+ */
+struct nft_limit {
+       u64             tokens;
+       u64             rate;
+       u64             unit;
+       unsigned long   stamp;
+};
+
+/*
+ * Evaluate a limit expression for one packet.
+ *
+ * Once jiffies has reached the stored stamp the token count is reset to the
+ * configured rate and the stamp is moved one period ahead. A packet that
+ * finds a token consumes it and leaves the verdict untouched; a packet that
+ * finds none sets the verdict register to NFT_BREAK.
+ */
+static void nft_limit_eval(const struct nft_expr *expr,
+                          struct nft_data data[NFT_REG_MAX + 1],
+                          const struct nft_pktinfo *pkt)
+{
+       struct nft_limit *limit = nft_expr_priv(expr);
+       bool allowed = false;
+
+       spin_lock_bh(&limit_lock);
+
+       /* refill when the current period has elapsed */
+       if (time_after_eq(jiffies, limit->stamp)) {
+               limit->tokens = limit->rate;
+               limit->stamp = jiffies + limit->unit * HZ;
+       }
+
+       /* take one token if any is left */
+       if (limit->tokens != 0) {
+               limit->tokens--;
+               allowed = true;
+       }
+
+       spin_unlock_bh(&limit_lock);
+
+       if (!allowed)
+               data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Netlink attribute policy of the limit expression: rate and unit are both
+ * 64-bit attributes (read as big-endian values by nft_limit_init()).
+ */
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+       [NFTA_LIMIT_RATE]       = { .type = NLA_U64 },
+       [NFTA_LIMIT_UNIT]       = { .type = NLA_U64 },
+};
+
+/*
+ * Set up a limit expression from its netlink attributes.
+ *
+ * NFTA_LIMIT_RATE and NFTA_LIMIT_UNIT are both required; -EINVAL is returned
+ * when either is absent. The expression starts with a full set of tokens
+ * and the first refill scheduled one period from now. Returns 0 on success.
+ */
+static int nft_limit_init(const struct nft_ctx *ctx,
+                         const struct nft_expr *expr,
+                         const struct nlattr * const tb[])
+{
+       struct nft_limit *limit = nft_expr_priv(expr);
+       const struct nlattr *rate = tb[NFTA_LIMIT_RATE];
+       const struct nlattr *unit = tb[NFTA_LIMIT_UNIT];
+
+       if (!rate || !unit)
+               return -EINVAL;
+
+       limit->rate   = be64_to_cpu(nla_get_be64(rate));
+       limit->unit   = be64_to_cpu(nla_get_be64(unit));
+       limit->tokens = limit->rate;
+       limit->stamp  = jiffies + limit->unit * HZ;
+
+       return 0;
+}
+
+/*
+ * Write the configured rate and unit of a limit expression to @skb as
+ * big-endian 64-bit attributes. Returns 0 on success and -1 as soon as one
+ * attribute cannot be added.
+ */
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_limit *limit = nft_expr_priv(expr);
+
+       if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate)) ||
+           nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(limit->unit)))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Operations table for the "limit" expression. No destroy callback is set;
+ * the private area holds no separately allocated data.
+ */
+static struct nft_expr_ops nft_limit_ops __read_mostly = {
+       .name           = "limit",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_limit_eval,
+       .init           = nft_limit_init,
+       .dump           = nft_limit_dump,
+       .policy         = nft_limit_policy,
+       .maxattr        = NFTA_LIMIT_MAX,
+};
+
+/* Module entry point: register the "limit" expression type. */
+static int __init nft_limit_module_init(void)
+{
+       return nft_register_expr(&nft_limit_ops);
+}
+
+/* Module exit point: unregister the "limit" expression type. */
+static void __exit nft_limit_module_exit(void)
+{
+       nft_unregister_expr(&nft_limit_ops);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644 (file)
index 0000000..da495c3
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+/*
+ * Shared empty prefix used when no NFTA_LOG_PREFIX attribute is given.
+ * nft_log_destroy() and nft_log_dump() compare against this pointer to tell
+ * it apart from an allocated prefix.
+ */
+static const char *nft_log_null_prefix = "";
+
+/*
+ * Private state of a log expression.
+ *
+ * loginfo: logging parameters handed to nf_log_packet()
+ * prefix:  kmalloc'ed copy of NFTA_LOG_PREFIX, or nft_log_null_prefix
+ * family:  address family copied from ctx->afi->family at init time
+ */
+struct nft_log {
+       struct nf_loginfo       loginfo;
+       char                    *prefix;
+       int                     family;
+};
+
+/*
+ * Log the current packet through nf_log_packet() using the expression's
+ * family, loginfo and prefix. The network namespace is taken from the input
+ * device when there is one, otherwise from the output device.
+ */
+static void nft_log_eval(const struct nft_expr *expr,
+                        struct nft_data data[NFT_REG_MAX + 1],
+                        const struct nft_pktinfo *pkt)
+{
+       const struct nft_log *log = nft_expr_priv(expr);
+       const struct net_device *dev = pkt->in;
+       struct net *net;
+
+       if (dev == NULL)
+               dev = pkt->out;
+       net = dev_net(dev);
+
+       nf_log_packet(net, log->family, pkt->hooknum, pkt->skb, pkt->in,
+                     pkt->out, &log->loginfo, "%s", log->prefix);
+}
+
+/*
+ * Netlink attribute policy of the log expression. All attributes are
+ * optional in nft_log_init().
+ */
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+       [NFTA_LOG_GROUP]        = { .type = NLA_U16 },
+       [NFTA_LOG_PREFIX]       = { .type = NLA_STRING },
+       [NFTA_LOG_SNAPLEN]      = { .type = NLA_U32 },
+       [NFTA_LOG_QTHRESHOLD]   = { .type = NLA_U16 },
+};
+
+/*
+ * Set up a log expression from its netlink attributes.
+ *
+ * The prefix, when supplied, is copied into a freshly allocated buffer;
+ * otherwise the shared empty prefix is used. Group, snaplen and queue
+ * threshold are only written when their attribute is present. Returns 0 on
+ * success or -ENOMEM if the prefix buffer cannot be allocated.
+ */
+static int nft_log_init(const struct nft_ctx *ctx,
+                       const struct nft_expr *expr,
+                       const struct nlattr * const tb[])
+{
+       struct nft_log *log = nft_expr_priv(expr);
+       struct nf_loginfo *info = &log->loginfo;
+       const struct nlattr *attr;
+
+       log->family = ctx->afi->family;
+
+       attr = tb[NFTA_LOG_PREFIX];
+       if (attr == NULL) {
+               log->prefix = (char *)nft_log_null_prefix;
+       } else {
+               /* one extra byte for the terminating NUL */
+               log->prefix = kmalloc(nla_len(attr) + 1, GFP_KERNEL);
+               if (log->prefix == NULL)
+                       return -ENOMEM;
+               nla_strlcpy(log->prefix, attr, nla_len(attr) + 1);
+       }
+
+       info->type = NF_LOG_TYPE_ULOG;
+
+       attr = tb[NFTA_LOG_GROUP];
+       if (attr != NULL)
+               info->u.ulog.group = ntohs(nla_get_be16(attr));
+
+       attr = tb[NFTA_LOG_SNAPLEN];
+       if (attr != NULL)
+               info->u.ulog.copy_len = ntohl(nla_get_be32(attr));
+
+       attr = tb[NFTA_LOG_QTHRESHOLD];
+       if (attr != NULL)
+               info->u.ulog.qthreshold = ntohs(nla_get_be16(attr));
+
+       return 0;
+}
+
+/*
+ * Free the prefix string of a log expression, unless it is the shared
+ * nft_log_null_prefix, which was never allocated.
+ */
+static void nft_log_destroy(const struct nft_expr *expr)
+{
+       struct nft_log *priv = nft_expr_priv(expr);
+
+       if (priv->prefix != nft_log_null_prefix)
+               kfree(priv->prefix);
+}
+
+/*
+ * Write the configuration of a log expression to @skb.
+ *
+ * The prefix is emitted only when it is not the shared empty prefix; group,
+ * snaplen and queue threshold are emitted only when non-zero. Returns 0 on
+ * success and -1 as soon as one attribute cannot be added.
+ */
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_log *log = nft_expr_priv(expr);
+       const struct nf_loginfo *info = &log->loginfo;
+
+       if (log->prefix != nft_log_null_prefix &&
+           nla_put_string(skb, NFTA_LOG_PREFIX, log->prefix))
+               return -1;
+
+       if (info->u.ulog.group &&
+           nla_put_be16(skb, NFTA_LOG_GROUP, htons(info->u.ulog.group)))
+               return -1;
+
+       if (info->u.ulog.copy_len &&
+           nla_put_be32(skb, NFTA_LOG_SNAPLEN, htonl(info->u.ulog.copy_len)))
+               return -1;
+
+       if (info->u.ulog.qthreshold &&
+           nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+                        htons(info->u.ulog.qthreshold)))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Operations table for the "log" expression; the destroy callback releases
+ * the prefix buffer allocated by nft_log_init().
+ */
+static struct nft_expr_ops nft_log_ops __read_mostly = {
+       .name           = "log",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_log)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_log_eval,
+       .init           = nft_log_init,
+       .destroy        = nft_log_destroy,
+       .dump           = nft_log_dump,
+       .policy         = nft_log_policy,
+       .maxattr        = NFTA_LOG_MAX,
+};
+
+/* Module entry point: register the "log" expression type. */
+static int __init nft_log_module_init(void)
+{
+       return nft_register_expr(&nft_log_ops);
+}
+
+/* Module exit point: unregister the "log" expression type. */
+static void __exit nft_log_module_exit(void)
+{
+       nft_unregister_expr(&nft_log_ops);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644 (file)
index 0000000..96735aa
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private state of a meta expression: which metadata field to load (@key)
+ * and the register it is stored in (@dreg). Both are kept in 8-bit
+ * bitfields.
+ */
+struct nft_meta {
+       enum nft_meta_keys      key:8;
+       enum nft_registers      dreg:8;
+};
+
+/*
+ * Load the packet metadata selected by priv->key into register priv->dreg.
+ *
+ * When the requested information is not available for this packet (no
+ * input/output device, no usable socket, no dst entry) the register is left
+ * alone and the verdict register is set to NFT_BREAK instead.
+ */
+static void nft_meta_eval(const struct nft_expr *expr,
+                         struct nft_data data[NFT_REG_MAX + 1],
+                         const struct nft_pktinfo *pkt)
+{
+       const struct nft_meta *priv = nft_expr_priv(expr);
+       const struct sk_buff *skb = pkt->skb;
+       const struct net_device *in = pkt->in, *out = pkt->out;
+       struct nft_data *dest = &data[priv->dreg];
+
+       switch (priv->key) {
+       case NFT_META_LEN:
+               dest->data[0] = skb->len;
+               break;
+       case NFT_META_PROTOCOL:
+               /* stored as a 16-bit big-endian value, not converted */
+               *(__be16 *)dest->data = skb->protocol;
+               break;
+       case NFT_META_PRIORITY:
+               dest->data[0] = skb->priority;
+               break;
+       case NFT_META_MARK:
+               dest->data[0] = skb->mark;
+               break;
+       case NFT_META_IIF:
+               if (in == NULL)
+                       goto err;
+               dest->data[0] = in->ifindex;
+               break;
+       case NFT_META_OIF:
+               if (out == NULL)
+                       goto err;
+               dest->data[0] = out->ifindex;
+               break;
+       case NFT_META_IIFNAME:
+               if (in == NULL)
+                       goto err;
+               /*
+                * strncpy() zero-fills the rest of the register but does not
+                * terminate a name that occupies all of it.
+                */
+               strncpy((char *)dest->data, in->name, sizeof(dest->data));
+               break;
+       case NFT_META_OIFNAME:
+               if (out == NULL)
+                       goto err;
+               strncpy((char *)dest->data, out->name, sizeof(dest->data));
+               break;
+       case NFT_META_IIFTYPE:
+               if (in == NULL)
+                       goto err;
+               *(u16 *)dest->data = in->type;
+               break;
+       case NFT_META_OIFTYPE:
+               if (out == NULL)
+                       goto err;
+               *(u16 *)dest->data = out->type;
+               break;
+       case NFT_META_SKUID:
+               /*
+                * NOTE(review): sockets in TCP_TIME_WAIT are skipped,
+                * presumably because they carry no sk_socket - confirm.
+                */
+               if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+                       goto err;
+
+               /* sk_socket and its file are only read under the lock */
+               read_lock_bh(&skb->sk->sk_callback_lock);
+               if (skb->sk->sk_socket == NULL ||
+                   skb->sk->sk_socket->file == NULL) {
+                       read_unlock_bh(&skb->sk->sk_callback_lock);
+                       goto err;
+               }
+
+               dest->data[0] =
+                       from_kuid_munged(&init_user_ns,
+                               skb->sk->sk_socket->file->f_cred->fsuid);
+               read_unlock_bh(&skb->sk->sk_callback_lock);
+               break;
+       case NFT_META_SKGID:
+               /* same procedure as NFT_META_SKUID, for the fsgid */
+               if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+                       goto err;
+
+               read_lock_bh(&skb->sk->sk_callback_lock);
+               if (skb->sk->sk_socket == NULL ||
+                   skb->sk->sk_socket->file == NULL) {
+                       read_unlock_bh(&skb->sk->sk_callback_lock);
+                       goto err;
+               }
+               dest->data[0] =
+                       from_kgid_munged(&init_user_ns,
+                                skb->sk->sk_socket->file->f_cred->fsgid);
+               read_unlock_bh(&skb->sk->sk_callback_lock);
+               break;
+#ifdef CONFIG_NET_CLS_ROUTE
+       case NFT_META_RTCLASSID: {
+               const struct dst_entry *dst = skb_dst(skb);
+
+               if (dst == NULL)
+                       goto err;
+               dest->data[0] = dst->tclassid;
+               break;
+       }
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+       case NFT_META_SECMARK:
+               dest->data[0] = skb->secmark;
+               break;
+#endif
+       default:
+               /* nft_meta_init() only accepts the keys handled above */
+               WARN_ON(1);
+               goto err;
+       }
+       return;
+
+err:
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Netlink attribute policy of the meta expression: destination register
+ * and key are both 32-bit attributes.
+ */
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+       [NFTA_META_DREG]        = { .type = NLA_U32 },
+       [NFTA_META_KEY]         = { .type = NLA_U32 },
+};
+
+/*
+ * Parse and validate the netlink attributes of a meta expression.
+ *
+ * NFTA_META_KEY and NFTA_META_DREG are both required. The key and the
+ * register number are checked on their full 32-bit wire values and are only
+ * stored in the 8-bit fields of struct nft_meta once they are known to be
+ * valid; checking after the narrowing store would let an out-of-range value
+ * alias a valid one.
+ *
+ * Returns 0 on success, -EINVAL when an attribute is missing, -EOPNOTSUPP
+ * for a key that is unknown or compiled out, or the error reported by the
+ * register validation helpers.
+ */
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                        const struct nlattr * const tb[])
+{
+       struct nft_meta *priv = nft_expr_priv(expr);
+       u32 key, dreg;
+       int err;
+
+       if (tb[NFTA_META_DREG] == NULL ||
+           tb[NFTA_META_KEY] == NULL)
+               return -EINVAL;
+
+       key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+       switch (key) {
+       case NFT_META_LEN:
+       case NFT_META_PROTOCOL:
+       case NFT_META_PRIORITY:
+       case NFT_META_MARK:
+       case NFT_META_IIF:
+       case NFT_META_OIF:
+       case NFT_META_IIFNAME:
+       case NFT_META_OIFNAME:
+       case NFT_META_IIFTYPE:
+       case NFT_META_OIFTYPE:
+       case NFT_META_SKUID:
+       case NFT_META_SKGID:
+#ifdef CONFIG_NET_CLS_ROUTE
+       case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+       case NFT_META_SECMARK:
+#endif
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+       err = nft_validate_output_register(dreg);
+       if (err < 0)
+               return err;
+
+       /* both values are validated, the narrowing stores are lossless now */
+       priv->key = key;
+       priv->dreg = dreg;
+
+       return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+/*
+ * Write destination register and key of a meta expression to @skb as
+ * big-endian 32-bit attributes. Returns 0 on success and -1 as soon as one
+ * attribute cannot be added.
+ */
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_meta *meta = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_META_DREG, htonl(meta->dreg)) ||
+           nla_put_be32(skb, NFTA_META_KEY, htonl(meta->key)))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Operations table for the "meta" expression. No destroy callback is set;
+ * the private area holds no separately allocated data.
+ */
+static struct nft_expr_ops nft_meta_ops __read_mostly = {
+       .name           = "meta",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_meta_eval,
+       .init           = nft_meta_init,
+       .dump           = nft_meta_dump,
+       .policy         = nft_meta_policy,
+       .maxattr        = NFTA_META_MAX,
+};
+
+/* Module entry point: register the "meta" expression type. */
+static int __init nft_meta_module_init(void)
+{
+       return nft_register_expr(&nft_meta_ops);
+}
+
+/* Module exit point: unregister the "meta" expression type. */
+static void __exit nft_meta_module_exit(void)
+{
+       nft_unregister_expr(&nft_meta_ops);
+}
+
+module_init(nft_meta_module_init);
+module_exit(nft_meta_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644 (file)
index 0000000..71177df
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private state of a meta target expression: the skb field to overwrite. */
+struct nft_meta {
+       enum nft_meta_keys      key;
+};
+
+/*
+ * Write the first data word of @data into the skb field selected by
+ * meta->key (mark, priority, nf_trace or secmark).
+ *
+ * NOTE(review): this callback takes four parameters (@nfres is unused),
+ * whereas the other eval callbacks added by this patch take
+ * (expr, data[], pkt) - confirm this file is still meant to be built.
+ */
+static void nft_meta_eval(const struct nft_expr *expr,
+                         struct nft_data *nfres,
+                         struct nft_data *data,
+                         const struct nft_pktinfo *pkt)
+{
+       const struct nft_meta *meta = nft_expr_priv(expr);
+       struct sk_buff *skb = pkt->skb;
+       u32 val = data->data[0];
+
+       switch (meta->key) {
+       case NFT_META_MARK:
+               skb->mark = val;
+               break;
+       case NFT_META_PRIORITY:
+               skb->priority = val;
+               break;
+       case NFT_META_NFTRACE:
+               skb->nf_trace = val;
+               break;
+#ifdef CONFIG_NETWORK_SECMARK
+       case NFT_META_SECMARK:
+               skb->secmark = val;
+               break;
+#endif
+       default:
+               /* nft_meta_init() only accepts the keys handled above */
+               WARN_ON(1);
+       }
+}
+
+/* Netlink attribute policy of the meta target: only the 32-bit key. */
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+       [NFTA_META_KEY]         = { .type = NLA_U32 },
+};
+
+/*
+ * Parse the key of a meta target expression. Only the writable keys (mark,
+ * priority, nftrace and, when configured, secmark) are accepted.
+ *
+ * Returns 0 on success, -EINVAL when the key attribute is missing or names
+ * a key that cannot be set.
+ *
+ * NOTE(review): this takes (expr, tb) while the other init callbacks added
+ * by this patch take (ctx, expr, tb) - confirm against struct nft_expr_ops.
+ */
+static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
+{
+       struct nft_meta *meta = nft_expr_priv(expr);
+
+       if (tb[NFTA_META_KEY] == NULL)
+               return -EINVAL;
+
+       meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+       switch (meta->key) {
+       case NFT_META_MARK:
+       case NFT_META_PRIORITY:
+       case NFT_META_NFTRACE:
+#ifdef CONFIG_NETWORK_SECMARK
+       case NFT_META_SECMARK:
+#endif
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Write the key of a meta target expression to @skb as a big-endian 32-bit
+ * attribute. Returns 0 on success, -1 if the attribute cannot be added.
+ *
+ * nla_put_be32() is called directly, as in the other dump callbacks of this
+ * patch, instead of going through the NLA_PUT_BE32() macro and its hidden
+ * jump to nla_put_failure.
+ */
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_meta *meta = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_META_KEY, htonl(meta->key)))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/*
+ * Operations table for the meta target. It is registered under the name
+ * "meta", the same name nft_meta_ops in nft_meta.c uses.
+ */
+static struct nft_expr_ops meta_target __read_mostly = {
+       .name           = "meta",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_meta_eval,
+       .init           = nft_meta_init,
+       .dump           = nft_meta_dump,
+       .policy         = nft_meta_policy,
+       .maxattr        = NFTA_META_MAX,
+};
+
+/* Module entry point: register the meta target expression type. */
+static int __init nft_meta_target_init(void)
+{
+       return nft_register_expr(&meta_target);
+}
+
+/* Module exit point: unregister the meta target expression type. */
+static void __exit nft_meta_target_exit(void)
+{
+       nft_unregister_expr(&meta_target);
+}
+
+module_init(nft_meta_target_init);
+module_exit(nft_meta_target_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644 (file)
index 0000000..329f134
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private state of a payload expression.
+ *
+ * base:   header the offset is relative to (link, network or transport)
+ * offset: byte offset added to the start of that header
+ * len:    number of bytes copied into the register
+ * dreg:   destination register
+ *
+ * Every member is 8 bits wide.
+ */
+struct nft_payload {
+       enum nft_payload_bases  base:8;
+       u8                      offset;
+       u8                      len;
+       enum nft_registers      dreg:8;
+};
+
+/*
+ * Copy priv->len bytes of packet data into register priv->dreg, starting
+ * priv->offset bytes past the header selected by priv->base.
+ *
+ * The verdict register is set to NFT_BREAK when the link-layer header is
+ * requested but was never set, or when skb_copy_bits() fails.
+ */
+static void nft_payload_eval(const struct nft_expr *expr,
+                            struct nft_data data[NFT_REG_MAX + 1],
+                            const struct nft_pktinfo *pkt)
+{
+       const struct nft_payload *priv = nft_expr_priv(expr);
+       const struct sk_buff *skb = pkt->skb;
+       struct nft_data *dest = &data[priv->dreg];
+       int offset;
+
+       switch (priv->base) {
+       case NFT_PAYLOAD_LL_HEADER:
+               if (!skb_mac_header_was_set(skb))
+                       goto err;
+               /* expressed relative to skb->data, like the other two */
+               offset = skb_mac_header(skb) - skb->data;
+               break;
+       case NFT_PAYLOAD_NETWORK_HEADER:
+               offset = skb_network_offset(skb);
+               break;
+       case NFT_PAYLOAD_TRANSPORT_HEADER:
+               offset = skb_transport_offset(skb);
+               break;
+       default:
+               /* nft_payload_init() rejects every other base */
+               BUG();
+       }
+       offset += priv->offset;
+
+       if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+               goto err;
+       return;
+err:
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Netlink attribute policy of the payload expression; all four attributes
+ * are 32-bit values and all are required by nft_payload_init().
+ */
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+       [NFTA_PAYLOAD_DREG]     = { .type = NLA_U32 },
+       [NFTA_PAYLOAD_BASE]     = { .type = NLA_U32 },
+       [NFTA_PAYLOAD_OFFSET]   = { .type = NLA_U32 },
+       [NFTA_PAYLOAD_LEN]      = { .type = NLA_U32 },
+};
+
+/*
+ * Parse and validate the netlink attributes of a payload expression.
+ *
+ * All of DREG, BASE, OFFSET and LEN are required. The attributes are 32 bits
+ * wide on the wire while struct nft_payload keeps each value in 8 bits, so
+ * every value is range-checked before it is stored; checking after the
+ * narrowing store would accept e.g. a length of 257 as 1.
+ *
+ * Returns 0 on success, -EINVAL for a missing attribute, an offset that does
+ * not fit into 8 bits or a length that is zero or larger than a register,
+ * -EOPNOTSUPP for an unknown base, or the error reported by the register
+ * validation helpers.
+ */
+static int nft_payload_init(const struct nft_ctx *ctx,
+                           const struct nft_expr *expr,
+                           const struct nlattr * const tb[])
+{
+       struct nft_payload *priv = nft_expr_priv(expr);
+       u32 base, offset, len, dreg;
+       int err;
+
+       if (tb[NFTA_PAYLOAD_DREG] == NULL ||
+           tb[NFTA_PAYLOAD_BASE] == NULL ||
+           tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+           tb[NFTA_PAYLOAD_LEN] == NULL)
+               return -EINVAL;
+
+       base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+       switch (base) {
+       case NFT_PAYLOAD_LL_HEADER:
+       case NFT_PAYLOAD_NETWORK_HEADER:
+       case NFT_PAYLOAD_TRANSPORT_HEADER:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+       /* priv->offset is a u8: refuse anything that would be truncated */
+       if (offset != (u8)offset)
+               return -EINVAL;
+
+       len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+       if (len == 0 ||
+           len > FIELD_SIZEOF(struct nft_data, data))
+               return -EINVAL;
+
+       dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
+       err = nft_validate_output_register(dreg);
+       if (err < 0)
+               return err;
+
+       /* everything is validated, the narrowing stores are lossless now */
+       priv->base   = base;
+       priv->offset = offset;
+       priv->len    = len;
+       priv->dreg   = dreg;
+
+       return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+/*
+ * Write destination register, base, offset and length of a payload
+ * expression to @skb, in that order, as big-endian 32-bit attributes.
+ * Returns 0 on success and -1 as soon as one attribute cannot be added.
+ */
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_payload *payload = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(payload->dreg)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(payload->base)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(payload->offset)))
+               return -1;
+       if (nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(payload->len)))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Operations table for the "payload" expression. No destroy callback is
+ * set; the private area holds no separately allocated data.
+ */
+static struct nft_expr_ops nft_payload_ops __read_mostly = {
+       .name           = "payload",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_payload_eval,
+       .init           = nft_payload_init,
+       .dump           = nft_payload_dump,
+       .policy         = nft_payload_policy,
+       .maxattr        = NFTA_PAYLOAD_MAX,
+};
+
+/*
+ * Register the "payload" expression type. Returns the result of
+ * nft_register_expr().
+ * NOTE(review): not static and not hooked to module_init() here, so this is
+ * presumably called from the nf_tables core - confirm against the caller.
+ */
+int __init nft_payload_module_init(void)
+{
+       return nft_register_expr(&nft_payload_ops);
+}
+
+/* Unregister the "payload" expression type. */
+void nft_payload_module_exit(void)
+{
+       nft_unregister_expr(&nft_payload_ops);
+}
diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c
new file mode 100644 (file)
index 0000000..7b7c835
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private state of a set expression.
+ *
+ * root:  red-black tree holding the set elements
+ * sreg:  register the lookup key is read from
+ * dreg:  register mapped data is written to (maps only)
+ * klen:  key length, at most the size of a register's data area
+ * dlen:  length of the mapped data (maps only)
+ * flags: NFT_SET_INTERVAL and/or NFT_SET_MAP
+ */
+struct nft_set {
+       struct rb_root          root;
+       enum nft_registers      sreg:8;
+       enum nft_registers      dreg:8;
+       u8                      klen;
+       u8                      dlen;
+       u16                     flags;
+};
+
+/*
+ * One element of a set.
+ *
+ * node:  linkage into nft_set.root
+ * flags: element flags; nft_set_elem_init() only accepts
+ *        NFT_SE_INTERVAL_END
+ * key:   the element's key
+ * data:  flexible array; nft_set_elem_init() allocates one entry for sets
+ *        with NFT_SET_MAP and none otherwise
+ */
+struct nft_set_elem {
+       struct rb_node          node;
+       enum nft_set_elem_flags flags;
+       struct nft_data         key;
+       struct nft_data         data[];
+};
+
+/*
+ * Look up the key held in register priv->sreg in the set's tree.
+ *
+ * On a match the verdict is left untouched and, for maps, the element's
+ * data is copied into register priv->dreg. When no element compares equal
+ * and the set has NFT_SET_INTERVAL, the last element visited with a
+ * negative comparison result is used as the match instead. A matching
+ * element flagged NFT_SE_INTERVAL_END, or no match at all, sets the verdict
+ * register to NFT_BREAK.
+ */
+static void nft_set_eval(const struct nft_expr *expr,
+                        struct nft_data data[NFT_REG_MAX + 1],
+                        const struct nft_pktinfo *pkt)
+{
+       const struct nft_set *priv = nft_expr_priv(expr);
+       const struct rb_node *parent = priv->root.rb_node;
+       const struct nft_set_elem *elem, *interval = NULL;
+       const struct nft_data *key = &data[priv->sreg];
+       int d;
+
+       while (parent != NULL) {
+               elem = rb_entry(parent, struct nft_set_elem, node);
+
+               d = nft_data_cmp(&elem->key, key, priv->klen);
+               if (d < 0) {
+                       parent = parent->rb_left;
+                       /* remember the candidate for the interval fallback */
+                       interval = elem;
+               } else if (d > 0)
+                       parent = parent->rb_right;
+               else {
+                       /* also entered from below for the interval fallback */
+found:
+                       if (elem->flags & NFT_SE_INTERVAL_END)
+                               goto out;
+                       if (priv->flags & NFT_SET_MAP)
+                               nft_data_copy(&data[priv->dreg], elem->data);
+                       return;
+               }
+       }
+
+       if (priv->flags & NFT_SET_INTERVAL && interval != NULL) {
+               elem = interval;
+               goto found;
+       }
+out:
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Release one set element: its key, its mapped data when the set is a map,
+ * and finally the element's memory.
+ */
+static void nft_set_elem_destroy(const struct nft_expr *expr,
+                                struct nft_set_elem *elem)
+{
+       const struct nft_set *set = nft_expr_priv(expr);
+       const bool has_data = (set->flags & NFT_SET_MAP) != 0;
+
+       nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+
+       if (has_data)
+               nft_data_uninit(elem->data, nft_dreg_to_type(set->dreg));
+
+       kfree(elem);
+}
+
+/*
+ * Netlink attribute policy of a single set element: nested key, nested
+ * data and 32-bit flags.
+ */
+static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = {
+       [NFTA_SE_KEY]           = { .type = NLA_NESTED },
+       [NFTA_SE_DATA]          = { .type = NLA_NESTED },
+       [NFTA_SE_FLAGS]         = { .type = NLA_U32 },
+};
+
+/*
+ * Allocate and initialise one set element from the nested attribute @nla.
+ *
+ * A key is always required. Data is required for map elements (unless the
+ * element is flagged NFT_SE_INTERVAL_END) and rejected for non-map sets.
+ * The key must be a plain value of exactly priv->klen bytes; for maps whose
+ * destination is not the verdict register the data must be priv->dlen bytes
+ * long.
+ *
+ * On success the new element is stored in *@new and 0 is returned. On
+ * failure a negative errno is returned and everything that was initialised
+ * up to that point is released again.
+ */
+static int nft_set_elem_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr *nla,
+                            struct nft_set_elem **new)
+{
+       struct nft_set *priv = nft_expr_priv(expr);
+       struct nlattr *tb[NFTA_SE_MAX + 1];
+       struct nft_set_elem *elem;
+       struct nft_data_desc d1, d2;
+       enum nft_set_elem_flags flags = 0;
+       unsigned int size;
+       int err;
+
+       err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy);
+       if (err < 0)
+               return err;
+
+       if (tb[NFTA_SE_KEY] == NULL)
+               return -EINVAL;
+
+       if (tb[NFTA_SE_FLAGS] != NULL) {
+               flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS]));
+               if (flags & ~NFT_SE_INTERVAL_END)
+                       return -EINVAL;
+       }
+
+       size = sizeof(*elem);
+       if (priv->flags & NFT_SET_MAP) {
+               if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END))
+                       return -EINVAL;
+               /* maps carry exactly one data entry behind the element */
+               size += sizeof(elem->data[0]);
+       } else {
+               if (tb[NFTA_SE_DATA] != NULL)
+                       return -EINVAL;
+       }
+
+       elem = kzalloc(size, GFP_KERNEL);
+       if (elem == NULL)
+               return -ENOMEM;
+       elem->flags = flags;
+
+       err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]);
+       if (err < 0)
+               goto err1;
+       err = -EINVAL;
+       if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+               goto err2;
+
+       if (tb[NFTA_SE_DATA] != NULL) {
+               err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]);
+               if (err < 0)
+                       goto err2;
+               err = -EINVAL;
+               /*
+                * elem->data is initialised from here on, so every failure
+                * has to unwind through err3 to release it again.
+                */
+               if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen)
+                       goto err3;
+               err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+               if (err < 0)
+                       goto err3;
+       }
+
+       *new = elem;
+       return 0;
+
+err3:
+       nft_data_uninit(elem->data, d2.type);
+err2:
+       nft_data_uninit(&elem->key, d1.type);
+err1:
+       kfree(elem);
+       return err;
+}
+
+/*
+ * Write one set element to @skb inside an NFTA_LIST_ELEM nest: the key,
+ * the mapped data (maps only, and not for interval-end elements) and the
+ * flags when any are set. Returns 0 on success and -1 as soon as something
+ * cannot be added to the message.
+ */
+static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+                            const struct nft_set_elem *elem)
+
+{
+       const struct nft_set *set = nft_expr_priv(expr);
+       const bool is_map = (set->flags & NFT_SET_MAP) != 0;
+       const bool is_end = (elem->flags & NFT_SE_INTERVAL_END) != 0;
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+       if (nest == NULL)
+               return -1;
+
+       if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key,
+                         NFT_DATA_VALUE, set->klen) < 0)
+               return -1;
+
+       if (is_map && !is_end &&
+           nft_data_dump(skb, NFTA_SE_DATA, elem->data,
+                         nft_dreg_to_type(set->dreg), set->dlen) < 0)
+               return -1;
+
+       if (elem->flags &&
+           nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags)))
+               return -1;
+
+       nla_nest_end(skb, nest);
+       return 0;
+}
+
+/*
+ * Tear down a set expression: keep removing the tree's root node and
+ * destroying the element behind it until the tree is empty.
+ */
+static void nft_set_destroy(const struct nft_expr *expr)
+{
+       struct nft_set *set = nft_expr_priv(expr);
+       struct rb_node *node = set->root.rb_node;
+
+       while (node != NULL) {
+               rb_erase(node, &set->root);
+               nft_set_elem_destroy(expr,
+                                    rb_entry(node, struct nft_set_elem, node));
+               node = set->root.rb_node;
+       }
+}
+
+/*
+ * Netlink attribute policy of the set expression: 32-bit scalars plus the
+ * nested list of elements.
+ */
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
+       [NFTA_SET_FLAGS]        = { .type = NLA_U32 },
+       [NFTA_SET_SREG]         = { .type = NLA_U32 },
+       [NFTA_SET_DREG]         = { .type = NLA_U32 },
+       [NFTA_SET_KLEN]         = { .type = NLA_U32 },
+       [NFTA_SET_DLEN]         = { .type = NLA_U32 },
+       [NFTA_SET_ELEMENTS]     = { .type = NLA_NESTED },
+};
+
+/*
+ * Initialise a "set" expression from its netlink attributes.
+ *
+ * NFTA_SET_SREG, NFTA_SET_KLEN and NFTA_SET_ELEMENTS are mandatory.
+ * NFTA_SET_DREG, NFTA_SET_DLEN and the NFT_SET_MAP flag must either all be
+ * present or all be absent.  Each NFTA_LIST_ELEM entry nested inside
+ * NFTA_SET_ELEMENTS is parsed by nft_set_elem_init() and inserted into the
+ * rb-tree, with keys ordered by nft_data_cmp() using the key length.
+ *
+ * Every numeric attribute is checked in a full-width local variable and
+ * only stored in the private area once it has been accepted, so a value
+ * that would become acceptable merely by being narrowed to the width of
+ * its destination field can never pass validation.
+ *
+ * Returns 0 on success, -EINVAL for missing or invalid attributes, -EEXIST
+ * when two elements carry the same key, or the error reported by the
+ * register validators or nft_set_elem_init().  If a failure occurs once
+ * element parsing has started, all elements inserted so far are destroyed.
+ */
+static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                       const struct nlattr * const tb[])
+{
+       struct nft_set *priv = nft_expr_priv(expr);
+       struct nft_set_elem *elem, *uninitialized_var(new);
+       struct rb_node *parent, **p;
+       const struct nlattr *nla;
+       unsigned int flags = 0, sreg, dreg, klen, dlen;
+       int err, rem, d;
+
+       if (tb[NFTA_SET_SREG] == NULL ||
+           tb[NFTA_SET_KLEN] == NULL ||
+           tb[NFTA_SET_ELEMENTS] == NULL)
+               return -EINVAL;
+
+       priv->root = RB_ROOT;
+
+       if (tb[NFTA_SET_FLAGS] != NULL) {
+               flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS]));
+               if (flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP))
+                       return -EINVAL;
+       }
+       /*
+        * Stored unconditionally so that nothing below depends on the
+        * previous contents of the private area.
+        */
+       priv->flags = flags;
+
+       sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG]));
+       err = nft_validate_input_register(sreg);
+       if (err < 0)
+               return err;
+       priv->sreg = sreg;
+
+       if (tb[NFTA_SET_DREG] != NULL) {
+               if (!(flags & NFT_SET_MAP))
+                       return -EINVAL;
+               if (tb[NFTA_SET_DLEN] == NULL)
+                       return -EINVAL;
+
+               dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG]));
+               err = nft_validate_output_register(dreg);
+               if (err < 0)
+                       return err;
+
+               /*
+                * For the verdict register the supplied length is ignored
+                * and the full data size is used instead.
+                */
+               if (dreg == NFT_REG_VERDICT)
+                       dlen = FIELD_SIZEOF(struct nft_data, data);
+               else {
+                       dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN]));
+                       if (dlen == 0 ||
+                           dlen > FIELD_SIZEOF(struct nft_data, data))
+                               return -EINVAL;
+               }
+               priv->dreg = dreg;
+               priv->dlen = dlen;
+       } else {
+               if (flags & NFT_SET_MAP)
+                       return -EINVAL;
+               if (tb[NFTA_SET_DLEN] != NULL)
+                       return -EINVAL;
+       }
+
+       klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN]));
+       if (klen == 0 ||
+           klen > FIELD_SIZEOF(struct nft_data, data))
+               return -EINVAL;
+       priv->klen = klen;
+
+       nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) {
+               err = -EINVAL;
+               if (nla_type(nla) != NFTA_LIST_ELEM)
+                       goto err1;
+
+               err = nft_set_elem_init(ctx, expr, nla, &new);
+               if (err < 0)
+                       goto err1;
+
+               /* Walk down to the insertion point; equal keys are refused. */
+               parent = NULL;
+               p = &priv->root.rb_node;
+               while (*p != NULL) {
+                       parent = *p;
+                       elem = rb_entry(parent, struct nft_set_elem, node);
+                       d = nft_data_cmp(&elem->key, &new->key, priv->klen);
+                       if (d < 0)
+                               p = &parent->rb_left;
+                       else if (d > 0)
+                               p = &parent->rb_right;
+                       else {
+                               err = -EEXIST;
+                               goto err2;
+                       }
+               }
+               rb_link_node(&new->node, parent, p);
+               rb_insert_color(&new->node, &priv->root);
+       }
+
+       return 0;
+
+err2:
+       /* 'new' was parsed but never linked into the tree; free it here. */
+       nft_set_elem_destroy(expr, new);
+err1:
+       nft_set_destroy(expr);
+       return err;
+}
+
+/*
+ * Serialise a set expression into netlink attributes on @skb.
+ *
+ * Emits the flags (only when non-zero), the source register and the key
+ * length, then the destination register and data length when NFT_SET_MAP
+ * is set, and finally every element, walked from rb_first() via
+ * rb_next(), nested inside NFTA_SET_ELEMENTS.
+ *
+ * Returns 0 on success and -1 as soon as any attribute cannot be added.
+ */
+static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       struct nft_set *set = nft_expr_priv(expr);
+       struct rb_node *pos;
+       struct nlattr *nest;
+
+       if (set->flags &&
+           nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
+               goto nla_put_failure;
+
+       if (nla_put_be32(skb, NFTA_SET_SREG, htonl(set->sreg)) ||
+           nla_put_be32(skb, NFTA_SET_KLEN, htonl(set->klen)))
+               goto nla_put_failure;
+
+       if ((set->flags & NFT_SET_MAP) &&
+           (nla_put_be32(skb, NFTA_SET_DREG, htonl(set->dreg)) ||
+            nla_put_be32(skb, NFTA_SET_DLEN, htonl(set->dlen))))
+               goto nla_put_failure;
+
+       nest = nla_nest_start(skb, NFTA_SET_ELEMENTS);
+       if (!nest)
+               goto nla_put_failure;
+
+       pos = rb_first(&set->root);
+       while (pos != NULL) {
+               const struct nft_set_elem *entry;
+
+               entry = rb_entry(pos, struct nft_set_elem, node);
+               if (nft_set_elem_dump(skb, expr, entry) < 0)
+                       goto nla_put_failure;
+               pos = rb_next(pos);
+       }
+
+       nla_nest_end(skb, nest);
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/*
+ * Operations table for the "set" expression.  It wires up the eval, init,
+ * destroy and dump callbacks together with the attribute policy, and is
+ * the object handed to nft_register_expr()/nft_unregister_expr().
+ */
+static struct nft_expr_ops nft_set_ops __read_mostly = {
+       .name           = "set",
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_set)),
+       .owner          = THIS_MODULE,
+       .eval           = nft_set_eval,
+       .init           = nft_set_init,
+       .destroy        = nft_set_destroy,
+       .dump           = nft_set_dump,
+       .policy         = nft_set_policy,
+       .maxattr        = NFTA_SET_MAX,
+};
+
+/* Module entry point: register the "set" expression operations. */
+static int __init nft_set_module_init(void)
+{
+       int ret;
+
+       ret = nft_register_expr(&nft_set_ops);
+       return ret;
+}
+
+/* Module exit point: unregister the "set" expression operations. */
+static void __exit nft_set_module_exit(void)
+{
+       nft_unregister_expr(&nft_set_ops);
+}
+
+/* Hook the init/exit functions into module load and unload. */
+module_init(nft_set_module_init);
+module_exit(nft_set_module_exit);
+
+/* Module metadata; the alias presumably lets the "set" expression be
+ * resolved to this module by name -- confirm against the macro definition.
+ */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("set");