2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "netlink-conntrack.h"
21 #include <linux/netfilter/nfnetlink.h>
22 #include <linux/netfilter/nfnetlink_conntrack.h>
23 #include <linux/netfilter/nf_conntrack_common.h>
24 #include <linux/netfilter/nf_conntrack_tcp.h>
25 #include <linux/netfilter/nf_conntrack_ftp.h>
26 #include <linux/netfilter/nf_conntrack_sctp.h>
28 #include "byte-order.h"
30 #include "dynamic-string.h"
32 #include "netlink-socket.h"
34 #include "openvswitch/vlog.h"
35 #include "poll-loop.h"
40 VLOG_DEFINE_THIS_MODULE(netlink_conntrack
);
41 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
43 /* This module works only if conntrack modules and features are enabled in the
44 * Linux kernel. This can be done from a root shell like this:
46 * $ modprobe ip_conntrack
47 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
48 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
50 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
51 * module, there must be a connlabel rule in iptables for space to be reserved
52 * for the labels (see kernel source connlabel_mt_check()). Such a rule can be
53 * inserted from a root shell like this:
55 * $ iptables -A INPUT -m conntrack -m connlabel \
56 * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
59 /* Some attributes were introduced in later kernels: with these definitions
60 * we should be able to compile userspace against Linux 2.6.32+. */
62 #define CTA_ZONE (CTA_SECMARK + 1)
63 #define CTA_SECCTX (CTA_SECMARK + 2)
64 #define CTA_TIMESTAMP (CTA_SECMARK + 3)
65 #define CTA_MARK_MASK (CTA_SECMARK + 4)
66 #define CTA_LABELS (CTA_SECMARK + 5)
67 #define CTA_LABELS_MASK (CTA_SECMARK + 6)
69 #define CTA_TIMESTAMP_START 1
70 #define CTA_TIMESTAMP_STOP 2
72 #define IPS_TEMPLATE_BIT 11
73 #define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
75 #define IPS_UNTRACKED_BIT 12
76 #define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
78 static const struct nl_policy nfnlgrp_conntrack_policy
[] = {
79 [CTA_TUPLE_ORIG
] = { .type
= NL_A_NESTED
, .optional
= false },
80 [CTA_TUPLE_REPLY
] = { .type
= NL_A_NESTED
, .optional
= false },
81 [CTA_ZONE
] = { .type
= NL_A_BE16
, .optional
= true },
82 [CTA_STATUS
] = { .type
= NL_A_BE32
, .optional
= false },
83 [CTA_TIMESTAMP
] = { .type
= NL_A_NESTED
, .optional
= true },
84 [CTA_TIMEOUT
] = { .type
= NL_A_BE32
, .optional
= true },
85 [CTA_COUNTERS_ORIG
] = { .type
= NL_A_NESTED
, .optional
= true },
86 [CTA_COUNTERS_REPLY
] = { .type
= NL_A_NESTED
, .optional
= true },
87 [CTA_PROTOINFO
] = { .type
= NL_A_NESTED
, .optional
= true },
88 [CTA_HELP
] = { .type
= NL_A_NESTED
, .optional
= true },
89 [CTA_MARK
] = { .type
= NL_A_BE32
, .optional
= true },
90 [CTA_SECCTX
] = { .type
= NL_A_NESTED
, .optional
= true },
91 [CTA_ID
] = { .type
= NL_A_BE32
, .optional
= false },
92 [CTA_USE
] = { .type
= NL_A_BE32
, .optional
= true },
93 [CTA_TUPLE_MASTER
] = { .type
= NL_A_NESTED
, .optional
= true },
94 [CTA_NAT_SEQ_ADJ_ORIG
] = { .type
= NL_A_NESTED
, .optional
= true },
95 [CTA_NAT_SEQ_ADJ_REPLY
] = { .type
= NL_A_NESTED
, .optional
= true },
96 [CTA_LABELS
] = { .type
= NL_A_UNSPEC
, .optional
= true },
97 /* CTA_NAT_SRC, CTA_NAT_DST, CTA_MARK_MASK, and CTA_LABELS_MASK are not
98 * received from kernel. */
101 /* Declarations for conntrack netlink dumping. */
102 static void nl_msg_put_nfgenmsg(struct ofpbuf
*msg
, size_t expected_payload
,
103 int family
, uint8_t subsystem
, uint8_t cmd
,
106 static bool nl_ct_parse_header_policy(struct ofpbuf
*buf
,
107 enum nl_ct_event_type
*event_type
,
108 uint8_t *nfgen_family
,
109 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)]);
111 static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry
*entry
,
112 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)],
113 uint8_t nfgen_family
);
115 struct nl_ct_dump_state
{
122 /* Conntrack netlink dumping. */
124 /* Initialize a conntrack netlink dump. */
126 nl_ct_dump_start(struct nl_ct_dump_state
**statep
, const uint16_t *zone
)
128 struct nl_ct_dump_state
*state
;
130 *statep
= state
= xzalloc(sizeof *state
);
131 ofpbuf_init(&state
->buf
, NL_DUMP_BUFSIZE
);
134 state
->filter_zone
= true;
138 nl_msg_put_nfgenmsg(&state
->buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
139 IPCTNL_MSG_CT_GET
, NLM_F_REQUEST
);
140 nl_dump_start(&state
->dump
, NETLINK_NETFILTER
, &state
->buf
);
141 ofpbuf_clear(&state
->buf
);
146 /* Receive the next 'entry' from the conntrack netlink dump with 'state'.
147 * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may
148 * be uninitialized memory on entry, and must be uninitialized with
149 * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is
150 * passed to this function again, the entry must also be uninitialized before
153 nl_ct_dump_next(struct nl_ct_dump_state
*state
, struct ct_dpif_entry
*entry
)
157 memset(entry
, 0, sizeof *entry
);
159 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
160 enum nl_ct_event_type type
;
161 uint8_t nfgen_family
;
163 if (!nl_dump_next(&state
->dump
, &buf
, &state
->buf
)) {
167 if (!nl_ct_parse_header_policy(&buf
, &type
, &nfgen_family
, attrs
)) {
171 if (state
->filter_zone
) {
172 uint16_t entry_zone
= attrs
[CTA_ZONE
]
173 ? ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]))
175 if (entry_zone
!= state
->zone
) {
180 if (nl_ct_attrs_to_ct_dpif_entry(entry
, attrs
, nfgen_family
)) {
184 ct_dpif_entry_uninit(entry
);
185 memset(entry
, 0, sizeof *entry
);
186 /* Ignore the failed entry and get the next one. */
193 /* End a conntrack netlink dump. */
/* Takes the dump 'state'; the visible body uses its 'dump' and 'buf'
 * members.  What is done with 'error' and with 'state' itself afterwards is
 * not visible in this listing. */
195 nl_ct_dump_done(struct nl_ct_dump_state
*state
)
/* Hand the netlink dump to nl_dump_done() and keep its return value. */
197 int error
= nl_dump_done(&state
->dump
);
/* Release the buffer embedded in 'state'. */
199 ofpbuf_uninit(&state
->buf
);
204 /* Format conntrack event 'entry' of 'type' to 'ds'. */
/* Appends a textual form of a conntrack event to 'ds': first the event name
 * selected by 'type', then the entry as formatted by ct_dpif_format_entry().
 * 'verbose' and 'print_stats' are passed through to that call unchanged. */
206 nl_ct_format_event_entry(const struct ct_dpif_entry
*entry
,
207 enum nl_ct_event_type type
, struct ds
*ds
,
208 bool verbose
, bool print_stats
)
/* Event name followed by one space: "NEW", "UPDATE" or "DELETE".  The
 * fallback arm for any other 'type' value is not visible in this listing. */
210 ds_put_format(ds
, "%s ",
211 type
== NL_CT_EVENT_NEW
? "NEW"
212 : type
== NL_CT_EVENT_UPDATE
? "UPDATE"
213 : type
== NL_CT_EVENT_DELETE
? "DELETE"
/* Then the entry itself. */
215 ct_dpif_format_entry(entry
, ds
, verbose
, print_stats
);
224 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
226 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
227 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
229 err
= nl_transact(NETLINK_NETFILTER
, &buf
, NULL
);
232 /* Expectations are flushed automatically, because they do not
233 * have a master connection anymore */
239 nl_ct_flush_zone(uint16_t flush_zone
)
241 /* Apparently, there's no netlink interface to flush a specific zone.
242 * This code dumps every connection, checks the zone and eventually
245 * This is race-prone, but it is better than using shell scripts. */
248 struct ofpbuf buf
, reply
, delete;
250 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
251 ofpbuf_init(&delete, NL_DUMP_BUFSIZE
);
253 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
254 IPCTNL_MSG_CT_GET
, NLM_F_REQUEST
);
255 nl_dump_start(&dump
, NETLINK_NETFILTER
, &buf
);
259 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
260 enum nl_ct_event_type event_type
;
261 uint8_t nfgen_family
;
264 if (!nl_dump_next(&dump
, &reply
, &buf
)) {
268 if (!nl_ct_parse_header_policy(&reply
, &event_type
, &nfgen_family
,
273 if (attrs
[CTA_ZONE
]) {
274 zone
= ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]));
277 if (zone
!= flush_zone
) {
278 /* The entry is not in the zone we're flushing. */
281 nl_msg_put_nfgenmsg(&delete, 0, nfgen_family
, NFNL_SUBSYS_CTNETLINK
,
282 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
284 nl_msg_put_be16(&delete, CTA_ZONE
, htons(zone
));
285 nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG
, attrs
[CTA_TUPLE_ORIG
] + 1,
286 attrs
[CTA_TUPLE_ORIG
]->nla_len
- NLA_HDRLEN
);
287 nl_msg_put_unspec(&delete, CTA_ID
, attrs
[CTA_ID
] + 1,
288 attrs
[CTA_ID
]->nla_len
- NLA_HDRLEN
);
289 nl_transact(NETLINK_NETFILTER
, &delete, NULL
);
290 ofpbuf_clear(&delete);
295 ofpbuf_uninit(&delete);
298 /* Expectations are flushed automatically, because they do not
299 * have a master connection anymore */
303 /* Conntrack netlink parsing. */
/* Parses the nested counters attribute 'nla' into 'counters'.  The result of
 * nl_parse_nested() is kept in 'parsed'; how it is returned to the caller is
 * not visible in this listing. */
306 nl_ct_parse_counters(struct nlattr
*nla
, struct ct_dpif_counters
*counters
)
/* Both the packet and the byte counter are mandatory big-endian 64-bit
 * attributes. */
308 static const struct nl_policy policy
[] = {
309 [CTA_COUNTERS_PACKETS
] = { .type
= NL_A_BE64
, .optional
= false },
310 [CTA_COUNTERS_BYTES
] = { .type
= NL_A_BE64
, .optional
= false },
312 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
315 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
/* Convert each counter from network to host byte order with ntohll(). */
319 = ntohll(nl_attr_get_be64(attrs
[CTA_COUNTERS_PACKETS
]));
320 counters
->bytes
= ntohll(nl_attr_get_be64(attrs
[CTA_COUNTERS_BYTES
]));
/* Rate-limited error log for the parse-failure case. */
322 VLOG_ERR_RL(&rl
, "Could not parse nested counters. "
323 "Possibly incompatible Linux kernel version.");
/* Parses the nested timestamp attribute 'nla' into 'timestamp'.  The result
 * of nl_parse_nested() is kept in 'parsed'; how it is returned to the caller
 * is not visible in this listing. */
330 nl_ct_parse_timestamp(struct nlattr
*nla
, struct ct_dpif_timestamp
*timestamp
)
/* The start time is mandatory, the stop time is optional; both are
 * big-endian 64-bit attributes. */
332 static const struct nl_policy policy
[] = {
333 [CTA_TIMESTAMP_START
] = { .type
= NL_A_BE64
, .optional
= false },
334 [CTA_TIMESTAMP_STOP
] = { .type
= NL_A_BE64
, .optional
= true },
336 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
339 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
/* Start time, converted to host byte order. */
343 = ntohll(nl_attr_get_be64(attrs
[CTA_TIMESTAMP_START
]));
/* The stop time is only read when the attribute is present. */
344 if (attrs
[CTA_TIMESTAMP_STOP
]) {
346 = ntohll(nl_attr_get_be64(attrs
[CTA_TIMESTAMP_STOP
]));
/* Rate-limited error log for the parse-failure case. */
349 VLOG_ERR_RL(&rl
, "Could not parse nested timestamp. "
350 "Possibly incompatible Linux kernel version.");
357 nl_ct_parse_tuple_ip(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
)
359 static const struct nl_policy policy
[] = {
360 [CTA_IP_V4_SRC
] = { .type
= NL_A_BE32
, .optional
= true },
361 [CTA_IP_V4_DST
] = { .type
= NL_A_BE32
, .optional
= true },
362 [CTA_IP_V6_SRC
] = { NL_POLICY_FOR(struct in6_addr
), .optional
= true },
363 [CTA_IP_V6_DST
] = { NL_POLICY_FOR(struct in6_addr
), .optional
= true },
365 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
368 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
371 if (tuple
->l3_type
== AF_INET
) {
372 if (attrs
[CTA_IP_V4_SRC
]) {
373 tuple
->src
.ip
= nl_attr_get_be32(attrs
[CTA_IP_V4_SRC
]);
375 if (attrs
[CTA_IP_V4_DST
]) {
376 tuple
->dst
.ip
= nl_attr_get_be32(attrs
[CTA_IP_V4_DST
]);
378 } else if (tuple
->l3_type
== AF_INET6
) {
379 if (attrs
[CTA_IP_V6_SRC
]) {
380 memcpy(&tuple
->src
.in6
, nl_attr_get(attrs
[CTA_IP_V6_SRC
]),
381 sizeof tuple
->src
.in6
);
383 if (attrs
[CTA_IP_V6_DST
]) {
384 memcpy(&tuple
->dst
.in6
, nl_attr_get(attrs
[CTA_IP_V6_DST
]),
385 sizeof tuple
->dst
.in6
);
388 VLOG_WARN_RL(&rl
, "Unsupported IP protocol: %u.", tuple
->l3_type
);
392 VLOG_ERR_RL(&rl
, "Could not parse nested tuple IP options. "
393 "Possibly incompatible Linux kernel version.");
400 nl_ct_parse_tuple_proto(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
)
402 static const struct nl_policy policy
[] = {
403 [CTA_PROTO_NUM
] = { .type
= NL_A_U8
, .optional
= false },
404 [CTA_PROTO_SRC_PORT
] = { .type
= NL_A_BE16
, .optional
= true },
405 [CTA_PROTO_DST_PORT
] = { .type
= NL_A_BE16
, .optional
= true },
406 [CTA_PROTO_ICMP_ID
] = { .type
= NL_A_BE16
, .optional
= true },
407 [CTA_PROTO_ICMP_TYPE
] = { .type
= NL_A_U8
, .optional
= true },
408 [CTA_PROTO_ICMP_CODE
] = { .type
= NL_A_U8
, .optional
= true },
409 [CTA_PROTO_ICMPV6_ID
] = { .type
= NL_A_BE16
, .optional
= true },
410 [CTA_PROTO_ICMPV6_TYPE
] = { .type
= NL_A_U8
, .optional
= true },
411 [CTA_PROTO_ICMPV6_CODE
] = { .type
= NL_A_U8
, .optional
= true },
413 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
416 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
419 tuple
->ip_proto
= nl_attr_get_u8(attrs
[CTA_PROTO_NUM
]);
421 if (tuple
->l3_type
== AF_INET
&& tuple
->ip_proto
== IPPROTO_ICMP
) {
422 if (!attrs
[CTA_PROTO_ICMP_ID
] || !attrs
[CTA_PROTO_ICMP_TYPE
]
423 || !attrs
[CTA_PROTO_ICMP_CODE
]) {
424 VLOG_ERR_RL(&rl
, "Tuple ICMP data missing.");
427 tuple
->icmp_id
= nl_attr_get_be16(attrs
[CTA_PROTO_ICMP_ID
]);
428 tuple
->icmp_type
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMP_TYPE
]);
429 tuple
->icmp_code
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMP_CODE
]);
430 } else if (tuple
->l3_type
== AF_INET6
&&
431 tuple
->ip_proto
== IPPROTO_ICMPV6
) {
432 if (!attrs
[CTA_PROTO_ICMPV6_ID
] || !attrs
[CTA_PROTO_ICMPV6_TYPE
]
433 || !attrs
[CTA_PROTO_ICMPV6_CODE
]) {
434 VLOG_ERR_RL(&rl
, "Tuple ICMPv6 data missing.");
437 tuple
->icmp_id
= nl_attr_get_be16(attrs
[CTA_PROTO_ICMPV6_ID
]);
438 tuple
->icmp_type
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMPV6_TYPE
]);
439 tuple
->icmp_code
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMPV6_CODE
]);
440 } else if (attrs
[CTA_PROTO_SRC_PORT
] && attrs
[CTA_PROTO_DST_PORT
]) {
441 tuple
->src_port
= nl_attr_get_be16(attrs
[CTA_PROTO_SRC_PORT
]);
442 tuple
->dst_port
= nl_attr_get_be16(attrs
[CTA_PROTO_DST_PORT
]);
444 /* Unsupported IPPROTO and no ports, leave them zeroed.
445 * We have parsed the ip_proto, so this is not a total failure. */
446 VLOG_INFO_RL(&rl
, "Unsupported L4 protocol: %u.", tuple
->ip_proto
);
449 VLOG_ERR_RL(&rl
, "Could not parse nested tuple protocol options. "
450 "Possibly incompatible Linux kernel version.");
457 nl_ct_parse_tuple(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
,
460 static const struct nl_policy policy
[] = {
461 [CTA_TUPLE_IP
] = { .type
= NL_A_NESTED
, .optional
= false },
462 [CTA_TUPLE_PROTO
] = { .type
= NL_A_NESTED
, .optional
= false },
464 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
467 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
469 memset(tuple
, 0, sizeof *tuple
);
472 tuple
->l3_type
= l3_type
;
474 if (!nl_ct_parse_tuple_ip(attrs
[CTA_TUPLE_IP
], tuple
)
475 || !nl_ct_parse_tuple_proto(attrs
[CTA_TUPLE_PROTO
], tuple
)) {
479 ct_dpif_format_tuple(&ds
, tuple
, true);
481 VLOG_ERR_RL(&rl
, "Failed to parse tuple: %s", ds_cstr(&ds
));
484 memset(tuple
, 0, sizeof *tuple
);
488 VLOG_ERR_RL(&rl
, "Could not parse nested tuple options. "
489 "Possibly incompatible Linux kernel version.");
495 /* Translate netlink TCP state to CT_DPIF_TCP state. */
/* Maps a TCP_CONNTRACK_* value in 'state' to a CT_DPIF_TCPS_* value.  Most
 * states map to the like-named constant; the exceptions are commented
 * below. */
497 nl_ct_tcp_state_to_dpif(uint8_t state
)
/* NONE is reported as CLOSED. */
500 case TCP_CONNTRACK_NONE
:
501 return CT_DPIF_TCPS_CLOSED
;
502 case TCP_CONNTRACK_SYN_SENT
:
503 return CT_DPIF_TCPS_SYN_SENT
;
/* SYN_SENT2 shares the single CT_DPIF_TCPS_SYN_SENT value with SYN_SENT. */
504 case TCP_CONNTRACK_SYN_SENT2
:
505 return CT_DPIF_TCPS_SYN_SENT
;
506 case TCP_CONNTRACK_SYN_RECV
:
507 return CT_DPIF_TCPS_SYN_RECV
;
508 case TCP_CONNTRACK_ESTABLISHED
:
509 return CT_DPIF_TCPS_ESTABLISHED
;
/* FIN_WAIT is reported as FIN_WAIT_1. */
510 case TCP_CONNTRACK_FIN_WAIT
:
511 return CT_DPIF_TCPS_FIN_WAIT_1
;
512 case TCP_CONNTRACK_CLOSE_WAIT
:
513 return CT_DPIF_TCPS_CLOSE_WAIT
;
514 case TCP_CONNTRACK_LAST_ACK
:
515 return CT_DPIF_TCPS_LAST_ACK
;
516 case TCP_CONNTRACK_TIME_WAIT
:
517 return CT_DPIF_TCPS_TIME_WAIT
;
/* CLOSE is reported as CLOSING. */
518 case TCP_CONNTRACK_CLOSE
:
519 return CT_DPIF_TCPS_CLOSING
;
/* Final return of CLOSED; the label it belongs to is not visible in this
 * listing (presumably the fallback for unlisted values -- confirm). */
521 return CT_DPIF_TCPS_CLOSED
;
/* Translates the netlink TCP flag bits in 'flags' (IP_CT_TCP_FLAG_*) into
 * the corresponding CT_DPIF_TCPF_* bits, accumulated in 'ret'. */
526 ip_ct_tcp_flags_to_dpif(uint8_t flags
)
/* Local helper: for one FLAG name, OR CT_DPIF_TCPF_<FLAG> into 'ret' when
 * IP_CT_TCP_FLAG_<FLAG> is set in 'flags'. */
529 #define CT_DPIF_TCP_FLAG(FLAG) \
530 ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
/* Undefine the helper defined above (CT_DPIF_TCP_FLAG, not the status-flag
 * helper) so that it does not stay defined for the rest of the file. */
532 #undef CT_DPIF_TCP_FLAG
537 nl_ct_parse_protoinfo_tcp(struct nlattr
*nla
,
538 struct ct_dpif_protoinfo
*protoinfo
)
540 static const struct nl_policy policy
[] = {
541 [CTA_PROTOINFO_TCP_STATE
] = { .type
= NL_A_U8
, .optional
= false },
542 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
] = { .type
= NL_A_U8
,
544 [CTA_PROTOINFO_TCP_WSCALE_REPLY
] = { .type
= NL_A_U8
,
546 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
] = { .type
= NL_A_U16
,
548 [CTA_PROTOINFO_TCP_FLAGS_REPLY
] = { .type
= NL_A_U16
,
551 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
554 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
557 const struct nf_ct_tcp_flags
*flags_orig
, *flags_reply
;
559 protoinfo
->proto
= IPPROTO_TCP
;
560 state
= nl_ct_tcp_state_to_dpif(
561 nl_attr_get_u8(attrs
[CTA_PROTOINFO_TCP_STATE
]));
562 /* The connection tracker keeps only one tcp state for the
563 * connection, but our structures store a separate state for
564 * each endpoint. Here we duplicate the state. */
565 protoinfo
->tcp
.state_orig
= protoinfo
->tcp
.state_reply
= state
;
566 protoinfo
->tcp
.wscale_orig
= nl_attr_get_u8(
567 attrs
[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
]);
568 protoinfo
->tcp
.wscale_reply
= nl_attr_get_u8(
569 attrs
[CTA_PROTOINFO_TCP_WSCALE_REPLY
]);
571 nl_attr_get_unspec(attrs
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
],
573 protoinfo
->tcp
.flags_orig
=
574 ip_ct_tcp_flags_to_dpif(flags_orig
->flags
);
576 nl_attr_get_unspec(attrs
[CTA_PROTOINFO_TCP_FLAGS_REPLY
],
577 sizeof *flags_reply
);
578 protoinfo
->tcp
.flags_reply
=
579 ip_ct_tcp_flags_to_dpif(flags_reply
->flags
);
581 VLOG_ERR_RL(&rl
, "Could not parse nested TCP protoinfo options. "
582 "Possibly incompatible Linux kernel version.");
/* Parses the nested protocol-info attribute 'nla' into 'protoinfo'.  Only
 * the TCP variant is decoded; the SCTP variant and an empty attribute are
 * merely logged. */
589 nl_ct_parse_protoinfo(struct nlattr
*nla
, struct ct_dpif_protoinfo
*protoinfo
)
591 /* These are mutually exclusive. */
592 static const struct nl_policy policy
[] = {
593 [CTA_PROTOINFO_TCP
] = { .type
= NL_A_NESTED
, .optional
= true },
594 [CTA_PROTOINFO_SCTP
] = { .type
= NL_A_NESTED
, .optional
= true },
596 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
599 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
/* Start from an all-zero 'protoinfo'. */
601 memset(protoinfo
, 0, sizeof *protoinfo
);
/* TCP: delegate to nl_ct_parse_protoinfo_tcp(), whose result replaces
 * 'parsed'. */
604 if (attrs
[CTA_PROTOINFO_TCP
]) {
605 parsed
= nl_ct_parse_protoinfo_tcp(attrs
[CTA_PROTOINFO_TCP
],
/* SCTP: recognised but not decoded, only a rate-limited warning. */
607 } else if (attrs
[CTA_PROTOINFO_SCTP
]) {
608 VLOG_WARN_RL(&rl
, "SCTP protoinfo not yet supported!");
/* Warning for a protoinfo attribute that carries neither variant. */
610 VLOG_WARN_RL(&rl
, "Empty protoinfo!");
/* Rate-limited error log for the parse-failure case. */
613 VLOG_ERR_RL(&rl
, "Could not parse nested protoinfo options. "
614 "Possibly incompatible Linux kernel version.");
/* Parses the nested helper attribute 'nla' into 'helper'.  The result of
 * nl_parse_nested() is kept in 'parsed'; how it is returned to the caller is
 * not visible in this listing. */
621 nl_ct_parse_helper(struct nlattr
*nla
, struct ct_dpif_helper
*helper
)
/* The helper name is a mandatory string attribute. */
623 static const struct nl_policy policy
[] = {
624 [CTA_HELP_NAME
] = { .type
= NL_A_STRING
, .optional
= false },
626 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
629 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
/* Start from an all-zero 'helper'. */
631 memset(helper
, 0, sizeof *helper
);
/* 'name' receives an xstrdup() copy of the attribute string.
 * NOTE(review): presumably released by ct_dpif_entry_uninit() -- confirm. */
634 helper
->name
= xstrdup(nl_attr_get_string(attrs
[CTA_HELP_NAME
]));
/* Rate-limited error log for the parse-failure case. */
636 VLOG_ERR_RL(&rl
, "Could not parse nested helper options. "
637 "Possibly incompatible Linux kernel version.");
643 /* Translate netlink entry status flags to CT_DPIF_STATUS flags. */
/* Translates the IPS_* bits in 'status' into CT_DPIF_STATUS_* bits,
 * accumulated in 'ret'. */
645 ips_status_to_dpif_flags(uint32_t status
)
/* Local helper: for one FLAG name, OR CT_DPIF_STATUS_<FLAG> into 'ret' when
 * IPS_<FLAG> is set in 'status'. */
648 #define CT_DPIF_STATUS_FLAG(FLAG) \
649 ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
/* The helper is only needed here, so it is undefined again. */
651 #undef CT_DPIF_STATUS_FLAG
656 nl_ct_parse_header_policy(struct ofpbuf
*buf
,
657 enum nl_ct_event_type
*event_type
,
658 uint8_t *nfgen_family
,
659 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)])
661 struct nlmsghdr
*nlh
;
662 struct nfgenmsg
*nfm
;
665 nlh
= ofpbuf_at(buf
, 0, NLMSG_HDRLEN
);
666 nfm
= ofpbuf_at(buf
, NLMSG_HDRLEN
, sizeof *nfm
);
668 VLOG_ERR_RL(&rl
, "Received bad nfnl message (no nfgenmsg).");
671 if (NFNL_SUBSYS_ID(nlh
->nlmsg_type
) != NFNL_SUBSYS_CTNETLINK
) {
672 VLOG_ERR_RL(&rl
, "Received non-conntrack message (subsystem: %u).",
673 NFNL_SUBSYS_ID(nlh
->nlmsg_type
));
676 if (nfm
->version
!= NFNETLINK_V0
) {
677 VLOG_ERR_RL(&rl
, "Received unsupported nfnetlink version (%u).",
678 NFNL_MSG_TYPE(nfm
->version
));
682 if (!nl_policy_parse(buf
, NLMSG_HDRLEN
+ sizeof *nfm
,
683 nfnlgrp_conntrack_policy
, attrs
,
684 ARRAY_SIZE(nfnlgrp_conntrack_policy
))) {
685 VLOG_ERR_RL(&rl
, "Received bad nfnl message (policy).");
689 type
= NFNL_MSG_TYPE(nlh
->nlmsg_type
);
690 *nfgen_family
= nfm
->nfgen_family
;
693 case IPCTNL_MSG_CT_NEW
:
694 *event_type
= nlh
->nlmsg_flags
& NLM_F_CREATE
695 ? NL_CT_EVENT_NEW
: NL_CT_EVENT_UPDATE
;
697 case IPCTNL_MSG_CT_DELETE
:
698 *event_type
= NL_CT_EVENT_DELETE
;
701 VLOG_ERR_RL(&rl
, "Can't parse conntrack event type.");
709 nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry
*entry
,
710 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)],
711 uint8_t nfgen_family
)
713 if (!nl_ct_parse_tuple(attrs
[CTA_TUPLE_ORIG
], &entry
->tuple_orig
,
717 if (!nl_ct_parse_tuple(attrs
[CTA_TUPLE_REPLY
], &entry
->tuple_reply
,
721 if (attrs
[CTA_COUNTERS_ORIG
] &&
722 !nl_ct_parse_counters(attrs
[CTA_COUNTERS_ORIG
],
723 &entry
->counters_orig
)) {
726 if (attrs
[CTA_COUNTERS_REPLY
] &&
727 !nl_ct_parse_counters(attrs
[CTA_COUNTERS_REPLY
],
728 &entry
->counters_reply
)) {
731 if (attrs
[CTA_TIMESTAMP
] &&
732 !nl_ct_parse_timestamp(attrs
[CTA_TIMESTAMP
], &entry
->timestamp
)) {
736 entry
->id
= ntohl(nl_attr_get_be32(attrs
[CTA_ID
]));
738 if (attrs
[CTA_ZONE
]) {
739 entry
->zone
= ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]));
741 if (attrs
[CTA_STATUS
]) {
742 entry
->status
= ips_status_to_dpif_flags(
743 ntohl(nl_attr_get_be32(attrs
[CTA_STATUS
])));
745 if (attrs
[CTA_TIMEOUT
]) {
746 entry
->timeout
= ntohl(nl_attr_get_be32(attrs
[CTA_TIMEOUT
]));
748 if (attrs
[CTA_MARK
]) {
749 entry
->mark
= ntohl(nl_attr_get_be32(attrs
[CTA_MARK
]));
751 if (attrs
[CTA_LABELS
]) {
752 memcpy(&entry
->labels
, nl_attr_get(attrs
[CTA_LABELS
]),
753 MIN(sizeof entry
->labels
, nl_attr_get_size(attrs
[CTA_LABELS
])));
755 if (attrs
[CTA_PROTOINFO
] &&
756 !nl_ct_parse_protoinfo(attrs
[CTA_PROTOINFO
], &entry
->protoinfo
)) {
759 if (attrs
[CTA_HELP
] &&
760 !nl_ct_parse_helper(attrs
[CTA_HELP
], &entry
->helper
)) {
763 if (attrs
[CTA_TUPLE_MASTER
] &&
764 !nl_ct_parse_tuple(attrs
[CTA_TUPLE_MASTER
], &entry
->tuple_master
,
/* Parses the netlink message in 'buf' into 'entry' and stores the kind of
 * event in '*event_type'.  Works in two steps: header and policy validation,
 * then attribute-to-entry conversion.  The return statements are not visible
 * in this listing. */
772 nl_ct_parse_entry(struct ofpbuf
*buf
, struct ct_dpif_entry
*entry
,
773 enum nl_ct_event_type
*event_type
)
775 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
776 uint8_t nfgen_family
;
/* Start from an all-zero 'entry'. */
778 memset(entry
, 0, sizeof *entry
);
/* Step 1: validate the headers and split the message into 'attrs'; this
 * also yields the event type and the address family. */
779 if (!nl_ct_parse_header_policy(buf
, event_type
, &nfgen_family
, attrs
)) {
/* Step 2: fill 'entry' from 'attrs'.  On failure the partially filled entry
 * is released with ct_dpif_entry_uninit() and zeroed again. */
783 if (!nl_ct_attrs_to_ct_dpif_entry(entry
, attrs
, nfgen_family
)) {
784 ct_dpif_entry_uninit(entry
);
785 memset(entry
, 0, sizeof *entry
);
792 /* NetFilter utility functions. */
794 /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
795 * initially empty. 'expected_payload' should be an estimate of the number of
796 * payload bytes to be supplied; if the size of the payload is unknown a value
797 * of 0 is acceptable.
799 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
801 * 'flags' is a bit-mask that indicates what kind of request is being made. It
802 * is often NLM_F_REQUEST indicating that a request is being made, commonly
803 * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag requests
804 * a dump of the table.
806 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
808 * 'cmd' is an enumerated value specific to the 'subsystem'.
810 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
811 * fill it in just before sending the message.
813 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
814 * not NetFilter Netlink messages. */
816 nl_msg_put_nfgenmsg(struct ofpbuf
*msg
, size_t expected_payload
,
817 int family
, uint8_t subsystem
, uint8_t cmd
,
820 struct nfgenmsg
*nfm
;
822 nl_msg_put_nlmsghdr(msg
, sizeof *nfm
+ expected_payload
,
823 subsystem
<< 8 | cmd
, flags
);
824 ovs_assert(msg
->size
== NLMSG_HDRLEN
);
825 nfm
= nl_msg_put_uninit(msg
, sizeof *nfm
);
826 nfm
->nfgen_family
= family
;
827 nfm
->version
= NFNETLINK_V0
;