2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "netlink-conntrack.h"
21 #include <linux/netfilter/nfnetlink.h>
22 #include <linux/netfilter/nfnetlink_conntrack.h>
23 #include <linux/netfilter/nf_conntrack_common.h>
24 #include <linux/netfilter/nf_conntrack_tcp.h>
25 #include <linux/netfilter/nf_conntrack_ftp.h>
26 #include <linux/netfilter/nf_conntrack_sctp.h>
28 #include "byte-order.h"
30 #include "openvswitch/dynamic-string.h"
32 #include "netlink-socket.h"
33 #include "openvswitch/ofpbuf.h"
34 #include "openvswitch/vlog.h"
35 #include "poll-loop.h"
40 VLOG_DEFINE_THIS_MODULE(netlink_conntrack
);
41 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
43 /* This module works only if conntrack modules and features are enabled in the
44 * Linux kernel. This can be done from a root shell like this:
46 * $ modprobe ip_conntrack
47 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
48 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
50 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
51 * module, there must be a connlabel rule in iptables for space to be reserved
52 * for the labels (see kernel source connlabel_mt_check()). Such a rule can be
53 * inserted from a root shell like this:
55 * $ iptables -A INPUT -m conntrack -m connlabel \
56 * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
59 /* Some attributes were introduced in later kernels: with these definitions
60 * we should be able to compile userspace against Linux 2.6.32+. */
62 #define CTA_ZONE (CTA_SECMARK + 1)
63 #define CTA_SECCTX (CTA_SECMARK + 2)
64 #define CTA_TIMESTAMP (CTA_SECMARK + 3)
65 #define CTA_MARK_MASK (CTA_SECMARK + 4)
66 #define CTA_LABELS (CTA_SECMARK + 5)
67 #define CTA_LABELS_MASK (CTA_SECMARK + 6)
69 #define CTA_TIMESTAMP_START 1
70 #define CTA_TIMESTAMP_STOP 2
72 #define IPS_TEMPLATE_BIT 11
73 #define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
75 #define IPS_UNTRACKED_BIT 12
76 #define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
78 static const struct nl_policy nfnlgrp_conntrack_policy
[] = {
79 [CTA_TUPLE_ORIG
] = { .type
= NL_A_NESTED
, .optional
= false },
80 [CTA_TUPLE_REPLY
] = { .type
= NL_A_NESTED
, .optional
= false },
81 [CTA_ZONE
] = { .type
= NL_A_BE16
, .optional
= true },
82 [CTA_STATUS
] = { .type
= NL_A_BE32
, .optional
= false },
83 [CTA_TIMESTAMP
] = { .type
= NL_A_NESTED
, .optional
= true },
84 [CTA_TIMEOUT
] = { .type
= NL_A_BE32
, .optional
= true },
85 [CTA_COUNTERS_ORIG
] = { .type
= NL_A_NESTED
, .optional
= true },
86 [CTA_COUNTERS_REPLY
] = { .type
= NL_A_NESTED
, .optional
= true },
87 [CTA_PROTOINFO
] = { .type
= NL_A_NESTED
, .optional
= true },
88 [CTA_HELP
] = { .type
= NL_A_NESTED
, .optional
= true },
89 [CTA_MARK
] = { .type
= NL_A_BE32
, .optional
= true },
90 [CTA_SECCTX
] = { .type
= NL_A_NESTED
, .optional
= true },
91 [CTA_ID
] = { .type
= NL_A_BE32
, .optional
= false },
92 [CTA_USE
] = { .type
= NL_A_BE32
, .optional
= true },
93 [CTA_TUPLE_MASTER
] = { .type
= NL_A_NESTED
, .optional
= true },
94 [CTA_NAT_SEQ_ADJ_ORIG
] = { .type
= NL_A_NESTED
, .optional
= true },
95 [CTA_NAT_SEQ_ADJ_REPLY
] = { .type
= NL_A_NESTED
, .optional
= true },
96 [CTA_LABELS
] = { .type
= NL_A_UNSPEC
, .optional
= true },
97 /* CTA_NAT_SRC, CTA_NAT_DST, CTA_MARK_MASK, and CTA_LABELS_MASK are not
98 * received from kernel. */
101 /* Declarations for conntrack netlink dumping. */
102 static void nl_msg_put_nfgenmsg(struct ofpbuf
*msg
, size_t expected_payload
,
103 int family
, uint8_t subsystem
, uint8_t cmd
,
106 static bool nl_ct_parse_header_policy(struct ofpbuf
*buf
,
107 enum nl_ct_event_type
*event_type
,
108 uint8_t *nfgen_family
,
109 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)]);
111 static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry
*entry
,
112 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)],
113 uint8_t nfgen_family
);
115 struct nl_ct_dump_state
{
122 /* Conntrack netlink dumping. */
124 /* Initialize a conntrack netlink dump. */
126 nl_ct_dump_start(struct nl_ct_dump_state
**statep
, const uint16_t *zone
,
129 struct nl_ct_dump_state
*state
;
131 *statep
= state
= xzalloc(sizeof *state
);
132 ofpbuf_init(&state
->buf
, NL_DUMP_BUFSIZE
);
135 state
->filter_zone
= true;
139 nl_msg_put_nfgenmsg(&state
->buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
140 IPCTNL_MSG_CT_GET
, NLM_F_REQUEST
);
141 nl_dump_start(&state
->dump
, NETLINK_NETFILTER
, &state
->buf
);
142 ofpbuf_clear(&state
->buf
);
144 /* Buckets to store connections are not used. */
150 /* Receive the next 'entry' from the conntrack netlink dump with 'state'.
151 * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may
152 * be uninitialized memory on entry, and must be uninitialized with
153 * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is
154 * passed to this function again, the entry must also be uninitialized before
157 nl_ct_dump_next(struct nl_ct_dump_state
*state
, struct ct_dpif_entry
*entry
)
161 memset(entry
, 0, sizeof *entry
);
163 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
164 enum nl_ct_event_type type
;
165 uint8_t nfgen_family
;
167 if (!nl_dump_next(&state
->dump
, &buf
, &state
->buf
)) {
171 if (!nl_ct_parse_header_policy(&buf
, &type
, &nfgen_family
, attrs
)) {
175 if (state
->filter_zone
) {
176 uint16_t entry_zone
= attrs
[CTA_ZONE
]
177 ? ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]))
179 if (entry_zone
!= state
->zone
) {
184 if (nl_ct_attrs_to_ct_dpif_entry(entry
, attrs
, nfgen_family
)) {
188 ct_dpif_entry_uninit(entry
);
189 memset(entry
, 0, sizeof *entry
);
190 /* Ignore the failed entry and get the next one. */
197 /* End a conntrack netlink dump. */
199 nl_ct_dump_done(struct nl_ct_dump_state
*state
)
201 int error
= nl_dump_done(&state
->dump
);
203 ofpbuf_uninit(&state
->buf
);
208 /* Format conntrack event 'entry' of 'type' to 'ds'. */
210 nl_ct_format_event_entry(const struct ct_dpif_entry
*entry
,
211 enum nl_ct_event_type type
, struct ds
*ds
,
212 bool verbose
, bool print_stats
)
214 ds_put_format(ds
, "%s ",
215 type
== NL_CT_EVENT_NEW
? "NEW"
216 : type
== NL_CT_EVENT_UPDATE
? "UPDATE"
217 : type
== NL_CT_EVENT_DELETE
? "DELETE"
219 ct_dpif_format_entry(entry
, ds
, verbose
, print_stats
);
228 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
230 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
231 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
233 err
= nl_transact(NETLINK_NETFILTER
, &buf
, NULL
);
236 /* Expectations are flushed automatically, because they do not
237 * have a master connection anymore */
244 nl_ct_flush_zone(uint16_t flush_zone
)
246 /* Windows can flush a specific zone */
250 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
252 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
253 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
254 nl_msg_put_be16(&buf
, CTA_ZONE
, flush_zone
);
256 err
= nl_transact(NETLINK_NETFILTER
, &buf
, NULL
);
263 nl_ct_flush_zone(uint16_t flush_zone
)
265 /* Apparently, there's no netlink interface to flush a specific zone.
266 * This code dumps every connection, checks the zone and eventually
269 * This is race-prone, but it is better than using shell scripts. */
272 struct ofpbuf buf
, reply
, delete;
274 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
275 ofpbuf_init(&delete, NL_DUMP_BUFSIZE
);
277 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
278 IPCTNL_MSG_CT_GET
, NLM_F_REQUEST
);
279 nl_dump_start(&dump
, NETLINK_NETFILTER
, &buf
);
283 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
284 enum nl_ct_event_type event_type
;
285 uint8_t nfgen_family
;
288 if (!nl_dump_next(&dump
, &reply
, &buf
)) {
292 if (!nl_ct_parse_header_policy(&reply
, &event_type
, &nfgen_family
,
297 if (attrs
[CTA_ZONE
]) {
298 zone
= ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]));
301 if (zone
!= flush_zone
) {
302 /* The entry is not in the zone we're flushing. */
305 nl_msg_put_nfgenmsg(&delete, 0, nfgen_family
, NFNL_SUBSYS_CTNETLINK
,
306 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
308 nl_msg_put_be16(&delete, CTA_ZONE
, htons(zone
));
309 nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG
, attrs
[CTA_TUPLE_ORIG
] + 1,
310 attrs
[CTA_TUPLE_ORIG
]->nla_len
- NLA_HDRLEN
);
311 nl_msg_put_unspec(&delete, CTA_ID
, attrs
[CTA_ID
] + 1,
312 attrs
[CTA_ID
]->nla_len
- NLA_HDRLEN
);
313 nl_transact(NETLINK_NETFILTER
, &delete, NULL
);
314 ofpbuf_clear(&delete);
319 ofpbuf_uninit(&delete);
322 /* Expectations are flushed automatically, because they do not
323 * have a master connection anymore */
328 /* Conntrack netlink parsing. */
331 nl_ct_parse_counters(struct nlattr
*nla
, struct ct_dpif_counters
*counters
)
333 static const struct nl_policy policy
[] = {
334 [CTA_COUNTERS_PACKETS
] = { .type
= NL_A_BE64
, .optional
= false },
335 [CTA_COUNTERS_BYTES
] = { .type
= NL_A_BE64
, .optional
= false },
337 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
340 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
344 = ntohll(nl_attr_get_be64(attrs
[CTA_COUNTERS_PACKETS
]));
345 counters
->bytes
= ntohll(nl_attr_get_be64(attrs
[CTA_COUNTERS_BYTES
]));
347 VLOG_ERR_RL(&rl
, "Could not parse nested counters. "
348 "Possibly incompatible Linux kernel version.");
355 nl_ct_parse_timestamp(struct nlattr
*nla
, struct ct_dpif_timestamp
*timestamp
)
357 static const struct nl_policy policy
[] = {
358 [CTA_TIMESTAMP_START
] = { .type
= NL_A_BE64
, .optional
= false },
359 [CTA_TIMESTAMP_STOP
] = { .type
= NL_A_BE64
, .optional
= true },
361 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
364 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
368 = ntohll(nl_attr_get_be64(attrs
[CTA_TIMESTAMP_START
]));
369 if (attrs
[CTA_TIMESTAMP_STOP
]) {
371 = ntohll(nl_attr_get_be64(attrs
[CTA_TIMESTAMP_STOP
]));
374 VLOG_ERR_RL(&rl
, "Could not parse nested timestamp. "
375 "Possibly incompatible Linux kernel version.");
382 nl_ct_parse_tuple_ip(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
)
384 static const struct nl_policy policy
[] = {
385 [CTA_IP_V4_SRC
] = { .type
= NL_A_BE32
, .optional
= true },
386 [CTA_IP_V4_DST
] = { .type
= NL_A_BE32
, .optional
= true },
387 [CTA_IP_V6_SRC
] = { NL_POLICY_FOR(struct in6_addr
), .optional
= true },
388 [CTA_IP_V6_DST
] = { NL_POLICY_FOR(struct in6_addr
), .optional
= true },
390 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
393 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
396 if (tuple
->l3_type
== AF_INET
) {
397 if (attrs
[CTA_IP_V4_SRC
]) {
398 tuple
->src
.ip
= nl_attr_get_be32(attrs
[CTA_IP_V4_SRC
]);
400 if (attrs
[CTA_IP_V4_DST
]) {
401 tuple
->dst
.ip
= nl_attr_get_be32(attrs
[CTA_IP_V4_DST
]);
403 } else if (tuple
->l3_type
== AF_INET6
) {
404 if (attrs
[CTA_IP_V6_SRC
]) {
405 memcpy(&tuple
->src
.in6
, nl_attr_get(attrs
[CTA_IP_V6_SRC
]),
406 sizeof tuple
->src
.in6
);
408 if (attrs
[CTA_IP_V6_DST
]) {
409 memcpy(&tuple
->dst
.in6
, nl_attr_get(attrs
[CTA_IP_V6_DST
]),
410 sizeof tuple
->dst
.in6
);
413 VLOG_WARN_RL(&rl
, "Unsupported IP protocol: %u.", tuple
->l3_type
);
417 VLOG_ERR_RL(&rl
, "Could not parse nested tuple IP options. "
418 "Possibly incompatible Linux kernel version.");
425 nl_ct_parse_tuple_proto(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
)
427 static const struct nl_policy policy
[] = {
428 [CTA_PROTO_NUM
] = { .type
= NL_A_U8
, .optional
= false },
429 [CTA_PROTO_SRC_PORT
] = { .type
= NL_A_BE16
, .optional
= true },
430 [CTA_PROTO_DST_PORT
] = { .type
= NL_A_BE16
, .optional
= true },
431 [CTA_PROTO_ICMP_ID
] = { .type
= NL_A_BE16
, .optional
= true },
432 [CTA_PROTO_ICMP_TYPE
] = { .type
= NL_A_U8
, .optional
= true },
433 [CTA_PROTO_ICMP_CODE
] = { .type
= NL_A_U8
, .optional
= true },
434 [CTA_PROTO_ICMPV6_ID
] = { .type
= NL_A_BE16
, .optional
= true },
435 [CTA_PROTO_ICMPV6_TYPE
] = { .type
= NL_A_U8
, .optional
= true },
436 [CTA_PROTO_ICMPV6_CODE
] = { .type
= NL_A_U8
, .optional
= true },
438 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
441 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
444 tuple
->ip_proto
= nl_attr_get_u8(attrs
[CTA_PROTO_NUM
]);
446 if (tuple
->l3_type
== AF_INET
&& tuple
->ip_proto
== IPPROTO_ICMP
) {
447 if (!attrs
[CTA_PROTO_ICMP_ID
] || !attrs
[CTA_PROTO_ICMP_TYPE
]
448 || !attrs
[CTA_PROTO_ICMP_CODE
]) {
449 VLOG_ERR_RL(&rl
, "Tuple ICMP data missing.");
452 tuple
->icmp_id
= nl_attr_get_be16(attrs
[CTA_PROTO_ICMP_ID
]);
453 tuple
->icmp_type
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMP_TYPE
]);
454 tuple
->icmp_code
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMP_CODE
]);
455 } else if (tuple
->l3_type
== AF_INET6
&&
456 tuple
->ip_proto
== IPPROTO_ICMPV6
) {
457 if (!attrs
[CTA_PROTO_ICMPV6_ID
] || !attrs
[CTA_PROTO_ICMPV6_TYPE
]
458 || !attrs
[CTA_PROTO_ICMPV6_CODE
]) {
459 VLOG_ERR_RL(&rl
, "Tuple ICMPv6 data missing.");
462 tuple
->icmp_id
= nl_attr_get_be16(attrs
[CTA_PROTO_ICMPV6_ID
]);
463 tuple
->icmp_type
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMPV6_TYPE
]);
464 tuple
->icmp_code
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMPV6_CODE
]);
465 } else if (attrs
[CTA_PROTO_SRC_PORT
] && attrs
[CTA_PROTO_DST_PORT
]) {
466 tuple
->src_port
= nl_attr_get_be16(attrs
[CTA_PROTO_SRC_PORT
]);
467 tuple
->dst_port
= nl_attr_get_be16(attrs
[CTA_PROTO_DST_PORT
]);
469 /* Unsupported IPPROTO and no ports, leave them zeroed.
470 * We have parsed the ip_proto, so this is not a failure. */
471 VLOG_DBG_RL(&rl
, "Unsupported L4 protocol: %u.", tuple
->ip_proto
);
474 VLOG_ERR_RL(&rl
, "Could not parse nested tuple protocol options. "
475 "Possibly incompatible Linux kernel version.");
482 nl_ct_parse_tuple(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
,
485 static const struct nl_policy policy
[] = {
486 [CTA_TUPLE_IP
] = { .type
= NL_A_NESTED
, .optional
= false },
487 [CTA_TUPLE_PROTO
] = { .type
= NL_A_NESTED
, .optional
= false },
489 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
492 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
494 memset(tuple
, 0, sizeof *tuple
);
497 tuple
->l3_type
= l3_type
;
499 if (!nl_ct_parse_tuple_ip(attrs
[CTA_TUPLE_IP
], tuple
)
500 || !nl_ct_parse_tuple_proto(attrs
[CTA_TUPLE_PROTO
], tuple
)) {
504 ct_dpif_format_tuple(&ds
, tuple
);
506 VLOG_ERR_RL(&rl
, "Failed to parse tuple: %s", ds_cstr(&ds
));
509 memset(tuple
, 0, sizeof *tuple
);
513 VLOG_ERR_RL(&rl
, "Could not parse nested tuple options. "
514 "Possibly incompatible Linux kernel version.");
520 /* Translate netlink TCP state to CT_DPIF_TCP state. */
522 nl_ct_tcp_state_to_dpif(uint8_t state
)
525 /* Windows currently sends up CT_DPIF_TCP state */
529 case TCP_CONNTRACK_NONE
:
530 return CT_DPIF_TCPS_CLOSED
;
531 case TCP_CONNTRACK_SYN_SENT
:
532 return CT_DPIF_TCPS_SYN_SENT
;
533 case TCP_CONNTRACK_SYN_SENT2
:
534 return CT_DPIF_TCPS_SYN_SENT
;
535 case TCP_CONNTRACK_SYN_RECV
:
536 return CT_DPIF_TCPS_SYN_RECV
;
537 case TCP_CONNTRACK_ESTABLISHED
:
538 return CT_DPIF_TCPS_ESTABLISHED
;
539 case TCP_CONNTRACK_FIN_WAIT
:
540 return CT_DPIF_TCPS_FIN_WAIT_1
;
541 case TCP_CONNTRACK_CLOSE_WAIT
:
542 return CT_DPIF_TCPS_CLOSE_WAIT
;
543 case TCP_CONNTRACK_LAST_ACK
:
544 return CT_DPIF_TCPS_LAST_ACK
;
545 case TCP_CONNTRACK_TIME_WAIT
:
546 return CT_DPIF_TCPS_TIME_WAIT
;
547 case TCP_CONNTRACK_CLOSE
:
548 return CT_DPIF_TCPS_CLOSING
;
550 return CT_DPIF_TCPS_CLOSED
;
556 ip_ct_tcp_flags_to_dpif(uint8_t flags
)
559 /* Windows currently sends up CT_DPIF_TCP flags */
/* X-macro helper: for one TCP flag name FLAG, ORs CT_DPIF_TCPF_<FLAG> into
 * 'ret' when IP_CT_TCP_FLAG_<FLAG> is set in 'flags'.  Both 'ret' and 'flags'
 * must be in scope wherever the macro is expanded.
 * NOTE(review): the flag list that expands this macro is not visible in this
 * excerpt -- confirm it sits between the #define and the #undef. */
#define CT_DPIF_TCP_FLAG(FLAG) \
        ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
/* The #undef must name the macro defined just above so that it does not stay
 * defined for the remainder of the translation unit. */
#undef CT_DPIF_TCP_FLAG
572 nl_ct_parse_protoinfo_tcp(struct nlattr
*nla
,
573 struct ct_dpif_protoinfo
*protoinfo
)
575 static const struct nl_policy policy
[] = {
576 [CTA_PROTOINFO_TCP_STATE
] = { .type
= NL_A_U8
, .optional
= false },
577 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
] = { .type
= NL_A_U8
,
579 [CTA_PROTOINFO_TCP_WSCALE_REPLY
] = { .type
= NL_A_U8
,
581 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
] = { .type
= NL_A_U16
,
583 [CTA_PROTOINFO_TCP_FLAGS_REPLY
] = { .type
= NL_A_U16
,
586 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
589 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
592 const struct nf_ct_tcp_flags
*flags_orig
, *flags_reply
;
594 protoinfo
->proto
= IPPROTO_TCP
;
595 state
= nl_ct_tcp_state_to_dpif(
596 nl_attr_get_u8(attrs
[CTA_PROTOINFO_TCP_STATE
]));
597 /* The connection tracker keeps only one tcp state for the
598 * connection, but our structures store a separate state for
599 * each endpoint. Here we duplicate the state. */
600 protoinfo
->tcp
.state_orig
= protoinfo
->tcp
.state_reply
= state
;
601 protoinfo
->tcp
.wscale_orig
= nl_attr_get_u8(
602 attrs
[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
]);
603 protoinfo
->tcp
.wscale_reply
= nl_attr_get_u8(
604 attrs
[CTA_PROTOINFO_TCP_WSCALE_REPLY
]);
606 nl_attr_get_unspec(attrs
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
],
608 protoinfo
->tcp
.flags_orig
=
609 ip_ct_tcp_flags_to_dpif(flags_orig
->flags
);
611 nl_attr_get_unspec(attrs
[CTA_PROTOINFO_TCP_FLAGS_REPLY
],
612 sizeof *flags_reply
);
613 protoinfo
->tcp
.flags_reply
=
614 ip_ct_tcp_flags_to_dpif(flags_reply
->flags
);
616 VLOG_ERR_RL(&rl
, "Could not parse nested TCP protoinfo options. "
617 "Possibly incompatible Linux kernel version.");
624 nl_ct_parse_protoinfo(struct nlattr
*nla
, struct ct_dpif_protoinfo
*protoinfo
)
626 /* These are mutually exclusive. */
627 static const struct nl_policy policy
[] = {
628 [CTA_PROTOINFO_TCP
] = { .type
= NL_A_NESTED
, .optional
= true },
629 [CTA_PROTOINFO_SCTP
] = { .type
= NL_A_NESTED
, .optional
= true },
631 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
634 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
636 memset(protoinfo
, 0, sizeof *protoinfo
);
639 if (attrs
[CTA_PROTOINFO_TCP
]) {
640 parsed
= nl_ct_parse_protoinfo_tcp(attrs
[CTA_PROTOINFO_TCP
],
642 } else if (attrs
[CTA_PROTOINFO_SCTP
]) {
643 VLOG_WARN_RL(&rl
, "SCTP protoinfo not yet supported!");
645 VLOG_WARN_RL(&rl
, "Empty protoinfo!");
648 VLOG_ERR_RL(&rl
, "Could not parse nested protoinfo options. "
649 "Possibly incompatible Linux kernel version.");
656 nl_ct_parse_helper(struct nlattr
*nla
, struct ct_dpif_helper
*helper
)
658 static const struct nl_policy policy
[] = {
659 [CTA_HELP_NAME
] = { .type
= NL_A_STRING
, .optional
= false },
661 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
664 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
666 memset(helper
, 0, sizeof *helper
);
669 helper
->name
= xstrdup(nl_attr_get_string(attrs
[CTA_HELP_NAME
]));
671 VLOG_ERR_RL(&rl
, "Could not parse nested helper options. "
672 "Possibly incompatible Linux kernel version.");
678 /* Translate netlink entry status flags (IPS_*) to CT_DPIF_STATUS_* flags. */
680 ips_status_to_dpif_flags(uint32_t status
)
683 #define CT_DPIF_STATUS_FLAG(FLAG) \
684 ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
686 #undef CT_DPIF_STATUS_FLAG
691 nl_ct_parse_header_policy(struct ofpbuf
*buf
,
692 enum nl_ct_event_type
*event_type
,
693 uint8_t *nfgen_family
,
694 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)])
696 struct nlmsghdr
*nlh
;
697 struct nfgenmsg
*nfm
;
700 nlh
= ofpbuf_at(buf
, 0, NLMSG_HDRLEN
);
701 nfm
= ofpbuf_at(buf
, NLMSG_HDRLEN
, sizeof *nfm
);
703 VLOG_ERR_RL(&rl
, "Received bad nfnl message (no nfgenmsg).");
706 if (NFNL_SUBSYS_ID(nlh
->nlmsg_type
) != NFNL_SUBSYS_CTNETLINK
) {
707 VLOG_ERR_RL(&rl
, "Received non-conntrack message (subsystem: %u).",
708 NFNL_SUBSYS_ID(nlh
->nlmsg_type
));
711 if (nfm
->version
!= NFNETLINK_V0
) {
712 VLOG_ERR_RL(&rl
, "Received unsupported nfnetlink version (%u).",
713 NFNL_MSG_TYPE(nfm
->version
));
717 if (!nl_policy_parse(buf
, NLMSG_HDRLEN
+ sizeof *nfm
,
718 nfnlgrp_conntrack_policy
, attrs
,
719 ARRAY_SIZE(nfnlgrp_conntrack_policy
))) {
720 VLOG_ERR_RL(&rl
, "Received bad nfnl message (policy).");
724 type
= NFNL_MSG_TYPE(nlh
->nlmsg_type
);
725 *nfgen_family
= nfm
->nfgen_family
;
728 case IPCTNL_MSG_CT_NEW
:
729 *event_type
= nlh
->nlmsg_flags
& NLM_F_CREATE
730 ? NL_CT_EVENT_NEW
: NL_CT_EVENT_UPDATE
;
732 case IPCTNL_MSG_CT_DELETE
:
733 *event_type
= NL_CT_EVENT_DELETE
;
736 VLOG_ERR_RL(&rl
, "Can't parse conntrack event type.");
744 nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry
*entry
,
745 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)],
746 uint8_t nfgen_family
)
748 if (!nl_ct_parse_tuple(attrs
[CTA_TUPLE_ORIG
], &entry
->tuple_orig
,
752 if (!nl_ct_parse_tuple(attrs
[CTA_TUPLE_REPLY
], &entry
->tuple_reply
,
756 if (attrs
[CTA_COUNTERS_ORIG
] &&
757 !nl_ct_parse_counters(attrs
[CTA_COUNTERS_ORIG
],
758 &entry
->counters_orig
)) {
761 if (attrs
[CTA_COUNTERS_REPLY
] &&
762 !nl_ct_parse_counters(attrs
[CTA_COUNTERS_REPLY
],
763 &entry
->counters_reply
)) {
766 if (attrs
[CTA_TIMESTAMP
] &&
767 !nl_ct_parse_timestamp(attrs
[CTA_TIMESTAMP
], &entry
->timestamp
)) {
771 entry
->id
= ntohl(nl_attr_get_be32(attrs
[CTA_ID
]));
773 if (attrs
[CTA_ZONE
]) {
774 entry
->zone
= ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]));
776 if (attrs
[CTA_STATUS
]) {
777 entry
->status
= ips_status_to_dpif_flags(
778 ntohl(nl_attr_get_be32(attrs
[CTA_STATUS
])));
780 if (attrs
[CTA_TIMEOUT
]) {
781 entry
->timeout
= ntohl(nl_attr_get_be32(attrs
[CTA_TIMEOUT
]));
783 if (attrs
[CTA_MARK
]) {
784 entry
->mark
= ntohl(nl_attr_get_be32(attrs
[CTA_MARK
]));
786 if (attrs
[CTA_LABELS
]) {
787 entry
->have_labels
= true;
788 memcpy(&entry
->labels
, nl_attr_get(attrs
[CTA_LABELS
]),
789 MIN(sizeof entry
->labels
, nl_attr_get_size(attrs
[CTA_LABELS
])));
791 if (attrs
[CTA_PROTOINFO
] &&
792 !nl_ct_parse_protoinfo(attrs
[CTA_PROTOINFO
], &entry
->protoinfo
)) {
795 if (attrs
[CTA_HELP
] &&
796 !nl_ct_parse_helper(attrs
[CTA_HELP
], &entry
->helper
)) {
799 if (attrs
[CTA_TUPLE_MASTER
] &&
800 !nl_ct_parse_tuple(attrs
[CTA_TUPLE_MASTER
], &entry
->tuple_master
,
808 nl_ct_parse_entry(struct ofpbuf
*buf
, struct ct_dpif_entry
*entry
,
809 enum nl_ct_event_type
*event_type
)
811 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
812 uint8_t nfgen_family
;
814 memset(entry
, 0, sizeof *entry
);
815 if (!nl_ct_parse_header_policy(buf
, event_type
, &nfgen_family
, attrs
)) {
819 if (!nl_ct_attrs_to_ct_dpif_entry(entry
, attrs
, nfgen_family
)) {
820 ct_dpif_entry_uninit(entry
);
821 memset(entry
, 0, sizeof *entry
);
828 /* NetFilter utility functions. */
830 /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
831 * initially empty. 'expected_payload' should be an estimate of the number of
832 * payload bytes to be supplied; if the size of the payload is unknown a value
833 * of 0 is acceptable.
835 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
837 * 'flags' is a bit-mask that indicates what kind of request is being made. It
838 * is often NLM_F_REQUEST indicating that a request is being made, commonly
839 * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag requests
840 * a dump of the table.
842 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
844 * 'cmd' is an enumerated value specific to the 'subsystem'.
846 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
847 * fill it in just before sending the message.
849 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
850 * not NetFilter Netlink messages. */
852 nl_msg_put_nfgenmsg(struct ofpbuf
*msg
, size_t expected_payload
,
853 int family
, uint8_t subsystem
, uint8_t cmd
,
856 struct nfgenmsg
*nfm
;
858 nl_msg_put_nlmsghdr(msg
, sizeof *nfm
+ expected_payload
,
859 subsystem
<< 8 | cmd
, flags
);
860 ovs_assert(msg
->size
== NLMSG_HDRLEN
);
861 nfm
= nl_msg_put_uninit(msg
, sizeof *nfm
);
862 nfm
->nfgen_family
= family
;
863 nfm
->version
= NFNETLINK_V0
;
866 /* nfgenmsg contains ovsHdr padding in windows */
867 nfm
->ovsHdr
.dp_ifindex
= 0;