2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "netlink-conntrack.h"
22 #include <linux/netfilter/nfnetlink.h>
23 #include <linux/netfilter/nfnetlink_conntrack.h>
24 #include <linux/netfilter/nf_conntrack_common.h>
25 #include <linux/netfilter/nf_conntrack_tcp.h>
26 #include <linux/netfilter/nf_conntrack_ftp.h>
27 #include <linux/netfilter/nf_conntrack_sctp.h>
29 #include "byte-order.h"
31 #include "openvswitch/dynamic-string.h"
33 #include "netlink-socket.h"
34 #include "openvswitch/ofpbuf.h"
35 #include "openvswitch/vlog.h"
36 #include "openvswitch/poll-loop.h"
41 VLOG_DEFINE_THIS_MODULE(netlink_conntrack
);
42 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
44 /* This module works only if conntrack modules and features are enabled in the
45 * Linux kernel. This can be done from a root shell like this:
47 * $ modprobe ip_conntrack
48 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
49 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
51 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
52 * module, there must be a connlabel rule in iptables for space to be reserved
53 * for the labels (see kernel source connlabel_mt_check()). Such a rule can be
54 * inserted from a root shell like this:
56 * $ iptables -A INPUT -m conntrack -m connlabel \
57 * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
60 /* Some attributes were introduced in later kernels: with these definitions
61 * we should be able to compile userspace against Linux 2.6.32+. */
63 #define CTA_ZONE (CTA_SECMARK + 1)
64 #define CTA_SECCTX (CTA_SECMARK + 2)
65 #define CTA_TIMESTAMP (CTA_SECMARK + 3)
66 #define CTA_MARK_MASK (CTA_SECMARK + 4)
67 #define CTA_LABELS (CTA_SECMARK + 5)
68 #define CTA_LABELS_MASK (CTA_SECMARK + 6)
70 #define CTA_TIMESTAMP_START 1
71 #define CTA_TIMESTAMP_STOP 2
73 #define IPS_TEMPLATE_BIT 11
74 #define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
76 #define IPS_UNTRACKED_BIT 12
77 #define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
79 static const struct nl_policy nfnlgrp_conntrack_policy
[] = {
80 [CTA_TUPLE_ORIG
] = { .type
= NL_A_NESTED
, .optional
= false },
81 [CTA_TUPLE_REPLY
] = { .type
= NL_A_NESTED
, .optional
= false },
82 [CTA_ZONE
] = { .type
= NL_A_BE16
, .optional
= true },
83 [CTA_STATUS
] = { .type
= NL_A_BE32
, .optional
= false },
84 [CTA_TIMESTAMP
] = { .type
= NL_A_NESTED
, .optional
= true },
85 [CTA_TIMEOUT
] = { .type
= NL_A_BE32
, .optional
= true },
86 [CTA_COUNTERS_ORIG
] = { .type
= NL_A_NESTED
, .optional
= true },
87 [CTA_COUNTERS_REPLY
] = { .type
= NL_A_NESTED
, .optional
= true },
88 [CTA_PROTOINFO
] = { .type
= NL_A_NESTED
, .optional
= true },
89 [CTA_HELP
] = { .type
= NL_A_NESTED
, .optional
= true },
90 [CTA_MARK
] = { .type
= NL_A_BE32
, .optional
= true },
91 [CTA_SECCTX
] = { .type
= NL_A_NESTED
, .optional
= true },
92 [CTA_ID
] = { .type
= NL_A_BE32
, .optional
= false },
93 [CTA_USE
] = { .type
= NL_A_BE32
, .optional
= true },
94 [CTA_TUPLE_MASTER
] = { .type
= NL_A_NESTED
, .optional
= true },
95 [CTA_NAT_SEQ_ADJ_ORIG
] = { .type
= NL_A_NESTED
, .optional
= true },
96 [CTA_NAT_SEQ_ADJ_REPLY
] = { .type
= NL_A_NESTED
, .optional
= true },
97 [CTA_LABELS
] = { .type
= NL_A_UNSPEC
, .optional
= true },
98 /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and
99 * CTA_LABELS_MASK are not received from kernel. */
102 /* Declarations for conntrack netlink dumping. */
103 static void nl_msg_put_nfgenmsg(struct ofpbuf
*msg
, size_t expected_payload
,
104 int family
, uint8_t subsystem
, uint8_t cmd
,
107 static bool nl_ct_parse_header_policy(struct ofpbuf
*buf
,
108 enum nl_ct_event_type
*event_type
,
109 uint8_t *nfgen_family
,
110 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)]);
112 static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry
*entry
,
113 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)],
114 uint8_t nfgen_family
);
115 static bool nl_ct_put_ct_tuple(struct ofpbuf
*buf
,
116 const struct ct_dpif_tuple
*tuple
, enum ctattr_type type
);
118 struct nl_ct_dump_state
{
125 /* Conntrack netlink dumping. */
127 /* Initialize a conntrack netlink dump. */
129 nl_ct_dump_start(struct nl_ct_dump_state
**statep
, const uint16_t *zone
,
132 struct nl_ct_dump_state
*state
;
134 *statep
= state
= xzalloc(sizeof *state
);
135 ofpbuf_init(&state
->buf
, NL_DUMP_BUFSIZE
);
138 state
->filter_zone
= true;
142 nl_msg_put_nfgenmsg(&state
->buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
143 IPCTNL_MSG_CT_GET
, NLM_F_REQUEST
);
144 nl_dump_start(&state
->dump
, NETLINK_NETFILTER
, &state
->buf
);
145 ofpbuf_clear(&state
->buf
);
147 /* Buckets to store connections are not used. */
153 /* Receive the next 'entry' from the conntrack netlink dump with 'state'.
154 * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may
155 * be uninitialized memory on entry, and must be uninitialized with
156 * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is
157 * passed to this function again, the entry must also be uninitialized before
160 nl_ct_dump_next(struct nl_ct_dump_state
*state
, struct ct_dpif_entry
*entry
)
164 memset(entry
, 0, sizeof *entry
);
166 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
167 enum nl_ct_event_type type
;
168 uint8_t nfgen_family
;
170 if (!nl_dump_next(&state
->dump
, &buf
, &state
->buf
)) {
174 if (!nl_ct_parse_header_policy(&buf
, &type
, &nfgen_family
, attrs
)) {
178 if (state
->filter_zone
) {
179 uint16_t entry_zone
= attrs
[CTA_ZONE
]
180 ? ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]))
182 if (entry_zone
!= state
->zone
) {
187 if (nl_ct_attrs_to_ct_dpif_entry(entry
, attrs
, nfgen_family
)) {
191 ct_dpif_entry_uninit(entry
);
192 memset(entry
, 0, sizeof *entry
);
193 /* Ignore the failed entry and get the next one. */
200 /* End a conntrack netlink dump. */
202 nl_ct_dump_done(struct nl_ct_dump_state
*state
)
204 int error
= nl_dump_done(&state
->dump
);
206 ofpbuf_uninit(&state
->buf
);
211 /* Format conntrack event 'entry' of 'type' to 'ds'. */
213 nl_ct_format_event_entry(const struct ct_dpif_entry
*entry
,
214 enum nl_ct_event_type type
, struct ds
*ds
,
215 bool verbose
, bool print_stats
)
217 ds_put_format(ds
, "%s ",
218 type
== NL_CT_EVENT_NEW
? "NEW"
219 : type
== NL_CT_EVENT_UPDATE
? "UPDATE"
220 : type
== NL_CT_EVENT_DELETE
? "DELETE"
222 ct_dpif_format_entry(entry
, ds
, verbose
, print_stats
);
231 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
233 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
234 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
236 err
= nl_transact(NETLINK_NETFILTER
, &buf
, NULL
);
239 /* Expectations are flushed automatically, because they do not
240 * have a master connection anymore */
246 nl_ct_flush_tuple(const struct ct_dpif_tuple
*tuple
, uint16_t zone
)
251 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
252 nl_msg_put_nfgenmsg(&buf
, 0, tuple
->l3_type
, NFNL_SUBSYS_CTNETLINK
,
253 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
255 nl_msg_put_be16(&buf
, CTA_ZONE
, htons(zone
));
256 if (!nl_ct_put_ct_tuple(&buf
, tuple
, CTA_TUPLE_ORIG
)) {
260 err
= nl_transact(NETLINK_NETFILTER
, &buf
, NULL
);
268 nl_ct_flush_zone(uint16_t flush_zone
)
270 /* Windows can flush a specific zone */
274 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
276 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
277 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
278 nl_msg_put_be16(&buf
, CTA_ZONE
, htons(flush_zone
));
280 err
= nl_transact(NETLINK_NETFILTER
, &buf
, NULL
);
287 nl_ct_flush_zone(uint16_t flush_zone
)
289 /* Apparently, there's no netlink interface to flush a specific zone.
290 * This code dumps every connection, checks the zone and eventually
293 * This is race-prone, but it is better than using shell scripts. */
296 struct ofpbuf buf
, reply
, delete;
298 ofpbuf_init(&buf
, NL_DUMP_BUFSIZE
);
299 ofpbuf_init(&delete, NL_DUMP_BUFSIZE
);
301 nl_msg_put_nfgenmsg(&buf
, 0, AF_UNSPEC
, NFNL_SUBSYS_CTNETLINK
,
302 IPCTNL_MSG_CT_GET
, NLM_F_REQUEST
);
303 nl_dump_start(&dump
, NETLINK_NETFILTER
, &buf
);
307 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
308 enum nl_ct_event_type event_type
;
309 uint8_t nfgen_family
;
312 if (!nl_dump_next(&dump
, &reply
, &buf
)) {
316 if (!nl_ct_parse_header_policy(&reply
, &event_type
, &nfgen_family
,
321 if (attrs
[CTA_ZONE
]) {
322 zone
= ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]));
325 if (zone
!= flush_zone
) {
326 /* The entry is not in the zone we're flushing. */
329 nl_msg_put_nfgenmsg(&delete, 0, nfgen_family
, NFNL_SUBSYS_CTNETLINK
,
330 IPCTNL_MSG_CT_DELETE
, NLM_F_REQUEST
);
332 nl_msg_put_be16(&delete, CTA_ZONE
, htons(zone
));
333 nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG
, attrs
[CTA_TUPLE_ORIG
] + 1,
334 attrs
[CTA_TUPLE_ORIG
]->nla_len
- NLA_HDRLEN
);
335 nl_msg_put_unspec(&delete, CTA_ID
, attrs
[CTA_ID
] + 1,
336 attrs
[CTA_ID
]->nla_len
- NLA_HDRLEN
);
337 nl_transact(NETLINK_NETFILTER
, &delete, NULL
);
338 ofpbuf_clear(&delete);
343 ofpbuf_uninit(&delete);
346 /* Expectations are flushed automatically, because they do not
347 * have a master connection anymore */
352 /* Conntrack netlink parsing. */
355 nl_ct_parse_counters(struct nlattr
*nla
, struct ct_dpif_counters
*counters
)
357 static const struct nl_policy policy
[] = {
358 [CTA_COUNTERS_PACKETS
] = { .type
= NL_A_BE64
, .optional
= false },
359 [CTA_COUNTERS_BYTES
] = { .type
= NL_A_BE64
, .optional
= false },
361 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
364 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
368 = ntohll(nl_attr_get_be64(attrs
[CTA_COUNTERS_PACKETS
]));
369 counters
->bytes
= ntohll(nl_attr_get_be64(attrs
[CTA_COUNTERS_BYTES
]));
371 VLOG_ERR_RL(&rl
, "Could not parse nested counters. "
372 "Possibly incompatible Linux kernel version.");
379 nl_ct_parse_timestamp(struct nlattr
*nla
, struct ct_dpif_timestamp
*timestamp
)
381 static const struct nl_policy policy
[] = {
382 [CTA_TIMESTAMP_START
] = { .type
= NL_A_BE64
, .optional
= false },
383 [CTA_TIMESTAMP_STOP
] = { .type
= NL_A_BE64
, .optional
= true },
385 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
388 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
392 = ntohll(nl_attr_get_be64(attrs
[CTA_TIMESTAMP_START
]));
393 if (attrs
[CTA_TIMESTAMP_STOP
]) {
395 = ntohll(nl_attr_get_be64(attrs
[CTA_TIMESTAMP_STOP
]));
398 VLOG_ERR_RL(&rl
, "Could not parse nested timestamp. "
399 "Possibly incompatible Linux kernel version.");
406 nl_ct_parse_tuple_ip(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
)
408 static const struct nl_policy policy
[] = {
409 [CTA_IP_V4_SRC
] = { .type
= NL_A_BE32
, .optional
= true },
410 [CTA_IP_V4_DST
] = { .type
= NL_A_BE32
, .optional
= true },
411 [CTA_IP_V6_SRC
] = { NL_POLICY_FOR(struct in6_addr
), .optional
= true },
412 [CTA_IP_V6_DST
] = { NL_POLICY_FOR(struct in6_addr
), .optional
= true },
414 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
417 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
420 if (tuple
->l3_type
== AF_INET
) {
421 if (attrs
[CTA_IP_V4_SRC
]) {
422 tuple
->src
.ip
= nl_attr_get_be32(attrs
[CTA_IP_V4_SRC
]);
424 if (attrs
[CTA_IP_V4_DST
]) {
425 tuple
->dst
.ip
= nl_attr_get_be32(attrs
[CTA_IP_V4_DST
]);
427 } else if (tuple
->l3_type
== AF_INET6
) {
428 if (attrs
[CTA_IP_V6_SRC
]) {
429 memcpy(&tuple
->src
.in6
, nl_attr_get(attrs
[CTA_IP_V6_SRC
]),
430 sizeof tuple
->src
.in6
);
432 if (attrs
[CTA_IP_V6_DST
]) {
433 memcpy(&tuple
->dst
.in6
, nl_attr_get(attrs
[CTA_IP_V6_DST
]),
434 sizeof tuple
->dst
.in6
);
437 VLOG_WARN_RL(&rl
, "Unsupported IP protocol: %u.", tuple
->l3_type
);
441 VLOG_ERR_RL(&rl
, "Could not parse nested tuple IP options. "
442 "Possibly incompatible Linux kernel version.");
449 nl_ct_parse_tuple_proto(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
)
451 static const struct nl_policy policy
[] = {
452 [CTA_PROTO_NUM
] = { .type
= NL_A_U8
, .optional
= false },
453 [CTA_PROTO_SRC_PORT
] = { .type
= NL_A_BE16
, .optional
= true },
454 [CTA_PROTO_DST_PORT
] = { .type
= NL_A_BE16
, .optional
= true },
455 [CTA_PROTO_ICMP_ID
] = { .type
= NL_A_BE16
, .optional
= true },
456 [CTA_PROTO_ICMP_TYPE
] = { .type
= NL_A_U8
, .optional
= true },
457 [CTA_PROTO_ICMP_CODE
] = { .type
= NL_A_U8
, .optional
= true },
458 [CTA_PROTO_ICMPV6_ID
] = { .type
= NL_A_BE16
, .optional
= true },
459 [CTA_PROTO_ICMPV6_TYPE
] = { .type
= NL_A_U8
, .optional
= true },
460 [CTA_PROTO_ICMPV6_CODE
] = { .type
= NL_A_U8
, .optional
= true },
462 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
465 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
468 tuple
->ip_proto
= nl_attr_get_u8(attrs
[CTA_PROTO_NUM
]);
470 if (tuple
->l3_type
== AF_INET
&& tuple
->ip_proto
== IPPROTO_ICMP
) {
471 if (!attrs
[CTA_PROTO_ICMP_ID
] || !attrs
[CTA_PROTO_ICMP_TYPE
]
472 || !attrs
[CTA_PROTO_ICMP_CODE
]) {
473 VLOG_ERR_RL(&rl
, "Tuple ICMP data missing.");
476 tuple
->icmp_id
= nl_attr_get_be16(attrs
[CTA_PROTO_ICMP_ID
]);
477 tuple
->icmp_type
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMP_TYPE
]);
478 tuple
->icmp_code
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMP_CODE
]);
479 } else if (tuple
->l3_type
== AF_INET6
&&
480 tuple
->ip_proto
== IPPROTO_ICMPV6
) {
481 if (!attrs
[CTA_PROTO_ICMPV6_ID
] || !attrs
[CTA_PROTO_ICMPV6_TYPE
]
482 || !attrs
[CTA_PROTO_ICMPV6_CODE
]) {
483 VLOG_ERR_RL(&rl
, "Tuple ICMPv6 data missing.");
486 tuple
->icmp_id
= nl_attr_get_be16(attrs
[CTA_PROTO_ICMPV6_ID
]);
487 tuple
->icmp_type
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMPV6_TYPE
]);
488 tuple
->icmp_code
= nl_attr_get_u8(attrs
[CTA_PROTO_ICMPV6_CODE
]);
489 } else if (attrs
[CTA_PROTO_SRC_PORT
] && attrs
[CTA_PROTO_DST_PORT
]) {
490 tuple
->src_port
= nl_attr_get_be16(attrs
[CTA_PROTO_SRC_PORT
]);
491 tuple
->dst_port
= nl_attr_get_be16(attrs
[CTA_PROTO_DST_PORT
]);
493 /* Unsupported IPPROTO and no ports, leave them zeroed.
494 * We have parsed the ip_proto, so this is not a failure. */
495 VLOG_DBG_RL(&rl
, "Unsupported L4 protocol: %u.", tuple
->ip_proto
);
498 VLOG_ERR_RL(&rl
, "Could not parse nested tuple protocol options. "
499 "Possibly incompatible Linux kernel version.");
506 nl_ct_parse_tuple(struct nlattr
*nla
, struct ct_dpif_tuple
*tuple
,
509 static const struct nl_policy policy
[] = {
510 [CTA_TUPLE_IP
] = { .type
= NL_A_NESTED
, .optional
= false },
511 [CTA_TUPLE_PROTO
] = { .type
= NL_A_NESTED
, .optional
= false },
513 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
516 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
518 memset(tuple
, 0, sizeof *tuple
);
521 tuple
->l3_type
= l3_type
;
523 if (!nl_ct_parse_tuple_ip(attrs
[CTA_TUPLE_IP
], tuple
)
524 || !nl_ct_parse_tuple_proto(attrs
[CTA_TUPLE_PROTO
], tuple
)) {
528 ct_dpif_format_tuple(&ds
, tuple
);
530 VLOG_ERR_RL(&rl
, "Failed to parse tuple: %s", ds_cstr(&ds
));
533 memset(tuple
, 0, sizeof *tuple
);
537 VLOG_ERR_RL(&rl
, "Could not parse nested tuple options. "
538 "Possibly incompatible Linux kernel version.");
545 nl_ct_put_tuple_ip(struct ofpbuf
*buf
, const struct ct_dpif_tuple
*tuple
)
547 size_t offset
= nl_msg_start_nested(buf
, CTA_TUPLE_IP
);
549 if (tuple
->l3_type
== AF_INET
) {
550 nl_msg_put_be32(buf
, CTA_IP_V4_SRC
, tuple
->src
.ip
);
551 nl_msg_put_be32(buf
, CTA_IP_V4_DST
, tuple
->dst
.ip
);
552 } else if (tuple
->l3_type
== AF_INET6
) {
553 nl_msg_put_in6_addr(buf
, CTA_IP_V6_SRC
, &tuple
->src
.in6
);
554 nl_msg_put_in6_addr(buf
, CTA_IP_V6_DST
, &tuple
->dst
.in6
);
556 VLOG_WARN_RL(&rl
, "Unsupported IP protocol: %"PRIu16
".",
561 nl_msg_end_nested(buf
, offset
);
566 nl_ct_put_tuple_proto(struct ofpbuf
*buf
, const struct ct_dpif_tuple
*tuple
)
568 size_t offset
= nl_msg_start_nested(buf
, CTA_TUPLE_PROTO
);
570 nl_msg_put_u8(buf
, CTA_PROTO_NUM
, tuple
->ip_proto
);
572 if (tuple
->l3_type
== AF_INET
&& tuple
->ip_proto
== IPPROTO_ICMP
) {
573 nl_msg_put_be16(buf
, CTA_PROTO_ICMP_ID
, tuple
->icmp_id
);
574 nl_msg_put_u8(buf
, CTA_PROTO_ICMP_TYPE
, tuple
->icmp_type
);
575 nl_msg_put_u8(buf
, CTA_PROTO_ICMP_CODE
, tuple
->icmp_code
);
576 } else if (tuple
->l3_type
== AF_INET6
&&
577 tuple
->ip_proto
== IPPROTO_ICMPV6
) {
578 nl_msg_put_be16(buf
, CTA_PROTO_ICMPV6_ID
, tuple
->icmp_id
);
579 nl_msg_put_u8(buf
, CTA_PROTO_ICMPV6_TYPE
, tuple
->icmp_type
);
580 nl_msg_put_u8(buf
, CTA_PROTO_ICMPV6_CODE
, tuple
->icmp_code
);
581 } else if (tuple
->ip_proto
== IPPROTO_TCP
||
582 tuple
->ip_proto
== IPPROTO_UDP
) {
583 nl_msg_put_be16(buf
, CTA_PROTO_SRC_PORT
, tuple
->src_port
);
584 nl_msg_put_be16(buf
, CTA_PROTO_DST_PORT
, tuple
->dst_port
);
586 VLOG_WARN_RL(&rl
, "Unsupported L4 protocol: %"PRIu8
".",
591 nl_msg_end_nested(buf
, offset
);
596 nl_ct_put_ct_tuple(struct ofpbuf
*buf
, const struct ct_dpif_tuple
*tuple
,
597 enum ctattr_type type
)
599 if (type
!= CTA_TUPLE_ORIG
&& type
!= CTA_TUPLE_REPLY
&&
600 type
!= CTA_TUPLE_MASTER
) {
604 size_t offset
= nl_msg_start_nested(buf
, type
);
606 if (!nl_ct_put_tuple_ip(buf
, tuple
)) {
609 if (!nl_ct_put_tuple_proto(buf
, tuple
)) {
613 nl_msg_end_nested(buf
, offset
);
617 /* Translate netlink TCP state to CT_DPIF_TCP state. */
619 nl_ct_tcp_state_to_dpif(uint8_t state
)
622 /* Windows currently sends up CT_DPIF_TCP state */
626 case TCP_CONNTRACK_NONE
:
627 return CT_DPIF_TCPS_CLOSED
;
628 case TCP_CONNTRACK_SYN_SENT
:
629 return CT_DPIF_TCPS_SYN_SENT
;
630 case TCP_CONNTRACK_SYN_SENT2
:
631 return CT_DPIF_TCPS_SYN_SENT
;
632 case TCP_CONNTRACK_SYN_RECV
:
633 return CT_DPIF_TCPS_SYN_RECV
;
634 case TCP_CONNTRACK_ESTABLISHED
:
635 return CT_DPIF_TCPS_ESTABLISHED
;
636 case TCP_CONNTRACK_FIN_WAIT
:
637 return CT_DPIF_TCPS_FIN_WAIT_1
;
638 case TCP_CONNTRACK_CLOSE_WAIT
:
639 return CT_DPIF_TCPS_CLOSE_WAIT
;
640 case TCP_CONNTRACK_LAST_ACK
:
641 return CT_DPIF_TCPS_LAST_ACK
;
642 case TCP_CONNTRACK_TIME_WAIT
:
643 return CT_DPIF_TCPS_TIME_WAIT
;
644 case TCP_CONNTRACK_CLOSE
:
645 return CT_DPIF_TCPS_CLOSING
;
647 return CT_DPIF_TCPS_CLOSED
;
653 ip_ct_tcp_flags_to_dpif(uint8_t flags
)
656 /* Windows currently sends up CT_DPIF_TCP flags */
660 #define CT_DPIF_TCP_FLAG(FLAG) \
661 ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
663 #undef CT_DPIF_STATUS_FLAG
669 nl_ct_parse_protoinfo_tcp(struct nlattr
*nla
,
670 struct ct_dpif_protoinfo
*protoinfo
)
672 static const struct nl_policy policy
[] = {
673 [CTA_PROTOINFO_TCP_STATE
] = { .type
= NL_A_U8
, .optional
= false },
674 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
] = { .type
= NL_A_U8
,
676 [CTA_PROTOINFO_TCP_WSCALE_REPLY
] = { .type
= NL_A_U8
,
678 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
] = { .type
= NL_A_U16
,
680 [CTA_PROTOINFO_TCP_FLAGS_REPLY
] = { .type
= NL_A_U16
,
683 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
686 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
689 const struct nf_ct_tcp_flags
*flags_orig
, *flags_reply
;
691 protoinfo
->proto
= IPPROTO_TCP
;
692 state
= nl_ct_tcp_state_to_dpif(
693 nl_attr_get_u8(attrs
[CTA_PROTOINFO_TCP_STATE
]));
694 /* The connection tracker keeps only one tcp state for the
695 * connection, but our structures store a separate state for
696 * each endpoint. Here we duplicate the state. */
697 protoinfo
->tcp
.state_orig
= protoinfo
->tcp
.state_reply
= state
;
698 protoinfo
->tcp
.wscale_orig
= nl_attr_get_u8(
699 attrs
[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
]);
700 protoinfo
->tcp
.wscale_reply
= nl_attr_get_u8(
701 attrs
[CTA_PROTOINFO_TCP_WSCALE_REPLY
]);
703 nl_attr_get_unspec(attrs
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
],
705 protoinfo
->tcp
.flags_orig
=
706 ip_ct_tcp_flags_to_dpif(flags_orig
->flags
);
708 nl_attr_get_unspec(attrs
[CTA_PROTOINFO_TCP_FLAGS_REPLY
],
709 sizeof *flags_reply
);
710 protoinfo
->tcp
.flags_reply
=
711 ip_ct_tcp_flags_to_dpif(flags_reply
->flags
);
713 VLOG_ERR_RL(&rl
, "Could not parse nested TCP protoinfo options. "
714 "Possibly incompatible Linux kernel version.");
721 nl_ct_parse_protoinfo(struct nlattr
*nla
, struct ct_dpif_protoinfo
*protoinfo
)
723 /* These are mutually exclusive. */
724 static const struct nl_policy policy
[] = {
725 [CTA_PROTOINFO_TCP
] = { .type
= NL_A_NESTED
, .optional
= true },
726 [CTA_PROTOINFO_SCTP
] = { .type
= NL_A_NESTED
, .optional
= true },
728 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
731 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
733 memset(protoinfo
, 0, sizeof *protoinfo
);
736 if (attrs
[CTA_PROTOINFO_TCP
]) {
737 parsed
= nl_ct_parse_protoinfo_tcp(attrs
[CTA_PROTOINFO_TCP
],
739 } else if (attrs
[CTA_PROTOINFO_SCTP
]) {
740 VLOG_WARN_RL(&rl
, "SCTP protoinfo not yet supported!");
742 VLOG_WARN_RL(&rl
, "Empty protoinfo!");
745 VLOG_ERR_RL(&rl
, "Could not parse nested protoinfo options. "
746 "Possibly incompatible Linux kernel version.");
753 nl_ct_parse_helper(struct nlattr
*nla
, struct ct_dpif_helper
*helper
)
755 static const struct nl_policy policy
[] = {
756 [CTA_HELP_NAME
] = { .type
= NL_A_STRING
, .optional
= false },
758 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
761 parsed
= nl_parse_nested(nla
, policy
, attrs
, ARRAY_SIZE(policy
));
763 memset(helper
, 0, sizeof *helper
);
766 helper
->name
= xstrdup(nl_attr_get_string(attrs
[CTA_HELP_NAME
]));
768 VLOG_ERR_RL(&rl
, "Could not parse nested helper options. "
769 "Possibly incompatible Linux kernel version.");
775 /* Translate netlink entry status flags to CT_DPIF_STATUS flags. */
777 ips_status_to_dpif_flags(uint32_t status
)
780 #define CT_DPIF_STATUS_FLAG(FLAG) \
781 ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
783 #undef CT_DPIF_STATUS_FLAG
788 nl_ct_parse_header_policy(struct ofpbuf
*buf
,
789 enum nl_ct_event_type
*event_type
,
790 uint8_t *nfgen_family
,
791 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)])
793 struct nlmsghdr
*nlh
;
794 struct nfgenmsg
*nfm
;
797 nlh
= ofpbuf_at(buf
, 0, NLMSG_HDRLEN
);
798 nfm
= ofpbuf_at(buf
, NLMSG_HDRLEN
, sizeof *nfm
);
800 VLOG_ERR_RL(&rl
, "Received bad nfnl message (no nfgenmsg).");
803 if (NFNL_SUBSYS_ID(nlh
->nlmsg_type
) != NFNL_SUBSYS_CTNETLINK
) {
804 VLOG_ERR_RL(&rl
, "Received non-conntrack message (subsystem: %u).",
805 NFNL_SUBSYS_ID(nlh
->nlmsg_type
));
808 if (nfm
->version
!= NFNETLINK_V0
) {
809 VLOG_ERR_RL(&rl
, "Received unsupported nfnetlink version (%u).",
810 NFNL_MSG_TYPE(nfm
->version
));
814 if (!nl_policy_parse(buf
, NLMSG_HDRLEN
+ sizeof *nfm
,
815 nfnlgrp_conntrack_policy
, attrs
,
816 ARRAY_SIZE(nfnlgrp_conntrack_policy
))) {
817 VLOG_ERR_RL(&rl
, "Received bad nfnl message (policy).");
821 type
= NFNL_MSG_TYPE(nlh
->nlmsg_type
);
822 *nfgen_family
= nfm
->nfgen_family
;
825 case IPCTNL_MSG_CT_NEW
:
826 *event_type
= nlh
->nlmsg_flags
& NLM_F_CREATE
827 ? NL_CT_EVENT_NEW
: NL_CT_EVENT_UPDATE
;
829 case IPCTNL_MSG_CT_DELETE
:
830 *event_type
= NL_CT_EVENT_DELETE
;
833 VLOG_ERR_RL(&rl
, "Can't parse conntrack event type.");
841 nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry
*entry
,
842 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)],
843 uint8_t nfgen_family
)
845 if (!nl_ct_parse_tuple(attrs
[CTA_TUPLE_ORIG
], &entry
->tuple_orig
,
849 if (!nl_ct_parse_tuple(attrs
[CTA_TUPLE_REPLY
], &entry
->tuple_reply
,
853 if (attrs
[CTA_COUNTERS_ORIG
] &&
854 !nl_ct_parse_counters(attrs
[CTA_COUNTERS_ORIG
],
855 &entry
->counters_orig
)) {
858 if (attrs
[CTA_COUNTERS_REPLY
] &&
859 !nl_ct_parse_counters(attrs
[CTA_COUNTERS_REPLY
],
860 &entry
->counters_reply
)) {
863 if (attrs
[CTA_TIMESTAMP
] &&
864 !nl_ct_parse_timestamp(attrs
[CTA_TIMESTAMP
], &entry
->timestamp
)) {
868 entry
->id
= ntohl(nl_attr_get_be32(attrs
[CTA_ID
]));
870 if (attrs
[CTA_ZONE
]) {
871 entry
->zone
= ntohs(nl_attr_get_be16(attrs
[CTA_ZONE
]));
873 if (attrs
[CTA_STATUS
]) {
874 entry
->status
= ips_status_to_dpif_flags(
875 ntohl(nl_attr_get_be32(attrs
[CTA_STATUS
])));
877 if (attrs
[CTA_TIMEOUT
]) {
878 entry
->timeout
= ntohl(nl_attr_get_be32(attrs
[CTA_TIMEOUT
]));
880 if (attrs
[CTA_MARK
]) {
881 entry
->mark
= ntohl(nl_attr_get_be32(attrs
[CTA_MARK
]));
883 if (attrs
[CTA_LABELS
]) {
884 entry
->have_labels
= true;
885 memcpy(&entry
->labels
, nl_attr_get(attrs
[CTA_LABELS
]),
886 MIN(sizeof entry
->labels
, nl_attr_get_size(attrs
[CTA_LABELS
])));
888 if (attrs
[CTA_PROTOINFO
] &&
889 !nl_ct_parse_protoinfo(attrs
[CTA_PROTOINFO
], &entry
->protoinfo
)) {
892 if (attrs
[CTA_HELP
] &&
893 !nl_ct_parse_helper(attrs
[CTA_HELP
], &entry
->helper
)) {
896 if (attrs
[CTA_TUPLE_MASTER
] &&
897 !nl_ct_parse_tuple(attrs
[CTA_TUPLE_MASTER
], &entry
->tuple_master
,
905 nl_ct_parse_entry(struct ofpbuf
*buf
, struct ct_dpif_entry
*entry
,
906 enum nl_ct_event_type
*event_type
)
908 struct nlattr
*attrs
[ARRAY_SIZE(nfnlgrp_conntrack_policy
)];
909 uint8_t nfgen_family
;
911 memset(entry
, 0, sizeof *entry
);
912 if (!nl_ct_parse_header_policy(buf
, event_type
, &nfgen_family
, attrs
)) {
916 if (!nl_ct_attrs_to_ct_dpif_entry(entry
, attrs
, nfgen_family
)) {
917 ct_dpif_entry_uninit(entry
);
918 memset(entry
, 0, sizeof *entry
);
925 /* NetFilter utility functions. */
927 /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
928 * initially empty. 'expected_payload' should be an estimate of the number of
929 * payload bytes to be supplied; if the size of the payload is unknown a value
930 * of 0 is acceptable.
932 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
934 * 'flags' is a bit-mask that indicates what kind of request is being made. It
935 * is often NLM_F_REQUEST indicating that a request is being made, commonly
936 * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag requests
937 * a dump of the table.
939 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
941 * 'cmd' is an enumerated value specific to the 'subsystem'.
943 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
944 * fill it in just before sending the message.
946 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
947 * not NetFilter Netlink messages. */
949 nl_msg_put_nfgenmsg(struct ofpbuf
*msg
, size_t expected_payload
,
950 int family
, uint8_t subsystem
, uint8_t cmd
,
953 struct nfgenmsg
*nfm
;
955 nl_msg_put_nlmsghdr(msg
, sizeof *nfm
+ expected_payload
,
956 subsystem
<< 8 | cmd
, flags
);
957 ovs_assert(msg
->size
== NLMSG_HDRLEN
);
958 nfm
= nl_msg_put_uninit(msg
, sizeof *nfm
);
959 nfm
->nfgen_family
= family
;
960 nfm
->version
= NFNETLINK_V0
;
963 /* nfgenmsg contains ovsHdr padding in windows */
964 nfm
->ovsHdr
.dp_ifindex
= 0;