]> git.proxmox.com Git - ovs.git/blob - lib/netlink-conntrack.c
dpctl: Add new 'ct-bkts' command.
[ovs.git] / lib / netlink-conntrack.c
1 /*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "netlink-conntrack.h"
20
21 #include <linux/netfilter/nfnetlink.h>
22 #include <linux/netfilter/nfnetlink_conntrack.h>
23 #include <linux/netfilter/nf_conntrack_common.h>
24 #include <linux/netfilter/nf_conntrack_tcp.h>
25 #include <linux/netfilter/nf_conntrack_ftp.h>
26 #include <linux/netfilter/nf_conntrack_sctp.h>
27
28 #include "byte-order.h"
29 #include "compiler.h"
30 #include "openvswitch/dynamic-string.h"
31 #include "netlink.h"
32 #include "netlink-socket.h"
33 #include "openvswitch/ofpbuf.h"
34 #include "openvswitch/vlog.h"
35 #include "poll-loop.h"
36 #include "timeval.h"
37 #include "unixctl.h"
38 #include "util.h"
39
40 VLOG_DEFINE_THIS_MODULE(netlink_conntrack);
41 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
42
43 /* This module works only if conntrack modules and features are enabled in the
44 * Linux kernel. This can be done from a root shell like this:
45 *
46 * $ modprobe ip_conntrack
47 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
48 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
49 *
50 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
51 * module, there must be a connlabel rule in iptables for space to be reserved
52 * for the labels (see kernel source connlabel_mt_check()). Such a rule can be
53 * inserted from a root shell like this:
54 *
55 * $ iptables -A INPUT -m conntrack -m connlabel \
56 * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
57 */
58
/* Compatibility definitions.
 *
 * Several attributes and status bits only appeared in later kernels.  With
 * the definitions below, userspace should still compile against the headers
 * of Linux 2.6.32+. */

/* Top-level conntrack attributes, numbered relative to CTA_SECMARK. */
#define CTA_ZONE        (CTA_SECMARK + 1)
#define CTA_SECCTX      (CTA_SECMARK + 2)
#define CTA_TIMESTAMP   (CTA_SECMARK + 3)
#define CTA_MARK_MASK   (CTA_SECMARK + 4)
#define CTA_LABELS      (CTA_SECMARK + 5)
#define CTA_LABELS_MASK (CTA_SECMARK + 6)

/* Attributes nested inside CTA_TIMESTAMP. */
#define CTA_TIMESTAMP_START 1
#define CTA_TIMESTAMP_STOP  2

/* Connection status bits. */
#define IPS_TEMPLATE_BIT  11
#define IPS_TEMPLATE      (1 << IPS_TEMPLATE_BIT)

#define IPS_UNTRACKED_BIT 12
#define IPS_UNTRACKED     (1 << IPS_UNTRACKED_BIT)
77
78 static const struct nl_policy nfnlgrp_conntrack_policy[] = {
79 [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false },
80 [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false },
81 [CTA_ZONE] = { .type = NL_A_BE16, .optional = true },
82 [CTA_STATUS] = { .type = NL_A_BE32, .optional = false },
83 [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true },
84 [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true },
85 [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true },
86 [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true },
87 [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true },
88 [CTA_HELP] = { .type = NL_A_NESTED, .optional = true },
89 [CTA_MARK] = { .type = NL_A_BE32, .optional = true },
90 [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true },
91 [CTA_ID] = { .type = NL_A_BE32, .optional = false },
92 [CTA_USE] = { .type = NL_A_BE32, .optional = true },
93 [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true },
94 [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true },
95 [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true },
96 [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true },
97 /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and
98 * CTA_LABELS_MASK are not received from kernel. */
99 };
100
101 /* Declarations for conntrack netlink dumping. */
102 static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
103 int family, uint8_t subsystem, uint8_t cmd,
104 uint32_t flags);
105
106 static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
107 enum nl_ct_event_type *event_type,
108 uint8_t *nfgen_family,
109 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]);
110
111 static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
112 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
113 uint8_t nfgen_family);
114
115 struct nl_ct_dump_state {
116 struct nl_dump dump;
117 struct ofpbuf buf;
118 bool filter_zone;
119 uint16_t zone;
120 };
121
122 /* Conntrack netlink dumping. */
123
124 /* Initialize a conntrack netlink dump. */
125 int
126 nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone,
127 int *ptot_bkts)
128 {
129 struct nl_ct_dump_state *state;
130
131 *statep = state = xzalloc(sizeof *state);
132 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
133
134 if (zone) {
135 state->filter_zone = true;
136 state->zone = *zone;
137 }
138
139 nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
140 IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
141 nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf);
142 ofpbuf_clear(&state->buf);
143
144 /* Buckets to store connections are not used. */
145 *ptot_bkts = -1;
146
147 return 0;
148 }
149
150 /* Receive the next 'entry' from the conntrack netlink dump with 'state'.
151 * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may
152 * be uninitilized memory on entry, and must be uninitialized with
153 * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is
154 * passed to this function again, the entry must also be uninitialized before
155 * the next call. */
156 int
157 nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry)
158 {
159 struct ofpbuf buf;
160
161 memset(entry, 0, sizeof *entry);
162 for (;;) {
163 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
164 enum nl_ct_event_type type;
165 uint8_t nfgen_family;
166
167 if (!nl_dump_next(&state->dump, &buf, &state->buf)) {
168 return EOF;
169 }
170
171 if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) {
172 continue;
173 };
174
175 if (state->filter_zone) {
176 uint16_t entry_zone = attrs[CTA_ZONE]
177 ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE]))
178 : 0;
179 if (entry_zone != state->zone) {
180 continue;
181 }
182 }
183
184 if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
185 break;
186 }
187
188 ct_dpif_entry_uninit(entry);
189 memset(entry, 0, sizeof *entry);
190 /* Ignore the failed entry and get the next one. */
191 }
192
193 ofpbuf_uninit(&buf);
194 return 0;
195 }
196
197 /* End a conntrack netlink dump. */
198 int
199 nl_ct_dump_done(struct nl_ct_dump_state *state)
200 {
201 int error = nl_dump_done(&state->dump);
202
203 ofpbuf_uninit(&state->buf);
204 free(state);
205 return error;
206 }
207
208 /* Format conntrack event 'entry' of 'type' to 'ds'. */
209 void
210 nl_ct_format_event_entry(const struct ct_dpif_entry *entry,
211 enum nl_ct_event_type type, struct ds *ds,
212 bool verbose, bool print_stats)
213 {
214 ds_put_format(ds, "%s ",
215 type == NL_CT_EVENT_NEW ? "NEW"
216 : type == NL_CT_EVENT_UPDATE ? "UPDATE"
217 : type == NL_CT_EVENT_DELETE ? "DELETE"
218 : "UNKNOWN");
219 ct_dpif_format_entry(entry, ds, verbose, print_stats);
220 }
221
222 int
223 nl_ct_flush(void)
224 {
225 struct ofpbuf buf;
226 int err;
227
228 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
229
230 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
231 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
232
233 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
234 ofpbuf_uninit(&buf);
235
236 /* Expectations are flushed automatically, because they do not
237 * have a master connection anymore */
238
239 return err;
240 }
241
242 #ifdef _WIN32
243 int
244 nl_ct_flush_zone(uint16_t flush_zone)
245 {
246 /* Windows can flush a specific zone */
247 struct ofpbuf buf;
248 int err;
249
250 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
251
252 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
253 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
254 nl_msg_put_be16(&buf, CTA_ZONE, flush_zone);
255
256 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
257 ofpbuf_uninit(&buf);
258
259 return err;
260 }
261 #else
262 int
263 nl_ct_flush_zone(uint16_t flush_zone)
264 {
265 /* Apparently, there's no netlink interface to flush a specific zone.
266 * This code dumps every connection, checks the zone and eventually
267 * delete the entry.
268 *
269 * This is race-prone, but it is better than using shell scripts. */
270
271 struct nl_dump dump;
272 struct ofpbuf buf, reply, delete;
273
274 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
275 ofpbuf_init(&delete, NL_DUMP_BUFSIZE);
276
277 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
278 IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
279 nl_dump_start(&dump, NETLINK_NETFILTER, &buf);
280 ofpbuf_clear(&buf);
281
282 for (;;) {
283 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
284 enum nl_ct_event_type event_type;
285 uint8_t nfgen_family;
286 uint16_t zone = 0;
287
288 if (!nl_dump_next(&dump, &reply, &buf)) {
289 break;
290 }
291
292 if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family,
293 attrs)) {
294 continue;
295 };
296
297 if (attrs[CTA_ZONE]) {
298 zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
299 }
300
301 if (zone != flush_zone) {
302 /* The entry is not in the zone we're flushing. */
303 continue;
304 }
305 nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK,
306 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
307
308 nl_msg_put_be16(&delete, CTA_ZONE, htons(zone));
309 nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1,
310 attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN);
311 nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1,
312 attrs[CTA_ID]->nla_len - NLA_HDRLEN);
313 nl_transact(NETLINK_NETFILTER, &delete, NULL);
314 ofpbuf_clear(&delete);
315 }
316
317 nl_dump_done(&dump);
318
319 ofpbuf_uninit(&delete);
320 ofpbuf_uninit(&buf);
321
322 /* Expectations are flushed automatically, because they do not
323 * have a master connection anymore */
324 return 0;
325 }
326 #endif
327
328 /* Conntrack netlink parsing. */
329
330 static bool
331 nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters)
332 {
333 static const struct nl_policy policy[] = {
334 [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false },
335 [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false },
336 };
337 struct nlattr *attrs[ARRAY_SIZE(policy)];
338 bool parsed;
339
340 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
341
342 if (parsed) {
343 counters->packets
344 = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS]));
345 counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES]));
346 } else {
347 VLOG_ERR_RL(&rl, "Could not parse nested counters. "
348 "Possibly incompatible Linux kernel version.");
349 }
350
351 return parsed;
352 }
353
354 static bool
355 nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp)
356 {
357 static const struct nl_policy policy[] = {
358 [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false },
359 [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true },
360 };
361 struct nlattr *attrs[ARRAY_SIZE(policy)];
362 bool parsed;
363
364 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
365
366 if (parsed) {
367 timestamp->start
368 = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START]));
369 if (attrs[CTA_TIMESTAMP_STOP]) {
370 timestamp->stop
371 = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP]));
372 }
373 } else {
374 VLOG_ERR_RL(&rl, "Could not parse nested timestamp. "
375 "Possibly incompatible Linux kernel version.");
376 }
377
378 return parsed;
379 }
380
381 static bool
382 nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple)
383 {
384 static const struct nl_policy policy[] = {
385 [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true },
386 [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true },
387 [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
388 [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
389 };
390 struct nlattr *attrs[ARRAY_SIZE(policy)];
391 bool parsed;
392
393 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
394
395 if (parsed) {
396 if (tuple->l3_type == AF_INET) {
397 if (attrs[CTA_IP_V4_SRC]) {
398 tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]);
399 }
400 if (attrs[CTA_IP_V4_DST]) {
401 tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]);
402 }
403 } else if (tuple->l3_type == AF_INET6) {
404 if (attrs[CTA_IP_V6_SRC]) {
405 memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]),
406 sizeof tuple->src.in6);
407 }
408 if (attrs[CTA_IP_V6_DST]) {
409 memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]),
410 sizeof tuple->dst.in6);
411 }
412 } else {
413 VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type);
414 return false;
415 }
416 } else {
417 VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. "
418 "Possibly incompatible Linux kernel version.");
419 }
420
421 return parsed;
422 }
423
424 static bool
425 nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple)
426 {
427 static const struct nl_policy policy[] = {
428 [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false },
429 [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true },
430 [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true },
431 [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true },
432 [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true },
433 [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true },
434 [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true },
435 [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true },
436 [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true },
437 };
438 struct nlattr *attrs[ARRAY_SIZE(policy)];
439 bool parsed;
440
441 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
442
443 if (parsed) {
444 tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]);
445
446 if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
447 if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE]
448 || !attrs[CTA_PROTO_ICMP_CODE]) {
449 VLOG_ERR_RL(&rl, "Tuple ICMP data missing.");
450 return false;
451 }
452 tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]);
453 tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]);
454 tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]);
455 } else if (tuple->l3_type == AF_INET6 &&
456 tuple->ip_proto == IPPROTO_ICMPV6) {
457 if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE]
458 || !attrs[CTA_PROTO_ICMPV6_CODE]) {
459 VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing.");
460 return false;
461 }
462 tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]);
463 tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]);
464 tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]);
465 } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) {
466 tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]);
467 tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]);
468 } else {
469 /* Unsupported IPPROTO and no ports, leave them zeroed.
470 * We have parsed the ip_proto, so this is not a failure. */
471 VLOG_DBG_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto);
472 }
473 } else {
474 VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. "
475 "Possibly incompatible Linux kernel version.");
476 }
477
478 return parsed;
479 }
480
481 static bool
482 nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
483 uint16_t l3_type)
484 {
485 static const struct nl_policy policy[] = {
486 [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false },
487 [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false },
488 };
489 struct nlattr *attrs[ARRAY_SIZE(policy)];
490 bool parsed;
491
492 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
493
494 memset(tuple, 0, sizeof *tuple);
495
496 if (parsed) {
497 tuple->l3_type = l3_type;
498
499 if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple)
500 || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) {
501 struct ds ds;
502
503 ds_init(&ds);
504 ct_dpif_format_tuple(&ds, tuple);
505
506 VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
507 ds_destroy(&ds);
508
509 memset(tuple, 0, sizeof *tuple);
510 return false;
511 }
512 } else {
513 VLOG_ERR_RL(&rl, "Could not parse nested tuple options. "
514 "Possibly incompatible Linux kernel version.");
515 }
516
517 return parsed;
518 }
519
520 /* Translate netlink TCP state to CT_DPIF_TCP state. */
521 static uint8_t
522 nl_ct_tcp_state_to_dpif(uint8_t state)
523 {
524 #ifdef _WIN32
525 /* Windows currently sends up CT_DPIF_TCP state */
526 return state;
527 #else
528 switch (state) {
529 case TCP_CONNTRACK_NONE:
530 return CT_DPIF_TCPS_CLOSED;
531 case TCP_CONNTRACK_SYN_SENT:
532 return CT_DPIF_TCPS_SYN_SENT;
533 case TCP_CONNTRACK_SYN_SENT2:
534 return CT_DPIF_TCPS_SYN_SENT;
535 case TCP_CONNTRACK_SYN_RECV:
536 return CT_DPIF_TCPS_SYN_RECV;
537 case TCP_CONNTRACK_ESTABLISHED:
538 return CT_DPIF_TCPS_ESTABLISHED;
539 case TCP_CONNTRACK_FIN_WAIT:
540 return CT_DPIF_TCPS_FIN_WAIT_1;
541 case TCP_CONNTRACK_CLOSE_WAIT:
542 return CT_DPIF_TCPS_CLOSE_WAIT;
543 case TCP_CONNTRACK_LAST_ACK:
544 return CT_DPIF_TCPS_LAST_ACK;
545 case TCP_CONNTRACK_TIME_WAIT:
546 return CT_DPIF_TCPS_TIME_WAIT;
547 case TCP_CONNTRACK_CLOSE:
548 return CT_DPIF_TCPS_CLOSING;
549 default:
550 return CT_DPIF_TCPS_CLOSED;
551 }
552 #endif
553 }
554
555 static uint8_t
556 ip_ct_tcp_flags_to_dpif(uint8_t flags)
557 {
558 #ifdef _WIN32
559 /* Windows currently sends up CT_DPIF_TCP flags */
560 return flags;
561 #else
562 uint8_t ret = 0;
563 #define CT_DPIF_TCP_FLAG(FLAG) \
564 ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
565 CT_DPIF_TCP_FLAGS
566 #undef CT_DPIF_STATUS_FLAG
567 return ret;
568 #endif
569 }
570
571 static bool
572 nl_ct_parse_protoinfo_tcp(struct nlattr *nla,
573 struct ct_dpif_protoinfo *protoinfo)
574 {
575 static const struct nl_policy policy[] = {
576 [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false },
577 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8,
578 .optional = false },
579 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8,
580 .optional = false },
581 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16,
582 .optional = false },
583 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16,
584 .optional = false },
585 };
586 struct nlattr *attrs[ARRAY_SIZE(policy)];
587 bool parsed;
588
589 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
590
591 if (parsed) {
592 const struct nf_ct_tcp_flags *flags_orig, *flags_reply;
593 uint8_t state;
594 protoinfo->proto = IPPROTO_TCP;
595 state = nl_ct_tcp_state_to_dpif(
596 nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE]));
597 /* The connection tracker keeps only one tcp state for the
598 * connection, but our structures store a separate state for
599 * each endpoint. Here we duplicate the state. */
600 protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state;
601 protoinfo->tcp.wscale_orig = nl_attr_get_u8(
602 attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
603 protoinfo->tcp.wscale_reply = nl_attr_get_u8(
604 attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
605 flags_orig =
606 nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL],
607 sizeof *flags_orig);
608 protoinfo->tcp.flags_orig =
609 ip_ct_tcp_flags_to_dpif(flags_orig->flags);
610 flags_reply =
611 nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY],
612 sizeof *flags_reply);
613 protoinfo->tcp.flags_reply =
614 ip_ct_tcp_flags_to_dpif(flags_reply->flags);
615 } else {
616 VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. "
617 "Possibly incompatible Linux kernel version.");
618 }
619
620 return parsed;
621 }
622
623 static bool
624 nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo)
625 {
626 /* These are mutually exclusive. */
627 static const struct nl_policy policy[] = {
628 [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true },
629 [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true },
630 };
631 struct nlattr *attrs[ARRAY_SIZE(policy)];
632 bool parsed;
633
634 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
635
636 memset(protoinfo, 0, sizeof *protoinfo);
637
638 if (parsed) {
639 if (attrs[CTA_PROTOINFO_TCP]) {
640 parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP],
641 protoinfo);
642 } else if (attrs[CTA_PROTOINFO_SCTP]) {
643 VLOG_WARN_RL(&rl, "SCTP protoinfo not yet supported!");
644 } else {
645 VLOG_WARN_RL(&rl, "Empty protoinfo!");
646 }
647 } else {
648 VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. "
649 "Possibly incompatible Linux kernel version.");
650 }
651
652 return parsed;
653 }
654
655 static bool
656 nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper)
657 {
658 static const struct nl_policy policy[] = {
659 [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false },
660 };
661 struct nlattr *attrs[ARRAY_SIZE(policy)];
662 bool parsed;
663
664 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
665
666 memset(helper, 0, sizeof *helper);
667
668 if (parsed) {
669 helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME]));
670 } else {
671 VLOG_ERR_RL(&rl, "Could not parse nested helper options. "
672 "Possibly incompatible Linux kernel version.");
673 }
674
675 return parsed;
676 }
677
678 /* Translate netlink entry status flags to CT_DPIF_TCP status flags. */
679 static uint32_t
680 ips_status_to_dpif_flags(uint32_t status)
681 {
682 uint32_t ret = 0;
683 #define CT_DPIF_STATUS_FLAG(FLAG) \
684 ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
685 CT_DPIF_STATUS_FLAGS
686 #undef CT_DPIF_STATUS_FLAG
687 return ret;
688 }
689
690 static bool
691 nl_ct_parse_header_policy(struct ofpbuf *buf,
692 enum nl_ct_event_type *event_type,
693 uint8_t *nfgen_family,
694 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)])
695 {
696 struct nlmsghdr *nlh;
697 struct nfgenmsg *nfm;
698 uint8_t type;
699
700 nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
701 nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm);
702 if (!nfm) {
703 VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg).");
704 return false;
705 }
706 if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) {
707 VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).",
708 NFNL_SUBSYS_ID(nlh->nlmsg_type));
709 return false;
710 }
711 if (nfm->version != NFNETLINK_V0) {
712 VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).",
713 NFNL_MSG_TYPE(nfm->version));
714 return false;
715 }
716
717 if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm,
718 nfnlgrp_conntrack_policy, attrs,
719 ARRAY_SIZE(nfnlgrp_conntrack_policy))) {
720 VLOG_ERR_RL(&rl, "Received bad nfnl message (policy).");
721 return false;
722 }
723
724 type = NFNL_MSG_TYPE(nlh->nlmsg_type);
725 *nfgen_family = nfm->nfgen_family;
726
727 switch (type) {
728 case IPCTNL_MSG_CT_NEW:
729 *event_type = nlh->nlmsg_flags & NLM_F_CREATE
730 ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE;
731 break;
732 case IPCTNL_MSG_CT_DELETE:
733 *event_type = NL_CT_EVENT_DELETE;
734 break;
735 default:
736 VLOG_ERR_RL(&rl, "Can't parse conntrack event type.");
737 return false;
738 }
739
740 return true;
741 }
742
743 static bool
744 nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
745 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
746 uint8_t nfgen_family)
747 {
748 if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig,
749 nfgen_family)) {
750 return false;
751 }
752 if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply,
753 nfgen_family)) {
754 return false;
755 }
756 if (attrs[CTA_COUNTERS_ORIG] &&
757 !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG],
758 &entry->counters_orig)) {
759 return false;
760 }
761 if (attrs[CTA_COUNTERS_REPLY] &&
762 !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY],
763 &entry->counters_reply)) {
764 return false;
765 }
766 if (attrs[CTA_TIMESTAMP] &&
767 !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) {
768 return false;
769 }
770 if (attrs[CTA_ID]) {
771 entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID]));
772 }
773 if (attrs[CTA_ZONE]) {
774 entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
775 }
776 if (attrs[CTA_STATUS]) {
777 entry->status = ips_status_to_dpif_flags(
778 ntohl(nl_attr_get_be32(attrs[CTA_STATUS])));
779 }
780 if (attrs[CTA_TIMEOUT]) {
781 entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT]));
782 }
783 if (attrs[CTA_MARK]) {
784 entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK]));
785 }
786 if (attrs[CTA_LABELS]) {
787 entry->have_labels = true;
788 memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]),
789 MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS])));
790 }
791 if (attrs[CTA_PROTOINFO] &&
792 !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) {
793 return false;
794 }
795 if (attrs[CTA_HELP] &&
796 !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) {
797 return false;
798 }
799 if (attrs[CTA_TUPLE_MASTER] &&
800 !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_master,
801 nfgen_family)) {
802 return false;
803 }
804 return true;
805 }
806
807 bool
808 nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry,
809 enum nl_ct_event_type *event_type)
810 {
811 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
812 uint8_t nfgen_family;
813
814 memset(entry, 0, sizeof *entry);
815 if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) {
816 return false;
817 };
818
819 if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
820 ct_dpif_entry_uninit(entry);
821 memset(entry, 0, sizeof *entry);
822 return false;
823 }
824
825 return true;
826 }
827
828 /* NetFilter utility functions. */
829
830 /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
831 * initially empty. 'expected_payload' should be an estimate of the number of
832 * payload bytes to be supplied; if the size of the payload is unknown a value
833 * of 0 is acceptable.
834 *
835 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
836 *
837 * 'flags' is a bit-mask that indicates what kind of request is being made. It
838 * is often NLM_F_REQUEST indicating that a request is being made, commonly
839 * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag reguests
840 * a dump of the table.
841 *
842 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
843 *
844 * 'cmd' is an enumerated value specific to the 'subsystem'.
845 *
846 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
847 * fill it in just before sending the message.
848 *
849 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
850 * not NetFilter Netlink messages. */
851 static void
852 nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
853 int family, uint8_t subsystem, uint8_t cmd,
854 uint32_t flags)
855 {
856 struct nfgenmsg *nfm;
857
858 nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload,
859 subsystem << 8 | cmd, flags);
860 ovs_assert(msg->size == NLMSG_HDRLEN);
861 nfm = nl_msg_put_uninit(msg, sizeof *nfm);
862 nfm->nfgen_family = family;
863 nfm->version = NFNETLINK_V0;
864 nfm->res_id = 0;
865 #ifdef _WIN32
866 /* nfgenmsg contains ovsHdr padding in windows */
867 nfm->ovsHdr.dp_ifindex = 0;
868 #endif
869 }