]> git.proxmox.com Git - mirror_ovs.git/blame - lib/netlink-conntrack.c
netdev-offload-tc: Use single 'once' variable for probing tc features
[mirror_ovs.git] / lib / netlink-conntrack.c
CommitLineData
6830a0c0
DDP
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18
19#include "netlink-conntrack.h"
20
817a7657 21#include <errno.h>
6830a0c0
DDP
22#include <linux/netfilter/nfnetlink.h>
23#include <linux/netfilter/nfnetlink_conntrack.h>
24#include <linux/netfilter/nf_conntrack_common.h>
25#include <linux/netfilter/nf_conntrack_tcp.h>
26#include <linux/netfilter/nf_conntrack_ftp.h>
27#include <linux/netfilter/nf_conntrack_sctp.h>
28
29#include "byte-order.h"
30#include "compiler.h"
3e8a2ad1 31#include "openvswitch/dynamic-string.h"
6830a0c0
DDP
32#include "netlink.h"
33#include "netlink-socket.h"
64c96779 34#include "openvswitch/ofpbuf.h"
6830a0c0 35#include "openvswitch/vlog.h"
fd016ae3 36#include "openvswitch/poll-loop.h"
6830a0c0
DDP
37#include "timeval.h"
38#include "unixctl.h"
39#include "util.h"
40
41VLOG_DEFINE_THIS_MODULE(netlink_conntrack);
42static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
43
44/* This module works only if conntrack modules and features are enabled in the
45 * Linux kernel. This can be done from a root shell like this:
46 *
47 * $ modprobe ip_conntrack
48 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
49 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
50 *
51 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
52 * module, there must be a connlabel rule in iptables for space to be reserved
53 * for the labels (see kernel source connlabel_mt_check()). Such a rule can be
54 * inserted from a root shell like this:
55 *
56 * $ iptables -A INPUT -m conntrack -m connlabel \
57 * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
58 */
59
60/* Some attributes were introduced in later kernels: with these definitions
61 * we should be able to compile userspace against Linux 2.6.32+. */
62
63#define CTA_ZONE (CTA_SECMARK + 1)
64#define CTA_SECCTX (CTA_SECMARK + 2)
65#define CTA_TIMESTAMP (CTA_SECMARK + 3)
66#define CTA_MARK_MASK (CTA_SECMARK + 4)
67#define CTA_LABELS (CTA_SECMARK + 5)
68#define CTA_LABELS_MASK (CTA_SECMARK + 6)
69
70#define CTA_TIMESTAMP_START 1
71#define CTA_TIMESTAMP_STOP 2
72
73#define IPS_TEMPLATE_BIT 11
74#define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
75
76#define IPS_UNTRACKED_BIT 12
77#define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
78
79static const struct nl_policy nfnlgrp_conntrack_policy[] = {
80 [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false },
81 [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false },
82 [CTA_ZONE] = { .type = NL_A_BE16, .optional = true },
83 [CTA_STATUS] = { .type = NL_A_BE32, .optional = false },
84 [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true },
85 [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true },
86 [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true },
87 [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true },
88 [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true },
89 [CTA_HELP] = { .type = NL_A_NESTED, .optional = true },
90 [CTA_MARK] = { .type = NL_A_BE32, .optional = true },
91 [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true },
92 [CTA_ID] = { .type = NL_A_BE32, .optional = false },
93 [CTA_USE] = { .type = NL_A_BE32, .optional = true },
94 [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true },
95 [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true },
96 [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true },
97 [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true },
98 /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and
99 * CTA_LABELS_MASK are not received from kernel. */
100};
101
102/* Declarations for conntrack netlink dumping. */
103static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
104 int family, uint8_t subsystem, uint8_t cmd,
105 uint32_t flags);
106
107static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
108 enum nl_ct_event_type *event_type,
109 uint8_t *nfgen_family,
110 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]);
111
112static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
113 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
114 uint8_t nfgen_family);
817a7657
YHW
115static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
116 const struct ct_dpif_tuple *tuple, enum ctattr_type type);
6830a0c0
DDP
117
118struct nl_ct_dump_state {
119 struct nl_dump dump;
120 struct ofpbuf buf;
121 bool filter_zone;
122 uint16_t zone;
123};
e0467f6d 124
6830a0c0
DDP
125/* Conntrack netlink dumping. */
126
127/* Initialize a conntrack netlink dump. */
128int
ded30c74
FA
129nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone,
130 int *ptot_bkts)
6830a0c0
DDP
131{
132 struct nl_ct_dump_state *state;
133
134 *statep = state = xzalloc(sizeof *state);
135 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
136
137 if (zone) {
138 state->filter_zone = true;
139 state->zone = *zone;
140 }
141
142 nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
143 IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
144 nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf);
145 ofpbuf_clear(&state->buf);
146
ded30c74
FA
147 /* Buckets to store connections are not used. */
148 *ptot_bkts = -1;
149
6830a0c0
DDP
150 return 0;
151}
152
153/* Receive the next 'entry' from the conntrack netlink dump with 'state'.
154 * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may
155 * be uninitilized memory on entry, and must be uninitialized with
156 * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is
157 * passed to this function again, the entry must also be uninitialized before
158 * the next call. */
159int
160nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry)
161{
162 struct ofpbuf buf;
163
164 memset(entry, 0, sizeof *entry);
165 for (;;) {
166 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
167 enum nl_ct_event_type type;
168 uint8_t nfgen_family;
169
170 if (!nl_dump_next(&state->dump, &buf, &state->buf)) {
171 return EOF;
172 }
173
174 if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) {
175 continue;
176 };
177
178 if (state->filter_zone) {
179 uint16_t entry_zone = attrs[CTA_ZONE]
180 ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE]))
181 : 0;
182 if (entry_zone != state->zone) {
183 continue;
184 }
185 }
186
187 if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
188 break;
189 }
190
191 ct_dpif_entry_uninit(entry);
192 memset(entry, 0, sizeof *entry);
193 /* Ignore the failed entry and get the next one. */
194 }
195
196 ofpbuf_uninit(&buf);
197 return 0;
198}
199
200/* End a conntrack netlink dump. */
201int
202nl_ct_dump_done(struct nl_ct_dump_state *state)
203{
204 int error = nl_dump_done(&state->dump);
205
206 ofpbuf_uninit(&state->buf);
207 free(state);
208 return error;
209}
e0467f6d 210
6830a0c0
DDP
211/* Format conntrack event 'entry' of 'type' to 'ds'. */
212void
213nl_ct_format_event_entry(const struct ct_dpif_entry *entry,
214 enum nl_ct_event_type type, struct ds *ds,
215 bool verbose, bool print_stats)
216{
217 ds_put_format(ds, "%s ",
218 type == NL_CT_EVENT_NEW ? "NEW"
219 : type == NL_CT_EVENT_UPDATE ? "UPDATE"
220 : type == NL_CT_EVENT_DELETE ? "DELETE"
221 : "UNKNOWN");
222 ct_dpif_format_entry(entry, ds, verbose, print_stats);
223}
224
225int
226nl_ct_flush(void)
227{
228 struct ofpbuf buf;
229 int err;
230
231 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
232
233 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
234 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
235
236 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
237 ofpbuf_uninit(&buf);
238
239 /* Expectations are flushed automatically, because they do not
f51cf36d 240 * have a parent connection anymore */
6830a0c0
DDP
241
242 return err;
243}
244
817a7657
YHW
245int
246nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone)
247{
248 int err;
249 struct ofpbuf buf;
250
251 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
252 nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK,
253 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
254
255 nl_msg_put_be16(&buf, CTA_ZONE, htons(zone));
256 if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) {
257 err = EOPNOTSUPP;
258 goto out;
259 }
260 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
261out:
262 ofpbuf_uninit(&buf);
263 return err;
264}
265
e0467f6d
SV
266#ifdef _WIN32
267int
268nl_ct_flush_zone(uint16_t flush_zone)
269{
270 /* Windows can flush a specific zone */
271 struct ofpbuf buf;
272 int err;
273
274 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
275
276 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
277 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
159cc1f4 278 nl_msg_put_be16(&buf, CTA_ZONE, htons(flush_zone));
e0467f6d
SV
279
280 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
281 ofpbuf_uninit(&buf);
282
283 return err;
284}
285#else
6830a0c0
DDP
286int
287nl_ct_flush_zone(uint16_t flush_zone)
288{
289 /* Apparently, there's no netlink interface to flush a specific zone.
290 * This code dumps every connection, checks the zone and eventually
291 * delete the entry.
292 *
293 * This is race-prone, but it is better than using shell scripts. */
294
295 struct nl_dump dump;
296 struct ofpbuf buf, reply, delete;
297
298 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
299 ofpbuf_init(&delete, NL_DUMP_BUFSIZE);
300
301 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
302 IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
303 nl_dump_start(&dump, NETLINK_NETFILTER, &buf);
304 ofpbuf_clear(&buf);
305
306 for (;;) {
307 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
308 enum nl_ct_event_type event_type;
309 uint8_t nfgen_family;
310 uint16_t zone = 0;
311
312 if (!nl_dump_next(&dump, &reply, &buf)) {
313 break;
314 }
315
316 if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family,
317 attrs)) {
318 continue;
319 };
320
321 if (attrs[CTA_ZONE]) {
322 zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
323 }
324
325 if (zone != flush_zone) {
326 /* The entry is not in the zone we're flushing. */
327 continue;
328 }
329 nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK,
330 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
331
332 nl_msg_put_be16(&delete, CTA_ZONE, htons(zone));
333 nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1,
334 attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN);
335 nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1,
336 attrs[CTA_ID]->nla_len - NLA_HDRLEN);
337 nl_transact(NETLINK_NETFILTER, &delete, NULL);
338 ofpbuf_clear(&delete);
339 }
340
341 nl_dump_done(&dump);
342
343 ofpbuf_uninit(&delete);
344 ofpbuf_uninit(&buf);
345
346 /* Expectations are flushed automatically, because they do not
f51cf36d 347 * have a parent connection anymore */
6830a0c0
DDP
348 return 0;
349}
e0467f6d
SV
350#endif
351
6830a0c0
DDP
352/* Conntrack netlink parsing. */
353
354static bool
355nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters)
356{
357 static const struct nl_policy policy[] = {
358 [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false },
359 [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false },
360 };
361 struct nlattr *attrs[ARRAY_SIZE(policy)];
362 bool parsed;
363
364 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
365
366 if (parsed) {
367 counters->packets
368 = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS]));
369 counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES]));
370 } else {
371 VLOG_ERR_RL(&rl, "Could not parse nested counters. "
372 "Possibly incompatible Linux kernel version.");
373 }
374
375 return parsed;
376}
377
378static bool
379nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp)
380{
381 static const struct nl_policy policy[] = {
382 [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false },
383 [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true },
384 };
385 struct nlattr *attrs[ARRAY_SIZE(policy)];
386 bool parsed;
387
388 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
389
390 if (parsed) {
391 timestamp->start
392 = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START]));
393 if (attrs[CTA_TIMESTAMP_STOP]) {
394 timestamp->stop
395 = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP]));
396 }
397 } else {
398 VLOG_ERR_RL(&rl, "Could not parse nested timestamp. "
399 "Possibly incompatible Linux kernel version.");
400 }
401
402 return parsed;
403}
404
405static bool
406nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple)
407{
408 static const struct nl_policy policy[] = {
409 [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true },
410 [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true },
411 [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
412 [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
413 };
414 struct nlattr *attrs[ARRAY_SIZE(policy)];
415 bool parsed;
416
417 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
418
419 if (parsed) {
420 if (tuple->l3_type == AF_INET) {
421 if (attrs[CTA_IP_V4_SRC]) {
422 tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]);
423 }
424 if (attrs[CTA_IP_V4_DST]) {
425 tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]);
426 }
427 } else if (tuple->l3_type == AF_INET6) {
428 if (attrs[CTA_IP_V6_SRC]) {
429 memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]),
430 sizeof tuple->src.in6);
431 }
432 if (attrs[CTA_IP_V6_DST]) {
433 memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]),
434 sizeof tuple->dst.in6);
435 }
436 } else {
437 VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type);
438 return false;
439 }
440 } else {
441 VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. "
442 "Possibly incompatible Linux kernel version.");
443 }
444
445 return parsed;
446}
447
448static bool
449nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple)
450{
451 static const struct nl_policy policy[] = {
452 [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false },
453 [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true },
454 [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true },
455 [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true },
456 [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true },
457 [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true },
458 [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true },
459 [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true },
460 [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true },
461 };
462 struct nlattr *attrs[ARRAY_SIZE(policy)];
463 bool parsed;
464
465 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
466
467 if (parsed) {
468 tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]);
469
470 if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
471 if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE]
472 || !attrs[CTA_PROTO_ICMP_CODE]) {
473 VLOG_ERR_RL(&rl, "Tuple ICMP data missing.");
474 return false;
475 }
476 tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]);
477 tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]);
478 tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]);
479 } else if (tuple->l3_type == AF_INET6 &&
480 tuple->ip_proto == IPPROTO_ICMPV6) {
481 if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE]
482 || !attrs[CTA_PROTO_ICMPV6_CODE]) {
483 VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing.");
484 return false;
485 }
486 tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]);
487 tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]);
488 tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]);
489 } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) {
490 tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]);
491 tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]);
492 } else {
493 /* Unsupported IPPROTO and no ports, leave them zeroed.
f55c7595
JR
494 * We have parsed the ip_proto, so this is not a failure. */
495 VLOG_DBG_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto);
6830a0c0
DDP
496 }
497 } else {
498 VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. "
499 "Possibly incompatible Linux kernel version.");
500 }
501
502 return parsed;
503}
504
505static bool
506nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
507 uint16_t l3_type)
508{
509 static const struct nl_policy policy[] = {
510 [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false },
511 [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false },
512 };
513 struct nlattr *attrs[ARRAY_SIZE(policy)];
514 bool parsed;
515
516 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
517
518 memset(tuple, 0, sizeof *tuple);
519
520 if (parsed) {
521 tuple->l3_type = l3_type;
522
523 if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple)
524 || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) {
525 struct ds ds;
526
527 ds_init(&ds);
b269a122 528 ct_dpif_format_tuple(&ds, tuple);
6830a0c0
DDP
529
530 VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
531 ds_destroy(&ds);
532
533 memset(tuple, 0, sizeof *tuple);
534 return false;
535 }
536 } else {
537 VLOG_ERR_RL(&rl, "Could not parse nested tuple options. "
538 "Possibly incompatible Linux kernel version.");
539 }
540
541 return parsed;
542}
543
817a7657
YHW
544static bool
545nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
546{
547 size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_IP);
548
549 if (tuple->l3_type == AF_INET) {
550 nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip);
551 nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip);
552 } else if (tuple->l3_type == AF_INET6) {
553 nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6);
554 nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6);
555 } else {
556 VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".",
557 tuple->l3_type);
558 return false;
559 }
560
561 nl_msg_end_nested(buf, offset);
562 return true;
563}
564
565static bool
566nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
567{
568 size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_PROTO);
569
570 nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto);
571
572 if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
573 nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id);
574 nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type);
575 nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code);
576 } else if (tuple->l3_type == AF_INET6 &&
577 tuple->ip_proto == IPPROTO_ICMPV6) {
578 nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id);
579 nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type);
580 nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code);
581 } else if (tuple->ip_proto == IPPROTO_TCP ||
582 tuple->ip_proto == IPPROTO_UDP) {
583 nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port);
584 nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port);
585 } else {
586 VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".",
587 tuple->ip_proto);
588 return false;
589 }
590
591 nl_msg_end_nested(buf, offset);
592 return true;
593}
594
595static bool
596nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple,
597 enum ctattr_type type)
598{
599 if (type != CTA_TUPLE_ORIG && type != CTA_TUPLE_REPLY &&
600 type != CTA_TUPLE_MASTER) {
601 return false;
602 }
603
604 size_t offset = nl_msg_start_nested(buf, type);
605
606 if (!nl_ct_put_tuple_ip(buf, tuple)) {
607 return false;
608 }
609 if (!nl_ct_put_tuple_proto(buf, tuple)) {
610 return false;
611 }
612
613 nl_msg_end_nested(buf, offset);
614 return true;
615}
616
6830a0c0
DDP
617/* Translate netlink TCP state to CT_DPIF_TCP state. */
618static uint8_t
619nl_ct_tcp_state_to_dpif(uint8_t state)
620{
f5d29e92
SV
621#ifdef _WIN32
622 /* Windows currently sends up CT_DPIF_TCP state */
623 return state;
624#else
6830a0c0
DDP
625 switch (state) {
626 case TCP_CONNTRACK_NONE:
627 return CT_DPIF_TCPS_CLOSED;
628 case TCP_CONNTRACK_SYN_SENT:
629 return CT_DPIF_TCPS_SYN_SENT;
630 case TCP_CONNTRACK_SYN_SENT2:
631 return CT_DPIF_TCPS_SYN_SENT;
632 case TCP_CONNTRACK_SYN_RECV:
633 return CT_DPIF_TCPS_SYN_RECV;
634 case TCP_CONNTRACK_ESTABLISHED:
635 return CT_DPIF_TCPS_ESTABLISHED;
636 case TCP_CONNTRACK_FIN_WAIT:
637 return CT_DPIF_TCPS_FIN_WAIT_1;
638 case TCP_CONNTRACK_CLOSE_WAIT:
639 return CT_DPIF_TCPS_CLOSE_WAIT;
640 case TCP_CONNTRACK_LAST_ACK:
641 return CT_DPIF_TCPS_LAST_ACK;
642 case TCP_CONNTRACK_TIME_WAIT:
643 return CT_DPIF_TCPS_TIME_WAIT;
644 case TCP_CONNTRACK_CLOSE:
645 return CT_DPIF_TCPS_CLOSING;
646 default:
647 return CT_DPIF_TCPS_CLOSED;
648 }
f5d29e92 649#endif
6830a0c0
DDP
650}
651
652static uint8_t
653ip_ct_tcp_flags_to_dpif(uint8_t flags)
654{
f5d29e92
SV
655#ifdef _WIN32
656 /* Windows currently sends up CT_DPIF_TCP flags */
657 return flags;
658#else
6830a0c0
DDP
659 uint8_t ret = 0;
660#define CT_DPIF_TCP_FLAG(FLAG) \
661 ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
662 CT_DPIF_TCP_FLAGS
af7523e8 663#undef CT_DPIF_TCP_FLAG
6830a0c0 664 return ret;
f5d29e92 665#endif
6830a0c0
DDP
666}
667
668static bool
669nl_ct_parse_protoinfo_tcp(struct nlattr *nla,
670 struct ct_dpif_protoinfo *protoinfo)
671{
672 static const struct nl_policy policy[] = {
673 [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false },
674 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8,
675 .optional = false },
676 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8,
677 .optional = false },
678 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16,
679 .optional = false },
680 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16,
681 .optional = false },
682 };
683 struct nlattr *attrs[ARRAY_SIZE(policy)];
684 bool parsed;
685
686 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
687
688 if (parsed) {
689 const struct nf_ct_tcp_flags *flags_orig, *flags_reply;
690 uint8_t state;
691 protoinfo->proto = IPPROTO_TCP;
692 state = nl_ct_tcp_state_to_dpif(
693 nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE]));
694 /* The connection tracker keeps only one tcp state for the
695 * connection, but our structures store a separate state for
696 * each endpoint. Here we duplicate the state. */
697 protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state;
698 protoinfo->tcp.wscale_orig = nl_attr_get_u8(
699 attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
700 protoinfo->tcp.wscale_reply = nl_attr_get_u8(
701 attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
702 flags_orig =
703 nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL],
704 sizeof *flags_orig);
705 protoinfo->tcp.flags_orig =
706 ip_ct_tcp_flags_to_dpif(flags_orig->flags);
707 flags_reply =
708 nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY],
709 sizeof *flags_reply);
710 protoinfo->tcp.flags_reply =
711 ip_ct_tcp_flags_to_dpif(flags_reply->flags);
712 } else {
713 VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. "
714 "Possibly incompatible Linux kernel version.");
715 }
716
717 return parsed;
718}
719
93346d88
AC
720/* Translate netlink SCTP state to CT_DPIF_SCTP state. */
721static uint8_t
722nl_ct_sctp_state_to_dpif(uint8_t state)
723{
724#ifdef _WIN32
725 /* For now, return the CT_DPIF_SCTP state. Not sure what windows does. */
726 return state;
727#else
728 switch (state) {
729 case SCTP_CONNTRACK_COOKIE_WAIT:
730 return CT_DPIF_SCTP_STATE_COOKIE_WAIT;
731 case SCTP_CONNTRACK_COOKIE_ECHOED:
732 return CT_DPIF_SCTP_STATE_COOKIE_ECHOED;
733 case SCTP_CONNTRACK_ESTABLISHED:
734 return CT_DPIF_SCTP_STATE_ESTABLISHED;
735 case SCTP_CONNTRACK_SHUTDOWN_SENT:
736 return CT_DPIF_SCTP_STATE_SHUTDOWN_SENT;
737 case SCTP_CONNTRACK_SHUTDOWN_RECD:
738 return CT_DPIF_SCTP_STATE_SHUTDOWN_RECD;
739 case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
740 return CT_DPIF_SCTP_STATE_SHUTDOWN_ACK_SENT;
741 case SCTP_CONNTRACK_HEARTBEAT_SENT:
742 return CT_DPIF_SCTP_STATE_HEARTBEAT_SENT;
743 case SCTP_CONNTRACK_HEARTBEAT_ACKED:
744 return CT_DPIF_SCTP_STATE_HEARTBEAT_ACKED;
745 case SCTP_CONNTRACK_CLOSED:
746 /* Fall Through. */
747 case SCTP_CONNTRACK_NONE:
748 /* Fall Through. */
749 default:
750 return CT_DPIF_SCTP_STATE_CLOSED;
751 }
752#endif
753}
754
755static bool
756nl_ct_parse_protoinfo_sctp(struct nlattr *nla,
757 struct ct_dpif_protoinfo *protoinfo)
758{
759 static const struct nl_policy policy[] = {
760 [CTA_PROTOINFO_SCTP_STATE] = { .type = NL_A_U8, .optional = false },
761 [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NL_A_U32,
762 .optional = false },
763 [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NL_A_U32,
764 .optional = false },
765 };
766 struct nlattr *attrs[ARRAY_SIZE(policy)];
767 bool parsed;
768
769 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
770 if (parsed) {
771 protoinfo->proto = IPPROTO_SCTP;
772
773 protoinfo->sctp.state = nl_ct_sctp_state_to_dpif(
774 nl_attr_get_u8(attrs[CTA_PROTOINFO_SCTP_STATE]));
775 protoinfo->sctp.vtag_orig = nl_attr_get_u32(
776 attrs[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
777 protoinfo->sctp.vtag_reply = nl_attr_get_u32(
778 attrs[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
779 } else {
780 VLOG_ERR_RL(&rl, "Could not parse nested SCTP protoinfo options. "
781 "Possibly incompatible Linux kernel version.");
782 }
783
784 return parsed;
785}
786
6830a0c0
DDP
787static bool
788nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo)
789{
790 /* These are mutually exclusive. */
791 static const struct nl_policy policy[] = {
792 [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true },
793 [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true },
794 };
795 struct nlattr *attrs[ARRAY_SIZE(policy)];
796 bool parsed;
797
798 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
799
800 memset(protoinfo, 0, sizeof *protoinfo);
801
802 if (parsed) {
803 if (attrs[CTA_PROTOINFO_TCP]) {
804 parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP],
805 protoinfo);
806 } else if (attrs[CTA_PROTOINFO_SCTP]) {
93346d88
AC
807 parsed = nl_ct_parse_protoinfo_sctp(attrs[CTA_PROTOINFO_SCTP],
808 protoinfo);
6830a0c0
DDP
809 } else {
810 VLOG_WARN_RL(&rl, "Empty protoinfo!");
811 }
812 } else {
813 VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. "
814 "Possibly incompatible Linux kernel version.");
815 }
816
817 return parsed;
818}
819
820static bool
821nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper)
822{
823 static const struct nl_policy policy[] = {
824 [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false },
825 };
826 struct nlattr *attrs[ARRAY_SIZE(policy)];
827 bool parsed;
828
829 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
830
831 memset(helper, 0, sizeof *helper);
832
833 if (parsed) {
834 helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME]));
835 } else {
836 VLOG_ERR_RL(&rl, "Could not parse nested helper options. "
837 "Possibly incompatible Linux kernel version.");
838 }
839
840 return parsed;
841}
842
1f161318
YHW
843static int nl_ct_timeout_policy_max_attr[] = {
844 [IPPROTO_TCP] = CTA_TIMEOUT_TCP_MAX,
845 [IPPROTO_UDP] = CTA_TIMEOUT_UDP_MAX,
846 [IPPROTO_ICMP] = CTA_TIMEOUT_ICMP_MAX,
847 [IPPROTO_ICMPV6] = CTA_TIMEOUT_ICMPV6_MAX
848};
849
850static void
851nl_ct_set_timeout_policy_attr(struct nl_ct_timeout_policy *nl_tp,
852 uint32_t attr, uint32_t val)
853{
854 nl_tp->present |= 1 << attr;
855 nl_tp->attrs[attr] = val;
856}
857
858static int
859nl_ct_parse_tcp_timeout_policy_data(struct nlattr *nla,
860 struct nl_ct_timeout_policy *nl_tp)
861{
862 static const struct nl_policy policy[] = {
863 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NL_A_BE32,
864 .optional = false },
865 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NL_A_BE32,
866 .optional = false },
867 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NL_A_BE32,
868 .optional = false },
869 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NL_A_BE32,
870 .optional = false },
871 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NL_A_BE32,
872 .optional = false },
873 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NL_A_BE32,
874 .optional = false },
875 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NL_A_BE32,
876 .optional = false },
877 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NL_A_BE32,
878 .optional = false },
879 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NL_A_BE32,
880 .optional = false },
881 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NL_A_BE32,
882 .optional = false },
883 [CTA_TIMEOUT_TCP_UNACK] = { .type = NL_A_BE32,
884 .optional = false },
885 };
886 struct nlattr *attrs[ARRAY_SIZE(policy)];
887
888 if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
889 VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. "
890 "Possibly incompatible Linux kernel version.");
891 return EINVAL;
892 }
893
894 for (int i = CTA_TIMEOUT_TCP_SYN_SENT; i <= CTA_TIMEOUT_TCP_UNACK; i++) {
895 nl_ct_set_timeout_policy_attr(nl_tp, i,
896 ntohl(nl_attr_get_be32(attrs[i])));
897 }
898 return 0;
899}
900
901static int
902nl_ct_parse_udp_timeout_policy_data(struct nlattr *nla,
903 struct nl_ct_timeout_policy *nl_tp)
904{
905 static const struct nl_policy policy[] = {
906 [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NL_A_BE32,
907 .optional = false },
908 [CTA_TIMEOUT_UDP_REPLIED] = { .type = NL_A_BE32,
909 .optional = false },
910 };
911 struct nlattr *attrs[ARRAY_SIZE(policy)];
912
913 if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
914 VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. "
915 "Possibly incompatible Linux kernel version.");
916 return EINVAL;
917 }
918
919 for (int i = CTA_TIMEOUT_UDP_UNREPLIED; i <= CTA_TIMEOUT_UDP_REPLIED;
920 i++) {
921 nl_ct_set_timeout_policy_attr(nl_tp, i,
922 ntohl(nl_attr_get_be32(attrs[i])));
923 }
924 return 0;
925}
926
927static int
928nl_ct_parse_icmp_timeout_policy_data(struct nlattr *nla,
929 struct nl_ct_timeout_policy *nl_tp)
930{
931 static const struct nl_policy policy[] = {
932 [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NL_A_BE32,
933 .optional = false },
934 };
935 struct nlattr *attrs[ARRAY_SIZE(policy)];
936
937 if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
938 VLOG_ERR_RL(&rl, "Could not parse nested icmp timeout options. "
939 "Possibly incompatible Linux kernel version.");
940 return EINVAL;
941 }
942
943 nl_ct_set_timeout_policy_attr(
944 nl_tp, CTA_TIMEOUT_ICMP_TIMEOUT,
945 ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMP_TIMEOUT])));
946 return 0;
947}
948
949static int
950nl_ct_parse_icmpv6_timeout_policy_data(struct nlattr *nla,
951 struct nl_ct_timeout_policy *nl_tp)
952{
953 static const struct nl_policy policy[] = {
954 [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NL_A_BE32,
955 .optional = false },
956 };
957 struct nlattr *attrs[ARRAY_SIZE(policy)];
958
959 if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
960 VLOG_ERR_RL(&rl, "Could not parse nested icmpv6 timeout options. "
961 "Possibly incompatible Linux kernel version.");
962 return EINVAL;
963 }
964
965 nl_ct_set_timeout_policy_attr(
966 nl_tp, CTA_TIMEOUT_ICMPV6_TIMEOUT,
967 ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMPV6_TIMEOUT])));
968 return 0;
969}
970
971static int
972nl_ct_parse_timeout_policy_data(struct nlattr *nla,
973 struct nl_ct_timeout_policy *nl_tp)
974{
975 switch (nl_tp->l4num) {
976 case IPPROTO_TCP:
977 return nl_ct_parse_tcp_timeout_policy_data(nla, nl_tp);
978 case IPPROTO_UDP:
979 return nl_ct_parse_udp_timeout_policy_data(nla, nl_tp);
980 case IPPROTO_ICMP:
981 return nl_ct_parse_icmp_timeout_policy_data(nla, nl_tp);
982 case IPPROTO_ICMPV6:
983 return nl_ct_parse_icmpv6_timeout_policy_data(nla, nl_tp);
984 default:
985 return EINVAL;
986 }
987}
988
989static int
990nl_ct_timeout_policy_from_ofpbuf(struct ofpbuf *buf,
991 struct nl_ct_timeout_policy *nl_tp,
992 bool default_tp)
993{
994 static const struct nl_policy policy[] = {
995 [CTA_TIMEOUT_NAME] = { .type = NL_A_STRING, .optional = false },
996 [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false },
997 [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false },
998 [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false }
999 };
1000 static const struct nl_policy policy_default_tp[] = {
1001 [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false },
1002 [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false },
1003 [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false }
1004 };
1005
1006 struct nlattr *attrs[ARRAY_SIZE(policy)];
1007 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
1008 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
1009 struct nfgenmsg *nfmsg = ofpbuf_try_pull(&b, sizeof *nfmsg);
1010
1011 if (!nlmsg || !nfmsg
1012 || NFNL_SUBSYS_ID(nlmsg->nlmsg_type) != NFNL_SUBSYS_CTNETLINK_TIMEOUT
1013 || nfmsg->version != NFNETLINK_V0
1014 || !nl_policy_parse(&b, 0, default_tp ? policy_default_tp : policy,
1015 attrs, default_tp ? ARRAY_SIZE(policy_default_tp) :
1016 ARRAY_SIZE(policy))) {
1017 return EINVAL;
1018 }
1019
1020 if (!default_tp) {
1021 ovs_strlcpy(nl_tp->name, nl_attr_get_string(attrs[CTA_TIMEOUT_NAME]),
1022 sizeof nl_tp->name);
1023 }
1024 nl_tp->l3num = ntohs(nl_attr_get_be16(attrs[CTA_TIMEOUT_L3PROTO]));
1025 nl_tp->l4num = nl_attr_get_u8(attrs[CTA_TIMEOUT_L4PROTO]);
1026 nl_tp->present = 0;
1027
1028 return nl_ct_parse_timeout_policy_data(attrs[CTA_TIMEOUT_DATA], nl_tp);
1029}
1030
1031int
1032nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp)
1033{
1034 struct ofpbuf buf;
1035 size_t offset;
1036
1037 ofpbuf_init(&buf, 512);
1038 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1039 IPCTNL_MSG_TIMEOUT_NEW, NLM_F_REQUEST | NLM_F_CREATE
1040 | NLM_F_ACK | NLM_F_REPLACE);
1041
1042 nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, nl_tp->name);
1043 nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num));
1044 nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num);
1045
1046 offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA);
1047 for (int i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) {
1048 if (nl_tp->present & 1 << i) {
1049 nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i]));
1050 }
1051 }
1052 nl_msg_end_nested(&buf, offset);
1053
1054 int err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
1055 ofpbuf_uninit(&buf);
1056 return err;
1057}
1058
1059int
1060nl_ct_get_timeout_policy(const char *tp_name,
1061 struct nl_ct_timeout_policy *nl_tp)
1062{
1063 struct ofpbuf request, *reply;
1064
1065 ofpbuf_init(&request, 512);
1066 nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1067 IPCTNL_MSG_TIMEOUT_GET, NLM_F_REQUEST | NLM_F_ACK);
1068 nl_msg_put_string(&request, CTA_TIMEOUT_NAME, tp_name);
1069 int err = nl_transact(NETLINK_NETFILTER, &request, &reply);
1070 if (err) {
1071 goto out;
1072 }
1073
1074 err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, false);
1075
1076out:
1077 ofpbuf_uninit(&request);
1078 ofpbuf_delete(reply);
1079 return err;
1080}
1081
1082int
1083nl_ct_del_timeout_policy(const char *tp_name)
1084{
1085 struct ofpbuf buf;
1086
1087 ofpbuf_init(&buf, 64);
1088 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1089 IPCTNL_MSG_TIMEOUT_DELETE, NLM_F_REQUEST | NLM_F_ACK);
1090
1091 nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, tp_name);
1092 int err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
1093 ofpbuf_uninit(&buf);
1094 return err;
1095}
1096
/* State of an in-progress dump of conntrack timeout policies.  Allocated by
 * nl_ct_timeout_policy_dump_start() and freed by
 * nl_ct_timeout_policy_dump_done(). */
struct nl_ct_timeout_policy_dump_state {
    struct nl_dump dump;        /* Netlink dump handle. */
    struct ofpbuf buf;          /* Buffer passed to nl_dump_next(). */
};
1101
1102int
1103nl_ct_timeout_policy_dump_start(
1104 struct nl_ct_timeout_policy_dump_state **statep)
1105{
1106 struct ofpbuf request;
1107 struct nl_ct_timeout_policy_dump_state *state;
1108
1109 *statep = state = xzalloc(sizeof *state);
1110 ofpbuf_init(&request, 512);
1111 nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1112 IPCTNL_MSG_TIMEOUT_GET,
1113 NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
1114
1115 nl_dump_start(&state->dump, NETLINK_NETFILTER, &request);
1116 ofpbuf_uninit(&request);
1117 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1118 return 0;
1119}
1120
1121int
1122nl_ct_timeout_policy_dump_next(struct nl_ct_timeout_policy_dump_state *state,
1123 struct nl_ct_timeout_policy *nl_tp)
1124{
1125 struct ofpbuf reply;
1126
1127 if (!nl_dump_next(&state->dump, &reply, &state->buf)) {
1128 return EOF;
1129 }
1130 int err = nl_ct_timeout_policy_from_ofpbuf(&reply, nl_tp, false);
1131 ofpbuf_uninit(&reply);
1132 return err;
1133}
1134
1135int
1136nl_ct_timeout_policy_dump_done(struct nl_ct_timeout_policy_dump_state *state)
1137{
1138 int err = nl_dump_done(&state->dump);
1139 ofpbuf_uninit(&state->buf);
1140 free(state);
1141 return err;
1142}
1143
6830a0c0
DDP
1144/* Translate netlink entry status flags to CT_DPIF_TCP status flags. */
1145static uint32_t
1146ips_status_to_dpif_flags(uint32_t status)
1147{
1148 uint32_t ret = 0;
1149#define CT_DPIF_STATUS_FLAG(FLAG) \
1150 ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
1151 CT_DPIF_STATUS_FLAGS
1152#undef CT_DPIF_STATUS_FLAG
1153 return ret;
1154}
1155
1156static bool
1157nl_ct_parse_header_policy(struct ofpbuf *buf,
1158 enum nl_ct_event_type *event_type,
1159 uint8_t *nfgen_family,
1160 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)])
1161{
1162 struct nlmsghdr *nlh;
1163 struct nfgenmsg *nfm;
1164 uint8_t type;
1165
1166 nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
1167 nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm);
1168 if (!nfm) {
1169 VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg).");
1170 return false;
1171 }
1172 if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) {
1173 VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).",
1174 NFNL_SUBSYS_ID(nlh->nlmsg_type));
1175 return false;
1176 }
1177 if (nfm->version != NFNETLINK_V0) {
1178 VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).",
1179 NFNL_MSG_TYPE(nfm->version));
1180 return false;
1181 }
1182
1183 if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm,
1184 nfnlgrp_conntrack_policy, attrs,
1185 ARRAY_SIZE(nfnlgrp_conntrack_policy))) {
1186 VLOG_ERR_RL(&rl, "Received bad nfnl message (policy).");
1187 return false;
1188 }
1189
1190 type = NFNL_MSG_TYPE(nlh->nlmsg_type);
1191 *nfgen_family = nfm->nfgen_family;
1192
1193 switch (type) {
1194 case IPCTNL_MSG_CT_NEW:
1195 *event_type = nlh->nlmsg_flags & NLM_F_CREATE
1196 ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE;
1197 break;
1198 case IPCTNL_MSG_CT_DELETE:
1199 *event_type = NL_CT_EVENT_DELETE;
1200 break;
1201 default:
1202 VLOG_ERR_RL(&rl, "Can't parse conntrack event type.");
1203 return false;
1204 }
1205
1206 return true;
1207}
1208
1209static bool
1210nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
1211 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
1212 uint8_t nfgen_family)
1213{
1214 if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig,
1215 nfgen_family)) {
1216 return false;
1217 }
1218 if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply,
1219 nfgen_family)) {
1220 return false;
1221 }
1222 if (attrs[CTA_COUNTERS_ORIG] &&
1223 !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG],
1224 &entry->counters_orig)) {
1225 return false;
1226 }
1227 if (attrs[CTA_COUNTERS_REPLY] &&
1228 !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY],
1229 &entry->counters_reply)) {
1230 return false;
1231 }
1232 if (attrs[CTA_TIMESTAMP] &&
1233 !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) {
1234 return false;
1235 }
1236 if (attrs[CTA_ID]) {
1237 entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID]));
1238 }
1239 if (attrs[CTA_ZONE]) {
1240 entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
1241 }
1242 if (attrs[CTA_STATUS]) {
1243 entry->status = ips_status_to_dpif_flags(
1244 ntohl(nl_attr_get_be32(attrs[CTA_STATUS])));
1245 }
1246 if (attrs[CTA_TIMEOUT]) {
1247 entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT]));
1248 }
1249 if (attrs[CTA_MARK]) {
1250 entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK]));
1251 }
1252 if (attrs[CTA_LABELS]) {
e7237700 1253 entry->have_labels = true;
6830a0c0
DDP
1254 memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]),
1255 MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS])));
1256 }
1257 if (attrs[CTA_PROTOINFO] &&
1258 !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) {
1259 return false;
1260 }
1261 if (attrs[CTA_HELP] &&
1262 !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) {
1263 return false;
1264 }
1265 if (attrs[CTA_TUPLE_MASTER] &&
f51cf36d 1266 !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_parent,
6830a0c0
DDP
1267 nfgen_family)) {
1268 return false;
1269 }
1270 return true;
1271}
1272
1273bool
1274nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry,
1275 enum nl_ct_event_type *event_type)
1276{
1277 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
1278 uint8_t nfgen_family;
1279
1280 memset(entry, 0, sizeof *entry);
1281 if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) {
1282 return false;
1283 };
1284
1285 if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
1286 ct_dpif_entry_uninit(entry);
1287 memset(entry, 0, sizeof *entry);
1288 return false;
1289 }
1290
1291 return true;
1292}
e0467f6d 1293
6830a0c0
DDP
1294/* NetFilter utility functions. */
1295
1296/* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
1297 * initially empty. 'expected_payload' should be an estimate of the number of
1298 * payload bytes to be supplied; if the size of the payload is unknown a value
1299 * of 0 is acceptable.
1300 *
1301 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
1302 *
1303 * 'flags' is a bit-mask that indicates what kind of request is being made. It
1304 * is often NLM_F_REQUEST indicating that a request is being made, commonly
1305 * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag reguests
1306 * a dump of the table.
1307 *
1308 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
1309 *
1310 * 'cmd' is an enumerated value specific to the 'subsystem'.
1311 *
1312 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
1313 * fill it in just before sending the message.
1314 *
1315 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
1316 * not NetFilter Netlink messages. */
1317static void
1318nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
1319 int family, uint8_t subsystem, uint8_t cmd,
1320 uint32_t flags)
1321{
1322 struct nfgenmsg *nfm;
1323
1324 nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload,
1325 subsystem << 8 | cmd, flags);
1326 ovs_assert(msg->size == NLMSG_HDRLEN);
1327 nfm = nl_msg_put_uninit(msg, sizeof *nfm);
1328 nfm->nfgen_family = family;
1329 nfm->version = NFNETLINK_V0;
1330 nfm->res_id = 0;
e0467f6d
SV
1331#ifdef _WIN32
1332 /* nfgenmsg contains ovsHdr padding in windows */
1333 nfm->ovsHdr.dp_ifindex = 0;
1334#endif
6830a0c0 1335}