]>
Commit | Line | Data |
---|---|---|
6830a0c0 DDP |
1 | /* |
2 | * Copyright (c) 2015 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
19 | #include "netlink-conntrack.h" | |
20 | ||
817a7657 | 21 | #include <errno.h> |
6830a0c0 DDP |
22 | #include <linux/netfilter/nfnetlink.h> |
23 | #include <linux/netfilter/nfnetlink_conntrack.h> | |
24 | #include <linux/netfilter/nf_conntrack_common.h> | |
25 | #include <linux/netfilter/nf_conntrack_tcp.h> | |
26 | #include <linux/netfilter/nf_conntrack_ftp.h> | |
27 | #include <linux/netfilter/nf_conntrack_sctp.h> | |
28 | ||
29 | #include "byte-order.h" | |
30 | #include "compiler.h" | |
3e8a2ad1 | 31 | #include "openvswitch/dynamic-string.h" |
6830a0c0 DDP |
32 | #include "netlink.h" |
33 | #include "netlink-socket.h" | |
64c96779 | 34 | #include "openvswitch/ofpbuf.h" |
6830a0c0 | 35 | #include "openvswitch/vlog.h" |
fd016ae3 | 36 | #include "openvswitch/poll-loop.h" |
6830a0c0 DDP |
37 | #include "timeval.h" |
38 | #include "unixctl.h" | |
39 | #include "util.h" | |
40 | ||
/* Registers this file as the "netlink_conntrack" logging module.  'rl' is the
 * single rate limiter handed to every VLOG_*_RL() call in this file. */
VLOG_DEFINE_THIS_MODULE(netlink_conntrack);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
43 | ||
44 | /* This module works only if conntrack modules and features are enabled in the | |
45 | * Linux kernel. This can be done from a root shell like this: | |
46 | * | |
47 | * $ modprobe ip_conntrack | |
48 | * $ sysctl -w net.netfilter.nf_conntrack_acct=1 | |
49 | * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1 | |
50 | * | |
51 | * Also, if testing conntrack label feature without conntrack-aware OVS kernel | |
52 | * module, there must be a connlabel rule in iptables for space to be reserved | |
53 | * for the labels (see kernel source connlabel_mt_check()). Such a rule can be | |
54 | * inserted from a root shell like this: | |
55 | * | |
56 | * $ iptables -A INPUT -m conntrack -m connlabel \ | |
57 | * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT | |
58 | */ | |
59 | ||
/* Some attributes were introduced in later kernels: with these definitions
 * we should be able to compile userspace against Linux 2.6.32+. */

/* Top-level conntrack attribute ids, each expressed as an offset from
 * CTA_SECMARK. */
#define CTA_ZONE (CTA_SECMARK + 1)
#define CTA_SECCTX (CTA_SECMARK + 2)
#define CTA_TIMESTAMP (CTA_SECMARK + 3)
#define CTA_MARK_MASK (CTA_SECMARK + 4)
#define CTA_LABELS (CTA_SECMARK + 5)
#define CTA_LABELS_MASK (CTA_SECMARK + 6)

/* Attribute ids used by nl_ct_parse_timestamp() inside a nested timestamp
 * attribute. */
#define CTA_TIMESTAMP_START 1
#define CTA_TIMESTAMP_STOP 2

/* Bit index and the corresponding mask for the "template" status flag. */
#define IPS_TEMPLATE_BIT 11
#define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)

/* Bit index and the corresponding mask for the "untracked" status flag. */
#define IPS_UNTRACKED_BIT 12
#define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
78 | ||
79 | static const struct nl_policy nfnlgrp_conntrack_policy[] = { | |
80 | [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false }, | |
81 | [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false }, | |
82 | [CTA_ZONE] = { .type = NL_A_BE16, .optional = true }, | |
83 | [CTA_STATUS] = { .type = NL_A_BE32, .optional = false }, | |
84 | [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true }, | |
85 | [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true }, | |
86 | [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true }, | |
87 | [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true }, | |
88 | [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true }, | |
89 | [CTA_HELP] = { .type = NL_A_NESTED, .optional = true }, | |
90 | [CTA_MARK] = { .type = NL_A_BE32, .optional = true }, | |
91 | [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true }, | |
92 | [CTA_ID] = { .type = NL_A_BE32, .optional = false }, | |
93 | [CTA_USE] = { .type = NL_A_BE32, .optional = true }, | |
94 | [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true }, | |
95 | [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true }, | |
96 | [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true }, | |
97 | [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true }, | |
98 | /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and | |
99 | * CTA_LABELS_MASK are not received from kernel. */ | |
100 | }; | |
101 | ||
/* Declarations for conntrack netlink dumping. */

/* Called below on a freshly initialized buffer to begin each request message;
 * the definition is not part of this section. */
static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
                                int family, uint8_t subsystem, uint8_t cmd,
                                uint32_t flags);

/* Used by the dump loops to parse one reply in 'buf' into 'attrs', the event
 * type and the nfgen family.  Callers skip the message when it returns
 * false. */
static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
        enum nl_ct_event_type *event_type,
        uint8_t *nfgen_family,
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]);

/* Fills 'entry' from already-parsed 'attrs'.  nl_ct_dump_next() discards the
 * entry and moves on when it returns false. */
static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
        uint8_t nfgen_family);

/* Appends 'tuple' to 'buf' as a nested attribute of the given 'type'. */
static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
        const struct ct_dpif_tuple *tuple, enum ctattr_type type);
6830a0c0 DDP |
117 | |
/* State of one in-progress conntrack dump, allocated by nl_ct_dump_start() and
 * freed by nl_ct_dump_done(). */
struct nl_ct_dump_state {
    struct nl_dump dump;    /* Handle passed to nl_dump_start/next/done(). */
    struct ofpbuf buf;      /* Holds the request, then is passed to
                             * nl_dump_next() for each reply. */
    bool filter_zone;       /* True if only entries in 'zone' are returned. */
    uint16_t zone;          /* Zone to match when 'filter_zone' is set. */
};
e0467f6d | 124 | |
6830a0c0 DDP |
125 | /* Conntrack netlink dumping. */ |
126 | ||
127 | /* Initialize a conntrack netlink dump. */ | |
128 | int | |
ded30c74 FA |
129 | nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone, |
130 | int *ptot_bkts) | |
6830a0c0 DDP |
131 | { |
132 | struct nl_ct_dump_state *state; | |
133 | ||
134 | *statep = state = xzalloc(sizeof *state); | |
135 | ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); | |
136 | ||
137 | if (zone) { | |
138 | state->filter_zone = true; | |
139 | state->zone = *zone; | |
140 | } | |
141 | ||
142 | nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
143 | IPCTNL_MSG_CT_GET, NLM_F_REQUEST); | |
144 | nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf); | |
145 | ofpbuf_clear(&state->buf); | |
146 | ||
ded30c74 FA |
147 | /* Buckets to store connections are not used. */ |
148 | *ptot_bkts = -1; | |
149 | ||
6830a0c0 DDP |
150 | return 0; |
151 | } | |
152 | ||
153 | /* Receive the next 'entry' from the conntrack netlink dump with 'state'. | |
154 | * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may | |
155 | * be uninitilized memory on entry, and must be uninitialized with | |
156 | * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is | |
157 | * passed to this function again, the entry must also be uninitialized before | |
158 | * the next call. */ | |
159 | int | |
160 | nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry) | |
161 | { | |
162 | struct ofpbuf buf; | |
163 | ||
164 | memset(entry, 0, sizeof *entry); | |
165 | for (;;) { | |
166 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; | |
167 | enum nl_ct_event_type type; | |
168 | uint8_t nfgen_family; | |
169 | ||
170 | if (!nl_dump_next(&state->dump, &buf, &state->buf)) { | |
171 | return EOF; | |
172 | } | |
173 | ||
174 | if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) { | |
175 | continue; | |
176 | }; | |
177 | ||
178 | if (state->filter_zone) { | |
179 | uint16_t entry_zone = attrs[CTA_ZONE] | |
180 | ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE])) | |
181 | : 0; | |
182 | if (entry_zone != state->zone) { | |
183 | continue; | |
184 | } | |
185 | } | |
186 | ||
187 | if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) { | |
188 | break; | |
189 | } | |
190 | ||
191 | ct_dpif_entry_uninit(entry); | |
192 | memset(entry, 0, sizeof *entry); | |
193 | /* Ignore the failed entry and get the next one. */ | |
194 | } | |
195 | ||
196 | ofpbuf_uninit(&buf); | |
197 | return 0; | |
198 | } | |
199 | ||
200 | /* End a conntrack netlink dump. */ | |
201 | int | |
202 | nl_ct_dump_done(struct nl_ct_dump_state *state) | |
203 | { | |
204 | int error = nl_dump_done(&state->dump); | |
205 | ||
206 | ofpbuf_uninit(&state->buf); | |
207 | free(state); | |
208 | return error; | |
209 | } | |
e0467f6d | 210 | |
6830a0c0 DDP |
211 | /* Format conntrack event 'entry' of 'type' to 'ds'. */ |
212 | void | |
213 | nl_ct_format_event_entry(const struct ct_dpif_entry *entry, | |
214 | enum nl_ct_event_type type, struct ds *ds, | |
215 | bool verbose, bool print_stats) | |
216 | { | |
217 | ds_put_format(ds, "%s ", | |
218 | type == NL_CT_EVENT_NEW ? "NEW" | |
219 | : type == NL_CT_EVENT_UPDATE ? "UPDATE" | |
220 | : type == NL_CT_EVENT_DELETE ? "DELETE" | |
221 | : "UNKNOWN"); | |
222 | ct_dpif_format_entry(entry, ds, verbose, print_stats); | |
223 | } | |
224 | ||
225 | int | |
226 | nl_ct_flush(void) | |
227 | { | |
228 | struct ofpbuf buf; | |
229 | int err; | |
230 | ||
231 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
232 | ||
233 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
234 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
235 | ||
236 | err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
237 | ofpbuf_uninit(&buf); | |
238 | ||
239 | /* Expectations are flushed automatically, because they do not | |
f51cf36d | 240 | * have a parent connection anymore */ |
6830a0c0 DDP |
241 | |
242 | return err; | |
243 | } | |
244 | ||
817a7657 YHW |
245 | int |
246 | nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone) | |
247 | { | |
248 | int err; | |
249 | struct ofpbuf buf; | |
250 | ||
251 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
252 | nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK, | |
253 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
254 | ||
255 | nl_msg_put_be16(&buf, CTA_ZONE, htons(zone)); | |
256 | if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) { | |
257 | err = EOPNOTSUPP; | |
258 | goto out; | |
259 | } | |
260 | err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
261 | out: | |
262 | ofpbuf_uninit(&buf); | |
263 | return err; | |
264 | } | |
265 | ||
e0467f6d SV |
266 | #ifdef _WIN32 |
267 | int | |
268 | nl_ct_flush_zone(uint16_t flush_zone) | |
269 | { | |
270 | /* Windows can flush a specific zone */ | |
271 | struct ofpbuf buf; | |
272 | int err; | |
273 | ||
274 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
275 | ||
276 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
277 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
159cc1f4 | 278 | nl_msg_put_be16(&buf, CTA_ZONE, htons(flush_zone)); |
e0467f6d SV |
279 | |
280 | err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
281 | ofpbuf_uninit(&buf); | |
282 | ||
283 | return err; | |
284 | } | |
285 | #else | |
6830a0c0 DDP |
286 | int |
287 | nl_ct_flush_zone(uint16_t flush_zone) | |
288 | { | |
289 | /* Apparently, there's no netlink interface to flush a specific zone. | |
290 | * This code dumps every connection, checks the zone and eventually | |
291 | * delete the entry. | |
292 | * | |
293 | * This is race-prone, but it is better than using shell scripts. */ | |
294 | ||
295 | struct nl_dump dump; | |
296 | struct ofpbuf buf, reply, delete; | |
297 | ||
298 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
299 | ofpbuf_init(&delete, NL_DUMP_BUFSIZE); | |
300 | ||
301 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
302 | IPCTNL_MSG_CT_GET, NLM_F_REQUEST); | |
303 | nl_dump_start(&dump, NETLINK_NETFILTER, &buf); | |
304 | ofpbuf_clear(&buf); | |
305 | ||
306 | for (;;) { | |
307 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; | |
308 | enum nl_ct_event_type event_type; | |
309 | uint8_t nfgen_family; | |
310 | uint16_t zone = 0; | |
311 | ||
312 | if (!nl_dump_next(&dump, &reply, &buf)) { | |
313 | break; | |
314 | } | |
315 | ||
316 | if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family, | |
317 | attrs)) { | |
318 | continue; | |
319 | }; | |
320 | ||
321 | if (attrs[CTA_ZONE]) { | |
322 | zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); | |
323 | } | |
324 | ||
325 | if (zone != flush_zone) { | |
326 | /* The entry is not in the zone we're flushing. */ | |
327 | continue; | |
328 | } | |
329 | nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK, | |
330 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
331 | ||
332 | nl_msg_put_be16(&delete, CTA_ZONE, htons(zone)); | |
333 | nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1, | |
334 | attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN); | |
335 | nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1, | |
336 | attrs[CTA_ID]->nla_len - NLA_HDRLEN); | |
337 | nl_transact(NETLINK_NETFILTER, &delete, NULL); | |
338 | ofpbuf_clear(&delete); | |
339 | } | |
340 | ||
341 | nl_dump_done(&dump); | |
342 | ||
343 | ofpbuf_uninit(&delete); | |
344 | ofpbuf_uninit(&buf); | |
345 | ||
346 | /* Expectations are flushed automatically, because they do not | |
f51cf36d | 347 | * have a parent connection anymore */ |
6830a0c0 DDP |
348 | return 0; |
349 | } | |
e0467f6d SV |
350 | #endif |
351 | ||
6830a0c0 DDP |
352 | /* Conntrack netlink parsing. */ |
353 | ||
354 | static bool | |
355 | nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters) | |
356 | { | |
357 | static const struct nl_policy policy[] = { | |
358 | [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false }, | |
359 | [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false }, | |
360 | }; | |
361 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
362 | bool parsed; | |
363 | ||
364 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
365 | ||
366 | if (parsed) { | |
367 | counters->packets | |
368 | = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS])); | |
369 | counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES])); | |
370 | } else { | |
371 | VLOG_ERR_RL(&rl, "Could not parse nested counters. " | |
372 | "Possibly incompatible Linux kernel version."); | |
373 | } | |
374 | ||
375 | return parsed; | |
376 | } | |
377 | ||
378 | static bool | |
379 | nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp) | |
380 | { | |
381 | static const struct nl_policy policy[] = { | |
382 | [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false }, | |
383 | [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true }, | |
384 | }; | |
385 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
386 | bool parsed; | |
387 | ||
388 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
389 | ||
390 | if (parsed) { | |
391 | timestamp->start | |
392 | = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START])); | |
393 | if (attrs[CTA_TIMESTAMP_STOP]) { | |
394 | timestamp->stop | |
395 | = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP])); | |
396 | } | |
397 | } else { | |
398 | VLOG_ERR_RL(&rl, "Could not parse nested timestamp. " | |
399 | "Possibly incompatible Linux kernel version."); | |
400 | } | |
401 | ||
402 | return parsed; | |
403 | } | |
404 | ||
405 | static bool | |
406 | nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple) | |
407 | { | |
408 | static const struct nl_policy policy[] = { | |
409 | [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true }, | |
410 | [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true }, | |
411 | [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true }, | |
412 | [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true }, | |
413 | }; | |
414 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
415 | bool parsed; | |
416 | ||
417 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
418 | ||
419 | if (parsed) { | |
420 | if (tuple->l3_type == AF_INET) { | |
421 | if (attrs[CTA_IP_V4_SRC]) { | |
422 | tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]); | |
423 | } | |
424 | if (attrs[CTA_IP_V4_DST]) { | |
425 | tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]); | |
426 | } | |
427 | } else if (tuple->l3_type == AF_INET6) { | |
428 | if (attrs[CTA_IP_V6_SRC]) { | |
429 | memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]), | |
430 | sizeof tuple->src.in6); | |
431 | } | |
432 | if (attrs[CTA_IP_V6_DST]) { | |
433 | memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]), | |
434 | sizeof tuple->dst.in6); | |
435 | } | |
436 | } else { | |
437 | VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type); | |
438 | return false; | |
439 | } | |
440 | } else { | |
441 | VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. " | |
442 | "Possibly incompatible Linux kernel version."); | |
443 | } | |
444 | ||
445 | return parsed; | |
446 | } | |
447 | ||
448 | static bool | |
449 | nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple) | |
450 | { | |
451 | static const struct nl_policy policy[] = { | |
452 | [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false }, | |
453 | [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true }, | |
454 | [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true }, | |
455 | [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true }, | |
456 | [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true }, | |
457 | [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true }, | |
458 | [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true }, | |
459 | [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true }, | |
460 | [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true }, | |
461 | }; | |
462 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
463 | bool parsed; | |
464 | ||
465 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
466 | ||
467 | if (parsed) { | |
468 | tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]); | |
469 | ||
470 | if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) { | |
471 | if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE] | |
472 | || !attrs[CTA_PROTO_ICMP_CODE]) { | |
473 | VLOG_ERR_RL(&rl, "Tuple ICMP data missing."); | |
474 | return false; | |
475 | } | |
476 | tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]); | |
477 | tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]); | |
478 | tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]); | |
479 | } else if (tuple->l3_type == AF_INET6 && | |
480 | tuple->ip_proto == IPPROTO_ICMPV6) { | |
481 | if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE] | |
482 | || !attrs[CTA_PROTO_ICMPV6_CODE]) { | |
483 | VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing."); | |
484 | return false; | |
485 | } | |
486 | tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]); | |
487 | tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]); | |
488 | tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]); | |
489 | } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) { | |
490 | tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]); | |
491 | tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]); | |
492 | } else { | |
493 | /* Unsupported IPPROTO and no ports, leave them zeroed. | |
f55c7595 JR |
494 | * We have parsed the ip_proto, so this is not a failure. */ |
495 | VLOG_DBG_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto); | |
6830a0c0 DDP |
496 | } |
497 | } else { | |
498 | VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. " | |
499 | "Possibly incompatible Linux kernel version."); | |
500 | } | |
501 | ||
502 | return parsed; | |
503 | } | |
504 | ||
505 | static bool | |
506 | nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple, | |
507 | uint16_t l3_type) | |
508 | { | |
509 | static const struct nl_policy policy[] = { | |
510 | [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false }, | |
511 | [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false }, | |
512 | }; | |
513 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
514 | bool parsed; | |
515 | ||
516 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
517 | ||
518 | memset(tuple, 0, sizeof *tuple); | |
519 | ||
520 | if (parsed) { | |
521 | tuple->l3_type = l3_type; | |
522 | ||
523 | if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple) | |
524 | || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) { | |
525 | struct ds ds; | |
526 | ||
527 | ds_init(&ds); | |
b269a122 | 528 | ct_dpif_format_tuple(&ds, tuple); |
6830a0c0 DDP |
529 | |
530 | VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds)); | |
531 | ds_destroy(&ds); | |
532 | ||
533 | memset(tuple, 0, sizeof *tuple); | |
534 | return false; | |
535 | } | |
536 | } else { | |
537 | VLOG_ERR_RL(&rl, "Could not parse nested tuple options. " | |
538 | "Possibly incompatible Linux kernel version."); | |
539 | } | |
540 | ||
541 | return parsed; | |
542 | } | |
543 | ||
817a7657 YHW |
544 | static bool |
545 | nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple) | |
546 | { | |
547 | size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_IP); | |
548 | ||
549 | if (tuple->l3_type == AF_INET) { | |
550 | nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip); | |
551 | nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip); | |
552 | } else if (tuple->l3_type == AF_INET6) { | |
553 | nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6); | |
554 | nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6); | |
555 | } else { | |
556 | VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".", | |
557 | tuple->l3_type); | |
558 | return false; | |
559 | } | |
560 | ||
561 | nl_msg_end_nested(buf, offset); | |
562 | return true; | |
563 | } | |
564 | ||
565 | static bool | |
566 | nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple) | |
567 | { | |
568 | size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_PROTO); | |
569 | ||
570 | nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto); | |
571 | ||
572 | if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) { | |
573 | nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id); | |
574 | nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type); | |
575 | nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code); | |
576 | } else if (tuple->l3_type == AF_INET6 && | |
577 | tuple->ip_proto == IPPROTO_ICMPV6) { | |
578 | nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id); | |
579 | nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type); | |
580 | nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code); | |
581 | } else if (tuple->ip_proto == IPPROTO_TCP || | |
582 | tuple->ip_proto == IPPROTO_UDP) { | |
583 | nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port); | |
584 | nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port); | |
585 | } else { | |
586 | VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".", | |
587 | tuple->ip_proto); | |
588 | return false; | |
589 | } | |
590 | ||
591 | nl_msg_end_nested(buf, offset); | |
592 | return true; | |
593 | } | |
594 | ||
595 | static bool | |
596 | nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple, | |
597 | enum ctattr_type type) | |
598 | { | |
599 | if (type != CTA_TUPLE_ORIG && type != CTA_TUPLE_REPLY && | |
600 | type != CTA_TUPLE_MASTER) { | |
601 | return false; | |
602 | } | |
603 | ||
604 | size_t offset = nl_msg_start_nested(buf, type); | |
605 | ||
606 | if (!nl_ct_put_tuple_ip(buf, tuple)) { | |
607 | return false; | |
608 | } | |
609 | if (!nl_ct_put_tuple_proto(buf, tuple)) { | |
610 | return false; | |
611 | } | |
612 | ||
613 | nl_msg_end_nested(buf, offset); | |
614 | return true; | |
615 | } | |
616 | ||
6830a0c0 DDP |
617 | /* Translate netlink TCP state to CT_DPIF_TCP state. */ |
618 | static uint8_t | |
619 | nl_ct_tcp_state_to_dpif(uint8_t state) | |
620 | { | |
f5d29e92 SV |
621 | #ifdef _WIN32 |
622 | /* Windows currently sends up CT_DPIF_TCP state */ | |
623 | return state; | |
624 | #else | |
6830a0c0 DDP |
625 | switch (state) { |
626 | case TCP_CONNTRACK_NONE: | |
627 | return CT_DPIF_TCPS_CLOSED; | |
628 | case TCP_CONNTRACK_SYN_SENT: | |
629 | return CT_DPIF_TCPS_SYN_SENT; | |
630 | case TCP_CONNTRACK_SYN_SENT2: | |
631 | return CT_DPIF_TCPS_SYN_SENT; | |
632 | case TCP_CONNTRACK_SYN_RECV: | |
633 | return CT_DPIF_TCPS_SYN_RECV; | |
634 | case TCP_CONNTRACK_ESTABLISHED: | |
635 | return CT_DPIF_TCPS_ESTABLISHED; | |
636 | case TCP_CONNTRACK_FIN_WAIT: | |
637 | return CT_DPIF_TCPS_FIN_WAIT_1; | |
638 | case TCP_CONNTRACK_CLOSE_WAIT: | |
639 | return CT_DPIF_TCPS_CLOSE_WAIT; | |
640 | case TCP_CONNTRACK_LAST_ACK: | |
641 | return CT_DPIF_TCPS_LAST_ACK; | |
642 | case TCP_CONNTRACK_TIME_WAIT: | |
643 | return CT_DPIF_TCPS_TIME_WAIT; | |
644 | case TCP_CONNTRACK_CLOSE: | |
645 | return CT_DPIF_TCPS_CLOSING; | |
646 | default: | |
647 | return CT_DPIF_TCPS_CLOSED; | |
648 | } | |
f5d29e92 | 649 | #endif |
6830a0c0 DDP |
650 | } |
651 | ||
652 | static uint8_t | |
653 | ip_ct_tcp_flags_to_dpif(uint8_t flags) | |
654 | { | |
f5d29e92 SV |
655 | #ifdef _WIN32 |
656 | /* Windows currently sends up CT_DPIF_TCP flags */ | |
657 | return flags; | |
658 | #else | |
6830a0c0 DDP |
659 | uint8_t ret = 0; |
660 | #define CT_DPIF_TCP_FLAG(FLAG) \ | |
661 | ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0; | |
662 | CT_DPIF_TCP_FLAGS | |
af7523e8 | 663 | #undef CT_DPIF_TCP_FLAG |
6830a0c0 | 664 | return ret; |
f5d29e92 | 665 | #endif |
6830a0c0 DDP |
666 | } |
667 | ||
668 | static bool | |
669 | nl_ct_parse_protoinfo_tcp(struct nlattr *nla, | |
670 | struct ct_dpif_protoinfo *protoinfo) | |
671 | { | |
672 | static const struct nl_policy policy[] = { | |
673 | [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false }, | |
674 | [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8, | |
675 | .optional = false }, | |
676 | [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8, | |
677 | .optional = false }, | |
678 | [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16, | |
679 | .optional = false }, | |
680 | [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16, | |
681 | .optional = false }, | |
682 | }; | |
683 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
684 | bool parsed; | |
685 | ||
686 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
687 | ||
688 | if (parsed) { | |
689 | const struct nf_ct_tcp_flags *flags_orig, *flags_reply; | |
690 | uint8_t state; | |
691 | protoinfo->proto = IPPROTO_TCP; | |
692 | state = nl_ct_tcp_state_to_dpif( | |
693 | nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE])); | |
694 | /* The connection tracker keeps only one tcp state for the | |
695 | * connection, but our structures store a separate state for | |
696 | * each endpoint. Here we duplicate the state. */ | |
697 | protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state; | |
698 | protoinfo->tcp.wscale_orig = nl_attr_get_u8( | |
699 | attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); | |
700 | protoinfo->tcp.wscale_reply = nl_attr_get_u8( | |
701 | attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]); | |
702 | flags_orig = | |
703 | nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL], | |
704 | sizeof *flags_orig); | |
705 | protoinfo->tcp.flags_orig = | |
706 | ip_ct_tcp_flags_to_dpif(flags_orig->flags); | |
707 | flags_reply = | |
708 | nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY], | |
709 | sizeof *flags_reply); | |
710 | protoinfo->tcp.flags_reply = | |
711 | ip_ct_tcp_flags_to_dpif(flags_reply->flags); | |
712 | } else { | |
713 | VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. " | |
714 | "Possibly incompatible Linux kernel version."); | |
715 | } | |
716 | ||
717 | return parsed; | |
718 | } | |
719 | ||
93346d88 AC |
720 | /* Translate netlink SCTP state to CT_DPIF_SCTP state. */ |
721 | static uint8_t | |
722 | nl_ct_sctp_state_to_dpif(uint8_t state) | |
723 | { | |
724 | #ifdef _WIN32 | |
725 | /* For now, return the CT_DPIF_SCTP state. Not sure what windows does. */ | |
726 | return state; | |
727 | #else | |
728 | switch (state) { | |
729 | case SCTP_CONNTRACK_COOKIE_WAIT: | |
730 | return CT_DPIF_SCTP_STATE_COOKIE_WAIT; | |
731 | case SCTP_CONNTRACK_COOKIE_ECHOED: | |
732 | return CT_DPIF_SCTP_STATE_COOKIE_ECHOED; | |
733 | case SCTP_CONNTRACK_ESTABLISHED: | |
734 | return CT_DPIF_SCTP_STATE_ESTABLISHED; | |
735 | case SCTP_CONNTRACK_SHUTDOWN_SENT: | |
736 | return CT_DPIF_SCTP_STATE_SHUTDOWN_SENT; | |
737 | case SCTP_CONNTRACK_SHUTDOWN_RECD: | |
738 | return CT_DPIF_SCTP_STATE_SHUTDOWN_RECD; | |
739 | case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: | |
740 | return CT_DPIF_SCTP_STATE_SHUTDOWN_ACK_SENT; | |
741 | case SCTP_CONNTRACK_HEARTBEAT_SENT: | |
742 | return CT_DPIF_SCTP_STATE_HEARTBEAT_SENT; | |
743 | case SCTP_CONNTRACK_HEARTBEAT_ACKED: | |
744 | return CT_DPIF_SCTP_STATE_HEARTBEAT_ACKED; | |
745 | case SCTP_CONNTRACK_CLOSED: | |
746 | /* Fall Through. */ | |
747 | case SCTP_CONNTRACK_NONE: | |
748 | /* Fall Through. */ | |
749 | default: | |
750 | return CT_DPIF_SCTP_STATE_CLOSED; | |
751 | } | |
752 | #endif | |
753 | } | |
754 | ||
755 | static bool | |
756 | nl_ct_parse_protoinfo_sctp(struct nlattr *nla, | |
757 | struct ct_dpif_protoinfo *protoinfo) | |
758 | { | |
759 | static const struct nl_policy policy[] = { | |
760 | [CTA_PROTOINFO_SCTP_STATE] = { .type = NL_A_U8, .optional = false }, | |
761 | [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NL_A_U32, | |
762 | .optional = false }, | |
763 | [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NL_A_U32, | |
764 | .optional = false }, | |
765 | }; | |
766 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
767 | bool parsed; | |
768 | ||
769 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
770 | if (parsed) { | |
771 | protoinfo->proto = IPPROTO_SCTP; | |
772 | ||
773 | protoinfo->sctp.state = nl_ct_sctp_state_to_dpif( | |
774 | nl_attr_get_u8(attrs[CTA_PROTOINFO_SCTP_STATE])); | |
775 | protoinfo->sctp.vtag_orig = nl_attr_get_u32( | |
776 | attrs[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); | |
777 | protoinfo->sctp.vtag_reply = nl_attr_get_u32( | |
778 | attrs[CTA_PROTOINFO_SCTP_VTAG_REPLY]); | |
779 | } else { | |
780 | VLOG_ERR_RL(&rl, "Could not parse nested SCTP protoinfo options. " | |
781 | "Possibly incompatible Linux kernel version."); | |
782 | } | |
783 | ||
784 | return parsed; | |
785 | } | |
786 | ||
6830a0c0 DDP |
787 | static bool |
788 | nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo) | |
789 | { | |
790 | /* These are mutually exclusive. */ | |
791 | static const struct nl_policy policy[] = { | |
792 | [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true }, | |
793 | [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true }, | |
794 | }; | |
795 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
796 | bool parsed; | |
797 | ||
798 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
799 | ||
800 | memset(protoinfo, 0, sizeof *protoinfo); | |
801 | ||
802 | if (parsed) { | |
803 | if (attrs[CTA_PROTOINFO_TCP]) { | |
804 | parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP], | |
805 | protoinfo); | |
806 | } else if (attrs[CTA_PROTOINFO_SCTP]) { | |
93346d88 AC |
807 | parsed = nl_ct_parse_protoinfo_sctp(attrs[CTA_PROTOINFO_SCTP], |
808 | protoinfo); | |
6830a0c0 DDP |
809 | } else { |
810 | VLOG_WARN_RL(&rl, "Empty protoinfo!"); | |
811 | } | |
812 | } else { | |
813 | VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. " | |
814 | "Possibly incompatible Linux kernel version."); | |
815 | } | |
816 | ||
817 | return parsed; | |
818 | } | |
819 | ||
820 | static bool | |
821 | nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper) | |
822 | { | |
823 | static const struct nl_policy policy[] = { | |
824 | [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false }, | |
825 | }; | |
826 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
827 | bool parsed; | |
828 | ||
829 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
830 | ||
831 | memset(helper, 0, sizeof *helper); | |
832 | ||
833 | if (parsed) { | |
834 | helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME])); | |
835 | } else { | |
836 | VLOG_ERR_RL(&rl, "Could not parse nested helper options. " | |
837 | "Possibly incompatible Linux kernel version."); | |
838 | } | |
839 | ||
840 | return parsed; | |
841 | } | |
842 | ||
1f161318 YHW |
843 | static int nl_ct_timeout_policy_max_attr[] = { |
844 | [IPPROTO_TCP] = CTA_TIMEOUT_TCP_MAX, | |
845 | [IPPROTO_UDP] = CTA_TIMEOUT_UDP_MAX, | |
846 | [IPPROTO_ICMP] = CTA_TIMEOUT_ICMP_MAX, | |
847 | [IPPROTO_ICMPV6] = CTA_TIMEOUT_ICMPV6_MAX | |
848 | }; | |
849 | ||
850 | static void | |
851 | nl_ct_set_timeout_policy_attr(struct nl_ct_timeout_policy *nl_tp, | |
852 | uint32_t attr, uint32_t val) | |
853 | { | |
854 | nl_tp->present |= 1 << attr; | |
855 | nl_tp->attrs[attr] = val; | |
856 | } | |
857 | ||
858 | static int | |
859 | nl_ct_parse_tcp_timeout_policy_data(struct nlattr *nla, | |
860 | struct nl_ct_timeout_policy *nl_tp) | |
861 | { | |
862 | static const struct nl_policy policy[] = { | |
863 | [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NL_A_BE32, | |
864 | .optional = false }, | |
865 | [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NL_A_BE32, | |
866 | .optional = false }, | |
867 | [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NL_A_BE32, | |
868 | .optional = false }, | |
869 | [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NL_A_BE32, | |
870 | .optional = false }, | |
871 | [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NL_A_BE32, | |
872 | .optional = false }, | |
873 | [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NL_A_BE32, | |
874 | .optional = false }, | |
875 | [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NL_A_BE32, | |
876 | .optional = false }, | |
877 | [CTA_TIMEOUT_TCP_CLOSE] = { .type = NL_A_BE32, | |
878 | .optional = false }, | |
879 | [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NL_A_BE32, | |
880 | .optional = false }, | |
881 | [CTA_TIMEOUT_TCP_RETRANS] = { .type = NL_A_BE32, | |
882 | .optional = false }, | |
883 | [CTA_TIMEOUT_TCP_UNACK] = { .type = NL_A_BE32, | |
884 | .optional = false }, | |
885 | }; | |
886 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
887 | ||
888 | if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { | |
889 | VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. " | |
890 | "Possibly incompatible Linux kernel version."); | |
891 | return EINVAL; | |
892 | } | |
893 | ||
894 | for (int i = CTA_TIMEOUT_TCP_SYN_SENT; i <= CTA_TIMEOUT_TCP_UNACK; i++) { | |
895 | nl_ct_set_timeout_policy_attr(nl_tp, i, | |
896 | ntohl(nl_attr_get_be32(attrs[i]))); | |
897 | } | |
898 | return 0; | |
899 | } | |
900 | ||
901 | static int | |
902 | nl_ct_parse_udp_timeout_policy_data(struct nlattr *nla, | |
903 | struct nl_ct_timeout_policy *nl_tp) | |
904 | { | |
905 | static const struct nl_policy policy[] = { | |
906 | [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NL_A_BE32, | |
907 | .optional = false }, | |
908 | [CTA_TIMEOUT_UDP_REPLIED] = { .type = NL_A_BE32, | |
909 | .optional = false }, | |
910 | }; | |
911 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
912 | ||
913 | if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { | |
914 | VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. " | |
915 | "Possibly incompatible Linux kernel version."); | |
916 | return EINVAL; | |
917 | } | |
918 | ||
919 | for (int i = CTA_TIMEOUT_UDP_UNREPLIED; i <= CTA_TIMEOUT_UDP_REPLIED; | |
920 | i++) { | |
921 | nl_ct_set_timeout_policy_attr(nl_tp, i, | |
922 | ntohl(nl_attr_get_be32(attrs[i]))); | |
923 | } | |
924 | return 0; | |
925 | } | |
926 | ||
927 | static int | |
928 | nl_ct_parse_icmp_timeout_policy_data(struct nlattr *nla, | |
929 | struct nl_ct_timeout_policy *nl_tp) | |
930 | { | |
931 | static const struct nl_policy policy[] = { | |
932 | [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NL_A_BE32, | |
933 | .optional = false }, | |
934 | }; | |
935 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
936 | ||
937 | if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { | |
938 | VLOG_ERR_RL(&rl, "Could not parse nested icmp timeout options. " | |
939 | "Possibly incompatible Linux kernel version."); | |
940 | return EINVAL; | |
941 | } | |
942 | ||
943 | nl_ct_set_timeout_policy_attr( | |
944 | nl_tp, CTA_TIMEOUT_ICMP_TIMEOUT, | |
945 | ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMP_TIMEOUT]))); | |
946 | return 0; | |
947 | } | |
948 | ||
949 | static int | |
950 | nl_ct_parse_icmpv6_timeout_policy_data(struct nlattr *nla, | |
951 | struct nl_ct_timeout_policy *nl_tp) | |
952 | { | |
953 | static const struct nl_policy policy[] = { | |
954 | [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NL_A_BE32, | |
955 | .optional = false }, | |
956 | }; | |
957 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
958 | ||
959 | if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { | |
960 | VLOG_ERR_RL(&rl, "Could not parse nested icmpv6 timeout options. " | |
961 | "Possibly incompatible Linux kernel version."); | |
962 | return EINVAL; | |
963 | } | |
964 | ||
965 | nl_ct_set_timeout_policy_attr( | |
966 | nl_tp, CTA_TIMEOUT_ICMPV6_TIMEOUT, | |
967 | ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMPV6_TIMEOUT]))); | |
968 | return 0; | |
969 | } | |
970 | ||
971 | static int | |
972 | nl_ct_parse_timeout_policy_data(struct nlattr *nla, | |
973 | struct nl_ct_timeout_policy *nl_tp) | |
974 | { | |
975 | switch (nl_tp->l4num) { | |
976 | case IPPROTO_TCP: | |
977 | return nl_ct_parse_tcp_timeout_policy_data(nla, nl_tp); | |
978 | case IPPROTO_UDP: | |
979 | return nl_ct_parse_udp_timeout_policy_data(nla, nl_tp); | |
980 | case IPPROTO_ICMP: | |
981 | return nl_ct_parse_icmp_timeout_policy_data(nla, nl_tp); | |
982 | case IPPROTO_ICMPV6: | |
983 | return nl_ct_parse_icmpv6_timeout_policy_data(nla, nl_tp); | |
984 | default: | |
985 | return EINVAL; | |
986 | } | |
987 | } | |
988 | ||
989 | static int | |
990 | nl_ct_timeout_policy_from_ofpbuf(struct ofpbuf *buf, | |
991 | struct nl_ct_timeout_policy *nl_tp, | |
992 | bool default_tp) | |
993 | { | |
994 | static const struct nl_policy policy[] = { | |
995 | [CTA_TIMEOUT_NAME] = { .type = NL_A_STRING, .optional = false }, | |
996 | [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false }, | |
997 | [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false }, | |
998 | [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false } | |
999 | }; | |
1000 | static const struct nl_policy policy_default_tp[] = { | |
1001 | [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false }, | |
1002 | [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false }, | |
1003 | [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false } | |
1004 | }; | |
1005 | ||
1006 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
1007 | struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size); | |
1008 | struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); | |
1009 | struct nfgenmsg *nfmsg = ofpbuf_try_pull(&b, sizeof *nfmsg); | |
1010 | ||
1011 | if (!nlmsg || !nfmsg | |
1012 | || NFNL_SUBSYS_ID(nlmsg->nlmsg_type) != NFNL_SUBSYS_CTNETLINK_TIMEOUT | |
1013 | || nfmsg->version != NFNETLINK_V0 | |
1014 | || !nl_policy_parse(&b, 0, default_tp ? policy_default_tp : policy, | |
1015 | attrs, default_tp ? ARRAY_SIZE(policy_default_tp) : | |
1016 | ARRAY_SIZE(policy))) { | |
1017 | return EINVAL; | |
1018 | } | |
1019 | ||
1020 | if (!default_tp) { | |
1021 | ovs_strlcpy(nl_tp->name, nl_attr_get_string(attrs[CTA_TIMEOUT_NAME]), | |
1022 | sizeof nl_tp->name); | |
1023 | } | |
1024 | nl_tp->l3num = ntohs(nl_attr_get_be16(attrs[CTA_TIMEOUT_L3PROTO])); | |
1025 | nl_tp->l4num = nl_attr_get_u8(attrs[CTA_TIMEOUT_L4PROTO]); | |
1026 | nl_tp->present = 0; | |
1027 | ||
1028 | return nl_ct_parse_timeout_policy_data(attrs[CTA_TIMEOUT_DATA], nl_tp); | |
1029 | } | |
1030 | ||
1031 | int | |
1032 | nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp) | |
1033 | { | |
1034 | struct ofpbuf buf; | |
1035 | size_t offset; | |
1036 | ||
1037 | ofpbuf_init(&buf, 512); | |
1038 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, | |
1039 | IPCTNL_MSG_TIMEOUT_NEW, NLM_F_REQUEST | NLM_F_CREATE | |
1040 | | NLM_F_ACK | NLM_F_REPLACE); | |
1041 | ||
1042 | nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, nl_tp->name); | |
1043 | nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num)); | |
1044 | nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num); | |
1045 | ||
1046 | offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA); | |
1047 | for (int i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) { | |
1048 | if (nl_tp->present & 1 << i) { | |
1049 | nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i])); | |
1050 | } | |
1051 | } | |
1052 | nl_msg_end_nested(&buf, offset); | |
1053 | ||
1054 | int err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
1055 | ofpbuf_uninit(&buf); | |
1056 | return err; | |
1057 | } | |
1058 | ||
1059 | int | |
1060 | nl_ct_get_timeout_policy(const char *tp_name, | |
1061 | struct nl_ct_timeout_policy *nl_tp) | |
1062 | { | |
1063 | struct ofpbuf request, *reply; | |
1064 | ||
1065 | ofpbuf_init(&request, 512); | |
1066 | nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, | |
1067 | IPCTNL_MSG_TIMEOUT_GET, NLM_F_REQUEST | NLM_F_ACK); | |
1068 | nl_msg_put_string(&request, CTA_TIMEOUT_NAME, tp_name); | |
1069 | int err = nl_transact(NETLINK_NETFILTER, &request, &reply); | |
1070 | if (err) { | |
1071 | goto out; | |
1072 | } | |
1073 | ||
1074 | err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, false); | |
1075 | ||
1076 | out: | |
1077 | ofpbuf_uninit(&request); | |
1078 | ofpbuf_delete(reply); | |
1079 | return err; | |
1080 | } | |
1081 | ||
1082 | int | |
1083 | nl_ct_del_timeout_policy(const char *tp_name) | |
1084 | { | |
1085 | struct ofpbuf buf; | |
1086 | ||
1087 | ofpbuf_init(&buf, 64); | |
1088 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, | |
1089 | IPCTNL_MSG_TIMEOUT_DELETE, NLM_F_REQUEST | NLM_F_ACK); | |
1090 | ||
1091 | nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, tp_name); | |
1092 | int err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
1093 | ofpbuf_uninit(&buf); | |
1094 | return err; | |
1095 | } | |
1096 | ||
1097 | struct nl_ct_timeout_policy_dump_state { | |
1098 | struct nl_dump dump; | |
1099 | struct ofpbuf buf; | |
1100 | }; | |
1101 | ||
1102 | int | |
1103 | nl_ct_timeout_policy_dump_start( | |
1104 | struct nl_ct_timeout_policy_dump_state **statep) | |
1105 | { | |
1106 | struct ofpbuf request; | |
1107 | struct nl_ct_timeout_policy_dump_state *state; | |
1108 | ||
1109 | *statep = state = xzalloc(sizeof *state); | |
1110 | ofpbuf_init(&request, 512); | |
1111 | nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, | |
1112 | IPCTNL_MSG_TIMEOUT_GET, | |
1113 | NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); | |
1114 | ||
1115 | nl_dump_start(&state->dump, NETLINK_NETFILTER, &request); | |
1116 | ofpbuf_uninit(&request); | |
1117 | ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); | |
1118 | return 0; | |
1119 | } | |
1120 | ||
1121 | int | |
1122 | nl_ct_timeout_policy_dump_next(struct nl_ct_timeout_policy_dump_state *state, | |
1123 | struct nl_ct_timeout_policy *nl_tp) | |
1124 | { | |
1125 | struct ofpbuf reply; | |
1126 | ||
1127 | if (!nl_dump_next(&state->dump, &reply, &state->buf)) { | |
1128 | return EOF; | |
1129 | } | |
1130 | int err = nl_ct_timeout_policy_from_ofpbuf(&reply, nl_tp, false); | |
1131 | ofpbuf_uninit(&reply); | |
1132 | return err; | |
1133 | } | |
1134 | ||
1135 | int | |
1136 | nl_ct_timeout_policy_dump_done(struct nl_ct_timeout_policy_dump_state *state) | |
1137 | { | |
1138 | int err = nl_dump_done(&state->dump); | |
1139 | ofpbuf_uninit(&state->buf); | |
1140 | free(state); | |
1141 | return err; | |
1142 | } | |
1143 | ||
6830a0c0 DDP |
1144 | /* Translate netlink entry status flags to CT_DPIF_TCP status flags. */ |
1145 | static uint32_t | |
1146 | ips_status_to_dpif_flags(uint32_t status) | |
1147 | { | |
1148 | uint32_t ret = 0; | |
1149 | #define CT_DPIF_STATUS_FLAG(FLAG) \ | |
1150 | ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0; | |
1151 | CT_DPIF_STATUS_FLAGS | |
1152 | #undef CT_DPIF_STATUS_FLAG | |
1153 | return ret; | |
1154 | } | |
1155 | ||
1156 | static bool | |
1157 | nl_ct_parse_header_policy(struct ofpbuf *buf, | |
1158 | enum nl_ct_event_type *event_type, | |
1159 | uint8_t *nfgen_family, | |
1160 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]) | |
1161 | { | |
1162 | struct nlmsghdr *nlh; | |
1163 | struct nfgenmsg *nfm; | |
1164 | uint8_t type; | |
1165 | ||
1166 | nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); | |
1167 | nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm); | |
1168 | if (!nfm) { | |
1169 | VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg)."); | |
1170 | return false; | |
1171 | } | |
1172 | if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) { | |
1173 | VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).", | |
1174 | NFNL_SUBSYS_ID(nlh->nlmsg_type)); | |
1175 | return false; | |
1176 | } | |
1177 | if (nfm->version != NFNETLINK_V0) { | |
1178 | VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).", | |
1179 | NFNL_MSG_TYPE(nfm->version)); | |
1180 | return false; | |
1181 | } | |
1182 | ||
1183 | if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm, | |
1184 | nfnlgrp_conntrack_policy, attrs, | |
1185 | ARRAY_SIZE(nfnlgrp_conntrack_policy))) { | |
1186 | VLOG_ERR_RL(&rl, "Received bad nfnl message (policy)."); | |
1187 | return false; | |
1188 | } | |
1189 | ||
1190 | type = NFNL_MSG_TYPE(nlh->nlmsg_type); | |
1191 | *nfgen_family = nfm->nfgen_family; | |
1192 | ||
1193 | switch (type) { | |
1194 | case IPCTNL_MSG_CT_NEW: | |
1195 | *event_type = nlh->nlmsg_flags & NLM_F_CREATE | |
1196 | ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE; | |
1197 | break; | |
1198 | case IPCTNL_MSG_CT_DELETE: | |
1199 | *event_type = NL_CT_EVENT_DELETE; | |
1200 | break; | |
1201 | default: | |
1202 | VLOG_ERR_RL(&rl, "Can't parse conntrack event type."); | |
1203 | return false; | |
1204 | } | |
1205 | ||
1206 | return true; | |
1207 | } | |
1208 | ||
1209 | static bool | |
1210 | nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry, | |
1211 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)], | |
1212 | uint8_t nfgen_family) | |
1213 | { | |
1214 | if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig, | |
1215 | nfgen_family)) { | |
1216 | return false; | |
1217 | } | |
1218 | if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply, | |
1219 | nfgen_family)) { | |
1220 | return false; | |
1221 | } | |
1222 | if (attrs[CTA_COUNTERS_ORIG] && | |
1223 | !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG], | |
1224 | &entry->counters_orig)) { | |
1225 | return false; | |
1226 | } | |
1227 | if (attrs[CTA_COUNTERS_REPLY] && | |
1228 | !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY], | |
1229 | &entry->counters_reply)) { | |
1230 | return false; | |
1231 | } | |
1232 | if (attrs[CTA_TIMESTAMP] && | |
1233 | !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) { | |
1234 | return false; | |
1235 | } | |
1236 | if (attrs[CTA_ID]) { | |
1237 | entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID])); | |
1238 | } | |
1239 | if (attrs[CTA_ZONE]) { | |
1240 | entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); | |
1241 | } | |
1242 | if (attrs[CTA_STATUS]) { | |
1243 | entry->status = ips_status_to_dpif_flags( | |
1244 | ntohl(nl_attr_get_be32(attrs[CTA_STATUS]))); | |
1245 | } | |
1246 | if (attrs[CTA_TIMEOUT]) { | |
1247 | entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT])); | |
1248 | } | |
1249 | if (attrs[CTA_MARK]) { | |
1250 | entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK])); | |
1251 | } | |
1252 | if (attrs[CTA_LABELS]) { | |
e7237700 | 1253 | entry->have_labels = true; |
6830a0c0 DDP |
1254 | memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]), |
1255 | MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS]))); | |
1256 | } | |
1257 | if (attrs[CTA_PROTOINFO] && | |
1258 | !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) { | |
1259 | return false; | |
1260 | } | |
1261 | if (attrs[CTA_HELP] && | |
1262 | !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) { | |
1263 | return false; | |
1264 | } | |
1265 | if (attrs[CTA_TUPLE_MASTER] && | |
f51cf36d | 1266 | !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_parent, |
6830a0c0 DDP |
1267 | nfgen_family)) { |
1268 | return false; | |
1269 | } | |
1270 | return true; | |
1271 | } | |
1272 | ||
1273 | bool | |
1274 | nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry, | |
1275 | enum nl_ct_event_type *event_type) | |
1276 | { | |
1277 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; | |
1278 | uint8_t nfgen_family; | |
1279 | ||
1280 | memset(entry, 0, sizeof *entry); | |
1281 | if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) { | |
1282 | return false; | |
1283 | }; | |
1284 | ||
1285 | if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) { | |
1286 | ct_dpif_entry_uninit(entry); | |
1287 | memset(entry, 0, sizeof *entry); | |
1288 | return false; | |
1289 | } | |
1290 | ||
1291 | return true; | |
1292 | } | |
e0467f6d | 1293 | |
6830a0c0 DDP |
1294 | /* NetFilter utility functions. */ |
1295 | ||
1296 | /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be | |
1297 | * initially empty. 'expected_payload' should be an estimate of the number of | |
1298 | * payload bytes to be supplied; if the size of the payload is unknown a value | |
1299 | * of 0 is acceptable. | |
1300 | * | |
1301 | * Non-zero 'family' is the address family of items to get (e.g. AF_INET). | |
1302 | * | |
1303 | * 'flags' is a bit-mask that indicates what kind of request is being made. It | |
1304 | * is often NLM_F_REQUEST indicating that a request is being made, commonly | |
1305 | * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag reguests | |
1306 | * a dump of the table. | |
1307 | * | |
1308 | * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK. | |
1309 | * | |
1310 | * 'cmd' is an enumerated value specific to the 'subsystem'. | |
1311 | * | |
1312 | * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will | |
1313 | * fill it in just before sending the message. | |
1314 | * | |
1315 | * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are | |
1316 | * not NetFilter Netlink messages. */ | |
1317 | static void | |
1318 | nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload, | |
1319 | int family, uint8_t subsystem, uint8_t cmd, | |
1320 | uint32_t flags) | |
1321 | { | |
1322 | struct nfgenmsg *nfm; | |
1323 | ||
1324 | nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload, | |
1325 | subsystem << 8 | cmd, flags); | |
1326 | ovs_assert(msg->size == NLMSG_HDRLEN); | |
1327 | nfm = nl_msg_put_uninit(msg, sizeof *nfm); | |
1328 | nfm->nfgen_family = family; | |
1329 | nfm->version = NFNETLINK_V0; | |
1330 | nfm->res_id = 0; | |
e0467f6d SV |
1331 | #ifdef _WIN32 |
1332 | /* nfgenmsg contains ovsHdr padding in windows */ | |
1333 | nfm->ovsHdr.dp_ifindex = 0; | |
1334 | #endif | |
6830a0c0 | 1335 | } |