]>
Commit | Line | Data |
---|---|---|
6830a0c0 DDP |
1 | /* |
2 | * Copyright (c) 2015 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
19 | #include "netlink-conntrack.h" | |
20 | ||
21 | #include <linux/netfilter/nfnetlink.h> | |
22 | #include <linux/netfilter/nfnetlink_conntrack.h> | |
23 | #include <linux/netfilter/nf_conntrack_common.h> | |
24 | #include <linux/netfilter/nf_conntrack_tcp.h> | |
25 | #include <linux/netfilter/nf_conntrack_ftp.h> | |
26 | #include <linux/netfilter/nf_conntrack_sctp.h> | |
27 | ||
28 | #include "byte-order.h" | |
29 | #include "compiler.h" | |
3e8a2ad1 | 30 | #include "openvswitch/dynamic-string.h" |
6830a0c0 DDP |
31 | #include "netlink.h" |
32 | #include "netlink-socket.h" | |
64c96779 | 33 | #include "openvswitch/ofpbuf.h" |
6830a0c0 DDP |
34 | #include "openvswitch/vlog.h" |
35 | #include "poll-loop.h" | |
36 | #include "timeval.h" | |
37 | #include "unixctl.h" | |
38 | #include "util.h" | |
39 | ||
40 | VLOG_DEFINE_THIS_MODULE(netlink_conntrack); | |
41 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
42 | ||
43 | /* This module works only if conntrack modules and features are enabled in the | |
44 | * Linux kernel. This can be done from a root shell like this: | |
45 | * | |
46 | * $ modprobe ip_conntrack | |
47 | * $ sysctl -w net.netfilter.nf_conntrack_acct=1 | |
48 | * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1 | |
49 | * | |
50 | * Also, if testing conntrack label feature without conntrack-aware OVS kernel | |
51 | * module, there must be a connlabel rule in iptables for space to be reserved | |
52 | * for the labels (see kernel source connlabel_mt_check()). Such a rule can be | |
53 | * inserted from a root shell like this: | |
54 | * | |
55 | * $ iptables -A INPUT -m conntrack -m connlabel \ | |
56 | * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT | |
57 | */ | |
58 | ||
59 | /* Some attributes were introduced in later kernels: with these definitions | |
60 | * we should be able to compile userspace against Linux 2.6.32+. */ | |
61 | ||
/* Conntrack attribute numbers that follow CTA_SECMARK.  They are spelled
 * relative to CTA_SECMARK so that the file does not depend on the kernel
 * headers providing them. */
#define CTA_ZONE            (CTA_SECMARK + 1)
#define CTA_SECCTX          (CTA_SECMARK + 2)
#define CTA_TIMESTAMP       (CTA_SECMARK + 3)
#define CTA_MARK_MASK       (CTA_SECMARK + 4)
#define CTA_LABELS          (CTA_SECMARK + 5)
#define CTA_LABELS_MASK     (CTA_SECMARK + 6)

/* Attributes nested inside CTA_TIMESTAMP. */
#define CTA_TIMESTAMP_START 1
#define CTA_TIMESTAMP_STOP  2

/* Conntrack status bits 11 and 12. */
#define IPS_TEMPLATE_BIT    11
#define IPS_TEMPLATE        (1 << IPS_TEMPLATE_BIT)

#define IPS_UNTRACKED_BIT   12
#define IPS_UNTRACKED       (1 << IPS_UNTRACKED_BIT)

#ifdef _WIN32
/* Reuse same socket for nfgenmsg and genlmsghdr in Windows: any existing
 * NETLINK_NETFILTER definition is replaced by NETLINK_GENERIC. */
#ifdef NETLINK_NETFILTER
#undef NETLINK_NETFILTER
#endif
#define NETLINK_NETFILTER NETLINK_GENERIC
#endif
85 | ||
6830a0c0 DDP |
86 | static const struct nl_policy nfnlgrp_conntrack_policy[] = { |
87 | [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false }, | |
88 | [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false }, | |
89 | [CTA_ZONE] = { .type = NL_A_BE16, .optional = true }, | |
90 | [CTA_STATUS] = { .type = NL_A_BE32, .optional = false }, | |
91 | [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true }, | |
92 | [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true }, | |
93 | [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true }, | |
94 | [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true }, | |
95 | [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true }, | |
96 | [CTA_HELP] = { .type = NL_A_NESTED, .optional = true }, | |
97 | [CTA_MARK] = { .type = NL_A_BE32, .optional = true }, | |
98 | [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true }, | |
99 | [CTA_ID] = { .type = NL_A_BE32, .optional = false }, | |
100 | [CTA_USE] = { .type = NL_A_BE32, .optional = true }, | |
101 | [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true }, | |
102 | [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true }, | |
103 | [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true }, | |
104 | [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true }, | |
105 | /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and | |
106 | * CTA_LABELS_MASK are not received from kernel. */ | |
107 | }; | |
108 | ||
109 | /* Declarations for conntrack netlink dumping. */ | |
110 | static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload, | |
111 | int family, uint8_t subsystem, uint8_t cmd, | |
112 | uint32_t flags); | |
113 | ||
114 | static bool nl_ct_parse_header_policy(struct ofpbuf *buf, | |
115 | enum nl_ct_event_type *event_type, | |
116 | uint8_t *nfgen_family, | |
117 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]); | |
118 | ||
119 | static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry, | |
120 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)], | |
121 | uint8_t nfgen_family); | |
122 | ||
123 | struct nl_ct_dump_state { | |
124 | struct nl_dump dump; | |
125 | struct ofpbuf buf; | |
126 | bool filter_zone; | |
127 | uint16_t zone; | |
128 | }; | |
e0467f6d | 129 | |
6830a0c0 DDP |
130 | /* Conntrack netlink dumping. */ |
131 | ||
132 | /* Initialize a conntrack netlink dump. */ | |
133 | int | |
134 | nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone) | |
135 | { | |
136 | struct nl_ct_dump_state *state; | |
137 | ||
138 | *statep = state = xzalloc(sizeof *state); | |
139 | ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); | |
140 | ||
141 | if (zone) { | |
142 | state->filter_zone = true; | |
143 | state->zone = *zone; | |
144 | } | |
145 | ||
146 | nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
147 | IPCTNL_MSG_CT_GET, NLM_F_REQUEST); | |
148 | nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf); | |
149 | ofpbuf_clear(&state->buf); | |
150 | ||
151 | return 0; | |
152 | } | |
153 | ||
154 | /* Receive the next 'entry' from the conntrack netlink dump with 'state'. | |
155 | * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may | |
156 | * be uninitilized memory on entry, and must be uninitialized with | |
157 | * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is | |
158 | * passed to this function again, the entry must also be uninitialized before | |
159 | * the next call. */ | |
160 | int | |
161 | nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry) | |
162 | { | |
163 | struct ofpbuf buf; | |
164 | ||
165 | memset(entry, 0, sizeof *entry); | |
166 | for (;;) { | |
167 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; | |
168 | enum nl_ct_event_type type; | |
169 | uint8_t nfgen_family; | |
170 | ||
171 | if (!nl_dump_next(&state->dump, &buf, &state->buf)) { | |
172 | return EOF; | |
173 | } | |
174 | ||
175 | if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) { | |
176 | continue; | |
177 | }; | |
178 | ||
179 | if (state->filter_zone) { | |
180 | uint16_t entry_zone = attrs[CTA_ZONE] | |
181 | ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE])) | |
182 | : 0; | |
183 | if (entry_zone != state->zone) { | |
184 | continue; | |
185 | } | |
186 | } | |
187 | ||
188 | if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) { | |
189 | break; | |
190 | } | |
191 | ||
192 | ct_dpif_entry_uninit(entry); | |
193 | memset(entry, 0, sizeof *entry); | |
194 | /* Ignore the failed entry and get the next one. */ | |
195 | } | |
196 | ||
197 | ofpbuf_uninit(&buf); | |
198 | return 0; | |
199 | } | |
200 | ||
201 | /* End a conntrack netlink dump. */ | |
202 | int | |
203 | nl_ct_dump_done(struct nl_ct_dump_state *state) | |
204 | { | |
205 | int error = nl_dump_done(&state->dump); | |
206 | ||
207 | ofpbuf_uninit(&state->buf); | |
208 | free(state); | |
209 | return error; | |
210 | } | |
e0467f6d | 211 | |
6830a0c0 DDP |
212 | /* Format conntrack event 'entry' of 'type' to 'ds'. */ |
213 | void | |
214 | nl_ct_format_event_entry(const struct ct_dpif_entry *entry, | |
215 | enum nl_ct_event_type type, struct ds *ds, | |
216 | bool verbose, bool print_stats) | |
217 | { | |
218 | ds_put_format(ds, "%s ", | |
219 | type == NL_CT_EVENT_NEW ? "NEW" | |
220 | : type == NL_CT_EVENT_UPDATE ? "UPDATE" | |
221 | : type == NL_CT_EVENT_DELETE ? "DELETE" | |
222 | : "UNKNOWN"); | |
223 | ct_dpif_format_entry(entry, ds, verbose, print_stats); | |
224 | } | |
225 | ||
226 | int | |
227 | nl_ct_flush(void) | |
228 | { | |
229 | struct ofpbuf buf; | |
230 | int err; | |
231 | ||
232 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
233 | ||
234 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
235 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
236 | ||
237 | err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
238 | ofpbuf_uninit(&buf); | |
239 | ||
240 | /* Expectations are flushed automatically, because they do not | |
241 | * have a master connection anymore */ | |
242 | ||
243 | return err; | |
244 | } | |
245 | ||
e0467f6d SV |
246 | #ifdef _WIN32 |
247 | int | |
248 | nl_ct_flush_zone(uint16_t flush_zone) | |
249 | { | |
250 | /* Windows can flush a specific zone */ | |
251 | struct ofpbuf buf; | |
252 | int err; | |
253 | ||
254 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
255 | ||
256 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
257 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
258 | nl_msg_put_be16(&buf, CTA_ZONE, flush_zone); | |
259 | ||
260 | err = nl_transact(NETLINK_NETFILTER, &buf, NULL); | |
261 | ofpbuf_uninit(&buf); | |
262 | ||
263 | return err; | |
264 | } | |
265 | #else | |
6830a0c0 DDP |
266 | int |
267 | nl_ct_flush_zone(uint16_t flush_zone) | |
268 | { | |
269 | /* Apparently, there's no netlink interface to flush a specific zone. | |
270 | * This code dumps every connection, checks the zone and eventually | |
271 | * delete the entry. | |
272 | * | |
273 | * This is race-prone, but it is better than using shell scripts. */ | |
274 | ||
275 | struct nl_dump dump; | |
276 | struct ofpbuf buf, reply, delete; | |
277 | ||
278 | ofpbuf_init(&buf, NL_DUMP_BUFSIZE); | |
279 | ofpbuf_init(&delete, NL_DUMP_BUFSIZE); | |
280 | ||
281 | nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, | |
282 | IPCTNL_MSG_CT_GET, NLM_F_REQUEST); | |
283 | nl_dump_start(&dump, NETLINK_NETFILTER, &buf); | |
284 | ofpbuf_clear(&buf); | |
285 | ||
286 | for (;;) { | |
287 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; | |
288 | enum nl_ct_event_type event_type; | |
289 | uint8_t nfgen_family; | |
290 | uint16_t zone = 0; | |
291 | ||
292 | if (!nl_dump_next(&dump, &reply, &buf)) { | |
293 | break; | |
294 | } | |
295 | ||
296 | if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family, | |
297 | attrs)) { | |
298 | continue; | |
299 | }; | |
300 | ||
301 | if (attrs[CTA_ZONE]) { | |
302 | zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); | |
303 | } | |
304 | ||
305 | if (zone != flush_zone) { | |
306 | /* The entry is not in the zone we're flushing. */ | |
307 | continue; | |
308 | } | |
309 | nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK, | |
310 | IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); | |
311 | ||
312 | nl_msg_put_be16(&delete, CTA_ZONE, htons(zone)); | |
313 | nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1, | |
314 | attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN); | |
315 | nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1, | |
316 | attrs[CTA_ID]->nla_len - NLA_HDRLEN); | |
317 | nl_transact(NETLINK_NETFILTER, &delete, NULL); | |
318 | ofpbuf_clear(&delete); | |
319 | } | |
320 | ||
321 | nl_dump_done(&dump); | |
322 | ||
323 | ofpbuf_uninit(&delete); | |
324 | ofpbuf_uninit(&buf); | |
325 | ||
326 | /* Expectations are flushed automatically, because they do not | |
327 | * have a master connection anymore */ | |
328 | return 0; | |
329 | } | |
e0467f6d SV |
330 | #endif |
331 | ||
6830a0c0 DDP |
332 | /* Conntrack netlink parsing. */ |
333 | ||
334 | static bool | |
335 | nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters) | |
336 | { | |
337 | static const struct nl_policy policy[] = { | |
338 | [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false }, | |
339 | [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false }, | |
340 | }; | |
341 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
342 | bool parsed; | |
343 | ||
344 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
345 | ||
346 | if (parsed) { | |
347 | counters->packets | |
348 | = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS])); | |
349 | counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES])); | |
350 | } else { | |
351 | VLOG_ERR_RL(&rl, "Could not parse nested counters. " | |
352 | "Possibly incompatible Linux kernel version."); | |
353 | } | |
354 | ||
355 | return parsed; | |
356 | } | |
357 | ||
358 | static bool | |
359 | nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp) | |
360 | { | |
361 | static const struct nl_policy policy[] = { | |
362 | [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false }, | |
363 | [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true }, | |
364 | }; | |
365 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
366 | bool parsed; | |
367 | ||
368 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
369 | ||
370 | if (parsed) { | |
371 | timestamp->start | |
372 | = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START])); | |
373 | if (attrs[CTA_TIMESTAMP_STOP]) { | |
374 | timestamp->stop | |
375 | = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP])); | |
376 | } | |
377 | } else { | |
378 | VLOG_ERR_RL(&rl, "Could not parse nested timestamp. " | |
379 | "Possibly incompatible Linux kernel version."); | |
380 | } | |
381 | ||
382 | return parsed; | |
383 | } | |
384 | ||
385 | static bool | |
386 | nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple) | |
387 | { | |
388 | static const struct nl_policy policy[] = { | |
389 | [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true }, | |
390 | [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true }, | |
391 | [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true }, | |
392 | [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true }, | |
393 | }; | |
394 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
395 | bool parsed; | |
396 | ||
397 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
398 | ||
399 | if (parsed) { | |
400 | if (tuple->l3_type == AF_INET) { | |
401 | if (attrs[CTA_IP_V4_SRC]) { | |
402 | tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]); | |
403 | } | |
404 | if (attrs[CTA_IP_V4_DST]) { | |
405 | tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]); | |
406 | } | |
407 | } else if (tuple->l3_type == AF_INET6) { | |
408 | if (attrs[CTA_IP_V6_SRC]) { | |
409 | memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]), | |
410 | sizeof tuple->src.in6); | |
411 | } | |
412 | if (attrs[CTA_IP_V6_DST]) { | |
413 | memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]), | |
414 | sizeof tuple->dst.in6); | |
415 | } | |
416 | } else { | |
417 | VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type); | |
418 | return false; | |
419 | } | |
420 | } else { | |
421 | VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. " | |
422 | "Possibly incompatible Linux kernel version."); | |
423 | } | |
424 | ||
425 | return parsed; | |
426 | } | |
427 | ||
428 | static bool | |
429 | nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple) | |
430 | { | |
431 | static const struct nl_policy policy[] = { | |
432 | [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false }, | |
433 | [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true }, | |
434 | [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true }, | |
435 | [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true }, | |
436 | [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true }, | |
437 | [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true }, | |
438 | [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true }, | |
439 | [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true }, | |
440 | [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true }, | |
441 | }; | |
442 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
443 | bool parsed; | |
444 | ||
445 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
446 | ||
447 | if (parsed) { | |
448 | tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]); | |
449 | ||
450 | if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) { | |
451 | if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE] | |
452 | || !attrs[CTA_PROTO_ICMP_CODE]) { | |
453 | VLOG_ERR_RL(&rl, "Tuple ICMP data missing."); | |
454 | return false; | |
455 | } | |
456 | tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]); | |
457 | tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]); | |
458 | tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]); | |
459 | } else if (tuple->l3_type == AF_INET6 && | |
460 | tuple->ip_proto == IPPROTO_ICMPV6) { | |
461 | if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE] | |
462 | || !attrs[CTA_PROTO_ICMPV6_CODE]) { | |
463 | VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing."); | |
464 | return false; | |
465 | } | |
466 | tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]); | |
467 | tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]); | |
468 | tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]); | |
469 | } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) { | |
470 | tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]); | |
471 | tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]); | |
472 | } else { | |
473 | /* Unsupported IPPROTO and no ports, leave them zeroed. | |
474 | * We have parsed the ip_proto, so this is not a total failure. */ | |
475 | VLOG_INFO_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto); | |
476 | } | |
477 | } else { | |
478 | VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. " | |
479 | "Possibly incompatible Linux kernel version."); | |
480 | } | |
481 | ||
482 | return parsed; | |
483 | } | |
484 | ||
485 | static bool | |
486 | nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple, | |
487 | uint16_t l3_type) | |
488 | { | |
489 | static const struct nl_policy policy[] = { | |
490 | [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false }, | |
491 | [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false }, | |
492 | }; | |
493 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
494 | bool parsed; | |
495 | ||
496 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
497 | ||
498 | memset(tuple, 0, sizeof *tuple); | |
499 | ||
500 | if (parsed) { | |
501 | tuple->l3_type = l3_type; | |
502 | ||
503 | if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple) | |
504 | || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) { | |
505 | struct ds ds; | |
506 | ||
507 | ds_init(&ds); | |
508 | ct_dpif_format_tuple(&ds, tuple, true); | |
509 | ||
510 | VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds)); | |
511 | ds_destroy(&ds); | |
512 | ||
513 | memset(tuple, 0, sizeof *tuple); | |
514 | return false; | |
515 | } | |
516 | } else { | |
517 | VLOG_ERR_RL(&rl, "Could not parse nested tuple options. " | |
518 | "Possibly incompatible Linux kernel version."); | |
519 | } | |
520 | ||
521 | return parsed; | |
522 | } | |
523 | ||
524 | /* Translate netlink TCP state to CT_DPIF_TCP state. */ | |
525 | static uint8_t | |
526 | nl_ct_tcp_state_to_dpif(uint8_t state) | |
527 | { | |
528 | switch (state) { | |
529 | case TCP_CONNTRACK_NONE: | |
530 | return CT_DPIF_TCPS_CLOSED; | |
531 | case TCP_CONNTRACK_SYN_SENT: | |
532 | return CT_DPIF_TCPS_SYN_SENT; | |
533 | case TCP_CONNTRACK_SYN_SENT2: | |
534 | return CT_DPIF_TCPS_SYN_SENT; | |
535 | case TCP_CONNTRACK_SYN_RECV: | |
536 | return CT_DPIF_TCPS_SYN_RECV; | |
537 | case TCP_CONNTRACK_ESTABLISHED: | |
538 | return CT_DPIF_TCPS_ESTABLISHED; | |
539 | case TCP_CONNTRACK_FIN_WAIT: | |
540 | return CT_DPIF_TCPS_FIN_WAIT_1; | |
541 | case TCP_CONNTRACK_CLOSE_WAIT: | |
542 | return CT_DPIF_TCPS_CLOSE_WAIT; | |
543 | case TCP_CONNTRACK_LAST_ACK: | |
544 | return CT_DPIF_TCPS_LAST_ACK; | |
545 | case TCP_CONNTRACK_TIME_WAIT: | |
546 | return CT_DPIF_TCPS_TIME_WAIT; | |
547 | case TCP_CONNTRACK_CLOSE: | |
548 | return CT_DPIF_TCPS_CLOSING; | |
549 | default: | |
550 | return CT_DPIF_TCPS_CLOSED; | |
551 | } | |
552 | } | |
553 | ||
554 | static uint8_t | |
555 | ip_ct_tcp_flags_to_dpif(uint8_t flags) | |
556 | { | |
557 | uint8_t ret = 0; | |
558 | #define CT_DPIF_TCP_FLAG(FLAG) \ | |
559 | ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0; | |
560 | CT_DPIF_TCP_FLAGS | |
561 | #undef CT_DPIF_STATUS_FLAG | |
562 | return ret; | |
563 | } | |
564 | ||
565 | static bool | |
566 | nl_ct_parse_protoinfo_tcp(struct nlattr *nla, | |
567 | struct ct_dpif_protoinfo *protoinfo) | |
568 | { | |
569 | static const struct nl_policy policy[] = { | |
570 | [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false }, | |
571 | [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8, | |
572 | .optional = false }, | |
573 | [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8, | |
574 | .optional = false }, | |
575 | [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16, | |
576 | .optional = false }, | |
577 | [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16, | |
578 | .optional = false }, | |
579 | }; | |
580 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
581 | bool parsed; | |
582 | ||
583 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
584 | ||
585 | if (parsed) { | |
586 | const struct nf_ct_tcp_flags *flags_orig, *flags_reply; | |
587 | uint8_t state; | |
588 | protoinfo->proto = IPPROTO_TCP; | |
589 | state = nl_ct_tcp_state_to_dpif( | |
590 | nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE])); | |
591 | /* The connection tracker keeps only one tcp state for the | |
592 | * connection, but our structures store a separate state for | |
593 | * each endpoint. Here we duplicate the state. */ | |
594 | protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state; | |
595 | protoinfo->tcp.wscale_orig = nl_attr_get_u8( | |
596 | attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); | |
597 | protoinfo->tcp.wscale_reply = nl_attr_get_u8( | |
598 | attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]); | |
599 | flags_orig = | |
600 | nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL], | |
601 | sizeof *flags_orig); | |
602 | protoinfo->tcp.flags_orig = | |
603 | ip_ct_tcp_flags_to_dpif(flags_orig->flags); | |
604 | flags_reply = | |
605 | nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY], | |
606 | sizeof *flags_reply); | |
607 | protoinfo->tcp.flags_reply = | |
608 | ip_ct_tcp_flags_to_dpif(flags_reply->flags); | |
609 | } else { | |
610 | VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. " | |
611 | "Possibly incompatible Linux kernel version."); | |
612 | } | |
613 | ||
614 | return parsed; | |
615 | } | |
616 | ||
617 | static bool | |
618 | nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo) | |
619 | { | |
620 | /* These are mutually exclusive. */ | |
621 | static const struct nl_policy policy[] = { | |
622 | [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true }, | |
623 | [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true }, | |
624 | }; | |
625 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
626 | bool parsed; | |
627 | ||
628 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
629 | ||
630 | memset(protoinfo, 0, sizeof *protoinfo); | |
631 | ||
632 | if (parsed) { | |
633 | if (attrs[CTA_PROTOINFO_TCP]) { | |
634 | parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP], | |
635 | protoinfo); | |
636 | } else if (attrs[CTA_PROTOINFO_SCTP]) { | |
637 | VLOG_WARN_RL(&rl, "SCTP protoinfo not yet supported!"); | |
638 | } else { | |
639 | VLOG_WARN_RL(&rl, "Empty protoinfo!"); | |
640 | } | |
641 | } else { | |
642 | VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. " | |
643 | "Possibly incompatible Linux kernel version."); | |
644 | } | |
645 | ||
646 | return parsed; | |
647 | } | |
648 | ||
649 | static bool | |
650 | nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper) | |
651 | { | |
652 | static const struct nl_policy policy[] = { | |
653 | [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false }, | |
654 | }; | |
655 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
656 | bool parsed; | |
657 | ||
658 | parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy)); | |
659 | ||
660 | memset(helper, 0, sizeof *helper); | |
661 | ||
662 | if (parsed) { | |
663 | helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME])); | |
664 | } else { | |
665 | VLOG_ERR_RL(&rl, "Could not parse nested helper options. " | |
666 | "Possibly incompatible Linux kernel version."); | |
667 | } | |
668 | ||
669 | return parsed; | |
670 | } | |
671 | ||
672 | /* Translate netlink entry status flags to CT_DPIF_TCP status flags. */ | |
673 | static uint32_t | |
674 | ips_status_to_dpif_flags(uint32_t status) | |
675 | { | |
676 | uint32_t ret = 0; | |
677 | #define CT_DPIF_STATUS_FLAG(FLAG) \ | |
678 | ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0; | |
679 | CT_DPIF_STATUS_FLAGS | |
680 | #undef CT_DPIF_STATUS_FLAG | |
681 | return ret; | |
682 | } | |
683 | ||
684 | static bool | |
685 | nl_ct_parse_header_policy(struct ofpbuf *buf, | |
686 | enum nl_ct_event_type *event_type, | |
687 | uint8_t *nfgen_family, | |
688 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]) | |
689 | { | |
690 | struct nlmsghdr *nlh; | |
691 | struct nfgenmsg *nfm; | |
692 | uint8_t type; | |
693 | ||
694 | nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); | |
695 | nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm); | |
696 | if (!nfm) { | |
697 | VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg)."); | |
698 | return false; | |
699 | } | |
700 | if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) { | |
701 | VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).", | |
702 | NFNL_SUBSYS_ID(nlh->nlmsg_type)); | |
703 | return false; | |
704 | } | |
705 | if (nfm->version != NFNETLINK_V0) { | |
706 | VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).", | |
707 | NFNL_MSG_TYPE(nfm->version)); | |
708 | return false; | |
709 | } | |
710 | ||
711 | if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm, | |
712 | nfnlgrp_conntrack_policy, attrs, | |
713 | ARRAY_SIZE(nfnlgrp_conntrack_policy))) { | |
714 | VLOG_ERR_RL(&rl, "Received bad nfnl message (policy)."); | |
715 | return false; | |
716 | } | |
717 | ||
718 | type = NFNL_MSG_TYPE(nlh->nlmsg_type); | |
719 | *nfgen_family = nfm->nfgen_family; | |
720 | ||
721 | switch (type) { | |
722 | case IPCTNL_MSG_CT_NEW: | |
723 | *event_type = nlh->nlmsg_flags & NLM_F_CREATE | |
724 | ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE; | |
725 | break; | |
726 | case IPCTNL_MSG_CT_DELETE: | |
727 | *event_type = NL_CT_EVENT_DELETE; | |
728 | break; | |
729 | default: | |
730 | VLOG_ERR_RL(&rl, "Can't parse conntrack event type."); | |
731 | return false; | |
732 | } | |
733 | ||
734 | return true; | |
735 | } | |
736 | ||
737 | static bool | |
738 | nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry, | |
739 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)], | |
740 | uint8_t nfgen_family) | |
741 | { | |
742 | if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig, | |
743 | nfgen_family)) { | |
744 | return false; | |
745 | } | |
746 | if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply, | |
747 | nfgen_family)) { | |
748 | return false; | |
749 | } | |
750 | if (attrs[CTA_COUNTERS_ORIG] && | |
751 | !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG], | |
752 | &entry->counters_orig)) { | |
753 | return false; | |
754 | } | |
755 | if (attrs[CTA_COUNTERS_REPLY] && | |
756 | !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY], | |
757 | &entry->counters_reply)) { | |
758 | return false; | |
759 | } | |
760 | if (attrs[CTA_TIMESTAMP] && | |
761 | !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) { | |
762 | return false; | |
763 | } | |
764 | if (attrs[CTA_ID]) { | |
765 | entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID])); | |
766 | } | |
767 | if (attrs[CTA_ZONE]) { | |
768 | entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); | |
769 | } | |
770 | if (attrs[CTA_STATUS]) { | |
771 | entry->status = ips_status_to_dpif_flags( | |
772 | ntohl(nl_attr_get_be32(attrs[CTA_STATUS]))); | |
773 | } | |
774 | if (attrs[CTA_TIMEOUT]) { | |
775 | entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT])); | |
776 | } | |
777 | if (attrs[CTA_MARK]) { | |
778 | entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK])); | |
779 | } | |
780 | if (attrs[CTA_LABELS]) { | |
781 | memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]), | |
782 | MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS]))); | |
783 | } | |
784 | if (attrs[CTA_PROTOINFO] && | |
785 | !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) { | |
786 | return false; | |
787 | } | |
788 | if (attrs[CTA_HELP] && | |
789 | !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) { | |
790 | return false; | |
791 | } | |
792 | if (attrs[CTA_TUPLE_MASTER] && | |
793 | !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_master, | |
794 | nfgen_family)) { | |
795 | return false; | |
796 | } | |
797 | return true; | |
798 | } | |
799 | ||
800 | bool | |
801 | nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry, | |
802 | enum nl_ct_event_type *event_type) | |
803 | { | |
804 | struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; | |
805 | uint8_t nfgen_family; | |
806 | ||
807 | memset(entry, 0, sizeof *entry); | |
808 | if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) { | |
809 | return false; | |
810 | }; | |
811 | ||
812 | if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) { | |
813 | ct_dpif_entry_uninit(entry); | |
814 | memset(entry, 0, sizeof *entry); | |
815 | return false; | |
816 | } | |
817 | ||
818 | return true; | |
819 | } | |
e0467f6d | 820 | |
6830a0c0 DDP |
821 | /* NetFilter utility functions. */ |
822 | ||
823 | /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be | |
824 | * initially empty. 'expected_payload' should be an estimate of the number of | |
825 | * payload bytes to be supplied; if the size of the payload is unknown a value | |
826 | * of 0 is acceptable. | |
827 | * | |
828 | * Non-zero 'family' is the address family of items to get (e.g. AF_INET). | |
829 | * | |
830 | * 'flags' is a bit-mask that indicates what kind of request is being made. It | |
831 | * is often NLM_F_REQUEST indicating that a request is being made, commonly | |
832 | * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag reguests | |
833 | * a dump of the table. | |
834 | * | |
835 | * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK. | |
836 | * | |
837 | * 'cmd' is an enumerated value specific to the 'subsystem'. | |
838 | * | |
839 | * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will | |
840 | * fill it in just before sending the message. | |
841 | * | |
842 | * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are | |
843 | * not NetFilter Netlink messages. */ | |
844 | static void | |
845 | nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload, | |
846 | int family, uint8_t subsystem, uint8_t cmd, | |
847 | uint32_t flags) | |
848 | { | |
849 | struct nfgenmsg *nfm; | |
850 | ||
851 | nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload, | |
852 | subsystem << 8 | cmd, flags); | |
853 | ovs_assert(msg->size == NLMSG_HDRLEN); | |
854 | nfm = nl_msg_put_uninit(msg, sizeof *nfm); | |
855 | nfm->nfgen_family = family; | |
856 | nfm->version = NFNETLINK_V0; | |
857 | nfm->res_id = 0; | |
e0467f6d SV |
858 | #ifdef _WIN32 |
859 | /* nfgenmsg contains ovsHdr padding in windows */ | |
860 | nfm->ovsHdr.dp_ifindex = 0; | |
861 | #endif | |
6830a0c0 | 862 | } |