]> git.proxmox.com Git - mirror_ovs.git/blame - lib/ct-dpif.c
dpif: Support conntrack zone limit.
[mirror_ovs.git] / lib / ct-dpif.c
CommitLineData
3948eb54
DDP
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
2a7c4805 18#include "dpif-provider.h"
3948eb54
DDP
19
20#include <errno.h>
21
22#include "ct-dpif.h"
c43a1331 23#include "openvswitch/ofp-parse.h"
2a7c4805 24#include "openvswitch/vlog.h"
3948eb54 25
2a7c4805 26VLOG_DEFINE_THIS_MODULE(ct_dpif);
b77d9629 27
3948eb54
DDP
28/* Declarations for conntrack entry formatting. */
/* One row of a flag-name lookup table: a bit (or bit mask) and the label that
 * is printed when that bit is set.  Tables built from this type are
 * terminated by a row whose 'name' is NULL. */
struct flags {
    uint32_t flag;          /* Bit(s) tested against the value to format. */
    const char *name;       /* Label to print; NULL marks the end of table. */
};
33
/* Forward declarations of the file-local formatting helpers defined later in
 * this file.  Each one appends text to the dynamic string 'ds'. */
static void ct_dpif_format_ipproto(struct ds *ds, uint16_t ipproto);
static void ct_dpif_format_counters(struct ds *ds,
                                    const struct ct_dpif_counters *counters);
static void ct_dpif_format_timestamp(
    struct ds *ds, const struct ct_dpif_timestamp *timestamp);
static void ct_dpif_format_flags(struct ds *ds, const char *title,
                                 uint32_t flags, const struct flags *table);
static void ct_dpif_format_protoinfo(
    struct ds *ds, const char *title,
    const struct ct_dpif_protoinfo *protoinfo, bool verbose);
static void ct_dpif_format_helper(struct ds *ds, const char *title,
                                  const struct ct_dpif_helper *helper);
46
47static const struct flags ct_dpif_status_flags[] = {
48#define CT_DPIF_STATUS_FLAG(FLAG) { CT_DPIF_STATUS_##FLAG, #FLAG },
49 CT_DPIF_STATUS_FLAGS
50#undef CT_DPIF_STATUS_FLAG
51 { 0, NULL } /* End marker. */
52};
53\f
b77d9629
DDP
54/* Dumping */
55
56/* Start dumping the entries from the connection tracker used by 'dpif'.
57 *
58 * 'dump' must be the address of a pointer to a struct ct_dpif_dump_state,
59 * which should be passed (unaltered) to ct_dpif_dump_{next,done}().
60 *
61 * If 'zone' is not NULL, it should point to an integer identifying a
62 * conntrack zone to which the dump will be limited. If it is NULL,
63 * conntrack entries from all zones will be dumped.
64 *
65 * If there has been a problem the function returns a non-zero value
66 * that represents the error. Otherwise it returns zero. */
67int
68ct_dpif_dump_start(struct dpif *dpif, struct ct_dpif_dump_state **dump,
ded30c74 69 const uint16_t *zone, int *ptot_bkts)
b77d9629
DDP
70{
71 int err;
72
73 err = (dpif->dpif_class->ct_dump_start
ded30c74 74 ? dpif->dpif_class->ct_dump_start(dpif, dump, zone, ptot_bkts)
b77d9629
DDP
75 : EOPNOTSUPP);
76
77 if (!err) {
78 (*dump)->dpif = dpif;
79 }
80
81 return err;
82}
83
84/* Dump one connection from a tracker, and put it in 'entry'.
85 *
86 * 'dump' should have been initialized by ct_dpif_dump_start().
87 *
88 * The function returns 0, if an entry has been dumped successfully.
89 * Otherwise it returns a non-zero value which can be:
90 * - EOF: meaning that there are no more entries to dump.
91 * - an error value.
92 * In both cases, the user should call ct_dpif_dump_done(). */
93int
94ct_dpif_dump_next(struct ct_dpif_dump_state *dump, struct ct_dpif_entry *entry)
95{
96 struct dpif *dpif = dump->dpif;
97
98 return (dpif->dpif_class->ct_dump_next
99 ? dpif->dpif_class->ct_dump_next(dpif, dump, entry)
100 : EOPNOTSUPP);
101}
102
103/* Free resources used by 'dump' */
104int
105ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
106{
107 struct dpif *dpif = dump->dpif;
108
109 return (dpif->dpif_class->ct_dump_done
110 ? dpif->dpif_class->ct_dump_done(dpif, dump)
111 : EOPNOTSUPP);
112}
113\f
817a7657
YHW
114/* Flush the entries in the connection tracker used by 'dpif'. The
115 * arguments have the following behavior:
a0f7b6d5 116 *
817a7657
YHW
117 * - If both 'zone' and 'tuple' are NULL, flush all the conntrack entries.
118 * - If 'zone' is not NULL, and 'tuple' is NULL, flush all the conntrack
119 * entries in '*zone'.
120 * - If 'tuple' is not NULL, flush the conntrack entry specified by 'tuple'
121 * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */
a0f7b6d5 122int
817a7657
YHW
123ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
124 const struct ct_dpif_tuple *tuple)
a0f7b6d5 125{
817a7657
YHW
126 if (tuple) {
127 struct ds ds = DS_EMPTY_INITIALIZER;
128 ct_dpif_format_tuple(&ds, tuple);
129 VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
130 zone ? *zone : 0);
131 ds_destroy(&ds);
132 } else if (zone) {
133 VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
2a7c4805
JP
134 } else {
135 VLOG_DBG("%s: ct_flush: <all>", dpif_name(dpif));
136 }
137
a0f7b6d5 138 return (dpif->dpif_class->ct_flush
817a7657 139 ? dpif->dpif_class->ct_flush(dpif, zone, tuple)
a0f7b6d5
DDP
140 : EOPNOTSUPP);
141}
142
c92339ad
DB
143int
144ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns)
145{
146 return (dpif->dpif_class->ct_set_maxconns
147 ? dpif->dpif_class->ct_set_maxconns(dpif, maxconns)
148 : EOPNOTSUPP);
149}
150
151int
152ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns)
153{
154 return (dpif->dpif_class->ct_get_maxconns
155 ? dpif->dpif_class->ct_get_maxconns(dpif, maxconns)
156 : EOPNOTSUPP);
157}
158
875075b3
DB
159int
160ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns)
161{
162 return (dpif->dpif_class->ct_get_nconns
163 ? dpif->dpif_class->ct_get_nconns(dpif, nconns)
164 : EOPNOTSUPP);
165}
166
cd015a11
YHW
167int
168ct_dpif_set_limits(struct dpif *dpif, const uint32_t *default_limit,
169 const struct ovs_list *zone_limits)
170{
171 return (dpif->dpif_class->ct_set_limits
172 ? dpif->dpif_class->ct_set_limits(dpif, default_limit,
173 zone_limits)
174 : EOPNOTSUPP);
175}
176
177int
178ct_dpif_get_limits(struct dpif *dpif, uint32_t *default_limit,
179 const struct ovs_list *zone_limits_in,
180 struct ovs_list *zone_limits_out)
181{
182 return (dpif->dpif_class->ct_get_limits
183 ? dpif->dpif_class->ct_get_limits(dpif, default_limit,
184 zone_limits_in,
185 zone_limits_out)
186 : EOPNOTSUPP);
187}
188
189int
190ct_dpif_del_limits(struct dpif *dpif, const struct ovs_list *zone_limits)
191{
192 return (dpif->dpif_class->ct_del_limits
193 ? dpif->dpif_class->ct_del_limits(dpif, zone_limits)
194 : EOPNOTSUPP);
195}
196
3948eb54
DDP
197void
198ct_dpif_entry_uninit(struct ct_dpif_entry *entry)
199{
200 if (entry) {
201 if (entry->helper.name) {
202 free(entry->helper.name);
203 }
204 }
205}
206\f
207void
208ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
209 bool verbose, bool print_stats)
210{
211 ct_dpif_format_ipproto(ds, entry->tuple_orig.ip_proto);
212
213 ds_put_cstr(ds, ",orig=(");
b269a122 214 ct_dpif_format_tuple(ds, &entry->tuple_orig);
3948eb54
DDP
215 if (print_stats) {
216 ct_dpif_format_counters(ds, &entry->counters_orig);
217 }
218 ds_put_cstr(ds, ")");
219
220 ds_put_cstr(ds, ",reply=(");
b269a122 221 ct_dpif_format_tuple(ds, &entry->tuple_reply);
3948eb54
DDP
222 if (print_stats) {
223 ct_dpif_format_counters(ds, &entry->counters_reply);
224 }
225 ds_put_cstr(ds, ")");
226
227 if (print_stats) {
228 ct_dpif_format_timestamp(ds, &entry->timestamp);
229 }
230 if (verbose) {
231 ds_put_format(ds, ",id=%"PRIu32, entry->id);
232 }
233 if (entry->zone) {
234 ds_put_format(ds, ",zone=%"PRIu16, entry->zone);
235 }
236 if (verbose) {
237 ct_dpif_format_flags(ds, ",status=", entry->status,
238 ct_dpif_status_flags);
239 }
240 if (print_stats) {
241 ds_put_format(ds, ",timeout=%"PRIu32, entry->timeout);
242 }
243 if (entry->mark) {
244 ds_put_format(ds, ",mark=%"PRIu32, entry->mark);
245 }
2ff8484b 246 if (!ovs_u128_is_zero(entry->labels)) {
3948eb54
DDP
247 ovs_be128 value;
248
249 ds_put_cstr(ds, ",labels=");
250 value = hton128(entry->labels);
251 ds_put_hex(ds, &value, sizeof value);
252 }
253 ct_dpif_format_protoinfo(ds, ",protoinfo=", &entry->protoinfo, verbose);
254 ct_dpif_format_helper(ds, ",helper=", &entry->helper);
255 if (verbose && entry->tuple_master.l3_type != 0) {
256 ds_put_cstr(ds, ",master=(");
b269a122 257 ct_dpif_format_tuple(ds, &entry->tuple_master);
3948eb54
DDP
258 ds_put_cstr(ds, ")");
259 }
260}
261
/* Appends the IP protocol 'ipproto' to 'ds': a lower-case name for the
 * protocols known here, the decimal number for anything else. */
static void
ct_dpif_format_ipproto(struct ds *ds, uint16_t ipproto)
{
    const char *label = NULL;

    switch (ipproto) {
    case IPPROTO_ICMP:
        label = "icmp";
        break;
    case IPPROTO_ICMPV6:
        label = "icmpv6";
        break;
    case IPPROTO_TCP:
        label = "tcp";
        break;
    case IPPROTO_UDP:
        label = "udp";
        break;
    case IPPROTO_SCTP:
        label = "sctp";
        break;
    case IPPROTO_UDPLITE:
        label = "udplite";
        break;
    case IPPROTO_DCCP:
        label = "dccp";
        break;
    case IPPROTO_IGMP:
        label = "igmp";
        break;
    default:
        break;
    }

    if (!label) {
        ds_put_format(ds, "%u", ipproto);
        return;
    }
    ds_put_cstr(ds, label);
}
283
284static void
285ct_dpif_format_counters(struct ds *ds, const struct ct_dpif_counters *counters)
286{
287 if (counters->packets || counters->bytes) {
288 ds_put_format(ds, ",packets=%"PRIu64",bytes=%"PRIu64,
289 counters->packets, counters->bytes);
290 }
291}
292
293static void
294ct_dpif_format_timestamp(struct ds *ds,
295 const struct ct_dpif_timestamp *timestamp)
296{
297 if (timestamp->start || timestamp->stop) {
298 ds_put_strftime_msec(ds, ",start=%Y-%m-%dT%H:%M:%S.###",
299 timestamp->start / UINT64_C(1000000), false);
300 if (timestamp->stop) {
301 ds_put_strftime_msec(ds, ",stop=%Y-%m-%dT%H:%M:%S.###",
302 timestamp->stop / UINT64_C(1000000), false);
303 }
304 }
305}
306
307static void
b269a122 308ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple)
3948eb54 309{
b269a122
DDP
310 ds_put_format(ds, ",id=%u,type=%u,code=%u", ntohs(tuple->icmp_id),
311 tuple->icmp_type, tuple->icmp_code);
3948eb54
DDP
312}
313
314static void
315ct_dpif_format_tuple_tp(struct ds *ds, const struct ct_dpif_tuple *tuple)
316{
317 ds_put_format(ds, ",sport=%u,dport=%u",
318 ntohs(tuple->src_port), ntohs(tuple->dst_port));
319}
320
321void
b269a122 322ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple)
3948eb54
DDP
323{
324 if (tuple->l3_type == AF_INET) {
325 ds_put_format(ds, "src="IP_FMT",dst="IP_FMT,
326 IP_ARGS(tuple->src.ip), IP_ARGS(tuple->dst.ip));
327 } else if (tuple->l3_type == AF_INET6) {
328 ds_put_cstr(ds, "src=");
329 ipv6_format_addr(&tuple->src.in6, ds);
330 ds_put_cstr(ds, ",dst=");
331 ipv6_format_addr(&tuple->dst.in6, ds);
332 } else {
333 ds_put_format(ds, "Unsupported address family: %u. HEX:\n",
334 tuple->l3_type);
335 ds_put_hex_dump(ds, tuple, sizeof *tuple, 0, false);
336 return;
337 }
338
339 if (tuple->ip_proto == IPPROTO_ICMP
340 || tuple->ip_proto == IPPROTO_ICMPV6) {
b269a122 341 ct_dpif_format_tuple_icmp(ds, tuple);
3948eb54
DDP
342 } else {
343 ct_dpif_format_tuple_tp(ds, tuple);
344 }
345}
346
347static void
348ct_dpif_format_flags(struct ds *ds, const char *title, uint32_t flags,
349 const struct flags *table)
350{
351 if (title) {
352 ds_put_cstr(ds, title);
353 }
354 for (; table->name; table++) {
355 if (flags & table->flag) {
356 ds_put_format(ds, "%s|", table->name);
357 }
358 }
359 ds_chomp(ds, '|');
360}
361
362static const struct flags tcp_flags[] = {
363#define CT_DPIF_TCP_FLAG(FLAG) { CT_DPIF_TCPF_##FLAG, #FLAG },
364 CT_DPIF_TCP_FLAGS
365#undef CT_DPIF_TCP_FLAG
366 { 0, NULL } /* End marker. */
367};
368
369const char *ct_dpif_tcp_state_string[] = {
370#define CT_DPIF_TCP_STATE(STATE) [CT_DPIF_TCPS_##STATE] = #STATE,
371 CT_DPIF_TCP_STATES
372#undef CT_DPIF_TCP_STATE
373};
374
/* Appends 'title' (if not NULL) to 'ds', followed by the name of 'state'
 * taken from 'names', which has 'max' elements.  A 'state' outside the array
 * is printed as its number in square brackets. */
static void
ct_dpif_format_enum__(struct ds *ds, const char *title, unsigned int state,
                      const char *names[], unsigned int max)
{
    if (title) {
        ds_put_cstr(ds, title);
    }

    if (state >= max) {
        ds_put_format(ds, "[%u]", state);
        return;
    }
    ds_put_cstr(ds, names[state]);
}

/* Convenience wrapper that derives the element count from NAMES, which must
 * therefore be an actual array and not a pointer. */
#define ct_dpif_format_enum(DS, TITLE, STATE, NAMES) \
    ct_dpif_format_enum__((DS), (TITLE), (STATE), (NAMES), ARRAY_SIZE(NAMES))
391
392static uint8_t
393coalesce_tcp_state(uint8_t state)
394{
395 /* The Linux kernel connection tracker and the userspace view the
396 * tcp states differently in some situations. If we're formatting
397 * the entry without being verbose, it is worth to adjust the
398 * differences, to ease writing testcases. */
399 switch (state) {
400 case CT_DPIF_TCPS_FIN_WAIT_2:
401 return CT_DPIF_TCPS_TIME_WAIT;
402 case CT_DPIF_TCPS_SYN_RECV:
403 return CT_DPIF_TCPS_ESTABLISHED;
404 default:
405 return state;
406 }
407}
408
409static void
410ct_dpif_format_protoinfo_tcp(struct ds *ds,
411 const struct ct_dpif_protoinfo *protoinfo)
412{
413 uint8_t tcp_state;
414
415 /* We keep two separate tcp states, but we print just one. The Linux
416 * kernel connection tracker internally keeps only one state, so
417 * 'state_orig' and 'state_reply', will be the same. */
418 tcp_state = MAX(protoinfo->tcp.state_orig, protoinfo->tcp.state_reply);
419
420 tcp_state = coalesce_tcp_state(tcp_state);
421 ct_dpif_format_enum(ds, "state=", tcp_state, ct_dpif_tcp_state_string);
422}
423
424static void
425ct_dpif_format_protoinfo_tcp_verbose(struct ds *ds,
426 const struct ct_dpif_protoinfo *protoinfo)
427{
428 ct_dpif_format_enum(ds, "state_orig=", protoinfo->tcp.state_orig,
429 ct_dpif_tcp_state_string);
430 ct_dpif_format_enum(ds, ",state_reply=", protoinfo->tcp.state_reply,
431 ct_dpif_tcp_state_string);
432
433 if (protoinfo->tcp.wscale_orig || protoinfo->tcp.wscale_reply) {
434 ds_put_format(ds, ",wscale_orig=%u,wscale_reply=%u",
435 protoinfo->tcp.wscale_orig,
436 protoinfo->tcp.wscale_reply);
437 }
438 ct_dpif_format_flags(ds, ",flags_orig=", protoinfo->tcp.flags_orig,
439 tcp_flags);
440 ct_dpif_format_flags(ds, ",flags_reply=", protoinfo->tcp.flags_reply,
441 tcp_flags);
442}
443
444static void
445ct_dpif_format_protoinfo(struct ds *ds, const char *title,
446 const struct ct_dpif_protoinfo *protoinfo,
447 bool verbose)
448{
449 if (protoinfo->proto != 0) {
450 if (title) {
451 ds_put_format(ds, "%s(", title);
452 }
453 switch (protoinfo->proto) {
454 case IPPROTO_TCP:
455 if (verbose) {
456 ct_dpif_format_protoinfo_tcp_verbose(ds, protoinfo);
457 } else {
458 ct_dpif_format_protoinfo_tcp(ds, protoinfo);
459 }
460 break;
461 }
462 if (title) {
463 ds_put_cstr(ds, ")");
464 }
465 }
466}
467
468static void
469ct_dpif_format_helper(struct ds *ds, const char *title,
470 const struct ct_dpif_helper *helper)
471{
472 if (helper->name) {
473 if (title) {
474 ds_put_cstr(ds, title);
475 }
476 ds_put_cstr(ds, helper->name);
477 }
478}
8a0d9d85
FA
479
/* Public entry point for the file-local coalesce_tcp_state(): returns the
 * TCP state that non-verbose output would show for 'state'. */
uint8_t
ct_dpif_coalesce_tcp_state(uint8_t state)
{
    uint8_t coalesced = coalesce_tcp_state(state);

    return coalesced;
}
485
486void
487ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, int conn_per_state)
488{
489 ct_dpif_format_enum(ds, "\t [", tcp_state, ct_dpif_tcp_state_string);
490 ds_put_cstr(ds, "]");
491 ds_put_format(ds, "=%u", conn_per_state);
492}
c43a1331
YHW
493
494/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
495 * Returns true on success. Otherwise, returns false and puts the error
496 * message in 'ds'. */
497bool
498ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
499{
500 char *pos, *key, *value, *copy;
501 memset(tuple, 0, sizeof *tuple);
502
503 pos = copy = xstrdup(s);
504 while (ofputil_parse_key_value(&pos, &key, &value)) {
505 if (!*value) {
506 ds_put_format(ds, "field %s missing value", key);
507 goto error;
508 }
509
510 if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
511 if (tuple->l3_type && tuple->l3_type != AF_INET) {
512 ds_put_cstr(ds, "L3 type set multiple times");
513 goto error;
514 } else {
515 tuple->l3_type = AF_INET;
516 }
517 if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
518 &tuple->dst.ip)) {
519 goto error_with_msg;
520 }
521 } else if (!strcmp(key, "ct_ipv6_src") ||
522 !strcmp(key, "ct_ipv6_dst")) {
523 if (tuple->l3_type && tuple->l3_type != AF_INET6) {
524 ds_put_cstr(ds, "L3 type set multiple times");
525 goto error;
526 } else {
527 tuple->l3_type = AF_INET6;
528 }
529 if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
530 &tuple->dst.in6)) {
531 goto error_with_msg;
532 }
533 } else if (!strcmp(key, "ct_nw_proto")) {
534 char *err = str_to_u8(value, key, &tuple->ip_proto);
535 if (err) {
536 free(err);
537 goto error_with_msg;
538 }
539 } else if (!strcmp(key, "ct_tp_src") || !strcmp(key,"ct_tp_dst")) {
540 uint16_t port;
541 char *err = str_to_u16(value, key, &port);
542 if (err) {
543 free(err);
544 goto error_with_msg;
545 }
546 if (key[6] == 's') {
547 tuple->src_port = htons(port);
548 } else {
549 tuple->dst_port = htons(port);
550 }
551 } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") ||
552 !strcmp(key, "icmp_id") ) {
553 if (tuple->ip_proto != IPPROTO_ICMP &&
554 tuple->ip_proto != IPPROTO_ICMPV6) {
555 ds_put_cstr(ds, "invalid L4 fields");
556 goto error;
557 }
558 uint16_t icmp_id;
559 char *err;
560 if (key[5] == 't') {
561 err = str_to_u8(value, key, &tuple->icmp_type);
562 } else if (key[5] == 'c') {
563 err = str_to_u8(value, key, &tuple->icmp_code);
564 } else {
565 err = str_to_u16(value, key, &icmp_id);
566 tuple->icmp_id = htons(icmp_id);
567 }
568 if (err) {
569 free(err);
570 goto error_with_msg;
571 }
572 } else {
573 ds_put_format(ds, "invalid conntrack tuple field: %s", key);
574 goto error;
575 }
576 }
577
578 if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) ||
579 !tuple->ip_proto) {
580 /* icmp_type, icmp_code, and icmp_id can be 0. */
581 if (tuple->ip_proto != IPPROTO_ICMP &&
582 tuple->ip_proto != IPPROTO_ICMPV6) {
583 if (!tuple->src_port || !tuple->dst_port) {
584 ds_put_cstr(ds, "at least one of the conntrack 5-tuple fields "
585 "is missing.");
586 goto error;
587 }
588 }
589 }
590
591 free(copy);
592 return true;
593
594error_with_msg:
595 ds_put_format(ds, "failed to parse field %s", key);
596error:
597 free(copy);
598 return false;
599}