2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "ofproto-dpif-trace.h"
21 #include "conntrack.h"
23 #include "ofproto-dpif-xlate.h"
26 static void oftrace_node_destroy(struct oftrace_node
*);
28 /* Creates a new oftrace_node, populates it with the given 'type' and a copy of
29 * 'text', and appends it to list 'super'. The caller retains ownership of
32 oftrace_report(struct ovs_list
*super
, enum oftrace_node_type type
,
35 struct oftrace_node
*node
= xmalloc(sizeof *node
);
36 ovs_list_push_back(super
, &node
->node
);
38 node
->text
= xstrdup(text
);
39 ovs_list_init(&node
->subs
);
45 oftrace_node_type_is_terminal(enum oftrace_node_type type
)
65 oftrace_node_list_destroy(struct ovs_list
*nodes
)
68 struct oftrace_node
*node
, *next
;
69 LIST_FOR_EACH_SAFE (node
, next
, node
, nodes
) {
70 ovs_list_remove(&node
->node
);
71 oftrace_node_destroy(node
);
77 oftrace_node_destroy(struct oftrace_node
*node
)
80 oftrace_node_list_destroy(&node
->subs
);
87 oftrace_add_recirc_node(struct ovs_list
*recirc_queue
,
88 enum oftrace_recirc_type type
, const struct flow
*flow
,
89 const struct dp_packet
*packet
, uint32_t recirc_id
,
92 if (!recirc_id_node_find_and_ref(recirc_id
)) {
96 struct oftrace_recirc_node
*node
= xmalloc(sizeof *node
);
97 ovs_list_push_back(recirc_queue
, &node
->node
);
100 node
->recirc_id
= recirc_id
;
102 node
->flow
.recirc_id
= recirc_id
;
103 node
->flow
.ct_zone
= zone
;
104 node
->packet
= packet
? dp_packet_clone(packet
) : NULL
;
110 oftrace_recirc_node_destroy(struct oftrace_recirc_node
*node
)
113 recirc_free_id(node
->recirc_id
);
114 dp_packet_delete(node
->packet
);
120 oftrace_push_ct_state(struct ovs_list
*next_ct_states
, uint32_t ct_state
)
122 struct oftrace_next_ct_state
*next_ct_state
=
123 xmalloc(sizeof *next_ct_state
);
124 next_ct_state
->state
= ct_state
;
125 ovs_list_push_back(next_ct_states
, &next_ct_state
->node
);
129 oftrace_pop_ct_state(struct ovs_list
*next_ct_states
)
131 struct oftrace_next_ct_state
*s
;
132 LIST_FOR_EACH_POP (s
, node
, next_ct_states
) {
133 uint32_t state
= s
->state
;
141 oftrace_node_print_details(struct ds
*output
,
142 const struct ovs_list
*nodes
, int level
)
144 const struct oftrace_node
*sub
;
145 LIST_FOR_EACH (sub
, node
, nodes
) {
146 if (sub
->type
== OFT_BRIDGE
) {
147 ds_put_char(output
, '\n');
150 bool more
= (sub
->node
.next
!= nodes
151 || oftrace_node_type_is_terminal(sub
->type
));
153 ds_put_char_multiple(output
, ' ', (level
+ more
) * 4);
156 ds_put_format(output
, " -> %s\n", sub
->text
);
159 ds_put_format(output
, " >> %s\n", sub
->text
);
162 ds_put_format(output
, " >>>> %s <<<<\n", sub
->text
);
165 ds_put_format(output
, "%s\n", sub
->text
);
166 ds_put_char_multiple(output
, ' ', (level
+ more
) * 4);
167 ds_put_char_multiple(output
, '-', strlen(sub
->text
));
168 ds_put_char(output
, '\n');
174 ds_put_format(output
, "%s\n", sub
->text
);
178 oftrace_node_print_details(output
, &sub
->subs
, level
+ more
+ more
);
182 /* Parses the 'argc' elements of 'argv', ignoring argv[0]. The following
183 * forms are supported:
185 * - [options] [dpname] odp_flow [packet]
186 * - [options] bridge br_flow [packet]
188 * On success, initializes '*ofprotop' and 'flow' and returns NULL. On failure
189 * returns a nonnull malloced error message. */
190 static char * OVS_WARN_UNUSED_RESULT
191 parse_flow_and_packet(int argc
, const char *argv
[],
192 struct ofproto_dpif
**ofprotop
, struct flow
*flow
,
193 struct dp_packet
**packetp
,
194 struct ovs_list
*next_ct_states
,
197 const struct dpif_backer
*backer
= NULL
;
199 struct simap port_names
= SIMAP_INITIALIZER(&port_names
);
200 struct dp_packet
*packet
= NULL
;
203 struct ofpbuf odp_key
;
204 struct ofpbuf odp_mask
;
206 ofpbuf_init(&odp_key
, 0);
207 ofpbuf_init(&odp_mask
, 0);
211 bool generate_packet
= false;
215 for (int i
= 1; i
< argc
; i
++) {
216 const char *arg
= argv
[i
];
217 if (!strcmp(arg
, "-generate") || !strcmp(arg
, "--generate")) {
218 generate_packet
= true;
219 } else if (!strcmp(arg
, "--l7")) {
221 error
= xasprintf("Missing argument for option %s", arg
);
225 struct dp_packet payload
;
226 memset(&payload
, 0, sizeof payload
);
227 dp_packet_init(&payload
, 0);
228 if (dp_packet_put_hex(&payload
, argv
[++i
], NULL
)[0] != '\0') {
229 dp_packet_uninit(&payload
);
230 error
= xstrdup("Trailing garbage in packet data");
234 l7_len
= dp_packet_size(&payload
);
235 l7
= dp_packet_steal_data(&payload
);
236 } else if (!strcmp(arg
, "--l7-len")) {
238 error
= xasprintf("Missing argument for option %s", arg
);
243 l7_len
= atoi(argv
[++i
]);
244 if (l7_len
> 64000) {
245 error
= xasprintf("%s: too much L7 data", argv
[i
]);
248 } else if (consistent
249 && (!strcmp(arg
, "-consistent") ||
250 !strcmp(arg
, "--consistent"))) {
252 } else if (!strcmp(arg
, "--ct-next")) {
254 error
= xasprintf("Missing argument for option %s", arg
);
259 struct ds ds
= DS_EMPTY_INITIALIZER
;
260 if (!parse_ct_state(argv
[++i
], 0, &ct_state
, &ds
)
261 || !validate_ct_state(ct_state
, &ds
)) {
262 error
= ds_steal_cstr(&ds
);
265 oftrace_push_ct_state(next_ct_states
, ct_state
);
266 } else if (arg
[0] == '-') {
267 error
= xasprintf("%s: unknown option", arg
);
269 } else if (n_args
>= ARRAY_SIZE(args
)) {
270 error
= xstrdup("too many arguments");
273 args
[n_args
++] = arg
;
277 /* 'args' must now have one of the following forms:
283 * dpname odp_flow packet
284 * bridge br_flow packet
286 * Parse the packet if it's there. Note that:
288 * - If there is one argument, there cannot be a packet.
290 * - If there are three arguments, there must be a packet.
292 * If there is a packet, we strip it off.
294 if (!generate_packet
&& n_args
> 1) {
295 const char *const_error
= eth_from_hex(args
[n_args
- 1], &packet
);
298 } else if (n_args
> 2) {
299 /* The 3-argument form must end in a hex string. */
300 error
= xstrdup(const_error
);
305 /* We stripped off the packet if there was one, so 'args' now has one of
306 * the following forms:
312 * Before we parse the flow, try to identify the backer, then use that
313 * backer to assemble a collection of port names. The port names are
314 * useful so that the user can specify ports by name instead of number in
317 /* args[0] might be dpname. */
319 if (!strncmp(args
[0], "ovs-", 4)) {
320 dp_type
= args
[0] + 4;
324 backer
= shash_find_data(&all_dpif_backers
, dp_type
);
325 } else if (n_args
== 1) {
326 /* Pick default backer. */
327 struct shash_node
*node
;
328 if (shash_count(&all_dpif_backers
) == 1) {
329 node
= shash_first(&all_dpif_backers
);
333 error
= xstrdup("Syntax error");
336 if (backer
&& backer
->dpif
) {
337 struct dpif_port dpif_port
;
338 struct dpif_port_dump port_dump
;
339 DPIF_PORT_FOR_EACH (&dpif_port
, &port_dump
, backer
->dpif
) {
340 simap_put(&port_names
, dpif_port
.name
,
341 odp_to_u32(dpif_port
.port_no
));
345 /* Parse the flow and determine whether a datapath or
346 * bridge is specified. If function odp_flow_from_string()
347 * returns 0, the flow is an odp_flow. If function
348 * parse_ofp_exact_flow() returns NULL, the flow is a br_flow. */
349 if (!odp_flow_from_string(args
[n_args
- 1], &port_names
,
350 &odp_key
, &odp_mask
, &error
)) {
352 error
= xstrdup("Cannot find the datapath");
356 if (odp_flow_key_to_flow(odp_key
.data
, odp_key
.size
, flow
, &error
)
361 *ofprotop
= xlate_lookup_ofproto(backer
, flow
,
362 &flow
->in_port
.ofp_port
, &error
);
363 if (*ofprotop
== NULL
) {
367 flow
->tunnel
.metadata
.tab
= ofproto_get_tun_tab(&(*ofprotop
)->up
);
369 /* Convert Geneve options to OpenFlow format now. This isn't actually
370 * required in order to get the right results since the ofproto xlate
371 * actions will handle this for us. However, converting now ensures
372 * that our formatting code will always be able to consistently print
373 * in OpenFlow format, which is what we use here. */
374 if (flow
->tunnel
.flags
& FLOW_TNL_F_UDPIF
) {
376 memcpy(&tnl
, &flow
->tunnel
, sizeof tnl
);
377 int err
= tun_metadata_from_geneve_udpif(
378 flow
->tunnel
.metadata
.tab
, &tnl
, &tnl
, &flow
->tunnel
);
380 error
= xstrdup("Failed to parse Geneve options");
384 } else if (n_args
!= 2) {
386 error
= xasprintf("%s (or the bridge name was omitted)", s
);
393 *ofprotop
= ofproto_dpif_lookup_by_name(args
[0]);
395 error
= xasprintf("%s: unknown bridge", args
[0]);
399 struct ofputil_port_map map
= OFPUTIL_PORT_MAP_INITIALIZER(&map
);
400 const struct ofport
*ofport
;
401 HMAP_FOR_EACH (ofport
, hmap_node
, &(*ofprotop
)->up
.ports
) {
402 ofputil_port_map_put(&map
, ofport
->ofp_port
,
403 netdev_get_name(ofport
->netdev
));
405 char *err
= parse_ofp_exact_flow(flow
, NULL
,
406 ofproto_get_tun_tab(&(*ofprotop
)->up
),
407 args
[n_args
- 1], &map
);
408 ofputil_port_map_destroy(&map
);
410 error
= xasprintf("Bad openflow flow syntax: %s", err
);
416 if (generate_packet
) {
417 /* Generate a packet, as requested. */
418 packet
= dp_packet_new(0);
419 flow_compose(packet
, flow
, l7
, l7_len
);
421 /* Use the metadata from the flow and the packet argument to
422 * reconstruct the flow. */
423 pkt_metadata_from_flow(&packet
->md
, flow
);
424 flow_extract(packet
, flow
);
429 dp_packet_delete(packet
);
433 ofpbuf_uninit(&odp_key
);
434 ofpbuf_uninit(&odp_mask
);
435 simap_destroy(&port_names
);
441 free_ct_states(struct ovs_list
*ct_states
)
443 while (!ovs_list_is_empty(ct_states
)) {
444 oftrace_pop_ct_state(ct_states
);
449 ofproto_unixctl_trace(struct unixctl_conn
*conn
, int argc
, const char *argv
[],
450 void *aux OVS_UNUSED
)
452 struct ofproto_dpif
*ofproto
;
453 struct dp_packet
*packet
;
456 struct ovs_list next_ct_states
= OVS_LIST_INITIALIZER(&next_ct_states
);
458 error
= parse_flow_and_packet(argc
, argv
, &ofproto
, &flow
, &packet
,
459 &next_ct_states
, NULL
);
464 ofproto_trace(ofproto
, &flow
, packet
, NULL
, 0, &next_ct_states
,
466 unixctl_command_reply(conn
, ds_cstr(&result
));
468 dp_packet_delete(packet
);
470 unixctl_command_reply_error(conn
, error
);
473 free_ct_states(&next_ct_states
);
477 ofproto_unixctl_trace_actions(struct unixctl_conn
*conn
, int argc
,
478 const char *argv
[], void *aux OVS_UNUSED
)
480 enum ofputil_protocol usable_protocols
;
481 struct ofproto_dpif
*ofproto
;
482 bool enforce_consistency
;
483 struct ofpbuf ofpacts
;
484 struct dp_packet
*packet
;
488 struct ovs_list next_ct_states
= OVS_LIST_INITIALIZER(&next_ct_states
);
490 /* Three kinds of error return values! */
496 ofpbuf_init(&ofpacts
, 0);
499 struct ofpact_parse_params pp
= {
502 .usable_protocols
= &usable_protocols
,
504 error
= ofpacts_parse_actions(argv
[--argc
], &pp
);
506 unixctl_command_reply_error(conn
, error
);
511 error
= parse_flow_and_packet(argc
, argv
, &ofproto
, &match
.flow
, &packet
,
512 &next_ct_states
, &enforce_consistency
);
514 unixctl_command_reply_error(conn
, error
);
518 match_wc_init(&match
, &match
.flow
);
520 /* Do the same checks as handle_packet_out() in ofproto.c.
522 * We pass a 'table_id' of 0 to ofpacts_check(), which isn't
523 * strictly correct because these actions aren't in any table, but it's OK
524 * because 'table_id' is used only to check goto_table instructions, but
525 * packet-outs take a list of actions and therefore it can't include
528 * We skip the "meter" check here because meter is an instruction, not an
529 * action, and thus cannot appear in ofpacts. */
530 in_port
= ofp_to_u16(match
.flow
.in_port
.ofp_port
);
531 if (in_port
>= ofproto
->up
.max_ports
&& in_port
< ofp_to_u16(OFPP_MAX
)) {
532 unixctl_command_reply_error(conn
, "invalid in_port");
536 struct ofpact_check_params cp
= {
538 .max_ports
= u16_to_ofp(ofproto
->up
.max_ports
),
540 .n_tables
= ofproto
->up
.n_tables
,
542 retval
= ofpacts_check_consistency(
543 ofpacts
.data
, ofpacts
.size
,
544 enforce_consistency
? usable_protocols
: 0, &cp
);
546 ovs_mutex_lock(&ofproto_mutex
);
547 retval
= ofproto_check_ofpacts(&ofproto
->up
, ofpacts
.data
,
549 ovs_mutex_unlock(&ofproto_mutex
);
554 ds_put_format(&result
, "Bad actions: %s", ofperr_to_string(retval
));
555 unixctl_command_reply_error(conn
, ds_cstr(&result
));
559 ofproto_trace(ofproto
, &match
.flow
, packet
,
560 ofpacts
.data
, ofpacts
.size
, &next_ct_states
, &result
);
561 unixctl_command_reply(conn
, ds_cstr(&result
));
565 dp_packet_delete(packet
);
566 ofpbuf_uninit(&ofpacts
);
567 free_ct_states(&next_ct_states
);
571 explain_slow_path(enum slow_path_reason slow
, struct ds
*output
)
573 ds_put_cstr(output
, "\nThis flow is handled by the userspace "
574 "slow path because it:");
575 for (; slow
; slow
= zero_rightmost_1bit(slow
)) {
576 enum slow_path_reason bit
= rightmost_1bit(slow
);
577 ds_put_format(output
, "\n - %s.",
578 slow_path_reason_to_explanation(bit
));
582 /* Copies ODP actions from 'in' to 'out', dropping OVS_ACTION_ATTR_OUTPUT and
583 * OVS_ACTION_ATTR_RECIRC along the way. */
585 prune_output_actions(const struct ofpbuf
*in
, struct ofpbuf
*out
)
587 const struct nlattr
*a
;
589 NL_ATTR_FOR_EACH (a
, left
, in
->data
, in
->size
) {
590 if (a
->nla_type
== OVS_ACTION_ATTR_CLONE
) {
591 struct ofpbuf in_nested
;
592 nl_attr_get_nested(a
, &in_nested
);
594 size_t ofs
= nl_msg_start_nested(out
, OVS_ACTION_ATTR_CLONE
);
595 prune_output_actions(&in_nested
, out
);
596 nl_msg_end_nested(out
, ofs
);
597 } else if (a
->nla_type
!= OVS_ACTION_ATTR_OUTPUT
&&
598 a
->nla_type
!= OVS_ACTION_ATTR_RECIRC
) {
599 ofpbuf_put(out
, a
, NLA_ALIGN(a
->nla_len
));
604 /* Executes all of the datapath actions, except for any OVS_ACTION_ATTR_OUTPUT
605 * and OVS_ACTION_ATTR_RECIRC actions, in 'actions' on 'packet', which has the
606 * given 'flow', on 'dpif'. The actions have slow path reason 'slow' (if any).
607 * Appends any error message to 'output'.
609 * With output and recirculation actions dropped, the only remaining side
610 * effects are from OVS_ACTION_ATTR_USERSPACE actions for executing actions to
611 * send a packet to an OpenFlow controller, IPFIX, NetFlow, and sFlow, etc. */
613 execute_actions_except_outputs(struct dpif
*dpif
,
614 const struct dp_packet
*packet
,
615 const struct flow
*flow
,
616 const struct ofpbuf
*actions
,
617 enum slow_path_reason slow
,
620 struct ofpbuf pruned_actions
;
621 ofpbuf_init(&pruned_actions
, 0);
622 prune_output_actions(actions
, &pruned_actions
);
624 struct dpif_execute execute
= {
625 .actions
= pruned_actions
.data
,
626 .actions_len
= pruned_actions
.size
,
627 .needs_help
= (slow
& SLOW_ACTION
) != 0,
629 .packet
= dp_packet_clone_with_headroom(packet
, 2),
631 int error
= dpif_execute(dpif
, &execute
);
633 ds_put_format(output
, "\nAction execution failed (%s)\n.",
634 ovs_strerror(error
));
636 dp_packet_delete(execute
.packet
);
637 ofpbuf_uninit(&pruned_actions
);
641 ofproto_trace__(struct ofproto_dpif
*ofproto
, const struct flow
*flow
,
642 const struct dp_packet
*packet
, struct ovs_list
*recirc_queue
,
643 const struct ofpact ofpacts
[], size_t ofpacts_len
,
646 struct ofpbuf odp_actions
;
647 ofpbuf_init(&odp_actions
, 0);
650 struct flow_wildcards wc
;
651 struct ovs_list trace
= OVS_LIST_INITIALIZER(&trace
);
652 xlate_in_init(&xin
, ofproto
,
653 ofproto_dpif_get_tables_version(ofproto
), flow
,
654 flow
->in_port
.ofp_port
, NULL
, ntohs(flow
->tcp_flags
),
655 packet
, &wc
, &odp_actions
);
656 xin
.ofpacts
= ofpacts
;
657 xin
.ofpacts_len
= ofpacts_len
;
659 xin
.recirc_queue
= recirc_queue
;
661 /* Copy initial flow out of xin.flow. It differs from '*flow' because
662 * xlate_in_init() initializes actset_output to OFPP_UNSET. */
663 struct flow initial_flow
= xin
.flow
;
664 ds_put_cstr(output
, "Flow: ");
665 flow_format(output
, &initial_flow
, NULL
);
666 ds_put_char(output
, '\n');
668 struct xlate_out xout
;
669 enum xlate_error error
= xlate_actions(&xin
, &xout
);
671 oftrace_node_print_details(output
, &trace
, 0);
673 ds_put_cstr(output
, "\nFinal flow: ");
674 if (flow_equal(&initial_flow
, &xin
.flow
)) {
675 ds_put_cstr(output
, "unchanged");
677 flow_format(output
, &xin
.flow
, NULL
);
679 ds_put_char(output
, '\n');
681 ds_put_cstr(output
, "Megaflow: ");
683 match_init(&match
, flow
, &wc
);
684 match_format(&match
, NULL
, output
, OFP_DEFAULT_PRIORITY
);
685 ds_put_char(output
, '\n');
687 ds_put_cstr(output
, "Datapath actions: ");
688 format_odp_actions(output
, odp_actions
.data
, odp_actions
.size
, NULL
);
690 if (error
!= XLATE_OK
) {
691 ds_put_format(output
,
692 "\nTranslation failed (%s), packet is dropped.\n",
693 xlate_strerror(error
));
696 explain_slow_path(xout
.slow
, output
);
699 execute_actions_except_outputs(ofproto
->backer
->dpif
, packet
,
700 &initial_flow
, &odp_actions
,
706 xlate_out_uninit(&xout
);
707 ofpbuf_uninit(&odp_actions
);
708 oftrace_node_list_destroy(&trace
);
711 /* Implements a "trace" through 'ofproto''s flow table, appending a textual
712 * description of the results to 'output'.
714 * The trace follows a packet with the specified 'flow' through the flow
715 * table. 'packet' may be nonnull to trace an actual packet, with consequent
716 * side effects (if it is nonnull then its flow must be 'flow').
718 * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
719 * trace, otherwise the actions are determined by a flow table lookup. */
721 ofproto_trace(struct ofproto_dpif
*ofproto
, const struct flow
*flow
,
722 const struct dp_packet
*packet
,
723 const struct ofpact ofpacts
[], size_t ofpacts_len
,
724 struct ovs_list
*next_ct_states
, struct ds
*output
)
726 struct ovs_list recirc_queue
= OVS_LIST_INITIALIZER(&recirc_queue
);
727 ofproto_trace__(ofproto
, flow
, packet
, &recirc_queue
,
728 ofpacts
, ofpacts_len
, output
);
730 struct oftrace_recirc_node
*recirc_node
;
731 LIST_FOR_EACH_POP (recirc_node
, node
, &recirc_queue
) {
732 ds_put_cstr(output
, "\n\n");
733 ds_put_char_multiple(output
, '=', 79);
734 ds_put_format(output
, "\nrecirc(%#"PRIx32
")",
735 recirc_node
->recirc_id
);
737 if (next_ct_states
&& recirc_node
->type
== OFT_RECIRC_CONNTRACK
) {
739 if (ovs_list_is_empty(next_ct_states
)) {
740 ct_state
= CS_TRACKED
| CS_NEW
;
741 ds_put_cstr(output
, " - resume conntrack with default "
742 "ct_state=trk|new (use --ct-next to customize)");
744 ct_state
= oftrace_pop_ct_state(next_ct_states
);
745 struct ds s
= DS_EMPTY_INITIALIZER
;
746 format_flags(&s
, ct_state_to_string
, ct_state
, '|');
747 ds_put_format(output
, " - resume conntrack with ct_state=%s",
751 recirc_node
->flow
.ct_state
= ct_state
;
753 ds_put_char(output
, '\n');
754 ds_put_char_multiple(output
, '=', 79);
755 ds_put_cstr(output
, "\n\n");
757 ofproto_trace__(ofproto
, &recirc_node
->flow
, recirc_node
->packet
,
758 &recirc_queue
, ofpacts
, ofpacts_len
, output
);
759 oftrace_recirc_node_destroy(recirc_node
);
764 ofproto_dpif_trace_init(void)
766 static bool registered
;
772 unixctl_command_register(
774 "{[dp_name] odp_flow | bridge br_flow} [OPTIONS...] "
775 "[-generate|packet]", 1, INT_MAX
, ofproto_unixctl_trace
, NULL
);
776 unixctl_command_register(
777 "ofproto/trace-packet-out",
778 "[-consistent] {[dp_name] odp_flow | bridge br_flow} [OPTIONS...] "
779 "[-generate|packet] actions",
780 2, INT_MAX
, ofproto_unixctl_trace_actions
, NULL
);