/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"

#include "byte-order.h"
#include "connectivity.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "mac-learning.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "odp-execute.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-governor.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-monitor.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
#include "poll-loop.h"
#include "unaligned.h"
#include "vlan-bitmap.h"
VLOG_DEFINE_THIS_MODULE(ofproto_dpif);

COVERAGE_DEFINE(ofproto_dpif_expired);
COVERAGE_DEFINE(facet_revalidate);
COVERAGE_DEFINE(facet_unexpected);
COVERAGE_DEFINE(facet_create);
COVERAGE_DEFINE(facet_remove);
COVERAGE_DEFINE(subfacet_create);
COVERAGE_DEFINE(subfacet_destroy);
COVERAGE_DEFINE(subfacet_install_fail);
COVERAGE_DEFINE(packet_in_overflow);
/* Number of implemented OpenFlow tables. */
enum { N_TABLES = 255 };
enum { TBL_INTERNAL = N_TABLES - 1 };    /* Used for internal hidden rules. */
BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
    /*
     *   - Do include packets and bytes from facets that have been deleted or
     *     whose own statistics have been folded into the rule.
     *
     *   - Do include packets and bytes sent "by hand" that were accounted to
     *     the rule without any facet being involved (this is a rare corner
     *     case in rule_execute()).
     *
     *   - Do not include packet or bytes that can be obtained from any facet's
     *     packet_count or byte_count member or that can be obtained from the
     *     datapath by, e.g., dpif_flow_get() for any subfacet.
     */
    struct ovs_mutex stats_mutex;
    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
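    /* Illustrative sketch (not from the original file): because
     * 'packet_count' and 'byte_count' are OVS_GUARDED, readers and writers
     * are expected to hold 'stats_mutex' around the access, along the
     * lines of:
     *
     *     ovs_mutex_lock(&rule->stats_mutex);
     *     rule->packet_count += stats->n_packets;
     *     rule->byte_count += stats->n_bytes;
     *     ovs_mutex_unlock(&rule->stats_mutex);
     *
     * ('rule' and 'stats' are hypothetical local names, not declarations
     * from this excerpt.) */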
static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
static struct rule_dpif *rule_dpif_cast(const struct rule *);
    /*
     *   - Do include packets and bytes from facets that have been deleted or
     *     whose own statistics have been folded into the rule.
     *
     *   - Do include packets and bytes sent "by hand" that were accounted to
     *     the rule without any facet being involved (this is a rare corner
     *     case in rule_execute()).
     *
     *   - Do not include packet or bytes that can be obtained from any facet's
     *     packet_count or byte_count member or that can be obtained from the
     *     datapath by, e.g., dpif_flow_get() for any subfacet.
     */
    struct ovs_mutex stats_mutex;
    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
    struct bucket_counter *bucket_stats OVS_GUARDED;  /* Bucket statistics. */
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode. */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    bool floodable;             /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
static void bundle_remove(struct ofport *);
static void bundle_update(struct ofbundle *);
static void bundle_destroy(struct ofbundle *);
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);
static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
                        const struct ofproto_port_stp_settings *);
static void compose_slow_path(const struct ofproto_dpif *, const struct flow *,
                              enum slow_path_reason,
                              uint64_t *stub, size_t stub_size,
                              const struct nlattr **actionsp,
                              size_t *actions_lenp);
/* A subfacet (see "struct subfacet" below) has three possible installation
 * states:
 *
 *   - SF_NOT_INSTALLED: Not installed in the datapath.  This will only be the
 *     case just after the subfacet is created, just before the subfacet is
 *     destroyed, or if the datapath returns an error when we try to install a
 *     subfacet.
 *
 *   - SF_FAST_PATH: The subfacet's actions are installed in the datapath.
 *
 *   - SF_SLOW_PATH: An action that sends every packet for the subfacet through
 *     ofproto_dpif is installed in the datapath. */
    SF_NOT_INSTALLED,           /* No datapath flow for this subfacet. */
    SF_FAST_PATH,               /* Full actions are installed. */
    SF_SLOW_PATH,               /* Send-to-userspace action is installed. */
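/* Illustrative sketch (not from the original file): code keyed off these
 * states typically re-installs a datapath flow only when it is absent, e.g.
 *
 *     if (subfacet->path == SF_NOT_INSTALLED) {
 *         subfacet_install(subfacet, &facet->xout.odp_actions, &stats);
 *     }
 *
 * where 'facet' and 'stats' are hypothetical locals; subfacet_install() is
 * declared later in this file. */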
/* A dpif flow and actions associated with a facet.
 *
 * See also the large comment on struct facet. */
    struct hmap_node hmap_node; /* In struct ofproto_dpif 'subfacets' list. */
    struct list list_node;      /* In struct facet's 'subfacets' list. */
    struct facet *facet;        /* Owning facet. */
    struct dpif_backer *backer; /* Owning backer. */

    long long int used;         /* Time last used; time created if not used. */
    long long int created;      /* Time created. */

    uint64_t dp_packet_count;   /* Last known packet count in the datapath. */
    uint64_t dp_byte_count;     /* Last known byte count in the datapath. */

    enum subfacet_path path;    /* Installed in datapath? */
#define SUBFACET_DESTROY_MAX_BATCH 50
static struct subfacet *subfacet_create(struct facet *, struct flow_miss *,
static struct subfacet *subfacet_find(struct dpif_backer *,
                                      const struct nlattr *key, size_t key_len,
static void subfacet_destroy(struct subfacet *);
static void subfacet_destroy__(struct subfacet *);
static void subfacet_destroy_batch(struct dpif_backer *,
                                   struct subfacet **, int n);
static void subfacet_reset_dp_stats(struct subfacet *,
                                    struct dpif_flow_stats *);
static void subfacet_update_stats(struct subfacet *,
                                  const struct dpif_flow_stats *);
static int subfacet_install(struct subfacet *,
                            const struct ofpbuf *odp_actions,
                            struct dpif_flow_stats *);
static void subfacet_uninstall(struct subfacet *);
/* A unique, non-overlapping instantiation of an OpenFlow flow.
 *
 * A facet associates a "struct flow", which represents the Open vSwitch
 * userspace idea of an exact-match flow, with one or more subfacets.
 * While the facet is created based on an exact-match flow, it is stored
 * within the ofproto based on the wildcards that could be expressed
 * based on the flow table and other configuration.  (See the 'wc'
 * description in "struct xlate_out" for more details.)
 *
 * Each subfacet tracks the datapath's idea of the flow equivalent to
 * the facet.  When the kernel module (or other dpif implementation) and
 * Open vSwitch userspace agree on the definition of a flow key, there
 * is exactly one subfacet per facet.  If the dpif implementation
 * supports more-specific flow matching than userspace, however, a facet
 * can have more than one subfacet.  Examples include the dpif
 * implementation not supporting the same wildcards as userspace or some
 * distinction in flow that userspace simply doesn't understand.
 *
 * Flow expiration works in terms of subfacets, so a facet must have at
 * least one subfacet or it will never expire, leaking memory. */
    struct ofproto_dpif *ofproto;

    struct list subfacets;
    long long int used;         /* Time last used; time created if not used. */

    struct flow flow;           /* Flow of the creating subfacet. */
    struct cls_rule cr;         /* In 'ofproto_dpif's facets classifier. */

    /*
     *   - Do include packets and bytes sent "by hand", e.g. with
     *
     *   - Do include packets and bytes that were obtained from the datapath
     *     when a subfacet's statistics were reset (e.g. dpif_flow_put() with
     *     DPIF_FP_ZERO_STATS).
     *
     *   - Do not include packets or bytes that can be obtained from the
     *     datapath for any existing subfacet. */
    uint64_t packet_count;       /* Number of packets received. */
    uint64_t byte_count;         /* Number of bytes received. */

    /* Resubmit statistics. */
    uint64_t prev_packet_count;  /* Number of packets from last stats push. */
    uint64_t prev_byte_count;    /* Number of bytes from last stats push. */
    long long int prev_used;     /* Used time from last stats push. */

    uint16_t tcp_flags;          /* TCP flags seen for this 'rule'. */

    struct xlate_out xout;

    /* Storage for a single subfacet, to reduce malloc() time and space
     * overhead.  (A facet always has at least one subfacet and in the common
     * case has exactly one subfacet.  However, 'one_subfacet' may not
     * always be valid, since it could have been removed after newer
     * subfacets were pushed onto the 'subfacets' list.) */
    struct subfacet one_subfacet;

    long long int learn_rl;      /* Rate limiter for facet_learn(). */
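/* Illustrative sketch (not from the original file): per the large comment on
 * struct facet above, each facet owns one or more subfacets, linked through
 * the 'subfacets' list.  Walking them uses the usual OVS list macro, e.g.
 *
 *     struct subfacet *subfacet;
 *     LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
 *         ...
 *     }
 *
 * where 'facet' is a hypothetical local variable. */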
static struct facet *facet_create(const struct flow_miss *);
static void facet_remove(struct facet *);
static void facet_free(struct facet *);

static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
static struct facet *facet_lookup_valid(struct ofproto_dpif *,
                                        const struct flow *);
static bool facet_revalidate(struct facet *);
static bool facet_check_consistency(struct facet *);

static void facet_flush_stats(struct facet *);

static void facet_reset_counters(struct facet *);
static void flow_push_stats(struct ofproto_dpif *, struct flow *,
                            struct dpif_flow_stats *, bool may_learn);
static void facet_push_stats(struct facet *, bool may_learn);
static void facet_learn(struct facet *);
static void push_all_stats(void);

static bool facet_is_controller_flow(struct facet *);
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */

    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    struct bfd *bfd;            /* BFD, if any. */
    bool may_enable;            /* May be enabled in bonds. */
    bool is_tunnel;             /* This port is a tunnel. */
    bool is_layer3;             /* This is a layer 3 port. */
    long long int carrier_seq;  /* Carrier status changes. */
    struct ofport_dpif *peer;   /* Peer if patch port. */

    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    /* Queue to DSCP mapping. */
    struct ofproto_port_queue *qdscp;

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;
/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node;
    struct hmap_node vlandev_node;
    ofp_port_t realdev_ofp_port;
    ofp_port_t vlandev_ofp_port;
static void vsp_remove(struct ofport_dpif *);
static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);
static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
    return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}
static void port_run(struct ofport_dpif *);
static int set_bfd(struct ofport *, const struct smap *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_update_peer(struct ofport_dpif *);
struct dpif_completion {
    struct list list_node;
    struct ofoperation *op;
};
/* Reasons that we might need to revalidate every facet, and corresponding
 * coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ... */
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* Mac learning changed. */
    REV_INCONSISTENCY          /* Facet self-check failed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
COVERAGE_DEFINE(rev_inconsistency);
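/* Illustrative note (not from the original file): a subsystem that changes
 * the flow table simply records the reason on its backer, e.g.
 *
 *     backer->need_revalidate = REV_FLOW_TABLE;
 *
 * and type_run() below translates the pending reason into the matching
 * coverage counter before revalidating every facet. */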
/* All datapaths of a given type share a single dpif backer instance. */
    struct timer next_expiration;

    struct ovs_rwlock odp_to_ofport_lock;
    struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport"s. */

    struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */

    /* Facet revalidation flags applying to facets which use this backer. */
    enum revalidate_reason need_revalidate; /* Revalidate every facet. */

    struct hmap drop_keys;         /* Set of dropped odp keys. */
    bool recv_set_enable;          /* Enables or disables receiving packets. */

    struct hmap subfacets;
    struct governor *governor;

    /* Subfacet statistics.
     *
     * These keep track of the total number of subfacets added and deleted and
     * flow life span.  They are useful for computing the flow rates stats
     * exposed via "ovs-appctl dpif/show".  The goal is to learn about
     * traffic patterns in ways that we can use later to improve Open vSwitch
     * performance in new situations. */
    unsigned max_n_subfacet;       /* Maximum number of flows. */
    unsigned avg_n_subfacet;       /* Average number of flows. */
/* All existing ofproto_backer instances, indexed by ofproto->up.type. */
static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);

static void drop_key_clear(struct dpif_backer *);
struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct dpif_backer *backer;

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
    struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */

    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct dpif_ipfix *ipfix;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    bool has_bonded_bundles;

    struct mbridge *mbridge;

    struct classifier facets;   /* Contains 'struct facet's. */
    long long int consistency_rl;

    struct ovs_mutex stats_mutex;
    struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
                                            * consumed in userspace. */

    long long int stp_last_tick;

    /* VLAN splinters. */
    struct ovs_mutex vsp_mutex;
    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */

    struct sset ports;             /* Set of standard port names. */
    struct sset ghost_ports;       /* Ports with no datapath port. */
    struct sset port_poll_set;     /* Queued names for port_poll() reply. */
    int port_poll_errno;           /* Last errno for port_poll() reply. */
    uint64_t change_seq;           /* Connectivity status changes. */

    /* Per ofproto's dpif stats. */

    struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
/* By default, flows in the datapath are wildcarded (megaflows).  They
 * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
static bool enable_megaflows = true;
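/* For example, on a host running ovs-vswitchd, the command named in the
 * comment above can be invoked at run time as:
 *
 *     ovs-appctl dpif/disable-megaflows
 */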
/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
static void ofproto_dpif_unixctl_init(void);

static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}
static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                        ofp_port_t ofp_port);
static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
                          const struct ofpbuf *packet,
                          const struct ofpact[], size_t ofpacts_len,

static void handle_upcalls(struct dpif_backer *);
/* Flow expiration. */
static int expire(struct dpif_backer *);

/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Initial mappings of port to bridge mappings. */
static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);
/* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
 * it. */
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                      struct ofputil_flow_mod *fm)
{
    ofproto_flow_mod(&ofproto->up, fm);
}
/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
 * Takes ownership of 'pin' and pin->packet. */
ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
                            struct ofproto_packet_in *pin)
{
    if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
        COVERAGE_INC(packet_in_overflow);
        free(CONST_CAST(void *, pin->up.packet));
/* Factory functions. */

init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }
}
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}
enumerate_names(const char *type, struct sset *names)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (strcmp(type, ofproto->up.type)) {

        sset_add(names, ofproto->up.name);
del(const char *type, const char *name)
{
    error = dpif_open(name, type, &dpif);

        error = dpif_delete(dpif);
port_open_type(const char *datapath_type, const char *port_type)
{
    return dpif_port_open_type(datapath_type, port_type);
}
/* Type functions. */

static void process_dpif_port_changes(struct dpif_backer *);
static void process_dpif_all_ports_changed(struct dpif_backer *);
static void process_dpif_port_change(struct dpif_backer *,
                                     const char *devname);
static void process_dpif_port_error(struct dpif_backer *, int error);
static struct ofproto_dpif *
lookup_ofproto_dpif_by_port_name(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (sset_contains(&ofproto->ports, name)) {
type_run(const char *type)
{
    static long long int push_timer = LLONG_MIN;
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);

        /* This is not necessarily a problem, since backers are only
         * created on demand. */

    dpif_run(backer->dpif);

    handle_upcalls(backer);

    /* The most natural place to push facet statistics is when they're pulled
     * from the datapath.  However, when there are many flows in the datapath,
     * this expensive operation can occur so frequently, that it reduces our
     * ability to quickly set up flows.  To reduce the cost, we push statistics
     * here instead. */
    if (time_msec() > push_timer) {
        push_timer = time_msec() + 2000;
    /* If vswitchd started with other_config:flow_restore_wait set as "true",
     * and the configuration has now changed to "false", enable receiving
     * packets from the datapath. */
    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {

        backer->recv_set_enable = true;

        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);

            VLOG_ERR("Failed to enable receiving packets in dpif.");

        dpif_flow_flush(backer->dpif);
        backer->need_revalidate = REV_RECONFIGURE;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers);
    }
    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
        struct simap tmp_backers;

        /* Handle tunnel garbage collection. */
        simap_init(&tmp_backers);
        simap_swap(&backer->tnl_backers, &tmp_backers);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *iter;

            if (backer != ofproto->backer) {

            HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
                char namebuf[NETDEV_VPORT_NAME_BUFSIZE];

                if (!iter->is_tunnel) {

                dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
                                                     namebuf, sizeof namebuf);
                node = simap_find(&tmp_backers, dp_port);

                    simap_put(&backer->tnl_backers, dp_port, node->data);
                    simap_delete(&tmp_backers, node);
                    node = simap_find(&backer->tnl_backers, dp_port);

                    node = simap_find(&backer->tnl_backers, dp_port);

                        odp_port_t odp_port = ODPP_NONE;

                        if (!dpif_port_add(backer->dpif, iter->up.netdev,

                            simap_put(&backer->tnl_backers, dp_port,
                                      odp_to_u32(odp_port));
                            node = simap_find(&backer->tnl_backers, dp_port);

                iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
                if (tnl_port_reconfigure(iter, iter->up.netdev,

                    backer->need_revalidate = REV_RECONFIGURE;

        SIMAP_FOR_EACH (node, &tmp_backers) {
            dpif_port_del(backer->dpif, u32_to_odp(node->data));
        }
        simap_destroy(&tmp_backers);
        switch (backer->need_revalidate) {
        case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
        case REV_STP:           COVERAGE_INC(rev_stp);           break;
        case REV_BOND:          COVERAGE_INC(rev_bond);          break;
        case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
        case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
        case REV_MAC_LEARNING:  COVERAGE_INC(rev_mac_learning);  break;
        case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break;
        }
        backer->need_revalidate = 0;

        /* Clear the drop_keys in case we should now be accepting some
         * formerly dropped flows. */
        drop_key_clear(backer);
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct facet *facet, *next;
            struct ofport_dpif *ofport;
            struct cls_cursor cursor;
            struct ofbundle *bundle;

            if (ofproto->backer != backer) {

            ovs_rwlock_wrlock(&xlate_rwlock);
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->miss_rule,
                              ofproto->no_packet_in_rule, ofproto->ml,
                              ofproto->stp, ofproto->mbridge,
                              ofproto->sflow, ofproto->ipfix,
                              ofproto->netflow, ofproto->up.frag_handling,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr));

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                xlate_bundle_set(ofproto, bundle, bundle->name,
                                 bundle->vlan_mode, bundle->vlan,
                                 bundle->trunks, bundle->use_priority_tags,
                                 bundle->bond, bundle->lacp,

            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                int stp_port = ofport->stp_port
                    ? stp_port_no(ofport->stp_port)

                xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                 ofport->up.ofp_port, ofport->odp_port,
                                 ofport->up.netdev, ofport->cfm,
                                 ofport->bfd, ofport->peer, stp_port,
                                 ofport->qdscp, ofport->n_qdscp,
                                 ofport->up.pp.config, ofport->up.pp.state,
                                 ofport->is_tunnel, ofport->may_enable);
            }
            ovs_rwlock_unlock(&xlate_rwlock);

            /* Only ofproto-dpif cares about the facet classifier so we just
             * lock cls_cursor_init() to appease the thread safety analysis. */
            ovs_rwlock_rdlock(&ofproto->facets.rwlock);
            cls_cursor_init(&cursor, &ofproto->facets, NULL);
            ovs_rwlock_unlock(&ofproto->facets.rwlock);
            CLS_CURSOR_FOR_EACH_SAFE (facet, next, cr, &cursor) {
                facet_revalidate(facet);
            }
        }

        udpif_revalidate(backer->udpif);
    }
    if (!backer->recv_set_enable) {
        /* Wake up before a max of 1000ms. */
        timer_set_duration(&backer->next_expiration, 1000);
    } else if (timer_expired(&backer->next_expiration)) {
        int delay = expire(backer);
        timer_set_duration(&backer->next_expiration, delay);
    }

    process_dpif_port_changes(backer);
    if (backer->governor) {

        governor_run(backer->governor);

        /* If the governor has shrunk to its minimum size and the number of
         * subfacets has dwindled, then drop the governor entirely.
         *
         * For hysteresis, the number of subfacets to drop the governor is
         * smaller than the number needed to trigger its creation. */
        n_subfacets = hmap_count(&backer->subfacets);
        if (n_subfacets * 4 < flow_eviction_threshold
            && governor_is_idle(backer->governor)) {
            governor_destroy(backer->governor);
            backer->governor = NULL;
        }
    }
/* Check for and handle port changes in 'backer''s dpif. */
process_dpif_port_changes(struct dpif_backer *backer)
{
        error = dpif_port_poll(backer->dpif, &devname);

            process_dpif_all_ports_changed(backer);

            process_dpif_port_change(backer, devname);

            process_dpif_port_error(backer, error);
process_dpif_all_ports_changed(struct dpif_backer *backer)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port dpif_port;
    struct dpif_port_dump dump;
    struct sset devnames;

    sset_init(&devnames);
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            struct ofport *ofport;

            HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
                sset_add(&devnames, netdev_get_name(ofport->netdev));
            }
        }
    }
    DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
        sset_add(&devnames, dpif_port.name);
    }

    SSET_FOR_EACH (devname, &devnames) {
        process_dpif_port_change(backer, devname);
    }
    sset_destroy(&devnames);
process_dpif_port_change(struct dpif_backer *backer, const char *devname)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port port;

    /* Don't report on the datapath's device. */
    if (!strcmp(devname, dpif_base_name(backer->dpif))) {

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
                   &all_ofproto_dpifs) {
        if (simap_contains(&ofproto->backer->tnl_backers, devname)) {

    ofproto = lookup_ofproto_dpif_by_port_name(devname);
    if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
        /* The port was removed.  If we know the datapath,
         * report it through poll_set().  If we don't, it may be
         * notifying us of a removal we initiated, so ignore it.
         * If there's a pending ENOBUFS, let it stand, since
         * everything will be reevaluated. */
        if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
            sset_add(&ofproto->port_poll_set, devname);
            ofproto->port_poll_errno = 0;
        }
    } else if (!ofproto) {
        /* The port was added, but we don't know with which
         * ofproto we should associate it.  Delete it. */
        dpif_port_del(backer->dpif, port.port_no);

        struct ofport_dpif *ofport;

        ofport = ofport_dpif_cast(shash_find_data(
                                      &ofproto->up.port_by_name, devname));

            && ofport->odp_port != port.port_no
            && !odp_port_to_ofport(backer, port.port_no))

            /* 'ofport''s datapath port number has changed from
             * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
             * structures to match. */
            ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
            hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
            ofport->odp_port = port.port_no;
            hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
                        hash_odp_port(port.port_no));
            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
            backer->need_revalidate = REV_RECONFIGURE;

    dpif_port_destroy(&port);
/* Propagate 'error' to all ofprotos based on 'backer'. */
process_dpif_port_error(struct dpif_backer *backer, int error)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            sset_clear(&ofproto->port_poll_set);
            ofproto->port_poll_errno = error;
        }
    }
}
type_wait(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);

        /* This is not necessarily a problem, since backers are only
         * created on demand. */

    if (backer->governor) {
        governor_wait(backer->governor);
    }

    timer_wait(&backer->next_expiration);
    dpif_wait(backer->dpif);
    udpif_wait(backer->udpif);
}
/* Basic life-cycle. */

static int add_internal_flows(struct ofproto_dpif *);

static struct ofproto *

    struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
    return &ofproto->up;

dealloc(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
close_dpif_backer(struct dpif_backer *backer)
{
    ovs_assert(backer->refcount > 0);

    if (--backer->refcount) {
        return;
    }

    drop_key_clear(backer);
    hmap_destroy(&backer->drop_keys);

    udpif_destroy(backer->udpif);

    simap_destroy(&backer->tnl_backers);
    ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
    hmap_destroy(&backer->odp_to_ofport_map);
    shash_find_and_delete(&all_dpif_backers, backer->type);

    dpif_close(backer->dpif);

    ovs_assert(hmap_is_empty(&backer->subfacets));
    hmap_destroy(&backer->subfacets);
    governor_destroy(backer->governor);
/* Datapath port slated for removal from datapath. */
struct odp_garbage {
    struct list list_node;
    odp_port_t odp_port;
};
open_dpif_backer(const char *type, struct dpif_backer **backerp)
{
    struct dpif_backer *backer;
    struct dpif_port_dump port_dump;
    struct dpif_port port;
    struct shash_node *node;
    struct list garbage_list;
    struct odp_garbage *garbage, *next;

    backer = shash_find_data(&all_dpif_backers, type);

    backer_name = xasprintf("ovs-%s", type);

    /* Remove any existing datapaths, since we assume we're the only
     * userspace controlling the datapath. */
    dp_enumerate_names(type, &names);
    SSET_FOR_EACH(name, &names) {
        struct dpif *old_dpif;

        /* Don't remove our backer if it exists. */
        if (!strcmp(name, backer_name)) {

        if (dpif_open(name, type, &old_dpif)) {
            VLOG_WARN("couldn't open old datapath %s to remove it", name);

            dpif_delete(old_dpif);
            dpif_close(old_dpif);

    sset_destroy(&names);

    backer = xmalloc(sizeof *backer);

    error = dpif_create_and_open(backer_name, type, &backer->dpif);

        VLOG_ERR("failed to open datapath of type %s: %s", type,
                 ovs_strerror(error));

    backer->udpif = udpif_create(backer, backer->dpif);

    backer->type = xstrdup(type);
    backer->governor = NULL;
    backer->refcount = 1;
    hmap_init(&backer->odp_to_ofport_map);
    ovs_rwlock_init(&backer->odp_to_ofport_lock);
    hmap_init(&backer->drop_keys);
    hmap_init(&backer->subfacets);
    timer_set_duration(&backer->next_expiration, 1000);
    backer->need_revalidate = 0;
    simap_init(&backer->tnl_backers);
    backer->recv_set_enable = !ofproto_get_flow_restore_wait();

    if (backer->recv_set_enable) {
        dpif_flow_flush(backer->dpif);
    }

    /* Loop through the ports already on the datapath and remove any
     * that we don't need anymore. */
    list_init(&garbage_list);
    dpif_port_dump_start(&port_dump, backer->dpif);
    while (dpif_port_dump_next(&port_dump, &port)) {
        node = shash_find(&init_ofp_ports, port.name);
        if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
            garbage = xmalloc(sizeof *garbage);
            garbage->odp_port = port.port_no;
            list_push_front(&garbage_list, &garbage->list_node);
        }
    }
    dpif_port_dump_done(&port_dump);

    LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
        dpif_port_del(backer->dpif, garbage->odp_port);
        list_remove(&garbage->list_node);

    shash_add(&all_dpif_backers, type, backer);

    error = dpif_recv_set(backer->dpif, backer->recv_set_enable);

        VLOG_ERR("failed to listen on datapath of type %s: %s",
                 type, ovs_strerror(error));
        close_dpif_backer(backer);

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers);
    }

    backer->max_n_subfacet = 0;
    backer->avg_n_subfacet = 0;
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct shash_node *node, *next;

    error = open_dpif_backer(ofproto->up.type, &ofproto->backer);

    ofproto->netflow = NULL;
    ofproto->sflow = NULL;
    ofproto->ipfix = NULL;
    ofproto->stp = NULL;
    hmap_init(&ofproto->bundles);
    ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
    ofproto->mbridge = mbridge_create();
    ofproto->has_bonded_bundles = false;
    ofproto->lacp_enabled = false;
    ovs_mutex_init(&ofproto->stats_mutex);
    ovs_mutex_init(&ofproto->vsp_mutex);

    classifier_init(&ofproto->facets, NULL);
    ofproto->consistency_rl = LLONG_MIN;

    guarded_list_init(&ofproto->pins);

    ofproto_dpif_unixctl_init();

    hmap_init(&ofproto->vlandev_map);
    hmap_init(&ofproto->realdev_vid_map);

    sset_init(&ofproto->ports);
    sset_init(&ofproto->ghost_ports);
    sset_init(&ofproto->port_poll_set);
    ofproto->port_poll_errno = 0;
    ofproto->change_seq = 0;
    SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
        struct iface_hint *iface_hint = node->data;

        if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
            /* Check if the datapath already has this port. */
            if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
                sset_add(&ofproto->ports, node->name);
            }

            free(iface_hint->br_name);
            free(iface_hint->br_type);

            shash_delete(&init_ofp_ports, node);

    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                hash_string(ofproto->up.name, 0));
    memset(&ofproto->stats, 0, sizeof ofproto->stats);

    ofproto_init_tables(ofproto_, N_TABLES);
    error = add_internal_flows(ofproto);
    ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;

    ofproto->n_missed = 0;
add_internal_flow(struct ofproto_dpif *ofproto, int id,
                  const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
{
    struct ofputil_flow_mod fm;

    match_init_catchall(&fm.match);

    match_set_reg(&fm.match, 0, id);
    fm.new_cookie = htonll(0);
    fm.cookie = htonll(0);
    fm.cookie_mask = htonll(0);
    fm.modify_cookie = false;
    fm.table_id = TBL_INTERNAL;
    fm.command = OFPFC_ADD;
    fm.idle_timeout = 0;
    fm.hard_timeout = 0;

    fm.ofpacts = ofpacts->data;
    fm.ofpacts_len = ofpacts->size;

    error = ofproto_flow_mod(&ofproto->up, &fm);

        VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
                    id, ofperr_to_string(error));

    if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,

        rule_dpif_unref(*rulep);
add_internal_flows(struct ofproto_dpif *ofproto)
{
    struct ofpact_controller *controller;
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);

    controller = ofpact_put_CONTROLLER(&ofpacts);
    controller->max_len = UINT16_MAX;
    controller->controller_id = 0;
    controller->reason = OFPR_NO_MATCH;
    ofpact_pad(&ofpacts);

    error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);

    ofpbuf_clear(&ofpacts);
    error = add_internal_flow(ofproto, id++, &ofpacts,
                              &ofproto->no_packet_in_rule);

    error = add_internal_flow(ofproto, id++, &ofpacts,
                              &ofproto->drop_frags_rule);
destruct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct rule_dpif *rule, *next_rule;
    struct ofproto_packet_in *pin, *next_pin;
    struct facet *facet, *next_facet;
    struct cls_cursor cursor;
    struct oftable *table;

    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    cls_cursor_init(&cursor, &ofproto->facets, NULL);
    ovs_rwlock_unlock(&ofproto->facets.rwlock);
    CLS_CURSOR_FOR_EACH_SAFE (facet, next_facet, cr, &cursor) {
        facet_remove(facet);
    }

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    ovs_rwlock_wrlock(&xlate_rwlock);
    xlate_remove_ofproto(ofproto);
    ovs_rwlock_unlock(&xlate_rwlock);

    /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a
     * use-after-free error. */
    udpif_revalidate(ofproto->backer->udpif);

    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);

    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        struct cls_cursor cursor;

        ovs_rwlock_rdlock(&table->cls.rwlock);
        cls_cursor_init(&cursor, &table->cls, NULL);
        ovs_rwlock_unlock(&table->cls.rwlock);
        CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
            ofproto_rule_delete(&ofproto->up, &rule->up);
        }
    }

    guarded_list_pop_all(&ofproto->pins, &pins);
    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
        list_remove(&pin->list_node);
        free(CONST_CAST(void *, pin->up.packet));

    guarded_list_destroy(&ofproto->pins);

    mbridge_unref(ofproto->mbridge);

    netflow_unref(ofproto->netflow);
    dpif_sflow_unref(ofproto->sflow);
    hmap_destroy(&ofproto->bundles);
    mac_learning_unref(ofproto->ml);

    classifier_destroy(&ofproto->facets);

    hmap_destroy(&ofproto->vlandev_map);
    hmap_destroy(&ofproto->realdev_vid_map);

    sset_destroy(&ofproto->ports);
    sset_destroy(&ofproto->ghost_ports);
    sset_destroy(&ofproto->port_poll_set);

    ovs_mutex_destroy(&ofproto->stats_mutex);
    ovs_mutex_destroy(&ofproto->vsp_mutex);

    close_dpif_backer(ofproto->backer);
}
run(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (mbridge_need_revalidate(ofproto->mbridge)) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
        mac_learning_flush(ofproto->ml);
        ovs_rwlock_unlock(&ofproto->ml->rwlock);
    }

    /* Do not perform any periodic activity required by 'ofproto' while
     * waiting for flow restore to complete. */
    if (!ofproto_get_flow_restore_wait()) {
        struct ofproto_packet_in *pin, *next_pin;

        guarded_list_pop_all(&ofproto->pins, &pins);
        LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
            connmgr_send_packet_in(ofproto->up.connmgr, pin);
            list_remove(&pin->list_node);
            free(CONST_CAST(void *, pin->up.packet));
    if (ofproto->netflow) {
        netflow_run(ofproto->netflow);
    }
    if (ofproto->sflow) {
        dpif_sflow_run(ofproto->sflow);
    }
    if (ofproto->ipfix) {
        dpif_ipfix_run(ofproto->ipfix);
    }

    new_seq = seq_read(connectivity_seq_get());
    if (ofproto->change_seq != new_seq) {
        struct ofport_dpif *ofport;

        HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {

        ofproto->change_seq = new_seq;
    }

    if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {

    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    if (mac_learning_run(ofproto->ml)) {
        ofproto->backer->need_revalidate = REV_MAC_LEARNING;
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);

    /* Check the consistency of a random facet, to aid debugging. */
    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    if (time_msec() >= ofproto->consistency_rl
        && !classifier_is_empty(&ofproto->facets)
        && !ofproto->backer->need_revalidate) {
        struct cls_subtable *table;
        struct cls_rule *cr;
        struct facet *facet;

        ofproto->consistency_rl = time_msec() + 250;

        table = CONTAINER_OF(hmap_random_node(&ofproto->facets.subtables),
                             struct cls_subtable, hmap_node);
        cr = CONTAINER_OF(hmap_random_node(&table->rules), struct cls_rule,

        facet = CONTAINER_OF(cr, struct facet, cr);

        if (!facet_check_consistency(facet)) {
            ofproto->backer->need_revalidate = REV_INCONSISTENCY;
        }
    }
    ovs_rwlock_unlock(&ofproto->facets.rwlock);
wait(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (ofproto_get_flow_restore_wait()) {

    if (ofproto->sflow) {
        dpif_sflow_wait(ofproto->sflow);
    }
    if (ofproto->ipfix) {
        dpif_ipfix_wait(ofproto->ipfix);
    }
    if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
            bundle_wait(bundle);
        }
    }
    if (ofproto->netflow) {
        netflow_wait(ofproto->netflow);
    }
    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
    mac_learning_wait(ofproto->ml);
    ovs_rwlock_unlock(&ofproto->ml->rwlock);

    if (ofproto->backer->need_revalidate) {
        /* Shouldn't happen, but if it does just go around again. */
        VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
        poll_immediate_wake();
get_memory_usage(const struct ofproto *ofproto_, struct simap *usage)
{
    const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct cls_cursor cursor;
    size_t n_subfacets = 0;
    struct facet *facet;

    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    simap_increase(usage, "facets", classifier_count(&ofproto->facets));
    ovs_rwlock_unlock(&ofproto->facets.rwlock);

    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    cls_cursor_init(&cursor, &ofproto->facets, NULL);
    CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
        n_subfacets += list_size(&facet->subfacets);
    }
    ovs_rwlock_unlock(&ofproto->facets.rwlock);
    simap_increase(usage, "subfacets", n_subfacets);
}
type_get_memory_usage(const char *type, struct simap *usage)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);

        udpif_get_memory_usage(backer->udpif, usage);
flush(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct subfacet *subfacet, *next_subfacet;
    struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];

    HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
                        &ofproto->backer->subfacets) {
        if (subfacet->facet->ofproto != ofproto) {

        if (subfacet->path != SF_NOT_INSTALLED) {
            batch[n_batch++] = subfacet;
            if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
                subfacet_destroy_batch(ofproto->backer, batch, n_batch);

            subfacet_destroy(subfacet);

        subfacet_destroy_batch(ofproto->backer, batch, n_batch);
get_features(struct ofproto *ofproto_ OVS_UNUSED,
             bool *arp_match_ip, enum ofputil_action_bitmap *actions)
{
    *arp_match_ip = true;
    *actions = (OFPUTIL_A_OUTPUT |
                OFPUTIL_A_SET_VLAN_VID |
                OFPUTIL_A_SET_VLAN_PCP |
                OFPUTIL_A_STRIP_VLAN |
                OFPUTIL_A_SET_DL_SRC |
                OFPUTIL_A_SET_DL_DST |
                OFPUTIL_A_SET_NW_SRC |
                OFPUTIL_A_SET_NW_DST |
                OFPUTIL_A_SET_NW_TOS |
                OFPUTIL_A_SET_TP_SRC |
                OFPUTIL_A_SET_TP_DST |
get_tables(struct ofproto *ofproto_, struct ofp12_table_stats *ots)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_dp_stats s;
    uint64_t n_miss, n_no_pkt_in, n_bytes, n_dropped_frags;

    strcpy(ots->name, "classifier");

    dpif_get_dp_stats(ofproto->backer->dpif, &s);
    rule_get_stats(&ofproto->miss_rule->up, &n_miss, &n_bytes);
    rule_get_stats(&ofproto->no_packet_in_rule->up, &n_no_pkt_in, &n_bytes);
    rule_get_stats(&ofproto->drop_frags_rule->up, &n_dropped_frags, &n_bytes);

    n_lookup = s.n_hit + s.n_missed - n_dropped_frags;
    ots->lookup_count = htonll(n_lookup);
    ots->matched_count = htonll(n_lookup - n_miss - n_no_pkt_in);
}
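/* Illustrative note (not from the original file): the arithmetic above treats
 * every datapath hit or miss as a lookup, except for fragments dropped in
 * OFPC_FRAG_DROP mode, and counts as "matched" every lookup that was not
 * handled by the special miss rule or the no-packet-in rule, i.e.
 *
 *     lookups = n_hit + n_missed - n_dropped_frags
 *     matched = lookups - n_miss - n_no_pkt_in
 */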
static struct ofport *

    struct ofport_dpif *port = xmalloc(sizeof *port);

port_dealloc(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
port_construct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const struct netdev *netdev = port->up.netdev;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    struct dpif_port dpif_port;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    port->bundle = NULL;

    port->may_enable = true;
    port->stp_port = NULL;
    port->stp_state = STP_DISABLED;
    port->is_tunnel = false;

    port->realdev_ofp_port = 0;
    port->vlandev_vid = 0;
    port->carrier_seq = netdev_get_carrier_resets(netdev);
    port->is_layer3 = netdev_vport_is_layer3(netdev);

    if (netdev_vport_is_patch(netdev)) {
        /* By bailing out here, we don't submit the port to the sFlow module
         * to be considered for counter polling export.  This is correct
         * because the patch port represents an interface that sFlow considers
         * to be "internal" to the switch as a whole, and therefore not a
         * candidate for counter polling. */
        port->odp_port = ODPP_NONE;
        ofport_update_peer(port);

    error = dpif_port_query_by_name(ofproto->backer->dpif,
                                    netdev_vport_get_dpif_port(netdev, namebuf,

    port->odp_port = dpif_port.port_no;

    if (netdev_get_tunnel_config(netdev)) {
        tnl_port_add(port, port->up.netdev, port->odp_port);
        port->is_tunnel = true;

        /* Sanity-check that a mapping doesn't already exist.  This
         * shouldn't happen for non-tunnel ports. */
        if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
            VLOG_ERR("port %s already has an OpenFlow port number",

            dpif_port_destroy(&dpif_port);

        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
                    hash_odp_port(port->odp_port));
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);

    dpif_port_destroy(&dpif_port);

    if (ofproto->sflow) {
        dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
port_destruct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const char *devname = netdev_get_name(port->up.netdev);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dp_port_name;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    ovs_rwlock_wrlock(&xlate_rwlock);
    xlate_ofport_remove(port);
    ovs_rwlock_unlock(&xlate_rwlock);

    dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,

    if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
        /* The underlying device is still there, so delete it.  This
         * happens when the ofproto is being destroyed, since the caller
         * assumes that removal of attached ports will happen as part of
         * destruction. */
        if (!port->is_tunnel) {
            dpif_port_del(ofproto->backer->dpif, port->odp_port);
        }
    }

        port->peer->peer = NULL;

    if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
    }

    sset_find_and_delete(&ofproto->ports, devname);
    sset_find_and_delete(&ofproto->ghost_ports, devname);
    bundle_remove(port_);
    set_cfm(port_, NULL);
    set_bfd(port_, NULL);
    if (ofproto->sflow) {
        dpif_sflow_del_port(ofproto->sflow, port->odp_port);
port_modified(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);

    if (port->bundle && port->bundle->bond) {
        bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
    }

        cfm_set_netdev(port->cfm, port->up.netdev);

        bfd_set_netdev(port->bfd, port->up.netdev);

    ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
                                     port->up.pp.hw_addr);

    if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,

        ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =

    ofport_update_peer(port);
port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    enum ofputil_port_config changed = old_config ^ port->up.pp.config;

    if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
                   OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
                   OFPUTIL_PC_NO_PACKET_IN)) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;

        if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
            bundle_update(port->bundle);
        }
    }
}
set_sflow(struct ofproto *ofproto_,
          const struct ofproto_sflow_options *sflow_options)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_sflow *ds = ofproto->sflow;

    if (sflow_options) {

        struct ofport_dpif *ofport;

        ds = ofproto->sflow = dpif_sflow_create();
        HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
            dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
        }
        ofproto->backer->need_revalidate = REV_RECONFIGURE;

        dpif_sflow_set_options(ds, sflow_options);

        dpif_sflow_unref(ds);
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ofproto->sflow = NULL;
    struct ofproto *ofproto_,
    const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
    const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
    size_t n_flow_exporters_options)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_ipfix *di = ofproto->ipfix;
    bool has_options = bridge_exporter_options || flow_exporters_options;

    if (has_options && !di) {
        di = ofproto->ipfix = dpif_ipfix_create();
    }

        /* Call set_options in any case to cleanly flush the flow
         * caches in the last exporters that are to be destroyed. */
        dpif_ipfix_set_options(
            di, bridge_exporter_options, flow_exporters_options,
            n_flow_exporters_options);

        dpif_ipfix_unref(di);
        ofproto->ipfix = NULL;
set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

        struct ofproto_dpif *ofproto;

        ofproto = ofproto_dpif_cast(ofport->up.ofproto);
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ofport->cfm = cfm_create(ofport->up.netdev);

        if (cfm_configure(ofport->cfm, s)) {

    cfm_unref(ofport->cfm);

    ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
                                     ofport->up.pp.hw_addr);
get_cfm_status(const struct ofport *ofport_,
               struct ofproto_cfm_status *status)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

        status->faults = cfm_get_fault(ofport->cfm);
        status->flap_count = cfm_get_flap_count(ofport->cfm);
        status->remote_opstate = cfm_get_opup(ofport->cfm);
        status->health = cfm_get_health(ofport->cfm);
        cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps);
set_bfd(struct ofport *ofport_, const struct smap *cfg)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev),
                                cfg, ofport->up.netdev);
    if (ofport->bfd != old) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
    }
    ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
                                     ofport->up.pp.hw_addr);
get_bfd_status(struct ofport *ofport_, struct smap *smap)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

        bfd_get_status(ofport->bfd, smap);
/* Spanning Tree. */

send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_;
    struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
    struct ofport_dpif *ofport;

    ofport = stp_port_get_aux(sp);

        VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
                     ofproto->up.name, port_num);

        struct eth_header *eth = pkt->l2;

        netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
        if (eth_addr_is_zero(eth->eth_src)) {
            VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
                         "with unknown MAC", ofproto->up.name, port_num);

            ofproto_dpif_send_packet(ofport, pkt);
/* Configures STP on 'ofproto_' using the settings defined in 's'. */
set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    /* Only revalidate flows if the configuration changed. */
    if (!s != !ofproto->stp) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
    }

        if (!ofproto->stp) {
            ofproto->stp = stp_create(ofproto_->name, s->system_id,
                                      send_bpdu_cb, ofproto);
            ofproto->stp_last_tick = time_msec();
        }

        stp_set_bridge_id(ofproto->stp, s->system_id);
        stp_set_bridge_priority(ofproto->stp, s->priority);
        stp_set_hello_time(ofproto->stp, s->hello_time);
        stp_set_max_age(ofproto->stp, s->max_age);
        stp_set_forward_delay(ofproto->stp, s->fwd_delay);

            struct ofport *ofport;

            HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
                set_stp_port(ofport, NULL);
            }

        stp_unref(ofproto->stp);
        ofproto->stp = NULL;
get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

        s->bridge_id = stp_get_bridge_id(ofproto->stp);
        s->designated_root = stp_get_designated_root(ofproto->stp);
        s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
update_stp_port_state(struct ofport_dpif *ofport)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    enum stp_state state;

    /* Figure out new state. */
    state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)

    if (ofport->stp_state != state) {
        enum ofputil_port_state of_state;

        VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s",
                    netdev_get_name(ofport->up.netdev),
                    stp_state_name(ofport->stp_state),
                    stp_state_name(state));
        if (stp_learn_in_state(ofport->stp_state)
            != stp_learn_in_state(state)) {
            /* xxx Learning action flows should also be flushed. */
            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
            mac_learning_flush(ofproto->ml);
            ovs_rwlock_unlock(&ofproto->ml->rwlock);
        }

        fwd_change = stp_forward_in_state(ofport->stp_state)
                     != stp_forward_in_state(state);

        ofproto->backer->need_revalidate = REV_STP;
        ofport->stp_state = state;
        ofport->stp_state_entered = time_msec();

        if (fwd_change && ofport->bundle) {
            bundle_update(ofport->bundle);
        }

        /* Update the STP state bits in the OpenFlow port description. */
        of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
        of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
                     : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
                     : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
                     : state == STP_BLOCKING ? OFPUTIL_PS_STP_BLOCK

        ofproto_port_set_state(&ofport->up, of_state);
2070 /* Configures STP on 'ofport_' using the settings defined in 's'. The
2071 * caller is responsible for assigning STP port numbers and ensuring
2072 * there are no duplicates. */
2074 set_stp_port(struct ofport
*ofport_
,
2075 const struct ofproto_port_stp_settings
*s
)
2077 struct ofport_dpif
*ofport
= ofport_dpif_cast(ofport_
);
2078 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofport
->up
.ofproto
);
2079 struct stp_port
*sp
= ofport
->stp_port
;
2081 if (!s
|| !s
->enable
) {
2083 ofport
->stp_port
= NULL
;
2084 stp_port_disable(sp
);
2085 update_stp_port_state(ofport
);
2088 } else if (sp
&& stp_port_no(sp
) != s
->port_num
2089 && ofport
== stp_port_get_aux(sp
)) {
2090 /* The port-id changed, so disable the old one if it's not
2091 * already in use by another port. */
2092 stp_port_disable(sp
);
2095 sp
= ofport
->stp_port
= stp_get_port(ofproto
->stp
, s
->port_num
);
2096 stp_port_enable(sp
);
2098 stp_port_set_aux(sp
, ofport
);
2099 stp_port_set_priority(sp
, s
->priority
);
2100 stp_port_set_path_cost(sp
, s
->path_cost
);
2102 update_stp_port_state(ofport
);
2108 get_stp_port_status(struct ofport
*ofport_
,
2109 struct ofproto_port_stp_status
*s
)
2111 struct ofport_dpif
*ofport
= ofport_dpif_cast(ofport_
);
2112 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofport
->up
.ofproto
);
2113 struct stp_port
*sp
= ofport
->stp_port
;
2115 if (!ofproto
->stp
|| !sp
) {
2121 s
->port_id
= stp_port_get_id(sp
);
2122 s
->state
= stp_port_get_state(sp
);
2123 s
->sec_in_state
= (time_msec() - ofport
->stp_state_entered
) / 1000;
2124 s
->role
= stp_port_get_role(sp
);
2130 get_stp_port_stats(struct ofport
*ofport_
,
2131 struct ofproto_port_stp_stats
*s
)
2133 struct ofport_dpif
*ofport
= ofport_dpif_cast(ofport_
);
2134 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofport
->up
.ofproto
);
2135 struct stp_port
*sp
= ofport
->stp_port
;
2137 if (!ofproto
->stp
|| !sp
) {
2143 stp_port_get_counts(sp
, &s
->tx_count
, &s
->rx_count
, &s
->error_count
);
stp_run(struct ofproto_dpif *ofproto)
{
    if (ofproto->stp) {
        long long int now = time_msec();
        long long int elapsed = now - ofproto->stp_last_tick;
        struct stp_port *sp;

        stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
        ofproto->stp_last_tick = now;

        while (stp_get_changed_port(ofproto->stp, &sp)) {
            struct ofport_dpif *ofport = stp_port_get_aux(sp);
            /* ... */
            update_stp_port_state(ofport);
        }

        if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
            mac_learning_flush(ofproto->ml);
            ovs_rwlock_unlock(&ofproto->ml->rwlock);
        }
    }
}

stp_wait(struct ofproto_dpif *ofproto)
{
    if (ofproto->stp) {
        poll_timer_wait(1000);
    }
}
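/* Installs the queue-to-DSCP mapping 'qdscp' (with 'n_qdscp' entries) on
 * 'ofport_', forcing a flow revalidation when the mapping changes. */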
2185 set_queues(struct ofport
*ofport_
, const struct ofproto_port_queue
*qdscp
,
2188 struct ofport_dpif
*ofport
= ofport_dpif_cast(ofport_
);
2189 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofport
->up
.ofproto
);
2191 if (ofport
->n_qdscp
!= n_qdscp
2192 || (n_qdscp
&& memcmp(ofport
->qdscp
, qdscp
,
2193 n_qdscp
* sizeof *qdscp
))) {
2194 ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2195 free(ofport
->qdscp
);
2196 ofport
->qdscp
= n_qdscp
2197 ? xmemdup(qdscp
, n_qdscp
* sizeof *qdscp
)
2199 ofport
->n_qdscp
= n_qdscp
;
/* Expires all MAC learning entries associated with 'bundle' and forces its
 * ofproto to revalidate every flow.
 *
 * Normally MAC learning entries are removed only from the ofproto associated
 * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
 * are removed from every ofproto.  When patch ports and SLB bonds are in use
 * and a VM migration happens and the gratuitous ARPs are somehow lost, this
 * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
 * with the host from which it migrated. */

bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
{
    struct ofproto_dpif *ofproto = bundle->ofproto;
    struct mac_learning *ml = ofproto->ml;
    struct mac_entry *mac, *next_mac;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    ovs_rwlock_wrlock(&ml->rwlock);
    LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
        if (mac->port.p == bundle) {
            if (all_ofprotos) {
                struct ofproto_dpif *o;

                HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
                    struct mac_entry *e;

                    ovs_rwlock_wrlock(&o->ml->rwlock);
                    e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
                    if (e) {
                        mac_learning_expire(o->ml, e);
                    }
                    ovs_rwlock_unlock(&o->ml->rwlock);
                }
            }

            mac_learning_expire(ml, mac);
        }
    }
    ovs_rwlock_unlock(&ml->rwlock);
}
2250 static struct ofbundle
*
2251 bundle_lookup(const struct ofproto_dpif
*ofproto
, void *aux
)
2253 struct ofbundle
*bundle
;
2255 HMAP_FOR_EACH_IN_BUCKET (bundle
, hmap_node
, hash_pointer(aux
, 0),
2256 &ofproto
->bundles
) {
2257 if (bundle
->aux
== aux
) {
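/* Recomputes whether 'bundle' may be flooded: it is floodable only if none
 * of its ports is configured with OFPUTIL_PC_NO_FLOOD or held in a
 * non-forwarding STP state. */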
bundle_update(struct ofbundle *bundle)
{
    struct ofport_dpif *port;

    bundle->floodable = true;
    LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
        if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
            /* ... */
            || !stp_forward_in_state(port->stp_state)) {
            bundle->floodable = false;
            /* ... */
        }
    }
}

bundle_del_port(struct ofport_dpif *port)
{
    struct ofbundle *bundle = port->bundle;

    bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;

    list_remove(&port->bundle_node);
    port->bundle = NULL;

    if (bundle->lacp) {
        lacp_slave_unregister(bundle->lacp, port);
    }
    if (bundle->bond) {
        bond_slave_unregister(bundle->bond, port);
    }

    bundle_update(bundle);
}
2301 bundle_add_port(struct ofbundle
*bundle
, ofp_port_t ofp_port
,
2302 struct lacp_slave_settings
*lacp
)
2304 struct ofport_dpif
*port
;
2306 port
= get_ofp_port(bundle
->ofproto
, ofp_port
);
2311 if (port
->bundle
!= bundle
) {
2312 bundle
->ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2314 bundle_remove(&port
->up
);
2317 port
->bundle
= bundle
;
2318 list_push_back(&bundle
->ports
, &port
->bundle_node
);
2319 if (port
->up
.pp
.config
& OFPUTIL_PC_NO_FLOOD
2321 || !stp_forward_in_state(port
->stp_state
)) {
2322 bundle
->floodable
= false;
2326 bundle
->ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2327 lacp_slave_register(bundle
->lacp
, port
, lacp
);
bundle_destroy(struct ofbundle *bundle)
{
    struct ofproto_dpif *ofproto;
    struct ofport_dpif *port, *next_port;

    /* ... */

    ofproto = bundle->ofproto;
    mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);

    ovs_rwlock_wrlock(&xlate_rwlock);
    xlate_bundle_remove(bundle);
    ovs_rwlock_unlock(&xlate_rwlock);

    LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
        bundle_del_port(port);
    }

    bundle_flush_macs(bundle, true);
    hmap_remove(&ofproto->bundles, &bundle->hmap_node);
    /* ... */
    free(bundle->trunks);
    lacp_unref(bundle->lacp);
    bond_unref(bundle->bond);
    /* ... */
2364 bundle_set(struct ofproto
*ofproto_
, void *aux
,
2365 const struct ofproto_bundle_settings
*s
)
2367 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
2368 bool need_flush
= false;
2369 struct ofport_dpif
*port
;
2370 struct ofbundle
*bundle
;
2371 unsigned long *trunks
;
2377 bundle_destroy(bundle_lookup(ofproto
, aux
));
2381 ovs_assert(s
->n_slaves
== 1 || s
->bond
!= NULL
);
2382 ovs_assert((s
->lacp
!= NULL
) == (s
->lacp_slaves
!= NULL
));
2384 bundle
= bundle_lookup(ofproto
, aux
);
2386 bundle
= xmalloc(sizeof *bundle
);
2388 bundle
->ofproto
= ofproto
;
2389 hmap_insert(&ofproto
->bundles
, &bundle
->hmap_node
,
2390 hash_pointer(aux
, 0));
2392 bundle
->name
= NULL
;
2394 list_init(&bundle
->ports
);
2395 bundle
->vlan_mode
= PORT_VLAN_TRUNK
;
2397 bundle
->trunks
= NULL
;
2398 bundle
->use_priority_tags
= s
->use_priority_tags
;
2399 bundle
->lacp
= NULL
;
2400 bundle
->bond
= NULL
;
2402 bundle
->floodable
= true;
2403 mbridge_register_bundle(ofproto
->mbridge
, bundle
);
2406 if (!bundle
->name
|| strcmp(s
->name
, bundle
->name
)) {
2408 bundle
->name
= xstrdup(s
->name
);
2413 ofproto
->lacp_enabled
= true;
2414 if (!bundle
->lacp
) {
2415 ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2416 bundle
->lacp
= lacp_create();
2418 lacp_configure(bundle
->lacp
, s
->lacp
);
2420 lacp_unref(bundle
->lacp
);
2421 bundle
->lacp
= NULL
;
2424 /* Update set of ports. */
2426 for (i
= 0; i
< s
->n_slaves
; i
++) {
2427 if (!bundle_add_port(bundle
, s
->slaves
[i
],
2428 s
->lacp
? &s
->lacp_slaves
[i
] : NULL
)) {
2432 if (!ok
|| list_size(&bundle
->ports
) != s
->n_slaves
) {
2433 struct ofport_dpif
*next_port
;
2435 LIST_FOR_EACH_SAFE (port
, next_port
, bundle_node
, &bundle
->ports
) {
2436 for (i
= 0; i
< s
->n_slaves
; i
++) {
2437 if (s
->slaves
[i
] == port
->up
.ofp_port
) {
2442 bundle_del_port(port
);
2446 ovs_assert(list_size(&bundle
->ports
) <= s
->n_slaves
);
2448 if (list_is_empty(&bundle
->ports
)) {
2449 bundle_destroy(bundle
);
2453 /* Set VLAN tagging mode */
2454 if (s
->vlan_mode
!= bundle
->vlan_mode
2455 || s
->use_priority_tags
!= bundle
->use_priority_tags
) {
2456 bundle
->vlan_mode
= s
->vlan_mode
;
2457 bundle
->use_priority_tags
= s
->use_priority_tags
;
2462 vlan
= (s
->vlan_mode
== PORT_VLAN_TRUNK
? -1
2463 : s
->vlan
>= 0 && s
->vlan
<= 4095 ? s
->vlan
2465 if (vlan
!= bundle
->vlan
) {
2466 bundle
->vlan
= vlan
;
2470 /* Get trunked VLANs. */
2471 switch (s
->vlan_mode
) {
2472 case PORT_VLAN_ACCESS
:
2476 case PORT_VLAN_TRUNK
:
2477 trunks
= CONST_CAST(unsigned long *, s
->trunks
);
2480 case PORT_VLAN_NATIVE_UNTAGGED
:
2481 case PORT_VLAN_NATIVE_TAGGED
:
2482 if (vlan
!= 0 && (!s
->trunks
2483 || !bitmap_is_set(s
->trunks
, vlan
)
2484 || bitmap_is_set(s
->trunks
, 0))) {
2485 /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
2487 trunks
= bitmap_clone(s
->trunks
, 4096);
2489 trunks
= bitmap_allocate1(4096);
2491 bitmap_set1(trunks
, vlan
);
2492 bitmap_set0(trunks
, 0);
2494 trunks
= CONST_CAST(unsigned long *, s
->trunks
);
2501 if (!vlan_bitmap_equal(trunks
, bundle
->trunks
)) {
2502 free(bundle
->trunks
);
2503 if (trunks
== s
->trunks
) {
2504 bundle
->trunks
= vlan_bitmap_clone(trunks
);
2506 bundle
->trunks
= trunks
;
2511 if (trunks
!= s
->trunks
) {
2516 if (!list_is_short(&bundle
->ports
)) {
2517 bundle
->ofproto
->has_bonded_bundles
= true;
2519 if (bond_reconfigure(bundle
->bond
, s
->bond
)) {
2520 ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2523 bundle
->bond
= bond_create(s
->bond
);
2524 ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2527 LIST_FOR_EACH (port
, bundle_node
, &bundle
->ports
) {
2528 bond_slave_register(bundle
->bond
, port
, port
->up
.netdev
);
2531 bond_unref(bundle
->bond
);
2532 bundle
->bond
= NULL
;
2535 /* If we changed something that would affect MAC learning, un-learn
2536 * everything on this port and force flow revalidation. */
2538 bundle_flush_macs(bundle
, false);
bundle_remove(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofbundle *bundle = port->bundle;

    /* ... */
    bundle_del_port(port);
    if (list_is_empty(&bundle->ports)) {
        bundle_destroy(bundle);
    } else if (list_is_short(&bundle->ports)) {
        bond_unref(bundle->bond);
        bundle->bond = NULL;
    }
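/* LACP callback: transmits the 'pdu_size'-byte LACP PDU in 'pdu' on the
 * slave port 'port_', composing an Ethernet frame with the port's own
 * hardware address as the source. */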
send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
    struct ofport_dpif *port = port_;
    uint8_t ea[ETH_ADDR_LEN];
    int error;

    error = netdev_get_etheraddr(port->up.netdev, ea);
    if (!error) {
        struct ofpbuf packet;
        void *packet_pdu;

        ofpbuf_init(&packet, 0);
        packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
                                 pdu_size);
        memcpy(packet_pdu, pdu, pdu_size);

        ofproto_dpif_send_packet(port, &packet);
        ofpbuf_uninit(&packet);
    } else {
        VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
                    "%s (%s)", port->bundle->name,
                    netdev_get_name(port->up.netdev), ovs_strerror(error));
    }
2589 bundle_send_learning_packets(struct ofbundle
*bundle
)
2591 struct ofproto_dpif
*ofproto
= bundle
->ofproto
;
2592 struct ofpbuf
*learning_packet
;
2593 int error
, n_packets
, n_errors
;
2594 struct mac_entry
*e
;
2595 struct list packets
;
2597 list_init(&packets
);
2598 ovs_rwlock_rdlock(&ofproto
->ml
->rwlock
);
2599 LIST_FOR_EACH (e
, lru_node
, &ofproto
->ml
->lrus
) {
2600 if (e
->port
.p
!= bundle
) {
2603 learning_packet
= bond_compose_learning_packet(bundle
->bond
,
2606 learning_packet
->private_p
= port_void
;
2607 list_push_back(&packets
, &learning_packet
->list_node
);
2610 ovs_rwlock_unlock(&ofproto
->ml
->rwlock
);
2612 error
= n_packets
= n_errors
= 0;
2613 LIST_FOR_EACH (learning_packet
, list_node
, &packets
) {
2616 ret
= ofproto_dpif_send_packet(learning_packet
->private_p
, learning_packet
);
2623 ofpbuf_list_delete(&packets
);
2626 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2627 VLOG_WARN_RL(&rl
, "bond %s: %d errors sending %d gratuitous learning "
2628 "packets, last error was: %s",
2629 bundle
->name
, n_errors
, n_packets
, ovs_strerror(error
));
2631 VLOG_DBG("bond %s: sent %d gratuitous learning packets",
2632 bundle
->name
, n_packets
);
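/* Periodic maintenance for 'bundle': drives LACP, refreshes each slave's
 * enable state in the bond, and sends gratuitous learning packets when the
 * bond asks for them. */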
bundle_run(struct ofbundle *bundle)
{
    if (bundle->lacp) {
        lacp_run(bundle->lacp, send_pdu_cb);
    }
    if (bundle->bond) {
        struct ofport_dpif *port;

        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
        }

        if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
            bundle->ofproto->backer->need_revalidate = REV_BOND;
        }

        if (bond_should_send_learning_packets(bundle->bond)) {
            bundle_send_learning_packets(bundle);
        }
    }
}

bundle_wait(struct ofbundle *bundle)
{
    if (bundle->lacp) {
        lacp_wait(bundle->lacp);
    }
    if (bundle->bond) {
        bond_wait(bundle->bond);
    }
}
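/* Creates or updates (or, when no settings are given, destroys) the mirror
 * identified by 'aux' on 'ofproto_', translating the source and destination
 * bundles named in 's' into ofbundle pointers before handing them to the
 * mbridge layer. */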
2673 mirror_set__(struct ofproto
*ofproto_
, void *aux
,
2674 const struct ofproto_mirror_settings
*s
)
2676 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
2677 struct ofbundle
**srcs
, **dsts
;
2682 mirror_destroy(ofproto
->mbridge
, aux
);
2686 srcs
= xmalloc(s
->n_srcs
* sizeof *srcs
);
2687 dsts
= xmalloc(s
->n_dsts
* sizeof *dsts
);
2689 for (i
= 0; i
< s
->n_srcs
; i
++) {
2690 srcs
[i
] = bundle_lookup(ofproto
, s
->srcs
[i
]);
2693 for (i
= 0; i
< s
->n_dsts
; i
++) {
2694 dsts
[i
] = bundle_lookup(ofproto
, s
->dsts
[i
]);
2697 error
= mirror_set(ofproto
->mbridge
, aux
, s
->name
, srcs
, s
->n_srcs
, dsts
,
2698 s
->n_dsts
, s
->src_vlans
,
2699 bundle_lookup(ofproto
, s
->out_bundle
), s
->out_vlan
);
2706 mirror_get_stats__(struct ofproto
*ofproto
, void *aux
,
2707 uint64_t *packets
, uint64_t *bytes
)
2710 return mirror_get_stats(ofproto_dpif_cast(ofproto
)->mbridge
, aux
, packets
,
set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
        mac_learning_flush(ofproto->ml);
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);
    /* ... */
}

is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofbundle *bundle = bundle_lookup(ofproto, aux);
    return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
}

forward_bpdu_changed(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    ofproto->backer->need_revalidate = REV_RECONFIGURE;
}
set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
                     unsigned int max_entries)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    mac_learning_set_idle_time(ofproto->ml, idle_time);
    mac_learning_set_max_entries(ofproto->ml, max_entries);
    ovs_rwlock_unlock(&ofproto->ml->rwlock);
}

static struct ofport_dpif *
get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
{
    struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
    return ofport ? ofport_dpif_cast(ofport) : NULL;
}

ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
                            struct ofproto_port *ofproto_port,
                            struct dpif_port *dpif_port)
{
    ofproto_port->name = dpif_port->name;
    ofproto_port->type = dpif_port->type;
    ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
}
2772 ofport_update_peer(struct ofport_dpif
*ofport
)
2774 const struct ofproto_dpif
*ofproto
;
2775 struct dpif_backer
*backer
;
2778 if (!netdev_vport_is_patch(ofport
->up
.netdev
)) {
2782 backer
= ofproto_dpif_cast(ofport
->up
.ofproto
)->backer
;
2783 backer
->need_revalidate
= REV_RECONFIGURE
;
2786 ofport
->peer
->peer
= NULL
;
2787 ofport
->peer
= NULL
;
2790 peer_name
= netdev_vport_patch_peer(ofport
->up
.netdev
);
2795 HMAP_FOR_EACH (ofproto
, all_ofproto_dpifs_node
, &all_ofproto_dpifs
) {
2796 struct ofport
*peer_ofport
;
2797 struct ofport_dpif
*peer
;
2800 if (ofproto
->backer
!= backer
) {
2804 peer_ofport
= shash_find_data(&ofproto
->up
.port_by_name
, peer_name
);
2809 peer
= ofport_dpif_cast(peer_ofport
);
2810 peer_peer
= netdev_vport_patch_peer(peer
->up
.netdev
);
2811 if (peer_peer
&& !strcmp(netdev_get_name(ofport
->up
.netdev
),
2813 ofport
->peer
= peer
;
2814 ofport
->peer
->peer
= ofport
;
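/* Per-port periodic processing: tracks carrier changes, combines carrier,
 * CFM, BFD, and LACP health into the port's 'may_enable' flag, and flags a
 * revalidation when that flag changes. */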
port_run(struct ofport_dpif *ofport)
{
    long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
    bool carrier_changed = carrier_seq != ofport->carrier_seq;
    bool enable = netdev_get_carrier(ofport->up.netdev);
    bool cfm_enable = false;
    bool bfd_enable = false;

    ofport->carrier_seq = carrier_seq;

    if (ofport->cfm) {
        int cfm_opup = cfm_get_opup(ofport->cfm);

        cfm_enable = !cfm_get_fault(ofport->cfm);

        if (cfm_opup >= 0) {
            cfm_enable = cfm_enable && cfm_opup;
        }
    }

    if (ofport->bfd) {
        bfd_enable = bfd_forwarding(ofport->bfd);
    }

    if (ofport->bfd || ofport->cfm) {
        enable = enable && (cfm_enable || bfd_enable);
    }

    if (ofport->bundle) {
        enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
        if (carrier_changed) {
            lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
        }
    }

    if (ofport->may_enable != enable) {
        struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
        ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
    }

    ofport->may_enable = enable;
2868 port_query_by_name(const struct ofproto
*ofproto_
, const char *devname
,
2869 struct ofproto_port
*ofproto_port
)
2871 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
2872 struct dpif_port dpif_port
;
2875 if (sset_contains(&ofproto
->ghost_ports
, devname
)) {
2876 const char *type
= netdev_get_type_from_name(devname
);
2878 /* We may be called before ofproto->up.port_by_name is populated with
2879 * the appropriate ofport. For this reason, we must get the name and
2880 * type from the netdev layer directly. */
2882 const struct ofport
*ofport
;
2884 ofport
= shash_find_data(&ofproto
->up
.port_by_name
, devname
);
2885 ofproto_port
->ofp_port
= ofport
? ofport
->ofp_port
: OFPP_NONE
;
2886 ofproto_port
->name
= xstrdup(devname
);
2887 ofproto_port
->type
= xstrdup(type
);
2893 if (!sset_contains(&ofproto
->ports
, devname
)) {
2896 error
= dpif_port_query_by_name(ofproto
->backer
->dpif
,
2897 devname
, &dpif_port
);
2899 ofproto_port_from_dpif_port(ofproto
, ofproto_port
, &dpif_port
);
2905 port_add(struct ofproto
*ofproto_
, struct netdev
*netdev
)
2907 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
2908 const char *devname
= netdev_get_name(netdev
);
2909 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
2910 const char *dp_port_name
;
2912 if (netdev_vport_is_patch(netdev
)) {
2913 sset_add(&ofproto
->ghost_ports
, netdev_get_name(netdev
));
2917 dp_port_name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
2918 if (!dpif_port_exists(ofproto
->backer
->dpif
, dp_port_name
)) {
2919 odp_port_t port_no
= ODPP_NONE
;
2922 error
= dpif_port_add(ofproto
->backer
->dpif
, netdev
, &port_no
);
2926 if (netdev_get_tunnel_config(netdev
)) {
2927 simap_put(&ofproto
->backer
->tnl_backers
,
2928 dp_port_name
, odp_to_u32(port_no
));
2932 if (netdev_get_tunnel_config(netdev
)) {
2933 sset_add(&ofproto
->ghost_ports
, devname
);
2935 sset_add(&ofproto
->ports
, devname
);
2941 port_del(struct ofproto
*ofproto_
, ofp_port_t ofp_port
)
2943 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
2944 struct ofport_dpif
*ofport
= get_ofp_port(ofproto
, ofp_port
);
2951 sset_find_and_delete(&ofproto
->ghost_ports
,
2952 netdev_get_name(ofport
->up
.netdev
));
2953 ofproto
->backer
->need_revalidate
= REV_RECONFIGURE
;
2954 if (!ofport
->is_tunnel
&& !netdev_vport_is_patch(ofport
->up
.netdev
)) {
2955 error
= dpif_port_del(ofproto
->backer
->dpif
, ofport
->odp_port
);
2957 /* The caller is going to close ofport->up.netdev. If this is a
2958 * bonded port, then the bond is using that netdev, so remove it
2959 * from the bond. The client will need to reconfigure everything
2960 * after deleting ports, so then the slave will get re-added. */
2961 bundle_remove(&ofport
->up
);
2968 port_get_stats(const struct ofport
*ofport_
, struct netdev_stats
*stats
)
2970 struct ofport_dpif
*ofport
= ofport_dpif_cast(ofport_
);
2975 error
= netdev_get_stats(ofport
->up
.netdev
, stats
);
2977 if (!error
&& ofport_
->ofp_port
== OFPP_LOCAL
) {
2978 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofport
->up
.ofproto
);
2980 ovs_mutex_lock(&ofproto
->stats_mutex
);
2981 /* ofproto->stats.tx_packets represents packets that we created
2982 * internally and sent to some port (e.g. packets sent with
2983 * ofproto_dpif_send_packet()). Account for them as if they had
2984 * come from OFPP_LOCAL and got forwarded. */
2986 if (stats
->rx_packets
!= UINT64_MAX
) {
2987 stats
->rx_packets
+= ofproto
->stats
.tx_packets
;
2990 if (stats
->rx_bytes
!= UINT64_MAX
) {
2991 stats
->rx_bytes
+= ofproto
->stats
.tx_bytes
;
2994 /* ofproto->stats.rx_packets represents packets that were received on
2995 * some port and we processed internally and dropped (e.g. STP).
2996 * Account for them as if they had been forwarded to OFPP_LOCAL. */
2998 if (stats
->tx_packets
!= UINT64_MAX
) {
2999 stats
->tx_packets
+= ofproto
->stats
.rx_packets
;
3002 if (stats
->tx_bytes
!= UINT64_MAX
) {
3003 stats
->tx_bytes
+= ofproto
->stats
.rx_bytes
;
3005 ovs_mutex_unlock(&ofproto
->stats_mutex
);
3011 struct port_dump_state
{
3016 struct ofproto_port port
;
3021 port_dump_start(const struct ofproto
*ofproto_ OVS_UNUSED
, void **statep
)
3023 *statep
= xzalloc(sizeof(struct port_dump_state
));
3028 port_dump_next(const struct ofproto
*ofproto_
, void *state_
,
3029 struct ofproto_port
*port
)
3031 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
3032 struct port_dump_state
*state
= state_
;
3033 const struct sset
*sset
;
3034 struct sset_node
*node
;
3036 if (state
->has_port
) {
3037 ofproto_port_destroy(&state
->port
);
3038 state
->has_port
= false;
3040 sset
= state
->ghost
? &ofproto
->ghost_ports
: &ofproto
->ports
;
3041 while ((node
= sset_at_position(sset
, &state
->bucket
, &state
->offset
))) {
3044 error
= port_query_by_name(ofproto_
, node
->name
, &state
->port
);
3046 *port
= state
->port
;
3047 state
->has_port
= true;
3049 } else if (error
!= ENODEV
) {
3054 if (!state
->ghost
) {
3055 state
->ghost
= true;
3058 return port_dump_next(ofproto_
, state_
, port
);
3065 port_dump_done(const struct ofproto
*ofproto_ OVS_UNUSED
, void *state_
)
3067 struct port_dump_state
*state
= state_
;
3069 if (state
->has_port
) {
3070 ofproto_port_destroy(&state
->port
);
3077 port_poll(const struct ofproto
*ofproto_
, char **devnamep
)
3079 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
3081 if (ofproto
->port_poll_errno
) {
3082 int error
= ofproto
->port_poll_errno
;
3083 ofproto
->port_poll_errno
= 0;
3087 if (sset_is_empty(&ofproto
->port_poll_set
)) {
3091 *devnamep
= sset_pop(&ofproto
->port_poll_set
);
3096 port_poll_wait(const struct ofproto
*ofproto_
)
3098 struct ofproto_dpif
*ofproto
= ofproto_dpif_cast(ofproto_
);
3099 dpif_port_poll_wait(ofproto
->backer
->dpif
);
3103 port_is_lacp_current(const struct ofport
*ofport_
)
3105 const struct ofport_dpif
*ofport
= ofport_dpif_cast(ofport_
);
3106 return (ofport
->bundle
&& ofport
->bundle
->lacp
3107 ? lacp_slave_is_current(ofport
->bundle
->lacp
, ofport
)
3111 /* Upcall handling. */
3113 struct flow_miss_op
{
3114 struct dpif_op dpif_op
;
3116 uint64_t slow_stub
[128 / 8]; /* Buffer for compose_slow_path() */
3117 struct xlate_out xout
;
3118 bool xout_garbage
; /* 'xout' needs to be uninitialized? */
3120 struct ofpbuf mask
; /* Flow mask for "put" ops. */
3121 struct odputil_keybuf maskbuf
;
3123 /* If this is a "put" op, then a pointer to the subfacet that should
3124 * be marked as uninstalled if the operation fails. */
3125 struct subfacet
*subfacet
;
3128 /* Figures out whether a flow that missed in 'ofproto', whose details are in
3129 * 'miss' masked by 'wc', is likely to be worth tracking in detail in userspace
3130 * and (usually) installing a datapath flow. The answer is usually "yes" (a
3131 * return value of true). However, for short flows the cost of bookkeeping is
3132 * much higher than the benefits, so when the datapath holds a large number of
3133 * flows we impose some heuristics to decide which flows are likely to be worth
3136 flow_miss_should_make_facet(struct flow_miss
*miss
)
3138 struct dpif_backer
*backer
= miss
->ofproto
->backer
;
3141 switch (flow_miss_model
) {
3142 case OFPROTO_HANDLE_MISS_AUTO
:
3144 case OFPROTO_HANDLE_MISS_WITH_FACETS
:
3146 case OFPROTO_HANDLE_MISS_WITHOUT_FACETS
:
3150 if (!backer
->governor
) {
3153 n_subfacets
= hmap_count(&backer
->subfacets
);
3154 if (n_subfacets
* 2 <= flow_eviction_threshold
) {
3158 backer
->governor
= governor_create();
3161 hash
= flow_hash_in_wildcards(&miss
->flow
, &miss
->xout
.wc
, 0);
3162 return governor_should_install_flow(backer
->governor
, hash
,
3163 miss
->stats
.n_packets
);
3166 /* Handles 'miss', which matches 'facet'. May add any required datapath
3167 * operations to 'ops', incrementing '*n_ops' for each new op.
3169 * All of the packets in 'miss' are considered to have arrived at time
3170 * 'miss->stats.used'. This is really important only for new facets: if we
3171 * just called time_msec() here, then the new subfacet or its packets could
3172 * look (occasionally) as though it was used some time after the facet was
3173 * used. That can make a one-packet flow look like it has a nonzero duration,
3174 * which looks odd in e.g. NetFlow statistics. */
3176 handle_flow_miss_with_facet(struct flow_miss
*miss
, struct facet
*facet
,
3177 struct flow_miss_op
*ops
, size_t *n_ops
)
3179 enum subfacet_path want_path
;
3180 struct subfacet
*subfacet
;
3183 /* Update facet stats. */
3184 facet
->packet_count
+= miss
->stats
.n_packets
;
3185 facet
->prev_packet_count
+= miss
->stats
.n_packets
;
3186 facet
->byte_count
+= miss
->stats
.n_bytes
;
3187 facet
->prev_byte_count
+= miss
->stats
.n_bytes
;
3189 /* Look for an existing subfacet. If we find one, update its used time. */
3190 key_hash
= odp_flow_key_hash(miss
->key
, miss
->key_len
);
3191 if (!list_is_empty(&facet
->subfacets
)) {
3192 subfacet
= subfacet_find(miss
->ofproto
->backer
,
3193 miss
->key
, miss
->key_len
, key_hash
);
3195 if (subfacet
->facet
== facet
) {
3196 subfacet
->used
= MAX(subfacet
->used
, miss
->stats
.used
);
3198 /* This shouldn't happen. */
3199 VLOG_ERR_RL(&rl
, "subfacet with wrong facet");
3200 subfacet_destroy(subfacet
);
3208 /* Don't install the flow if it's the result of the "userspace"
3209 * action for an already installed facet. This can occur when a
3210 * datapath flow with wildcards has a "userspace" action and flows
3211 * sent to userspace result in a different subfacet, which will then
3212 * be rejected as overlapping by the datapath. */
3213 if (miss
->upcall_type
== DPIF_UC_ACTION
3214 && !list_is_empty(&facet
->subfacets
)) {
3218 /* Create a subfacet, if we don't already have one. */
3220 subfacet
= subfacet_create(facet
, miss
, key_hash
);
3223 /* Install the subfacet, if it's not already installed. */
3224 want_path
= facet
->xout
.slow
? SF_SLOW_PATH
: SF_FAST_PATH
;
3225 if (subfacet
->path
!= want_path
) {
3226 struct flow_miss_op
*op
= &ops
[(*n_ops
)++];
3227 struct dpif_flow_put
*put
= &op
->dpif_op
.u
.flow_put
;
3229 subfacet
->path
= want_path
;
3231 ofpbuf_use_stack(&op
->mask
, &op
->maskbuf
, sizeof op
->maskbuf
);
3232 if (enable_megaflows
) {
3233 odp_flow_key_from_mask(&op
->mask
, &facet
->xout
.wc
.masks
,
3234 &miss
->flow
, UINT32_MAX
);
3237 op
->xout_garbage
= false;
3238 op
->dpif_op
.type
= DPIF_OP_FLOW_PUT
;
3239 op
->subfacet
= subfacet
;
3240 put
->flags
= DPIF_FP_CREATE
;
3241 put
->key
= miss
->key
;
3242 put
->key_len
= miss
->key_len
;
3243 put
->mask
= op
->mask
.data
;
3244 put
->mask_len
= op
->mask
.size
;
3246 if (want_path
== SF_FAST_PATH
) {
3247 put
->actions
= facet
->xout
.odp_actions
.data
;
3248 put
->actions_len
= facet
->xout
.odp_actions
.size
;
3250 compose_slow_path(facet
->ofproto
, &miss
->flow
, facet
->xout
.slow
,
3251 op
->slow_stub
, sizeof op
->slow_stub
,
3252 &put
->actions
, &put
->actions_len
);
3258 /* Handles flow miss 'miss'. May add any required datapath operations
3259 * to 'ops', incrementing '*n_ops' for each new op. */
3261 handle_flow_miss(struct flow_miss
*miss
, struct flow_miss_op
*ops
,
3264 struct facet
*facet
;
3266 miss
->ofproto
->n_missed
+= miss
->stats
.n_packets
;
3268 facet
= facet_lookup_valid(miss
->ofproto
, &miss
->flow
);
3270 /* There does not exist a bijection between 'struct flow' and datapath
3271 * flow keys with fitness ODP_FIT_TO_LITTLE. This breaks a fundamental
3272 * assumption used throughout the facet and subfacet handling code.
3273 * Since we have to handle these misses in userspace anyway, we simply
3274 * skip facet creation, avoiding the problem altogether. */
3275 if (miss
->key_fitness
== ODP_FIT_TOO_LITTLE
3276 || !flow_miss_should_make_facet(miss
)) {
3280 facet
= facet_create(miss
);
3282 handle_flow_miss_with_facet(miss
, facet
, ops
, n_ops
);
3285 static struct drop_key
*
3286 drop_key_lookup(const struct dpif_backer
*backer
, const struct nlattr
*key
,
3289 struct drop_key
*drop_key
;
3291 HMAP_FOR_EACH_WITH_HASH (drop_key
, hmap_node
, hash_bytes(key
, key_len
, 0),
3292 &backer
->drop_keys
) {
3293 if (drop_key
->key_len
== key_len
3294 && !memcmp(drop_key
->key
, key
, key_len
)) {
3302 drop_key_clear(struct dpif_backer
*backer
)
3304 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 15);
3305 struct drop_key
*drop_key
, *next
;
3307 HMAP_FOR_EACH_SAFE (drop_key
, next
, hmap_node
, &backer
->drop_keys
) {
3310 error
= dpif_flow_del(backer
->dpif
, drop_key
->key
, drop_key
->key_len
,
3312 if (error
&& !VLOG_DROP_WARN(&rl
)) {
3313 struct ds ds
= DS_EMPTY_INITIALIZER
;
3314 odp_flow_key_format(drop_key
->key
, drop_key
->key_len
, &ds
);
3315 VLOG_WARN("Failed to delete drop key (%s) (%s)",
3316 ovs_strerror(error
), ds_cstr(&ds
));
3320 hmap_remove(&backer
->drop_keys
, &drop_key
->hmap_node
);
3321 drop_key_destroy(drop_key
);
3324 udpif_drop_key_clear(backer
->udpif
);
3328 handle_flow_misses(struct dpif_backer
*backer
, struct flow_miss_batch
*fmb
)
3330 struct flow_miss_op flow_miss_ops
[FLOW_MISS_MAX_BATCH
];
3331 struct dpif_op
*dpif_ops
[FLOW_MISS_MAX_BATCH
];
3332 struct flow_miss
*miss
;
3335 /* Process each element in the to-do list, constructing the set of
3336 * operations to batch. */
3338 HMAP_FOR_EACH (miss
, hmap_node
, &fmb
->misses
) {
3339 handle_flow_miss(miss
, flow_miss_ops
, &n_ops
);
3341 ovs_assert(n_ops
<= ARRAY_SIZE(flow_miss_ops
));
3343 /* Execute batch. */
3344 for (i
= 0; i
< n_ops
; i
++) {
3345 dpif_ops
[i
] = &flow_miss_ops
[i
].dpif_op
;
3347 dpif_operate(backer
->dpif
, dpif_ops
, n_ops
);
3349 for (i
= 0; i
< n_ops
; i
++) {
3350 if (dpif_ops
[i
]->error
!= 0
3351 && flow_miss_ops
[i
].dpif_op
.type
== DPIF_OP_FLOW_PUT
3352 && flow_miss_ops
[i
].subfacet
) {
3353 struct subfacet
*subfacet
= flow_miss_ops
[i
].subfacet
;
3355 COVERAGE_INC(subfacet_install_fail
);
3357 /* Zero-out subfacet counters when installation failed, but
3358 * datapath reported hits. This should not happen and
3359 * indicates a bug, since if the datapath flow exists, we
3360 * should not be attempting to create a new subfacet. A
3361 * buggy datapath could trigger this, so just zero out the
3362 * counters and log an error. */
3363 if (subfacet
->dp_packet_count
|| subfacet
->dp_byte_count
) {
3364 VLOG_ERR_RL(&rl
, "failed to install subfacet for which "
3365 "datapath reported hits");
3366 subfacet
->dp_packet_count
= subfacet
->dp_byte_count
= 0;
3369 subfacet
->path
= SF_NOT_INSTALLED
;
3375 handle_upcalls(struct dpif_backer
*backer
)
3377 struct flow_miss_batch
*fmb
;
3380 for (n_processed
= 0; n_processed
< FLOW_MISS_MAX_BATCH
; n_processed
++) {
3381 struct drop_key
*drop_key
= drop_key_next(backer
->udpif
);
3386 if (!drop_key_lookup(backer
, drop_key
->key
, drop_key
->key_len
)) {
3387 hmap_insert(&backer
->drop_keys
, &drop_key
->hmap_node
,
3388 hash_bytes(drop_key
->key
, drop_key
->key_len
, 0));
3389 dpif_flow_put(backer
->dpif
, DPIF_FP_CREATE
| DPIF_FP_MODIFY
,
3390 drop_key
->key
, drop_key
->key_len
,
3391 NULL
, 0, NULL
, 0, NULL
);
3393 drop_key_destroy(drop_key
);
3397 fmb
= flow_miss_batch_next(backer
->udpif
);
3399 handle_flow_misses(backer
, fmb
);
3400 flow_miss_batch_destroy(fmb
);
3404 /* Flow expiration. */
3406 static int subfacet_max_idle(const struct dpif_backer
*);
3407 static void update_stats(struct dpif_backer
*);
3408 static void rule_expire(struct rule_dpif
*) OVS_REQUIRES(ofproto_mutex
);
3409 static void expire_subfacets(struct dpif_backer
*, int dp_max_idle
);
3411 /* This function is called periodically by run(). Its job is to collect
3412 * updates for the flows that have been installed into the datapath, most
3413 * importantly when they last were used, and then use that information to
3414 * expire flows that have not been used recently.
3416 * Returns the number of milliseconds after which it should be called again. */
3418 expire(struct dpif_backer
*backer
)
3420 struct ofproto_dpif
*ofproto
;
3424 /* Periodically clear out the drop keys in an effort to keep them
3425 * relatively few. */
3426 drop_key_clear(backer
);
3428 /* Update stats for each flow in the backer. */
3429 update_stats(backer
);
3431 n_subfacets
= hmap_count(&backer
->subfacets
);
3432 backer
->avg_n_subfacet
+= n_subfacets
;
3433 backer
->avg_n_subfacet
/= 2;
3435 backer
->max_n_subfacet
= MAX(backer
->max_n_subfacet
, n_subfacets
);
3437 max_idle
= subfacet_max_idle(backer
);
3438 expire_subfacets(backer
, max_idle
);
3440 HMAP_FOR_EACH (ofproto
, all_ofproto_dpifs_node
, &all_ofproto_dpifs
) {
3441 struct rule
*rule
, *next_rule
;
3443 if (ofproto
->backer
!= backer
) {
3447 /* Expire OpenFlow flows whose idle_timeout or hard_timeout
3449 ovs_mutex_lock(&ofproto_mutex
);
3450 LIST_FOR_EACH_SAFE (rule
, next_rule
, expirable
,
3451 &ofproto
->up
.expirable
) {
3452 rule_expire(rule_dpif_cast(rule
));
3454 ovs_mutex_unlock(&ofproto_mutex
);
3456 /* All outstanding data in existing flows has been accounted, so it's a
3457 * good time to do bond rebalancing. */
3458 if (ofproto
->has_bonded_bundles
) {
3459 struct ofbundle
*bundle
;
3461 HMAP_FOR_EACH (bundle
, hmap_node
, &ofproto
->bundles
) {
3463 bond_rebalance(bundle
->bond
);
3469 return MIN(max_idle
, 1000);
3472 /* Updates flow table statistics given that the datapath just reported 'stats'
3473 * as 'subfacet''s statistics. */
3475 update_subfacet_stats(struct subfacet
*subfacet
,
3476 const struct dpif_flow_stats
*stats
)
3478 struct facet
*facet
= subfacet
->facet
;
3479 struct dpif_flow_stats diff
;
3481 diff
.tcp_flags
= stats
->tcp_flags
;
3482 diff
.used
= stats
->used
;
3484 if (stats
->n_packets
>= subfacet
->dp_packet_count
) {
3485 diff
.n_packets
= stats
->n_packets
- subfacet
->dp_packet_count
;
3487 VLOG_WARN_RL(&rl
, "unexpected packet count from the datapath");
3491 if (stats
->n_bytes
>= subfacet
->dp_byte_count
) {
3492 diff
.n_bytes
= stats
->n_bytes
- subfacet
->dp_byte_count
;
3494 VLOG_WARN_RL(&rl
, "unexpected byte count from datapath");
3498 facet
->ofproto
->n_hit
+= diff
.n_packets
;
3499 subfacet
->dp_packet_count
= stats
->n_packets
;
3500 subfacet
->dp_byte_count
= stats
->n_bytes
;
3501 subfacet_update_stats(subfacet
, &diff
);
3503 if (diff
.n_packets
) {
/* 'key' with length 'key_len' bytes is a flow in 'dpif' that we know nothing
 * about, or a flow that shouldn't be installed but was anyway.  Delete it. */

delete_unexpected_flow(struct dpif_backer *backer,
                       const struct nlattr *key, size_t key_len)
{
    if (!VLOG_DROP_WARN(&rl)) {
        struct ds s;
        /* ... */
        odp_flow_key_format(key, key_len, &s);
        VLOG_WARN("unexpected flow: %s", ds_cstr(&s));
        /* ... */
    }

    COVERAGE_INC(facet_unexpected);
    dpif_flow_del(backer->dpif, key, key_len, NULL);
3527 /* Update 'packet_count', 'byte_count', and 'used' members of installed facets.
3529 * This function also pushes statistics updates to rules which each facet
3530 * resubmits into. Generally these statistics will be accurate. However, if a
3531 * facet changes the rule it resubmits into at some time in between
3532 * update_stats() runs, it is possible that statistics accrued to the
3533 * old rule will be incorrectly attributed to the new rule. This could be
3534 * avoided by calling update_stats() whenever rules are created or
3535 * deleted. However, the performance impact of making so many calls to the
3536 * datapath do not justify the benefit of having perfectly accurate statistics.
3538 * In addition, this function maintains per ofproto flow hit counts. The patch
3539 * port is not treated specially. e.g. A packet ingress from br0 patched into
3540 * br1 will increase the hit count of br0 by 1, however, does not affect
3541 * the hit or miss counts of br1.
3544 update_stats(struct dpif_backer
*backer
)
3546 const struct dpif_flow_stats
*stats
;
3547 struct dpif_flow_dump dump
;
3548 const struct nlattr
*key
, *mask
;
3549 size_t key_len
, mask_len
;
3551 dpif_flow_dump_start(&dump
, backer
->dpif
);
3552 while (dpif_flow_dump_next(&dump
, &key
, &key_len
,
3553 &mask
, &mask_len
, NULL
, NULL
, &stats
)) {
3554 struct subfacet
*subfacet
;
3557 key_hash
= odp_flow_key_hash(key
, key_len
);
3558 subfacet
= subfacet_find(backer
, key
, key_len
, key_hash
);
3559 switch (subfacet
? subfacet
->path
: SF_NOT_INSTALLED
) {
3561 update_subfacet_stats(subfacet
, stats
);
3565 /* Stats are updated per-packet. */
3568 case SF_NOT_INSTALLED
:
3570 delete_unexpected_flow(backer
, key
, key_len
);
3574 dpif_flow_dump_done(&dump
);
3577 /* Calculates and returns the number of milliseconds of idle time after which
3578 * subfacets should expire from the datapath. When a subfacet expires, we fold
3579 * its statistics into its facet, and when a facet's last subfacet expires, we
3580 * fold its statistic into its rule. */
3582 subfacet_max_idle(const struct dpif_backer
*backer
)
3585 * Idle time histogram.
3587 * Most of the time a switch has a relatively small number of subfacets.
3588 * When this is the case we might as well keep statistics for all of them
3589 * in userspace and to cache them in the kernel datapath for performance as
3592 * As the number of subfacets increases, the memory required to maintain
3593 * statistics about them in userspace and in the kernel becomes
3594 * significant. However, with a large number of subfacets it is likely
3595 * that only a few of them are "heavy hitters" that consume a large amount
3596 * of bandwidth. At this point, only heavy hitters are worth caching in
3597 * the kernel and maintaining in userspaces; other subfacets we can
3600 * The technique used to compute the idle time is to build a histogram with
3601 * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet
3602 * that is installed in the kernel gets dropped in the appropriate bucket.
3603 * After the histogram has been built, we compute the cutoff so that only
3604 * the most-recently-used 1% of subfacets (but at least
3605 * flow_eviction_threshold flows) are kept cached. At least
3606 * the most-recently-used bucket of subfacets is kept, so actually an
3607 * arbitrary number of subfacets can be kept in any given expiration run
3608 * (though the next run will delete most of those unless they receive
3611 * This requires a second pass through the subfacets, in addition to the
3612 * pass made by update_stats(), because the former function never looks at
3613 * uninstallable subfacets.
3615 enum { BUCKET_WIDTH
= 100 };
3616 enum { N_BUCKETS
= 5000 / BUCKET_WIDTH
};
3617 int buckets
[N_BUCKETS
] = { 0 };
3618 int total
, subtotal
, bucket
;
3619 struct subfacet
*subfacet
;
3623 total
= hmap_count(&backer
->subfacets
);
3624 if (total
<= flow_eviction_threshold
) {
3625 return N_BUCKETS
* BUCKET_WIDTH
;
3628 /* Build histogram. */
3630 HMAP_FOR_EACH (subfacet
, hmap_node
, &backer
->subfacets
) {
3631 long long int idle
= now
- subfacet
->used
;
3632 int bucket
= (idle
<= 0 ? 0
3633 : idle
>= BUCKET_WIDTH
* N_BUCKETS
? N_BUCKETS
- 1
3634 : (unsigned int) idle
/ BUCKET_WIDTH
);
3638 /* Find the first bucket whose flows should be expired. */
3639 subtotal
= bucket
= 0;
3641 subtotal
+= buckets
[bucket
++];
3642 } while (bucket
< N_BUCKETS
&&
3643 subtotal
< MAX(flow_eviction_threshold
, total
/ 100));
3645 if (VLOG_IS_DBG_ENABLED()) {
3649 ds_put_cstr(&s
, "keep");
3650 for (i
= 0; i
< N_BUCKETS
; i
++) {
3652 ds_put_cstr(&s
, ", drop");
3655 ds_put_format(&s
, " %d:%d", i
* BUCKET_WIDTH
, buckets
[i
]);
3658 VLOG_INFO("%s (msec:count)", ds_cstr(&s
));
3662 return bucket
* BUCKET_WIDTH
;
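/* Expires every subfacet in 'backer' that has been idle longer than
 * 'dp_max_idle' msec, destroying installed subfacets in batches.  Flows
 * slow-pathed for CFM, BFD, or LACP get a more conservative 10-second
 * cutoff. */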
3666 expire_subfacets(struct dpif_backer
*backer
, int dp_max_idle
)
3668 /* Cutoff time for most flows. */
3669 long long int normal_cutoff
= time_msec() - dp_max_idle
;
3671 /* We really want to keep flows for special protocols around, so use a more
3672 * conservative cutoff. */
3673 long long int special_cutoff
= time_msec() - 10000;
3675 struct subfacet
*subfacet
, *next_subfacet
;
3676 struct subfacet
*batch
[SUBFACET_DESTROY_MAX_BATCH
];
3680 HMAP_FOR_EACH_SAFE (subfacet
, next_subfacet
, hmap_node
,
3681 &backer
->subfacets
) {
3682 long long int cutoff
;
3684 cutoff
= (subfacet
->facet
->xout
.slow
& (SLOW_CFM
| SLOW_BFD
| SLOW_LACP
3688 if (subfacet
->used
< cutoff
) {
3689 if (subfacet
->path
!= SF_NOT_INSTALLED
) {
3690 batch
[n_batch
++] = subfacet
;
3691 if (n_batch
>= SUBFACET_DESTROY_MAX_BATCH
) {
3692 subfacet_destroy_batch(backer
, batch
, n_batch
);
3696 subfacet_destroy(subfacet
);
3702 subfacet_destroy_batch(backer
, batch
, n_batch
);
/* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
 * then delete it entirely. */

rule_expire(struct rule_dpif *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    uint16_t idle_timeout, hard_timeout;
    long long int now = time_msec();
    int reason;

    ovs_assert(!rule->up.pending);

    /* Has 'rule' expired? */
    ovs_mutex_lock(&rule->up.mutex);
    hard_timeout = rule->up.hard_timeout;
    idle_timeout = rule->up.idle_timeout;
    if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) {
        reason = OFPRR_HARD_TIMEOUT;
    } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) {
        reason = OFPRR_IDLE_TIMEOUT;
    }
    /* ... */
    ovs_mutex_unlock(&rule->up.mutex);

    /* ... */
    COVERAGE_INC(ofproto_dpif_expired);
    ofproto_rule_expire(&rule->up, reason);
3739 /* Creates and returns a new facet based on 'miss'.
3741 * The caller must already have determined that no facet with an identical
3742 * 'miss->flow' exists in 'miss->ofproto'.
3744 * 'rule' and 'xout' must have been created based on 'miss'.
3746 * 'facet'' statistics are initialized based on 'stats'.
3748 * The facet will initially have no subfacets. The caller should create (at
3749 * least) one subfacet with subfacet_create(). */
3750 static struct facet
*
3751 facet_create(const struct flow_miss
*miss
)
3753 struct ofproto_dpif
*ofproto
= miss
->ofproto
;
3754 struct facet
*facet
;
3757 COVERAGE_INC(facet_create
);
3758 facet
= xzalloc(sizeof *facet
);
3759 facet
->ofproto
= miss
->ofproto
;
3760 facet
->used
= miss
->stats
.used
;
3761 facet
->flow
= miss
->flow
;
3762 facet
->learn_rl
= time_msec() + 500;
3764 list_init(&facet
->subfacets
);
3766 xlate_out_copy(&facet
->xout
, &miss
->xout
);
3768 match_init(&match
, &facet
->flow
, &facet
->xout
.wc
);
3769 cls_rule_init(&facet
->cr
, &match
, OFP_DEFAULT_PRIORITY
);
3770 ovs_rwlock_wrlock(&ofproto
->facets
.rwlock
);
3771 classifier_insert(&ofproto
->facets
, &facet
->cr
);
3772 ovs_rwlock_unlock(&ofproto
->facets
.rwlock
);
3778 facet_free(struct facet
*facet
)
3781 xlate_out_uninit(&facet
->xout
);
3786 /* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
3787 * 'flow' must reflect the data in 'packet'. */
3789 ofproto_dpif_execute_actions(struct ofproto_dpif
*ofproto
,
3790 const struct flow
*flow
,
3791 struct rule_dpif
*rule
,
3792 const struct ofpact
*ofpacts
, size_t ofpacts_len
,
3793 struct ofpbuf
*packet
)
3795 struct odputil_keybuf keybuf
;
3796 struct dpif_flow_stats stats
;
3797 struct xlate_out xout
;
3798 struct xlate_in xin
;
3803 ovs_assert((rule
!= NULL
) != (ofpacts
!= NULL
));
3805 dpif_flow_stats_extract(flow
, packet
, time_msec(), &stats
);
3807 rule_dpif_credit_stats(rule
, &stats
);
3810 xlate_in_init(&xin
, ofproto
, flow
, rule
, stats
.tcp_flags
, packet
);
3811 xin
.ofpacts
= ofpacts
;
3812 xin
.ofpacts_len
= ofpacts_len
;
3813 xin
.resubmit_stats
= &stats
;
3814 xlate_actions(&xin
, &xout
);
3816 ofpbuf_use_stack(&key
, &keybuf
, sizeof keybuf
);
3817 in_port
= flow
->in_port
.ofp_port
;
3818 if (in_port
== OFPP_NONE
) {
3819 in_port
= OFPP_LOCAL
;
3821 odp_flow_key_from_flow(&key
, flow
, ofp_port_to_odp_port(ofproto
, in_port
));
3823 error
= dpif_execute(ofproto
->backer
->dpif
, key
.data
, key
.size
,
3824 xout
.odp_actions
.data
, xout
.odp_actions
.size
, packet
,
3825 (xout
.slow
& SLOW_ACTION
) != 0);
3826 xlate_out_uninit(&xout
);
3831 /* Remove 'facet' from its ofproto and free up the associated memory:
3833 * - If 'facet' was installed in the datapath, uninstalls it and updates its
3834 * rule's statistics, via subfacet_uninstall().
3836 * - Removes 'facet' from its rule and from ofproto->facets.
3839 facet_remove(struct facet
*facet
)
3841 struct subfacet
*subfacet
, *next_subfacet
;
3843 COVERAGE_INC(facet_remove
);
3844 ovs_assert(!list_is_empty(&facet
->subfacets
));
3846 /* First uninstall all of the subfacets to get final statistics. */
3847 LIST_FOR_EACH (subfacet
, list_node
, &facet
->subfacets
) {
3848 subfacet_uninstall(subfacet
);
3851 /* Flush the final stats to the rule.
3853 * This might require us to have at least one subfacet around so that we
3854 * can use its actions for accounting in facet_account(), which is why we
3855 * have uninstalled but not yet destroyed the subfacets. */
3856 facet_flush_stats(facet
);
3858 /* Now we're really all done so destroy everything. */
3859 LIST_FOR_EACH_SAFE (subfacet
, next_subfacet
, list_node
,
3860 &facet
->subfacets
) {
3861 subfacet_destroy__(subfacet
);
3863 ovs_rwlock_wrlock(&facet
->ofproto
->facets
.rwlock
);
3864 classifier_remove(&facet
->ofproto
->facets
, &facet
->cr
);
3865 ovs_rwlock_unlock(&facet
->ofproto
->facets
.rwlock
);
3866 cls_rule_destroy(&facet
->cr
);
/* Feed information from 'facet' back into the learning table to keep it in
 * sync with what is actually flowing through the datapath. */

facet_learn(struct facet *facet)
{
    long long int now = time_msec();

    if (!facet->xout.has_fin_timeout && now < facet->learn_rl) {
        return;
    }

    facet->learn_rl = now + 500;

    if (!facet->xout.has_learn
        && !facet->xout.has_normal
        && (!facet->xout.has_fin_timeout
            || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) {
        return;
    }

    facet_push_stats(facet, true);
3893 /* Returns true if the only action for 'facet' is to send to the controller.
3894 * (We don't report NetFlow expiration messages for such facets because they
3895 * are just part of the control logic for the network, not real traffic). */
3897 facet_is_controller_flow(struct facet
*facet
)
3900 struct ofproto_dpif
*ofproto
= facet
->ofproto
;
3901 const struct ofpact
*ofpacts
;
3902 struct rule_actions
*actions
;
3903 struct rule_dpif
*rule
;
3907 rule_dpif_lookup(ofproto
, &facet
->flow
, NULL
, &rule
);
3908 actions
= rule_dpif_get_actions(rule
);
3909 rule_dpif_unref(rule
);
3911 ofpacts_len
= actions
->ofpacts_len
;
3912 ofpacts
= actions
->ofpacts
;
3913 is_controller
= ofpacts_len
> 0
3914 && ofpacts
->type
== OFPACT_CONTROLLER
3915 && ofpact_next(ofpacts
) >= ofpact_end(ofpacts
, ofpacts_len
);
3916 rule_actions_unref(actions
);
3918 return is_controller
;
3923 /* Folds all of 'facet''s statistics into its rule. Also updates the
3924 * accounting ofhook and emits a NetFlow expiration if appropriate. All of
3925 * 'facet''s statistics in the datapath should have been zeroed and folded into
3926 * its packet and byte counts before this function is called. */
3928 facet_flush_stats(struct facet
*facet
)
3930 struct ofproto_dpif
*ofproto
= facet
->ofproto
;
3931 struct subfacet
*subfacet
;
3933 LIST_FOR_EACH (subfacet
, list_node
, &facet
->subfacets
) {
3934 ovs_assert(!subfacet
->dp_byte_count
);
3935 ovs_assert(!subfacet
->dp_packet_count
);
3938 facet_push_stats(facet
, false);
3940 if (ofproto
->netflow
&& !facet_is_controller_flow(facet
)) {
3941 netflow_expire(ofproto
->netflow
, &facet
->flow
);
3942 netflow_flow_clear(ofproto
->netflow
, &facet
->flow
);
3945 /* Reset counters to prevent double counting if 'facet' ever gets
3947 facet_reset_counters(facet
);
3948 facet
->tcp_flags
= 0;
3951 /* Searches 'ofproto''s table of facets for one which would be responsible for
3952 * 'flow'. Returns it if found, otherwise a null pointer.
3954 * The returned facet might need revalidation; use facet_lookup_valid()
3955 * instead if that is important. */
3956 static struct facet
*
3957 facet_find(struct ofproto_dpif
*ofproto
, const struct flow
*flow
)
3959 struct cls_rule
*cr
;
3961 ovs_rwlock_rdlock(&ofproto
->facets
.rwlock
);
3962 cr
= classifier_lookup(&ofproto
->facets
, flow
, NULL
);
3963 ovs_rwlock_unlock(&ofproto
->facets
.rwlock
);
3964 return cr
? CONTAINER_OF(cr
, struct facet
, cr
) : NULL
;
3967 /* Searches 'ofproto''s table of facets for one capable that covers
3968 * 'flow'. Returns it if found, otherwise a null pointer.
3970 * The returned facet is guaranteed to be valid. */
3971 static struct facet
*
3972 facet_lookup_valid(struct ofproto_dpif
*ofproto
, const struct flow
*flow
)
3974 struct facet
*facet
;
3976 facet
= facet_find(ofproto
, flow
);
3978 && ofproto
->backer
->need_revalidate
3979 && !facet_revalidate(facet
)) {
3987 facet_check_consistency(struct facet
*facet
)
3989 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 15);
3991 struct xlate_out xout
;
3992 struct xlate_in xin
;
3995 /* Check the datapath actions for consistency. */
3996 xlate_in_init(&xin
, facet
->ofproto
, &facet
->flow
, NULL
, 0, NULL
);
3997 xlate_actions(&xin
, &xout
);
3999 ok
= ofpbuf_equal(&facet
->xout
.odp_actions
, &xout
.odp_actions
)
4000 && facet
->xout
.slow
== xout
.slow
;
4001 if (!ok
&& !VLOG_DROP_WARN(&rl
)) {
4002 struct ds s
= DS_EMPTY_INITIALIZER
;
4004 flow_format(&s
, &facet
->flow
);
4005 ds_put_cstr(&s
, ": inconsistency in facet");
4007 if (!ofpbuf_equal(&facet
->xout
.odp_actions
, &xout
.odp_actions
)) {
4008 ds_put_cstr(&s
, " (actions were: ");
4009 format_odp_actions(&s
, facet
->xout
.odp_actions
.data
,
4010 facet
->xout
.odp_actions
.size
);
4011 ds_put_cstr(&s
, ") (correct actions: ");
4012 format_odp_actions(&s
, xout
.odp_actions
.data
,
4013 xout
.odp_actions
.size
);
4014 ds_put_char(&s
, ')');
4017 if (facet
->xout
.slow
!= xout
.slow
) {
4018 ds_put_format(&s
, " slow path incorrect. should be %d", xout
.slow
);
4023 xlate_out_uninit(&xout
);
4028 /* Re-searches the classifier for 'facet':
4030 * - If the rule found is different from 'facet''s current rule, moves
4031 * 'facet' to the new rule and recompiles its actions.
4033 * - If the rule found is the same as 'facet''s current rule, leaves 'facet'
4034 * where it is and recompiles its actions anyway.
4036 * - If any of 'facet''s subfacets correspond to a new flow according to
4037 * xlate_receive(), 'facet' is removed.
4039 * Returns true if 'facet' is still valid. False if 'facet' was removed. */
4041 facet_revalidate(struct facet
*facet
)
4043 struct ofproto_dpif
*ofproto
= facet
->ofproto
;
4044 struct rule_dpif
*new_rule
;
4045 struct subfacet
*subfacet
;
4046 struct flow_wildcards wc
;
4047 struct xlate_out xout
;
4048 struct xlate_in xin
;
4050 COVERAGE_INC(facet_revalidate
);
4052 /* Check that child subfacets still correspond to this facet. Tunnel
4053 * configuration changes could cause a subfacet's OpenFlow in_port to
4055 LIST_FOR_EACH (subfacet
, list_node
, &facet
->subfacets
) {
4056 struct ofproto_dpif
*recv_ofproto
;
4057 struct flow recv_flow
;
4060 error
= xlate_receive(ofproto
->backer
, NULL
, subfacet
->key
,
4061 subfacet
->key_len
, &recv_flow
, NULL
,
4062 &recv_ofproto
, NULL
, NULL
, NULL
, NULL
);
4064 || recv_ofproto
!= ofproto
4065 || facet
!= facet_find(ofproto
, &recv_flow
)) {
4066 facet_remove(facet
);
4071 flow_wildcards_init_catchall(&wc
);
4072 rule_dpif_lookup(ofproto
, &facet
->flow
, &wc
, &new_rule
);
4074 /* Calculate new datapath actions.
4076 * We do not modify any 'facet' state yet, because we might need to, e.g.,
4077 * emit a NetFlow expiration and, if so, we need to have the old state
4078 * around to properly compose it. */
4079 xlate_in_init(&xin
, ofproto
, &facet
->flow
, new_rule
, 0, NULL
);
4080 xlate_actions(&xin
, &xout
);
4081 flow_wildcards_or(&xout
.wc
, &xout
.wc
, &wc
);
4082 /* Make sure non -packet fields are not masked. If not cleared,
4083 * the memcmp() below may fail, causing an otherwise valid facet
4085 flow_wildcards_clear_non_packet_fields(&xout
.wc
);
4087 /* A facet's slow path reason should only change under dramatic
4088 * circumstances. Rather than try to update everything, it's simpler to
4089 * remove the facet and start over.
4091 * More importantly, if a facet's wildcards change, it will be relatively
4092 * difficult to figure out if its subfacets still belong to it, and if not
4093 * which facet they may belong to. Again, to avoid the complexity, we
4094 * simply give up instead. */
4095 if (facet
->xout
.slow
!= xout
.slow
4096 || memcmp(&facet
->xout
.wc
, &xout
.wc
, sizeof xout
.wc
)) {
4097 facet_remove(facet
);
4098 xlate_out_uninit(&xout
);
4099 rule_dpif_unref(new_rule
);
4103 if (!ofpbuf_equal(&facet
->xout
.odp_actions
, &xout
.odp_actions
)) {
4104 LIST_FOR_EACH(subfacet
, list_node
, &facet
->subfacets
) {
4105 if (subfacet
->path
== SF_FAST_PATH
) {
4106 struct dpif_flow_stats stats
;
4108 subfacet_install(subfacet
, &xout
.odp_actions
, &stats
);
4109 subfacet_update_stats(subfacet
, &stats
);
4113 facet_flush_stats(facet
);
4115 ofpbuf_clear(&facet
->xout
.odp_actions
);
4116 ofpbuf_put(&facet
->xout
.odp_actions
, xout
.odp_actions
.data
,
4117 xout
.odp_actions
.size
);
4120 /* Update 'facet' now that we've taken care of all the old state. */
4121 facet
->xout
.slow
= xout
.slow
;
4122 facet
->xout
.has_learn
= xout
.has_learn
;
4123 facet
->xout
.has_normal
= xout
.has_normal
;
4124 facet
->xout
.has_fin_timeout
= xout
.has_fin_timeout
;
4125 facet
->xout
.nf_output_iface
= xout
.nf_output_iface
;
4126 facet
->xout
.mirrors
= xout
.mirrors
;
4128 ovs_mutex_lock(&new_rule
->up
.mutex
);
4129 facet
->used
= MAX(facet
->used
, new_rule
->up
.created
);
4130 ovs_mutex_unlock(&new_rule
->up
.mutex
);
4132 xlate_out_uninit(&xout
);
4133 rule_dpif_unref(new_rule
);
static void
facet_reset_counters(struct facet *facet)
{
    facet->packet_count = 0;
    facet->byte_count = 0;
    facet->prev_packet_count = 0;
    facet->prev_byte_count = 0;
}

static void
flow_push_stats(struct ofproto_dpif *ofproto, struct flow *flow,
                struct dpif_flow_stats *stats, bool may_learn)
{
    struct xlate_in xin;

    xlate_in_init(&xin, ofproto, flow, NULL, stats->tcp_flags, NULL);
    xin.resubmit_stats = stats;
    xin.may_learn = may_learn;
    xlate_actions_for_side_effects(&xin);
}

static void
facet_push_stats(struct facet *facet, bool may_learn)
{
    struct dpif_flow_stats stats;

    ovs_assert(facet->packet_count >= facet->prev_packet_count);
    ovs_assert(facet->byte_count >= facet->prev_byte_count);
    ovs_assert(facet->used >= facet->prev_used);

    stats.n_packets = facet->packet_count - facet->prev_packet_count;
    stats.n_bytes = facet->byte_count - facet->prev_byte_count;
    stats.used = facet->used;
    stats.tcp_flags = facet->tcp_flags;

    if (may_learn || stats.n_packets || facet->used > facet->prev_used) {
        facet->prev_packet_count = facet->packet_count;
        facet->prev_byte_count = facet->byte_count;
        facet->prev_used = facet->used;
        flow_push_stats(facet->ofproto, &facet->flow, &stats, may_learn);
    }
}
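/* Illustrative note (not part of the original source): the prev_* members
 * above turn the facet's cumulative counters into deltas, so each call to
 * facet_push_stats() pushes only the traffic seen since the previous push.
 * For a hypothetical facet that accumulated 10 packets and later 4 more:
 *
 *     packet_count == 10, prev_packet_count == 0   -> pushes a delta of 10
 *     packet_count == 14, prev_packet_count == 10  -> pushes a delta of 4
 *
 * The same pattern applies to byte_count and 'used'. */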
static void
push_all_stats(void)
{
    static long long int rl = LLONG_MIN;
    struct ofproto_dpif *ofproto;

    if (time_msec() < rl) {
        return;
    }

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        struct cls_cursor cursor;
        struct facet *facet;

        ovs_rwlock_rdlock(&ofproto->facets.rwlock);
        cls_cursor_init(&cursor, &ofproto->facets, NULL);
        CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
            facet_push_stats(facet, false);
        }
        ovs_rwlock_unlock(&ofproto->facets.rwlock);
    }

    rl = time_msec() + 100;
}

void
rule_dpif_credit_stats(struct rule_dpif *rule,
                       const struct dpif_flow_stats *stats)
{
    ovs_mutex_lock(&rule->stats_mutex);
    rule->packet_count += stats->n_packets;
    rule->byte_count += stats->n_bytes;
    rule->up.used = MAX(rule->up.used, stats->used);
    ovs_mutex_unlock(&rule->stats_mutex);
}

bool
rule_dpif_is_fail_open(const struct rule_dpif *rule)
{
    return is_fail_open_rule(&rule->up);
}

bool
rule_dpif_is_table_miss(const struct rule_dpif *rule)
{
    return rule_is_table_miss(&rule->up);
}

ovs_be64
rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
    OVS_REQUIRES(rule->up.mutex)
{
    return rule->up.flow_cookie;
}

void
rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
                          uint16_t hard_timeout)
{
    ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
}

/* Returns 'rule''s actions.  The caller owns a reference on the returned
 * actions and must eventually release it (with rule_actions_unref()) to avoid
 * a memory leak. */
struct rule_actions *
rule_dpif_get_actions(const struct rule_dpif *rule)
{
    return rule_get_actions(&rule->up);
}
static struct subfacet *
subfacet_find(struct dpif_backer *backer, const struct nlattr *key,
              size_t key_len, uint32_t key_hash)
{
    struct subfacet *subfacet;

    HMAP_FOR_EACH_WITH_HASH (subfacet, hmap_node, key_hash,
                             &backer->subfacets) {
        if (subfacet->key_len == key_len
            && !memcmp(key, subfacet->key, key_len)) {
            return subfacet;
        }
    }

    return NULL;
}
/* Creates and returns a new subfacet within 'facet' for the flow in 'miss'.
 * 'key_hash' must be a hash over miss->key.  The caller must have already
 * ensured that no such subfacet already exists. */
static struct subfacet *
subfacet_create(struct facet *facet, struct flow_miss *miss, uint32_t key_hash)
{
    struct dpif_backer *backer = miss->ofproto->backer;
    const struct nlattr *key = miss->key;
    size_t key_len = miss->key_len;
    struct subfacet *subfacet;

    subfacet = (list_is_empty(&facet->subfacets)
                ? &facet->one_subfacet
                : xmalloc(sizeof *subfacet));

    COVERAGE_INC(subfacet_create);
    hmap_insert(&backer->subfacets, &subfacet->hmap_node, key_hash);
    list_push_back(&facet->subfacets, &subfacet->list_node);
    subfacet->facet = facet;
    subfacet->key = xmemdup(key, key_len);
    subfacet->key_len = key_len;
    subfacet->used = miss->stats.used;
    subfacet->created = subfacet->used;
    subfacet->dp_packet_count = 0;
    subfacet->dp_byte_count = 0;
    subfacet->path = SF_NOT_INSTALLED;
    subfacet->backer = backer;

    return subfacet;
}
/* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from
 * its facet within 'ofproto', and frees it. */
static void
subfacet_destroy__(struct subfacet *subfacet)
{
    struct facet *facet = subfacet->facet;

    COVERAGE_INC(subfacet_destroy);
    subfacet_uninstall(subfacet);
    hmap_remove(&subfacet->backer->subfacets, &subfacet->hmap_node);
    list_remove(&subfacet->list_node);
    free(subfacet->key);
    if (subfacet != &facet->one_subfacet) {
        free(subfacet);
    }
}

/* Destroys 'subfacet', as with subfacet_destroy__(), and then if this was the
 * last remaining subfacet in its facet destroys the facet too. */
static void
subfacet_destroy(struct subfacet *subfacet)
{
    struct facet *facet = subfacet->facet;

    if (list_is_singleton(&facet->subfacets)) {
        /* facet_remove() needs at least one subfacet (it will remove it). */
        facet_remove(facet);
    } else {
        subfacet_destroy__(subfacet);
    }
}
static void
subfacet_destroy_batch(struct dpif_backer *backer,
                       struct subfacet **subfacets, int n)
{
    struct dpif_op ops[SUBFACET_DESTROY_MAX_BATCH];
    struct dpif_op *opsp[SUBFACET_DESTROY_MAX_BATCH];
    struct dpif_flow_stats stats[SUBFACET_DESTROY_MAX_BATCH];
    int i;

    for (i = 0; i < n; i++) {
        ops[i].type = DPIF_OP_FLOW_DEL;
        ops[i].u.flow_del.key = subfacets[i]->key;
        ops[i].u.flow_del.key_len = subfacets[i]->key_len;
        ops[i].u.flow_del.stats = &stats[i];
        opsp[i] = &ops[i];
    }

    dpif_operate(backer->dpif, opsp, n);
    for (i = 0; i < n; i++) {
        subfacet_reset_dp_stats(subfacets[i], &stats[i]);
        subfacets[i]->path = SF_NOT_INSTALLED;
        subfacet_destroy(subfacets[i]);
    }
}
/* Updates 'subfacet''s datapath flow, setting its actions to 'actions_len'
 * bytes of actions in 'actions'.  If 'stats' is non-null, statistics counters
 * in the datapath will be zeroed and 'stats' will be updated with traffic new
 * since 'subfacet' was last updated.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions,
                 struct dpif_flow_stats *stats)
{
    struct facet *facet = subfacet->facet;
    enum subfacet_path path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
    const struct nlattr *actions = odp_actions->data;
    size_t actions_len = odp_actions->size;
    struct odputil_keybuf maskbuf;
    struct ofpbuf mask;

    uint64_t slow_path_stub[128 / 8];
    enum dpif_flow_put_flags flags;
    int ret;

    flags = subfacet->path == SF_NOT_INSTALLED ? DPIF_FP_CREATE
                                               : DPIF_FP_MODIFY;
    if (stats) {
        flags |= DPIF_FP_ZERO_STATS;
    }

    if (path == SF_SLOW_PATH) {
        compose_slow_path(facet->ofproto, &facet->flow, facet->xout.slow,
                          slow_path_stub, sizeof slow_path_stub,
                          &actions, &actions_len);
    }

    ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
    if (enable_megaflows) {
        odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
                               &facet->flow, UINT32_MAX);
    }

    ret = dpif_flow_put(subfacet->backer->dpif, flags, subfacet->key,
                        subfacet->key_len, mask.data, mask.size,
                        actions, actions_len, stats);
    if (ret) {
        subfacet_reset_dp_stats(subfacet, stats);
        COVERAGE_INC(subfacet_install_fail);
    }

    subfacet->path = path;
    return ret;
}
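/* Illustrative sketch (not part of the original source): a revalidation path
 * that wants fresh actions installed and the displaced counters credited back
 * might do, for some subfacet 'sf' and datapath action buffer 'acts':
 *
 *     struct dpif_flow_stats stats;
 *
 *     if (!subfacet_install(sf, acts, &stats)) {
 *         subfacet_update_stats(sf, &stats);
 *     }
 *
 * Passing a non-null 'stats' is what asks the datapath to zero its counters,
 * per the comment above; this mirrors how facet_revalidate() uses the pair. */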
/* If 'subfacet' is installed in the datapath, uninstalls it. */
static void
subfacet_uninstall(struct subfacet *subfacet)
{
    if (subfacet->path != SF_NOT_INSTALLED) {
        struct ofproto_dpif *ofproto = subfacet->facet->ofproto;
        struct dpif_flow_stats stats;
        int error;

        error = dpif_flow_del(ofproto->backer->dpif, subfacet->key,
                              subfacet->key_len, &stats);
        subfacet_reset_dp_stats(subfacet, &stats);
        if (!error) {
            subfacet_update_stats(subfacet, &stats);
        }
        subfacet->path = SF_NOT_INSTALLED;
    } else {
        ovs_assert(subfacet->dp_packet_count == 0);
        ovs_assert(subfacet->dp_byte_count == 0);
    }
}

/* Resets 'subfacet''s datapath statistics counters.  This should be called
 * when 'subfacet''s statistics are cleared in the datapath.  If 'stats' is
 * non-null, it should contain the statistics returned by dpif when 'subfacet'
 * was reset in the datapath.  'stats' will be modified to include only
 * statistics new since 'subfacet' was last updated. */
static void
subfacet_reset_dp_stats(struct subfacet *subfacet,
                        struct dpif_flow_stats *stats)
{
    if (stats
        && subfacet->dp_packet_count <= stats->n_packets
        && subfacet->dp_byte_count <= stats->n_bytes) {
        stats->n_packets -= subfacet->dp_packet_count;
        stats->n_bytes -= subfacet->dp_byte_count;
    }

    subfacet->dp_packet_count = 0;
    subfacet->dp_byte_count = 0;
}
/* Folds the statistics from 'stats' into the counters in 'subfacet'.
 *
 * Because of the meaning of a subfacet's counters, it only makes sense to do
 * this if 'stats' are not tracked in the datapath, that is, if 'stats'
 * represents a packet that was sent by hand or if it represents statistics
 * that have been cleared out of the datapath. */
static void
subfacet_update_stats(struct subfacet *subfacet,
                      const struct dpif_flow_stats *stats)
{
    if (stats->n_packets || stats->used > subfacet->used) {
        struct facet *facet = subfacet->facet;

        subfacet->used = MAX(subfacet->used, stats->used);
        facet->used = MAX(facet->used, stats->used);
        facet->packet_count += stats->n_packets;
        facet->byte_count += stats->n_bytes;
        facet->tcp_flags |= stats->tcp_flags;
    }
}
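/* Illustrative sketch (not part of the original source): the two helpers above
 * are designed to be used together when a datapath flow's counters are cleared
 * or the flow is deleted, as subfacet_uninstall() does:
 *
 *     subfacet_reset_dp_stats(sf, &del_stats);   // keep only the unseen delta
 *     subfacet_update_stats(sf, &del_stats);     // fold that delta into facet
 *
 * where 'del_stats' holds whatever the dpif call returned for the flow. */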
/* Lookup 'flow' in 'ofproto''s classifier.  If 'wc' is non-null, sets
 * the fields that were relevant as part of the lookup. */
void
rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
                 struct flow_wildcards *wc, struct rule_dpif **rule)
{
    struct ofport_dpif *port;

    if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) {
        return;
    }

    port = get_ofp_port(ofproto, flow->in_port.ofp_port);
    if (!port) {
        VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
                     flow->in_port.ofp_port);
    }

    choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
                     ofproto->no_packet_in_rule, rule);
}
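/* Illustrative sketch (not part of the original source): rule_dpif_lookup()
 * always hands back a referenced rule (a real match, the miss rule, or the
 * no-packet-in rule), so a caller typically looks like:
 *
 *     struct flow_wildcards wc;
 *     struct rule_dpif *rule;
 *
 *     flow_wildcards_init_catchall(&wc);
 *     rule_dpif_lookup(ofproto, &flow, &wc, &rule);
 *     ...use 'rule' and the populated 'wc'...
 *     rule_dpif_unref(rule);
 *
 * This is the same pattern facet_revalidate() follows above. */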
bool
rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
                          const struct flow *flow, struct flow_wildcards *wc,
                          uint8_t table_id, struct rule_dpif **rule)
{
    const struct cls_rule *cls_rule;
    struct classifier *cls;
    bool frag;

    if (table_id >= N_TABLES) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
        wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
    }

    cls = &ofproto->up.tables[table_id].cls;
    ovs_rwlock_rdlock(&cls->rwlock);
    frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
    if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
        /* We must pretend that transport ports are unavailable. */
        struct flow ofpc_normal_flow = *flow;
        ofpc_normal_flow.tp_src = htons(0);
        ofpc_normal_flow.tp_dst = htons(0);
        cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
    } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) {
        cls_rule = &ofproto->drop_frags_rule->up.cr;
        /* Frag mask in wc already set above. */
    } else {
        cls_rule = classifier_lookup(cls, flow, wc);
    }

    *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
    rule_dpif_ref(*rule);
    ovs_rwlock_unlock(&cls->rwlock);

    return *rule != NULL;
}
/* Given a port configuration (specified as zero if there's no port), chooses
 * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
 * flow table miss. */
void
choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
                 struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule)
{
    *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
    rule_dpif_ref(*rule);
}

void
rule_dpif_ref(struct rule_dpif *rule)
{
    if (rule) {
        ofproto_rule_ref(&rule->up);
    }
}

void
rule_dpif_unref(struct rule_dpif *rule)
{
    if (rule) {
        ofproto_rule_unref(&rule->up);
    }
}

static void
complete_operation(struct rule_dpif *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);

    ofproto->backer->need_revalidate = REV_FLOW_TABLE;
    ofoperation_complete(rule->up.pending, 0);
}
static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
{
    return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
}

static struct rule *
rule_alloc(void)
{
    struct rule_dpif *rule = xmalloc(sizeof *rule);
    return &rule->up;
}

static void
rule_dealloc(struct rule *rule_)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    free(rule);
}

static enum ofperr
rule_construct(struct rule *rule_)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    ovs_mutex_init(&rule->stats_mutex);
    ovs_mutex_lock(&rule->stats_mutex);
    rule->packet_count = 0;
    rule->byte_count = 0;
    ovs_mutex_unlock(&rule->stats_mutex);
    return 0;
}

static void
rule_insert(struct rule *rule_)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    complete_operation(rule);
}

static void
rule_delete(struct rule *rule_)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    complete_operation(rule);
}

static void
rule_destruct(struct rule *rule_)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    ovs_mutex_destroy(&rule->stats_mutex);
}

static void
rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);

    /* Start from historical data for 'rule' itself that are no longer tracked
     * in facets.  This counts, for example, facets that have expired. */
    ovs_mutex_lock(&rule->stats_mutex);
    *packets = rule->packet_count;
    *bytes = rule->byte_count;
    ovs_mutex_unlock(&rule->stats_mutex);
}

void
rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
                  struct ofpbuf *packet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);

    ofproto_dpif_execute_actions(ofproto, flow, rule, NULL, 0, packet);
}

static enum ofperr
rule_execute(struct rule *rule, const struct flow *flow,
             struct ofpbuf *packet)
{
    rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
    ofpbuf_delete(packet);
    return 0;
}

static void
rule_modify_actions(struct rule *rule_, bool reset_counters)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);

    if (reset_counters) {
        ovs_mutex_lock(&rule->stats_mutex);
        rule->packet_count = 0;
        rule->byte_count = 0;
        ovs_mutex_unlock(&rule->stats_mutex);
    }

    complete_operation(rule);
}
static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
{
    return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
}

static struct ofgroup *
group_alloc(void)
{
    struct group_dpif *group = xzalloc(sizeof *group);
    return &group->up;
}

static void
group_dealloc(struct ofgroup *group_)
{
    struct group_dpif *group = group_dpif_cast(group_);
    free(group);
}

static void
group_construct_stats(struct group_dpif *group)
    OVS_REQUIRES(group->stats_mutex)
{
    group->packet_count = 0;
    group->byte_count = 0;
    if (!group->bucket_stats) {
        group->bucket_stats = xcalloc(group->up.n_buckets,
                                      sizeof *group->bucket_stats);
    } else {
        memset(group->bucket_stats, 0, group->up.n_buckets *
               sizeof *group->bucket_stats);
    }
}

static enum ofperr
group_construct(struct ofgroup *group_)
{
    struct group_dpif *group = group_dpif_cast(group_);
    ovs_mutex_init(&group->stats_mutex);
    ovs_mutex_lock(&group->stats_mutex);
    group_construct_stats(group);
    ovs_mutex_unlock(&group->stats_mutex);
    return 0;
}

static void
group_destruct__(struct group_dpif *group)
    OVS_REQUIRES(group->stats_mutex)
{
    free(group->bucket_stats);
    group->bucket_stats = NULL;
}

static void
group_destruct(struct ofgroup *group_)
{
    struct group_dpif *group = group_dpif_cast(group_);
    ovs_mutex_lock(&group->stats_mutex);
    group_destruct__(group);
    ovs_mutex_unlock(&group->stats_mutex);
    ovs_mutex_destroy(&group->stats_mutex);
}

static enum ofperr
group_modify(struct ofgroup *group_, struct ofgroup *victim_)
{
    struct group_dpif *group = group_dpif_cast(group_);
    struct group_dpif *victim = group_dpif_cast(victim_);

    ovs_mutex_lock(&group->stats_mutex);
    if (victim->up.n_buckets < group->up.n_buckets) {
        group_destruct__(group);
    }
    group_construct_stats(group);
    ovs_mutex_unlock(&group->stats_mutex);

    return 0;
}

static enum ofperr
group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
{
    struct group_dpif *group = group_dpif_cast(group_);

    /* Start from historical data for 'group' itself that are no longer tracked
     * in facets.  This counts, for example, facets that have expired. */
    ovs_mutex_lock(&group->stats_mutex);
    ogs->packet_count = group->packet_count;
    ogs->byte_count = group->byte_count;
    memcpy(ogs->bucket_stats, group->bucket_stats,
           group->up.n_buckets * sizeof *group->bucket_stats);
    ovs_mutex_unlock(&group->stats_mutex);

    return 0;
}
bool
group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                  struct group_dpif **group)
    OVS_TRY_RDLOCK(true, (*group)->up.rwlock)
{
    struct ofgroup *ofgroup;
    bool found;

    found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
    *group = found ? group_dpif_cast(ofgroup) : NULL;

    return found;
}

void
group_dpif_release(struct group_dpif *group)
    OVS_RELEASES(group->up.rwlock)
{
    ofproto_group_release(&group->up);
}

void
group_dpif_get_buckets(const struct group_dpif *group,
                       const struct list **buckets)
{
    *buckets = &group->up.buckets;
}

enum ofp11_group_type
group_dpif_get_type(const struct group_dpif *group)
{
    return group->up.type;
}
/* Sends 'packet' out 'ofport'.
 * May modify 'packet'.
 * Returns 0 if successful, otherwise a positive errno value. */
int
ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    int error;

    error = xlate_send_packet(ofport, packet);

    ovs_mutex_lock(&ofproto->stats_mutex);
    ofproto->stats.tx_packets++;
    ofproto->stats.tx_bytes += packet->size;
    ovs_mutex_unlock(&ofproto->stats_mutex);
    return error;
}
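/* Illustrative sketch (not part of the original source): a periodic probe
 * sender that owns its own buffer could transmit with the helper above;
 * 'template', 'ofport' and the rate limiter are assumed to exist in the
 * caller:
 *
 *     struct ofpbuf *pkt = ofpbuf_clone(template);   // may be modified
 *     int error = ofproto_dpif_send_packet(ofport, pkt);
 *     if (error) {
 *         VLOG_WARN_RL(&rl, "probe send failed (%s)", ovs_strerror(error));
 *     }
 *     ofpbuf_delete(pkt);
 */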
/* Composes an ODP action for a "slow path" action for 'flow' within 'ofproto'.
 * The action will state 'slow' as the reason that the action is in the slow
 * path.  (This is purely informational: it allows a human viewing "ovs-dpctl
 * dump-flows" output to see why a flow is in the slow path.)
 *
 * The 'stub_size' bytes in 'stub' will be used to store the action.
 * 'stub_size' must be large enough for the action.
 *
 * The action and its size will be stored in '*actionsp' and '*actions_lenp',
 * respectively. */
static void
compose_slow_path(const struct ofproto_dpif *ofproto, const struct flow *flow,
                  enum slow_path_reason slow,
                  uint64_t *stub, size_t stub_size,
                  const struct nlattr **actionsp, size_t *actions_lenp)
{
    union user_action_cookie cookie;
    struct ofpbuf buf;

    cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
    cookie.slow_path.unused = 0;
    cookie.slow_path.reason = slow;

    ofpbuf_use_stack(&buf, stub, stub_size);
    if (slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) {
        uint32_t pid = dpif_port_get_pid(ofproto->backer->dpif,
                                         ODPP_NONE);
        odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
    } else {
        odp_port_t odp_port;
        uint32_t pid;

        odp_port = ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port);
        pid = dpif_port_get_pid(ofproto->backer->dpif, odp_port);
        odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
    }
    *actionsp = buf.data;
    *actions_lenp = buf.size;
}
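/* Illustrative sketch (not part of the original source): callers that need the
 * slow-path encoding point 'actions'/'actions_len' at a stack stub, as
 * subfacet_install() does above:
 *
 *     uint64_t stub[128 / 8];
 *     const struct nlattr *acts;
 *     size_t acts_len;
 *
 *     compose_slow_path(ofproto, &facet->flow, facet->xout.slow,
 *                       stub, sizeof stub, &acts, &acts_len);
 *
 * 'acts' then points into 'stub', so it is only valid while 'stub' is live. */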
static bool
set_frag_handling(struct ofproto *ofproto_,
                  enum ofp_config_flags frag_handling)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    if (frag_handling != OFPC_FRAG_REASM) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        return true;
    } else {
        return false;
    }
}

static enum ofperr
packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
           const struct flow *flow,
           const struct ofpact *ofpacts, size_t ofpacts_len)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    ofproto_dpif_execute_actions(ofproto, flow, NULL, ofpacts,
                                 ofpacts_len, packet);
    return 0;
}

static int
set_netflow(struct ofproto *ofproto_,
            const struct netflow_options *netflow_options)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (netflow_options) {
        if (!ofproto->netflow) {
            ofproto->netflow = netflow_create();
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
        }
        return netflow_set_options(ofproto->netflow, netflow_options);
    } else if (ofproto->netflow) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        netflow_unref(ofproto->netflow);
        ofproto->netflow = NULL;
    }

    return 0;
}

static void
get_netflow_ids(const struct ofproto *ofproto_,
                uint8_t *engine_type, uint8_t *engine_id)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
}

static struct ofproto_dpif *
ofproto_dpif_lookup(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
                             hash_string(name, 0), &all_ofproto_dpifs) {
        if (!strcmp(ofproto->up.name, name)) {
            return ofproto;
        }
    }
    return NULL;
}
static void
ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
                          const char *argv[], void *aux OVS_UNUSED)
{
    struct ofproto_dpif *ofproto;

    if (argc > 1) {
        ofproto = ofproto_dpif_lookup(argv[1]);
        if (!ofproto) {
            unixctl_command_reply_error(conn, "no such bridge");
            return;
        }
        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
        mac_learning_flush(ofproto->ml);
        ovs_rwlock_unlock(&ofproto->ml->rwlock);
    } else {
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
            mac_learning_flush(ofproto->ml);
            ovs_rwlock_unlock(&ofproto->ml->rwlock);
        }
    }

    unixctl_command_reply(conn, "table successfully flushed");
}

static struct ofport_dpif *
ofbundle_get_a_port(const struct ofbundle *bundle)
{
    return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
                        bundle_node);
}

static void
ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                         const char *argv[], void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    const struct ofproto_dpif *ofproto;
    const struct mac_entry *e;

    ofproto = ofproto_dpif_lookup(argv[1]);
    if (!ofproto) {
        unixctl_command_reply_error(conn, "no such bridge");
        return;
    }

    ds_put_cstr(&ds, " port VLAN MAC Age\n");
    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
    LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
        struct ofbundle *bundle = e->port.p;
        char name[OFP_MAX_PORT_NAME_LEN];

        ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
                               name, sizeof name);
        ds_put_format(&ds, "%5s %4d "ETH_ADDR_FMT" %3d\n",
                      name, e->vlan, ETH_ADDR_ARGS(e->mac),
                      mac_entry_age(ofproto->ml, e));
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);
    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}
struct trace_ctx {
    struct xlate_out xout;
    struct xlate_in xin;
    struct flow flow;
    struct ds *result;
};
static void
trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
{
    struct rule_actions *actions;
    ovs_be64 cookie;

    ds_put_char_multiple(result, '\t', level);
    if (!rule) {
        ds_put_cstr(result, "No match\n");
        return;
    }

    ovs_mutex_lock(&rule->up.mutex);
    cookie = rule->up.flow_cookie;
    ovs_mutex_unlock(&rule->up.mutex);

    ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
                  rule ? rule->up.table_id : 0, ntohll(cookie));
    cls_rule_format(&rule->up.cr, result);
    ds_put_char(result, '\n');

    actions = rule_dpif_get_actions(rule);

    ds_put_char_multiple(result, '\t', level);
    ds_put_cstr(result, "OpenFlow actions=");
    ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
    ds_put_char(result, '\n');

    rule_actions_unref(actions);
}

static void
trace_format_flow(struct ds *result, int level, const char *title,
                  struct trace_ctx *trace)
{
    ds_put_char_multiple(result, '\t', level);
    ds_put_format(result, "%s: ", title);
    if (flow_equal(&trace->xin.flow, &trace->flow)) {
        ds_put_cstr(result, "unchanged");
    } else {
        flow_format(result, &trace->xin.flow);
        trace->flow = trace->xin.flow;
    }
    ds_put_char(result, '\n');
}

static void
trace_format_regs(struct ds *result, int level, const char *title,
                  struct trace_ctx *trace)
{
    size_t i;

    ds_put_char_multiple(result, '\t', level);
    ds_put_format(result, "%s:", title);
    for (i = 0; i < FLOW_N_REGS; i++) {
        ds_put_format(result, " reg%"PRIuSIZE"=0x%"PRIx32, i, trace->flow.regs[i]);
    }
    ds_put_char(result, '\n');
}

static void
trace_format_odp(struct ds *result, int level, const char *title,
                 struct trace_ctx *trace)
{
    struct ofpbuf *odp_actions = &trace->xout.odp_actions;

    ds_put_char_multiple(result, '\t', level);
    ds_put_format(result, "%s: ", title);
    format_odp_actions(result, odp_actions->data, odp_actions->size);
    ds_put_char(result, '\n');
}

static void
trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
{
    struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
    struct ds *result = trace->result;

    ds_put_char(result, '\n');
    trace_format_flow(result, recurse + 1, "Resubmitted flow", trace);
    trace_format_regs(result, recurse + 1, "Resubmitted regs", trace);
    trace_format_odp(result, recurse + 1, "Resubmitted odp", trace);
    trace_format_rule(result, recurse + 1, rule);
}

static void
trace_report(struct xlate_in *xin, const char *s, int recurse)
{
    struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
    struct ds *result = trace->result;

    ds_put_char_multiple(result, '\t', recurse);
    ds_put_cstr(result, s);
    ds_put_char(result, '\n');
}
/* Parses the 'argc' elements of 'argv', ignoring argv[0].  The following
 * forms are supported:
 *
 *     - [dpname] odp_flow [-generate | packet]
 *     - bridge br_flow [-generate | packet]
 *
 * On success, initializes '*ofprotop' and 'flow' and returns NULL.  On failure
 * returns a nonnull malloced error message. */
static char * WARN_UNUSED_RESULT
parse_flow_and_packet(int argc, const char *argv[],
                      struct ofproto_dpif **ofprotop, struct flow *flow,
                      struct ofpbuf **packetp)
{
    const struct dpif_backer *backer = NULL;
    const char *error = NULL;
    char *m_err = NULL;
    struct simap port_names = SIMAP_INITIALIZER(&port_names);
    struct ofpbuf *packet;
    struct ofpbuf odp_key;
    struct ofpbuf odp_mask;

    ofpbuf_init(&odp_key, 0);
    ofpbuf_init(&odp_mask, 0);

    /* Handle "-generate" or a hex string as the last argument. */
    if (!strcmp(argv[argc - 1], "-generate")) {
        packet = ofpbuf_new(0);
        argc--;
    } else {
        error = eth_from_hex(argv[argc - 1], &packet);
        if (!error) {
            argc--;
        } else if (argc == 4) {
            /* The 3-argument form must end in "-generate" or a hex string. */
            goto exit;
        }
    }

    /* odp_flow can have its in_port specified as a name instead of port no.
     * We do not yet know whether a given flow is a odp_flow or a br_flow.
     * But, to know whether a flow is odp_flow through odp_flow_from_string(),
     * we need to create a simap of name to port no. */
    if (argc == 3) {
        const char *dp_type;
        if (!strncmp(argv[1], "ovs-", 4)) {
            dp_type = argv[1] + 4;
        } else {
            dp_type = argv[1];
        }
        backer = shash_find_data(&all_dpif_backers, dp_type);
    } else if (argc == 2) {
        struct shash_node *node;
        if (shash_count(&all_dpif_backers) == 1) {
            node = shash_first(&all_dpif_backers);
            backer = node->data;
        }
    } else {
        error = "Syntax error";
        goto exit;
    }

    if (backer && backer->dpif) {
        struct dpif_port dpif_port;
        struct dpif_port_dump port_dump;
        DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
            simap_put(&port_names, dpif_port.name,
                      odp_to_u32(dpif_port.port_no));
        }
    }

    /* Parse the flow and determine whether a datapath or
     * bridge is specified. If function odp_flow_key_from_string()
     * returns 0, the flow is a odp_flow. If function
     * parse_ofp_exact_flow() returns NULL, the flow is a br_flow. */
    if (!odp_flow_from_string(argv[argc - 1], &port_names,
                              &odp_key, &odp_mask)) {
        if (!backer) {
            error = "Cannot find the datapath";
            goto exit;
        }

        if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, flow,
                          NULL, ofprotop, NULL, NULL, NULL, NULL)) {
            error = "Invalid datapath flow";
            goto exit;
        }
    } else {
        char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);

        if (err) {
            m_err = xasprintf("Bad flow syntax: %s", err);
            free(err);
            goto exit;
        } else {
            if (argc != 3) {
                error = "Must specify bridge name";
                goto exit;
            }

            *ofprotop = ofproto_dpif_lookup(argv[1]);
            if (!*ofprotop) {
                error = "Unknown bridge name";
                goto exit;
            }
        }
    }

    /* Generate a packet, if requested. */
    if (packet) {
        if (!packet->size) {
            flow_compose(packet, flow);
        } else {
            union flow_in_port in_port = flow->in_port;

            /* Use the metadata from the flow and the packet argument
             * to reconstruct the flow. */
            flow_extract(packet, flow->skb_priority, flow->pkt_mark, NULL,
                         &in_port, flow);
        }
    }

exit:
    if (error && !m_err) {
        m_err = xstrdup(error);
    }
    if (m_err) {
        ofpbuf_delete(packet);
        packet = NULL;
    }
    *packetp = packet;
    ofpbuf_uninit(&odp_key);
    ofpbuf_uninit(&odp_mask);
    simap_destroy(&port_names);
    return m_err;
}
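/* Illustrative sketch (not part of the original source): the forms accepted
 * above correspond to invocations such as (flow syntax here is only an
 * assumption for illustration):
 *
 *     ovs-appctl ofproto/trace br0 in_port=1,dl_type=0x0800 -generate
 *     ovs-appctl ofproto/trace ovs-system "in_port(1),eth_type(0x0800)"
 *
 * The authoritative usage strings are the ones registered in
 * ofproto_dpif_unixctl_init() below. */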
static void
ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
                      void *aux OVS_UNUSED)
{
    struct ofproto_dpif *ofproto;
    struct ofpbuf *packet;
    char *error;
    struct flow flow;

    error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
    if (!error) {
        struct ds result;

        ds_init(&result);
        ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
        unixctl_command_reply(conn, ds_cstr(&result));
        ds_destroy(&result);
        ofpbuf_delete(packet);
    } else {
        unixctl_command_reply_error(conn, error);
        free(error);
    }
}
static void
ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
                              const char *argv[], void *aux OVS_UNUSED)
{
    enum ofputil_protocol usable_protocols;
    struct ofproto_dpif *ofproto;
    bool enforce_consistency;
    struct ofpbuf ofpacts;
    struct ofpbuf *packet;
    struct ds result;
    struct flow flow;
    uint16_t in_port;

    /* Three kinds of error return values! */
    enum ofperr retval;
    char *error;

    packet = NULL;
    ds_init(&result);
    ofpbuf_init(&ofpacts, 0);

    /* Parse actions. */
    error = parse_ofpacts(argv[--argc], &ofpacts, &usable_protocols);
    if (error) {
        unixctl_command_reply_error(conn, error);
        free(error);
        goto exit;
    }

    /* OpenFlow 1.1 and later suggest that the switch enforces certain forms of
     * consistency between the flow and the actions.  With -consistent, we
     * enforce consistency even for a flow supported in OpenFlow 1.0. */
    if (!strcmp(argv[1], "-consistent")) {
        enforce_consistency = true;
        argv++;
        argc--;
    } else {
        enforce_consistency = false;
    }

    error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
    if (error) {
        unixctl_command_reply_error(conn, error);
        free(error);
        goto exit;
    }

    /* Do the same checks as handle_packet_out() in ofproto.c.
     *
     * We pass a 'table_id' of 0 to ofproto_check_ofpacts(), which isn't
     * strictly correct because these actions aren't in any table, but it's OK
     * because 'table_id' is used only to check goto_table instructions, and
     * packet-outs take a list of actions and therefore can't include
     * instructions.
     *
     * We skip the "meter" check here because meter is an instruction, not an
     * action, and thus cannot appear in ofpacts. */
    in_port = ofp_to_u16(flow.in_port.ofp_port);
    if (in_port >= ofproto->up.max_ports && in_port < ofp_to_u16(OFPP_MAX)) {
        unixctl_command_reply_error(conn, "invalid in_port");
        goto exit;
    }
    if (enforce_consistency) {
        retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, &flow,
                                           u16_to_ofp(ofproto->up.max_ports),
                                           0, 0, usable_protocols);
    } else {
        retval = ofpacts_check(ofpacts.data, ofpacts.size, &flow,
                               u16_to_ofp(ofproto->up.max_ports), 0, 0,
                               &usable_protocols);
    }

    if (retval) {
        ds_put_format(&result, "Bad actions: %s", ofperr_to_string(retval));
        unixctl_command_reply_error(conn, ds_cstr(&result));
        goto exit;
    }

    ofproto_trace(ofproto, &flow, packet, ofpacts.data, ofpacts.size, &result);
    unixctl_command_reply(conn, ds_cstr(&result));

exit:
    ds_destroy(&result);
    ofpbuf_delete(packet);
    ofpbuf_uninit(&ofpacts);
}
/* Implements a "trace" through 'ofproto''s flow table, appending a textual
 * description of the results to 'ds'.
 *
 * The trace follows a packet with the specified 'flow' through the flow
 * table.  'packet' may be nonnull to trace an actual packet, with consequent
 * side effects (if it is nonnull then its flow must be 'flow').
 *
 * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
 * trace, otherwise the actions are determined by a flow table lookup. */
static void
ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
              const struct ofpbuf *packet,
              const struct ofpact ofpacts[], size_t ofpacts_len,
              struct ds *ds)
{
    struct rule_dpif *rule;
    struct flow_wildcards wc;

    ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
    ds_put_cstr(ds, "Flow: ");
    flow_format(ds, flow);
    ds_put_char(ds, '\n');

    flow_wildcards_init_catchall(&wc);
    if (ofpacts) {
        rule = NULL;
    } else {
        rule_dpif_lookup(ofproto, flow, &wc, &rule);

        trace_format_rule(ds, 0, rule);
        if (rule == ofproto->miss_rule) {
            ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n");
        } else if (rule == ofproto->no_packet_in_rule) {
            ds_put_cstr(ds, "\nNo match, packets dropped because "
                        "OFPPC_NO_PACKET_IN is set on in_port.\n");
        } else if (rule == ofproto->drop_frags_rule) {
            ds_put_cstr(ds, "\nPackets dropped because they are IP fragments "
                        "and the fragment handling mode is \"drop\".\n");
        }
    }

    if (rule || ofpacts) {
        uint64_t odp_actions_stub[1024 / 8];
        struct ofpbuf odp_actions;
        struct trace_ctx trace;
        struct match match;
        uint8_t tcp_flags;

        tcp_flags = packet ? packet_get_tcp_flags(packet, flow) : 0;
        trace.result = ds;
        trace.flow = *flow;
        ofpbuf_use_stub(&odp_actions,
                        odp_actions_stub, sizeof odp_actions_stub);
        xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet);
        if (ofpacts) {
            trace.xin.ofpacts = ofpacts;
            trace.xin.ofpacts_len = ofpacts_len;
        }
        trace.xin.resubmit_hook = trace_resubmit;
        trace.xin.report_hook = trace_report;

        xlate_actions(&trace.xin, &trace.xout);
        flow_wildcards_or(&trace.xout.wc, &trace.xout.wc, &wc);

        ds_put_char(ds, '\n');
        trace_format_flow(ds, 0, "Final flow", &trace);

        match_init(&match, flow, &trace.xout.wc);
        ds_put_cstr(ds, "Relevant fields: ");
        match_format(&match, ds, OFP_DEFAULT_PRIORITY);
        ds_put_char(ds, '\n');

        ds_put_cstr(ds, "Datapath actions: ");
        format_odp_actions(ds, trace.xout.odp_actions.data,
                           trace.xout.odp_actions.size);

        if (trace.xout.slow) {
            enum slow_path_reason slow;

            ds_put_cstr(ds, "\nThis flow is handled by the userspace "
                        "slow path because it:");

            slow = trace.xout.slow;
            while (slow) {
                enum slow_path_reason bit = rightmost_1bit(slow);

                ds_put_format(ds, "\n\t- %s.",
                              slow_path_reason_to_explanation(bit));
                slow &= ~bit;
            }
        }

        xlate_out_uninit(&trace.xout);
    }

    rule_dpif_unref(rule);
}
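/* Illustrative sketch (not part of the original source): tracing a flow
 * without a packet and without overriding the actions, as
 * ofproto_unixctl_trace() does above:
 *
 *     struct ds result = DS_EMPTY_INITIALIZER;
 *
 *     ofproto_trace(ofproto, &flow, NULL, NULL, 0, &result);
 *     VLOG_INFO("%s", ds_cstr(&result));
 *     ds_destroy(&result);
 */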
/* Runs a self-check of flow translations in 'ofproto'.  Appends a message to
 * 'reply' describing the results. */
static void
ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
{
    struct cls_cursor cursor;
    struct facet *facet;
    int errors;

    errors = 0;
    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    cls_cursor_init(&cursor, &ofproto->facets, NULL);
    CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
        if (!facet_check_consistency(facet)) {
            errors++;
        }
    }
    ovs_rwlock_unlock(&ofproto->facets.rwlock);
    if (errors) {
        ofproto->backer->need_revalidate = REV_INCONSISTENCY;
    }

    if (errors) {
        ds_put_format(reply, "%s: self-check failed (%d errors)\n",
                      ofproto->up.name, errors);
    } else {
        ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
    }
}

static void
ofproto_dpif_self_check(struct unixctl_conn *conn,
                        int argc, const char *argv[], void *aux OVS_UNUSED)
{
    struct ds reply = DS_EMPTY_INITIALIZER;
    struct ofproto_dpif *ofproto;

    if (argc > 1) {
        ofproto = ofproto_dpif_lookup(argv[1]);
        if (!ofproto) {
            unixctl_command_reply_error(conn, "Unknown ofproto (use "
                                        "ofproto/list for help)");
            return;
        }
        ofproto_dpif_self_check__(ofproto, &reply);
    } else {
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            ofproto_dpif_self_check__(ofproto, &reply);
        }
    }

    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
}
/* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
 * of the 'ofproto_shash' nodes.  It is the responsibility of the caller
 * to destroy 'ofproto_shash' and free the returned value. */
static const struct shash_node **
get_ofprotos(struct shash *ofproto_shash)
{
    const struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
        shash_add_nocopy(ofproto_shash, name, ofproto);
    }

    return shash_sort(ofproto_shash);
}

static void
ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[] OVS_UNUSED,
                              void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    struct shash ofproto_shash;
    const struct shash_node **sorted_ofprotos;
    int i;

    shash_init(&ofproto_shash);
    sorted_ofprotos = get_ofprotos(&ofproto_shash);
    for (i = 0; i < shash_count(&ofproto_shash); i++) {
        const struct shash_node *node = sorted_ofprotos[i];
        ds_put_format(&ds, "%s\n", node->name);
    }

    shash_destroy(&ofproto_shash);
    free(sorted_ofprotos);

    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}
static void
dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
{
    const struct shash_node **ofprotos;
    struct ofproto_dpif *ofproto;
    struct shash ofproto_shash;
    uint64_t n_hit, n_missed;
    size_t i;

    n_hit = n_missed = 0;
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            n_missed += ofproto->n_missed;
            n_hit += ofproto->n_hit;
        }
    }

    ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
                  dpif_name(backer->dpif), n_hit, n_missed);
    ds_put_format(ds, "\tflows: cur: %"PRIuSIZE", avg: %u, max: %u\n",
                  hmap_count(&backer->subfacets), backer->avg_n_subfacet,
                  backer->max_n_subfacet);

    shash_init(&ofproto_shash);
    ofprotos = get_ofprotos(&ofproto_shash);
    for (i = 0; i < shash_count(&ofproto_shash); i++) {
        struct ofproto_dpif *ofproto = ofprotos[i]->data;
        const struct shash_node **ports;
        size_t j;

        if (ofproto->backer != backer) {
            continue;
        }

        ds_put_format(ds, "\t%s: hit:%"PRIu64" missed:%"PRIu64"\n",
                      ofproto->up.name, ofproto->n_hit, ofproto->n_missed);

        ports = shash_sort(&ofproto->up.port_by_name);
        for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
            const struct shash_node *node = ports[j];
            struct ofport *ofport = node->data;
            struct smap config;
            odp_port_t odp_port;

            ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
                          ofport->ofp_port);

            odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
            if (odp_port != ODPP_NONE) {
                ds_put_format(ds, "%"PRIu32":", odp_port);
            } else {
                ds_put_cstr(ds, "none:");
            }

            ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));

            smap_init(&config);
            if (!netdev_get_config(ofport->netdev, &config)) {
                const struct smap_node **nodes;
                size_t i;

                nodes = smap_sort(&config);
                for (i = 0; i < smap_count(&config); i++) {
                    const struct smap_node *node = nodes[i];
                    ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
                                  node->key, node->value);
                }
                free(nodes);
            }
            smap_destroy(&config);

            ds_put_char(ds, ')');
            ds_put_char(ds, '\n');
        }
        free(ports);
    }
    shash_destroy(&ofproto_shash);
    free(ofprotos);
}

static void
ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                          const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    const struct shash_node **backers;
    int i;

    backers = shash_sort(&all_dpif_backers);
    for (i = 0; i < shash_count(&all_dpif_backers); i++) {
        dpif_show_backer(backers[i]->data, &ds);
    }
    free(backers);

    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}
/* Dump the megaflow (facet) cache.  This is useful to check the
 * correctness of flow wildcarding, since the same mechanism is used for
 * both xlate caching and kernel wildcarding.
 *
 * It's important to note that in the output the flow description uses
 * OpenFlow (OFP) ports, but the actions use datapath (ODP) ports.
 *
 * This command is only needed for advanced debugging, so it's not
 * documented in the man page. */
static void
ofproto_unixctl_dpif_dump_megaflows(struct unixctl_conn *conn,
                                    int argc OVS_UNUSED, const char *argv[],
                                    void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    const struct ofproto_dpif *ofproto;
    long long int now = time_msec();
    struct cls_cursor cursor;
    struct facet *facet;

    ofproto = ofproto_dpif_lookup(argv[1]);
    if (!ofproto) {
        unixctl_command_reply_error(conn, "no such bridge");
        return;
    }

    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    cls_cursor_init(&cursor, &ofproto->facets, NULL);
    CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
        cls_rule_format(&facet->cr, &ds);
        ds_put_cstr(&ds, ", ");
        ds_put_format(&ds, "n_subfacets:%"PRIuSIZE", ", list_size(&facet->subfacets));
        ds_put_format(&ds, "used:%.3fs, ", (now - facet->used) / 1000.0);
        ds_put_cstr(&ds, "Datapath actions: ");
        if (facet->xout.slow) {
            uint64_t slow_path_stub[128 / 8];
            const struct nlattr *actions;
            size_t actions_len;

            compose_slow_path(ofproto, &facet->flow, facet->xout.slow,
                              slow_path_stub, sizeof slow_path_stub,
                              &actions, &actions_len);
            format_odp_actions(&ds, actions, actions_len);
        } else {
            format_odp_actions(&ds, facet->xout.odp_actions.data,
                               facet->xout.odp_actions.size);
        }
        ds_put_cstr(&ds, "\n");
    }
    ovs_rwlock_unlock(&ofproto->facets.rwlock);

    ds_chomp(&ds, '\n');
    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}
5707 * This command is only needed for advanced debugging, so it's not
5708 * documented in the man page. */
5710 ofproto_unixctl_dpif_disable_megaflows(struct unixctl_conn
*conn
,
5711 int argc OVS_UNUSED
,
5712 const char *argv
[] OVS_UNUSED
,
5713 void *aux OVS_UNUSED
)
5715 struct ofproto_dpif
*ofproto
;
5717 enable_megaflows
= false;
5719 HMAP_FOR_EACH (ofproto
, all_ofproto_dpifs_node
, &all_ofproto_dpifs
) {
5720 flush(&ofproto
->up
);
5723 unixctl_command_reply(conn
, "megaflows disabled");
5726 /* Re-enable using megaflows.
5728 * This command is only needed for advanced debugging, so it's not
5729 * documented in the man page. */
5731 ofproto_unixctl_dpif_enable_megaflows(struct unixctl_conn
*conn
,
5732 int argc OVS_UNUSED
,
5733 const char *argv
[] OVS_UNUSED
,
5734 void *aux OVS_UNUSED
)
5736 struct ofproto_dpif
*ofproto
;
5738 enable_megaflows
= true;
5740 HMAP_FOR_EACH (ofproto
, all_ofproto_dpifs_node
, &all_ofproto_dpifs
) {
5741 flush(&ofproto
->up
);
5744 unixctl_command_reply(conn
, "megaflows enabled");
static void
ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
                                int argc OVS_UNUSED, const char *argv[],
                                void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    const struct ofproto_dpif *ofproto;
    struct subfacet *subfacet;

    ofproto = ofproto_dpif_lookup(argv[1]);
    if (!ofproto) {
        unixctl_command_reply_error(conn, "no such bridge");
        return;
    }

    update_stats(ofproto->backer);

    HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->backer->subfacets) {
        struct facet *facet = subfacet->facet;
        struct odputil_keybuf maskbuf;
        struct ofpbuf mask;

        if (facet->ofproto != ofproto) {
            continue;
        }

        ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
        if (enable_megaflows) {
            odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
                                   &facet->flow, UINT32_MAX);
        }

        odp_flow_format(subfacet->key, subfacet->key_len,
                        mask.data, mask.size, NULL, &ds, false);

        ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:",
                      subfacet->dp_packet_count, subfacet->dp_byte_count);
        if (subfacet->used) {
            ds_put_format(&ds, "%.3fs",
                          (time_msec() - subfacet->used) / 1000.0);
        } else {
            ds_put_format(&ds, "never");
        }
        if (subfacet->facet->tcp_flags) {
            ds_put_cstr(&ds, ", flags:");
            packet_format_tcp_flags(&ds, subfacet->facet->tcp_flags);
        }

        ds_put_cstr(&ds, ", actions:");
        if (facet->xout.slow) {
            uint64_t slow_path_stub[128 / 8];
            const struct nlattr *actions;
            size_t actions_len;

            compose_slow_path(ofproto, &facet->flow, facet->xout.slow,
                              slow_path_stub, sizeof slow_path_stub,
                              &actions, &actions_len);
            format_odp_actions(&ds, actions, actions_len);
        } else {
            format_odp_actions(&ds, facet->xout.odp_actions.data,
                               facet->xout.odp_actions.size);
        }
        ds_put_char(&ds, '\n');
    }

    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}
static void
ofproto_dpif_unixctl_init(void)
{
    static bool registered;
    if (registered) {
        return;
    }
    registered = true;

    unixctl_command_register(
        "ofproto/trace",
        "{[dp_name] odp_flow | bridge br_flow} [-generate|packet]",
        1, 3, ofproto_unixctl_trace, NULL);
    unixctl_command_register(
        "ofproto/trace-packet-out",
        "[-consistent] {[dp_name] odp_flow | bridge br_flow} [-generate|packet] actions",
        2, 6, ofproto_unixctl_trace_actions, NULL);
    unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
                             ofproto_unixctl_fdb_flush, NULL);
    unixctl_command_register("fdb/show", "bridge", 1, 1,
                             ofproto_unixctl_fdb_show, NULL);
    unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
                             ofproto_dpif_self_check, NULL);
    unixctl_command_register("dpif/dump-dps", "", 0, 0,
                             ofproto_unixctl_dpif_dump_dps, NULL);
    unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
                             NULL);
    unixctl_command_register("dpif/dump-flows", "bridge", 1, 1,
                             ofproto_unixctl_dpif_dump_flows, NULL);
    unixctl_command_register("dpif/dump-megaflows", "bridge", 1, 1,
                             ofproto_unixctl_dpif_dump_megaflows, NULL);
    unixctl_command_register("dpif/disable-megaflows", "", 0, 0,
                             ofproto_unixctl_dpif_disable_megaflows, NULL);
    unixctl_command_register("dpif/enable-megaflows", "", 0, 0,
                             ofproto_unixctl_dpif_enable_megaflows, NULL);
}
/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */

static int
set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    if (realdev_ofp_port == ofport->realdev_ofp_port
        && vid == ofport->vlandev_vid) {
        return 0;
    }

    ofproto->backer->need_revalidate = REV_RECONFIGURE;

    if (ofport->realdev_ofp_port) {
        vsp_remove(ofport);
    }
    if (realdev_ofp_port && ofport->bundle) {
        /* vlandevs are enslaved to their realdevs, so they are not allowed to
         * themselves be part of a bundle. */
        bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
    }

    ofport->realdev_ofp_port = realdev_ofp_port;
    ofport->vlandev_vid = vid;

    if (realdev_ofp_port) {
        vsp_add(ofport, realdev_ofp_port, vid);
    }

    return 0;
}

static uint32_t
hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
{
    return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
}
bool
ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
    OVS_EXCLUDED(ofproto->vsp_mutex)
{
    bool ret;

    ovs_mutex_lock(&ofproto->vsp_mutex);
    ret = !hmap_is_empty(&ofproto->realdev_vid_map);
    ovs_mutex_unlock(&ofproto->vsp_mutex);
    return ret;
}

static ofp_port_t
vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
                         ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
    OVS_REQUIRES(ofproto->vsp_mutex)
{
    if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
        int vid = vlan_tci_to_vid(vlan_tci);
        const struct vlan_splinter *vsp;

        HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
                                 hash_realdev_vid(realdev_ofp_port, vid),
                                 &ofproto->realdev_vid_map) {
            if (vsp->realdev_ofp_port == realdev_ofp_port
                && vsp->vid == vid) {
                return vsp->vlandev_ofp_port;
            }
        }
    }
    return realdev_ofp_port;
}
/* Returns the OFP port number of the Linux VLAN device that corresponds to
 * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
 * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
 * 'vlan_tci' 9, it would return the port number of eth0.9.
 *
 * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
 * function just returns its 'realdev_ofp_port' argument. */
ofp_port_t
vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
                       ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
    OVS_EXCLUDED(ofproto->vsp_mutex)
{
    ofp_port_t ret;

    ovs_mutex_lock(&ofproto->vsp_mutex);
    ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
    ovs_mutex_unlock(&ofproto->vsp_mutex);
    return ret;
}
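/* Illustrative sketch (not part of the original source): translating an output
 * port for a tagged packet, following the eth0 / eth0.9 example in the comment
 * above (the port variable is hypothetical):
 *
 *     ofp_port_t out;
 *
 *     out = vsp_realdev_to_vlandev(ofproto, eth0_ofp_port,
 *                                  htons(9 | VLAN_CFI));
 *     // 'out' is eth0.9's port if a splinter exists, otherwise eth0's.
 */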
static struct vlan_splinter *
vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
{
    struct vlan_splinter *vsp;

    HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
                             hash_ofp_port(vlandev_ofp_port),
                             &ofproto->vlandev_map) {
        if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
            return vsp;
        }
    }

    return NULL;
}

/* Returns the OpenFlow port number of the "real" device underlying the Linux
 * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
 * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
 * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
 * eth0 and store 9 in '*vid'.
 *
 * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
 * VLAN device.  Unless VLAN splinters are enabled, this is what this function
 * always does. */
static ofp_port_t
vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
                       ofp_port_t vlandev_ofp_port, int *vid)
    OVS_REQUIRES(ofproto->vsp_mutex)
{
    if (!hmap_is_empty(&ofproto->vlandev_map)) {
        const struct vlan_splinter *vsp;

        vsp = vlandev_find(ofproto, vlandev_ofp_port);
        if (vsp) {
            if (vid) {
                *vid = vsp->vid;
            }
            return vsp->realdev_ofp_port;
        }
    }

    return 0;
}
/* Given 'flow', a flow representing a packet received on 'ofproto', checks
 * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
 * 'flow->in_port' to the "real" device backing the VLAN device, sets
 * 'flow->vlan_tci' to the VLAN VID, and returns true.  Otherwise (which is
 * always the case unless VLAN splinters are enabled), returns false without
 * making any changes. */
bool
vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow)
    OVS_EXCLUDED(ofproto->vsp_mutex)
{
    ofp_port_t realdev;
    int vid;

    ovs_mutex_lock(&ofproto->vsp_mutex);
    realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
    ovs_mutex_unlock(&ofproto->vsp_mutex);
    if (!realdev) {
        return false;
    }

    /* Cause the flow to be processed as if it came in on the real device with
     * the VLAN device's VLAN ID. */
    flow->in_port.ofp_port = realdev;
    flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
    return true;
}

static void
vsp_remove(struct ofport_dpif *port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    struct vlan_splinter *vsp;

    ovs_mutex_lock(&ofproto->vsp_mutex);
    vsp = vlandev_find(ofproto, port->up.ofp_port);
    if (vsp) {
        hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
        hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
        free(vsp);

        port->realdev_ofp_port = 0;
    } else {
        VLOG_ERR("missing vlan device record");
    }
    ovs_mutex_unlock(&ofproto->vsp_mutex);
}
static void
vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);

    ovs_mutex_lock(&ofproto->vsp_mutex);
    if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
        && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
            == realdev_ofp_port)) {
        struct vlan_splinter *vsp;

        vsp = xmalloc(sizeof *vsp);
        vsp->realdev_ofp_port = realdev_ofp_port;
        vsp->vlandev_ofp_port = port->up.ofp_port;
        vsp->vid = vid;

        port->realdev_ofp_port = realdev_ofp_port;

        hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
                    hash_ofp_port(port->up.ofp_port));
        hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
                    hash_realdev_vid(realdev_ofp_port, vid));
    } else {
        VLOG_ERR("duplicate vlan device record");
    }
    ovs_mutex_unlock(&ofproto->vsp_mutex);
}

static odp_port_t
ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
{
    const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
    return ofport ? ofport->odp_port : ODPP_NONE;
}
struct ofport_dpif *
odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
{
    struct ofport_dpif *port;

    ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
    HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
                             &backer->odp_to_ofport_map) {
        if (port->odp_port == odp_port) {
            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
            return port;
        }
    }
    ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
    return NULL;
}

static ofp_port_t
odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
{
    struct ofport_dpif *port;

    port = odp_port_to_ofport(ofproto->backer, odp_port);
    if (port && &ofproto->up == port->up.ofproto) {
        return port->up.ofp_port;
    } else {
        return OFPP_NONE;
    }
}
= {
6123 type_get_memory_usage
,
6142 port_is_lacp_current
,
6143 NULL
, /* rule_choose_table */
6152 rule_modify_actions
,
6166 get_stp_port_status
,
6174 is_mirror_output_bundle
,
6175 forward_bpdu_changed
,
6176 set_mac_table_config
,
6178 NULL
, /* meter_get_features */
6179 NULL
, /* meter_set */
6180 NULL
, /* meter_get */
6181 NULL
, /* meter_del */
6182 group_alloc
, /* group_alloc */
6183 group_construct
, /* group_construct */
6184 group_destruct
, /* group_destruct */
6185 group_dealloc
, /* group_dealloc */
6186 group_modify
, /* group_modify */
6187 group_get_stats
, /* group_get_stats */