/* ofproto/ofproto-dpif.c */
/*
 * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "ofproto/ofproto-provider.h"

#include <errno.h>

#include "autopath.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "connmgr.h"
#include "coverage.h"
#include "cfm.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "hmapx.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-util.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-governor.h"
#include "ofproto-dpif-sflow.h"
#include "poll-loop.h"
#include "simap.h"
#include "timer.h"
#include "unaligned.h"
#include "unixctl.h"
#include "vlan-bitmap.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofproto_dpif);

COVERAGE_DEFINE(ofproto_dpif_expired);
COVERAGE_DEFINE(ofproto_dpif_xlate);
COVERAGE_DEFINE(facet_changed_rule);
COVERAGE_DEFINE(facet_revalidate);
COVERAGE_DEFINE(facet_unexpected);
COVERAGE_DEFINE(facet_suppress);

/* Maximum depth of flow table recursion (due to resubmit actions) in a
 * flow translation. */
#define MAX_RESUBMIT_RECURSION 32

/* Number of implemented OpenFlow tables. */
enum { N_TABLES = 255 };
enum { TBL_INTERNAL = N_TABLES - 1 }; /* Used for internal hidden rules. */
BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);

struct ofport_dpif;
struct ofproto_dpif;

struct rule_dpif {
    struct rule up;

    /* These statistics:
     *
     *   - Do include packets and bytes from facets that have been deleted or
     *     whose own statistics have been folded into the rule.
     *
     *   - Do include packets and bytes sent "by hand" that were accounted to
     *     the rule without any facet being involved (this is a rare corner
     *     case in rule_execute()).
     *
     *   - Do not include packets or bytes that can be obtained from any
     *     facet's packet_count or byte_count member or that can be obtained
     *     from the datapath by, e.g., dpif_flow_get() for any subfacet.
     */
    uint64_t packet_count;      /* Number of packets received. */
    uint64_t byte_count;        /* Number of bytes received. */

    tag_type tag;               /* Caches rule_calculate_tag() result. */

    struct list facets;         /* List of "struct facet"s. */
};

static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
{
    return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
}

static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *,
                                          const struct flow *);
static struct rule_dpif *rule_dpif_lookup__(struct ofproto_dpif *,
                                            const struct flow *,
                                            uint8_t table);

static void rule_credit_stats(struct rule_dpif *,
                              const struct dpif_flow_stats *);
static void flow_push_stats(struct rule_dpif *, const struct flow *,
                            const struct dpif_flow_stats *);
static tag_type rule_calculate_tag(const struct flow *,
                                   const struct flow_wildcards *,
                                   uint32_t basis);
static void rule_invalidate(const struct rule_dpif *);

#define MAX_MIRRORS 32
typedef uint32_t mirror_mask_t;
#define MIRROR_MASK_C(X) UINT32_C(X)
BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= MAX_MIRRORS);
struct ofmirror {
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    size_t idx;                 /* In ofproto's "mirrors" array. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Selection criteria. */
    struct hmapx srcs;          /* Contains "struct ofbundle *"s. */
    struct hmapx dsts;          /* Contains "struct ofbundle *"s. */
    unsigned long *vlans;       /* Bitmap of chosen VLANs, NULL selects all. */

    /* Output (exactly one of out == NULL and out_vlan == -1 is true). */
    struct ofbundle *out;       /* Output port or NULL. */
    int out_vlan;               /* Output VLAN or -1. */
    mirror_mask_t dup_mirrors;  /* Bitmap of mirrors with the same output. */

    /* Counters. */
    int64_t packet_count;       /* Number of packets sent. */
    int64_t byte_count;         /* Number of bytes sent. */
};

static void mirror_destroy(struct ofmirror *);
static void update_mirror_stats(struct ofproto_dpif *ofproto,
                                mirror_mask_t mirrors,
                                uint64_t packets, uint64_t bytes);

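/* Illustrative note (not from the original source): each mirror occupies one
 * bit of a mirror_mask_t, derived from its 'idx' slot in the owning ofproto's
 * "mirrors" array, so sets of mirrors combine and test with plain bitwise
 * arithmetic.  A sketch of the idiom, using the fields declared above:
 *
 *     mirror_mask_t bit = MIRROR_MASK_C(1) << mirror->idx;
 *     bundle->src_mirrors |= bit;        // mirror packets received on bundle
 *     if (bundle->mirror_out & bit) {    // does 'mirror' output to 'bundle'?
 *         ...
 *     }
 */
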
struct ofbundle {
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Configuration. */
    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode. */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    /* Status. */
    bool floodable;             /* True if no port has OFPUTIL_PC_NO_FLOOD set. */

    /* Port mirroring info. */
    mirror_mask_t src_mirrors;  /* Mirrors triggered when packet received. */
    mirror_mask_t dst_mirrors;  /* Mirrors triggered when packet sent. */
    mirror_mask_t mirror_out;   /* Mirrors that output to this bundle. */
};

static void bundle_remove(struct ofport *);
static void bundle_update(struct ofbundle *);
static void bundle_destroy(struct ofbundle *);
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);
static struct ofbundle *lookup_input_bundle(const struct ofproto_dpif *,
                                            uint16_t in_port, bool warn,
                                            struct ofport_dpif **in_ofportp);

/* A controller may use OFPP_NONE as the ingress port to indicate that a
 * packet did not arrive on a "real" port.  'ofpp_none_bundle' exists for
 * when an input bundle is needed for validation (e.g., mirroring or
 * OFPP_NORMAL processing).  It is not connected to an 'ofproto' and does not
 * have any 'port' structs, so care must be taken when dealing with it. */
static struct ofbundle ofpp_none_bundle = {
    .name = "OFPP_NONE",
    .vlan_mode = PORT_VLAN_TRUNK
};

static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
                        const struct ofproto_port_stp_settings *);

static bool ofbundle_includes_vlan(const struct ofbundle *, uint16_t vlan);

struct action_xlate_ctx {
/* action_xlate_ctx_init() initializes these members. */

    /* The ofproto. */
    struct ofproto_dpif *ofproto;

    /* Flow to which the OpenFlow actions apply.  xlate_actions() will modify
     * this flow when actions change header fields. */
    struct flow flow;

    /* The packet corresponding to 'flow', or a null pointer if we are
     * revalidating without a packet to refer to. */
    const struct ofpbuf *packet;

    /* Should OFPP_NORMAL update the MAC learning table?  Should "learn"
     * actions update the flow table?
     *
     * We want to update these tables if we are actually processing a packet,
     * or if we are accounting for packets that the datapath has processed,
     * but not if we are just revalidating. */
    bool may_learn;

    /* The rule that we are currently translating, or NULL. */
    struct rule_dpif *rule;

    /* Union of the set of TCP flags seen so far in this flow.  (Used only by
     * NXAST_FIN_TIMEOUT.  Set to zero to avoid updating rules' timeouts.) */
    uint8_t tcp_flags;

    /* If nonnull, flow translation calls this function just before executing
     * a resubmit or OFPP_TABLE action.  In addition, disables logging of
     * traces when the recursion depth is exceeded.
     *
     * 'rule' is the rule being submitted into.  It will be null if the
     * resubmit or OFPP_TABLE action didn't find a matching rule.
     *
     * This is normally null so the client has to set it manually after
     * calling action_xlate_ctx_init(). */
    void (*resubmit_hook)(struct action_xlate_ctx *, struct rule_dpif *rule);

    /* If nonnull, flow translation calls this function to report some
     * significant decision, e.g. to explain why OFPP_NORMAL translation
     * dropped a packet. */
    void (*report_hook)(struct action_xlate_ctx *, const char *s);

    /* If nonnull, flow translation credits the specified statistics to each
     * rule reached through a resubmit or OFPP_TABLE action.
     *
     * This is normally null so the client has to set it manually after
     * calling action_xlate_ctx_init(). */
    const struct dpif_flow_stats *resubmit_stats;

/* xlate_actions() initializes and uses these members.  The client might want
 * to look at them after it returns. */

    struct ofpbuf *odp_actions; /* Datapath actions. */
    tag_type tags;              /* Tags associated with actions. */
    enum slow_path_reason slow; /* 0 if fast path may be used. */
    bool has_learn;             /* Actions include NXAST_LEARN? */
    bool has_normal;            /* Actions output to OFPP_NORMAL? */
    bool has_fin_timeout;       /* Actions include NXAST_FIN_TIMEOUT? */
    uint16_t nf_output_iface;   /* Output interface index for NetFlow. */
    mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */

/* xlate_actions() initializes and uses these members, but the client has no
 * reason to look at them. */

    int recurse;                /* Recursion level, via xlate_table_action. */
    bool max_resubmit_trigger;  /* Recursed too deeply during translation. */
    struct flow base_flow;      /* Flow at the last commit. */
    uint32_t orig_skb_priority; /* Priority when packet arrived. */
    uint8_t table_id;           /* OpenFlow table ID where flow was found. */
    uint32_t sflow_n_outputs;   /* Number of output ports. */
    uint16_t sflow_odp_port;    /* Output port for composing sFlow action. */
    uint16_t user_cookie_offset;/* Used for user_action_cookie fixup. */
    bool exit;                  /* No further actions should be processed. */
    struct flow orig_flow;      /* Copy of original flow. */
};

static void action_xlate_ctx_init(struct action_xlate_ctx *,
                                  struct ofproto_dpif *, const struct flow *,
                                  ovs_be16 initial_tci, struct rule_dpif *,
                                  uint8_t tcp_flags, const struct ofpbuf *);
static void xlate_actions(struct action_xlate_ctx *,
                          const struct ofpact *ofpacts, size_t ofpacts_len,
                          struct ofpbuf *odp_actions);
static void xlate_actions_for_side_effects(struct action_xlate_ctx *,
                                           const struct ofpact *ofpacts,
                                           size_t ofpacts_len);

static size_t put_userspace_action(const struct ofproto_dpif *,
                                   struct ofpbuf *odp_actions,
                                   const struct flow *,
                                   const union user_action_cookie *);

static void compose_slow_path(const struct ofproto_dpif *, const struct flow *,
                              enum slow_path_reason,
                              uint64_t *stub, size_t stub_size,
                              const struct nlattr **actionsp,
                              size_t *actions_lenp);

static void xlate_report(struct action_xlate_ctx *ctx, const char *s);

/* A subfacet (see "struct subfacet" below) has three possible installation
 * states:
 *
 *   - SF_NOT_INSTALLED: Not installed in the datapath.  This will only be the
 *     case just after the subfacet is created, just before the subfacet is
 *     destroyed, or if the datapath returns an error when we try to install a
 *     subfacet.
 *
 *   - SF_FAST_PATH: The subfacet's actions are installed in the datapath.
 *
 *   - SF_SLOW_PATH: An action that sends every packet for the subfacet
 *     through ofproto_dpif is installed in the datapath.
 */
enum subfacet_path {
    SF_NOT_INSTALLED,           /* No datapath flow for this subfacet. */
    SF_FAST_PATH,               /* Full actions are installed. */
    SF_SLOW_PATH,               /* Send-to-userspace action is installed. */
};

static const char *subfacet_path_to_string(enum subfacet_path);

/* A dpif flow and actions associated with a facet.
 *
 * See also the large comment on struct facet. */
struct subfacet {
    /* Owners. */
    struct hmap_node hmap_node; /* In struct ofproto_dpif 'subfacets' list. */
    struct list list_node;      /* In struct facet's 'facets' list. */
    struct facet *facet;        /* Owning facet. */

    /* Key.
     *
     * To save memory in the common case, 'key' is NULL if 'key_fitness' is
     * ODP_FIT_PERFECT, that is, odp_flow_key_from_flow() can accurately
     * regenerate the ODP flow key from ->facet->flow. */
    enum odp_key_fitness key_fitness;
    struct nlattr *key;
    int key_len;

    long long int used;         /* Time last used; time created if not used. */

    uint64_t dp_packet_count;   /* Last known packet count in the datapath. */
    uint64_t dp_byte_count;     /* Last known byte count in the datapath. */

    /* Datapath actions.
     *
     * These should be essentially identical for every subfacet in a facet,
     * but may differ in trivial ways due to VLAN splinters. */
    size_t actions_len;         /* Number of bytes in actions[]. */
    struct nlattr *actions;     /* Datapath actions. */

    enum slow_path_reason slow; /* 0 if fast path may be used. */
    enum subfacet_path path;    /* Installed in datapath? */

    /* This value is normally the same as ->facet->flow.vlan_tci.  Only VLAN
     * splinters can cause it to differ.  This value should be removed when
     * the VLAN splinters feature is no longer needed. */
    ovs_be16 initial_tci;       /* Initial VLAN TCI value. */
};

static struct subfacet *subfacet_create(struct facet *, enum odp_key_fitness,
                                        const struct nlattr *key,
                                        size_t key_len, ovs_be16 initial_tci);
static struct subfacet *subfacet_find(struct ofproto_dpif *,
                                      const struct nlattr *key,
                                      size_t key_len);
static void subfacet_destroy(struct subfacet *);
static void subfacet_destroy__(struct subfacet *);
static void subfacet_get_key(struct subfacet *, struct odputil_keybuf *,
                             struct ofpbuf *key);
static void subfacet_reset_dp_stats(struct subfacet *,
                                    struct dpif_flow_stats *);
static void subfacet_update_time(struct subfacet *, long long int used);
static void subfacet_update_stats(struct subfacet *,
                                  const struct dpif_flow_stats *);
static void subfacet_make_actions(struct subfacet *,
                                  const struct ofpbuf *packet,
                                  struct ofpbuf *odp_actions);
static int subfacet_install(struct subfacet *,
                            const struct nlattr *actions, size_t actions_len,
                            struct dpif_flow_stats *, enum slow_path_reason);
static void subfacet_uninstall(struct subfacet *);

static enum subfacet_path subfacet_want_path(enum slow_path_reason);

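/* Illustrative sketch (not from the original source): the mapping from a
 * subfacet's slow-path reason to the installation state it should be in is a
 * simple two-way choice, roughly:
 *
 *     static enum subfacet_path
 *     subfacet_want_path(enum slow_path_reason slow)
 *     {
 *         return slow ? SF_SLOW_PATH : SF_FAST_PATH;
 *     }
 *
 * that is, any nonzero slow-path reason forces the send-to-userspace action,
 * and SF_NOT_INSTALLED is reached only through errors or teardown. */
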
/* An exact-match instantiation of an OpenFlow flow.
 *
 * A facet associates a "struct flow", which represents the Open vSwitch
 * userspace idea of an exact-match flow, with one or more subfacets.  Each
 * subfacet tracks the datapath's idea of the exact-match flow equivalent to
 * the facet.  When the kernel module (or other dpif implementation) and Open
 * vSwitch userspace agree on the definition of a flow key, there is exactly
 * one subfacet per facet.  If the dpif implementation supports more-specific
 * flow matching than userspace, however, a facet can have more than one
 * subfacet, each of which corresponds to some distinction in flow that
 * userspace simply doesn't understand.
 *
 * Flow expiration works in terms of subfacets, so a facet must have at least
 * one subfacet or it will never expire, leaking memory. */
struct facet {
    /* Owners. */
    struct hmap_node hmap_node;  /* In owning ofproto's 'facets' hmap. */
    struct list list_node;       /* In owning rule's 'facets' list. */
    struct rule_dpif *rule;      /* Owning rule. */

    /* Owned data. */
    struct list subfacets;
    long long int used;          /* Time last used; time created if not used. */

    /* Key. */
    struct flow flow;

    /* These statistics:
     *
     *   - Do include packets and bytes sent "by hand", e.g. with
     *     dpif_execute().
     *
     *   - Do include packets and bytes that were obtained from the datapath
     *     when a subfacet's statistics were reset (e.g. dpif_flow_put() with
     *     DPIF_FP_ZERO_STATS).
     *
     *   - Do not include packets or bytes that can be obtained from the
     *     datapath for any existing subfacet.
     */
    uint64_t packet_count;       /* Number of packets received. */
    uint64_t byte_count;         /* Number of bytes received. */

    /* Resubmit statistics. */
    uint64_t prev_packet_count;  /* Number of packets from last stats push. */
    uint64_t prev_byte_count;    /* Number of bytes from last stats push. */
    long long int prev_used;     /* Used time from last stats push. */

    /* Accounting. */
    uint64_t accounted_bytes;    /* Bytes processed by facet_account(). */
    struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */
    uint8_t tcp_flags;           /* TCP flags seen for this 'rule'. */

    /* Properties of datapath actions.
     *
     * Every subfacet has its own actions because actions can differ slightly
     * between splintered and non-splintered subfacets due to the VLAN tag
     * being initially different (present vs. absent).  All of them have
     * these properties in common so we just store one copy of them here. */
    bool has_learn;              /* Actions include NXAST_LEARN? */
    bool has_normal;             /* Actions output to OFPP_NORMAL? */
    bool has_fin_timeout;        /* Actions include NXAST_FIN_TIMEOUT? */
    tag_type tags;               /* Tags that would require revalidation. */
    mirror_mask_t mirrors;       /* Bitmap of dependent mirrors. */

    /* Storage for a single subfacet, to reduce malloc() time and space
     * overhead.  (A facet always has at least one subfacet and in the common
     * case has exactly one subfacet.) */
    struct subfacet one_subfacet;
};

static struct facet *facet_create(struct rule_dpif *,
                                  const struct flow *, uint32_t hash);
static void facet_remove(struct facet *);
static void facet_free(struct facet *);

static struct facet *facet_find(struct ofproto_dpif *,
                                const struct flow *, uint32_t hash);
static struct facet *facet_lookup_valid(struct ofproto_dpif *,
                                        const struct flow *, uint32_t hash);
static void facet_revalidate(struct facet *);
static bool facet_check_consistency(struct facet *);

static void facet_flush_stats(struct facet *);

static void facet_update_time(struct facet *, long long int used);
static void facet_reset_counters(struct facet *);
static void facet_push_stats(struct facet *);
static void facet_learn(struct facet *);
static void facet_account(struct facet *);

static bool facet_is_controller_flow(struct facet *);

struct ofport_dpif {
    struct ofport up;

    uint32_t odp_port;
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    tag_type tag;               /* Tag associated with this port. */
    uint32_t bond_stable_id;    /* stable_id to use as bond slave, or 0. */
    bool may_enable;            /* May be enabled in bonds. */
    long long int carrier_seq;  /* Carrier status changes. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    struct hmap priorities;     /* Map of attached 'priority_to_dscp's. */

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs
     * when VLAN devices are not used.  When broken device drivers are no
     * longer in widespread use, we will delete these interfaces. */
    uint16_t realdev_ofp_port;
    int vlandev_vid;
};

/* Node in 'ofport_dpif''s 'priorities' map.  Used to maintain a map from
 * 'priority' (the datapath's term for QoS queue) to the DSCP bits which all
 * traffic egressing the 'ofport' with that priority should be marked with. */
struct priority_to_dscp {
    struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'priorities' map. */
    uint32_t priority;          /* Priority of this queue (see struct flow). */

    uint8_t dscp;               /* DSCP bits to mark outgoing traffic with. */
};

/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device
 * drivers in old versions of Linux that do not properly support VLANs when
 * VLAN devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node;
    struct hmap_node vlandev_node;
    uint16_t realdev_ofp_port;
    uint16_t vlandev_ofp_port;
    int vid;
};

static uint32_t vsp_realdev_to_vlandev(const struct ofproto_dpif *,
                                       uint32_t realdev, ovs_be16 vlan_tci);
static bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *);
static void vsp_remove(struct ofport_dpif *);
static void vsp_add(struct ofport_dpif *, uint16_t realdev_ofp_port, int vid);

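/* Illustrative note (not from the original source): a vlan_splinter records
 * that a Linux VLAN device such as "eth0.10" (the vlandev) shadows VLAN 10 of
 * a real device "eth0" (the realdev).  The two hmap_nodes let the same node
 * be indexed both ways: vsp_realdev_to_vlandev() maps a real device's port
 * plus a packet's VLAN TCI to the VLAN device's port so output can be
 * diverted to it, and vsp_adjust_flow() performs the reverse fixup on flows
 * received from a VLAN device, presenting them as if the packet had arrived
 * tagged on the real device. */
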
static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
    assert(!ofport || ofport->ofproto->ofproto_class == &ofproto_dpif_class);
    return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}

static void port_run(struct ofport_dpif *);
static void port_run_fast(struct ofport_dpif *);
static void port_wait(struct ofport_dpif *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_clear_priorities(struct ofport_dpif *);

struct dpif_completion {
    struct list list_node;
    struct ofoperation *op;
};

/* Extra information about a classifier table.
 * Currently used just for optimized flow revalidation. */
struct table_dpif {
    /* If either of these is nonnull, then this table has a form that allows
     * flows to be tagged to avoid revalidating most flows for the most
     * common kinds of flow table changes. */
    struct cls_table *catchall_table; /* Table that wildcards all fields. */
    struct cls_table *other_table;    /* Table with any other wildcard set. */
    uint32_t basis;                   /* Keeps each table's tags separate. */
};

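/* Illustrative note (not from the original source): these tags are how
 * revalidation is kept cheap.  Rules cache a tag (rule_dpif's 'tag'), facets
 * accumulate the tags of the rules they were translated through ('tags' in
 * struct facet), and a flow table change adds only the affected tag to
 * ofproto->revalidate_set; run(), below, then revalidates just the facets
 * whose tag sets intersect it rather than every facet in the switch. */
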
/* Reasons that we might need to revalidate every facet, and corresponding
 * coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ... */
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_INCONSISTENCY          /* Facet self-check failed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_inconsistency);

struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;
    struct dpif *dpif;
    int max_ports;

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */

    /* Statistics. */
    uint64_t n_matches;

    /* Bridging. */
    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    struct ofmirror *mirrors[MAX_MIRRORS];
    bool has_mirrors;
    bool has_bonded_bundles;

    /* Expiration. */
    struct timer next_expiration;

    /* Facets. */
    struct hmap facets;
    struct hmap subfacets;
    struct governor *governor;

    /* Revalidation. */
    struct table_dpif tables[N_TABLES];
    enum revalidate_reason need_revalidate;
    struct tag_set revalidate_set;

    /* Support for debugging async flow mods. */
    struct list completions;

    bool has_bundle_action;     /* True when the first bundle action appears. */
    struct netdev_stats stats;  /* To account packets generated and consumed
                                 * in userspace. */

    /* Spanning tree. */
    struct stp *stp;
    long long int stp_last_tick;

    /* VLAN splinters. */
    struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map;     /* vlandev -> (realdev,vid). */
};

/* Defer flow mod completion until "ovs-appctl ofproto/unclog"?  (Useful only
 * for debugging the asynchronous flow_mod implementation.) */
static bool clogged;

/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);

static void ofproto_dpif_unixctl_init(void);

static struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}

static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *,
                                        uint16_t ofp_port);
static struct ofport_dpif *get_odp_port(const struct ofproto_dpif *,
                                        uint32_t odp_port);
static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
                          const struct ofpbuf *, ovs_be16 initial_tci,
                          struct ds *);

/* Packet processing. */
static void update_learning_table(struct ofproto_dpif *,
                                  const struct flow *, int vlan,
                                  struct ofbundle *);

/* Upcalls. */
#define FLOW_MISS_MAX_BATCH 50
static int handle_upcalls(struct ofproto_dpif *, unsigned int max_batch);

/* Flow expiration. */
static int expire(struct ofproto_dpif *);

/* NetFlow. */
static void send_netflow_active_timeouts(struct ofproto_dpif *);

/* Utilities. */
static int send_packet(const struct ofport_dpif *, struct ofpbuf *packet);
static size_t compose_sflow_action(const struct ofproto_dpif *,
                                   struct ofpbuf *odp_actions,
                                   const struct flow *, uint32_t odp_port);
static void add_mirror_actions(struct action_xlate_ctx *ctx,
                               const struct flow *flow);

/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
\f
/* Factory functions. */

static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}

static int
enumerate_names(const char *type, struct sset *names)
{
    return dp_enumerate_names(type, names);
}

static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error;

    error = dpif_open(name, type, &dpif);
    if (!error) {
        error = dpif_delete(dpif);
        dpif_close(dpif);
    }
    return error;
}
\f
/* Basic life-cycle. */

static int add_internal_flows(struct ofproto_dpif *);

static struct ofproto *
alloc(void)
{
    struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
    return &ofproto->up;
}

static void
dealloc(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    free(ofproto);
}

static int
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    const char *name = ofproto->up.name;
    int error;
    int i;

    error = dpif_create_and_open(name, ofproto->up.type, &ofproto->dpif);
    if (error) {
        VLOG_ERR("failed to open datapath %s: %s", name, strerror(error));
        return error;
    }

    ofproto->max_ports = dpif_get_max_ports(ofproto->dpif);
    ofproto->n_matches = 0;

    dpif_flow_flush(ofproto->dpif);
    dpif_recv_purge(ofproto->dpif);

    error = dpif_recv_set(ofproto->dpif, true);
    if (error) {
        VLOG_ERR("failed to listen on datapath %s: %s", name, strerror(error));
        dpif_close(ofproto->dpif);
        return error;
    }

    ofproto->netflow = NULL;
    ofproto->sflow = NULL;
    ofproto->stp = NULL;
    hmap_init(&ofproto->bundles);
    ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
    for (i = 0; i < MAX_MIRRORS; i++) {
        ofproto->mirrors[i] = NULL;
    }
    ofproto->has_bonded_bundles = false;

    timer_set_duration(&ofproto->next_expiration, 1000);

    hmap_init(&ofproto->facets);
    hmap_init(&ofproto->subfacets);
    ofproto->governor = NULL;

    for (i = 0; i < N_TABLES; i++) {
        struct table_dpif *table = &ofproto->tables[i];

        table->catchall_table = NULL;
        table->other_table = NULL;
        table->basis = random_uint32();
    }
    ofproto->need_revalidate = 0;
    tag_set_init(&ofproto->revalidate_set);

    list_init(&ofproto->completions);

    ofproto_dpif_unixctl_init();

    ofproto->has_mirrors = false;
    ofproto->has_bundle_action = false;

    hmap_init(&ofproto->vlandev_map);
    hmap_init(&ofproto->realdev_vid_map);

    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                hash_string(ofproto->up.name, 0));
    memset(&ofproto->stats, 0, sizeof ofproto->stats);

    ofproto_init_tables(ofproto_, N_TABLES);
    error = add_internal_flows(ofproto);
    ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;

    return error;
}

static int
add_internal_flow(struct ofproto_dpif *ofproto, int id,
                  const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
{
    struct ofputil_flow_mod fm;
    int error;

    cls_rule_init_catchall(&fm.cr, 0);
    cls_rule_set_reg(&fm.cr, 0, id);
    fm.new_cookie = htonll(0);
    fm.cookie = htonll(0);
    fm.cookie_mask = htonll(0);
    fm.table_id = TBL_INTERNAL;
    fm.command = OFPFC_ADD;
    fm.idle_timeout = 0;
    fm.hard_timeout = 0;
    fm.buffer_id = 0;
    fm.out_port = 0;
    fm.flags = 0;
    fm.ofpacts = ofpacts->data;
    fm.ofpacts_len = ofpacts->size;

    error = ofproto_flow_mod(&ofproto->up, &fm);
    if (error) {
        VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
                    id, ofperr_to_string(error));
        return error;
    }

    *rulep = rule_dpif_lookup__(ofproto, &fm.cr.flow, TBL_INTERNAL);
    assert(*rulep != NULL);

    return 0;
}

static int
add_internal_flows(struct ofproto_dpif *ofproto)
{
    struct ofpact_controller *controller;
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;
    int error;
    int id;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    id = 1;

    controller = ofpact_put_CONTROLLER(&ofpacts);
    controller->max_len = UINT16_MAX;
    controller->controller_id = 0;
    controller->reason = OFPR_NO_MATCH;
    ofpact_pad(&ofpacts);

    error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
    if (error) {
        return error;
    }

    ofpbuf_clear(&ofpacts);
    error = add_internal_flow(ofproto, id++, &ofpacts,
                              &ofproto->no_packet_in_rule);
    return error;
}

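/* Illustrative note (not from the original source): both internal rules live
 * in the hidden, read-only TBL_INTERNAL table and are kept distinct only by
 * the register-0 value matched in add_internal_flow(), so they can never
 * collide with controller-installed flows.  Since construct() saves direct
 * pointers to them, code that handles a flow-table miss can pick one without
 * a classifier lookup, roughly:
 *
 *     rule = (port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN)
 *            ? ofproto->no_packet_in_rule   // silently drop the miss
 *            : ofproto->miss_rule;          // punt it to the controller
 */
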
static void
complete_operations(struct ofproto_dpif *ofproto)
{
    struct dpif_completion *c, *next;

    LIST_FOR_EACH_SAFE (c, next, list_node, &ofproto->completions) {
        ofoperation_complete(c->op, 0);
        list_remove(&c->list_node);
        free(c);
    }
}

static void
destruct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct rule_dpif *rule, *next_rule;
    struct oftable *table;
    int i;

    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
    complete_operations(ofproto);

    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        struct cls_cursor cursor;

        cls_cursor_init(&cursor, &table->cls, NULL);
        CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
            ofproto_rule_destroy(&rule->up);
        }
    }

    for (i = 0; i < MAX_MIRRORS; i++) {
        mirror_destroy(ofproto->mirrors[i]);
    }

    netflow_destroy(ofproto->netflow);
    dpif_sflow_destroy(ofproto->sflow);
    hmap_destroy(&ofproto->bundles);
    mac_learning_destroy(ofproto->ml);

    hmap_destroy(&ofproto->facets);
    hmap_destroy(&ofproto->subfacets);
    governor_destroy(ofproto->governor);

    hmap_destroy(&ofproto->vlandev_map);
    hmap_destroy(&ofproto->realdev_vid_map);

    dpif_close(ofproto->dpif);
}

static int
run_fast(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofport_dpif *ofport;
    unsigned int work;

    HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
        port_run_fast(ofport);
    }

    /* Handle one or more batches of upcalls, until there's nothing left to do
     * or until we do a fixed total amount of work.
     *
     * We do work in batches because it can be much cheaper to set up a number
     * of flows and fire off their packets all at once.  We do multiple
     * batches because in some cases handling a packet can cause another
     * packet to be queued almost immediately as part of the return flow.
     * Both optimizations can make major improvements on some benchmarks and
     * presumably for real traffic as well. */
    work = 0;
    while (work < FLOW_MISS_MAX_BATCH) {
        int retval = handle_upcalls(ofproto, FLOW_MISS_MAX_BATCH - work);
        if (retval <= 0) {
            return -retval;
        }
        work += retval;
    }
    return 0;
}

static int
run(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofport_dpif *ofport;
    struct ofbundle *bundle;
    int error;

    if (!clogged) {
        complete_operations(ofproto);
    }
    dpif_run(ofproto->dpif);

    error = run_fast(ofproto_);
    if (error) {
        return error;
    }

    if (timer_expired(&ofproto->next_expiration)) {
        int delay = expire(ofproto);
        timer_set_duration(&ofproto->next_expiration, delay);
    }

    if (ofproto->netflow) {
        if (netflow_run(ofproto->netflow)) {
            send_netflow_active_timeouts(ofproto);
        }
    }
    if (ofproto->sflow) {
        dpif_sflow_run(ofproto->sflow);
    }

    HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
        port_run(ofport);
    }
    HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
        bundle_run(bundle);
    }

    stp_run(ofproto);
    mac_learning_run(ofproto->ml, &ofproto->revalidate_set);

    /* Now revalidate if there's anything to do. */
    if (ofproto->need_revalidate
        || !tag_set_is_empty(&ofproto->revalidate_set)) {
        struct tag_set revalidate_set = ofproto->revalidate_set;
        bool revalidate_all = ofproto->need_revalidate;
        struct facet *facet;

        switch (ofproto->need_revalidate) {
        case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
        case REV_STP:           COVERAGE_INC(rev_stp);           break;
        case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
        case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
        case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break;
        }

        /* Clear the revalidation flags. */
        tag_set_init(&ofproto->revalidate_set);
        ofproto->need_revalidate = 0;

        HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
            if (revalidate_all
                || tag_set_intersects(&revalidate_set, facet->tags)) {
                facet_revalidate(facet);
            }
        }
    }

    /* Check the consistency of a random facet, to aid debugging. */
    if (!hmap_is_empty(&ofproto->facets) && !ofproto->need_revalidate) {
        struct facet *facet;

        facet = CONTAINER_OF(hmap_random_node(&ofproto->facets),
                             struct facet, hmap_node);
        if (!tag_set_intersects(&ofproto->revalidate_set, facet->tags)) {
            if (!facet_check_consistency(facet)) {
                ofproto->need_revalidate = REV_INCONSISTENCY;
            }
        }
    }

    if (ofproto->governor) {
        size_t n_subfacets;

        governor_run(ofproto->governor);

        /* If the governor has shrunk to its minimum size and the number of
         * subfacets has dwindled, then drop the governor entirely.
         *
         * For hysteresis, the number of subfacets needed to drop the governor
         * is smaller than the number needed to trigger its creation. */
        n_subfacets = hmap_count(&ofproto->subfacets);
        if (n_subfacets * 4 < ofproto->up.flow_eviction_threshold
            && governor_is_idle(ofproto->governor)) {
            governor_destroy(ofproto->governor);
            ofproto->governor = NULL;
        }
    }

    return 0;
}

static void
wait(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofport_dpif *ofport;
    struct ofbundle *bundle;

    if (!clogged && !list_is_empty(&ofproto->completions)) {
        poll_immediate_wake();
    }

    dpif_wait(ofproto->dpif);
    dpif_recv_wait(ofproto->dpif);
    if (ofproto->sflow) {
        dpif_sflow_wait(ofproto->sflow);
    }
    if (!tag_set_is_empty(&ofproto->revalidate_set)) {
        poll_immediate_wake();
    }
    HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
        port_wait(ofport);
    }
    HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
        bundle_wait(bundle);
    }
    if (ofproto->netflow) {
        netflow_wait(ofproto->netflow);
    }
    mac_learning_wait(ofproto->ml);
    stp_wait(ofproto);
    if (ofproto->need_revalidate) {
        /* Shouldn't happen, but if it does just go around again. */
        VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
        poll_immediate_wake();
    } else {
        timer_wait(&ofproto->next_expiration);
    }
    if (ofproto->governor) {
        governor_wait(ofproto->governor);
    }
}

static void
get_memory_usage(const struct ofproto *ofproto_, struct simap *usage)
{
    const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    simap_increase(usage, "facets", hmap_count(&ofproto->facets));
    simap_increase(usage, "subfacets", hmap_count(&ofproto->subfacets));
}

static void
flush(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct facet *facet, *next_facet;

    HMAP_FOR_EACH_SAFE (facet, next_facet, hmap_node, &ofproto->facets) {
        /* Mark the facet as not installed so that facet_remove() doesn't
         * bother trying to uninstall it.  There is no point in uninstalling
         * it individually since we are about to blow away all the facets
         * with dpif_flow_flush(). */
        struct subfacet *subfacet;

        LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
            subfacet->path = SF_NOT_INSTALLED;
            subfacet->dp_packet_count = 0;
            subfacet->dp_byte_count = 0;
        }
        facet_remove(facet);
    }
    dpif_flow_flush(ofproto->dpif);
}

static void
get_features(struct ofproto *ofproto_ OVS_UNUSED,
             bool *arp_match_ip, enum ofputil_action_bitmap *actions)
{
    *arp_match_ip = true;
    *actions = (OFPUTIL_A_OUTPUT |
                OFPUTIL_A_SET_VLAN_VID |
                OFPUTIL_A_SET_VLAN_PCP |
                OFPUTIL_A_STRIP_VLAN |
                OFPUTIL_A_SET_DL_SRC |
                OFPUTIL_A_SET_DL_DST |
                OFPUTIL_A_SET_NW_SRC |
                OFPUTIL_A_SET_NW_DST |
                OFPUTIL_A_SET_NW_TOS |
                OFPUTIL_A_SET_TP_SRC |
                OFPUTIL_A_SET_TP_DST |
                OFPUTIL_A_ENQUEUE);
}

static void
get_tables(struct ofproto *ofproto_, struct ofp10_table_stats *ots)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_dp_stats s;

    strcpy(ots->name, "classifier");

    dpif_get_dp_stats(ofproto->dpif, &s);
    put_32aligned_be64(&ots->lookup_count, htonll(s.n_hit + s.n_missed));
    put_32aligned_be64(&ots->matched_count,
                       htonll(s.n_hit + ofproto->n_matches));
}

static struct ofport *
port_alloc(void)
{
    struct ofport_dpif *port = xmalloc(sizeof *port);
    return &port->up;
}

static void
port_dealloc(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    free(port);
}

static int
port_construct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);

    ofproto->need_revalidate = REV_RECONFIGURE;
    port->odp_port = ofp_port_to_odp_port(port->up.ofp_port);
    port->bundle = NULL;
    port->cfm = NULL;
    port->tag = tag_create_random();
    port->may_enable = true;
    port->stp_port = NULL;
    port->stp_state = STP_DISABLED;
    hmap_init(&port->priorities);
    port->realdev_ofp_port = 0;
    port->vlandev_vid = 0;
    port->carrier_seq = netdev_get_carrier_resets(port->up.netdev);

    if (ofproto->sflow) {
        dpif_sflow_add_port(ofproto->sflow, port_);
    }

    return 0;
}

static void
port_destruct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);

    ofproto->need_revalidate = REV_RECONFIGURE;
    bundle_remove(port_);
    set_cfm(port_, NULL);
    if (ofproto->sflow) {
        dpif_sflow_del_port(ofproto->sflow, port->odp_port);
    }

    ofport_clear_priorities(port);
    hmap_destroy(&port->priorities);
}

static void
port_modified(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);

    if (port->bundle && port->bundle->bond) {
        bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
    }
}

static void
port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    enum ofputil_port_config changed = old_config ^ port->up.pp.config;

    if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
                   OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
                   OFPUTIL_PC_NO_PACKET_IN)) {
        ofproto->need_revalidate = REV_RECONFIGURE;

        if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
            bundle_update(port->bundle);
        }
    }
}

static int
set_sflow(struct ofproto *ofproto_,
          const struct ofproto_sflow_options *sflow_options)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_sflow *ds = ofproto->sflow;

    if (sflow_options) {
        if (!ds) {
            struct ofport_dpif *ofport;

            ds = ofproto->sflow = dpif_sflow_create(ofproto->dpif);
            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                dpif_sflow_add_port(ds, &ofport->up);
            }
            ofproto->need_revalidate = REV_RECONFIGURE;
        }
        dpif_sflow_set_options(ds, sflow_options);
    } else {
        if (ds) {
            dpif_sflow_destroy(ds);
            ofproto->need_revalidate = REV_RECONFIGURE;
            ofproto->sflow = NULL;
        }
    }
    return 0;
}

static int
set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    int error;

    if (!s) {
        error = 0;
    } else {
        if (!ofport->cfm) {
            struct ofproto_dpif *ofproto;

            ofproto = ofproto_dpif_cast(ofport->up.ofproto);
            ofproto->need_revalidate = REV_RECONFIGURE;
            ofport->cfm = cfm_create(netdev_get_name(ofport->up.netdev));
        }

        if (cfm_configure(ofport->cfm, s)) {
            return 0;
        }

        error = EINVAL;
    }
    cfm_destroy(ofport->cfm);
    ofport->cfm = NULL;
    return error;
}

static int
get_cfm_fault(const struct ofport *ofport_)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    return ofport->cfm ? cfm_get_fault(ofport->cfm) : -1;
}

static int
get_cfm_opup(const struct ofport *ofport_)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    return ofport->cfm ? cfm_get_opup(ofport->cfm) : -1;
}

static int
get_cfm_remote_mpids(const struct ofport *ofport_, const uint64_t **rmps,
                     size_t *n_rmps)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    if (ofport->cfm) {
        cfm_get_remote_mpids(ofport->cfm, rmps, n_rmps);
        return 0;
    } else {
        return -1;
    }
}

static int
get_cfm_health(const struct ofport *ofport_)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    return ofport->cfm ? cfm_get_health(ofport->cfm) : -1;
}
\f
/* Spanning Tree. */

static void
send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_;
    struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
    struct ofport_dpif *ofport;

    ofport = stp_port_get_aux(sp);
    if (!ofport) {
        VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
                     ofproto->up.name, port_num);
    } else {
        struct eth_header *eth = pkt->l2;

        netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
        if (eth_addr_is_zero(eth->eth_src)) {
            VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
                         "with unknown MAC", ofproto->up.name, port_num);
        } else {
            send_packet(ofport, pkt);
        }
    }
    ofpbuf_delete(pkt);
}

/* Configures STP on 'ofproto_' using the settings defined in 's'. */
static int
set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    /* Only revalidate flows if the configuration changed. */
    if (!s != !ofproto->stp) {
        ofproto->need_revalidate = REV_RECONFIGURE;
    }

    if (s) {
        if (!ofproto->stp) {
            ofproto->stp = stp_create(ofproto_->name, s->system_id,
                                      send_bpdu_cb, ofproto);
            ofproto->stp_last_tick = time_msec();
        }

        stp_set_bridge_id(ofproto->stp, s->system_id);
        stp_set_bridge_priority(ofproto->stp, s->priority);
        stp_set_hello_time(ofproto->stp, s->hello_time);
        stp_set_max_age(ofproto->stp, s->max_age);
        stp_set_forward_delay(ofproto->stp, s->fwd_delay);
    } else {
        struct ofport *ofport;

        HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
            set_stp_port(ofport, NULL);
        }

        stp_destroy(ofproto->stp);
        ofproto->stp = NULL;
    }

    return 0;
}

static int
get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (ofproto->stp) {
        s->enabled = true;
        s->bridge_id = stp_get_bridge_id(ofproto->stp);
        s->designated_root = stp_get_designated_root(ofproto->stp);
        s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
    } else {
        s->enabled = false;
    }

    return 0;
}

static void
update_stp_port_state(struct ofport_dpif *ofport)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    enum stp_state state;

    /* Figure out new state. */
    state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
                             : STP_DISABLED;

    /* Update state. */
    if (ofport->stp_state != state) {
        enum ofputil_port_state of_state;
        bool fwd_change;

        VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s",
                    netdev_get_name(ofport->up.netdev),
                    stp_state_name(ofport->stp_state),
                    stp_state_name(state));
        if (stp_learn_in_state(ofport->stp_state)
            != stp_learn_in_state(state)) {
            /* XXX: Learning action flows should also be flushed. */
            mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);
        }
        fwd_change = stp_forward_in_state(ofport->stp_state)
            != stp_forward_in_state(state);

        ofproto->need_revalidate = REV_STP;
        ofport->stp_state = state;
        ofport->stp_state_entered = time_msec();

        if (fwd_change && ofport->bundle) {
            bundle_update(ofport->bundle);
        }

        /* Update the STP state bits in the OpenFlow port description. */
        of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
        of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
                     : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
                     : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
                     : state == STP_BLOCKING ? OFPUTIL_PS_STP_BLOCK
                     : 0);
        ofproto_port_set_state(&ofport->up, of_state);
    }
}

/* Configures STP on 'ofport_' using the settings defined in 's'.  The
 * caller is responsible for assigning STP port numbers and ensuring
 * there are no duplicates. */
static int
set_stp_port(struct ofport *ofport_,
             const struct ofproto_port_stp_settings *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    struct stp_port *sp = ofport->stp_port;

    if (!s || !s->enable) {
        if (sp) {
            ofport->stp_port = NULL;
            stp_port_disable(sp);
            update_stp_port_state(ofport);
        }
        return 0;
    } else if (sp && stp_port_no(sp) != s->port_num
               && ofport == stp_port_get_aux(sp)) {
        /* The port-id changed, so disable the old one if it's not
         * already in use by another port. */
        stp_port_disable(sp);
    }

    sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
    stp_port_enable(sp);

    stp_port_set_aux(sp, ofport);
    stp_port_set_priority(sp, s->priority);
    stp_port_set_path_cost(sp, s->path_cost);

    update_stp_port_state(ofport);

    return 0;
}

static int
get_stp_port_status(struct ofport *ofport_,
                    struct ofproto_port_stp_status *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    struct stp_port *sp = ofport->stp_port;

    if (!ofproto->stp || !sp) {
        s->enabled = false;
        return 0;
    }

    s->enabled = true;
    s->port_id = stp_port_get_id(sp);
    s->state = stp_port_get_state(sp);
    s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
    s->role = stp_port_get_role(sp);
    stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);

    return 0;
}

static void
stp_run(struct ofproto_dpif *ofproto)
{
    if (ofproto->stp) {
        long long int now = time_msec();
        long long int elapsed = now - ofproto->stp_last_tick;
        struct stp_port *sp;

        if (elapsed > 0) {
            stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
            ofproto->stp_last_tick = now;
        }
        while (stp_get_changed_port(ofproto->stp, &sp)) {
            struct ofport_dpif *ofport = stp_port_get_aux(sp);

            if (ofport) {
                update_stp_port_state(ofport);
            }
        }

        if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
            mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);
        }
    }
}

static void
stp_wait(struct ofproto_dpif *ofproto)
{
    if (ofproto->stp) {
        poll_timer_wait(1000);
    }
}

1569/* Returns true if STP should process 'flow'. */
1570static bool
1571stp_should_process_flow(const struct flow *flow)
1572{
1573 return eth_addr_equals(flow->dl_dst, eth_addr_stp);
1574}
1575
1576static void
1577stp_process_packet(const struct ofport_dpif *ofport,
1578 const struct ofpbuf *packet)
1579{
1580 struct ofpbuf payload = *packet;
1581 struct eth_header *eth = payload.data;
1582 struct stp_port *sp = ofport->stp_port;
1583
1584 /* Sink packets on ports that have STP disabled when the bridge has
1585 * STP enabled. */
1586 if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
1587 return;
1588 }
1589
1590 /* Trim off padding on payload. */
c573540b
BP
1591 if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1592 payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN;
21f7563c
JP
1593 }
1594
1595 if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1596 stp_received_bpdu(sp, payload.data, payload.size);
1597 }
1598}
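
/* Worked example (illustrative, not from the original file): BPDUs are
 * 802.3 frames, so 'eth_type' actually holds an LLC length rather than an
 * Ethertype.  A minimum-size 60-byte frame carrying a 35-byte Configuration
 * BPDU has eth_type == 38 (3 bytes of LLC header plus 35 bytes of BPDU), so
 * the trim above reduces payload.size to 38 + ETH_HEADER_LEN (14) = 52
 * bytes.  The ofpbuf_try_pull() then strips the 14-byte Ethernet header and
 * the 3-byte LLC header, handing exactly the 35 BPDU bytes to
 * stp_received_bpdu(). */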
\f
static struct priority_to_dscp *
get_priority(const struct ofport_dpif *ofport, uint32_t priority)
{
    struct priority_to_dscp *pdscp;
    uint32_t hash;

    hash = hash_int(priority, 0);
    HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &ofport->priorities) {
        if (pdscp->priority == priority) {
            return pdscp;
        }
    }
    return NULL;
}

static void
ofport_clear_priorities(struct ofport_dpif *ofport)
{
    struct priority_to_dscp *pdscp, *next;

    HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &ofport->priorities) {
        hmap_remove(&ofport->priorities, &pdscp->hmap_node);
        free(pdscp);
    }
}

static int
set_queues(struct ofport *ofport_,
           const struct ofproto_port_queue *qdscp_list,
           size_t n_qdscp)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    struct hmap new = HMAP_INITIALIZER(&new);
    size_t i;

    for (i = 0; i < n_qdscp; i++) {
        struct priority_to_dscp *pdscp;
        uint32_t priority;
        uint8_t dscp;

        dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
        if (dpif_queue_to_priority(ofproto->dpif, qdscp_list[i].queue,
                                   &priority)) {
            continue;
        }

        pdscp = get_priority(ofport, priority);
        if (pdscp) {
            hmap_remove(&ofport->priorities, &pdscp->hmap_node);
        } else {
            pdscp = xmalloc(sizeof *pdscp);
            pdscp->priority = priority;
            pdscp->dscp = dscp;
            ofproto->need_revalidate = REV_RECONFIGURE;
        }

        if (pdscp->dscp != dscp) {
            pdscp->dscp = dscp;
            ofproto->need_revalidate = REV_RECONFIGURE;
        }

        hmap_insert(&new, &pdscp->hmap_node, hash_int(pdscp->priority, 0));
    }

    if (!hmap_is_empty(&ofport->priorities)) {
        ofport_clear_priorities(ofport);
        ofproto->need_revalidate = REV_RECONFIGURE;
    }

    hmap_swap(&new, &ofport->priorities);
    hmap_destroy(&new);

    return 0;
}
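
/* Worked example (illustrative, not from the original file): DSCP occupies
 * the upper six bits of the IP TOS byte, which is why the configured value
 * is shifted left by two and masked with IP_DSCP_MASK (0xfc).  A queue
 * configured with DSCP 46 (Expedited Forwarding) yields
 * (46 << 2) & 0xfc == 0xb8, the raw TOS-byte value that flow translation
 * can later apply to packets sent on this queue. */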
\f
/* Bundles. */

/* Expires all MAC learning entries associated with 'bundle' and forces its
 * ofproto to revalidate every flow.
 *
 * Normally MAC learning entries are removed only from the ofproto associated
 * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
 * are removed from every ofproto.  When patch ports and SLB bonds are in use
 * and a VM migration happens and the gratuitous ARPs are somehow lost, this
 * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
 * with the host from which it migrated. */
static void
bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
{
    struct ofproto_dpif *ofproto = bundle->ofproto;
    struct mac_learning *ml = ofproto->ml;
    struct mac_entry *mac, *next_mac;

    ofproto->need_revalidate = REV_RECONFIGURE;
    LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
        if (mac->port.p == bundle) {
            if (all_ofprotos) {
                struct ofproto_dpif *o;

                HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
                    if (o != ofproto) {
                        struct mac_entry *e;

                        e = mac_learning_lookup(o->ml, mac->mac, mac->vlan,
                                                NULL);
                        if (e) {
                            tag_set_add(&o->revalidate_set, e->tag);
                            mac_learning_expire(o->ml, e);
                        }
                    }
                }
            }

            mac_learning_expire(ml, mac);
        }
    }
}

static struct ofbundle *
bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
{
    struct ofbundle *bundle;

    HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
                             &ofproto->bundles) {
        if (bundle->aux == aux) {
            return bundle;
        }
    }
    return NULL;
}

/* Looks up each of the 'n_auxes' pointers in 'auxes' as bundles and adds the
 * ones that are found to 'bundles'. */
static void
bundle_lookup_multiple(struct ofproto_dpif *ofproto,
                       void **auxes, size_t n_auxes,
                       struct hmapx *bundles)
{
    size_t i;

    hmapx_init(bundles);
    for (i = 0; i < n_auxes; i++) {
        struct ofbundle *bundle = bundle_lookup(ofproto, auxes[i]);
        if (bundle) {
            hmapx_add(bundles, bundle);
        }
    }
}

static void
bundle_update(struct ofbundle *bundle)
{
    struct ofport_dpif *port;

    bundle->floodable = true;
    LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
        if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
            || !stp_forward_in_state(port->stp_state)) {
            bundle->floodable = false;
            break;
        }
    }
}

static void
bundle_del_port(struct ofport_dpif *port)
{
    struct ofbundle *bundle = port->bundle;

    bundle->ofproto->need_revalidate = REV_RECONFIGURE;

    list_remove(&port->bundle_node);
    port->bundle = NULL;

    if (bundle->lacp) {
        lacp_slave_unregister(bundle->lacp, port);
    }
    if (bundle->bond) {
        bond_slave_unregister(bundle->bond, port);
    }

    bundle_update(bundle);
}

static bool
bundle_add_port(struct ofbundle *bundle, uint32_t ofp_port,
                struct lacp_slave_settings *lacp,
                uint32_t bond_stable_id)
{
    struct ofport_dpif *port;

    port = get_ofp_port(bundle->ofproto, ofp_port);
    if (!port) {
        return false;
    }

    if (port->bundle != bundle) {
        bundle->ofproto->need_revalidate = REV_RECONFIGURE;
        if (port->bundle) {
            bundle_del_port(port);
        }

        port->bundle = bundle;
        list_push_back(&bundle->ports, &port->bundle_node);
        if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
            || !stp_forward_in_state(port->stp_state)) {
            bundle->floodable = false;
        }
    }
    if (lacp) {
        port->bundle->ofproto->need_revalidate = REV_RECONFIGURE;
        lacp_slave_register(bundle->lacp, port, lacp);
    }

    port->bond_stable_id = bond_stable_id;

    return true;
}

static void
bundle_destroy(struct ofbundle *bundle)
{
    struct ofproto_dpif *ofproto;
    struct ofport_dpif *port, *next_port;
    int i;

    if (!bundle) {
        return;
    }

    ofproto = bundle->ofproto;
    for (i = 0; i < MAX_MIRRORS; i++) {
        struct ofmirror *m = ofproto->mirrors[i];
        if (m) {
            if (m->out == bundle) {
                mirror_destroy(m);
            } else if (hmapx_find_and_delete(&m->srcs, bundle)
                       || hmapx_find_and_delete(&m->dsts, bundle)) {
                ofproto->need_revalidate = REV_RECONFIGURE;
            }
        }
    }

    LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
        bundle_del_port(port);
    }

    bundle_flush_macs(bundle, true);
    hmap_remove(&ofproto->bundles, &bundle->hmap_node);
    free(bundle->name);
    free(bundle->trunks);
    lacp_destroy(bundle->lacp);
    bond_destroy(bundle->bond);
    free(bundle);
}

static int
bundle_set(struct ofproto *ofproto_, void *aux,
           const struct ofproto_bundle_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    bool need_flush = false;
    struct ofport_dpif *port;
    struct ofbundle *bundle;
    unsigned long *trunks;
    int vlan;
    size_t i;
    bool ok;

    if (!s) {
        bundle_destroy(bundle_lookup(ofproto, aux));
        return 0;
    }

    assert(s->n_slaves == 1 || s->bond != NULL);
    assert((s->lacp != NULL) == (s->lacp_slaves != NULL));

    bundle = bundle_lookup(ofproto, aux);
    if (!bundle) {
        bundle = xmalloc(sizeof *bundle);

        bundle->ofproto = ofproto;
        hmap_insert(&ofproto->bundles, &bundle->hmap_node,
                    hash_pointer(aux, 0));
        bundle->aux = aux;
        bundle->name = NULL;

        list_init(&bundle->ports);
        bundle->vlan_mode = PORT_VLAN_TRUNK;
        bundle->vlan = -1;
        bundle->trunks = NULL;
        bundle->use_priority_tags = s->use_priority_tags;
        bundle->lacp = NULL;
        bundle->bond = NULL;

        bundle->floodable = true;

        bundle->src_mirrors = 0;
        bundle->dst_mirrors = 0;
        bundle->mirror_out = 0;
    }

    if (!bundle->name || strcmp(s->name, bundle->name)) {
        free(bundle->name);
        bundle->name = xstrdup(s->name);
    }

    /* LACP. */
    if (s->lacp) {
        if (!bundle->lacp) {
            ofproto->need_revalidate = REV_RECONFIGURE;
            bundle->lacp = lacp_create();
        }
        lacp_configure(bundle->lacp, s->lacp);
    } else {
        lacp_destroy(bundle->lacp);
        bundle->lacp = NULL;
    }

    /* Update set of ports. */
    ok = true;
    for (i = 0; i < s->n_slaves; i++) {
        if (!bundle_add_port(bundle, s->slaves[i],
                             s->lacp ? &s->lacp_slaves[i] : NULL,
                             s->bond_stable_ids ? s->bond_stable_ids[i] : 0)) {
            ok = false;
        }
    }
    if (!ok || list_size(&bundle->ports) != s->n_slaves) {
        struct ofport_dpif *next_port;

        LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
            for (i = 0; i < s->n_slaves; i++) {
                if (s->slaves[i] == port->up.ofp_port) {
                    goto found;
                }
            }

            bundle_del_port(port);
        found: ;
        }
    }
    assert(list_size(&bundle->ports) <= s->n_slaves);

    if (list_is_empty(&bundle->ports)) {
        bundle_destroy(bundle);
        return EINVAL;
    }

    /* Set VLAN tagging mode. */
    if (s->vlan_mode != bundle->vlan_mode
        || s->use_priority_tags != bundle->use_priority_tags) {
        bundle->vlan_mode = s->vlan_mode;
        bundle->use_priority_tags = s->use_priority_tags;
        need_flush = true;
    }

    /* Set VLAN tag. */
    vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
            : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
            : 0);
    if (vlan != bundle->vlan) {
        bundle->vlan = vlan;
        need_flush = true;
    }

    /* Get trunked VLANs. */
    switch (s->vlan_mode) {
    case PORT_VLAN_ACCESS:
        trunks = NULL;
        break;

    case PORT_VLAN_TRUNK:
        trunks = CONST_CAST(unsigned long *, s->trunks);
        break;

    case PORT_VLAN_NATIVE_UNTAGGED:
    case PORT_VLAN_NATIVE_TAGGED:
        if (vlan != 0 && (!s->trunks
                          || !bitmap_is_set(s->trunks, vlan)
                          || bitmap_is_set(s->trunks, 0))) {
            /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
            if (s->trunks) {
                trunks = bitmap_clone(s->trunks, 4096);
            } else {
                trunks = bitmap_allocate1(4096);
            }
            bitmap_set1(trunks, vlan);
            bitmap_set0(trunks, 0);
        } else {
            trunks = CONST_CAST(unsigned long *, s->trunks);
        }
        break;

    default:
        NOT_REACHED();
    }
    if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
        free(bundle->trunks);
        if (trunks == s->trunks) {
            bundle->trunks = vlan_bitmap_clone(trunks);
        } else {
            bundle->trunks = trunks;
            trunks = NULL;
        }
        need_flush = true;
    }
    if (trunks != s->trunks) {
        free(trunks);
    }

    /* Bonding. */
    if (!list_is_short(&bundle->ports)) {
        bundle->ofproto->has_bonded_bundles = true;
        if (bundle->bond) {
            if (bond_reconfigure(bundle->bond, s->bond)) {
                ofproto->need_revalidate = REV_RECONFIGURE;
            }
        } else {
            bundle->bond = bond_create(s->bond);
            ofproto->need_revalidate = REV_RECONFIGURE;
        }

        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_register(bundle->bond, port, port->bond_stable_id,
                                port->up.netdev);
        }
    } else {
        bond_destroy(bundle->bond);
        bundle->bond = NULL;
    }

    /* If we changed something that would affect MAC learning, un-learn
     * everything on this port and force flow revalidation. */
    if (need_flush) {
        bundle_flush_macs(bundle, false);
    }

    return 0;
}
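
/* Illustrative sketch (not part of the original file): the bridge layer
 * reaches this function through ofproto_bundle_register().  A minimal
 * access-port configuration, with hypothetical slave and VLAN numbers,
 * might look like:
 *
 *     uint16_t slaves[] = { 5 };
 *     struct ofproto_bundle_settings s = {
 *         .name = "vif1.0",
 *         .slaves = slaves,
 *         .n_slaves = 1,
 *         .vlan_mode = PORT_VLAN_ACCESS,
 *         .vlan = 10,
 *     };
 *
 * Note the invariants asserted above: a bundle with more than one slave
 * must supply 's->bond', and 's->lacp' and 's->lacp_slaves' must be
 * provided (or omitted) together. */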

static void
bundle_remove(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofbundle *bundle = port->bundle;

    if (bundle) {
        bundle_del_port(port);
        if (list_is_empty(&bundle->ports)) {
            bundle_destroy(bundle);
        } else if (list_is_short(&bundle->ports)) {
            bond_destroy(bundle->bond);
            bundle->bond = NULL;
        }
    }
}

static void
send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
    struct ofport_dpif *port = port_;
    uint8_t ea[ETH_ADDR_LEN];
    int error;

    error = netdev_get_etheraddr(port->up.netdev, ea);
    if (!error) {
        struct ofpbuf packet;
        void *packet_pdu;

        ofpbuf_init(&packet, 0);
        packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
                                 pdu_size);
        memcpy(packet_pdu, pdu, pdu_size);

        send_packet(port, &packet);
        ofpbuf_uninit(&packet);
    } else {
        VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
                    "%s (%s)", port->bundle->name,
                    netdev_get_name(port->up.netdev), strerror(error));
    }
}

static void
bundle_send_learning_packets(struct ofbundle *bundle)
{
    struct ofproto_dpif *ofproto = bundle->ofproto;
    int error, n_packets, n_errors;
    struct mac_entry *e;

    error = n_packets = n_errors = 0;
    LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
        if (e->port.p != bundle) {
            struct ofpbuf *learning_packet;
            struct ofport_dpif *port;
            void *port_void;
            int ret;

            /* The assignment to "port" is unnecessary but makes "grep"ing for
             * struct ofport_dpif more effective. */
            learning_packet = bond_compose_learning_packet(bundle->bond,
                                                           e->mac, e->vlan,
                                                           &port_void);
            port = port_void;
            ret = send_packet(port, learning_packet);
            ofpbuf_delete(learning_packet);
            if (ret) {
                error = ret;
                n_errors++;
            }
            n_packets++;
        }
    }

    if (n_errors) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
                     "packets, last error was: %s",
                     bundle->name, n_errors, n_packets, strerror(error));
    } else {
        VLOG_DBG("bond %s: sent %d gratuitous learning packets",
                 bundle->name, n_packets);
    }
}

static void
bundle_run(struct ofbundle *bundle)
{
    if (bundle->lacp) {
        lacp_run(bundle->lacp, send_pdu_cb);
    }
    if (bundle->bond) {
        struct ofport_dpif *port;

        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
        }

        bond_run(bundle->bond, &bundle->ofproto->revalidate_set,
                 lacp_status(bundle->lacp));
        if (bond_should_send_learning_packets(bundle->bond)) {
            bundle_send_learning_packets(bundle);
        }
    }
}

static void
bundle_wait(struct ofbundle *bundle)
{
    if (bundle->lacp) {
        lacp_wait(bundle->lacp);
    }
    if (bundle->bond) {
        bond_wait(bundle->bond);
    }
}
\f
/* Mirrors. */

static int
mirror_scan(struct ofproto_dpif *ofproto)
{
    int idx;

    for (idx = 0; idx < MAX_MIRRORS; idx++) {
        if (!ofproto->mirrors[idx]) {
            return idx;
        }
    }
    return -1;
}

static struct ofmirror *
mirror_lookup(struct ofproto_dpif *ofproto, void *aux)
{
    int i;

    for (i = 0; i < MAX_MIRRORS; i++) {
        struct ofmirror *mirror = ofproto->mirrors[i];
        if (mirror && mirror->aux == aux) {
            return mirror;
        }
    }

    return NULL;
}

/* Update the 'dup_mirrors' member of each of the ofmirrors in 'ofproto'. */
static void
mirror_update_dups(struct ofproto_dpif *ofproto)
{
    int i;

    for (i = 0; i < MAX_MIRRORS; i++) {
        struct ofmirror *m = ofproto->mirrors[i];

        if (m) {
            m->dup_mirrors = MIRROR_MASK_C(1) << i;
        }
    }

    for (i = 0; i < MAX_MIRRORS; i++) {
        struct ofmirror *m1 = ofproto->mirrors[i];
        int j;

        if (!m1) {
            continue;
        }

        for (j = i + 1; j < MAX_MIRRORS; j++) {
            struct ofmirror *m2 = ofproto->mirrors[j];

            if (m2 && m1->out == m2->out && m1->out_vlan == m2->out_vlan) {
                m1->dup_mirrors |= MIRROR_MASK_C(1) << j;
                m2->dup_mirrors |= m1->dup_mirrors;
            }
        }
    }
}
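
/* Worked example (illustrative, not from the original file): suppose
 * mirrors 0 and 2 share the same output bundle and output VLAN.  The
 * first pass above sets their dup_mirrors to 1<<0 and 1<<2; the second
 * pass then ORs them together, leaving both with dup_mirrors ==
 * (1<<0 | 1<<2) == 0x5.  Flow translation can therefore emit a single
 * copy of each mirrored packet and credit its statistics to every
 * duplicate mirror at once. */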

static int
mirror_set(struct ofproto *ofproto_, void *aux,
           const struct ofproto_mirror_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    mirror_mask_t mirror_bit;
    struct ofbundle *bundle;
    struct ofmirror *mirror;
    struct ofbundle *out;
    struct hmapx srcs;          /* Contains "struct ofbundle *"s. */
    struct hmapx dsts;          /* Contains "struct ofbundle *"s. */
    int out_vlan;

    mirror = mirror_lookup(ofproto, aux);
    if (!s) {
        mirror_destroy(mirror);
        return 0;
    }
    if (!mirror) {
        int idx;

        idx = mirror_scan(ofproto);
        if (idx < 0) {
            VLOG_WARN("bridge %s: maximum of %d port mirrors reached, "
                      "cannot create %s",
                      ofproto->up.name, MAX_MIRRORS, s->name);
            return EFBIG;
        }

        mirror = ofproto->mirrors[idx] = xzalloc(sizeof *mirror);
        mirror->ofproto = ofproto;
        mirror->idx = idx;
        mirror->aux = aux;
        mirror->out_vlan = -1;
        mirror->name = NULL;
    }

    if (!mirror->name || strcmp(s->name, mirror->name)) {
        free(mirror->name);
        mirror->name = xstrdup(s->name);
    }

    /* Get the new configuration. */
    if (s->out_bundle) {
        out = bundle_lookup(ofproto, s->out_bundle);
        if (!out) {
            mirror_destroy(mirror);
            return EINVAL;
        }
        out_vlan = -1;
    } else {
        out = NULL;
        out_vlan = s->out_vlan;
    }
    bundle_lookup_multiple(ofproto, s->srcs, s->n_srcs, &srcs);
    bundle_lookup_multiple(ofproto, s->dsts, s->n_dsts, &dsts);

    /* If the configuration has not changed, do nothing. */
    if (hmapx_equals(&srcs, &mirror->srcs)
        && hmapx_equals(&dsts, &mirror->dsts)
        && vlan_bitmap_equal(mirror->vlans, s->src_vlans)
        && mirror->out == out
        && mirror->out_vlan == out_vlan)
    {
        hmapx_destroy(&srcs);
        hmapx_destroy(&dsts);
        return 0;
    }

    hmapx_swap(&srcs, &mirror->srcs);
    hmapx_destroy(&srcs);

    hmapx_swap(&dsts, &mirror->dsts);
    hmapx_destroy(&dsts);

    free(mirror->vlans);
    mirror->vlans = vlan_bitmap_clone(s->src_vlans);

    mirror->out = out;
    mirror->out_vlan = out_vlan;

    /* Update bundles. */
    mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
    HMAP_FOR_EACH (bundle, hmap_node, &mirror->ofproto->bundles) {
        if (hmapx_contains(&mirror->srcs, bundle)) {
            bundle->src_mirrors |= mirror_bit;
        } else {
            bundle->src_mirrors &= ~mirror_bit;
        }

        if (hmapx_contains(&mirror->dsts, bundle)) {
            bundle->dst_mirrors |= mirror_bit;
        } else {
            bundle->dst_mirrors &= ~mirror_bit;
        }

        if (mirror->out == bundle) {
            bundle->mirror_out |= mirror_bit;
        } else {
            bundle->mirror_out &= ~mirror_bit;
        }
    }

    ofproto->need_revalidate = REV_RECONFIGURE;
    ofproto->has_mirrors = true;
    mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);
    mirror_update_dups(ofproto);

    return 0;
}

static void
mirror_destroy(struct ofmirror *mirror)
{
    struct ofproto_dpif *ofproto;
    mirror_mask_t mirror_bit;
    struct ofbundle *bundle;
    int i;

    if (!mirror) {
        return;
    }

    ofproto = mirror->ofproto;
    ofproto->need_revalidate = REV_RECONFIGURE;
    mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);

    mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
    HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
        bundle->src_mirrors &= ~mirror_bit;
        bundle->dst_mirrors &= ~mirror_bit;
        bundle->mirror_out &= ~mirror_bit;
    }

    hmapx_destroy(&mirror->srcs);
    hmapx_destroy(&mirror->dsts);
    free(mirror->vlans);

    ofproto->mirrors[mirror->idx] = NULL;
    free(mirror->name);
    free(mirror);

    mirror_update_dups(ofproto);

    ofproto->has_mirrors = false;
    for (i = 0; i < MAX_MIRRORS; i++) {
        if (ofproto->mirrors[i]) {
            ofproto->has_mirrors = true;
            break;
        }
    }
}

static int
mirror_get_stats(struct ofproto *ofproto_, void *aux,
                 uint64_t *packets, uint64_t *bytes)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofmirror *mirror = mirror_lookup(ofproto, aux);

    if (!mirror) {
        *packets = *bytes = UINT64_MAX;
        return 0;
    }

    *packets = mirror->packet_count;
    *bytes = mirror->byte_count;

    return 0;
}

static int
set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
        mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);
    }
    return 0;
}

static bool
is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofbundle *bundle = bundle_lookup(ofproto, aux);
    return bundle && bundle->mirror_out != 0;
}

static void
forward_bpdu_changed(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    ofproto->need_revalidate = REV_RECONFIGURE;
}

static void
set_mac_idle_time(struct ofproto *ofproto_, unsigned int idle_time)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    mac_learning_set_idle_time(ofproto->ml, idle_time);
}
\f
/* Ports. */

static struct ofport_dpif *
get_ofp_port(const struct ofproto_dpif *ofproto, uint16_t ofp_port)
{
    struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
    return ofport ? ofport_dpif_cast(ofport) : NULL;
}

static struct ofport_dpif *
get_odp_port(const struct ofproto_dpif *ofproto, uint32_t odp_port)
{
    return get_ofp_port(ofproto, odp_port_to_ofp_port(odp_port));
}

static void
ofproto_port_from_dpif_port(struct ofproto_port *ofproto_port,
                            struct dpif_port *dpif_port)
{
    ofproto_port->name = dpif_port->name;
    ofproto_port->type = dpif_port->type;
    ofproto_port->ofp_port = odp_port_to_ofp_port(dpif_port->port_no);
}

static void
port_run_fast(struct ofport_dpif *ofport)
{
    if (ofport->cfm && cfm_should_send_ccm(ofport->cfm)) {
        struct ofpbuf packet;

        ofpbuf_init(&packet, 0);
        cfm_compose_ccm(ofport->cfm, &packet, ofport->up.pp.hw_addr);
        send_packet(ofport, &packet);
        ofpbuf_uninit(&packet);
    }
}

static void
port_run(struct ofport_dpif *ofport)
{
    long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
    bool carrier_changed = carrier_seq != ofport->carrier_seq;
    bool enable = netdev_get_carrier(ofport->up.netdev);

    ofport->carrier_seq = carrier_seq;

    port_run_fast(ofport);
    if (ofport->cfm) {
        int cfm_opup = cfm_get_opup(ofport->cfm);

        cfm_run(ofport->cfm);
        enable = enable && !cfm_get_fault(ofport->cfm);

        if (cfm_opup >= 0) {
            enable = enable && cfm_opup;
        }
    }

    if (ofport->bundle) {
        enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
        if (carrier_changed) {
            lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
        }
    }

    if (ofport->may_enable != enable) {
        struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);

        if (ofproto->has_bundle_action) {
            ofproto->need_revalidate = REV_PORT_TOGGLED;
        }
    }

    ofport->may_enable = enable;
}

static void
port_wait(struct ofport_dpif *ofport)
{
    if (ofport->cfm) {
        cfm_wait(ofport->cfm);
    }
}

static int
port_query_by_name(const struct ofproto *ofproto_, const char *devname,
                   struct ofproto_port *ofproto_port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_port dpif_port;
    int error;

    error = dpif_port_query_by_name(ofproto->dpif, devname, &dpif_port);
    if (!error) {
        ofproto_port_from_dpif_port(ofproto_port, &dpif_port);
    }
    return error;
}

static int
port_add(struct ofproto *ofproto_, struct netdev *netdev, uint16_t *ofp_portp)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    uint16_t odp_port = UINT16_MAX;
    int error;

    error = dpif_port_add(ofproto->dpif, netdev, &odp_port);
    if (!error) {
        *ofp_portp = odp_port_to_ofp_port(odp_port);
    }
    return error;
}

static int
port_del(struct ofproto *ofproto_, uint16_t ofp_port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    int error;

    error = dpif_port_del(ofproto->dpif, ofp_port_to_odp_port(ofp_port));
    if (!error) {
        struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
        if (ofport) {
            /* The caller is going to close ofport->up.netdev.  If this is a
             * bonded port, then the bond is using that netdev, so remove it
             * from the bond.  The client will need to reconfigure everything
             * after deleting ports, so then the slave will get re-added. */
            bundle_remove(&ofport->up);
        }
    }
    return error;
}

static int
port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    int error;

    error = netdev_get_stats(ofport->up.netdev, stats);

    if (!error && ofport->odp_port == OVSP_LOCAL) {
        struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);

        /* ofproto->stats.tx_packets represents packets that we created
         * internally and sent to some port (e.g. packets sent with
         * send_packet()).  Account for them as if they had come from
         * OFPP_LOCAL and got forwarded. */

        if (stats->rx_packets != UINT64_MAX) {
            stats->rx_packets += ofproto->stats.tx_packets;
        }

        if (stats->rx_bytes != UINT64_MAX) {
            stats->rx_bytes += ofproto->stats.tx_bytes;
        }

        /* ofproto->stats.rx_packets represents packets that were received on
         * some port and we processed internally and dropped (e.g. STP).
         * Account for them as if they had been forwarded to OFPP_LOCAL. */

        if (stats->tx_packets != UINT64_MAX) {
            stats->tx_packets += ofproto->stats.rx_packets;
        }

        if (stats->tx_bytes != UINT64_MAX) {
            stats->tx_bytes += ofproto->stats.rx_bytes;
        }
    }

    return error;
}

/* Account packets for LOCAL port. */
static void
ofproto_update_local_port_stats(const struct ofproto *ofproto_,
                                size_t tx_size, size_t rx_size)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (rx_size) {
        ofproto->stats.rx_packets++;
        ofproto->stats.rx_bytes += rx_size;
    }
    if (tx_size) {
        ofproto->stats.tx_packets++;
        ofproto->stats.tx_bytes += tx_size;
    }
}

struct port_dump_state {
    struct dpif_port_dump dump;
    bool done;
};

static int
port_dump_start(const struct ofproto *ofproto_, void **statep)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct port_dump_state *state;

    *statep = state = xmalloc(sizeof *state);
    dpif_port_dump_start(&state->dump, ofproto->dpif);
    state->done = false;
    return 0;
}

static int
port_dump_next(const struct ofproto *ofproto_ OVS_UNUSED, void *state_,
               struct ofproto_port *port)
{
    struct port_dump_state *state = state_;
    struct dpif_port dpif_port;

    if (dpif_port_dump_next(&state->dump, &dpif_port)) {
        ofproto_port_from_dpif_port(port, &dpif_port);
        return 0;
    } else {
        int error = dpif_port_dump_done(&state->dump);
        state->done = true;
        return error ? error : EOF;
    }
}

static int
port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
{
    struct port_dump_state *state = state_;

    if (!state->done) {
        dpif_port_dump_done(&state->dump);
    }
    free(state);
    return 0;
}

static int
port_poll(const struct ofproto *ofproto_, char **devnamep)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    return dpif_port_poll(ofproto->dpif, devnamep);
}

static void
port_poll_wait(const struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    dpif_port_poll_wait(ofproto->dpif);
}

static int
port_is_lacp_current(const struct ofport *ofport_)
{
    const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    return (ofport->bundle && ofport->bundle->lacp
            ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
            : -1);
}
\f
/* Upcall handling. */

/* Flow miss batching.
 *
 * Some dpifs implement operations faster when you hand them off in a batch.
 * To allow batching, "struct flow_miss" queues the dpif-related work needed
 * for a given flow.  Each "struct flow_miss" corresponds to sending one or
 * more packets, plus possibly installing the flow in the dpif.
 *
 * So far we only batch the operations that affect flow setup time the most.
 * It's possible to batch more than that, but the benefit might be minimal. */
struct flow_miss {
    struct hmap_node hmap_node;
    struct flow flow;
    enum odp_key_fitness key_fitness;
    const struct nlattr *key;
    size_t key_len;
    ovs_be16 initial_tci;
    struct list packets;
    enum dpif_upcall_type upcall_type;
};

struct flow_miss_op {
    struct dpif_op dpif_op;
    struct subfacet *subfacet;  /* Subfacet. */
    void *garbage;              /* Pointer to pass to free(), NULL if none. */
    uint64_t stub[1024 / 8];    /* Temporary buffer. */
};
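
/* Illustrative sketch (not part of the original file): the batching
 * pattern built on these structures amounts to filling an array of
 * flow_miss_ops, gathering pointers to their embedded dpif_ops, and
 * issuing them to the datapath in a single call:
 *
 *     struct flow_miss_op ops[FLOW_MISS_MAX_BATCH * 2];
 *     struct dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH * 2];
 *     size_t n_ops = 0;
 *     size_t i;
 *
 *     (... fill in ops[0..n_ops-1], e.g. via handle_flow_miss() ...)
 *
 *     for (i = 0; i < n_ops; i++) {
 *         dpif_ops[i] = &ops[i].dpif_op;
 *     }
 *     dpif_operate(ofproto->dpif, dpif_ops, n_ops);
 *
 * handle_miss_upcalls() below implements exactly this loop. */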

/* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each
 * OpenFlow controller as necessary according to their individual
 * configurations. */
static void
send_packet_in_miss(struct ofproto_dpif *ofproto, const struct ofpbuf *packet,
                    const struct flow *flow)
{
    struct ofputil_packet_in pin;

    pin.packet = packet->data;
    pin.packet_len = packet->size;
    pin.reason = OFPR_NO_MATCH;
    pin.controller_id = 0;

    pin.table_id = 0;
    pin.cookie = 0;

    pin.send_len = 0;           /* not used for flow table misses */

    flow_get_metadata(flow, &pin.fmd);

    /* Registers aren't meaningful on a miss. */
    memset(pin.fmd.reg_masks, 0, sizeof pin.fmd.reg_masks);

    connmgr_send_packet_in(ofproto->up.connmgr, &pin);
}

static enum slow_path_reason
process_special(struct ofproto_dpif *ofproto, const struct flow *flow,
                const struct ofpbuf *packet)
{
    struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port);

    if (!ofport) {
        return 0;
    }

    if (ofport->cfm && cfm_should_process_flow(ofport->cfm, flow)) {
        if (packet) {
            cfm_process_heartbeat(ofport->cfm, packet);
        }
        return SLOW_CFM;
    } else if (ofport->bundle && ofport->bundle->lacp
               && flow->dl_type == htons(ETH_TYPE_LACP)) {
        if (packet) {
            lacp_process_packet(ofport->bundle->lacp, ofport, packet);
        }
        return SLOW_LACP;
    } else if (ofproto->stp && stp_should_process_flow(flow)) {
        if (packet) {
            stp_process_packet(ofport, packet);
        }
        return SLOW_STP;
    }
    return 0;
}

static struct flow_miss *
flow_miss_find(struct hmap *todo, const struct flow *flow, uint32_t hash)
{
    struct flow_miss *miss;

    HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) {
        if (flow_equal(&miss->flow, flow)) {
            return miss;
        }
    }

    return NULL;
}

/* Partially initializes 'op' as an "execute" operation for 'miss' and
 * 'packet'.  The caller must initialize op->actions and op->actions_len.  If
 * 'miss' is associated with a subfacet the caller must also initialize the
 * returned op->subfacet, and if anything needs to be freed after processing
 * the op, the caller must initialize op->garbage also. */
static void
init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet,
                          struct flow_miss_op *op)
{
    if (miss->flow.vlan_tci != miss->initial_tci) {
        /* This packet was received on a VLAN splinter port.  We
         * added a VLAN to the packet to make the packet resemble
         * the flow, but the actions were composed assuming that
         * the packet contained no VLAN.  So, we must remove the
         * VLAN header from the packet before trying to execute the
         * actions. */
        eth_pop_vlan(packet);
    }

    op->subfacet = NULL;
    op->garbage = NULL;
    op->dpif_op.type = DPIF_OP_EXECUTE;
    op->dpif_op.u.execute.key = miss->key;
    op->dpif_op.u.execute.key_len = miss->key_len;
    op->dpif_op.u.execute.packet = packet;
}

/* Helper for handle_flow_miss_without_facet() and
 * handle_flow_miss_with_facet(). */
static void
handle_flow_miss_common(struct rule_dpif *rule,
                        struct ofpbuf *packet, const struct flow *flow)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);

    ofproto->n_matches++;

    if (rule->up.cr.priority == FAIL_OPEN_PRIORITY) {
        /*
         * Extra-special case for fail-open mode.
         *
         * We are in fail-open mode and the packet matched the fail-open
         * rule, but we are connected to a controller too.  We should send
         * the packet up to the controller in the hope that it will try to
         * set up a flow and thereby allow us to exit fail-open.
         *
         * See the top-level comment in fail-open.c for more information.
         */
        send_packet_in_miss(ofproto, packet, flow);
    }
}

/* Figures out whether a flow that missed in 'ofproto', whose details are in
 * 'miss', is likely to be worth tracking in detail in userspace and (usually)
 * installing a datapath flow.  The answer is usually "yes" (a return value of
 * true).  However, for short flows the cost of bookkeeping is much higher than
 * the benefits, so when the datapath holds a large number of flows we impose
 * some heuristics to decide which flows are likely to be worth tracking. */
static bool
flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
                            struct flow_miss *miss, uint32_t hash)
{
    if (!ofproto->governor) {
        size_t n_subfacets;

        n_subfacets = hmap_count(&ofproto->subfacets);
        if (n_subfacets * 2 <= ofproto->up.flow_eviction_threshold) {
            return true;
        }

        ofproto->governor = governor_create(ofproto->up.name);
    }

    return governor_should_install_flow(ofproto->governor, hash,
                                        list_size(&miss->packets));
}
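
/* Worked example (illustrative, not from the original file): with a
 * flow_eviction_threshold of, say, 1000, facets are created
 * unconditionally while the datapath holds at most 500 subfacets, since
 * 500 * 2 <= 1000.  Beyond that point a governor is created, and each
 * miss is scored by its flow hash and packet count, so only flows the
 * governor considers busy enough get a facet; the rest are handled by
 * handle_flow_miss_without_facet() with no datapath flow installed. */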

/* Handles 'miss', which matches 'rule', without creating a facet or subfacet
 * or creating any datapath flow.  May add an "execute" operation to 'ops' and
 * increment '*n_ops'. */
static void
handle_flow_miss_without_facet(struct flow_miss *miss,
                               struct rule_dpif *rule,
                               struct flow_miss_op *ops, size_t *n_ops)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
    struct action_xlate_ctx ctx;
    struct ofpbuf *packet;

    LIST_FOR_EACH (packet, list_node, &miss->packets) {
        struct flow_miss_op *op = &ops[*n_ops];
        struct dpif_flow_stats stats;
        struct ofpbuf odp_actions;

        COVERAGE_INC(facet_suppress);

        ofpbuf_use_stub(&odp_actions, op->stub, sizeof op->stub);

        dpif_flow_stats_extract(&miss->flow, packet, &stats);
        rule_credit_stats(rule, &stats);

        action_xlate_ctx_init(&ctx, ofproto, &miss->flow, miss->initial_tci,
                              rule, 0, packet);
        ctx.resubmit_stats = &stats;
        xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len,
                      &odp_actions);

        if (odp_actions.size) {
            struct dpif_execute *execute = &op->dpif_op.u.execute;

            init_flow_miss_execute_op(miss, packet, op);
            execute->actions = odp_actions.data;
            execute->actions_len = odp_actions.size;
            op->garbage = ofpbuf_get_uninit_pointer(&odp_actions);

            (*n_ops)++;
        } else {
            ofpbuf_uninit(&odp_actions);
        }
    }
}

/* Handles 'miss', which matches 'facet'.  May add any required datapath
 * operations to 'ops', incrementing '*n_ops' for each new op. */
static void
handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
                            struct flow_miss_op *ops, size_t *n_ops)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
    enum subfacet_path want_path;
    struct subfacet *subfacet;
    struct ofpbuf *packet;

    subfacet = subfacet_create(facet,
                               miss->key_fitness, miss->key, miss->key_len,
                               miss->initial_tci);

    LIST_FOR_EACH (packet, list_node, &miss->packets) {
        struct flow_miss_op *op = &ops[*n_ops];
        struct dpif_flow_stats stats;
        struct ofpbuf odp_actions;

        handle_flow_miss_common(facet->rule, packet, &miss->flow);

        ofpbuf_use_stub(&odp_actions, op->stub, sizeof op->stub);
        if (!subfacet->actions || subfacet->slow) {
            subfacet_make_actions(subfacet, packet, &odp_actions);
        }

        dpif_flow_stats_extract(&facet->flow, packet, &stats);
        subfacet_update_stats(subfacet, &stats);

        if (subfacet->actions_len) {
            struct dpif_execute *execute = &op->dpif_op.u.execute;

            init_flow_miss_execute_op(miss, packet, op);
            op->subfacet = subfacet;
            if (!subfacet->slow) {
                execute->actions = subfacet->actions;
                execute->actions_len = subfacet->actions_len;
                ofpbuf_uninit(&odp_actions);
            } else {
                execute->actions = odp_actions.data;
                execute->actions_len = odp_actions.size;
                op->garbage = ofpbuf_get_uninit_pointer(&odp_actions);
            }

            (*n_ops)++;
        } else {
            ofpbuf_uninit(&odp_actions);
        }
    }

    want_path = subfacet_want_path(subfacet->slow);
    if (miss->upcall_type == DPIF_UC_MISS || subfacet->path != want_path) {
        struct flow_miss_op *op = &ops[(*n_ops)++];
        struct dpif_flow_put *put = &op->dpif_op.u.flow_put;

        op->subfacet = subfacet;
        op->garbage = NULL;
        op->dpif_op.type = DPIF_OP_FLOW_PUT;
        put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
        put->key = miss->key;
        put->key_len = miss->key_len;
        if (want_path == SF_FAST_PATH) {
            put->actions = subfacet->actions;
            put->actions_len = subfacet->actions_len;
        } else {
            compose_slow_path(ofproto, &facet->flow, subfacet->slow,
                              op->stub, sizeof op->stub,
                              &put->actions, &put->actions_len);
        }
        put->stats = NULL;
    }
}

/* Handles flow miss 'miss' on 'ofproto'.  May add any required datapath
 * operations to 'ops', incrementing '*n_ops' for each new op. */
static void
handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
                 struct flow_miss_op *ops, size_t *n_ops)
{
    struct facet *facet;
    uint32_t hash;

    /* The caller must ensure that miss->hmap_node.hash contains
     * flow_hash(miss->flow, 0). */
    hash = miss->hmap_node.hash;

    facet = facet_lookup_valid(ofproto, &miss->flow, hash);
    if (!facet) {
        struct rule_dpif *rule = rule_dpif_lookup(ofproto, &miss->flow);

        if (!flow_miss_should_make_facet(ofproto, miss, hash)) {
            handle_flow_miss_without_facet(miss, rule, ops, n_ops);
            return;
        }

        facet = facet_create(rule, &miss->flow, hash);
    }
    handle_flow_miss_with_facet(miss, facet, ops, n_ops);
}

/* Like odp_flow_key_to_flow(), this function converts the 'key_len' bytes of
 * OVS_KEY_ATTR_* attributes in 'key' to a flow structure in 'flow' and returns
 * an ODP_FIT_* value that indicates how well 'key' fits our expectations for
 * what a flow key should contain.
 *
 * This function also includes some logic to help make VLAN splinters
 * transparent to the rest of the upcall processing logic.  In particular, if
 * the extracted in_port is a VLAN splinter port, it replaces flow->in_port by
 * the "real" port, sets flow->vlan_tci correctly for the VLAN of the VLAN
 * splinter port, and pushes a VLAN header onto 'packet' (if it is nonnull).
 *
 * Sets '*initial_tci' to the VLAN TCI with which the packet was really
 * received, that is, the actual VLAN TCI extracted by odp_flow_key_to_flow().
 * (This differs from the value returned in flow->vlan_tci only for packets
 * received on VLAN splinters.)
 */
static enum odp_key_fitness
ofproto_dpif_extract_flow_key(const struct ofproto_dpif *ofproto,
                              const struct nlattr *key, size_t key_len,
                              struct flow *flow, ovs_be16 *initial_tci,
                              struct ofpbuf *packet)
{
    enum odp_key_fitness fitness;

    fitness = odp_flow_key_to_flow(key, key_len, flow);
    if (fitness == ODP_FIT_ERROR) {
        return fitness;
    }
    *initial_tci = flow->vlan_tci;

    if (vsp_adjust_flow(ofproto, flow)) {
        if (packet) {
            /* Make the packet resemble the flow, so that it gets sent to an
             * OpenFlow controller properly, so that it looks correct for
             * sFlow, and so that flow_extract() will get the correct vlan_tci
             * if it is called on 'packet'.
             *
             * The allocated space inside 'packet' probably also contains
             * 'key', that is, both 'packet' and 'key' are probably part of a
             * struct dpif_upcall (see the large comment on that structure
             * definition), so pushing data on 'packet' is in general not a
             * good idea since it could overwrite 'key' or free it as a side
             * effect.  However, it's OK in this special case because we know
             * that 'packet' is inside a Netlink attribute: pushing 4 bytes
             * will just overwrite the 4-byte "struct nlattr", which is fine
             * since we don't need that header anymore. */
            eth_push_vlan(packet, flow->vlan_tci);
        }

        /* Let the caller know that we can't reproduce 'key' from 'flow'. */
        if (fitness == ODP_FIT_PERFECT) {
            fitness = ODP_FIT_TOO_MUCH;
        }
    }

    return fitness;
}
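
/* Worked example (illustrative, not from the original file): suppose a
 * hypothetical VLAN splinter port "eth0.10" stands in for VLAN 10 on
 * physical port "eth0".  A packet received untagged on eth0.10 produces
 * a datapath key whose in_port is eth0.10 and whose vlan_tci is 0.
 * vsp_adjust_flow() then rewrites the flow so that in_port refers to
 * eth0 and flow->vlan_tci carries VLAN 10, and the code above pushes the
 * corresponding 802.1Q header onto 'packet'.  Meanwhile '*initial_tci'
 * keeps the original zero TCI, which is how later code can tell that the
 * packet was really received untagged. */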

static void
handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls,
                    size_t n_upcalls)
{
    struct dpif_upcall *upcall;
    struct flow_miss *miss;
    struct flow_miss misses[FLOW_MISS_MAX_BATCH];
    struct flow_miss_op flow_miss_ops[FLOW_MISS_MAX_BATCH * 2];
    struct dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH * 2];
    struct hmap todo;
    int n_misses;
    size_t n_ops;
    size_t i;

    if (!n_upcalls) {
        return;
    }

    /* Construct the to-do list.
     *
     * This just amounts to extracting the flow from each packet and sticking
     * the packets that have the same flow in the same "flow_miss" structure so
     * that we can process them together. */
    hmap_init(&todo);
    n_misses = 0;
    for (upcall = upcalls; upcall < &upcalls[n_upcalls]; upcall++) {
        struct flow_miss *miss = &misses[n_misses];
        struct flow_miss *existing_miss;
        uint32_t hash;

        /* Obtain metadata and check userspace/kernel agreement on flow match,
         * then set 'flow''s header pointers. */
        miss->key_fitness = ofproto_dpif_extract_flow_key(
            ofproto, upcall->key, upcall->key_len,
            &miss->flow, &miss->initial_tci, upcall->packet);
        if (miss->key_fitness == ODP_FIT_ERROR) {
            continue;
        }
        flow_extract(upcall->packet, miss->flow.skb_priority,
                     miss->flow.tun_id, miss->flow.in_port, &miss->flow);

        /* Add other packets to a to-do list. */
        hash = flow_hash(&miss->flow, 0);
        existing_miss = flow_miss_find(&todo, &miss->flow, hash);
        if (!existing_miss) {
            hmap_insert(&todo, &miss->hmap_node, hash);
            miss->key = upcall->key;
            miss->key_len = upcall->key_len;
            miss->upcall_type = upcall->type;
            list_init(&miss->packets);

            n_misses++;
        } else {
            miss = existing_miss;
        }
        list_push_back(&miss->packets, &upcall->packet->list_node);
    }

    /* Process each element in the to-do list, constructing the set of
     * operations to batch. */
    n_ops = 0;
    HMAP_FOR_EACH (miss, hmap_node, &todo) {
        handle_flow_miss(ofproto, miss, flow_miss_ops, &n_ops);
    }
    assert(n_ops <= ARRAY_SIZE(flow_miss_ops));

    /* Execute batch. */
    for (i = 0; i < n_ops; i++) {
        dpif_ops[i] = &flow_miss_ops[i].dpif_op;
    }
    dpif_operate(ofproto->dpif, dpif_ops, n_ops);

    /* Free memory and update facets. */
    for (i = 0; i < n_ops; i++) {
        struct flow_miss_op *op = &flow_miss_ops[i];

        switch (op->dpif_op.type) {
        case DPIF_OP_EXECUTE:
            break;

        case DPIF_OP_FLOW_PUT:
            if (!op->dpif_op.error) {
                op->subfacet->path = subfacet_want_path(op->subfacet->slow);
            }
            break;

        case DPIF_OP_FLOW_DEL:
            NOT_REACHED();
        }

        free(op->garbage);
    }
    hmap_destroy(&todo);
}

static enum { SFLOW_UPCALL, MISS_UPCALL, BAD_UPCALL }
classify_upcall(const struct dpif_upcall *upcall)
{
    union user_action_cookie cookie;

    /* First look at the upcall type. */
    switch (upcall->type) {
    case DPIF_UC_ACTION:
        break;

    case DPIF_UC_MISS:
        return MISS_UPCALL;

    case DPIF_N_UC_TYPES:
    default:
        VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, upcall->type);
        return BAD_UPCALL;
    }

    /* "action" upcalls need a closer look. */
    memcpy(&cookie, &upcall->userdata, sizeof(cookie));
    switch (cookie.type) {
    case USER_ACTION_COOKIE_SFLOW:
        return SFLOW_UPCALL;

    case USER_ACTION_COOKIE_SLOW_PATH:
        return MISS_UPCALL;

    case USER_ACTION_COOKIE_UNSPEC:
    default:
        VLOG_WARN_RL(&rl, "invalid user cookie: 0x%"PRIx64, upcall->userdata);
        return BAD_UPCALL;
    }
}
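
/* Summary (illustrative, not from the original file) of the dispatch
 * implemented by classify_upcall():
 *
 *     upcall->type     cookie.type                    result
 *     ---------------  -----------------------------  ------------
 *     DPIF_UC_MISS     (not examined)                 MISS_UPCALL
 *     DPIF_UC_ACTION   USER_ACTION_COOKIE_SFLOW       SFLOW_UPCALL
 *     DPIF_UC_ACTION   USER_ACTION_COOKIE_SLOW_PATH   MISS_UPCALL
 *     DPIF_UC_ACTION   anything else                  BAD_UPCALL
 *     anything else    (not examined)                 BAD_UPCALL
 *
 * Slow-path "action" upcalls are deliberately funneled into the
 * MISS_UPCALL case so that CFM, LACP, and STP packets go through the
 * same batched miss handling as genuine datapath misses. */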

static void
handle_sflow_upcall(struct ofproto_dpif *ofproto,
                    const struct dpif_upcall *upcall)
{
    union user_action_cookie cookie;
    enum odp_key_fitness fitness;
    ovs_be16 initial_tci;
    struct flow flow;

    fitness = ofproto_dpif_extract_flow_key(ofproto, upcall->key,
                                            upcall->key_len, &flow,
                                            &initial_tci, upcall->packet);
    if (fitness == ODP_FIT_ERROR) {
        return;
    }

    memcpy(&cookie, &upcall->userdata, sizeof(cookie));
    dpif_sflow_received(ofproto->sflow, upcall->packet, &flow, &cookie);
}

static int
handle_upcalls(struct ofproto_dpif *ofproto, unsigned int max_batch)
{
    struct dpif_upcall misses[FLOW_MISS_MAX_BATCH];
    struct ofpbuf miss_bufs[FLOW_MISS_MAX_BATCH];
    uint64_t miss_buf_stubs[FLOW_MISS_MAX_BATCH][4096 / 8];
    int n_processed;
    int n_misses;
    int i;

    assert(max_batch <= FLOW_MISS_MAX_BATCH);

    n_misses = 0;
    for (n_processed = 0; n_processed < max_batch; n_processed++) {
        struct dpif_upcall *upcall = &misses[n_misses];
        struct ofpbuf *buf = &miss_bufs[n_misses];
        int error;

        ofpbuf_use_stub(buf, miss_buf_stubs[n_misses],
                        sizeof miss_buf_stubs[n_misses]);
        error = dpif_recv(ofproto->dpif, upcall, buf);
        if (error) {
            ofpbuf_uninit(buf);
            break;
        }

        switch (classify_upcall(upcall)) {
        case MISS_UPCALL:
            /* Handle it later. */
            n_misses++;
            break;

        case SFLOW_UPCALL:
            if (ofproto->sflow) {
                handle_sflow_upcall(ofproto, upcall);
            }
            ofpbuf_uninit(buf);
            break;

        case BAD_UPCALL:
            ofpbuf_uninit(buf);
            break;
        }
    }

    /* Handle deferred MISS_UPCALL processing. */
    handle_miss_upcalls(ofproto, misses, n_misses);
    for (i = 0; i < n_misses; i++) {
        ofpbuf_uninit(&miss_bufs[i]);
    }

    return n_processed;
}
\f
/* Flow expiration. */

static int subfacet_max_idle(const struct ofproto_dpif *);
static void update_stats(struct ofproto_dpif *);
static void rule_expire(struct rule_dpif *);
static void expire_subfacets(struct ofproto_dpif *, int dp_max_idle);

/* This function is called periodically by run().  Its job is to collect
 * updates for the flows that have been installed into the datapath, most
 * importantly when they last were used, and then use that information to
 * expire flows that have not been used recently.
 *
 * Returns the number of milliseconds after which it should be called again. */
static int
expire(struct ofproto_dpif *ofproto)
{
    struct rule_dpif *rule, *next_rule;
    struct oftable *table;
    int dp_max_idle;

    /* Update stats for each flow in the datapath. */
    update_stats(ofproto);

    /* Expire subfacets that have been idle too long. */
    dp_max_idle = subfacet_max_idle(ofproto);
    expire_subfacets(ofproto, dp_max_idle);

    /* Expire OpenFlow flows whose idle_timeout or hard_timeout has passed. */
    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        struct cls_cursor cursor;

        cls_cursor_init(&cursor, &table->cls, NULL);
        CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
            rule_expire(rule);
        }
    }

    /* All outstanding data in existing flows has been accounted, so it's a
     * good time to do bond rebalancing. */
    if (ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
            if (bundle->bond) {
                bond_rebalance(bundle->bond, &ofproto->revalidate_set);
            }
        }
    }

    return MIN(dp_max_idle, 1000);
}
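
/* Illustrative sketch (not part of the original file): the run() loop is
 * expected to treat expire()'s return value as a timer interval, along
 * the lines of:
 *
 *     int delay = expire(ofproto);
 *     timer_set_duration(&ofproto->next_expiration, delay);
 *
 * so that expiration runs again after at most 'delay' milliseconds,
 * which the MIN(dp_max_idle, 1000) above caps at one second.  The
 * 'next_expiration' field name is an assumption here, used only for
 * illustration. */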
3322
a218c879
BP
3323/* Updates flow table statistics given that the datapath just reported 'stats'
3324 * as 'subfacet''s statistics. */
3325static void
3326update_subfacet_stats(struct subfacet *subfacet,
3327 const struct dpif_flow_stats *stats)
3328{
3329 struct facet *facet = subfacet->facet;
3330
3331 if (stats->n_packets >= subfacet->dp_packet_count) {
3332 uint64_t extra = stats->n_packets - subfacet->dp_packet_count;
3333 facet->packet_count += extra;
3334 } else {
3335 VLOG_WARN_RL(&rl, "unexpected packet count from the datapath");
3336 }
3337
3338 if (stats->n_bytes >= subfacet->dp_byte_count) {
3339 facet->byte_count += stats->n_bytes - subfacet->dp_byte_count;
3340 } else {
3341 VLOG_WARN_RL(&rl, "unexpected byte count from datapath");
3342 }
3343
3344 subfacet->dp_packet_count = stats->n_packets;
3345 subfacet->dp_byte_count = stats->n_bytes;
3346
3347 facet->tcp_flags |= stats->tcp_flags;
3348
3349 subfacet_update_time(subfacet, stats->used);
3350 if (facet->accounted_bytes < facet->byte_count) {
3351 facet_learn(facet);
3352 facet_account(facet);
3353 facet->accounted_bytes = facet->byte_count;
3354 }
3355 facet_push_stats(facet);
3356}
3357
3358/* 'key' with length 'key_len' bytes is a flow in 'dpif' that we know nothing
3359 * about, or a flow that shouldn't be installed but was anyway. Delete it. */
3360static void
3361delete_unexpected_flow(struct dpif *dpif,
3362 const struct nlattr *key, size_t key_len)
3363{
3364 if (!VLOG_DROP_WARN(&rl)) {
3365 struct ds s;
3366
3367 ds_init(&s);
3368 odp_flow_key_format(key, key_len, &s);
3369 VLOG_WARN("unexpected flow from datapath %s", ds_cstr(&s));
3370 ds_destroy(&s);
3371 }
3372
3373 COVERAGE_INC(facet_unexpected);
3374 dpif_flow_del(dpif, key, key_len, NULL);
3375}
3376
abe529af
BP
3377/* Update 'packet_count', 'byte_count', and 'used' members of installed facets.
3378 *
3379 * This function also pushes statistics updates to the rules into which each
3380 * facet resubmits. Generally these statistics will be accurate. However, if a
3381 * facet changes the rule it resubmits into at some time in between
3382 * update_stats() runs, it is possible that statistics accrued to the
3383 * old rule will be incorrectly attributed to the new rule. This could be
3384 * avoided by calling update_stats() whenever rules are created or
3385 * deleted. However, the performance impact of making so many calls to the
3386 * datapath does not justify the benefit of having perfectly accurate statistics.
3387 */
3388static void
3389update_stats(struct ofproto_dpif *p)
3390{
3391 const struct dpif_flow_stats *stats;
3392 struct dpif_flow_dump dump;
3393 const struct nlattr *key;
3394 size_t key_len;
3395
3396 dpif_flow_dump_start(&dump, p->dpif);
3397 while (dpif_flow_dump_next(&dump, &key, &key_len, NULL, NULL, &stats)) {
b0f7b9b5 3398 struct subfacet *subfacet;
abe529af 3399
6a542738 3400 subfacet = subfacet_find(p, key, key_len);
6a7e895f
BP
3401 switch (subfacet ? subfacet->path : SF_NOT_INSTALLED) {
3402 case SF_FAST_PATH:
a218c879 3403 update_subfacet_stats(subfacet, stats);
6a7e895f
BP
3404 break;
3405
3406 case SF_SLOW_PATH:
3407 /* Stats are updated per-packet. */
3408 break;
3409
3410 case SF_NOT_INSTALLED:
3411 default:
a218c879 3412 delete_unexpected_flow(p->dpif, key, key_len);
6a7e895f 3413 break;
abe529af
BP
3414 }
3415 }
3416 dpif_flow_dump_done(&dump);
3417}
3418
3419/* Calculates and returns the number of milliseconds of idle time after which
b0f7b9b5
BP
3420 * subfacets should expire from the datapath. When a subfacet expires, we fold
3421 * its statistics into its facet, and when a facet's last subfacet expires, we
3422 * fold its statistics into its rule. */
abe529af 3423static int
b0f7b9b5 3424subfacet_max_idle(const struct ofproto_dpif *ofproto)
abe529af
BP
3425{
3426 /*
3427 * Idle time histogram.
3428 *
b0f7b9b5
BP
3429 * Most of the time a switch has a relatively small number of subfacets.
3430 * When this is the case we might as well keep statistics for all of them
3431 * in userspace and cache them in the kernel datapath for performance as
abe529af
BP
3432 * well.
3433 *
b0f7b9b5 3434 * As the number of subfacets increases, the memory required to maintain
abe529af 3435 * statistics about them in userspace and in the kernel becomes
b0f7b9b5
BP
3436 * significant. However, with a large number of subfacets it is likely
3437 * that only a few of them are "heavy hitters" that consume a large amount
3438 * of bandwidth. At this point, only heavy hitters are worth caching in
3439 * the kernel and maintaining in userspace; other subfacets we can
3440 * discard.
abe529af
BP
3441 *
3442 * The technique used to compute the idle time is to build a histogram with
b0f7b9b5 3443 * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet
abe529af
BP
3444 * that is installed in the kernel gets dropped in the appropriate bucket.
3445 * After the histogram has been built, we compute the cutoff so that only
b0f7b9b5 3446 * the most-recently-used 1% of subfacets (but at least
084f5290 3447 * ofproto->up.flow_eviction_threshold flows) are kept cached. At least
b0f7b9b5
BP
3448 * the most-recently-used bucket of subfacets is kept, so actually an
3449 * arbitrary number of subfacets can be kept in any given expiration run
084f5290
SH
3450 * (though the next run will delete most of those unless they receive
3451 * additional data).
abe529af 3452 *
b0f7b9b5
BP
3453 * This requires a second pass through the subfacets, in addition to the
3454 * pass made by update_stats(), because that function never looks at
3455 * uninstallable subfacets.
abe529af
BP
3456 */
3457 enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) };
3458 enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
3459 int buckets[N_BUCKETS] = { 0 };
f11c1ef4 3460 int total, subtotal, bucket;
b0f7b9b5 3461 struct subfacet *subfacet;
abe529af
BP
3462 long long int now;
3463 int i;
3464
b0f7b9b5 3465 total = hmap_count(&ofproto->subfacets);
084f5290 3466 if (total <= ofproto->up.flow_eviction_threshold) {
abe529af
BP
3467 return N_BUCKETS * BUCKET_WIDTH;
3468 }
3469
3470 /* Build histogram. */
3471 now = time_msec();
b0f7b9b5
BP
3472 HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->subfacets) {
3473 long long int idle = now - subfacet->used;
abe529af
BP
3474 int bucket = (idle <= 0 ? 0
3475 : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1
3476 : (unsigned int) idle / BUCKET_WIDTH);
3477 buckets[bucket]++;
3478 }
3479
3480 /* Find the first bucket whose flows should be expired. */
f11c1ef4
SH
3481 subtotal = bucket = 0;
3482 do {
3483 subtotal += buckets[bucket++];
084f5290
SH
3484 } while (bucket < N_BUCKETS &&
3485 subtotal < MAX(ofproto->up.flow_eviction_threshold, total / 100));
abe529af
BP
3486
3487 if (VLOG_IS_DBG_ENABLED()) {
3488 struct ds s;
3489
3490 ds_init(&s);
3491 ds_put_cstr(&s, "keep");
3492 for (i = 0; i < N_BUCKETS; i++) {
3493 if (i == bucket) {
3494 ds_put_cstr(&s, ", drop");
3495 }
3496 if (buckets[i]) {
3497 ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]);
3498 }
3499 }
3500 VLOG_INFO("%s: %s (msec:count)", ofproto->up.name, ds_cstr(&s));
3501 ds_destroy(&s);
3502 }
3503
3504 return bucket * BUCKET_WIDTH;
3505}
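/* A standalone sketch of the cutoff computation in subfacet_max_idle()
 * above, under the same assumptions: flows are bucketed by idle time and
 * buckets are consumed from most-recently-used onward until at least
 * MAX(threshold, total / 100) flows are covered; any flow idle longer than
 * the returned number of milliseconds should be expired.  The constants
 * here are illustrative only. */
enum { SKETCH_BUCKET_WIDTH = 100 };                     /* msec per bucket */
enum { SKETCH_N_BUCKETS = 5000 / SKETCH_BUCKET_WIDTH };

static int
sketch_max_idle(const int buckets[SKETCH_N_BUCKETS], int total, int threshold)
{
    int keep = total / 100 > threshold ? total / 100 : threshold;
    int subtotal = 0;
    int bucket = 0;

    if (total <= threshold) {
        return SKETCH_N_BUCKETS * SKETCH_BUCKET_WIDTH;  /* Keep everything. */
    }
    do {
        subtotal += buckets[bucket++];
    } while (bucket < SKETCH_N_BUCKETS && subtotal < keep);
    return bucket * SKETCH_BUCKET_WIDTH;
}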
3506
b99d3cee
BP
3507enum { EXPIRE_MAX_BATCH = 50 };
3508
3509static void
3510expire_batch(struct ofproto_dpif *ofproto, struct subfacet **subfacets, int n)
3511{
3512 struct odputil_keybuf keybufs[EXPIRE_MAX_BATCH];
3513 struct dpif_op ops[EXPIRE_MAX_BATCH];
3514 struct dpif_op *opsp[EXPIRE_MAX_BATCH];
3515 struct ofpbuf keys[EXPIRE_MAX_BATCH];
3516 struct dpif_flow_stats stats[EXPIRE_MAX_BATCH];
3517 int i;
3518
3519 for (i = 0; i < n; i++) {
3520 ops[i].type = DPIF_OP_FLOW_DEL;
3521 subfacet_get_key(subfacets[i], &keybufs[i], &keys[i]);
3522 ops[i].u.flow_del.key = keys[i].data;
3523 ops[i].u.flow_del.key_len = keys[i].size;
3524 ops[i].u.flow_del.stats = &stats[i];
3525 opsp[i] = &ops[i];
3526 }
3527
3528 dpif_operate(ofproto->dpif, opsp, n);
3529 for (i = 0; i < n; i++) {
3530 subfacet_reset_dp_stats(subfacets[i], &stats[i]);
6a7e895f 3531 subfacets[i]->path = SF_NOT_INSTALLED;
b99d3cee
BP
3532 subfacet_destroy(subfacets[i]);
3533 }
3534}
3535
abe529af 3536static void
b0f7b9b5 3537expire_subfacets(struct ofproto_dpif *ofproto, int dp_max_idle)
abe529af 3538{
625b0720
BP
3539 /* Cutoff time for most flows. */
3540 long long int normal_cutoff = time_msec() - dp_max_idle;
3541
3542 /* We really want to keep flows for special protocols around, so use a more
3543 * conservative cutoff. */
3544 long long int special_cutoff = time_msec() - 10000;
b99d3cee 3545
b0f7b9b5 3546 struct subfacet *subfacet, *next_subfacet;
b99d3cee
BP
3547 struct subfacet *batch[EXPIRE_MAX_BATCH];
3548 int n_batch;
abe529af 3549
b99d3cee 3550 n_batch = 0;
b0f7b9b5
BP
3551 HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
3552 &ofproto->subfacets) {
625b0720
BP
3553 long long int cutoff;
3554
3555 cutoff = (subfacet->slow & (SLOW_CFM | SLOW_LACP | SLOW_STP)
3556 ? special_cutoff
3557 : normal_cutoff);
b0f7b9b5 3558 if (subfacet->used < cutoff) {
6a7e895f 3559 if (subfacet->path != SF_NOT_INSTALLED) {
b99d3cee
BP
3560 batch[n_batch++] = subfacet;
3561 if (n_batch >= EXPIRE_MAX_BATCH) {
3562 expire_batch(ofproto, batch, n_batch);
3563 n_batch = 0;
3564 }
3565 } else {
3566 subfacet_destroy(subfacet);
3567 }
abe529af
BP
3568 }
3569 }
b99d3cee
BP
3570
3571 if (n_batch > 0) {
3572 expire_batch(ofproto, batch, n_batch);
3573 }
abe529af
BP
3574}
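/* A minimal sketch of the fixed-size batching idiom used by expire_batch()
 * and expire_subfacets() above: accumulate work items into a small array,
 * flush them in one call whenever the array fills, and flush the remainder
 * at the end so a partial batch is never lost.  sketch_flush() is a
 * hypothetical stand-in for expire_batch(). */
#include <stdio.h>

enum { SKETCH_MAX_BATCH = 50 };

static void
sketch_flush(const int *batch, int n)
{
    (void) batch;
    printf("flushing a batch of %d items\n", n); /* One dpif_operate()'s worth. */
}

static void
sketch_batch_all(const int *items, int n_items)
{
    int batch[SKETCH_MAX_BATCH];
    int n_batch = 0;
    int i;

    for (i = 0; i < n_items; i++) {
        batch[n_batch++] = items[i];
        if (n_batch >= SKETCH_MAX_BATCH) {
            sketch_flush(batch, n_batch);
            n_batch = 0;
        }
    }
    if (n_batch > 0) {
        sketch_flush(batch, n_batch);   /* Don't drop the final partial batch. */
    }
}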
3575
3576/* If 'rule' is an OpenFlow rule that has expired according to OpenFlow rules,
3577 * then delete it entirely. */
3578static void
3579rule_expire(struct rule_dpif *rule)
3580{
abe529af
BP
3581 struct facet *facet, *next_facet;
3582 long long int now;
3583 uint8_t reason;
3584
e2a3d183
BP
3585 if (rule->up.pending) {
3586 /* We'll have to expire it later. */
3587 return;
3588 }
3589
abe529af
BP
3590 /* Has 'rule' expired? */
3591 now = time_msec();
3592 if (rule->up.hard_timeout
308881af 3593 && now > rule->up.modified + rule->up.hard_timeout * 1000) {
abe529af 3594 reason = OFPRR_HARD_TIMEOUT;
8ea6ac3e 3595 } else if (rule->up.idle_timeout
1745cd08 3596 && now > rule->up.used + rule->up.idle_timeout * 1000) {
abe529af
BP
3597 reason = OFPRR_IDLE_TIMEOUT;
3598 } else {
3599 return;
3600 }
3601
3602 COVERAGE_INC(ofproto_dpif_expired);
3603
3604 /* Update stats. (This is a no-op if the rule expired due to an idle
3605 * timeout, because that only happens when the rule has no facets left.) */
3606 LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) {
15baa734 3607 facet_remove(facet);
abe529af
BP
3608 }
3609
3610 /* Get rid of the rule. */
3611 ofproto_rule_expire(&rule->up, reason);
3612}
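/* A standalone sketch of the expiration test in rule_expire() above: a hard
 * timeout counts from the last modification, an idle timeout from the last
 * use, and a timeout of zero means "never expire".  Timestamps are in
 * milliseconds; OpenFlow timeouts are configured in seconds, hence the
 * multiplication by 1000. */
#include <stdbool.h>
#include <stdint.h>

static bool
sketch_rule_has_expired(long long int now, long long int modified,
                        long long int used, uint16_t hard_timeout,
                        uint16_t idle_timeout)
{
    return (hard_timeout && now > modified + hard_timeout * 1000LL)
        || (idle_timeout && now > used + idle_timeout * 1000LL);
}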
3613\f
3614/* Facets. */
3615
f3827897 3616/* Creates and returns a new facet owned by 'rule', given a 'flow'.
abe529af
BP
3617 *
3618 * The caller must already have determined that no facet with an identical
3619 * 'flow' exists in 'ofproto' and that 'flow' is the best match for 'rule' in
f3827897
BP
3620 * the ofproto's classifier table.
3621 *
2b459b83
BP
3622 * 'hash' must be the return value of flow_hash(flow, 0).
3623 *
b0f7b9b5
BP
3624 * The facet will initially have no subfacets. The caller should create (at
3625 * least) one subfacet with subfacet_create(). */
abe529af 3626static struct facet *
2b459b83 3627facet_create(struct rule_dpif *rule, const struct flow *flow, uint32_t hash)
abe529af
BP
3628{
3629 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3630 struct facet *facet;
3631
3632 facet = xzalloc(sizeof *facet);
3633 facet->used = time_msec();
2b459b83 3634 hmap_insert(&ofproto->facets, &facet->hmap_node, hash);
abe529af
BP
3635 list_push_back(&rule->facets, &facet->list_node);
3636 facet->rule = rule;
3637 facet->flow = *flow;
b0f7b9b5 3638 list_init(&facet->subfacets);
abe529af
BP
3639 netflow_flow_init(&facet->nf_flow);
3640 netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);
3641
abe529af
BP
3642 return facet;
3643}
3644
3645static void
3646facet_free(struct facet *facet)
3647{
abe529af
BP
3648 free(facet);
3649}
3650
3d9e05f8
BP
3651/* Executes, within 'ofproto', the 'actions_len' bytes of datapath actions in
3652 * 'odp_actions' on 'packet', whose flow is given by 'flow'.
3653 *
3654 * Takes ownership of 'packet'. Returns true on success. */
3655static bool
3656execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
3657 const struct nlattr *odp_actions, size_t actions_len,
3658 struct ofpbuf *packet)
3659{
3660 struct odputil_keybuf keybuf;
3661 struct ofpbuf key;
3662 int error;
3663
6ff686f2
PS
3664 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
3665 odp_flow_key_from_flow(&key, flow);
80e5eed9 3666
6ff686f2
PS
3667 error = dpif_execute(ofproto->dpif, key.data, key.size,
3668 odp_actions, actions_len, packet);
80e5eed9 3669
6ff686f2
PS
3670 ofpbuf_delete(packet);
3671 return !error;
abe529af
BP
3672}
3673
abe529af
BP
3674/* Remove 'facet' from 'ofproto' and free up the associated memory:
3675 *
3676 * - If 'facet' was installed in the datapath, uninstalls it and updates its
b0f7b9b5 3677 * rule's statistics, via subfacet_uninstall().
abe529af
BP
3678 *
3679 * - Removes 'facet' from its rule and from ofproto->facets.
3680 */
3681static void
15baa734 3682facet_remove(struct facet *facet)
abe529af 3683{
15baa734 3684 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
b0f7b9b5
BP
3685 struct subfacet *subfacet, *next_subfacet;
3686
551a2f6c
BP
3687 assert(!list_is_empty(&facet->subfacets));
3688
3689 /* First uninstall all of the subfacets to get final statistics. */
3690 LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
15baa734 3691 subfacet_uninstall(subfacet);
551a2f6c
BP
3692 }
3693
3694 /* Flush the final stats to the rule.
3695 *
3696 * This might require us to have at least one subfacet around so that we
3697 * can use its actions for accounting in facet_account(), which is why we
3698 * have uninstalled but not yet destroyed the subfacets. */
15baa734 3699 facet_flush_stats(facet);
551a2f6c
BP
3700
3701 /* Now we're really all done so destroy everything. */
b0f7b9b5
BP
3702 LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node,
3703 &facet->subfacets) {
15baa734 3704 subfacet_destroy__(subfacet);
b0f7b9b5 3705 }
abe529af
BP
3706 hmap_remove(&ofproto->facets, &facet->hmap_node);
3707 list_remove(&facet->list_node);
3708 facet_free(facet);
3709}
3710
3de9590b
BP
3711/* Feed information from 'facet' back into the learning table to keep it in
3712 * sync with what is actually flowing through the datapath. */
abe529af 3713static void
3de9590b 3714facet_learn(struct facet *facet)
abe529af 3715{
15baa734 3716 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
3de9590b 3717 struct action_xlate_ctx ctx;
abe529af 3718
3de9590b
BP
3719 if (!facet->has_learn
3720 && !facet->has_normal
3721 && (!facet->has_fin_timeout
3722 || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) {
abe529af
BP
3723 return;
3724 }
abe529af 3725
3de9590b
BP
3726 action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
3727 facet->flow.vlan_tci,
3728 facet->rule, facet->tcp_flags, NULL);
3729 ctx.may_learn = true;
f25d0cf3
BP
3730 xlate_actions_for_side_effects(&ctx, facet->rule->up.ofpacts,
3731 facet->rule->up.ofpacts_len);
3de9590b
BP
3732}
3733
3734static void
3735facet_account(struct facet *facet)
3736{
3737 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
3738 struct subfacet *subfacet;
3739 const struct nlattr *a;
3740 unsigned int left;
3741 ovs_be16 vlan_tci;
3742 uint64_t n_bytes;
abe529af 3743
75a75043 3744 if (!facet->has_normal || !ofproto->has_bonded_bundles) {
abe529af
BP
3745 return;
3746 }
3de9590b 3747 n_bytes = facet->byte_count - facet->accounted_bytes;
d78be13b
BP
3748
3749 /* This loop feeds byte counters to bond_account() for rebalancing to use
3750 * as a basis. We also need to track the actual VLAN on which the packet
3751 * is going to be sent to ensure that it matches the one passed to
3752 * bond_choose_output_slave(). (Otherwise, we will account to the wrong
b95fc6ba
BP
3753 * hash bucket.)
3754 *
3755 * We use the actions from an arbitrary subfacet because they should all
3756 * be equally valid for our purpose. */
3757 subfacet = CONTAINER_OF(list_front(&facet->subfacets),
3758 struct subfacet, list_node);
d78be13b 3759 vlan_tci = facet->flow.vlan_tci;
b95fc6ba
BP
3760 NL_ATTR_FOR_EACH_UNSAFE (a, left,
3761 subfacet->actions, subfacet->actions_len) {
fea393b1 3762 const struct ovs_action_push_vlan *vlan;
d78be13b 3763 struct ofport_dpif *port;
abe529af 3764
d78be13b 3765 switch (nl_attr_type(a)) {
df2c07f4 3766 case OVS_ACTION_ATTR_OUTPUT:
abe529af
BP
3767 port = get_odp_port(ofproto, nl_attr_get_u32(a));
3768 if (port && port->bundle && port->bundle->bond) {
d78be13b 3769 bond_account(port->bundle->bond, &facet->flow,
dc155bff 3770 vlan_tci_to_vid(vlan_tci), n_bytes);
abe529af 3771 }
d78be13b
BP
3772 break;
3773
fea393b1
BP
3774 case OVS_ACTION_ATTR_POP_VLAN:
3775 vlan_tci = htons(0);
d78be13b
BP
3776 break;
3777
fea393b1
BP
3778 case OVS_ACTION_ATTR_PUSH_VLAN:
3779 vlan = nl_attr_get(a);
3780 vlan_tci = vlan->vlan_tci;
d78be13b 3781 break;
abe529af
BP
3782 }
3783 }
3784}
3785
abe529af
BP
3786/* Returns true if the only action for 'facet' is to send to the controller.
3787 * (We don't report NetFlow expiration messages for such facets because they
3788 * are just part of the control logic for the network, not real traffic). */
3789static bool
3790facet_is_controller_flow(struct facet *facet)
3791{
f25d0cf3
BP
3792 if (facet) {
3793 const struct rule *rule = &facet->rule->up;
3794 const struct ofpact *ofpacts = rule->ofpacts;
3795 size_t ofpacts_len = rule->ofpacts_len;
3796
3797 if (ofpacts->type == OFPACT_CONTROLLER &&
3798 ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len)) {
3799 return true;
3800 }
3801 }
3802 return false;
abe529af
BP
3803}
3804
3805/* Folds all of 'facet''s statistics into its rule. Also updates the
3806 * accounting ofhook and emits a NetFlow expiration if appropriate. All of
3807 * 'facet''s statistics in the datapath should have been zeroed and folded into
3808 * its packet and byte counts before this function is called. */
3809static void
15baa734 3810facet_flush_stats(struct facet *facet)
abe529af 3811{
15baa734 3812 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
b0f7b9b5
BP
3813 struct subfacet *subfacet;
3814
3815 LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
3816 assert(!subfacet->dp_byte_count);
3817 assert(!subfacet->dp_packet_count);
3818 }
abe529af
BP
3819
3820 facet_push_stats(facet);
3de9590b
BP
3821 if (facet->accounted_bytes < facet->byte_count) {
3822 facet_account(facet);
3823 facet->accounted_bytes = facet->byte_count;
3824 }
abe529af
BP
3825
3826 if (ofproto->netflow && !facet_is_controller_flow(facet)) {
3827 struct ofexpired expired;
3828 expired.flow = facet->flow;
3829 expired.packet_count = facet->packet_count;
3830 expired.byte_count = facet->byte_count;
3831 expired.used = facet->used;
3832 netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
3833 }
3834
3835 facet->rule->packet_count += facet->packet_count;
3836 facet->rule->byte_count += facet->byte_count;
3837
3838 /* Reset counters to prevent double counting if 'facet' ever gets
3839 * reinstalled. */
bbb5d219 3840 facet_reset_counters(facet);
abe529af
BP
3841
3842 netflow_flow_clear(&facet->nf_flow);
0e553d9c 3843 facet->tcp_flags = 0;
abe529af
BP
3844}
3845
3846/* Searches 'ofproto''s table of facets for one exactly equal to 'flow'.
3847 * Returns it if found, otherwise a null pointer.
3848 *
2b459b83
BP
3849 * 'hash' must be the return value of flow_hash(flow, 0).
3850 *
abe529af
BP
3851 * The returned facet might need revalidation; use facet_lookup_valid()
3852 * instead if that is important. */
3853static struct facet *
2b459b83
BP
3854facet_find(struct ofproto_dpif *ofproto,
3855 const struct flow *flow, uint32_t hash)
abe529af
BP
3856{
3857 struct facet *facet;
3858
2b459b83 3859 HMAP_FOR_EACH_WITH_HASH (facet, hmap_node, hash, &ofproto->facets) {
abe529af
BP
3860 if (flow_equal(flow, &facet->flow)) {
3861 return facet;
3862 }
3863 }
3864
3865 return NULL;
3866}
3867
3868/* Searches 'ofproto''s table of facets for one exactly equal to 'flow'.
3869 * Returns it if found, otherwise a null pointer.
3870 *
2b459b83
BP
3871 * 'hash' must be the return value of flow_hash(flow, 0).
3872 *
abe529af
BP
3873 * The returned facet is guaranteed to be valid. */
3874static struct facet *
2b459b83
BP
3875facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow,
3876 uint32_t hash)
abe529af 3877{
c57b2226 3878 struct facet *facet;
abe529af 3879
c57b2226 3880 facet = facet_find(ofproto, flow, hash);
abe529af 3881 if (facet
0e4b3771 3882 && (ofproto->need_revalidate
c57b2226
BP
3883 || tag_set_intersects(&ofproto->revalidate_set, facet->tags))) {
3884 facet_revalidate(facet);
abe529af
BP
3885 }
3886
3887 return facet;
3888}
3889
6a7e895f
BP
3890static const char *
3891subfacet_path_to_string(enum subfacet_path path)
3892{
3893 switch (path) {
3894 case SF_NOT_INSTALLED:
3895 return "not installed";
3896 case SF_FAST_PATH:
3897 return "in fast path";
3898 case SF_SLOW_PATH:
3899 return "in slow path";
3900 default:
3901 return "<error>";
3902 }
3903}
3904
3905/* Returns the path in which a subfacet should be installed if its 'slow'
3906 * member has the specified value. */
3907static enum subfacet_path
3908subfacet_want_path(enum slow_path_reason slow)
3909{
3910 return slow ? SF_SLOW_PATH : SF_FAST_PATH;
3911}
3912
3913/* Returns true if 'subfacet' needs to have its datapath flow updated,
3914 * supposing that its actions have been recalculated as 'want_actions' and that
3915 * 'slow' is nonzero iff 'subfacet' should be in the slow path. */
3916static bool
3917subfacet_should_install(struct subfacet *subfacet, enum slow_path_reason slow,
3918 const struct ofpbuf *want_actions)
3919{
3920 enum subfacet_path want_path = subfacet_want_path(slow);
3921 return (want_path != subfacet->path
3922 || (want_path == SF_FAST_PATH
3923 && (subfacet->actions_len != want_actions->size
3924 || memcmp(subfacet->actions, want_actions->data,
3925 subfacet->actions_len))));
3926}
3927
6814e51f
BP
3928static bool
3929facet_check_consistency(struct facet *facet)
3930{
3931 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
3932
3933 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
3934
050ac423
BP
3935 uint64_t odp_actions_stub[1024 / 8];
3936 struct ofpbuf odp_actions;
3937
6814e51f
BP
3938 struct rule_dpif *rule;
3939 struct subfacet *subfacet;
c53e1132 3940 bool may_log = false;
6814e51f
BP
3941 bool ok;
3942
3943 /* Check the rule for consistency. */
c57b2226
BP
3944 rule = rule_dpif_lookup(ofproto, &facet->flow);
3945 ok = rule == facet->rule;
3946 if (!ok) {
c53e1132 3947 may_log = !VLOG_DROP_WARN(&rl);
c53e1132
BP
3948 if (may_log) {
3949 struct ds s;
6814e51f 3950
c53e1132
BP
3951 ds_init(&s);
3952 flow_format(&s, &facet->flow);
3953 ds_put_format(&s, ": facet associated with wrong rule (was "
3954 "table=%"PRIu8",", facet->rule->up.table_id);
3955 cls_rule_format(&facet->rule->up.cr, &s);
3956 ds_put_format(&s, ") (should have been table=%"PRIu8",",
3957 rule->up.table_id);
3958 cls_rule_format(&rule->up.cr, &s);
3959 ds_put_char(&s, ')');
6814e51f 3960
c53e1132
BP
3961 VLOG_WARN("%s", ds_cstr(&s));
3962 ds_destroy(&s);
3963 }
6814e51f
BP
3964 }
3965
3966 /* Check the datapath actions for consistency. */
050ac423 3967 ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
6814e51f 3968 LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
6a7e895f 3969 enum subfacet_path want_path;
9616614b 3970 struct odputil_keybuf keybuf;
6814e51f 3971 struct action_xlate_ctx ctx;
9616614b
BP
3972 struct ofpbuf key;
3973 struct ds s;
6814e51f
BP
3974
3975 action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
0e553d9c 3976 subfacet->initial_tci, rule, 0, NULL);
f25d0cf3 3977 xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len,
050ac423 3978 &odp_actions);
6814e51f 3979
6a7e895f
BP
3980 if (subfacet->path == SF_NOT_INSTALLED) {
3981 /* This only happens if the datapath reported an error when we
3982 * tried to install the flow. Don't flag another error here. */
3983 continue;
3984 }
3985
3986 want_path = subfacet_want_path(subfacet->slow);
3987 if (want_path == SF_SLOW_PATH && subfacet->path == SF_SLOW_PATH) {
3988 /* The actions for slow-path flows may legitimately vary from one
3989 * packet to the next. We're done. */
050ac423 3990 continue;
6814e51f
BP
3991 }
3992
6a7e895f 3993 if (!subfacet_should_install(subfacet, subfacet->slow, &odp_actions)) {
9616614b
BP
3994 continue;
3995 }
c53e1132 3996
9616614b
BP
3997 /* Inconsistency! */
3998 if (ok) {
3999 may_log = !VLOG_DROP_WARN(&rl);
4000 ok = false;
4001 }
4002 if (!may_log) {
4003 /* Rate-limited, skip reporting. */
4004 continue;
4005 }
c53e1132 4006
9616614b
BP
4007 ds_init(&s);
4008 subfacet_get_key(subfacet, &keybuf, &key);
4009 odp_flow_key_format(key.data, key.size, &s);
4010
4011 ds_put_cstr(&s, ": inconsistency in subfacet");
6a7e895f 4012 if (want_path != subfacet->path) {
9616614b
BP
4013 enum odp_key_fitness fitness = subfacet->key_fitness;
4014
6a7e895f
BP
4015 ds_put_format(&s, " (%s, fitness=%s)",
4016 subfacet_path_to_string(subfacet->path),
9616614b 4017 odp_key_fitness_to_string(fitness));
6a7e895f
BP
4018 ds_put_format(&s, " (should have been %s)",
4019 subfacet_path_to_string(want_path));
4020 } else if (want_path == SF_FAST_PATH) {
9616614b
BP
4021 ds_put_cstr(&s, " (actions were: ");
4022 format_odp_actions(&s, subfacet->actions,
4023 subfacet->actions_len);
4024 ds_put_cstr(&s, ") (correct actions: ");
4025 format_odp_actions(&s, odp_actions.data, odp_actions.size);
4026 ds_put_char(&s, ')');
4027 } else {
4028 ds_put_cstr(&s, " (actions: ");
4029 format_odp_actions(&s, subfacet->actions,
4030 subfacet->actions_len);
4031 ds_put_char(&s, ')');
6814e51f 4032 }
9616614b
BP
4033 VLOG_WARN("%s", ds_cstr(&s));
4034 ds_destroy(&s);
6814e51f 4035 }
050ac423 4036 ofpbuf_uninit(&odp_actions);
6814e51f
BP
4037
4038 return ok;
4039}
4040
15baa734 4041/* Re-searches the classifier for 'facet':
abe529af
BP
4042 *
4043 * - If the rule found is different from 'facet''s current rule, moves
4044 * 'facet' to the new rule and recompiles its actions.
4045 *
4046 * - If the rule found is the same as 'facet''s current rule, leaves 'facet'
c57b2226
BP
4047 * where it is and recompiles its actions anyway. */
4048static void
15baa734 4049facet_revalidate(struct facet *facet)
abe529af 4050{
15baa734 4051 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
b95fc6ba
BP
4052 struct actions {
4053 struct nlattr *odp_actions;
4054 size_t actions_len;
4055 };
4056 struct actions *new_actions;
4057
abe529af 4058 struct action_xlate_ctx ctx;
050ac423
BP
4059 uint64_t odp_actions_stub[1024 / 8];
4060 struct ofpbuf odp_actions;
4061
abe529af 4062 struct rule_dpif *new_rule;
b0f7b9b5 4063 struct subfacet *subfacet;
b95fc6ba 4064 int i;
abe529af
BP
4065
4066 COVERAGE_INC(facet_revalidate);
4067
c57b2226 4068 new_rule = rule_dpif_lookup(ofproto, &facet->flow);
abe529af 4069
df2c07f4 4070 /* Calculate new datapath actions.
abe529af
BP
4071 *
4072 * We do not modify any 'facet' state yet, because we might need to, e.g.,
4073 * emit a NetFlow expiration and, if so, we need to have the old state
4074 * around to properly compose it. */
abe529af 4075
df2c07f4
JP
4076 /* If the datapath actions changed or the installability changed,
4077 * then we need to talk to the datapath. */
b95fc6ba
BP
4078 i = 0;
4079 new_actions = NULL;
4080 memset(&ctx, 0, sizeof ctx);
050ac423 4081 ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
b0f7b9b5 4082 LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
6a7e895f 4083 enum slow_path_reason slow;
b95fc6ba 4084
e84173dc 4085 action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
0e553d9c 4086 subfacet->initial_tci, new_rule, 0, NULL);
f25d0cf3 4087 xlate_actions(&ctx, new_rule->up.ofpacts, new_rule->up.ofpacts_len,
050ac423 4088 &odp_actions);
b0f7b9b5 4089
6a7e895f
BP
4090 slow = (subfacet->slow & SLOW_MATCH) | ctx.slow;
4091 if (subfacet_should_install(subfacet, slow, &odp_actions)) {
4092 struct dpif_flow_stats stats;
4093
4094 subfacet_install(subfacet,
4095 odp_actions.data, odp_actions.size, &stats, slow);
4096 subfacet_update_stats(subfacet, &stats);
b95fc6ba
BP
4097
4098 if (!new_actions) {
4099 new_actions = xcalloc(list_size(&facet->subfacets),
4100 sizeof *new_actions);
4101 }
050ac423
BP
4102 new_actions[i].odp_actions = xmemdup(odp_actions.data,
4103 odp_actions.size);
4104 new_actions[i].actions_len = odp_actions.size;
abe529af 4105 }
b95fc6ba 4106
b95fc6ba 4107 i++;
b0f7b9b5 4108 }
050ac423
BP
4109 ofpbuf_uninit(&odp_actions);
4110
b95fc6ba 4111 if (new_actions) {
15baa734 4112 facet_flush_stats(facet);
abe529af
BP
4113 }
4114
4115 /* Update 'facet' now that we've taken care of all the old state. */
4116 facet->tags = ctx.tags;
4117 facet->nf_flow.output_iface = ctx.nf_output_iface;
75a75043
BP
4118 facet->has_learn = ctx.has_learn;
4119 facet->has_normal = ctx.has_normal;
0e553d9c 4120 facet->has_fin_timeout = ctx.has_fin_timeout;
9d24de3b 4121 facet->mirrors = ctx.mirrors;
6a7e895f
BP
4122
4123 i = 0;
4124 LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
4125 subfacet->slow = (subfacet->slow & SLOW_MATCH) | ctx.slow;
4126
4127 if (new_actions && new_actions[i].odp_actions) {
4128 free(subfacet->actions);
4129 subfacet->actions = new_actions[i].odp_actions;
4130 subfacet->actions_len = new_actions[i].actions_len;
b95fc6ba 4131 }
6a7e895f 4132 i++;
abe529af 4133 }
6a7e895f
BP
4134 free(new_actions);
4135
abe529af
BP
4136 if (facet->rule != new_rule) {
4137 COVERAGE_INC(facet_changed_rule);
4138 list_remove(&facet->list_node);
4139 list_push_back(&new_rule->facets, &facet->list_node);
4140 facet->rule = new_rule;
4141 facet->used = new_rule->up.created;
9d24de3b 4142 facet->prev_used = facet->used;
abe529af 4143 }
abe529af
BP
4144}
4145
4146/* Updates 'facet''s used time. Caller is responsible for calling
4147 * facet_push_stats() to update the flows which 'facet' resubmits into. */
4148static void
15baa734 4149facet_update_time(struct facet *facet, long long int used)
abe529af 4150{
15baa734 4151 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
abe529af
BP
4152 if (used > facet->used) {
4153 facet->used = used;
1745cd08 4154 ofproto_rule_update_used(&facet->rule->up, used);
abe529af
BP
4155 netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, used);
4156 }
4157}
4158
bbb5d219
EJ
4159static void
4160facet_reset_counters(struct facet *facet)
4161{
4162 facet->packet_count = 0;
4163 facet->byte_count = 0;
9d24de3b
JP
4164 facet->prev_packet_count = 0;
4165 facet->prev_byte_count = 0;
bbb5d219
EJ
4166 facet->accounted_bytes = 0;
4167}
4168
abe529af
BP
4169static void
4170facet_push_stats(struct facet *facet)
4171{
112bc5f4 4172 struct dpif_flow_stats stats;
abe529af 4173
9d24de3b
JP
4174 assert(facet->packet_count >= facet->prev_packet_count);
4175 assert(facet->byte_count >= facet->prev_byte_count);
4176 assert(facet->used >= facet->prev_used);
abe529af 4177
112bc5f4
BP
4178 stats.n_packets = facet->packet_count - facet->prev_packet_count;
4179 stats.n_bytes = facet->byte_count - facet->prev_byte_count;
4180 stats.used = facet->used;
4181 stats.tcp_flags = 0;
abe529af 4182
112bc5f4 4183 if (stats.n_packets || stats.n_bytes || facet->used > facet->prev_used) {
9d24de3b
JP
4184 facet->prev_packet_count = facet->packet_count;
4185 facet->prev_byte_count = facet->byte_count;
4186 facet->prev_used = facet->used;
abe529af 4187
112bc5f4 4188 flow_push_stats(facet->rule, &facet->flow, &stats);
9d24de3b
JP
4189
4190 update_mirror_stats(ofproto_dpif_cast(facet->rule->up.ofproto),
112bc5f4 4191 facet->mirrors, stats.n_packets, stats.n_bytes);
abe529af
BP
4192 }
4193}
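/* A minimal sketch of the "push only the delta" idiom in facet_push_stats()
 * above: each counter keeps a prev_* shadow of the value already credited
 * downstream; the difference is pushed and the shadow advanced, so the same
 * traffic is never counted twice.  Names are illustrative. */
#include <assert.h>
#include <stdint.h>

struct sketch_counter {
    uint64_t packets;       /* Total observed so far. */
    uint64_t prev_packets;  /* Already credited downstream. */
};

static uint64_t
sketch_take_delta(struct sketch_counter *c)
{
    uint64_t delta;

    assert(c->packets >= c->prev_packets);  /* Counters never go backwards. */
    delta = c->packets - c->prev_packets;
    c->prev_packets = c->packets;           /* Credited: advance the shadow. */
    return delta;
}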
4194
abe529af 4195static void
112bc5f4 4196rule_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats)
abe529af 4197{
112bc5f4
BP
4198 rule->packet_count += stats->n_packets;
4199 rule->byte_count += stats->n_bytes;
4200 ofproto_rule_update_used(&rule->up, stats->used);
abe529af
BP
4201}
4202
4203/* Pushes flow statistics to the rules into which 'flow' resubmits, given
9d24de3b 4204 * 'rule''s actions and mirrors. */
abe529af 4205static void
18b2a258 4206flow_push_stats(struct rule_dpif *rule,
112bc5f4 4207 const struct flow *flow, const struct dpif_flow_stats *stats)
abe529af
BP
4208{
4209 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
112bc5f4 4210 struct action_xlate_ctx ctx;
abe529af 4211
112bc5f4 4212 ofproto_rule_update_used(&rule->up, stats->used);
f3b50afb 4213
112bc5f4 4214 action_xlate_ctx_init(&ctx, ofproto, flow, flow->vlan_tci, rule,
0e553d9c 4215 0, NULL);
112bc5f4 4216 ctx.resubmit_stats = stats;
f25d0cf3
BP
4217 xlate_actions_for_side_effects(&ctx, rule->up.ofpacts,
4218 rule->up.ofpacts_len);
abe529af
BP
4219}
4220\f
b0f7b9b5
BP
4221/* Subfacets. */
4222
4223static struct subfacet *
4224subfacet_find__(struct ofproto_dpif *ofproto,
4225 const struct nlattr *key, size_t key_len, uint32_t key_hash,
4226 const struct flow *flow)
4227{
4228 struct subfacet *subfacet;
4229
4230 HMAP_FOR_EACH_WITH_HASH (subfacet, hmap_node, key_hash,
4231 &ofproto->subfacets) {
4232 if (subfacet->key
4233 ? (subfacet->key_len == key_len
4234 && !memcmp(key, subfacet->key, key_len))
4235 : flow_equal(flow, &subfacet->facet->flow)) {
4236 return subfacet;
4237 }
4238 }
4239
4240 return NULL;
4241}
4242
4243/* Searches 'facet' (within 'ofproto') for a subfacet with the specified
4244 * 'key_fitness', 'key', and 'key_len'. Returns the existing subfacet if
b95fc6ba
BP
4245 * there is one, otherwise creates and returns a new subfacet.
4246 *
4247 * If the returned subfacet is new, then subfacet->actions will be NULL, in
4248 * which case the caller must populate the actions with
4249 * subfacet_make_actions(). */
b0f7b9b5 4250static struct subfacet *
15baa734 4251subfacet_create(struct facet *facet, enum odp_key_fitness key_fitness,
e84173dc 4252 const struct nlattr *key, size_t key_len, ovs_be16 initial_tci)
b0f7b9b5 4253{
15baa734 4254 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
b0f7b9b5
BP
4255 uint32_t key_hash = odp_flow_key_hash(key, key_len);
4256 struct subfacet *subfacet;
4257
3b145dd7
BP
4258 if (list_is_empty(&facet->subfacets)) {
4259 subfacet = &facet->one_subfacet;
6a0a5bbb
BP
4260
4261 /* This subfacet should conceptually be created, and have its first
4262 * packet pass through, at the same time that its facet was created.
4263 * If we called time_msec() here, then the subfacet could look
4264 * (occasionally) as though it was used some time after the facet was
4265 * used. That can make a one-packet flow look like it has a nonzero
4266 * duration, which looks odd in e.g. NetFlow statistics. */
4267 subfacet->used = facet->used;
3b145dd7
BP
4268 } else {
4269 subfacet = subfacet_find__(ofproto, key, key_len, key_hash,
4270 &facet->flow);
4271 if (subfacet) {
4272 if (subfacet->facet == facet) {
4273 return subfacet;
4274 }
4275
4276 /* This shouldn't happen. */
4277 VLOG_ERR_RL(&rl, "subfacet with wrong facet");
4278 subfacet_destroy(subfacet);
b0f7b9b5
BP
4279 }
4280
3b145dd7 4281 subfacet = xmalloc(sizeof *subfacet);
6a0a5bbb 4282 subfacet->used = time_msec();
b0f7b9b5
BP
4283 }
4284
b0f7b9b5
BP
4285 hmap_insert(&ofproto->subfacets, &subfacet->hmap_node, key_hash);
4286 list_push_back(&facet->subfacets, &subfacet->list_node);
4287 subfacet->facet = facet;
b0f7b9b5
BP
4288 subfacet->key_fitness = key_fitness;
4289 if (key_fitness != ODP_FIT_PERFECT) {
4290 subfacet->key = xmemdup(key, key_len);
4291 subfacet->key_len = key_len;
26cd7e34
BP
4292 } else {
4293 subfacet->key = NULL;
4294 subfacet->key_len = 0;
b0f7b9b5 4295 }
26cd7e34
BP
4296 subfacet->dp_packet_count = 0;
4297 subfacet->dp_byte_count = 0;
4298 subfacet->actions_len = 0;
4299 subfacet->actions = NULL;
6a7e895f
BP
4300 subfacet->slow = (subfacet->key_fitness == ODP_FIT_TOO_LITTLE
4301 ? SLOW_MATCH
4302 : 0);
4303 subfacet->path = SF_NOT_INSTALLED;
e84173dc 4304 subfacet->initial_tci = initial_tci;
b0f7b9b5
BP
4305
4306 return subfacet;
4307}
4308
4309/* Searches 'ofproto' for a subfacet with the given 'key' and 'key_len'.
4310 * Returns the subfacet if one exists, otherwise NULL. */
4311static struct subfacet *
4312subfacet_find(struct ofproto_dpif *ofproto,
6a542738 4313 const struct nlattr *key, size_t key_len)
b0f7b9b5
BP
4314{
4315 uint32_t key_hash = odp_flow_key_hash(key, key_len);
6a542738
PS
4316 enum odp_key_fitness fitness;
4317 struct flow flow;
4318
4319 fitness = odp_flow_key_to_flow(key, key_len, &flow);
4320 if (fitness == ODP_FIT_ERROR) {
4321 return NULL;
4322 }
b0f7b9b5 4323
6a542738 4324 return subfacet_find__(ofproto, key, key_len, key_hash, &flow);
b0f7b9b5
BP
4325}
4326
4327/* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from
4328 * its facet within 'ofproto', and frees it. */
4329static void
15baa734 4330subfacet_destroy__(struct subfacet *subfacet)
b0f7b9b5 4331{
15baa734
BP
4332 struct facet *facet = subfacet->facet;
4333 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
4334
4335 subfacet_uninstall(subfacet);
b0f7b9b5
BP
4336 hmap_remove(&ofproto->subfacets, &subfacet->hmap_node);
4337 list_remove(&subfacet->list_node);
4338 free(subfacet->key);
b95fc6ba 4339 free(subfacet->actions);
26cd7e34
BP
4340 if (subfacet != &facet->one_subfacet) {
4341 free(subfacet);
4342 }
b0f7b9b5
BP
4343}
4344
4345/* Destroys 'subfacet', as with subfacet_destroy__(), and then if this was the
4346 * last remaining subfacet in its facet destroys the facet too. */
4347static void
15baa734 4348subfacet_destroy(struct subfacet *subfacet)
b0f7b9b5
BP
4349{
4350 struct facet *facet = subfacet->facet;
4351
551a2f6c
BP
4352 if (list_is_singleton(&facet->subfacets)) {
4353 /* facet_remove() needs at least one subfacet (it will remove it). */
15baa734 4354 facet_remove(facet);
551a2f6c 4355 } else {
15baa734 4356 subfacet_destroy__(subfacet);
b0f7b9b5
BP
4357 }
4358}
4359
4360/* Initializes 'key' with the sequence of OVS_KEY_ATTR_* Netlink attributes
4361 * that can be used to refer to 'subfacet'. The caller must provide 'keybuf'
4362 * for use as temporary storage. */
4363static void
4364subfacet_get_key(struct subfacet *subfacet, struct odputil_keybuf *keybuf,
4365 struct ofpbuf *key)
4366{
4367 if (!subfacet->key) {
4368 ofpbuf_use_stack(key, keybuf, sizeof *keybuf);
4369 odp_flow_key_from_flow(key, &subfacet->facet->flow);
4370 } else {
4371 ofpbuf_use_const(key, subfacet->key, subfacet->key_len);
4372 }
4373}
4374
5fe20d5d
BP
4375/* Composes the datapath actions for 'subfacet' based on its rule's actions.
4376 * Translates the actions into 'odp_actions', which the caller must have
4377 * initialized and is responsible for uninitializing. */
b95fc6ba 4378static void
5fe20d5d
BP
4379subfacet_make_actions(struct subfacet *subfacet, const struct ofpbuf *packet,
4380 struct ofpbuf *odp_actions)
b95fc6ba
BP
4381{
4382 struct facet *facet = subfacet->facet;
18b2a258 4383 struct rule_dpif *rule = facet->rule;
15baa734 4384 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
050ac423 4385
b95fc6ba
BP
4386 struct action_xlate_ctx ctx;
4387
15baa734 4388 action_xlate_ctx_init(&ctx, ofproto, &facet->flow, subfacet->initial_tci,
0e553d9c 4389 rule, 0, packet);
f25d0cf3 4390 xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len, odp_actions);
b95fc6ba 4391 facet->tags = ctx.tags;
b95fc6ba
BP
4392 facet->has_learn = ctx.has_learn;
4393 facet->has_normal = ctx.has_normal;
0e553d9c 4394 facet->has_fin_timeout = ctx.has_fin_timeout;
b95fc6ba 4395 facet->nf_flow.output_iface = ctx.nf_output_iface;
9d24de3b 4396 facet->mirrors = ctx.mirrors;
b95fc6ba 4397
6a7e895f 4398 subfacet->slow = (subfacet->slow & SLOW_MATCH) | ctx.slow;
5fe20d5d
BP
4399 if (subfacet->actions_len != odp_actions->size
4400 || memcmp(subfacet->actions, odp_actions->data, odp_actions->size)) {
b95fc6ba 4401 free(subfacet->actions);
5fe20d5d
BP
4402 subfacet->actions_len = odp_actions->size;
4403 subfacet->actions = xmemdup(odp_actions->data, odp_actions->size);
b95fc6ba 4404 }
b95fc6ba
BP
4405}
4406
b0f7b9b5
BP
4407/* Updates 'subfacet''s datapath flow, setting its actions to 'actions_len'
4408 * bytes of actions in 'actions'. If 'stats' is non-null, statistics counters
4409 * in the datapath will be zeroed and 'stats' will be updated with traffic new
4410 * since 'subfacet' was last updated.
4411 *
4412 * Returns 0 if successful, otherwise a positive errno value. */
4413static int
15baa734 4414subfacet_install(struct subfacet *subfacet,
b0f7b9b5 4415 const struct nlattr *actions, size_t actions_len,
6a7e895f
BP
4416 struct dpif_flow_stats *stats,
4417 enum slow_path_reason slow)
b0f7b9b5 4418{
15baa734
BP
4419 struct facet *facet = subfacet->facet;
4420 struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
6a7e895f
BP
4421 enum subfacet_path path = subfacet_want_path(slow);
4422 uint64_t slow_path_stub[128 / 8];
b0f7b9b5
BP
4423 struct odputil_keybuf keybuf;
4424 enum dpif_flow_put_flags flags;
4425 struct ofpbuf key;
4426 int ret;
4427
4428 flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
4429 if (stats) {
4430 flags |= DPIF_FP_ZERO_STATS;
4431 }
4432
6a7e895f
BP
4433 if (path == SF_SLOW_PATH) {
4434 compose_slow_path(ofproto, &facet->flow, slow,
4435 slow_path_stub, sizeof slow_path_stub,
4436 &actions, &actions_len);
4437 }
4438
b0f7b9b5
BP
4439 subfacet_get_key(subfacet, &keybuf, &key);
4440 ret = dpif_flow_put(ofproto->dpif, flags, key.data, key.size,
4441 actions, actions_len, stats);
4442
4443 if (stats) {
4444 subfacet_reset_dp_stats(subfacet, stats);
4445 }
4446
6a7e895f
BP
4447 if (!ret) {
4448 subfacet->path = path;
4449 }
b0f7b9b5
BP
4450 return ret;
4451}
4452
6a7e895f
BP
4453static int
4454subfacet_reinstall(struct subfacet *subfacet, struct dpif_flow_stats *stats)
4455{
4456 return subfacet_install(subfacet, subfacet->actions, subfacet->actions_len,
4457 stats, subfacet->slow);
4458}
4459
b0f7b9b5
BP
4460/* If 'subfacet' is installed in the datapath, uninstalls it. */
4461static void
15baa734 4462subfacet_uninstall(struct subfacet *subfacet)
b0f7b9b5 4463{
6a7e895f 4464 if (subfacet->path != SF_NOT_INSTALLED) {
15baa734
BP
4465 struct rule_dpif *rule = subfacet->facet->rule;
4466 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
b0f7b9b5
BP
4467 struct odputil_keybuf keybuf;
4468 struct dpif_flow_stats stats;
4469 struct ofpbuf key;
4470 int error;
4471
4472 subfacet_get_key(subfacet, &keybuf, &key);
15baa734 4473 error = dpif_flow_del(ofproto->dpif, key.data, key.size, &stats);
b0f7b9b5
BP
4474 subfacet_reset_dp_stats(subfacet, &stats);
4475 if (!error) {
15baa734 4476 subfacet_update_stats(subfacet, &stats);
b0f7b9b5 4477 }
6a7e895f 4478 subfacet->path = SF_NOT_INSTALLED;
b0f7b9b5
BP
4479 } else {
4480 assert(subfacet->dp_packet_count == 0);
4481 assert(subfacet->dp_byte_count == 0);
4482 }
4483}
4484
4485/* Resets 'subfacet''s datapath statistics counters. This should be called
4486 * when 'subfacet''s statistics are cleared in the datapath. If 'stats' is
4487 * non-null, it should contain the statistics returned by dpif when 'subfacet'
4488 * was reset in the datapath. 'stats' will be modified to include only
4489 * statistics new since 'subfacet' was last updated. */
4490static void
4491subfacet_reset_dp_stats(struct subfacet *subfacet,
4492 struct dpif_flow_stats *stats)
4493{
4494 if (stats
4495 && subfacet->dp_packet_count <= stats->n_packets
4496 && subfacet->dp_byte_count <= stats->n_bytes) {
4497 stats->n_packets -= subfacet->dp_packet_count;
4498 stats->n_bytes -= subfacet->dp_byte_count;
4499 }
4500
4501 subfacet->dp_packet_count = 0;
4502 subfacet->dp_byte_count = 0;
4503}
4504
4505/* Updates 'subfacet''s used time. The caller is responsible for calling
4506 * facet_push_stats() to update the flows which 'subfacet' resubmits into. */
4507static void
15baa734 4508subfacet_update_time(struct subfacet *subfacet, long long int used)
b0f7b9b5
BP
4509{
4510 if (used > subfacet->used) {
4511 subfacet->used = used;
15baa734 4512 facet_update_time(subfacet->facet, used);
b0f7b9b5
BP
4513 }
4514}
4515
4516/* Folds the statistics from 'stats' into the counters in 'subfacet'.
4517 *
4518 * Because of the meaning of a subfacet's counters, it only makes sense to do
4519 * this if 'stats' are not tracked in the datapath, that is, if 'stats'
4520 * represents a packet that was sent by hand or if it represents statistics
4521 * that have been cleared out of the datapath. */
4522static void
15baa734 4523subfacet_update_stats(struct subfacet *subfacet,
b0f7b9b5
BP
4524 const struct dpif_flow_stats *stats)
4525{
4526 if (stats->n_packets || stats->used > subfacet->used) {
4527 struct facet *facet = subfacet->facet;
4528
15baa734 4529 subfacet_update_time(subfacet, stats->used);
b0f7b9b5
BP
4530 facet->packet_count += stats->n_packets;
4531 facet->byte_count += stats->n_bytes;
0e553d9c 4532 facet->tcp_flags |= stats->tcp_flags;
b0f7b9b5
BP
4533 facet_push_stats(facet);
4534 netflow_flow_update_flags(&facet->nf_flow, stats->tcp_flags);
4535 }
4536}
4537\f
abe529af
BP
4538/* Rules. */
4539
4540static struct rule_dpif *
c57b2226
BP
4541rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow)
4542{
4543 struct ofport_dpif *port;
4544 struct rule_dpif *rule;
4545
4546 rule = rule_dpif_lookup__(ofproto, flow, 0);
4547 if (rule) {
4548 return rule;
4549 }
4550
4551 port = get_ofp_port(ofproto, flow->in_port);
4552 if (!port) {
4553 VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16, flow->in_port);
4554 return ofproto->miss_rule;
4555 }
4556
4557 if (port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN) {
4558 return ofproto->no_packet_in_rule;
4559 }
4560 return ofproto->miss_rule;
4561}
4562
4563static struct rule_dpif *
4564rule_dpif_lookup__(struct ofproto_dpif *ofproto, const struct flow *flow,
4565 uint8_t table_id)
abe529af 4566{
7257b535
BP
4567 struct cls_rule *cls_rule;
4568 struct classifier *cls;
4569
9cdaaebe
BP
4570 if (table_id >= N_TABLES) {
4571 return NULL;
4572 }
4573
d0918789 4574 cls = &ofproto->up.tables[table_id].cls;
eadef313 4575 if (flow->nw_frag & FLOW_NW_FRAG_ANY
7257b535
BP
4576 && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
4577 /* For OFPC_NORMAL frag_handling, we must pretend that transport ports
4578 * are unavailable. */
4579 struct flow ofpc_normal_flow = *flow;
4580 ofpc_normal_flow.tp_src = htons(0);
4581 ofpc_normal_flow.tp_dst = htons(0);
4582 cls_rule = classifier_lookup(cls, &ofpc_normal_flow);
4583 } else {
4584 cls_rule = classifier_lookup(cls, flow);
4585 }
4586 return rule_dpif_cast(rule_from_cls_rule(cls_rule));
abe529af
BP
4587}
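/* A standalone sketch of the OFPC_FRAG_NORMAL handling in
 * rule_dpif_lookup__() above: for "normal" fragment handling the lookup
 * must behave as though transport ports were unavailable, so a copy of the
 * flow has its ports zeroed before the classifier is consulted.  The
 * struct below is a hypothetical, cut-down flow. */
#include <stdbool.h>
#include <stdint.h>

struct sketch_flow {
    bool any_frag;              /* Is this any kind of IP fragment? */
    uint16_t tp_src, tp_dst;    /* Transport ports. */
};

static struct sketch_flow
sketch_lookup_flow(struct sketch_flow flow, bool frag_normal)
{
    if (flow.any_frag && frag_normal) {
        flow.tp_src = 0;        /* Pretend ports are unavailable, so every */
        flow.tp_dst = 0;        /* fragment matches the same rules. */
    }
    return flow;                /* This copy is what gets looked up. */
}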
4588
7ee20df1
BP
4589static void
4590complete_operation(struct rule_dpif *rule)
4591{
4592 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
4593
54a9cbc9 4594 rule_invalidate(rule);
7ee20df1
BP
4595 if (clogged) {
4596 struct dpif_completion *c = xmalloc(sizeof *c);
4597 c->op = rule->up.pending;
4598 list_push_back(&ofproto->completions, &c->list_node);
4599 } else {
4600 ofoperation_complete(rule->up.pending, 0);
4601 }
4602}
4603
abe529af
BP
4604static struct rule *
4605rule_alloc(void)
4606{
4607 struct rule_dpif *rule = xmalloc(sizeof *rule);
4608 return &rule->up;
4609}
4610
4611static void
4612rule_dealloc(struct rule *rule_)
4613{
4614 struct rule_dpif *rule = rule_dpif_cast(rule_);
4615 free(rule);
4616}
4617
90bf1e07 4618static enum ofperr
abe529af
BP
4619rule_construct(struct rule *rule_)
4620{
4621 struct rule_dpif *rule = rule_dpif_cast(rule_);
4622 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
7ee20df1 4623 struct rule_dpif *victim;
54a9cbc9 4624 uint8_t table_id;
90bf1e07 4625 enum ofperr error;
5bf0e941 4626
f25d0cf3
BP
4627 error = ofpacts_check(rule->up.ofpacts, rule->up.ofpacts_len,
4628 &rule->up.cr.flow, ofproto->max_ports);
5bf0e941
BP
4629 if (error) {
4630 return error;
4631 }
abe529af 4632
abe529af
BP
4633 rule->packet_count = 0;
4634 rule->byte_count = 0;
abe529af 4635
7ee20df1
BP
4636 victim = rule_dpif_cast(ofoperation_get_victim(rule->up.pending));
4637 if (victim && !list_is_empty(&victim->facets)) {
4638 struct facet *facet;
4639
4640 rule->facets = victim->facets;
4641 list_moved(&rule->facets);
4642 LIST_FOR_EACH (facet, list_node, &rule->facets) {
bbb5d219
EJ
4643 /* XXX: We're only clearing our local counters here. It's possible
4644 * that quite a few packets are unaccounted for in the datapath
4645 * statistics. These will be accounted to the new rule instead of
4646 * cleared as required. This could be fixed by clearing out the
4647 * datapath statistics for this facet, but currently it doesn't
4648 * seem worth it. */
4649 facet_reset_counters(facet);
7ee20df1
BP
4650 facet->rule = rule;
4651 }
4652 } else {
4653 /* Must avoid list_moved() in this case. */
4654 list_init(&rule->facets);
4655 }
abe529af 4656
54a9cbc9
BP
4657 table_id = rule->up.table_id;
4658 rule->tag = (victim ? victim->tag
4659 : table_id == 0 ? 0
4660 : rule_calculate_tag(&rule->up.cr.flow, &rule->up.cr.wc,
4661 ofproto->tables[table_id].basis));
4662
7ee20df1 4663 complete_operation(rule);
abe529af
BP
4664 return 0;
4665}
4666
4667static void
4668rule_destruct(struct rule *rule_)
4669{
4670 struct rule_dpif *rule = rule_dpif_cast(rule_);
abe529af
BP
4671 struct facet *facet, *next_facet;
4672
abe529af 4673 LIST_FOR_EACH_SAFE (facet, next_facet, list_node, &rule->facets) {
15baa734 4674 facet_revalidate(facet);
abe529af 4675 }
7ee20df1
BP
4676
4677 complete_operation(rule);
abe529af
BP
4678}
4679
4680static void
4681rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
4682{
4683 struct rule_dpif *rule = rule_dpif_cast(rule_);
4684 struct facet *facet;
4685
4686 /* Start from historical data for 'rule' itself that are no longer tracked
4687 * in facets. This counts, for example, facets that have expired. */
4688 *packets = rule->packet_count;
4689 *bytes = rule->byte_count;
4690
4691 /* Add any statistics that are tracked by facets. This includes
4692 * statistical data recently updated by ofproto_update_stats() as well as
4693 * stats for packets that were executed "by hand" via dpif_execute(). */
4694 LIST_FOR_EACH (facet, list_node, &rule->facets) {
4695 *packets += facet->packet_count;
4696 *bytes += facet->byte_count;
4697 }
4698}
4699
90bf1e07 4700static enum ofperr
59d0f2c8
BP
4701rule_execute(struct rule *rule_, const struct flow *flow,
4702 struct ofpbuf *packet)
abe529af
BP
4703{
4704 struct rule_dpif *rule = rule_dpif_cast(rule_);
4705 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
050ac423 4706
112bc5f4 4707 struct dpif_flow_stats stats;
050ac423 4708
abe529af 4709 struct action_xlate_ctx ctx;
050ac423
BP
4710 uint64_t odp_actions_stub[1024 / 8];
4711 struct ofpbuf odp_actions;
abe529af 4712
112bc5f4
BP
4713 dpif_flow_stats_extract(flow, packet, &stats);
4714 rule_credit_stats(rule, &stats);
4715
050ac423 4716 ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
54834960 4717 action_xlate_ctx_init(&ctx, ofproto, flow, flow->vlan_tci,
112bc5f4
BP
4718 rule, stats.tcp_flags, packet);
4719 ctx.resubmit_stats = &stats;
f25d0cf3 4720 xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len, &odp_actions);
112bc5f4
BP
4721
4722 execute_odp_actions(ofproto, flow, odp_actions.data,
4723 odp_actions.size, packet);
4724
050ac423 4725 ofpbuf_uninit(&odp_actions);
5bf0e941
BP
4726
4727 return 0;
abe529af
BP
4728}
4729
7ee20df1
BP
4730static void
4731rule_modify_actions(struct rule *rule_)
abe529af
BP
4732{
4733 struct rule_dpif *rule = rule_dpif_cast(rule_);
4734 struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
90bf1e07 4735 enum ofperr error;
abe529af 4736
f25d0cf3
BP
4737 error = ofpacts_check(rule->up.ofpacts, rule->up.ofpacts_len,
4738 &rule->up.cr.flow, ofproto->max_ports);
7ee20df1
BP
4739 if (error) {
4740 ofoperation_complete(rule->up.pending, error);
4741 return;
abe529af 4742 }
7ee20df1
BP
4743
4744 complete_operation(rule);
abe529af
BP
4745}
4746\f
97d6520b 4747/* Sends 'packet' out 'ofport'.
52a90c29 4748 * May modify 'packet'.
abe529af
BP
4749 * Returns 0 if successful, otherwise a positive errno value. */
4750static int
52a90c29 4751send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
abe529af 4752{
97d6520b 4753 const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
80e5eed9
BP
4754 struct ofpbuf key, odp_actions;
4755 struct odputil_keybuf keybuf;
52a90c29 4756 uint16_t odp_port;
80e5eed9 4757 struct flow flow;
abe529af
BP
4758 int error;
4759
88dbe0f6 4760 flow_extract(packet, 0, 0, 0, &flow);
52a90c29
BP
4761 odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port,
4762 flow.vlan_tci);
4763 if (odp_port != ofport->odp_port) {
4764 eth_pop_vlan(packet);
4765 flow.vlan_tci = htons(0);
4766 }
4767
80e5eed9
BP
4768 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
4769 odp_flow_key_from_flow(&key, &flow);
4770
abe529af 4771 ofpbuf_init(&odp_actions, 32);
6ff686f2
PS
4772 compose_sflow_action(ofproto, &odp_actions, &flow, odp_port);
4773
df2c07f4 4774 nl_msg_put_u32(&odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port);
80e5eed9
BP
4775 error = dpif_execute(ofproto->dpif,
4776 key.data, key.size,
4777 odp_actions.data, odp_actions.size,
abe529af
BP
4778 packet);
4779 ofpbuf_uninit(&odp_actions);
4780
4781 if (error) {
4782 VLOG_WARN_RL(&rl, "%s: failed to send packet on port %"PRIu32" (%s)",
4783 ofproto->up.name, odp_port, strerror(error));
4784 }
6527c598 4785 ofproto_update_local_port_stats(ofport->up.ofproto, packet->size, 0);
abe529af
BP
4786 return error;
4787}
4788\f
df2c07f4 4789/* OpenFlow to datapath action translation. */
abe529af 4790
f25d0cf3
BP
4791static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
4792 struct action_xlate_ctx *);
4cd78906 4793static void xlate_normal(struct action_xlate_ctx *);
abe529af 4794
6a7e895f
BP
4795/* Composes an ODP action for a "slow path" action for 'flow' within 'ofproto'.
4796 * The action will state 'slow' as the reason that the action is in the slow
4797 * path. (This is purely informational: it allows a human viewing "ovs-dpctl
4798 * dump-flows" output to see why a flow is in the slow path.)
4799 *
4800 * The 'stub_size' bytes in 'stub' will be used to store the action.
4801 * 'stub_size' must be large enough for the action.
4802 *
4803 * The action and its size will be stored in '*actionsp' and '*actions_lenp',
4804 * respectively. */
4805static void
4806compose_slow_path(const struct ofproto_dpif *ofproto, const struct flow *flow,
4807 enum slow_path_reason slow,
4808 uint64_t *stub, size_t stub_size,
4809 const struct nlattr **actionsp, size_t *actions_lenp)
4810{
4811 union user_action_cookie cookie;
4812 struct ofpbuf buf;
4813
4814 cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
4815 cookie.slow_path.unused = 0;
4816 cookie.slow_path.reason = slow;
4817
4818 ofpbuf_use_stack(&buf, stub, stub_size);
625b0720
BP
4819 if (slow & (SLOW_CFM | SLOW_LACP | SLOW_STP)) {
4820 uint32_t pid = dpif_port_get_pid(ofproto->dpif, UINT16_MAX);
4821 odp_put_userspace_action(pid, &cookie, &buf);
4822 } else {
4823 put_userspace_action(ofproto, &buf, flow, &cookie);
4824 }
6a7e895f
BP
4825 *actionsp = buf.data;
4826 *actions_lenp = buf.size;
4827}
4828
98403001
BP
4829static size_t
4830put_userspace_action(const struct ofproto_dpif *ofproto,
4831 struct ofpbuf *odp_actions,
4832 const struct flow *flow,
1673e0e4 4833 const union user_action_cookie *cookie)
98403001 4834{
98403001
BP
4835 uint32_t pid;
4836
4837 pid = dpif_port_get_pid(ofproto->dpif,
4838 ofp_port_to_odp_port(flow->in_port));
4839
39db78a0 4840 return odp_put_userspace_action(pid, cookie, odp_actions);
98403001
BP
4841}
4842
36fc5f18
BP
4843static void
4844compose_sflow_cookie(const struct ofproto_dpif *ofproto,
4845 ovs_be16 vlan_tci, uint32_t odp_port,
1673e0e4 4846 unsigned int n_outputs, union user_action_cookie *cookie)
36fc5f18
BP
4847{
4848 int ifindex;
4849
4850 cookie->type = USER_ACTION_COOKIE_SFLOW;
1673e0e4 4851 cookie->sflow.vlan_tci = vlan_tci;
36fc5f18
BP
4852
4853 /* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
4854 * port information") for the interpretation of cookie->output. */
4855 switch (n_outputs) {
4856 case 0:
4857 /* 0x40000000 | 256 means "packet dropped for unknown reason". */
1673e0e4 4858 cookie->sflow.output = 0x40000000 | 256;
36fc5f18
BP
4859 break;
4860
4861 case 1:
4862 ifindex = dpif_sflow_odp_port_to_ifindex(ofproto->sflow, odp_port);
4863 if (ifindex) {
1673e0e4 4864 cookie->sflow.output = ifindex;
36fc5f18
BP
4865 break;
4866 }
4867 /* Fall through. */
4868 default:
4869 /* 0x80000000 means "multiple output ports". */
1673e0e4 4870 cookie->sflow.output = 0x80000000 | n_outputs;
36fc5f18
BP
4871 break;
4872 }
4873}
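/* A standalone sketch of the sFlow output encoding built by
 * compose_sflow_cookie() above, following the "Input/output port
 * information" conventions from sflow_version_5.txt: 0x40000000|256 means
 * "dropped for unknown reason", a bare ifindex means a single output, and
 * 0x80000000|n means multiple outputs.  sketch_ifindex() is a hypothetical
 * lookup standing in for dpif_sflow_odp_port_to_ifindex(). */
#include <stdint.h>

static uint32_t
sketch_ifindex(uint32_t odp_port)
{
    return odp_port;            /* Placeholder mapping; 0 means unknown. */
}

static uint32_t
sketch_sflow_output(uint32_t odp_port, unsigned int n_outputs)
{
    if (n_outputs == 0) {
        return 0x40000000 | 256;            /* Dropped, reason unknown. */
    } else if (n_outputs == 1) {
        uint32_t ifindex = sketch_ifindex(odp_port);
        if (ifindex) {
            return ifindex;                 /* Single known output port. */
        }
    }
    return 0x80000000 | n_outputs;          /* Multiple (or unknown) outputs. */
}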
4874
6ff686f2
PS
4875/* Composes a SAMPLE action for sFlow. */
4876static size_t
4877compose_sflow_action(const struct ofproto_dpif *ofproto,
4878 struct ofpbuf *odp_actions,
4879 const struct flow *flow,
4880 uint32_t odp_port)
4881{
6ff686f2 4882 uint32_t probability;
1673e0e4 4883 union user_action_cookie cookie;
6ff686f2 4884 size_t sample_offset, actions_offset;
36fc5f18 4885 int cookie_offset;
6ff686f2
PS
4886
4887 if (!ofproto->sflow || flow->in_port == OFPP_NONE) {
4888 return 0;
4889 }
4890
6ff686f2
PS
4891 sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE);
4892
4893 /* Number of packets out of UINT_MAX to sample. */
4894 probability = dpif_sflow_get_probability(ofproto->sflow);
4895 nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
4896
4897 actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
36fc5f18
BP
4898 compose_sflow_cookie(ofproto, htons(0), odp_port,
4899 odp_port == OVSP_NONE ? 0 : 1, &cookie);
98403001 4900 cookie_offset = put_userspace_action(ofproto, odp_actions, flow, &cookie);
6ff686f2
PS
4901
4902 nl_msg_end_nested(odp_actions, actions_offset);
4903 nl_msg_end_nested(odp_actions, sample_offset);
98403001 4904 return cookie_offset;
6ff686f2
PS
4905}
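
/* The Netlink attribute nesting this function builds (a sketch, showing
 * structure only):
 *
 *     OVS_ACTION_ATTR_SAMPLE
 *         OVS_SAMPLE_ATTR_PROBABILITY    (u32, out of UINT_MAX)
 *         OVS_SAMPLE_ATTR_ACTIONS
 *             userspace action           (pid + sFlow cookie)
 *
 * The returned offset locates the cookie so that fix_sflow_action() can
 * rewrite it once the real output port and output count are known. */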

/* The SAMPLE action for sFlow must be the first action in any given list of
 * datapath actions.  At this point we do not yet have all of the information
 * required to build it, so we build it as completely as we can here and patch
 * it up later in fix_sflow_action(). */
static void
add_sflow_action(struct action_xlate_ctx *ctx)
{
    ctx->user_cookie_offset = compose_sflow_action(ctx->ofproto,
                                                   ctx->odp_actions,
                                                   &ctx->flow, OVSP_NONE);
    ctx->sflow_odp_port = 0;
    ctx->sflow_n_outputs = 0;
}

/* Fixes up the SAMPLE action according to data collected while composing ODP
 * actions, that is, rewrites the user-cookie in the SAMPLE action's nested
 * USERSPACE action (within OVS_SAMPLE_ATTR_ACTIONS), which sFlow requires. */
static void
fix_sflow_action(struct action_xlate_ctx *ctx)
{
    const struct flow *base = &ctx->base_flow;
    union user_action_cookie *cookie;

    if (!ctx->user_cookie_offset) {
        return;
    }

    cookie = ofpbuf_at(ctx->odp_actions, ctx->user_cookie_offset,
                       sizeof(*cookie));
    assert(cookie->type == USER_ACTION_COOKIE_SFLOW);

    compose_sflow_cookie(ctx->ofproto, base->vlan_tci,
                         ctx->sflow_odp_port, ctx->sflow_n_outputs, cookie);
}

static void
compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port,
                        bool check_stp)
{
    const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port);
    uint16_t odp_port = ofp_port_to_odp_port(ofp_port);
    ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci;
    uint8_t flow_nw_tos = ctx->flow.nw_tos;
    uint16_t out_port;

    if (ofport) {
        struct priority_to_dscp *pdscp;

        if (ofport->up.pp.config & OFPUTIL_PC_NO_FWD
            || (check_stp && !stp_forward_in_state(ofport->stp_state))) {
            return;
        }

        pdscp = get_priority(ofport, ctx->flow.skb_priority);
        if (pdscp) {
            ctx->flow.nw_tos &= ~IP_DSCP_MASK;
            ctx->flow.nw_tos |= pdscp->dscp;
        }
    } else {
        /* We may not have an ofport record for this port, but it doesn't hurt
         * to allow forwarding to it anyhow.  Maybe such a port will appear
         * later and we're pre-populating the flow table. */
    }

    out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port,
                                      ctx->flow.vlan_tci);
    if (out_port != odp_port) {
        ctx->flow.vlan_tci = htons(0);
    }
    commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions);
    nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port);

    ctx->sflow_odp_port = odp_port;
    ctx->sflow_n_outputs++;
    ctx->nf_output_iface = ofp_port;
    ctx->flow.vlan_tci = flow_vlan_tci;
    ctx->flow.nw_tos = flow_nw_tos;
}

static void
compose_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port)
{
    compose_output_action__(ctx, ofp_port, true);
}

static void
xlate_table_action(struct action_xlate_ctx *ctx,
                   uint16_t in_port, uint8_t table_id)
{
    if (ctx->recurse < MAX_RESUBMIT_RECURSION) {
        struct ofproto_dpif *ofproto = ctx->ofproto;
        struct rule_dpif *rule;
        uint16_t old_in_port;
        uint8_t old_table_id;

        old_table_id = ctx->table_id;
        ctx->table_id = table_id;

        /* Look up a flow with 'in_port' as the input port. */
        old_in_port = ctx->flow.in_port;
        ctx->flow.in_port = in_port;
        rule = rule_dpif_lookup__(ofproto, &ctx->flow, table_id);

        /* Tag the flow. */
        if (table_id > 0 && table_id < N_TABLES) {
            struct table_dpif *table = &ofproto->tables[table_id];
            if (table->other_table) {
                ctx->tags |= (rule && rule->tag
                              ? rule->tag
                              : rule_calculate_tag(&ctx->flow,
                                                   &table->other_table->wc,
                                                   table->basis));
            }
        }

        /* Restore the original input port.  Otherwise OFPP_NORMAL and
         * OFPP_IN_PORT will have surprising behavior. */
        ctx->flow.in_port = old_in_port;

        if (ctx->resubmit_hook) {
            ctx->resubmit_hook(ctx, rule);
        }

        if (rule) {
            struct rule_dpif *old_rule = ctx->rule;

            if (ctx->resubmit_stats) {
                rule_credit_stats(rule, ctx->resubmit_stats);
            }

            ctx->recurse++;
            ctx->rule = rule;
            do_xlate_actions(rule->up.ofpacts, rule->up.ofpacts_len, ctx);
            ctx->rule = old_rule;
            ctx->recurse--;
        }

        ctx->table_id = old_table_id;
    } else {
        static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1);

        VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times",
                    MAX_RESUBMIT_RECURSION);
        ctx->max_resubmit_trigger = true;
    }
}

static void
xlate_ofpact_resubmit(struct action_xlate_ctx *ctx,
                      const struct ofpact_resubmit *resubmit)
{
    uint16_t in_port;
    uint8_t table_id;

    in_port = resubmit->in_port;
    if (in_port == OFPP_IN_PORT) {
        in_port = ctx->flow.in_port;
    }

    table_id = resubmit->table_id;
    if (table_id == 255) {
        table_id = ctx->table_id;
    }

    xlate_table_action(ctx, in_port, table_id);
}
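
/* Example (follows from the defaults above): a resubmit action whose
 * in_port is OFPP_IN_PORT and whose table_id is 255 re-runs the lookup in
 * the current table using the flow's current input port, so both fields
 * act as "keep the current value" placeholders. */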

static void
flood_packets(struct action_xlate_ctx *ctx, bool all)
{
    struct ofport_dpif *ofport;

    HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) {
        uint16_t ofp_port = ofport->up.ofp_port;

        if (ofp_port == ctx->flow.in_port) {
            continue;
        }

        if (all) {
            compose_output_action__(ctx, ofp_port, false);
        } else if (!(ofport->up.pp.config & OFPUTIL_PC_NO_FLOOD)) {
            compose_output_action(ctx, ofp_port);
        }
    }

    ctx->nf_output_iface = NF_OUT_FLOOD;
}

static void
execute_controller_action(struct action_xlate_ctx *ctx, int len,
                          enum ofp_packet_in_reason reason,
                          uint16_t controller_id)
{
    struct ofputil_packet_in pin;
    struct ofpbuf *packet;

    ctx->slow |= SLOW_CONTROLLER;
    if (!ctx->packet) {
        return;
    }

    packet = ofpbuf_clone(ctx->packet);

    if (packet->l2 && packet->l3) {
        struct eth_header *eh;

        eth_pop_vlan(packet);
        eh = packet->l2;

        /* If the Ethernet type is less than ETH_TYPE_MIN, it's likely an 802.2
         * LLC frame.  Calculating the Ethernet type of these frames is more
         * trouble than seems appropriate for a simple assertion. */
        assert(ntohs(eh->eth_type) < ETH_TYPE_MIN
               || eh->eth_type == ctx->flow.dl_type);

        memcpy(eh->eth_src, ctx->flow.dl_src, sizeof eh->eth_src);
        memcpy(eh->eth_dst, ctx->flow.dl_dst, sizeof eh->eth_dst);

        if (ctx->flow.vlan_tci & htons(VLAN_CFI)) {
            eth_push_vlan(packet, ctx->flow.vlan_tci);
        }

        if (packet->l4) {
            if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
                packet_set_ipv4(packet, ctx->flow.nw_src, ctx->flow.nw_dst,
                                ctx->flow.nw_tos, ctx->flow.nw_ttl);
            }

            if (packet->l7) {
                if (ctx->flow.nw_proto == IPPROTO_TCP) {
                    packet_set_tcp_port(packet, ctx->flow.tp_src,
                                        ctx->flow.tp_dst);
                } else if (ctx->flow.nw_proto == IPPROTO_UDP) {
                    packet_set_udp_port(packet, ctx->flow.tp_src,
                                        ctx->flow.tp_dst);
                }
            }
        }
    }

    pin.packet = packet->data;
    pin.packet_len = packet->size;
    pin.reason = reason;
    pin.controller_id = controller_id;
    pin.table_id = ctx->table_id;
    pin.cookie = ctx->rule ? ctx->rule->up.flow_cookie : 0;

    pin.send_len = len;
    flow_get_metadata(&ctx->flow, &pin.fmd);

    connmgr_send_packet_in(ctx->ofproto->up.connmgr, &pin);
    ofpbuf_delete(packet);
}

static bool
compose_dec_ttl(struct action_xlate_ctx *ctx)
{
    if (ctx->flow.dl_type != htons(ETH_TYPE_IP) &&
        ctx->flow.dl_type != htons(ETH_TYPE_IPV6)) {
        return false;
    }

    if (ctx->flow.nw_ttl > 1) {
        ctx->flow.nw_ttl--;
        return false;
    } else {
        execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0);

        /* Stop processing for current table. */
        return true;
    }
}
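
/* Behavior summary (derived directly from the branches above):
 *
 *     non-IP packet          -> no-op, translation continues
 *     IPv4/IPv6, TTL > 1     -> TTL decremented, translation continues
 *     IPv4/IPv6, TTL <= 1    -> whole packet sent to the controller with
 *                               reason OFPR_INVALID_TTL; the caller stops
 *                               processing the current table
 */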

static void
xlate_output_action(struct action_xlate_ctx *ctx,
                    uint16_t port, uint16_t max_len)
{
    uint16_t prev_nf_output_iface = ctx->nf_output_iface;

    ctx->nf_output_iface = NF_OUT_DROP;

    switch (port) {
    case OFPP_IN_PORT:
        compose_output_action(ctx, ctx->flow.in_port);
        break;
    case OFPP_TABLE:
        xlate_table_action(ctx, ctx->flow.in_port, 0);
        break;
    case OFPP_NORMAL:
        xlate_normal(ctx);
        break;
    case OFPP_FLOOD:
        flood_packets(ctx, false);
        break;
    case OFPP_ALL:
        flood_packets(ctx, true);
        break;
    case OFPP_CONTROLLER:
        execute_controller_action(ctx, max_len, OFPR_ACTION, 0);
        break;
    case OFPP_NONE:
        break;
    case OFPP_LOCAL:
    default:
        if (port != ctx->flow.in_port) {
            compose_output_action(ctx, port);
        }
        break;
    }

    if (prev_nf_output_iface == NF_OUT_FLOOD) {
        ctx->nf_output_iface = NF_OUT_FLOOD;
    } else if (ctx->nf_output_iface == NF_OUT_DROP) {
        ctx->nf_output_iface = prev_nf_output_iface;
    } else if (prev_nf_output_iface != NF_OUT_DROP &&
               ctx->nf_output_iface != NF_OUT_FLOOD) {
        ctx->nf_output_iface = NF_OUT_MULTI;
    }
}
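
/* NetFlow output-interface bookkeeping, summarizing the branches above:
 * once NF_OUT_FLOOD has been recorded it is sticky; an action that outputs
 * nowhere (NF_OUT_DROP) preserves the previously recorded interface; and a
 * single-port output on top of an earlier non-drop record collapses to
 * NF_OUT_MULTI. */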

static void
xlate_output_reg_action(struct action_xlate_ctx *ctx,
                        const struct ofpact_output_reg *or)
{
    uint64_t port = mf_get_subfield(&or->src, &ctx->flow);
    if (port <= UINT16_MAX) {
        xlate_output_action(ctx, port, or->max_len);
    }
}

static void
xlate_enqueue_action(struct action_xlate_ctx *ctx,
                     const struct ofpact_enqueue *enqueue)
{
    uint16_t ofp_port = enqueue->port;
    uint32_t queue_id = enqueue->queue;
    uint32_t flow_priority, priority;
    int error;

    /* Translate queue to priority. */
    error = dpif_queue_to_priority(ctx->ofproto->dpif, queue_id, &priority);
    if (error) {
        /* Fall back to ordinary output action. */
        xlate_output_action(ctx, enqueue->port, 0);
        return;
    }

    /* Check output port. */
    if (ofp_port == OFPP_IN_PORT) {
        ofp_port = ctx->flow.in_port;
    } else if (ofp_port == ctx->flow.in_port) {
        return;
    }

    /* Add datapath actions. */
    flow_priority = ctx->flow.skb_priority;
    ctx->flow.skb_priority = priority;
    compose_output_action(ctx, ofp_port);
    ctx->flow.skb_priority = flow_priority;

    /* Update NetFlow output port. */
    if (ctx->nf_output_iface == NF_OUT_DROP) {
        ctx->nf_output_iface = ofp_port;
    } else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
        ctx->nf_output_iface = NF_OUT_MULTI;
    }
}

static void
xlate_set_queue_action(struct action_xlate_ctx *ctx, uint32_t queue_id)
{
    uint32_t skb_priority;

    if (!dpif_queue_to_priority(ctx->ofproto->dpif, queue_id, &skb_priority)) {
        ctx->flow.skb_priority = skb_priority;
    } else {
        /* Couldn't translate queue to a priority.  Nothing to do.  A warning
         * has already been logged. */
    }
}

struct xlate_reg_state {
    ovs_be16 vlan_tci;
    ovs_be64 tun_id;
};

static void
xlate_autopath(struct action_xlate_ctx *ctx,
               const struct ofpact_autopath *ap)
{
    uint16_t ofp_port = ap->port;
    struct ofport_dpif *port = get_ofp_port(ctx->ofproto, ofp_port);

    if (!port || !port->bundle) {
        ofp_port = OFPP_NONE;
    } else if (port->bundle->bond) {
        /* Autopath does not support VLAN hashing. */
        struct ofport_dpif *slave = bond_choose_output_slave(
            port->bundle->bond, &ctx->flow, 0, &ctx->tags);
        if (slave) {
            ofp_port = slave->up.ofp_port;
        }
    }
    nxm_reg_load(&ap->dst, ofp_port, &ctx->flow);
}

static bool
slave_enabled_cb(uint16_t ofp_port, void *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_;
    struct ofport_dpif *port;

    switch (ofp_port) {
    case OFPP_IN_PORT:
    case OFPP_TABLE:
    case OFPP_NORMAL:
    case OFPP_FLOOD:
    case OFPP_ALL:
    case OFPP_NONE:
        return true;
    case OFPP_CONTROLLER: /* Not supported by the bundle action. */
        return false;
    default:
        port = get_ofp_port(ofproto, ofp_port);
        return port ? port->may_enable : false;
    }
}

static void
xlate_bundle_action(struct action_xlate_ctx *ctx,
                    const struct ofpact_bundle *bundle)
{
    uint16_t port;

    port = bundle_execute(bundle, &ctx->flow, slave_enabled_cb, ctx->ofproto);
    if (bundle->dst.field) {
        nxm_reg_load(&bundle->dst, port, &ctx->flow);
    } else {
        xlate_output_action(ctx, port, 0);
    }
}

static void
xlate_learn_action(struct action_xlate_ctx *ctx,
                   const struct ofpact_learn *learn)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
    struct ofputil_flow_mod fm;
    uint64_t ofpacts_stub[1024 / 8];
    struct ofpbuf ofpacts;
    int error;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    learn_execute(learn, &ctx->flow, &fm, &ofpacts);

    error = ofproto_flow_mod(&ctx->ofproto->up, &fm);
    if (error && !VLOG_DROP_WARN(&rl)) {
        VLOG_WARN("learning action failed to modify flow table (%s)",
                  ofperr_get_name(error));
    }

    ofpbuf_uninit(&ofpacts);
}

/* Reduces '*timeout' to no more than 'max'.  A value of zero in either case
 * means "infinite". */
static void
reduce_timeout(uint16_t max, uint16_t *timeout)
{
    if (max && (!*timeout || *timeout > max)) {
        *timeout = max;
    }
}
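
/* Worked example: with max == 10, a *timeout of 0 ("infinite") becomes 10
 * and a *timeout of 30 becomes 10, but a *timeout of 5 stays 5.  With
 * max == 0, every timeout is left untouched. */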

static void
xlate_fin_timeout(struct action_xlate_ctx *ctx,
                  const struct ofpact_fin_timeout *oft)
{
    if (ctx->tcp_flags & (TCP_FIN | TCP_RST) && ctx->rule) {
        struct rule_dpif *rule = ctx->rule;

        reduce_timeout(oft->fin_idle_timeout, &rule->up.idle_timeout);
        reduce_timeout(oft->fin_hard_timeout, &rule->up.hard_timeout);
    }
}

static bool
may_receive(const struct ofport_dpif *port, struct action_xlate_ctx *ctx)
{
    if (port->up.pp.config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp)
                              ? OFPUTIL_PC_NO_RECV_STP
                              : OFPUTIL_PC_NO_RECV)) {
        return false;
    }

    /* Only drop packets here if both forwarding and learning are
     * disabled.  If just learning is enabled, we need to have
     * OFPP_NORMAL and the learning action have a look at the packet
     * before we can drop it. */
    if (!stp_forward_in_state(port->stp_state)
        && !stp_learn_in_state(port->stp_state)) {
        return false;
    }

    return true;
}

static void
do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                 struct action_xlate_ctx *ctx)
{
    const struct ofport_dpif *port;
    bool was_evictable = true;
    const struct ofpact *a;

    port = get_ofp_port(ctx->ofproto, ctx->flow.in_port);
    if (port && !may_receive(port, ctx)) {
        /* Drop this flow. */
        return;
    }

    if (ctx->rule) {
        /* Don't let the rule we're working on get evicted underneath us. */
        was_evictable = ctx->rule->up.evictable;
        ctx->rule->up.evictable = false;
    }
    OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
        struct ofpact_controller *controller;

        if (ctx->exit) {
            break;
        }

        switch (a->type) {
        case OFPACT_OUTPUT:
            xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
                                ofpact_get_OUTPUT(a)->max_len);
            break;

        case OFPACT_CONTROLLER:
            controller = ofpact_get_CONTROLLER(a);
            execute_controller_action(ctx, controller->max_len,
                                      controller->reason,
                                      controller->controller_id);
            break;

        case OFPACT_ENQUEUE:
            xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a));
            break;

        case OFPACT_SET_VLAN_VID:
            ctx->flow.vlan_tci &= ~htons(VLAN_VID_MASK);
            ctx->flow.vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
                                   | htons(VLAN_CFI));
            break;

        case OFPACT_SET_VLAN_PCP:
            ctx->flow.vlan_tci &= ~htons(VLAN_PCP_MASK);
            ctx->flow.vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp
                                         << VLAN_PCP_SHIFT)
                                        | VLAN_CFI);
            break;

        case OFPACT_STRIP_VLAN:
            ctx->flow.vlan_tci = htons(0);
            break;

        case OFPACT_SET_ETH_SRC:
            memcpy(ctx->flow.dl_src, ofpact_get_SET_ETH_SRC(a)->mac,
                   ETH_ADDR_LEN);
            break;

        case OFPACT_SET_ETH_DST:
            memcpy(ctx->flow.dl_dst, ofpact_get_SET_ETH_DST(a)->mac,
                   ETH_ADDR_LEN);
            break;

        case OFPACT_SET_IPV4_SRC:
            ctx->flow.nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
            break;

        case OFPACT_SET_IPV4_DST:
            ctx->flow.nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
            break;

        case OFPACT_SET_IPV4_DSCP:
            /* OpenFlow 1.0 only supports IPv4. */
            if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
                ctx->flow.nw_tos &= ~IP_DSCP_MASK;
                ctx->flow.nw_tos |= ofpact_get_SET_IPV4_DSCP(a)->dscp;
            }
            break;

        case OFPACT_SET_L4_SRC_PORT:
            ctx->flow.tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
            break;

        case OFPACT_SET_L4_DST_PORT:
            ctx->flow.tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
            break;

        case OFPACT_RESUBMIT:
            xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
            break;

        case OFPACT_SET_TUNNEL:
            ctx->flow.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
            break;

        case OFPACT_SET_QUEUE:
            xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
            break;

        case OFPACT_POP_QUEUE:
            ctx->flow.skb_priority = ctx->orig_skb_priority;
            break;

        case OFPACT_REG_MOVE:
            nxm_execute_reg_move(ofpact_get_REG_MOVE(a), &ctx->flow);
            break;

        case OFPACT_REG_LOAD:
            nxm_execute_reg_load(ofpact_get_REG_LOAD(a), &ctx->flow);
            break;

        case OFPACT_DEC_TTL:
            if (compose_dec_ttl(ctx)) {
                goto out;
            }
            break;

        case OFPACT_NOTE:
            /* Nothing to do. */
            break;

        case OFPACT_MULTIPATH:
            multipath_execute(ofpact_get_MULTIPATH(a), &ctx->flow);
            break;

        case OFPACT_AUTOPATH:
            xlate_autopath(ctx, ofpact_get_AUTOPATH(a));
            break;

        case OFPACT_BUNDLE:
            ctx->ofproto->has_bundle_action = true;
            xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
            break;

        case OFPACT_OUTPUT_REG:
            xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
            break;

        case OFPACT_LEARN:
            ctx->has_learn = true;
            if (ctx->may_learn) {
                xlate_learn_action(ctx, ofpact_get_LEARN(a));
            }
            break;

        case OFPACT_EXIT:
            ctx->exit = true;
            break;

        case OFPACT_FIN_TIMEOUT:
            ctx->has_fin_timeout = true;
            xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
            break;
        }
    }

out:
    /* We've let OFPP_NORMAL and the learning action look at the packet,
     * so drop it now if forwarding is disabled. */
    if (port && !stp_forward_in_state(port->stp_state)) {
        ofpbuf_clear(ctx->odp_actions);
        add_sflow_action(ctx);
    }
    if (ctx->rule) {
        ctx->rule->up.evictable = was_evictable;
    }
}

static void
action_xlate_ctx_init(struct action_xlate_ctx *ctx,
                      struct ofproto_dpif *ofproto, const struct flow *flow,
                      ovs_be16 initial_tci, struct rule_dpif *rule,
                      uint8_t tcp_flags, const struct ofpbuf *packet)
{
    ctx->ofproto = ofproto;
    ctx->flow = *flow;
    ctx->base_flow = ctx->flow;
    ctx->base_flow.tun_id = 0;
    ctx->base_flow.vlan_tci = initial_tci;
    ctx->rule = rule;
    ctx->packet = packet;
    ctx->may_learn = packet != NULL;
    ctx->tcp_flags = tcp_flags;
    ctx->resubmit_hook = NULL;
    ctx->report_hook = NULL;
    ctx->resubmit_stats = NULL;
}

/* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts'
 * into datapath actions in 'odp_actions', using 'ctx'. */
static void
xlate_actions(struct action_xlate_ctx *ctx,
              const struct ofpact *ofpacts, size_t ofpacts_len,
              struct ofpbuf *odp_actions)
{
    /* Normally false.  Set to true if we ever hit MAX_RESUBMIT_RECURSION, so
     * that in the future we always keep a copy of the original flow for
     * tracing purposes. */
    static bool hit_resubmit_limit;

    enum slow_path_reason special;

    COVERAGE_INC(ofproto_dpif_xlate);

    ofpbuf_clear(odp_actions);
    ofpbuf_reserve(odp_actions, NL_A_U32_SIZE);

    ctx->odp_actions = odp_actions;
    ctx->tags = 0;
    ctx->slow = 0;
    ctx->has_learn = false;
    ctx->has_normal = false;
    ctx->has_fin_timeout = false;
    ctx->nf_output_iface = NF_OUT_DROP;
    ctx->mirrors = 0;
    ctx->recurse = 0;
    ctx->max_resubmit_trigger = false;
    ctx->orig_skb_priority = ctx->flow.skb_priority;
    ctx->table_id = 0;
    ctx->exit = false;

    if (ctx->ofproto->has_mirrors || hit_resubmit_limit) {
        /* Do this conditionally because the copy is expensive enough that it
         * shows up in profiles.
         *
         * We keep orig_flow in 'ctx' only because I couldn't make GCC 4.4
         * believe that I wasn't using it without initializing it if I kept it
         * in a local variable. */
        ctx->orig_flow = ctx->flow;
    }

    if (ctx->flow.nw_frag & FLOW_NW_FRAG_ANY) {
        switch (ctx->ofproto->up.frag_handling) {
        case OFPC_FRAG_NORMAL:
            /* We must pretend that transport ports are unavailable. */
            ctx->flow.tp_src = ctx->base_flow.tp_src = htons(0);
            ctx->flow.tp_dst = ctx->base_flow.tp_dst = htons(0);
            break;

        case OFPC_FRAG_DROP:
            return;

        case OFPC_FRAG_REASM:
            NOT_REACHED();

        case OFPC_FRAG_NX_MATCH:
            /* Nothing to do. */
            break;

        case OFPC_INVALID_TTL_TO_CONTROLLER:
            NOT_REACHED();
        }
    }

    special = process_special(ctx->ofproto, &ctx->flow, ctx->packet);
    if (special) {
        ctx->slow |= special;
    } else {
        static struct vlog_rate_limit trace_rl = VLOG_RATE_LIMIT_INIT(1, 1);
        ovs_be16 initial_tci = ctx->base_flow.vlan_tci;

        add_sflow_action(ctx);
        do_xlate_actions(ofpacts, ofpacts_len, ctx);

        if (ctx->max_resubmit_trigger && !ctx->resubmit_hook) {
            if (!hit_resubmit_limit) {
                /* We didn't record the original flow.  Make sure we do from
                 * now on. */
                hit_resubmit_limit = true;
            } else if (!VLOG_DROP_ERR(&trace_rl)) {
                struct ds ds = DS_EMPTY_INITIALIZER;

                ofproto_trace(ctx->ofproto, &ctx->orig_flow, ctx->packet,
                              initial_tci, &ds);
                VLOG_ERR("Trace triggered by excessive resubmit "
                         "recursion:\n%s", ds_cstr(&ds));
                ds_destroy(&ds);
            }
        }

        if (!connmgr_may_set_up_flow(ctx->ofproto->up.connmgr, &ctx->flow,
                                     ctx->odp_actions->data,
                                     ctx->odp_actions->size)) {
            ctx->slow |= SLOW_IN_BAND;
            if (ctx->packet
                && connmgr_msg_in_hook(ctx->ofproto->up.connmgr, &ctx->flow,
                                       ctx->packet)) {
                compose_output_action(ctx, OFPP_LOCAL);
            }
        }
        if (ctx->ofproto->has_mirrors) {
            add_mirror_actions(ctx, &ctx->orig_flow);
        }
        fix_sflow_action(ctx);
    }
}

/* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts'
 * into datapath actions, using 'ctx', and discards the datapath actions. */
static void
xlate_actions_for_side_effects(struct action_xlate_ctx *ctx,
                               const struct ofpact *ofpacts,
                               size_t ofpacts_len)
{
    uint64_t odp_actions_stub[1024 / 8];
    struct ofpbuf odp_actions;

    ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
    xlate_actions(ctx, ofpacts, ofpacts_len, &odp_actions);
    ofpbuf_uninit(&odp_actions);
}

static void
xlate_report(struct action_xlate_ctx *ctx, const char *s)
{
    if (ctx->report_hook) {
        ctx->report_hook(ctx, s);
    }
}

/* OFPP_NORMAL implementation. */

static struct ofport_dpif *ofbundle_get_a_port(const struct ofbundle *);

/* Given 'vid', the VID obtained from the 802.1Q header that was received as
 * part of a packet (specify 0 if there was no 802.1Q header), and 'in_bundle',
 * the bundle on which the packet was received, returns the VLAN to which the
 * packet belongs.
 *
 * Both 'vid' and the return value are in the range 0...4095. */
static uint16_t
input_vid_to_vlan(const struct ofbundle *in_bundle, uint16_t vid)
{
    switch (in_bundle->vlan_mode) {
    case PORT_VLAN_ACCESS:
        return in_bundle->vlan;

    case PORT_VLAN_TRUNK:
        return vid;

    case PORT_VLAN_NATIVE_UNTAGGED:
    case PORT_VLAN_NATIVE_TAGGED:
        return vid ? vid : in_bundle->vlan;

    default:
        NOT_REACHED();
    }
}
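
/* Example (a hypothetical bundle whose access/native VLAN is 10):
 *
 *     vlan_mode                vid == 0    vid == 20
 *     PORT_VLAN_ACCESS         10          10   (a tagged packet would be
 *                                                rejected earlier by
 *                                                input_vid_is_valid())
 *     PORT_VLAN_TRUNK          0           20
 *     PORT_VLAN_NATIVE_*       10          20
 */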

/* Checks whether a packet with the given 'vid' may ingress on 'in_bundle'.
 * If so, returns true.  Otherwise, returns false and, if 'warn' is true, logs
 * a warning.
 *
 * 'vid' should be the VID obtained from the 802.1Q header that was received as
 * part of a packet (specify 0 if there was no 802.1Q header), in the range
 * 0...4095. */
static bool
input_vid_is_valid(uint16_t vid, struct ofbundle *in_bundle, bool warn)
{
    /* Allow any VID on the OFPP_NONE port. */
    if (in_bundle == &ofpp_none_bundle) {
        return true;
    }

    switch (in_bundle->vlan_mode) {
    case PORT_VLAN_ACCESS:
        if (vid) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
                             "packet received on port %s configured as VLAN "
                             "%"PRIu16" access port",
                             in_bundle->ofproto->up.name, vid,
                             in_bundle->name, in_bundle->vlan);
            }
            return false;
        }
        return true;

    case PORT_VLAN_NATIVE_UNTAGGED:
    case PORT_VLAN_NATIVE_TAGGED:
        if (!vid) {
            /* Port must always carry its native VLAN. */
            return true;
        }
        /* Fall through. */
    case PORT_VLAN_TRUNK:
        if (!ofbundle_includes_vlan(in_bundle, vid)) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" packet "
                             "received on port %s not configured for trunking "
                             "VLAN %"PRIu16,
                             in_bundle->ofproto->up.name, vid,
                             in_bundle->name, vid);
            }
            return false;
        }
        return true;

    default:
        NOT_REACHED();
    }
}

/* Given 'vlan', the VLAN that a packet belongs to, and
 * 'out_bundle', a bundle on which the packet is to be output, returns the VID
 * that should be included in the 802.1Q header.  (If the return value is 0,
 * then the 802.1Q header should only be included in the packet if there is a
 * nonzero PCP.)
 *
 * Both 'vlan' and the return value are in the range 0...4095. */
static uint16_t
output_vlan_to_vid(const struct ofbundle *out_bundle, uint16_t vlan)
{
    switch (out_bundle->vlan_mode) {
    case PORT_VLAN_ACCESS:
        return 0;

    case PORT_VLAN_TRUNK:
    case PORT_VLAN_NATIVE_TAGGED:
        return vlan;

    case PORT_VLAN_NATIVE_UNTAGGED:
        return vlan == out_bundle->vlan ? 0 : vlan;

    default:
        NOT_REACHED();
    }
}
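
/* Example (the same hypothetical bundle with native/access VLAN 10): a
 * packet on VLAN 10 leaves untagged (VID 0) on PORT_VLAN_ACCESS and
 * PORT_VLAN_NATIVE_UNTAGGED ports but tagged with VID 10 on PORT_VLAN_TRUNK
 * and PORT_VLAN_NATIVE_TAGGED ports; a packet on VLAN 20 leaves tagged with
 * VID 20 everywhere except an access port, which always strips the tag.
 * Together with input_vid_to_vlan() this makes the VLAN<->VID mapping
 * symmetric. */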

static void
output_normal(struct action_xlate_ctx *ctx, const struct ofbundle *out_bundle,
              uint16_t vlan)
{
    struct ofport_dpif *port;
    uint16_t vid;
    ovs_be16 tci, old_tci;

    vid = output_vlan_to_vid(out_bundle, vlan);
    if (!out_bundle->bond) {
        port = ofbundle_get_a_port(out_bundle);
    } else {
        port = bond_choose_output_slave(out_bundle->bond, &ctx->flow,
                                        vid, &ctx->tags);
        if (!port) {
            /* No slaves enabled, so drop packet. */
            return;
        }
    }

    old_tci = ctx->flow.vlan_tci;
    tci = htons(vid);
    if (tci || out_bundle->use_priority_tags) {
        tci |= ctx->flow.vlan_tci & htons(VLAN_PCP_MASK);
        if (tci) {
            tci |= htons(VLAN_CFI);
        }
    }
    ctx->flow.vlan_tci = tci;

    compose_output_action(ctx, port->up.ofp_port);
    ctx->flow.vlan_tci = old_tci;
}

static int
mirror_mask_ffs(mirror_mask_t mask)
{
    BUILD_ASSERT_DECL(sizeof(unsigned int) >= sizeof(mask));
    return ffs(mask);
}
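
/* Illustrative sketch (not called anywhere): the mirror code below walks the
 * set bits of a mirror_mask_t with the classic lowest-set-bit trick, as in
 * add_mirror_actions() and update_mirror_stats():
 *
 *     while (mirrors) {
 *         struct ofmirror *m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1];
 *         ...process 'm'...
 *         mirrors &= mirrors - 1;        clears the lowest-order 1-bit
 *     }
 *
 * ffs() is 1-based, hence the "- 1" when indexing ofproto->mirrors. */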

static bool
ofbundle_trunks_vlan(const struct ofbundle *bundle, uint16_t vlan)
{
    return (bundle->vlan_mode != PORT_VLAN_ACCESS
            && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan)));
}

static bool
ofbundle_includes_vlan(const struct ofbundle *bundle, uint16_t vlan)
{
    return vlan == bundle->vlan || ofbundle_trunks_vlan(bundle, vlan);
}

/* Returns an arbitrary interface within 'bundle'. */
static struct ofport_dpif *
ofbundle_get_a_port(const struct ofbundle *bundle)
{
    return CONTAINER_OF(list_front(&bundle->ports),
                        struct ofport_dpif, bundle_node);
}

static bool
vlan_is_mirrored(const struct ofmirror *m, int vlan)
{
    return !m->vlans || bitmap_is_set(m->vlans, vlan);
}

static void
add_mirror_actions(struct action_xlate_ctx *ctx, const struct flow *orig_flow)
{
    struct ofproto_dpif *ofproto = ctx->ofproto;
    mirror_mask_t mirrors;
    struct ofbundle *in_bundle;
    uint16_t vlan;
    uint16_t vid;
    const struct nlattr *a;
    size_t left;

    in_bundle = lookup_input_bundle(ctx->ofproto, orig_flow->in_port,
                                    ctx->packet != NULL, NULL);
    if (!in_bundle) {
        return;
    }
    mirrors = in_bundle->src_mirrors;

    /* Drop frames on bundles reserved for mirroring. */
    if (in_bundle->mirror_out) {
        if (ctx->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
                         "%s, which is reserved exclusively for mirroring",
                         ctx->ofproto->up.name, in_bundle->name);
        }
        return;
    }

    /* Check VLAN. */
    vid = vlan_tci_to_vid(orig_flow->vlan_tci);
    if (!input_vid_is_valid(vid, in_bundle, ctx->packet != NULL)) {
        return;
    }
    vlan = input_vid_to_vlan(in_bundle, vid);

    /* Look at the output ports to check for destination selections. */

    NL_ATTR_FOR_EACH (a, left, ctx->odp_actions->data,
                      ctx->odp_actions->size) {
        enum ovs_action_attr type = nl_attr_type(a);
        struct ofport_dpif *ofport;

        if (type != OVS_ACTION_ATTR_OUTPUT) {
            continue;
        }

        ofport = get_odp_port(ofproto, nl_attr_get_u32(a));
        if (ofport && ofport->bundle) {
            mirrors |= ofport->bundle->dst_mirrors;
        }
    }

    if (!mirrors) {
        return;
    }

    /* Restore the original packet before adding the mirror actions. */
    ctx->flow = *orig_flow;

    while (mirrors) {
        struct ofmirror *m;

        m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1];

        if (!vlan_is_mirrored(m, vlan)) {
            mirrors &= mirrors - 1;
            continue;
        }

        mirrors &= ~m->dup_mirrors;
        ctx->mirrors |= m->dup_mirrors;
        if (m->out) {
            output_normal(ctx, m->out, vlan);
        } else if (vlan != m->out_vlan
                   && !eth_addr_is_reserved(orig_flow->dl_dst)) {
            struct ofbundle *bundle;

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                if (ofbundle_includes_vlan(bundle, m->out_vlan)
                    && !bundle->mirror_out) {
                    output_normal(ctx, bundle, m->out_vlan);
                }
            }
        }
    }
}

static void
update_mirror_stats(struct ofproto_dpif *ofproto, mirror_mask_t mirrors,
                    uint64_t packets, uint64_t bytes)
{
    if (!mirrors) {
        return;
    }

    for (; mirrors; mirrors &= mirrors - 1) {
        struct ofmirror *m;

        m = ofproto->mirrors[mirror_mask_ffs(mirrors) - 1];

        if (!m) {
            /* In normal circumstances 'm' will not be NULL.  However,
             * if mirrors are reconfigured, we can temporarily get out
             * of sync in facet_revalidate().  We could "correct" the
             * mirror list before reaching here, but doing that would
             * not properly account the traffic stats we've currently
             * accumulated for previous mirror configuration. */
            continue;
        }

        m->packet_count += packets;
        m->byte_count += bytes;
    }
}

/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
 * migration.  Older Citrix-patched Linux DomU used gratuitous ARP replies to
 * indicate this; newer upstream kernels use gratuitous ARP requests. */
static bool
is_gratuitous_arp(const struct flow *flow)
{
    return (flow->dl_type == htons(ETH_TYPE_ARP)
            && eth_addr_is_broadcast(flow->dl_dst)
            && (flow->nw_proto == ARP_OP_REPLY
                || (flow->nw_proto == ARP_OP_REQUEST
                    && flow->nw_src == flow->nw_dst)));
}

static void
update_learning_table(struct ofproto_dpif *ofproto,
                      const struct flow *flow, int vlan,
                      struct ofbundle *in_bundle)
{
    struct mac_entry *mac;

    /* Don't learn the OFPP_NONE port. */
    if (in_bundle == &ofpp_none_bundle) {
        return;
    }

    if (!mac_learning_may_learn(ofproto->ml, flow->dl_src, vlan)) {
        return;
    }

    mac = mac_learning_insert(ofproto->ml, flow->dl_src, vlan);
    if (is_gratuitous_arp(flow)) {
        /* We don't want to learn from gratuitous ARP packets that are
         * reflected back over bond slaves so we lock the learning table. */
        if (!in_bundle->bond) {
            mac_entry_set_grat_arp_lock(mac);
        } else if (mac_entry_is_grat_arp_locked(mac)) {
            return;
        }
    }

    if (mac_entry_is_new(mac) || mac->port.p != in_bundle) {
        /* The log messages here could actually be useful in debugging,
         * so keep the rate limit relatively high. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
        VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
                    "on port %s in VLAN %d",
                    ofproto->up.name, ETH_ADDR_ARGS(flow->dl_src),
                    in_bundle->name, vlan);

        mac->port.p = in_bundle;
        tag_set_add(&ofproto->revalidate_set,
                    mac_learning_changed(ofproto->ml, mac));
    }
}

static struct ofbundle *
lookup_input_bundle(const struct ofproto_dpif *ofproto, uint16_t in_port,
                    bool warn, struct ofport_dpif **in_ofportp)
{
    struct ofport_dpif *ofport;

    /* Find the port and bundle for the received packet. */
    ofport = get_ofp_port(ofproto, in_port);
    if (in_ofportp) {
        *in_ofportp = ofport;
    }
    if (ofport && ofport->bundle) {
        return ofport->bundle;
    }

    /* Special-case OFPP_NONE, which a controller may use as the ingress
     * port for traffic that it is sourcing. */
    if (in_port == OFPP_NONE) {
        return &ofpp_none_bundle;
    }

    /* Odd.  A few possible reasons here:
     *
     * - We deleted a port but there are still a few packets queued up
     *   from it.
     *
     * - Someone externally added a port (e.g. "ovs-dpctl add-if") that
     *   we don't know about.
     *
     * - The ofproto client didn't configure the port as part of a bundle.
     *   This is particularly likely to happen if a packet was received on the
     *   port after it was created, but before the client had a chance to
     *   configure its bundle.
     */
    if (warn) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
                     "port %"PRIu16, ofproto->up.name, in_port);
    }
    return NULL;
}

/* Determines whether packets in 'flow' within 'ofproto' should be forwarded or
 * dropped.  Returns true if they may be forwarded, false if they should be
 * dropped.
 *
 * 'in_port' must be the ofport_dpif that corresponds to flow->in_port.
 * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull).
 *
 * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as
 * returned by input_vid_to_vlan().  It must be a valid VLAN for 'in_port', as
 * checked by input_vid_is_valid().
 *
 * May also add tags to 'ctx->tags', although the current implementation only
 * does so in one special case.
 */
static bool
is_admissible(struct action_xlate_ctx *ctx, struct ofport_dpif *in_port,
              uint16_t vlan)
{
    struct ofproto_dpif *ofproto = ctx->ofproto;
    struct flow *flow = &ctx->flow;
    struct ofbundle *in_bundle = in_port->bundle;

    /* Drop frames for reserved multicast addresses
     * only if forward_bpdu option is absent. */
    if (!ofproto->up.forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
        xlate_report(ctx, "packet has reserved destination MAC, dropping");
        return false;
    }

    if (in_bundle->bond) {
        struct mac_entry *mac;

        switch (bond_check_admissibility(in_bundle->bond, in_port,
                                         flow->dl_dst, &ctx->tags)) {
        case BV_ACCEPT:
            break;

        case BV_DROP:
            xlate_report(ctx, "bonding refused admissibility, dropping");
            return false;

        case BV_DROP_IF_MOVED:
            mac = mac_learning_lookup(ofproto->ml, flow->dl_src, vlan, NULL);
            if (mac && mac->port.p != in_bundle &&
                (!is_gratuitous_arp(flow)
                 || mac_entry_is_grat_arp_locked(mac))) {
                xlate_report(ctx, "SLB bond thinks this packet looped back, "
                             "dropping");
                return false;
            }
            break;
        }
    }

    return true;
}

static void
xlate_normal(struct action_xlate_ctx *ctx)
{
    struct ofport_dpif *in_port;
    struct ofbundle *in_bundle;
    struct mac_entry *mac;
    uint16_t vlan;
    uint16_t vid;

    ctx->has_normal = true;

    in_bundle = lookup_input_bundle(ctx->ofproto, ctx->flow.in_port,
                                    ctx->packet != NULL, &in_port);
    if (!in_bundle) {
        xlate_report(ctx, "no input bundle, dropping");
        return;
    }

    /* Drop malformed frames. */
    if (ctx->flow.dl_type == htons(ETH_TYPE_VLAN) &&
        !(ctx->flow.vlan_tci & htons(VLAN_CFI))) {
        if (ctx->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
                         "VLAN tag received on port %s",
                         ctx->ofproto->up.name, in_bundle->name);
        }
        xlate_report(ctx, "partial VLAN tag, dropping");
        return;
    }

    /* Drop frames on bundles reserved for mirroring. */
    if (in_bundle->mirror_out) {
        if (ctx->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
                         "%s, which is reserved exclusively for mirroring",
                         ctx->ofproto->up.name, in_bundle->name);
        }
        xlate_report(ctx, "input port is mirror output port, dropping");
        return;
    }

    /* Check VLAN. */
    vid = vlan_tci_to_vid(ctx->flow.vlan_tci);
    if (!input_vid_is_valid(vid, in_bundle, ctx->packet != NULL)) {
        xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
        return;
    }
    vlan = input_vid_to_vlan(in_bundle, vid);

    /* Check other admissibility requirements. */
    if (in_port && !is_admissible(ctx, in_port, vlan)) {
        return;
    }

    /* Learn source MAC. */
    if (ctx->may_learn) {
        update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle);
    }

    /* Determine output bundle. */
    mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan,
                              &ctx->tags);
    if (mac) {
        if (mac->port.p != in_bundle) {
            xlate_report(ctx, "forwarding to learned port");
            output_normal(ctx, mac->port.p, vlan);
        } else {
            xlate_report(ctx, "learned port is input port, dropping");
        }
    } else {
        struct ofbundle *bundle;

        xlate_report(ctx, "no learned MAC for destination, flooding");
        HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) {
            if (bundle != in_bundle
                && ofbundle_includes_vlan(bundle, vlan)
                && bundle->floodable
                && !bundle->mirror_out) {
                output_normal(ctx, bundle, vlan);
            }
        }
        ctx->nf_output_iface = NF_OUT_FLOOD;
    }
}

/* Optimized flow revalidation.
 *
 * It's a difficult problem, in general, to tell which facets need to have
 * their actions recalculated whenever the OpenFlow flow table changes.  We
 * don't try to solve that general problem: for most kinds of OpenFlow flow
 * table changes, we recalculate the actions for every facet.  This is
 * relatively expensive, but it's good enough if the OpenFlow flow table
 * doesn't change very often.
 *
 * However, we can expect one particular kind of OpenFlow flow table change to
 * happen frequently: changes caused by MAC learning.  To avoid wasting a lot
 * of CPU on revalidating every facet whenever MAC learning modifies the flow
 * table, we add a special case that applies to flow tables in which every rule
 * has the same form (that is, the same wildcards), except that the table is
 * also allowed to have a single "catch-all" flow that matches all packets.  We
 * optimize this case by tagging all of the facets that resubmit into the table
 * and invalidating the same tag whenever a flow changes in that table.  The
 * end result is that we revalidate just the facets that need it (and sometimes
 * a few more, but not all of the facets or even all of the facets that
 * resubmit to the table modified by MAC learning). */

/* Calculates the tag to use for 'flow' and wildcards 'wc' when it is inserted
 * into an OpenFlow table with the given 'basis'. */
static tag_type
rule_calculate_tag(const struct flow *flow, const struct flow_wildcards *wc,
                   uint32_t secret)
{
    if (flow_wildcards_is_catchall(wc)) {
        return 0;
    } else {
        struct flow tag_flow = *flow;
        flow_zero_wildcards(&tag_flow, wc);
        return tag_create_deterministic(flow_hash(&tag_flow, secret));
    }
}
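
/* Sketch of why this works: two flows that differ only in fields wildcarded
 * by 'wc' zero down to the same 'tag_flow', hash to the same value, and
 * therefore get the same deterministic tag, so invalidating that one tag
 * catches every facet that could have matched a changed rule of that form.
 * A catch-all rule gets tag 0; rule_invalidate() below treats that case as
 * "revalidate everything". */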

/* Following a change to OpenFlow table 'table_id' in 'ofproto', update the
 * taggability of that table.
 *
 * This function must be called after *each* change to a flow table.  If you
 * skip calling it on some changes then the pointer comparisons at the end can
 * be invalid if you get unlucky.  For example, if a flow removal causes a
 * cls_table to be destroyed and then a flow insertion causes a cls_table with
 * different wildcards to be created with the same address, then this function
 * will incorrectly skip revalidation. */
static void
table_update_taggable(struct ofproto_dpif *ofproto, uint8_t table_id)
{
    struct table_dpif *table = &ofproto->tables[table_id];
    const struct oftable *oftable = &ofproto->up.tables[table_id];
    struct cls_table *catchall, *other;
    struct cls_table *t;

    catchall = other = NULL;

    switch (hmap_count(&oftable->cls.tables)) {
    case 0:
        /* We could tag this OpenFlow table but it would make the logic a
         * little harder and it's a corner case that doesn't seem worth it
         * yet. */
        break;

    case 1:
    case 2:
        HMAP_FOR_EACH (t, hmap_node, &oftable->cls.tables) {
            if (cls_table_is_catchall(t)) {
                catchall = t;
            } else if (!other) {
                other = t;
            } else {
                /* Indicate that we can't tag this by setting both tables to
                 * NULL.  (We know that 'catchall' is already NULL.) */
                other = NULL;
            }
        }
        break;

    default:
        /* Can't tag this table. */
        break;
    }

    if (table->catchall_table != catchall || table->other_table != other) {
        table->catchall_table = catchall;
        table->other_table = other;
        ofproto->need_revalidate = REV_FLOW_TABLE;
    }
}

/* Given 'rule' that has changed in some way (either it is a rule being
 * inserted, a rule being deleted, or a rule whose actions are being
 * modified), marks facets for revalidation to ensure that packets will be
 * forwarded correctly according to the new state of the flow table.
 *
 * This function must be called after *each* change to a flow table.  See
 * the comment on table_update_taggable() for more information. */
static void
rule_invalidate(const struct rule_dpif *rule)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);

    table_update_taggable(ofproto, rule->up.table_id);

    if (!ofproto->need_revalidate) {
        struct table_dpif *table = &ofproto->tables[rule->up.table_id];

        if (table->other_table && rule->tag) {
            tag_set_add(&ofproto->revalidate_set, rule->tag);
        } else {
            ofproto->need_revalidate = REV_FLOW_TABLE;
        }
    }
}

static bool
set_frag_handling(struct ofproto *ofproto_,
                  enum ofp_config_flags frag_handling)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (frag_handling != OFPC_FRAG_REASM) {
        ofproto->need_revalidate = REV_RECONFIGURE;
        return true;
    } else {
        return false;
    }
}

static enum ofperr
packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
           const struct flow *flow,
           const struct ofpact *ofpacts, size_t ofpacts_len)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    enum ofperr error;

    if (flow->in_port >= ofproto->max_ports && flow->in_port < OFPP_MAX) {
        return OFPERR_NXBRC_BAD_IN_PORT;
    }

    error = ofpacts_check(ofpacts, ofpacts_len, flow, ofproto->max_ports);
    if (!error) {
        struct odputil_keybuf keybuf;
        struct dpif_flow_stats stats;

        struct ofpbuf key;

        struct action_xlate_ctx ctx;
        uint64_t odp_actions_stub[1024 / 8];
        struct ofpbuf odp_actions;

        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
        odp_flow_key_from_flow(&key, flow);

        dpif_flow_stats_extract(flow, packet, &stats);

        action_xlate_ctx_init(&ctx, ofproto, flow, flow->vlan_tci, NULL,
                              packet_get_tcp_flags(packet, flow), packet);
        ctx.resubmit_stats = &stats;

        ofpbuf_use_stub(&odp_actions,
                        odp_actions_stub, sizeof odp_actions_stub);
        xlate_actions(&ctx, ofpacts, ofpacts_len, &odp_actions);
        dpif_execute(ofproto->dpif, key.data, key.size,
                     odp_actions.data, odp_actions.size, packet);
        ofpbuf_uninit(&odp_actions);
    }
    return error;
}

/* NetFlow. */

static int
set_netflow(struct ofproto *ofproto_,
            const struct netflow_options *netflow_options)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (netflow_options) {
        if (!ofproto->netflow) {
            ofproto->netflow = netflow_create();
        }
        return netflow_set_options(ofproto->netflow, netflow_options);
    } else {
        netflow_destroy(ofproto->netflow);
        ofproto->netflow = NULL;
        return 0;
    }
}

static void
get_netflow_ids(const struct ofproto *ofproto_,
                uint8_t *engine_type, uint8_t *engine_id)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    dpif_get_netflow_ids(ofproto->dpif, engine_type, engine_id);
}

static void
send_active_timeout(struct ofproto_dpif *ofproto, struct facet *facet)
{
    if (!facet_is_controller_flow(facet) &&
        netflow_active_timeout_expired(ofproto->netflow, &facet->nf_flow)) {
        struct subfacet *subfacet;
        struct ofexpired expired;

        LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
            if (subfacet->path == SF_FAST_PATH) {
                struct dpif_flow_stats stats;

                subfacet_reinstall(subfacet, &stats);
                subfacet_update_stats(subfacet, &stats);
            }
        }

        expired.flow = facet->flow;
        expired.packet_count = facet->packet_count;
        expired.byte_count = facet->byte_count;
        expired.used = facet->used;
        netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
    }
}

static void
send_netflow_active_timeouts(struct ofproto_dpif *ofproto)
{
    struct facet *facet;

    HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
        send_active_timeout(ofproto, facet);
    }
}

static struct ofproto_dpif *
ofproto_dpif_lookup(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
                             hash_string(name, 0), &all_ofproto_dpifs) {
        if (!strcmp(ofproto->up.name, name)) {
            return ofproto;
        }
    }
    return NULL;
}

static void
ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
                          const char *argv[], void *aux OVS_UNUSED)
{
    struct ofproto_dpif *ofproto;

    if (argc > 1) {
        ofproto = ofproto_dpif_lookup(argv[1]);
        if (!ofproto) {
            unixctl_command_reply_error(conn, "no such bridge");
            return;
        }
        mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);
    } else {
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            mac_learning_flush(ofproto->ml, &ofproto->revalidate_set);
        }
    }

    unixctl_command_reply(conn, "table successfully flushed");
}

static void
ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                         const char *argv[], void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    const struct ofproto_dpif *ofproto;
    const struct mac_entry *e;

    ofproto = ofproto_dpif_lookup(argv[1]);
    if (!ofproto) {
        unixctl_command_reply_error(conn, "no such bridge");
        return;
    }

    ds_put_cstr(&ds, " port VLAN MAC Age\n");
    LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
        struct ofbundle *bundle = e->port.p;
        ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n",
                      ofbundle_get_a_port(bundle)->odp_port,
                      e->vlan, ETH_ADDR_ARGS(e->mac),
                      mac_entry_age(ofproto->ml, e));
    }
    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}

struct trace_ctx {
    struct action_xlate_ctx ctx;
    struct flow flow;
    struct ds *result;
};

static void
trace_format_rule(struct ds *result, uint8_t table_id, int level,
                  const struct rule_dpif *rule)
{
    ds_put_char_multiple(result, '\t', level);
    if (!rule) {
        ds_put_cstr(result, "No match\n");
        return;
    }

    ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
                  table_id, ntohll(rule->up.flow_cookie));
    cls_rule_format(&rule->up.cr, result);
    ds_put_char(result, '\n');

    ds_put_char_multiple(result, '\t', level);
    ds_put_cstr(result, "OpenFlow ");
    ofpacts_format(rule->up.ofpacts, rule->up.ofpacts_len, result);
    ds_put_char(result, '\n');
}

static void
trace_format_flow(struct ds *result, int level, const char *title,
                  struct trace_ctx *trace)
{
    ds_put_char_multiple(result, '\t', level);
    ds_put_format(result, "%s: ", title);
    if (flow_equal(&trace->ctx.flow, &trace->flow)) {
        ds_put_cstr(result, "unchanged");
    } else {
        flow_format(result, &trace->ctx.flow);
        trace->flow = trace->ctx.flow;
    }
    ds_put_char(result, '\n');
}

static void
trace_format_regs(struct ds *result, int level, const char *title,
                  struct trace_ctx *trace)
{
    size_t i;

    ds_put_char_multiple(result, '\t', level);
    ds_put_format(result, "%s:", title);
    for (i = 0; i < FLOW_N_REGS; i++) {
        ds_put_format(result, " reg%zu=0x%"PRIx32, i, trace->flow.regs[i]);
    }
    ds_put_char(result, '\n');
}

static void
trace_format_odp(struct ds *result, int level, const char *title,
                 struct trace_ctx *trace)
{
    struct ofpbuf *odp_actions = trace->ctx.odp_actions;

    ds_put_char_multiple(result, '\t', level);
    ds_put_format(result, "%s: ", title);
    format_odp_actions(result, odp_actions->data, odp_actions->size);
    ds_put_char(result, '\n');
}

static void
trace_resubmit(struct action_xlate_ctx *ctx, struct rule_dpif *rule)
{
    struct trace_ctx *trace = CONTAINER_OF(ctx, struct trace_ctx, ctx);
    struct ds *result = trace->result;

    ds_put_char(result, '\n');
    trace_format_flow(result, ctx->recurse + 1, "Resubmitted flow", trace);
    trace_format_regs(result, ctx->recurse + 1, "Resubmitted regs", trace);
    trace_format_odp(result, ctx->recurse + 1, "Resubmitted odp", trace);
    trace_format_rule(result, ctx->table_id, ctx->recurse + 1, rule);
}

static void
trace_report(struct action_xlate_ctx *ctx, const char *s)
{
    struct trace_ctx *trace = CONTAINER_OF(ctx, struct trace_ctx, ctx);
    struct ds *result = trace->result;

    ds_put_char_multiple(result, '\t', ctx->recurse);
    ds_put_cstr(result, s);
    ds_put_char(result, '\n');
}

static void
ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
                      void *aux OVS_UNUSED)
{
    const char *dpname = argv[1];
    struct ofproto_dpif *ofproto;
    struct ofpbuf odp_key;
    struct ofpbuf *packet;
    ovs_be16 initial_tci;
    struct ds result;
    struct flow flow;
    char *s;

    packet = NULL;
    ofpbuf_init(&odp_key, 0);
    ds_init(&result);

    ofproto = ofproto_dpif_lookup(dpname);
    if (!ofproto) {
        unixctl_command_reply_error(conn, "Unknown ofproto (use ofproto/list "
                                    "for help)");
        goto exit;
    }
    if (argc == 3 || (argc == 4 && !strcmp(argv[3], "-generate"))) {
        /* ofproto/trace dpname flow [-generate] */
        const char *flow_s = argv[2];
        const char *generate_s = argv[3];

        /* Allow 'flow_s' to be either a datapath flow or an OpenFlow-like
         * flow.  We guess which type it is based on whether 'flow_s' contains
         * an '(', since a datapath flow always contains '(' but an
         * OpenFlow-like flow should not (in fact a '(' is allowed there, but
         * I believe that's not documented anywhere).
         *
         * An alternative would be to try to parse 'flow_s' both ways, but
         * then it would be tricky giving a sensible error message.  After
         * all, do you just say "syntax error" or do you present both error
         * messages?  Both choices seem lousy. */
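        /* Examples of the two syntaxes (illustrative values chosen by the
         * editor, not from the original source):
         *
         *     datapath flow:      in_port(1),eth_type(0x0806)
         *     OpenFlow-like flow: in_port=1,dl_type=0x0806
         */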
        if (strchr(flow_s, '(')) {
            int error;

            /* Convert string to datapath key. */
            ofpbuf_init(&odp_key, 0);
            error = odp_flow_key_from_string(flow_s, NULL, &odp_key);
            if (error) {
                unixctl_command_reply_error(conn, "Bad flow syntax");
                goto exit;
            }

            /* Convert odp_key to flow. */
            error = ofproto_dpif_extract_flow_key(ofproto, odp_key.data,
                                                  odp_key.size, &flow,
                                                  &initial_tci, NULL);
            if (error == ODP_FIT_ERROR) {
                unixctl_command_reply_error(conn, "Invalid flow");
                goto exit;
            }
        } else {
            char *error_s;

            error_s = parse_ofp_exact_flow(&flow, argv[2]);
            if (error_s) {
                unixctl_command_reply_error(conn, error_s);
                free(error_s);
                goto exit;
            }

            initial_tci = flow.vlan_tci;
            vsp_adjust_flow(ofproto, &flow);
        }

        /* Generate a packet, if requested. */
        if (generate_s) {
            packet = ofpbuf_new(0);
            flow_compose(packet, &flow);
        }
    } else if (argc == 6) {
        /* ofproto/trace dpname priority tun_id in_port packet */
        const char *priority_s = argv[2];
        const char *tun_id_s = argv[3];
        const char *in_port_s = argv[4];
        const char *packet_s = argv[5];
        uint16_t in_port = ofp_port_to_odp_port(atoi(in_port_s));
        ovs_be64 tun_id = htonll(strtoull(tun_id_s, NULL, 0));
        uint32_t priority = atoi(priority_s);
        const char *msg;

        msg = eth_from_hex(packet_s, &packet);
        if (msg) {
            unixctl_command_reply_error(conn, msg);
            goto exit;
        }

        ds_put_cstr(&result, "Packet: ");
        s = ofp_packet_to_string(packet->data, packet->size);
        ds_put_cstr(&result, s);
        free(s);

        flow_extract(packet, priority, tun_id, in_port, &flow);
        initial_tci = flow.vlan_tci;
    } else {
        unixctl_command_reply_error(conn, "Bad command syntax");
        goto exit;
    }

    ofproto_trace(ofproto, &flow, packet, initial_tci, &result);
    unixctl_command_reply(conn, ds_cstr(&result));

exit:
    ds_destroy(&result);
    ofpbuf_delete(packet);
    ofpbuf_uninit(&odp_key);
}
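
/* Illustrative invocations (editor's note, not in the original source):
 *
 *     ovs-appctl ofproto/trace br0 in_port=1,dl_type=0x0806
 *     ovs-appctl ofproto/trace br0 in_port=1,dl_type=0x0806 -generate
 *
 * The second form composes a dummy packet for the flow (via flow_compose()
 * above), so that translation steps that need an actual packet can run. */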

static void
ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
              const struct ofpbuf *packet, ovs_be16 initial_tci,
              struct ds *ds)
{
    struct rule_dpif *rule;

    ds_put_cstr(ds, "Flow: ");
    flow_format(ds, flow);
    ds_put_char(ds, '\n');

    rule = rule_dpif_lookup(ofproto, flow);

    trace_format_rule(ds, 0, 0, rule);
    if (rule == ofproto->miss_rule) {
        ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n");
    } else if (rule == ofproto->no_packet_in_rule) {
        ds_put_cstr(ds, "\nNo match, packets dropped because "
                    "OFPPC_NO_PACKET_IN is set on in_port.\n");
    }

    if (rule) {
        uint64_t odp_actions_stub[1024 / 8];
        struct ofpbuf odp_actions;

        struct trace_ctx trace;
        uint8_t tcp_flags;

        tcp_flags = packet ? packet_get_tcp_flags(packet, flow) : 0;
        trace.result = ds;
        trace.flow = *flow;
        ofpbuf_use_stub(&odp_actions,
                        odp_actions_stub, sizeof odp_actions_stub);
        action_xlate_ctx_init(&trace.ctx, ofproto, flow, initial_tci,
                              rule, tcp_flags, packet);
        trace.ctx.resubmit_hook = trace_resubmit;
        trace.ctx.report_hook = trace_report;
        xlate_actions(&trace.ctx, rule->up.ofpacts, rule->up.ofpacts_len,
                      &odp_actions);

        ds_put_char(ds, '\n');
        trace_format_flow(ds, 0, "Final flow", &trace);
        ds_put_cstr(ds, "Datapath actions: ");
        format_odp_actions(ds, odp_actions.data, odp_actions.size);
        ofpbuf_uninit(&odp_actions);

        if (trace.ctx.slow) {
            enum slow_path_reason slow;

            ds_put_cstr(ds, "\nThis flow is handled by the userspace "
                        "slow path because it:");
            for (slow = trace.ctx.slow; slow; ) {
                enum slow_path_reason bit = rightmost_1bit(slow);

                switch (bit) {
                case SLOW_CFM:
                    ds_put_cstr(ds, "\n\t- Consists of CFM packets.");
                    break;
                case SLOW_LACP:
                    ds_put_cstr(ds, "\n\t- Consists of LACP packets.");
                    break;
                case SLOW_STP:
                    ds_put_cstr(ds, "\n\t- Consists of STP packets.");
                    break;
                case SLOW_IN_BAND:
                    ds_put_cstr(ds, "\n\t- Needs in-band special case "
                                "processing.");
                    if (!packet) {
                        ds_put_cstr(ds, "\n\t (The datapath actions are "
                                    "incomplete--for complete actions, "
                                    "please supply a packet.)");
                    }
                    break;
                case SLOW_CONTROLLER:
                    ds_put_cstr(ds, "\n\t- Sends \"packet-in\" messages "
                                "to the OpenFlow controller.");
                    break;
                case SLOW_MATCH:
                    ds_put_cstr(ds, "\n\t- Needs more specific matching "
                                "than the datapath supports.");
                    break;
                }

                slow &= ~bit;
            }

            if (trace.ctx.slow & ~SLOW_MATCH) {
                ds_put_cstr(ds, "\nThe datapath actions above do not reflect "
                            "the special slow-path processing.");
            }
        }
    }
}

static void
ofproto_dpif_clog(struct unixctl_conn *conn, int argc OVS_UNUSED,
                  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    clogged = true;
    unixctl_command_reply(conn, NULL);
}

static void
ofproto_dpif_unclog(struct unixctl_conn *conn, int argc OVS_UNUSED,
                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    clogged = false;
    unixctl_command_reply(conn, NULL);
}

/* Runs a self-check of flow translations in 'ofproto'.  Appends a message to
 * 'reply' describing the results. */
static void
ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
{
    struct facet *facet;
    int errors;

    errors = 0;
    HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
        if (!facet_check_consistency(facet)) {
            errors++;
        }
    }

    if (errors) {
        ofproto->need_revalidate = REV_INCONSISTENCY;
        ds_put_format(reply, "%s: self-check failed (%d errors)\n",
                      ofproto->up.name, errors);
    } else {
        ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
    }
}

static void
ofproto_dpif_self_check(struct unixctl_conn *conn,
                        int argc, const char *argv[], void *aux OVS_UNUSED)
{
    struct ds reply = DS_EMPTY_INITIALIZER;
    struct ofproto_dpif *ofproto;

    if (argc > 1) {
        ofproto = ofproto_dpif_lookup(argv[1]);
        if (!ofproto) {
            unixctl_command_reply_error(conn, "Unknown ofproto (use "
                                        "ofproto/list for help)");
            return;
        }
        ofproto_dpif_self_check__(ofproto, &reply);
    } else {
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            ofproto_dpif_self_check__(ofproto, &reply);
        }
    }

    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
}
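
/* Illustrative usage (editor's note, not in the original source):
 *
 *     ovs-appctl ofproto/self-check br0     (check a single bridge)
 *     ovs-appctl ofproto/self-check         (check every bridge)
 */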

static void
ofproto_dpif_unixctl_init(void)
{
    static bool registered;
    if (registered) {
        return;
    }
    registered = true;

    unixctl_command_register(
        "ofproto/trace",
        "bridge {priority tun_id in_port packet | odp_flow [-generate]}",
        2, 5, ofproto_unixctl_trace, NULL);
    unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
                             ofproto_unixctl_fdb_flush, NULL);
    unixctl_command_register("fdb/show", "bridge", 1, 1,
                             ofproto_unixctl_fdb_show, NULL);
    unixctl_command_register("ofproto/clog", "", 0, 0,
                             ofproto_dpif_clog, NULL);
    unixctl_command_register("ofproto/unclog", "", 0, 0,
                             ofproto_dpif_unclog, NULL);
    unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
                             ofproto_dpif_self_check, NULL);
}
\f
/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10).
 *
 * This is deprecated.  It is only for compatibility with broken device
 * drivers in old versions of Linux that do not properly support VLANs when
 * VLAN devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */

static int
set_realdev(struct ofport *ofport_, uint16_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    if (realdev_ofp_port == ofport->realdev_ofp_port
        && vid == ofport->vlandev_vid) {
        return 0;
    }

    ofproto->need_revalidate = REV_RECONFIGURE;

    if (ofport->realdev_ofp_port) {
        vsp_remove(ofport);
    }
    if (realdev_ofp_port && ofport->bundle) {
        /* vlandevs are enslaved to their realdevs, so they are not allowed to
         * themselves be part of a bundle. */
        bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
    }

    ofport->realdev_ofp_port = realdev_ofp_port;
    ofport->vlandev_vid = vid;

    if (realdev_ofp_port) {
        vsp_add(ofport, realdev_ofp_port, vid);
    }

    return 0;
}

static uint32_t
hash_realdev_vid(uint16_t realdev_ofp_port, int vid)
{
    return hash_2words(realdev_ofp_port, vid);
}

/* Returns the ODP port number of the Linux VLAN device that corresponds to
 * 'vlan_tci' on the network device with port number 'realdev_odp_port' in
 * 'ofproto'.  For example, given 'realdev_odp_port' of eth0 and 'vlan_tci' 9,
 * it would return the port number of eth0.9.
 *
 * Unless VLAN splinters are enabled for port 'realdev_odp_port', this
 * function just returns its 'realdev_odp_port' argument. */
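/* For instance (an editor's sketch with made-up port numbers): if eth0 has
 * ODP port 1 and a splinter maps eth0.9 (VLAN 9) to ODP port 5, then
 *
 *     vsp_realdev_to_vlandev(ofproto, 1, htons(9)) == 5
 *
 * whereas without a matching splinter the call would return 1 unchanged. */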
static uint32_t
vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
                       uint32_t realdev_odp_port, ovs_be16 vlan_tci)
{
    if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
        uint16_t realdev_ofp_port = odp_port_to_ofp_port(realdev_odp_port);
        int vid = vlan_tci_to_vid(vlan_tci);
        const struct vlan_splinter *vsp;

        HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
                                 hash_realdev_vid(realdev_ofp_port, vid),
                                 &ofproto->realdev_vid_map) {
            if (vsp->realdev_ofp_port == realdev_ofp_port
                && vsp->vid == vid) {
                return ofp_port_to_odp_port(vsp->vlandev_ofp_port);
            }
        }
    }
    return realdev_odp_port;
}

static struct vlan_splinter *
vlandev_find(const struct ofproto_dpif *ofproto, uint16_t vlandev_ofp_port)
{
    struct vlan_splinter *vsp;

    HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node, hash_int(vlandev_ofp_port, 0),
                             &ofproto->vlandev_map) {
        if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
            return vsp;
        }
    }

    return NULL;
}

/* Returns the OpenFlow port number of the "real" device underlying the Linux
 * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
 * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
 * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
 * eth0 and store 9 in '*vid'.
 *
 * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
 * VLAN device.  Unless VLAN splinters are enabled, this is what this
 * function always does. */
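/* For instance (an editor's sketch with made-up port numbers): if a splinter
 * maps eth0.9 (OpenFlow port 5, VLAN 9) onto eth0 (OpenFlow port 1), then
 *
 *     int vid;
 *     vsp_vlandev_to_realdev(ofproto, 5, &vid)   returns 1 and sets vid to 9,
 *     vsp_vlandev_to_realdev(ofproto, 1, &vid)   returns 0 and leaves vid
 *                                                untouched. */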
static uint16_t
vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
                       uint16_t vlandev_ofp_port, int *vid)
{
    if (!hmap_is_empty(&ofproto->vlandev_map)) {
        const struct vlan_splinter *vsp;

        vsp = vlandev_find(ofproto, vlandev_ofp_port);
        if (vsp) {
            if (vid) {
                *vid = vsp->vid;
            }
            return vsp->realdev_ofp_port;
        }
    }
    return 0;
}

/* Given 'flow', a flow representing a packet received on 'ofproto', checks
 * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
 * 'flow->in_port' to the "real" device backing the VLAN device, sets
 * 'flow->vlan_tci' to the VLAN VID, and returns true.  Otherwise (which is
 * always the case unless VLAN splinters are enabled), returns false without
 * making any changes. */
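/* For instance (an editor's sketch with made-up port numbers): if eth0.9 is
 * OpenFlow port 5 on top of eth0, OpenFlow port 1, then a flow with
 * in_port = 5 becomes in_port = 1 with vlan_tci = htons(9 | VLAN_CFI), as if
 * the packet had arrived on eth0 tagged with VLAN 9. */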
static bool
vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow)
{
    uint16_t realdev;
    int vid;

    realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port, &vid);
    if (!realdev) {
        return false;
    }

    /* Cause the flow to be processed as if it came in on the real device with
     * the VLAN device's VLAN ID. */
    flow->in_port = realdev;
    flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
    return true;
}

static void
vsp_remove(struct ofport_dpif *port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    struct vlan_splinter *vsp;

    vsp = vlandev_find(ofproto, port->up.ofp_port);
    if (vsp) {
        hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
        hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
        free(vsp);

        port->realdev_ofp_port = 0;
    } else {
        VLOG_ERR("missing vlan device record");
    }
}

static void
vsp_add(struct ofport_dpif *port, uint16_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);

    if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
        && (vsp_realdev_to_vlandev(ofproto, realdev_ofp_port, htons(vid))
            == realdev_ofp_port)) {
        struct vlan_splinter *vsp;

        vsp = xmalloc(sizeof *vsp);
        hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
                    hash_int(port->up.ofp_port, 0));
        hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
                    hash_realdev_vid(realdev_ofp_port, vid));
        vsp->realdev_ofp_port = realdev_ofp_port;
        vsp->vlandev_ofp_port = port->up.ofp_port;
        vsp->vid = vid;

        port->realdev_ofp_port = realdev_ofp_port;
    } else {
        VLOG_ERR("duplicate vlan device record");
    }
}
\f
const struct ofproto_class ofproto_dpif_class = {
    enumerate_types,
    enumerate_names,
    del,
    alloc,
    construct,
    destruct,
    dealloc,
    run,
    run_fast,
    wait,
    get_memory_usage,
    flush,
    get_features,
    get_tables,
    port_alloc,
    port_construct,
    port_destruct,
    port_dealloc,
    port_modified,
    port_reconfigured,
    port_query_by_name,
    port_add,
    port_del,
    port_get_stats,
    port_dump_start,
    port_dump_next,
    port_dump_done,
    port_poll,
    port_poll_wait,
    port_is_lacp_current,
    NULL, /* rule_choose_table */
    rule_alloc,
    rule_construct,
    rule_destruct,
    rule_dealloc,
    rule_get_stats,
    rule_execute,
    rule_modify_actions,
    set_frag_handling,
    packet_out,
    set_netflow,
    get_netflow_ids,
    set_sflow,
    set_cfm,
    get_cfm_fault,
    get_cfm_opup,
    get_cfm_remote_mpids,
    get_cfm_health,
    set_stp,
    get_stp_status,
    set_stp_port,
    get_stp_port_status,
    set_queues,
    bundle_set,
    bundle_remove,
    mirror_set,
    mirror_get_stats,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_idle_time,
    set_realdev,
};