/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#include <config.h>
#include "ofproto-dpif-upcall.h"

#include <errno.h>
#include <stdbool.h>
#include <inttypes.h>

#include "connmgr.h"
#include "coverage.h"
#include "cmap.h"
#include "dpif.h"
#include "openvswitch/dynamic-string.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "latch.h"
#include "openvswitch/list.h"
#include "netlink.h"
#include "openvswitch/ofpbuf.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-xlate.h"
#include "ofproto-dpif-xlate-cache.h"
#include "ovs-rcu.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
#include "seq.h"
#include "unixctl.h"
#include "openvswitch/vlog.h"

#define MAX_QUEUE_LENGTH 512
#define UPCALL_MAX_BATCH 64
#define REVALIDATE_MAX_BATCH 50

VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);

COVERAGE_DEFINE(dumped_duplicate_flow);
COVERAGE_DEFINE(dumped_new_flow);
COVERAGE_DEFINE(handler_duplicate_upcall);
COVERAGE_DEFINE(upcall_ukey_contention);
COVERAGE_DEFINE(upcall_ukey_replace);
COVERAGE_DEFINE(revalidate_missed_dp_flow);

/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};

/* In the absence of a multiple-writer multiple-reader datastructure for
 * storing udpif_keys ("ukeys"), we use a large number of cmaps, each with its
 * own lock for writing. */
#define N_UMAPS 512 /* per udpif. */
struct umap {
    struct ovs_mutex mutex;            /* Take for writing to the following. */
    struct cmap cmap;                  /* Datapath flow keys. */
};
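
/* Sketch of how a ukey's shard is chosen and written (illustrative comment;
 * see ukey_install__() below for the real code).  Lookups go through the
 * lock-free cmap; only insertion and removal take the shard's mutex:
 *
 *     struct umap *umap = &udpif->ukeys[ukey->hash % N_UMAPS];
 *
 *     ovs_mutex_lock(&umap->mutex);
 *     cmap_insert(&umap->cmap, &ukey->cmap_node, ukey->hash);
 *     ovs_mutex_unlock(&umap->mutex);
 */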

/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes the flows if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep".  In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 * During the dump phase, revalidators fetch flows from the datapath and
 * attribute the statistics to OpenFlow rules.  Each datapath flow has a
 * corresponding ukey which caches the most recently seen statistics.  If
 * a flow needs to be deleted (for example, because it is unused over a
 * period of time), revalidator threads may delete the flow during the
 * dump phase.  The datapath is not guaranteed to reliably dump all flows,
 * and there is no fixed mapping between datapath flows and revalidators,
 * so a particular flow may be handled by zero or more revalidators during
 * a single dump phase.  To avoid duplicate attribution of statistics,
 * ukeys are never deleted during this phase.
 *
 * During the sweep phase, each revalidator takes ownership of a different
 * slice of umaps and sweeps through all ukeys in those umaps to figure out
 * whether they need to be deleted.  During this phase, revalidators may
 * fetch individual flows which were not dumped during the dump phase to
 * validate them and attribute statistics.
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
};
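
/* Sketch of one revalidation round as implemented by udpif_revalidator()
 * below (illustrative comment).  Each revalidator runs:
 *
 *     ovs_barrier_block(&udpif->reval_barrier);  -- leader started the dump
 *     revalidate(revalidator);                   -- dump phase
 *     ovs_barrier_block(&udpif->reval_barrier);  -- all dumping is finished
 *     revalidator_sweep(revalidator);            -- sweep phase
 *     ovs_barrier_block(&udpif->reval_barrier);  -- all sweeping is finished
 */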

/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kinds of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintain
 *      it.
 */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch'. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch.  'pause' and 'pause_barrier' are shared
     * by revalidator threads.  The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch'. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
    struct ovs_barrier pause_barrier;  /* Barrier used to pause all */
                                       /* revalidators by main thread. */

    /* There are 'N_UMAPS' maps containing 'struct udpif_key' elements.
     *
     * During the flow dump phase, revalidators insert into these with a random
     * distribution.  During the garbage collection phase, each revalidator
     * takes care of garbage collecting a slice of these maps. */
    struct umap *ukeys;

    /* Datapath flow statistics. */
    unsigned int max_n_flows;
    unsigned int avg_n_flows;

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_uint n_flows;               /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;

    /* Following fields are accessed and modified only from the main thread. */
    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
                                          conns[n_conns-1] was stored. */
    size_t n_conns;                    /* Number of connections waiting. */
};
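
/* Illustrative pause sequence (a sketch; revalidator_pause() is defined
 * later in this file).  'pause_barrier' is initialized with
 * 'n_revalidators + 1' parties so that the main thread takes part:
 *
 *     main thread:                            revalidators:
 *         latch_set(&udpif->pause_latch);         leader: udpif->pause = true;
 *         ovs_barrier_block(&pause_barrier);      ovs_barrier_block(...);
 *         (all revalidators now paused)           ovs_barrier_block(...);
 *         latch_poll(&udpif->pause_latch);
 *         ovs_barrier_block(&pause_barrier);      (resume next round)
 */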

enum upcall_type {
    BAD_UPCALL,                 /* Some kind of bug somewhere. */
    MISS_UPCALL,                /* A flow miss. */
    SLOW_PATH_UPCALL,           /* Slow path upcall. */
    SFLOW_UPCALL,               /* sFlow sample. */
    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
    IPFIX_UPCALL                /* Per-bridge sampling. */
};

enum reval_result {
    UKEY_KEEP,
    UKEY_DELETE,
    UKEY_MODIFY
};

struct upcall {
    struct ofproto_dpif *ofproto;  /* Parent ofproto. */
    const struct recirc_id_node *recirc; /* Recirculation context. */
    bool have_recirc_ref;          /* Reference held on recirc ctx? */

    /* The flow and packet are only required to be constant when using
     * dpif-netdev.  If a modification is absolutely necessary, a const cast
     * may be used with other datapaths. */
    const struct flow *flow;       /* Parsed representation of the packet. */
    const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
    unsigned pmd_id;               /* Datapath poll mode driver id. */
    const struct dp_packet *packet; /* Packet associated with this upcall. */
    ofp_port_t in_port;            /* OpenFlow in port, or OFPP_NONE. */
    uint16_t mru;                  /* If !0, maximum receive unit of a
                                      fragmented IP packet. */

    enum upcall_type type;         /* Type of the upcall. */
    const struct nlattr *actions;  /* Flow actions in DPIF_UC_ACTION upcalls. */

    bool xout_initialized;         /* True if 'xout' must be uninitialized. */
    struct xlate_out xout;         /* Result of xlate_actions(). */
    struct ofpbuf odp_actions;     /* Datapath actions from xlate_actions(). */
    struct flow_wildcards wc;      /* Dependencies that megaflow must match. */
    struct ofpbuf put_actions;     /* Actions 'put' in the fastpath. */

    struct dpif_ipfix *ipfix;      /* IPFIX pointer or NULL. */
    struct dpif_sflow *sflow;      /* SFlow pointer or NULL. */

    struct udpif_key *ukey;        /* Revalidator flow cache. */
    bool ukey_persists;            /* Set true to keep 'ukey' beyond the
                                      lifetime of this upcall. */

    uint64_t dump_seq;             /* udpif->dump_seq at translation time. */
    uint64_t reval_seq;            /* udpif->reval_seq at translation time. */

    /* Not used by the upcall callback interface. */
    const struct nlattr *key;      /* Datapath flow key. */
    size_t key_len;                /* Datapath flow key length. */
    const struct nlattr *out_tun_key;  /* Datapath output tunnel key. */

    struct user_action_cookie cookie;

    uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
};

/* Ukeys must transition through these states using transition_ukey(). */
enum ukey_state {
    UKEY_CREATED = 0,
    UKEY_VISIBLE,      /* Ukey is in umap, datapath flow install is queued. */
    UKEY_OPERATIONAL,  /* Ukey is in umap, datapath flow is installed. */
    UKEY_EVICTING,     /* Ukey is in umap, datapath flow delete is queued. */
    UKEY_EVICTED,      /* Ukey is in umap, datapath flow is deleted. */
    UKEY_DELETED,      /* Ukey removed from umap, ukey free is deferred. */
};
#define N_UKEY_STATES (UKEY_DELETED + 1)
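
/* Typical lifecycle of a ukey created by a handler (illustrative; the full
 * list of legal transitions is documented at transition_ukey_at()):
 *
 *     UKEY_CREATED -> UKEY_VISIBLE -> UKEY_OPERATIONAL
 *                  -> UKEY_EVICTING -> UKEY_EVICTED -> UKEY_DELETED
 *
 * States only ever advance; transition_ukey() aborts the process on any
 * attempt to move backward. */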

/* 'udpif_key's are responsible for tracking the little bit of state udpif
 * needs to do flow expiration which can't be pulled directly from the
 * datapath.  They may be created by any handler or revalidator thread at any
 * time, and read by any revalidator during the dump phase.  They are however
 * each owned by a single revalidator which takes care of destroying them
 * during the garbage-collection phase.
 *
 * The mutex within the ukey protects some members of the ukey.  The ukey
 * itself is protected by RCU and is held within a umap in the parent udpif.
 * Adding or removing a ukey from a umap is only safe when holding the
 * corresponding umap lock. */
struct udpif_key {
    struct cmap_node cmap_node;     /* In parent revalidator 'ukeys' map. */

    /* These elements are read only once created, and therefore aren't
     * protected by a mutex. */
    const struct nlattr *key;       /* Datapath flow key. */
    size_t key_len;                 /* Length of 'key'. */
    const struct nlattr *mask;      /* Datapath flow mask. */
    size_t mask_len;                /* Length of 'mask'. */
    ovs_u128 ufid;                  /* Unique flow identifier. */
    bool ufid_present;              /* True if 'ufid' is in datapath. */
    uint32_t hash;                  /* Pre-computed hash for 'key'. */
    unsigned pmd_id;                /* Datapath poll mode driver id. */

    struct ovs_mutex mutex;         /* Guards the following. */
    struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats. */
    long long int created OVS_GUARDED;        /* Estimate of creation time. */
    uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
    uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
    enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */

    /* 'state' debug information. */
    unsigned int state_thread OVS_GUARDED;    /* Thread that transitions. */
    const char *state_where OVS_GUARDED;      /* transition_ukey() locator. */

    /* Datapath flow actions as nlattrs.  Protected by RCU.  Read with
     * ukey_get_actions(), and write with ukey_set_actions(). */
    OVSRCU_TYPE(struct ofpbuf *) actions;

    struct xlate_cache *xcache OVS_GUARDED;   /* Cache for xlate entries that
                                               * are affected by this ukey.
                                               * Used for stats and learning. */
    union {
        struct odputil_keybuf buf;
        struct nlattr nla;
    } keybuf, maskbuf;

    uint32_t key_recirc_id;   /* Non-zero if reference is held by the ukey. */
    struct recirc_refs recircs;  /* Action recirc IDs with references held. */
};
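
/* Illustrative lockless reader of 'ukey->actions' (a sketch; real callers
 * are in this file).  A thread that does not hold 'ukey->mutex' must use the
 * RCU accessor and must not use the returned pointer after quiescing:
 *
 *     const struct nlattr *actions;
 *     size_t actions_len;
 *
 *     ukey_get_actions(ukey, &actions, &actions_len);
 */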

/* Datapath operation with optional ukey attached. */
struct ukey_op {
    struct udpif_key *ukey;
    struct dpif_flow_stats stats;   /* Stats for 'op'. */
    struct dpif_op dop;             /* Flow operation. */
};

static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
static struct ovs_list all_udpifs = OVS_LIST_INITIALIZER(&all_udpifs);

static size_t recv_upcalls(struct handler *);
static int process_upcall(struct udpif *, struct upcall *,
                          struct ofpbuf *odp_actions, struct flow_wildcards *);
static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls);
static void udpif_stop_threads(struct udpif *);
static void udpif_start_threads(struct udpif *, size_t n_handlers,
                                size_t n_revalidators);
static void udpif_pause_revalidators(struct udpif *);
static void udpif_resume_revalidators(struct udpif *);
static void *udpif_upcall_handler(void *);
static void *udpif_revalidator(void *);
static unsigned long udpif_get_n_flows(struct udpif *);
static void revalidate(struct revalidator *);
static void revalidator_pause(struct revalidator *);
static void revalidator_sweep(struct revalidator *);
static void revalidator_purge(struct revalidator *);
static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
                                const char *argv[], void *aux);
static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
                                             const char *argv[], void *aux);
static void upcall_unixctl_enable_megaflows(struct unixctl_conn *, int argc,
                                            const char *argv[], void *aux);
static void upcall_unixctl_disable_ufid(struct unixctl_conn *, int argc,
                                        const char *argv[], void *aux);
static void upcall_unixctl_enable_ufid(struct unixctl_conn *, int argc,
                                       const char *argv[], void *aux);
static void upcall_unixctl_set_flow_limit(struct unixctl_conn *conn, int argc,
                                          const char *argv[], void *aux);
static void upcall_unixctl_dump_wait(struct unixctl_conn *conn, int argc,
                                     const char *argv[], void *aux);
static void upcall_unixctl_purge(struct unixctl_conn *conn, int argc,
                                 const char *argv[], void *aux);

static struct udpif_key *ukey_create_from_upcall(struct upcall *,
                                                 struct flow_wildcards *);
static int ukey_create_from_dpif_flow(const struct udpif *,
                                      const struct dpif_flow *,
                                      struct udpif_key **);
static void ukey_get_actions(struct udpif_key *,
                             const struct nlattr **actions, size_t *size);
static bool ukey_install__(struct udpif *, struct udpif_key *ukey)
    OVS_TRY_LOCK(true, ukey->mutex);
static bool ukey_install(struct udpif *udpif, struct udpif_key *ukey);
static void transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
                               const char *where)
    OVS_REQUIRES(ukey->mutex);
#define transition_ukey(UKEY, DST) \
    transition_ukey_at(UKEY, DST, OVS_SOURCE_LOCATOR)
static struct udpif_key *ukey_lookup(struct udpif *udpif,
                                     const ovs_u128 *ufid,
                                     const unsigned pmd_id);
static int ukey_acquire(struct udpif *, const struct dpif_flow *,
                        struct udpif_key **result, int *error);
static void ukey_delete__(struct udpif_key *);
static void ukey_delete(struct umap *, struct udpif_key *);
static enum upcall_type classify_upcall(enum dpif_upcall_type type,
                                        const struct nlattr *userdata,
                                        struct user_action_cookie *cookie);

static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
                        enum dpif_flow_put_flags flags);
static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
                           struct udpif_key *ukey);

static int upcall_receive(struct upcall *, const struct dpif_backer *,
                          const struct dp_packet *packet,
                          enum dpif_upcall_type,
                          const struct nlattr *userdata, const struct flow *,
                          const unsigned int mru,
                          const ovs_u128 *ufid, const unsigned pmd_id);
static void upcall_uninit(struct upcall *);

static upcall_callback upcall_cb;
static dp_purge_callback dp_purge_cb;

static atomic_bool enable_megaflows = ATOMIC_VAR_INIT(true);
static atomic_bool enable_ufid = ATOMIC_VAR_INIT(true);

void
udpif_init(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    if (ovsthread_once_start(&once)) {
        unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show,
                                 NULL);
        unixctl_command_register("upcall/disable-megaflows", "", 0, 0,
                                 upcall_unixctl_disable_megaflows, NULL);
        unixctl_command_register("upcall/enable-megaflows", "", 0, 0,
                                 upcall_unixctl_enable_megaflows, NULL);
        unixctl_command_register("upcall/disable-ufid", "", 0, 0,
                                 upcall_unixctl_disable_ufid, NULL);
        unixctl_command_register("upcall/enable-ufid", "", 0, 0,
                                 upcall_unixctl_enable_ufid, NULL);
        unixctl_command_register("upcall/set-flow-limit", "flow-limit-number",
                                 1, 1, upcall_unixctl_set_flow_limit, NULL);
        unixctl_command_register("revalidator/wait", "", 0, 0,
                                 upcall_unixctl_dump_wait, NULL);
        unixctl_command_register("revalidator/purge", "", 0, 0,
                                 upcall_unixctl_purge, NULL);
        ovsthread_once_done(&once);
    }
}
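
/* The commands registered above are reachable at runtime through
 * ovs-appctl, for example:
 *
 *     ovs-appctl upcall/show
 *     ovs-appctl upcall/set-flow-limit 10000
 *     ovs-appctl revalidator/wait
 */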

struct udpif *
udpif_create(struct dpif_backer *backer, struct dpif *dpif)
{
    struct udpif *udpif = xzalloc(sizeof *udpif);

    udpif->dpif = dpif;
    udpif->backer = backer;
    atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000));
    udpif->reval_seq = seq_create();
    udpif->dump_seq = seq_create();
    latch_init(&udpif->exit_latch);
    latch_init(&udpif->pause_latch);
    ovs_list_push_back(&all_udpifs, &udpif->list_node);
    atomic_init(&udpif->enable_ufid, false);
    atomic_init(&udpif->n_flows, 0);
    atomic_init(&udpif->n_flows_timestamp, LLONG_MIN);
    ovs_mutex_init(&udpif->n_flows_mutex);
    udpif->ukeys = xmalloc(N_UMAPS * sizeof *udpif->ukeys);
    for (int i = 0; i < N_UMAPS; i++) {
        cmap_init(&udpif->ukeys[i].cmap);
        ovs_mutex_init(&udpif->ukeys[i].mutex);
    }

    dpif_register_upcall_cb(dpif, upcall_cb, udpif);
    dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);

    return udpif;
}

void
udpif_run(struct udpif *udpif)
{
    if (udpif->conns && udpif->conn_seq != seq_read(udpif->dump_seq)) {
        int i;

        for (i = 0; i < udpif->n_conns; i++) {
            unixctl_command_reply(udpif->conns[i], NULL);
        }
        free(udpif->conns);
        udpif->conns = NULL;
        udpif->n_conns = 0;
    }
}

void
udpif_destroy(struct udpif *udpif)
{
    udpif_stop_threads(udpif);

    dpif_register_dp_purge_cb(udpif->dpif, NULL, udpif);
    dpif_register_upcall_cb(udpif->dpif, NULL, udpif);

    for (int i = 0; i < N_UMAPS; i++) {
        cmap_destroy(&udpif->ukeys[i].cmap);
        ovs_mutex_destroy(&udpif->ukeys[i].mutex);
    }
    free(udpif->ukeys);
    udpif->ukeys = NULL;

    ovs_list_remove(&udpif->list_node);
    latch_destroy(&udpif->exit_latch);
    latch_destroy(&udpif->pause_latch);
    seq_destroy(udpif->reval_seq);
    seq_destroy(udpif->dump_seq);
    ovs_mutex_destroy(&udpif->n_flows_mutex);
    free(udpif);
}

/* Stops the handler and revalidator threads.  Must be enclosed in an ovsrcu
 * quiescent state, except when destroying the udpif. */
static void
udpif_stop_threads(struct udpif *udpif)
{
    if (udpif && (udpif->n_handlers != 0 || udpif->n_revalidators != 0)) {
        size_t i;

        latch_set(&udpif->exit_latch);

        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];

            xpthread_join(handler->thread, NULL);
        }

        for (i = 0; i < udpif->n_revalidators; i++) {
            xpthread_join(udpif->revalidators[i].thread, NULL);
        }

        dpif_disable_upcall(udpif->dpif);

        for (i = 0; i < udpif->n_revalidators; i++) {
            struct revalidator *revalidator = &udpif->revalidators[i];

            /* Delete ukeys, and delete all flows from the datapath to prevent
             * double-counting stats. */
            revalidator_purge(revalidator);
        }

        latch_poll(&udpif->exit_latch);

        ovs_barrier_destroy(&udpif->reval_barrier);
        ovs_barrier_destroy(&udpif->pause_barrier);

        free(udpif->revalidators);
        udpif->revalidators = NULL;
        udpif->n_revalidators = 0;

        free(udpif->handlers);
        udpif->handlers = NULL;
        udpif->n_handlers = 0;
    }
}

/* Starts the handler and revalidator threads.  Must be enclosed in an ovsrcu
 * quiescent state. */
static void
udpif_start_threads(struct udpif *udpif, size_t n_handlers,
                    size_t n_revalidators)
{
    if (udpif && n_handlers && n_revalidators) {
        size_t i;
        bool enable_ufid;

        udpif->n_handlers = n_handlers;
        udpif->n_revalidators = n_revalidators;

        udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];

            handler->udpif = udpif;
            handler->handler_id = i;
            handler->thread = ovs_thread_create(
                "handler", udpif_upcall_handler, handler);
        }

        enable_ufid = udpif->backer->rt_support.ufid;
        atomic_init(&udpif->enable_ufid, enable_ufid);
        dpif_enable_upcall(udpif->dpif);

        ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators);
        ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1);
        udpif->reval_exit = false;
        udpif->pause = false;
        udpif->revalidators = xzalloc(udpif->n_revalidators
                                      * sizeof *udpif->revalidators);
        for (i = 0; i < udpif->n_revalidators; i++) {
            struct revalidator *revalidator = &udpif->revalidators[i];

            revalidator->udpif = udpif;
            revalidator->thread = ovs_thread_create(
                "revalidator", udpif_revalidator, revalidator);
        }
    }
}

/* Pauses all revalidators.  Should only be called by the main thread.
 * When this function returns, all revalidators are paused and will proceed
 * only after udpif_resume_revalidators() is called. */
static void
udpif_pause_revalidators(struct udpif *udpif)
{
    if (udpif->backer->recv_set_enable) {
        latch_set(&udpif->pause_latch);
        ovs_barrier_block(&udpif->pause_barrier);
    }
}

/* Resumes the paused revalidators.  Should only be called by the
 * main thread. */
static void
udpif_resume_revalidators(struct udpif *udpif)
{
    if (udpif->backer->recv_set_enable) {
        latch_poll(&udpif->pause_latch);
        ovs_barrier_block(&udpif->pause_barrier);
    }
}
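
/* Illustrative caller (hypothetical): pause and resume must be paired, or
 * revalidators will wait on 'pause_barrier' indefinitely:
 *
 *     udpif_pause_revalidators(udpif);
 *     ...modify state that flow translation depends on...
 *     udpif_resume_revalidators(udpif);
 */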

/* Tells 'udpif' how many threads it should use to handle upcalls.
 * 'n_handlers' and 'n_revalidators' can never be zero.  'udpif''s
 * datapath handle must have packet reception enabled before starting
 * threads. */
void
udpif_set_threads(struct udpif *udpif, size_t n_handlers,
                  size_t n_revalidators)
{
    ovs_assert(udpif);
    ovs_assert(n_handlers && n_revalidators);

    ovsrcu_quiesce_start();
    if (udpif->n_handlers != n_handlers
        || udpif->n_revalidators != n_revalidators) {
        udpif_stop_threads(udpif);
    }

    if (!udpif->handlers && !udpif->revalidators) {
        int error;

        error = dpif_handlers_set(udpif->dpif, n_handlers);
        if (error) {
            VLOG_ERR("failed to configure handlers in dpif %s: %s",
                     dpif_name(udpif->dpif), ovs_strerror(error));
            return;
        }

        udpif_start_threads(udpif, n_handlers, n_revalidators);
    }
    ovsrcu_quiesce_end();
}

/* Waits for all ongoing upcall translations to complete.  This ensures that
 * there are no transient references to any removed ofprotos (or other
 * objects).  In particular, this should be called after an ofproto is removed
 * (e.g. via xlate_remove_ofproto()) but before it is destroyed. */
void
udpif_synchronize(struct udpif *udpif)
{
    /* This is stronger than necessary.  It would be sufficient to ensure
     * (somehow) that each handler and revalidator thread had passed through
     * its main loop once. */
    size_t n_handlers = udpif->n_handlers;
    size_t n_revalidators = udpif->n_revalidators;

    ovsrcu_quiesce_start();
    udpif_stop_threads(udpif);
    udpif_start_threads(udpif, n_handlers, n_revalidators);
    ovsrcu_quiesce_end();
}

/* Notifies 'udpif' that something changed which may render previous
 * xlate_actions() results invalid. */
void
udpif_revalidate(struct udpif *udpif)
{
    seq_change(udpif->reval_seq);
}

/* Returns a seq which increments every time 'udpif' pulls stats from the
 * datapath.  Callers can use this to get a sense of when might be a good time
 * to do periodic work which relies on relatively up to date statistics. */
struct seq *
udpif_dump_seq(struct udpif *udpif)
{
    return udpif->dump_seq;
}

void
udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
{
    size_t i;

    simap_increase(usage, "handlers", udpif->n_handlers);

    simap_increase(usage, "revalidators", udpif->n_revalidators);
    for (i = 0; i < N_UMAPS; i++) {
        simap_increase(usage, "udpif keys", cmap_count(&udpif->ukeys[i].cmap));
    }
}

/* Remove flows from a single datapath. */
void
udpif_flush(struct udpif *udpif)
{
    size_t n_handlers, n_revalidators;

    n_handlers = udpif->n_handlers;
    n_revalidators = udpif->n_revalidators;

    ovsrcu_quiesce_start();

    udpif_stop_threads(udpif);
    dpif_flow_flush(udpif->dpif);
    udpif_start_threads(udpif, n_handlers, n_revalidators);

    ovsrcu_quiesce_end();
}

/* Removes all flows from all datapaths. */
static void
udpif_flush_all_datapaths(void)
{
    struct udpif *udpif;

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        udpif_flush(udpif);
    }
}

static bool
udpif_use_ufid(struct udpif *udpif)
{
    bool enable;

    atomic_read_relaxed(&enable_ufid, &enable);
    return enable && udpif->backer->rt_support.ufid;
}

\f
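/* Returns the number of flows in the datapath, refreshing the cached value
 * at most once per 100 ms.  If another thread holds 'n_flows_mutex', the
 * possibly stale cached value is returned instead of blocking. */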
static unsigned long
udpif_get_n_flows(struct udpif *udpif)
{
    long long int time, now;
    unsigned long flow_count;

    now = time_msec();
    atomic_read_relaxed(&udpif->n_flows_timestamp, &time);
    if (time < now - 100 && !ovs_mutex_trylock(&udpif->n_flows_mutex)) {
        struct dpif_dp_stats stats;

        atomic_store_relaxed(&udpif->n_flows_timestamp, now);
        dpif_get_dp_stats(udpif->dpif, &stats);
        flow_count = stats.n_flows;
        atomic_store_relaxed(&udpif->n_flows, flow_count);
        ovs_mutex_unlock(&udpif->n_flows_mutex);
    } else {
        atomic_read_relaxed(&udpif->n_flows, &flow_count);
    }
    return flow_count;
}

/* The upcall handler thread tries to read a batch of UPCALL_MAX_BATCH
 * upcalls from dpif, processes the batch and installs corresponding flows
 * in dpif. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;
    struct udpif *udpif = handler->udpif;

    while (!latch_is_set(&handler->udpif->exit_latch)) {
        if (recv_upcalls(handler)) {
            poll_immediate_wake();
        } else {
            dpif_recv_wait(udpif->dpif, handler->handler_id);
            latch_wait(&udpif->exit_latch);
        }
        poll_block();
    }

    return NULL;
}

static size_t
recv_upcalls(struct handler *handler)
{
    struct udpif *udpif = handler->udpif;
    uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8];
    struct ofpbuf recv_bufs[UPCALL_MAX_BATCH];
    struct dpif_upcall dupcalls[UPCALL_MAX_BATCH];
    struct upcall upcalls[UPCALL_MAX_BATCH];
    struct flow flows[UPCALL_MAX_BATCH];
    size_t n_upcalls, i;

    n_upcalls = 0;
    while (n_upcalls < UPCALL_MAX_BATCH) {
        struct ofpbuf *recv_buf = &recv_bufs[n_upcalls];
        struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
        struct upcall *upcall = &upcalls[n_upcalls];
        struct flow *flow = &flows[n_upcalls];
        unsigned int mru;
        int error;

        ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
                        sizeof recv_stubs[n_upcalls]);
        if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) {
            ofpbuf_uninit(recv_buf);
            break;
        }

        if (odp_flow_key_to_flow(dupcall->key, dupcall->key_len, flow)
            == ODP_FIT_ERROR) {
            goto free_dupcall;
        }

        if (dupcall->mru) {
            mru = nl_attr_get_u16(dupcall->mru);
        } else {
            mru = 0;
        }

        error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
                               dupcall->type, dupcall->userdata, flow, mru,
                               &dupcall->ufid, PMD_ID_NULL);
        if (error) {
            if (error == ENODEV) {
                /* Received packet on datapath port for which we couldn't
                 * associate an ofproto.  This can happen if a port is removed
                 * while traffic is being received.  Print a rate-limited
                 * message in case it happens frequently. */
                dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key,
                              dupcall->key_len, NULL, 0, NULL, 0,
                              &dupcall->ufid, PMD_ID_NULL, NULL);
                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
                             "port %"PRIu32, flow->in_port.odp_port);
            }
            goto free_dupcall;
        }

        upcall->key = dupcall->key;
        upcall->key_len = dupcall->key_len;
        upcall->ufid = &dupcall->ufid;

        upcall->out_tun_key = dupcall->out_tun_key;
        upcall->actions = dupcall->actions;

        pkt_metadata_from_flow(&dupcall->packet.md, flow);
        flow_extract(&dupcall->packet, flow);

        error = process_upcall(udpif, upcall,
                               &upcall->odp_actions, &upcall->wc);
        if (error) {
            goto cleanup;
        }

        n_upcalls++;
        continue;

cleanup:
        upcall_uninit(upcall);
free_dupcall:
        dp_packet_uninit(&dupcall->packet);
        ofpbuf_uninit(recv_buf);
    }

    if (n_upcalls) {
        handle_upcalls(handler->udpif, upcalls, n_upcalls);
        for (i = 0; i < n_upcalls; i++) {
            dp_packet_uninit(&dupcalls[i].packet);
            ofpbuf_uninit(&recv_bufs[i]);
            upcall_uninit(&upcalls[i]);
        }
    }

    return n_upcalls;
}

static void *
udpif_revalidator(void *arg)
{
    /* Used by all revalidators. */
    struct revalidator *revalidator = arg;
    struct udpif *udpif = revalidator->udpif;
    bool leader = revalidator == &udpif->revalidators[0];

    /* Used only by the leader. */
    long long int start_time = 0;
    uint64_t last_reval_seq = 0;
    size_t n_flows = 0;

    revalidator->id = ovsthread_id_self();
    for (;;) {
        if (leader) {
            uint64_t reval_seq;

            recirc_run(); /* Recirculation cleanup. */

            reval_seq = seq_read(udpif->reval_seq);
            last_reval_seq = reval_seq;

            n_flows = udpif_get_n_flows(udpif);
            udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
            udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;

            /* Only the leader checks the pause latch to prevent a race where
             * some threads think it's false and proceed to block on
             * reval_barrier and others think it's true and block indefinitely
             * on the pause_barrier. */
            udpif->pause = latch_is_set(&udpif->pause_latch);

            /* Only the leader checks the exit latch to prevent a race where
             * some threads think it's true and exit and others think it's
             * false and block indefinitely on the reval_barrier. */
            udpif->reval_exit = latch_is_set(&udpif->exit_latch);

            start_time = time_msec();
            if (!udpif->reval_exit) {
                bool terse_dump;

                terse_dump = udpif_use_ufid(udpif);
                udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump,
                                                    NULL);
            }
        }

        /* Wait for the leader to start the flow dump. */
        ovs_barrier_block(&udpif->reval_barrier);
        if (udpif->pause) {
            revalidator_pause(revalidator);
        }

        if (udpif->reval_exit) {
            break;
        }
        revalidate(revalidator);

        /* Wait for all flows to have been dumped before we garbage collect. */
        ovs_barrier_block(&udpif->reval_barrier);
        revalidator_sweep(revalidator);

        /* Wait for all revalidators to finish garbage collection. */
        ovs_barrier_block(&udpif->reval_barrier);

        if (leader) {
            unsigned int flow_limit;
            long long int duration;

            atomic_read_relaxed(&udpif->flow_limit, &flow_limit);

            dpif_flow_dump_destroy(udpif->dump);
            seq_change(udpif->dump_seq);

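            /* Adjust the flow limit based on how long the last dump took:
             * over 2 s scales the limit down proportionally, over 1.3 s trims
             * it by a quarter, and a dump under 1 s with more than 2000 flows
             * lets the limit grow by 1000 if it lags behind the observed
             * flow setup rate.  The result is clamped to
             * [1000, ofproto_flow_limit]. */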
            duration = MAX(time_msec() - start_time, 1);
            udpif->dump_duration = duration;
            if (duration > 2000) {
                flow_limit /= duration / 1000;
            } else if (duration > 1300) {
                flow_limit = flow_limit * 3 / 4;
            } else if (duration < 1000 && n_flows > 2000
                       && flow_limit < n_flows * 1000 / duration) {
                flow_limit += 1000;
            }
            flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
            atomic_store_relaxed(&udpif->flow_limit, flow_limit);

            if (duration > 2000) {
                VLOG_INFO("Spent an unreasonably long %lldms dumping flows",
                          duration);
            }

            poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500));
            seq_wait(udpif->reval_seq, last_reval_seq);
            latch_wait(&udpif->exit_latch);
            latch_wait(&udpif->pause_latch);
            poll_block();

            if (!latch_is_set(&udpif->pause_latch) &&
                !latch_is_set(&udpif->exit_latch)) {
                long long int now = time_msec();
                /* Block again if we are woken up within 5ms of the last start
                 * time. */
                start_time += 5;

                if (now < start_time) {
                    poll_timer_wait_until(start_time);
                    latch_wait(&udpif->exit_latch);
                    latch_wait(&udpif->pause_latch);
                    poll_block();
                }
            }
        }
    }

    return NULL;
}
\f
static enum upcall_type
classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
                struct user_action_cookie *cookie)
{
    /* First look at the upcall type. */
    switch (type) {
    case DPIF_UC_ACTION:
        break;

    case DPIF_UC_MISS:
        return MISS_UPCALL;

    case DPIF_N_UC_TYPES:
    default:
        VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, type);
        return BAD_UPCALL;
    }

    /* "action" upcalls need a closer look. */
    if (!userdata) {
        VLOG_WARN_RL(&rl, "action upcall missing cookie");
        return BAD_UPCALL;
    }

    size_t userdata_len = nl_attr_get_size(userdata);
    if (userdata_len != sizeof *cookie) {
        VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
                     userdata_len);
        return BAD_UPCALL;
    }
    memcpy(cookie, nl_attr_get(userdata), sizeof *cookie);
    if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
        return SFLOW_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
        return SLOW_PATH_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
        return FLOW_SAMPLE_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_IPFIX) {
        return IPFIX_UPCALL;
    } else {
        VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
                     " and size %"PRIuSIZE, cookie->type, userdata_len);
        return BAD_UPCALL;
    }
}

/* Calculates slow path actions for 'xout'.  'buf' must statically be
 * initialized with at least 128 bytes of space. */
static void
compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
                  const struct flow *flow, odp_port_t odp_in_port,
                  struct ofpbuf *buf, uint32_t slowpath_meter_id,
                  uint32_t controller_meter_id)
{
    struct user_action_cookie cookie;
    odp_port_t port;
    uint32_t pid;

    cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
    cookie.slow_path.unused = 0;
    cookie.slow_path.reason = xout->slow;

    port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
           ? ODPP_NONE
           : odp_in_port;
    pid = dpif_port_get_pid(udpif->dpif, port, flow_hash_5tuple(flow, 0));

    size_t offset;
    size_t ac_offset;
    uint32_t meter_id = xout->slow & SLOW_CONTROLLER ? controller_meter_id
                                                     : slowpath_meter_id;

    if (meter_id != UINT32_MAX) {
        /* If a slowpath meter is configured, wrap the meter and userspace
         * actions in a sample action with 100% probability, which behaves
         * as a clone(meter, userspace) action. */
        offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SAMPLE);
        nl_msg_put_u32(buf, OVS_SAMPLE_ATTR_PROBABILITY, UINT32_MAX);
        ac_offset = nl_msg_start_nested(buf, OVS_SAMPLE_ATTR_ACTIONS);
        nl_msg_put_u32(buf, OVS_ACTION_ATTR_METER, meter_id);
    }

    odp_put_userspace_action(pid, &cookie, sizeof cookie,
                             ODPP_NONE, false, buf);

    if (meter_id != UINT32_MAX) {
        nl_msg_end_nested(buf, ac_offset);
        nl_msg_end_nested(buf, offset);
    }
}

/* If there is no error, the upcall must be destroyed with upcall_uninit()
 * before quiescing, as the referred objects are guaranteed to exist only
 * until the calling thread quiesces.  Otherwise, do not call upcall_uninit()
 * since the 'upcall->put_actions' remains uninitialized. */
static int
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
               const struct dp_packet *packet, enum dpif_upcall_type type,
               const struct nlattr *userdata, const struct flow *flow,
               const unsigned int mru,
               const ovs_u128 *ufid, const unsigned pmd_id)
{
    int error;

    upcall->type = classify_upcall(type, userdata, &upcall->cookie);
    if (upcall->type == BAD_UPCALL) {
        return EAGAIN;
    }

    error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
                         &upcall->sflow, NULL, &upcall->in_port);
    if (error) {
        return error;
    }

    upcall->recirc = NULL;
    upcall->have_recirc_ref = false;
    upcall->flow = flow;
    upcall->packet = packet;
    upcall->ufid = ufid;
    upcall->pmd_id = pmd_id;
    ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
                    sizeof upcall->odp_actions_stub);
    ofpbuf_init(&upcall->put_actions, 0);

    upcall->xout_initialized = false;
    upcall->ukey_persists = false;

    upcall->ukey = NULL;
    upcall->key = NULL;
    upcall->key_len = 0;
    upcall->mru = mru;

    upcall->out_tun_key = NULL;
    upcall->actions = NULL;

    return 0;
}

static void
upcall_xlate(struct udpif *udpif, struct upcall *upcall,
             struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
    struct dpif_flow_stats stats;
    struct xlate_in xin;

    stats.n_packets = 1;
    stats.n_bytes = dp_packet_size(upcall->packet);
    stats.used = time_msec();
    stats.tcp_flags = ntohs(upcall->flow->tcp_flags);

    xlate_in_init(&xin, upcall->ofproto,
                  ofproto_dpif_get_tables_version(upcall->ofproto),
                  upcall->flow, upcall->in_port, NULL,
                  stats.tcp_flags, upcall->packet, wc, odp_actions);

    if (upcall->type == MISS_UPCALL) {
        xin.resubmit_stats = &stats;

        if (xin.frozen_state) {
            /* We may install a datapath flow only if we get a reference to the
             * recirculation context (otherwise we could have recirculation
             * upcalls using recirculation ID for which no context can be
             * found).  We may still execute the flow's actions even if we
             * don't install the flow. */
            upcall->recirc = recirc_id_node_from_state(xin.frozen_state);
            upcall->have_recirc_ref = recirc_id_node_try_ref_rcu(upcall->recirc);
        }
    } else {
        /* For non-miss upcalls, we are either executing actions (one of which
         * is a userspace action) for an upcall, in which case the stats have
         * already been taken care of, or there's a flow in the datapath which
         * this packet was accounted to.  Presumably the revalidators will deal
         * with pushing its stats eventually. */
    }

    upcall->dump_seq = seq_read(udpif->dump_seq);
    upcall->reval_seq = seq_read(udpif->reval_seq);

    xlate_actions(&xin, &upcall->xout);
    if (wc) {
        /* Convert the input port wildcard from OFP to ODP format.  There's no
         * real way to do this for arbitrary bitmasks since the numbering
         * spaces aren't the same.  However, flow translation always exact
         * matches the whole thing, so we can do the same here. */
        WC_MASK_FIELD(wc, in_port.odp_port);
    }

    upcall->xout_initialized = true;

    if (!upcall->xout.slow) {
        ofpbuf_use_const(&upcall->put_actions,
                         odp_actions->data, odp_actions->size);
    } else {
        uint32_t smid = upcall->ofproto->up.slowpath_meter_id;
        uint32_t cmid = upcall->ofproto->up.controller_meter_id;
        /* upcall->put_actions already initialized by upcall_receive(). */
        compose_slow_path(udpif, &upcall->xout, upcall->flow,
                          upcall->flow->in_port.odp_port,
                          &upcall->put_actions, smid, cmid);
    }

    /* This function is also called for slow-pathed flows.  As we are only
     * going to create new datapath flows for actual datapath misses, there is
     * no point in creating a ukey otherwise. */
    if (upcall->type == MISS_UPCALL) {
        upcall->ukey = ukey_create_from_upcall(upcall, wc);
    }
}

static void
upcall_uninit(struct upcall *upcall)
{
    if (upcall) {
        if (upcall->xout_initialized) {
            xlate_out_uninit(&upcall->xout);
        }
        ofpbuf_uninit(&upcall->odp_actions);
        ofpbuf_uninit(&upcall->put_actions);
        if (upcall->ukey) {
            if (!upcall->ukey_persists) {
                ukey_delete__(upcall->ukey);
            }
        } else if (upcall->have_recirc_ref) {
            /* The reference was transferred to the ukey if one was created. */
            recirc_id_node_unref(upcall->recirc);
        }
    }
}

/* If there are fewer flows than the limit, and this is a miss upcall which
 *
 *      - Has no recirc_id, OR
 *      - Has a recirc_id and we can get a reference on the recirc ctx,
 *
 * then we should install the flow (true).  Otherwise, return false. */
static bool
should_install_flow(struct udpif *udpif, struct upcall *upcall)
{
    unsigned int flow_limit;

    if (upcall->type != MISS_UPCALL) {
        return false;
    } else if (upcall->recirc && !upcall->have_recirc_ref) {
        VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
        return false;
    }

    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
    if (udpif_get_n_flows(udpif) >= flow_limit) {
        VLOG_WARN_RL(&rl, "upcall: datapath flow limit reached");
        return false;
    }

    return true;
}

static int
upcall_cb(const struct dp_packet *packet, const struct flow *flow,
          ovs_u128 *ufid, unsigned pmd_id, enum dpif_upcall_type type,
          const struct nlattr *userdata, struct ofpbuf *actions,
          struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    struct udpif *udpif = aux;
    struct upcall upcall;
    bool megaflow;
    int error;

    atomic_read_relaxed(&enable_megaflows, &megaflow);

    error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
                           flow, 0, ufid, pmd_id);
    if (error) {
        return error;
    }

    error = process_upcall(udpif, &upcall, actions, wc);
    if (error) {
        goto out;
    }

    if (upcall.xout.slow && put_actions) {
        ofpbuf_put(put_actions, upcall.put_actions.data,
                   upcall.put_actions.size);
    }

    if (OVS_UNLIKELY(!megaflow && wc)) {
        flow_wildcards_init_for_packet(wc, flow);
    }

    if (!should_install_flow(udpif, &upcall)) {
        error = ENOSPC;
        goto out;
    }

    if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) {
        VLOG_WARN_RL(&rl, "upcall_cb failure: ukey installation fails");
        error = ENOSPC;
    }
out:
    if (!error) {
        upcall.ukey_persists = true;
    }
    upcall_uninit(&upcall);
    return error;
}

static size_t
dpif_get_actions(struct udpif *udpif, struct upcall *upcall,
                 const struct nlattr **actions)
{
    size_t actions_len = 0;

    if (upcall->actions) {
        /* Actions were passed up from datapath. */
        *actions = nl_attr_get(upcall->actions);
        actions_len = nl_attr_get_size(upcall->actions);
    }

    if (actions_len == 0) {
        /* Lookup actions in userspace cache. */
        struct udpif_key *ukey = ukey_lookup(udpif, upcall->ufid,
                                             upcall->pmd_id);
        if (ukey) {
            ukey_get_actions(ukey, actions, &actions_len);
        }
    }

    return actions_len;
}

static size_t
dpif_read_actions(struct udpif *udpif, struct upcall *upcall,
                  const struct flow *flow, enum upcall_type type,
                  void *upcall_data)
{
    const struct nlattr *actions = NULL;
    size_t actions_len = dpif_get_actions(udpif, upcall, &actions);

    if (!actions || !actions_len) {
        return 0;
    }

    switch (type) {
    case SFLOW_UPCALL:
        dpif_sflow_read_actions(flow, actions, actions_len, upcall_data);
        break;
    case FLOW_SAMPLE_UPCALL:
    case IPFIX_UPCALL:
        dpif_ipfix_read_actions(flow, actions, actions_len, upcall_data);
        break;
    case BAD_UPCALL:
    case MISS_UPCALL:
    case SLOW_PATH_UPCALL:
    default:
        break;
    }

    return actions_len;
}

static int
process_upcall(struct udpif *udpif, struct upcall *upcall,
               struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
    const struct dp_packet *packet = upcall->packet;
    const struct flow *flow = upcall->flow;
    size_t actions_len = 0;

    switch (upcall->type) {
    case MISS_UPCALL:
    case SLOW_PATH_UPCALL:
        upcall_xlate(udpif, upcall, odp_actions, wc);
        return 0;

    case SFLOW_UPCALL:
        if (upcall->sflow) {
            struct dpif_sflow_actions sflow_actions;

            memset(&sflow_actions, 0, sizeof sflow_actions);

            actions_len = dpif_read_actions(udpif, upcall, flow,
                                            upcall->type, &sflow_actions);
            dpif_sflow_received(upcall->sflow, packet, flow,
                                flow->in_port.odp_port, &upcall->cookie,
                                actions_len > 0 ? &sflow_actions : NULL);
        }
        break;

    case IPFIX_UPCALL:
        if (upcall->ipfix) {
            struct flow_tnl output_tunnel_key;
            struct dpif_ipfix_actions ipfix_actions;

            memset(&ipfix_actions, 0, sizeof ipfix_actions);

            if (upcall->out_tun_key) {
                odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key);
            }

            actions_len = dpif_read_actions(udpif, upcall, flow,
                                            upcall->type, &ipfix_actions);
            dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
                                     flow->in_port.odp_port,
                                     upcall->cookie.ipfix.output_odp_port,
                                     upcall->out_tun_key ?
                                         &output_tunnel_key : NULL,
                                     actions_len > 0 ? &ipfix_actions : NULL);
        }
        break;

    case FLOW_SAMPLE_UPCALL:
        if (upcall->ipfix) {
            struct flow_tnl output_tunnel_key;
            struct dpif_ipfix_actions ipfix_actions;

            memset(&ipfix_actions, 0, sizeof ipfix_actions);

            if (upcall->out_tun_key) {
                odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key);
            }

            actions_len = dpif_read_actions(udpif, upcall, flow,
                                            upcall->type, &ipfix_actions);
            /* The flow reflects exactly the contents of the packet.
             * Sample the packet using it. */
            dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
                                   &upcall->cookie, flow->in_port.odp_port,
                                   upcall->out_tun_key ?
                                       &output_tunnel_key : NULL,
                                   actions_len > 0 ? &ipfix_actions : NULL);
        }
        break;

    case BAD_UPCALL:
        break;
    }

    return EAGAIN;
}

static void
handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
               size_t n_upcalls)
{
    struct dpif_op *opsp[UPCALL_MAX_BATCH * 2];
    struct ukey_op ops[UPCALL_MAX_BATCH * 2];
    size_t n_ops, n_opsp, i;

    /* Handle the packets individually in order of arrival.
     *
     *   - For SLOW_CFM, SLOW_LACP, SLOW_STP, SLOW_BFD, and SLOW_LLDP,
     *     translation is what processes received packets for these
     *     protocols.
     *
     *   - For SLOW_CONTROLLER, translation sends the packet to the OpenFlow
     *     controller.
     *
     *   - For SLOW_ACTION, translation executes the actions directly.
     *
     * The loop fills 'ops' with an array of operations to execute in the
     * datapath. */
    n_ops = 0;
    for (i = 0; i < n_upcalls; i++) {
        struct upcall *upcall = &upcalls[i];
        const struct dp_packet *packet = upcall->packet;
        struct ukey_op *op;

        if (should_install_flow(udpif, upcall)) {
            struct udpif_key *ukey = upcall->ukey;

            if (ukey_install(udpif, ukey)) {
                upcall->ukey_persists = true;
                put_op_init(&ops[n_ops++], ukey, DPIF_FP_CREATE);
            }
        }

        if (upcall->odp_actions.size) {
            op = &ops[n_ops++];
            op->ukey = NULL;
            op->dop.type = DPIF_OP_EXECUTE;
            op->dop.u.execute.packet = CONST_CAST(struct dp_packet *, packet);
            op->dop.u.execute.flow = upcall->flow;
            odp_key_to_dp_packet(upcall->key, upcall->key_len,
                                 op->dop.u.execute.packet);
            op->dop.u.execute.actions = upcall->odp_actions.data;
            op->dop.u.execute.actions_len = upcall->odp_actions.size;
            op->dop.u.execute.needs_help =
                (upcall->xout.slow & SLOW_ACTION) != 0;
            op->dop.u.execute.probe = false;
            op->dop.u.execute.mtu = upcall->mru;
        }
    }

    /* Execute batch. */
    n_opsp = 0;
    for (i = 0; i < n_ops; i++) {
        opsp[n_opsp++] = &ops[i].dop;
    }
    dpif_operate(udpif->dpif, opsp, n_opsp);
    for (i = 0; i < n_ops; i++) {
        struct udpif_key *ukey = ops[i].ukey;

        if (ukey) {
            ovs_mutex_lock(&ukey->mutex);
            if (ops[i].dop.error) {
                transition_ukey(ukey, UKEY_EVICTED);
            } else if (ukey->state < UKEY_OPERATIONAL) {
                transition_ukey(ukey, UKEY_OPERATIONAL);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }
    }
}

static uint32_t
get_ukey_hash(const ovs_u128 *ufid, const unsigned pmd_id)
{
    return hash_2words(ufid->u32[0], pmd_id);
}
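
/* Only the first 32 bits of the UFID feed the ukey hash; ukey_lookup()
 * checks full UFID equality, and ukey_install__() reuses the same hash as
 * the umap shard index ('hash % N_UMAPS'). */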
1493
1494 static struct udpif_key *
1495 ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid, const unsigned pmd_id)
1496 {
1497 struct udpif_key *ukey;
1498 int idx = get_ukey_hash(ufid, pmd_id) % N_UMAPS;
1499 struct cmap *cmap = &udpif->ukeys[idx].cmap;
1500
1501 CMAP_FOR_EACH_WITH_HASH (ukey, cmap_node,
1502 get_ukey_hash(ufid, pmd_id), cmap) {
1503 if (ovs_u128_equals(ukey->ufid, *ufid)) {
1504 return ukey;
1505 }
1506 }
1507 return NULL;
1508 }
1509
1510 /* Provides safe lockless access of RCU protected 'ukey->actions'. Callers may
1511 * alternatively access the field directly if they take 'ukey->mutex'. */
1512 static void
1513 ukey_get_actions(struct udpif_key *ukey, const struct nlattr **actions, size_t *size)
1514 {
1515 const struct ofpbuf *buf = ovsrcu_get(struct ofpbuf *, &ukey->actions);
1516 *actions = buf->data;
1517 *size = buf->size;
1518 }
1519
1520 static void
1521 ukey_set_actions(struct udpif_key *ukey, const struct ofpbuf *actions)
1522 {
1523 ovsrcu_postpone(ofpbuf_delete,
1524 ovsrcu_get_protected(struct ofpbuf *, &ukey->actions));
1525 ovsrcu_set(&ukey->actions, ofpbuf_clone(actions));
1526 }
1527
1528 static struct udpif_key *
1529 ukey_create__(const struct nlattr *key, size_t key_len,
1530 const struct nlattr *mask, size_t mask_len,
1531 bool ufid_present, const ovs_u128 *ufid,
1532 const unsigned pmd_id, const struct ofpbuf *actions,
1533 uint64_t dump_seq, uint64_t reval_seq, long long int used,
1534 uint32_t key_recirc_id, struct xlate_out *xout)
1535 OVS_NO_THREAD_SAFETY_ANALYSIS
1536 {
1537 struct udpif_key *ukey = xmalloc(sizeof *ukey);
1538
1539 memcpy(&ukey->keybuf, key, key_len);
1540 ukey->key = &ukey->keybuf.nla;
1541 ukey->key_len = key_len;
1542 memcpy(&ukey->maskbuf, mask, mask_len);
1543 ukey->mask = &ukey->maskbuf.nla;
1544 ukey->mask_len = mask_len;
1545 ukey->ufid_present = ufid_present;
1546 ukey->ufid = *ufid;
1547 ukey->pmd_id = pmd_id;
1548 ukey->hash = get_ukey_hash(&ukey->ufid, pmd_id);
1549
1550 ovsrcu_init(&ukey->actions, NULL);
1551 ukey_set_actions(ukey, actions);
1552
1553 ovs_mutex_init(&ukey->mutex);
1554 ukey->dump_seq = dump_seq;
1555 ukey->reval_seq = reval_seq;
1556 ukey->state = UKEY_CREATED;
1557 ukey->state_thread = ovsthread_id_self();
1558 ukey->state_where = OVS_SOURCE_LOCATOR;
1559 ukey->created = time_msec();
1560 memset(&ukey->stats, 0, sizeof ukey->stats);
1561 ukey->stats.used = used;
1562 ukey->xcache = NULL;
1563
1564 ukey->key_recirc_id = key_recirc_id;
1565 recirc_refs_init(&ukey->recircs);
1566 if (xout) {
1567 /* Take ownership of the action recirc id references. */
1568 recirc_refs_swap(&ukey->recircs, &xout->recircs);
1569 }
1570
1571 return ukey;
1572 }
1573
1574 static struct udpif_key *
1575 ukey_create_from_upcall(struct upcall *upcall, struct flow_wildcards *wc)
1576 {
1577 struct odputil_keybuf keystub, maskstub;
1578 struct ofpbuf keybuf, maskbuf;
1579 bool megaflow;
1580 struct odp_flow_key_parms odp_parms = {
1581 .flow = upcall->flow,
1582 .mask = wc ? &wc->masks : NULL,
1583 };
1584
1585 odp_parms.support = upcall->ofproto->backer->rt_support.odp;
1586 if (upcall->key_len) {
1587 ofpbuf_use_const(&keybuf, upcall->key, upcall->key_len);
1588 } else {
1589 /* dpif-netdev doesn't provide a netlink-formatted flow key in the
1590 * upcall, so convert the upcall's flow here. */
1591 ofpbuf_use_stack(&keybuf, &keystub, sizeof keystub);
1592 odp_flow_key_from_flow(&odp_parms, &keybuf);
1593 }
1594
1595 atomic_read_relaxed(&enable_megaflows, &megaflow);
1596 ofpbuf_use_stack(&maskbuf, &maskstub, sizeof maskstub);
1597 if (megaflow && wc) {
1598 odp_parms.key_buf = &keybuf;
1599 odp_flow_key_from_mask(&odp_parms, &maskbuf);
1600 }
1601
1602 return ukey_create__(keybuf.data, keybuf.size, maskbuf.data, maskbuf.size,
1603 true, upcall->ufid, upcall->pmd_id,
1604 &upcall->put_actions, upcall->dump_seq,
1605 upcall->reval_seq, 0,
1606 upcall->have_recirc_ref ? upcall->recirc->id : 0,
1607 &upcall->xout);
1608 }
1609
1610 static int
1611 ukey_create_from_dpif_flow(const struct udpif *udpif,
1612 const struct dpif_flow *flow,
1613 struct udpif_key **ukey)
1614 {
1615 struct dpif_flow full_flow;
1616 struct ofpbuf actions;
1617 uint64_t dump_seq, reval_seq;
1618 uint64_t stub[DPIF_FLOW_BUFSIZE / 8];
1619 const struct nlattr *a;
1620 unsigned int left;
1621
1622 if (!flow->key_len || !flow->actions_len) {
1623 struct ofpbuf buf;
1624 int err;
1625
1626 /* If the key or actions were not provided by the datapath, fetch the
1627 * full flow. */
1628 ofpbuf_use_stack(&buf, &stub, sizeof stub);
1629 err = dpif_flow_get(udpif->dpif, flow->key, flow->key_len,
1630 flow->ufid_present ? &flow->ufid : NULL,
1631 flow->pmd_id, &buf, &full_flow);
1632 if (err) {
1633 return err;
1634 }
1635 flow = &full_flow;
1636 }
1637
1638 /* Check the flow actions for recirculation action. As recirculation
1639 * relies on OVS userspace internal state, we need to delete all old
1640 * datapath flows with either a non-zero recirc_id in the key, or any
1641 * recirculation actions upon OVS restart. */
1642 NL_ATTR_FOR_EACH (a, left, flow->key, flow->key_len) {
1643 if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID
1644 && nl_attr_get_u32(a) != 0) {
1645 return EINVAL;
1646 }
1647 }
1648 NL_ATTR_FOR_EACH (a, left, flow->actions, flow->actions_len) {
1649 if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
1650 return EINVAL;
1651 }
1652 }
1653
1654 dump_seq = seq_read(udpif->dump_seq);
1655 reval_seq = seq_read(udpif->reval_seq) - 1; /* Ensure revalidation. */
1656 ofpbuf_use_const(&actions, &flow->actions, flow->actions_len);
1657 *ukey = ukey_create__(flow->key, flow->key_len,
1658 flow->mask, flow->mask_len, flow->ufid_present,
1659 &flow->ufid, flow->pmd_id, &actions, dump_seq,
1660 reval_seq, flow->stats.used, 0, NULL);
1661
1662 return 0;
1663 }
1664
1665 static bool
1666 try_ukey_replace(struct umap *umap, struct udpif_key *old_ukey,
1667 struct udpif_key *new_ukey)
1668 OVS_REQUIRES(umap->mutex)
1669 OVS_TRY_LOCK(true, new_ukey->mutex)
1670 {
1671 bool replaced = false;
1672
1673 if (!ovs_mutex_trylock(&old_ukey->mutex)) {
1674 if (old_ukey->state == UKEY_EVICTED) {
1675 /* The flow was deleted during the current revalidator dump,
1676 * but its ukey won't be fully cleaned up until the sweep phase.
1677 * In the meantime, we are receiving upcalls for this traffic.
1678 * Expedite the (new) flow install by replacing the ukey. */
1679 ovs_mutex_lock(&new_ukey->mutex);
1680 cmap_replace(&umap->cmap, &old_ukey->cmap_node,
1681 &new_ukey->cmap_node, new_ukey->hash);
1682 ovsrcu_postpone(ukey_delete__, old_ukey);
1683 transition_ukey(old_ukey, UKEY_DELETED);
1684 transition_ukey(new_ukey, UKEY_VISIBLE);
1685 replaced = true;
1686 }
1687 ovs_mutex_unlock(&old_ukey->mutex);
1688 }
1689
1690 if (replaced) {
1691 COVERAGE_INC(upcall_ukey_replace);
1692 } else {
1693 COVERAGE_INC(handler_duplicate_upcall);
1694 }
1695 return replaced;
1696 }
1697
1698 /* Attempts to insert a ukey into the shared ukey maps.
1699 *
1700 * On success, installs the ukey and returns true, leaving the ukey in a
1701 * locked state. Otherwise, returns false. */
1702 static bool
1703 ukey_install__(struct udpif *udpif, struct udpif_key *new_ukey)
1704 OVS_TRY_LOCK(true, new_ukey->mutex)
1705 {
1706 struct umap *umap;
1707 struct udpif_key *old_ukey;
1708 uint32_t idx;
1709 bool locked = false;
1710
1711 idx = new_ukey->hash % N_UMAPS;
1712 umap = &udpif->ukeys[idx];
1713 ovs_mutex_lock(&umap->mutex);
1714 old_ukey = ukey_lookup(udpif, &new_ukey->ufid, new_ukey->pmd_id);
1715 if (old_ukey) {
1716 /* Uncommon case: A ukey is already installed with the same UFID. */
1717 if (old_ukey->key_len == new_ukey->key_len
1718 && !memcmp(old_ukey->key, new_ukey->key, new_ukey->key_len)) {
1719 locked = try_ukey_replace(umap, old_ukey, new_ukey);
1720 } else {
1721 struct ds ds = DS_EMPTY_INITIALIZER;
1722
1723 odp_format_ufid(&old_ukey->ufid, &ds);
1724 ds_put_cstr(&ds, " ");
1725 odp_flow_key_format(old_ukey->key, old_ukey->key_len, &ds);
1726 ds_put_cstr(&ds, "\n");
1727 odp_format_ufid(&new_ukey->ufid, &ds);
1728 ds_put_cstr(&ds, " ");
1729 odp_flow_key_format(new_ukey->key, new_ukey->key_len, &ds);
1730
1731 VLOG_WARN_RL(&rl, "Conflicting ukey for flows:\n%s", ds_cstr(&ds));
1732 ds_destroy(&ds);
1733 }
1734 } else {
1735 ovs_mutex_lock(&new_ukey->mutex);
1736 cmap_insert(&umap->cmap, &new_ukey->cmap_node, new_ukey->hash);
1737 transition_ukey(new_ukey, UKEY_VISIBLE);
1738 locked = true;
1739 }
1740 ovs_mutex_unlock(&umap->mutex);
1741
1742 return locked;
1743 }
1744
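/* Sketch of the locking contract for ukey_install__() (illustrative only):
 * on success the new ukey is returned in a locked state, so the caller must
 * eventually unlock it:
 *
 *     if (ukey_install__(udpif, ukey)) {
 *         ...finish initializing 'ukey'...
 *         ovs_mutex_unlock(&ukey->mutex);
 *     }
 *
 * ukey_install() below is exactly this pattern, for callers that do not
 * need to hold the lock. */
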
1745 static void
1746 transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
1747 const char *where)
1748 OVS_REQUIRES(ukey->mutex)
1749 {
1750 if (dst < ukey->state) {
1751 VLOG_ABORT("Invalid ukey transition %d->%d (last transitioned from "
1752 "thread %u at %s)", ukey->state, dst, ukey->state_thread,
1753 ukey->state_where);
1754 }
1755 if (ukey->state == dst && dst == UKEY_OPERATIONAL) {
1756 return;
1757 }
1758
1759 /* Valid state transitions:
1760 * UKEY_CREATED -> UKEY_VISIBLE
1761 * Ukey is now visible in the umap.
1762 * UKEY_VISIBLE -> UKEY_OPERATIONAL
1763 * A handler has installed the flow, and the flow is in the datapath.
1764 * UKEY_VISIBLE -> UKEY_EVICTING
1765 * A handler installs the flow, then revalidator sweeps the ukey before
1766 * the flow is dumped. Most likely the flow was installed; start trying
1767 * to delete it.
1768 * UKEY_VISIBLE -> UKEY_EVICTED
1769 * A handler attempts to install the flow, but the datapath rejects it.
1770 * Consider that the datapath has already destroyed it.
1771 * UKEY_OPERATIONAL -> UKEY_EVICTING
1772 * A revalidator decides to evict the datapath flow.
1773 * UKEY_EVICTING -> UKEY_EVICTED
1774 * A revalidator has evicted the datapath flow.
1775 * UKEY_EVICTED -> UKEY_DELETED
1776 * A revalidator has removed the ukey from the umap and is deleting it.
1777 */
1778 if (ukey->state == dst - 1 || (ukey->state == UKEY_VISIBLE &&
1779 dst < UKEY_DELETED)) {
1780 ukey->state = dst;
1781 } else {
1782 struct ds ds = DS_EMPTY_INITIALIZER;
1783
1784 odp_format_ufid(&ukey->ufid, &ds);
1785 VLOG_WARN_RL(&rl, "Invalid state transition for ukey %s: %d -> %d",
1786 ds_cstr(&ds), ukey->state, dst);
1787 ds_destroy(&ds);
1788 }
1789 ukey->state_thread = ovsthread_id_self();
1790 ukey->state_where = where;
1791 }
1792
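/* Callers normally use the transition_ukey() macro rather than calling this
 * function directly; the macro (defined with the other ukey helpers in this
 * file) supplies the call site so that bad transitions can be traced, along
 * the lines of:
 *
 *     #define transition_ukey(UKEY, DST) \
 *         transition_ukey_at(UKEY, DST, OVS_SOURCE_LOCATOR)
 *
 * (A sketch of its shape; see the macro's actual definition.) */
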
1793 static bool
1794 ukey_install(struct udpif *udpif, struct udpif_key *ukey)
1795 {
1796 bool installed;
1797
1798 installed = ukey_install__(udpif, ukey);
1799 if (installed) {
1800 ovs_mutex_unlock(&ukey->mutex);
1801 }
1802
1803 return installed;
1804 }
1805
1806 /* Searches for a ukey in 'udpif->ukeys' that matches 'flow' and attempts to
1807 * lock the ukey. If the ukey does not exist, create it.
1808 *
1809 * Returns 0 on success, setting *result to the matching ukey and returning it
1810 * in a locked state. Otherwise, returns an errno and clears *result. EBUSY
1811 * indicates that another thread is handling this flow. Other errors indicate
1812 * that an unexpected condition arose while creating a new ukey.
1813 *
1814 * *error is an output parameter provided to appease the thread-safety analyser,
1815 * and its value matches the return value. */
1816 static int
1817 ukey_acquire(struct udpif *udpif, const struct dpif_flow *flow,
1818 struct udpif_key **result, int *error)
1819 OVS_TRY_LOCK(0, (*result)->mutex)
1820 {
1821 struct udpif_key *ukey;
1822 int retval;
1823
1824 ukey = ukey_lookup(udpif, &flow->ufid, flow->pmd_id);
1825 if (ukey) {
1826 retval = ovs_mutex_trylock(&ukey->mutex);
1827 } else {
1828 /* Usually we try to avoid installing flows from revalidator threads,
1829 * because locking on a umap may cause handler threads to block.
1830 * However there are certain cases, like when ovs-vswitchd is
1831 * restarted, where it is desirable to handle flows that exist in the
1832 * datapath gracefully (i.e., don't just clear the datapath). */
1833 bool install;
1834
1835 retval = ukey_create_from_dpif_flow(udpif, flow, &ukey);
1836 if (retval) {
1837 goto done;
1838 }
1839 install = ukey_install__(udpif, ukey);
1840 if (install) {
1841 retval = 0;
1842 } else {
1843 ukey_delete__(ukey);
1844 retval = EBUSY;
1845 }
1846 }
1847
1848 done:
1849 *error = retval;
1850 if (retval) {
1851 *result = NULL;
1852 } else {
1853 *result = ukey;
1854 }
1855 return retval;
1856 }
1857
1858 static void
1859 ukey_delete__(struct udpif_key *ukey)
1860 OVS_NO_THREAD_SAFETY_ANALYSIS
1861 {
1862 if (ukey) {
1863 if (ukey->key_recirc_id) {
1864 recirc_free_id(ukey->key_recirc_id);
1865 }
1866 recirc_refs_unref(&ukey->recircs);
1867 xlate_cache_delete(ukey->xcache);
1868 ofpbuf_delete(ovsrcu_get(struct ofpbuf *, &ukey->actions));
1869 ovs_mutex_destroy(&ukey->mutex);
1870 free(ukey);
1871 }
1872 }
1873
1874 static void
1875 ukey_delete(struct umap *umap, struct udpif_key *ukey)
1876 OVS_REQUIRES(umap->mutex)
1877 {
1878 ovs_mutex_lock(&ukey->mutex);
1879 if (ukey->state < UKEY_DELETED) {
1880 cmap_remove(&umap->cmap, &ukey->cmap_node, ukey->hash);
1881 ovsrcu_postpone(ukey_delete__, ukey);
1882 transition_ukey(ukey, UKEY_DELETED);
1883 }
1884 ovs_mutex_unlock(&ukey->mutex);
1885 }
1886
1887 static bool
1888 should_revalidate(const struct udpif *udpif, uint64_t packets,
1889 long long int used)
1890 {
1891 long long int metric, now, duration;
1892
1893 if (!used) {
1894 /* Always revalidate the first time a flow is dumped. */
1895 return true;
1896 }
1897
1898 if (udpif->dump_duration < 200) {
1899 /* The last dump completed quickly, so full revalidation is affordable. */
1900 return true;
1901 }
1902
1903 /* Calculate the mean time between seeing these packets. If this
1904 * exceeds the threshold, then delete the flow rather than performing
1905 * costly revalidation for flows that aren't being hit frequently.
1906 *
1907 * This is targeted at situations where the dump_duration is high (~1s),
1908 * and revalidation is triggered by a call to udpif_revalidate(). In
1909 * these situations, revalidation of all flows causes fluctuations in the
1910 * flow_limit due to the interaction with the dump_duration and max_idle.
1911 * This tends to result in deletion of low-throughput flows anyway, so
1912 * skip the revalidation and just delete those flows. */
1913 packets = MAX(packets, 1);
1914 now = MAX(used, time_msec());
1915 duration = now - used;
1916 metric = duration / packets;
1917
1918 if (metric < 200) {
1919 /* The flow is receiving more than ~5pps, so keep it. */
1920 return true;
1921 }
1922 return false;
1923 }
1924
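/* Worked example for the metric above: a flow whose stats were last updated
 * 2000 ms ago and that accounted for 5 new packets gives
 * duration / packets = 2000 / 5 = 400 ms between packets. Since 400 >= 200
 * (i.e., under ~5 packets per second), should_revalidate() returns false
 * and the flow is deleted rather than revalidated. */
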
1925 struct reval_context {
1926 /* Optional output parameters */
1927 struct flow_wildcards *wc;
1928 struct ofpbuf *odp_actions;
1929 struct netflow **netflow;
1930 struct xlate_cache *xcache;
1931
1932 /* Required output parameters */
1933 struct xlate_out xout;
1934 struct flow flow;
1935 };
1936
1937 /* Translates 'key' into a flow, populating 'ctx' as it goes along.
1938 *
1939 * Returns 0 on success, otherwise a positive errno value.
1940 *
1941 * The caller is responsible for uninitializing ctx->xout on success.
1942 */
1943 static int
1944 xlate_key(struct udpif *udpif, const struct nlattr *key, unsigned int len,
1945 const struct dpif_flow_stats *push, struct reval_context *ctx)
1946 {
1947 struct ofproto_dpif *ofproto;
1948 ofp_port_t ofp_in_port;
1949 struct xlate_in xin;
1950 int error;
1951
1952 if (odp_flow_key_to_flow(key, len, &ctx->flow) == ODP_FIT_ERROR) {
1953 return EINVAL;
1954 }
1955
1956 error = xlate_lookup(udpif->backer, &ctx->flow, &ofproto, NULL, NULL,
1957 ctx->netflow, &ofp_in_port);
1958 if (error) {
1959 return error;
1960 }
1961
1962 xlate_in_init(&xin, ofproto, ofproto_dpif_get_tables_version(ofproto),
1963 &ctx->flow, ofp_in_port, NULL, push->tcp_flags,
1964 NULL, ctx->wc, ctx->odp_actions);
1965 if (push->n_packets) {
1966 xin.resubmit_stats = push;
1967 xin.allow_side_effects = true;
1968 }
1969 xin.xcache = ctx->xcache;
1970 xlate_actions(&xin, &ctx->xout);
1971
1972 return 0;
1973 }
1974
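/* Illustrative caller of xlate_key() (a sketch; push_dp_ops() below is a
 * real one). 'netflow', 'key', and 'push' here are stand-ins for caller
 * state:
 *
 *     struct netflow *netflow;
 *     struct dpif_flow_stats push = { .tcp_flags = 0 };
 *     struct reval_context ctx = { .netflow = &netflow };
 *
 *     if (!xlate_key(udpif, key, key_len, &push, &ctx)) {
 *         ...use ctx.flow and ctx.xout...
 *         xlate_out_uninit(&ctx.xout);
 *     }
 */
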
1975 static int
1976 xlate_ukey(struct udpif *udpif, const struct udpif_key *ukey,
1977 uint16_t tcp_flags, struct reval_context *ctx)
1978 {
1979 struct dpif_flow_stats push = {
1980 .tcp_flags = tcp_flags,
1981 };
1982 return xlate_key(udpif, ukey->key, ukey->key_len, &push, ctx);
1983 }
1984
1985 static int
1986 populate_xcache(struct udpif *udpif, struct udpif_key *ukey,
1987 uint16_t tcp_flags)
1988 OVS_REQUIRES(ukey->mutex)
1989 {
1990 struct reval_context ctx = {
1991 .odp_actions = NULL,
1992 .netflow = NULL,
1993 .wc = NULL,
1994 };
1995 int error;
1996
1997 ovs_assert(!ukey->xcache);
1998 ukey->xcache = ctx.xcache = xlate_cache_new();
1999 error = xlate_ukey(udpif, ukey, tcp_flags, &ctx);
2000 if (error) {
2001 return error;
2002 }
2003 xlate_out_uninit(&ctx.xout);
2004
2005 return 0;
2006 }
2007
2008 static enum reval_result
2009 revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
2010 uint16_t tcp_flags, struct ofpbuf *odp_actions,
2011 struct recirc_refs *recircs, struct xlate_cache *xcache)
2012 {
2013 struct xlate_out *xoutp;
2014 struct netflow *netflow;
2015 struct flow_wildcards dp_mask, wc;
2016 enum reval_result result;
2017 struct reval_context ctx = {
2018 .odp_actions = odp_actions,
2019 .netflow = &netflow,
2020 .xcache = xcache,
2021 .wc = &wc,
2022 };
2023
2024 result = UKEY_DELETE;
2025 xoutp = NULL;
2026 netflow = NULL;
2027
2028 if (xlate_ukey(udpif, ukey, tcp_flags, &ctx)) {
2029 goto exit;
2030 }
2031 xoutp = &ctx.xout;
2032
2033 if (xoutp->avoid_caching) {
2034 goto exit;
2035 }
2036
2037 if (xoutp->slow) {
2038 struct ofproto_dpif *ofproto;
2039 ofproto = xlate_lookup_ofproto(udpif->backer, &ctx.flow, NULL);
2040 uint32_t smid = ofproto ? ofproto->up.slowpath_meter_id : UINT32_MAX;
2041 uint32_t cmid = ofproto ? ofproto->up.controller_meter_id : UINT32_MAX;
2042
2043 ofpbuf_clear(odp_actions);
2044 compose_slow_path(udpif, xoutp, &ctx.flow, ctx.flow.in_port.odp_port,
2045 odp_actions, smid, cmid);
2046 }
2047
2048 if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, &dp_mask, &ctx.flow)
2049 == ODP_FIT_ERROR) {
2050 goto exit;
2051 }
2052
2053 /* Do not modify if any bit is wildcarded by the installed datapath flow,
2054 * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
2055 * indicates that the datapath flow is now too generic and must be narrowed
2056 * down. Note that we do not know if the datapath has ignored any of the
2057 * wildcarded bits, so we may be overly conservative here. */
2058 if (flow_wildcards_has_extra(&dp_mask, ctx.wc)) {
2059 goto exit;
2060 }
2061
2062 if (!ofpbuf_equal(odp_actions,
2063 ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
2064 /* The datapath mask was OK, but the actions seem to have changed.
2065 * Let's modify it in place. */
2066 result = UKEY_MODIFY;
2067 /* Transfer recirc action ID references to the caller. */
2068 recirc_refs_swap(recircs, &xoutp->recircs);
2069 goto exit;
2070 }
2071
2072 result = UKEY_KEEP;
2073
2074 exit:
2075 if (netflow && result == UKEY_DELETE) {
2076 netflow_flow_clear(netflow, &ctx.flow);
2077 }
2078 xlate_out_uninit(xoutp);
2079 return result;
2080 }
2081
2082 /* Verifies that the datapath actions of 'ukey' are still correct, and pushes
2083 * 'stats' for it.
2084 *
2085 * Returns a recommended action for 'ukey', options include:
2086 * UKEY_DELETE The ukey should be deleted.
2087 * UKEY_KEEP The ukey is fine as is.
2088 * UKEY_MODIFY The ukey's actions should be changed, but it is otherwise
2089 * fine. Callers should change the actions to those found
2090 * in the caller supplied 'odp_actions' buffer. The
2091 * recirculation references can be found in 'recircs' and
2092 * must be handled by the caller.
2093 *
2094 * If the result is UKEY_MODIFY, then references to all recirc_ids used by the
2095 * new flow will be held within 'recircs' (which may be none).
2096 *
2097 * The caller is responsible for both initializing 'recircs' prior to this call,
2098 * and ensuring any references are eventually freed.
2099 */
2100 static enum reval_result
2101 revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
2102 const struct dpif_flow_stats *stats,
2103 struct ofpbuf *odp_actions, uint64_t reval_seq,
2104 struct recirc_refs *recircs)
2105 OVS_REQUIRES(ukey->mutex)
2106 {
2107 bool need_revalidate = ukey->reval_seq != reval_seq;
2108 enum reval_result result = UKEY_DELETE;
2109 struct dpif_flow_stats push;
2110
2111 ofpbuf_clear(odp_actions);
2112
2113 push.used = stats->used;
2114 push.tcp_flags = stats->tcp_flags;
2115 push.n_packets = (stats->n_packets > ukey->stats.n_packets
2116 ? stats->n_packets - ukey->stats.n_packets
2117 : 0);
2118 push.n_bytes = (stats->n_bytes > ukey->stats.n_bytes
2119 ? stats->n_bytes - ukey->stats.n_bytes
2120 : 0);
2121
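/* For example: if the datapath reports 1000 packets total and the ukey
 * previously saw 900, only the 100-packet delta is pushed to the OpenFlow
 * layer; if the datapath counter somehow moved backwards, the guards above
 * push 0 instead of a huge unsigned difference. */
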
2122 if (need_revalidate) {
2123 if (should_revalidate(udpif, push.n_packets, ukey->stats.used)) {
2124 if (!ukey->xcache) {
2125 ukey->xcache = xlate_cache_new();
2126 } else {
2127 xlate_cache_clear(ukey->xcache);
2128 }
2129 result = revalidate_ukey__(udpif, ukey, push.tcp_flags,
2130 odp_actions, recircs, ukey->xcache);
2131 } /* else delete; too expensive to revalidate */
2132 } else if (!push.n_packets || ukey->xcache
2133 || !populate_xcache(udpif, ukey, push.tcp_flags)) {
2134 result = UKEY_KEEP;
2135 }
2136
2137 /* Stats for deleted flows will be attributed upon flow deletion. Skip. */
2138 if (result != UKEY_DELETE) {
2139 xlate_push_stats(ukey->xcache, &push);
2140 ukey->stats = *stats;
2141 ukey->reval_seq = reval_seq;
2142 }
2143
2144 return result;
2145 }
2146
2147 static void
2148 delete_op_init__(struct udpif *udpif, struct ukey_op *op,
2149 const struct dpif_flow *flow)
2150 {
2151 op->ukey = NULL;
2152 op->dop.type = DPIF_OP_FLOW_DEL;
2153 op->dop.u.flow_del.key = flow->key;
2154 op->dop.u.flow_del.key_len = flow->key_len;
2155 op->dop.u.flow_del.ufid = flow->ufid_present ? &flow->ufid : NULL;
2156 op->dop.u.flow_del.pmd_id = flow->pmd_id;
2157 op->dop.u.flow_del.stats = &op->stats;
2158 op->dop.u.flow_del.terse = udpif_use_ufid(udpif);
2159 }
2160
2161 static void
2162 delete_op_init(struct udpif *udpif, struct ukey_op *op, struct udpif_key *ukey)
2163 {
2164 op->ukey = ukey;
2165 op->dop.type = DPIF_OP_FLOW_DEL;
2166 op->dop.u.flow_del.key = ukey->key;
2167 op->dop.u.flow_del.key_len = ukey->key_len;
2168 op->dop.u.flow_del.ufid = ukey->ufid_present ? &ukey->ufid : NULL;
2169 op->dop.u.flow_del.pmd_id = ukey->pmd_id;
2170 op->dop.u.flow_del.stats = &op->stats;
2171 op->dop.u.flow_del.terse = udpif_use_ufid(udpif);
2172 }
2173
2174 static void
2175 put_op_init(struct ukey_op *op, struct udpif_key *ukey,
2176 enum dpif_flow_put_flags flags)
2177 {
2178 op->ukey = ukey;
2179 op->dop.type = DPIF_OP_FLOW_PUT;
2180 op->dop.u.flow_put.flags = flags;
2181 op->dop.u.flow_put.key = ukey->key;
2182 op->dop.u.flow_put.key_len = ukey->key_len;
2183 op->dop.u.flow_put.mask = ukey->mask;
2184 op->dop.u.flow_put.mask_len = ukey->mask_len;
2185 op->dop.u.flow_put.ufid = ukey->ufid_present ? &ukey->ufid : NULL;
2186 op->dop.u.flow_put.pmd_id = ukey->pmd_id;
2187 op->dop.u.flow_put.stats = NULL;
2188 ukey_get_actions(ukey, &op->dop.u.flow_put.actions,
2189 &op->dop.u.flow_put.actions_len);
2190 }
2191
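/* Sketch of how these initializers pair with push_dp_ops() (illustrative;
 * revalidate() and revalidator_sweep__() below are the real callers, and
 * 'ukey'/'other_ukey' are hypothetical):
 *
 *     struct ukey_op ops[REVALIDATE_MAX_BATCH];
 *     size_t n_ops = 0;
 *
 *     delete_op_init(udpif, &ops[n_ops++], ukey);
 *     put_op_init(&ops[n_ops++], other_ukey, DPIF_FP_MODIFY);
 *     push_dp_ops(udpif, ops, n_ops);
 */
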
2192 /* Executes datapath operations 'ops' and attributes stats retrieved from the
2193 * datapath as part of those operations. */
2194 static void
2195 push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
2196 {
2197 struct dpif_op *opsp[REVALIDATE_MAX_BATCH];
2198 size_t i;
2199
2200 ovs_assert(n_ops <= REVALIDATE_MAX_BATCH);
2201 for (i = 0; i < n_ops; i++) {
2202 opsp[i] = &ops[i].dop;
2203 }
2204 dpif_operate(udpif->dpif, opsp, n_ops);
2205
2206 for (i = 0; i < n_ops; i++) {
2207 struct ukey_op *op = &ops[i];
2208 struct dpif_flow_stats *push, *stats, push_buf;
2209
2210 stats = op->dop.u.flow_del.stats;
2211 push = &push_buf;
2212
2213 if (op->dop.type != DPIF_OP_FLOW_DEL) {
2214 /* Only deleted flows need their stats pushed. */
2215 continue;
2216 }
2217
2218 if (op->dop.error) {
2219 /* flow_del error, 'stats' is unusable. */
2220 if (op->ukey) {
2221 ovs_mutex_lock(&op->ukey->mutex);
2222 transition_ukey(op->ukey, UKEY_EVICTED);
2223 ovs_mutex_unlock(&op->ukey->mutex);
2224 }
2225 continue;
2226 }
2227
2228 if (op->ukey) {
2229 ovs_mutex_lock(&op->ukey->mutex);
2230 transition_ukey(op->ukey, UKEY_EVICTED);
2231 push->used = MAX(stats->used, op->ukey->stats.used);
2232 push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags;
2233 push->n_packets = stats->n_packets - op->ukey->stats.n_packets;
2234 push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes;
2235 ovs_mutex_unlock(&op->ukey->mutex);
2236 } else {
2237 push = stats;
2238 }
2239
2240 if (push->n_packets || netflow_exists()) {
2241 const struct nlattr *key = op->dop.u.flow_del.key;
2242 size_t key_len = op->dop.u.flow_del.key_len;
2243 struct netflow *netflow;
2244 struct reval_context ctx = {
2245 .netflow = &netflow,
2246 };
2247 int error;
2248
2249 if (op->ukey) {
2250 ovs_mutex_lock(&op->ukey->mutex);
2251 if (op->ukey->xcache) {
2252 xlate_push_stats(op->ukey->xcache, push);
2253 ovs_mutex_unlock(&op->ukey->mutex);
2254 continue;
2255 }
2256 ovs_mutex_unlock(&op->ukey->mutex);
2257 key = op->ukey->key;
2258 key_len = op->ukey->key_len;
2259 }
2260
2261 error = xlate_key(udpif, key, key_len, push, &ctx);
2262 if (error) {
2263 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2264
2265 VLOG_WARN_RL(&rl, "xlate_key failed (%s)!",
2266 ovs_strerror(error));
2267 } else {
2268 xlate_out_uninit(&ctx.xout);
2269 if (netflow) {
2270 netflow_flow_clear(netflow, &ctx.flow);
2271 }
2272 }
2273 }
2274 }
2275 }
2276
2277 /* Executes datapath operations 'ops', attributes stats retrieved from the
2278 * datapath, and deletes ukeys corresponding to deleted flows. */
2279 static void
2280 push_ukey_ops(struct udpif *udpif, struct umap *umap,
2281 struct ukey_op *ops, size_t n_ops)
2282 {
2283 int i;
2284
2285 push_dp_ops(udpif, ops, n_ops);
2286 ovs_mutex_lock(&umap->mutex);
2287 for (i = 0; i < n_ops; i++) {
2288 if (ops[i].dop.type == DPIF_OP_FLOW_DEL) {
2289 ukey_delete(umap, ops[i].ukey);
2290 }
2291 }
2292 ovs_mutex_unlock(&umap->mutex);
2293 }
2294
2295 static void
2296 log_unexpected_flow(const struct dpif_flow *flow, int error)
2297 {
2298 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 60);
2299 struct ds ds = DS_EMPTY_INITIALIZER;
2300
2301 ds_put_format(&ds, "Failed to acquire udpif_key corresponding to "
2302 "unexpected flow (%s): ", ovs_strerror(error));
2303 odp_format_ufid(&flow->ufid, &ds);
2304 VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds));
2305 ds_destroy(&ds);
2306 }
2307
2308 static void
2309 reval_op_init(struct ukey_op *op, enum reval_result result,
2310 struct udpif *udpif, struct udpif_key *ukey,
2311 struct recirc_refs *recircs, struct ofpbuf *odp_actions)
2312 OVS_REQUIRES(ukey->mutex)
2313 {
2314 if (result == UKEY_DELETE) {
2315 delete_op_init(udpif, op, ukey);
2316 transition_ukey(ukey, UKEY_EVICTING);
2317 } else if (result == UKEY_MODIFY) {
2318 /* Store the new recircs. */
2319 recirc_refs_swap(&ukey->recircs, recircs);
2320 /* Release old recircs. */
2321 recirc_refs_unref(recircs);
2322 /* ukey->key_recirc_id remains, as the key is the same as before. */
2323
2324 ukey_set_actions(ukey, odp_actions);
2325 put_op_init(op, ukey, DPIF_FP_MODIFY);
2326 }
2327 }
2328
2329 static void
2330 revalidate(struct revalidator *revalidator)
2331 {
2332 uint64_t odp_actions_stub[1024 / 8];
2333 struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
2334
2335 struct udpif *udpif = revalidator->udpif;
2336 struct dpif_flow_dump_thread *dump_thread;
2337 uint64_t dump_seq, reval_seq;
2338 unsigned int flow_limit;
2339
2340 dump_seq = seq_read(udpif->dump_seq);
2341 reval_seq = seq_read(udpif->reval_seq);
2342 atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
2343 dump_thread = dpif_flow_dump_thread_create(udpif->dump);
2344 for (;;) {
2345 struct ukey_op ops[REVALIDATE_MAX_BATCH];
2346 int n_ops = 0;
2347
2348 struct dpif_flow flows[REVALIDATE_MAX_BATCH];
2349 const struct dpif_flow *f;
2350 int n_dumped;
2351
2352 long long int max_idle;
2353 long long int now;
2354 size_t n_dp_flows;
2355 bool kill_them_all;
2356
2357 n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
2358 if (!n_dumped) {
2359 break;
2360 }
2361
2362 now = time_msec();
2363
2364 /* In normal operation we want to keep flows around until they have
2365 * been idle for 'ofproto_max_idle' milliseconds. However:
2366 *
2367 * - If the number of datapath flows climbs above 'flow_limit',
2368 * drop that down to 100 ms to try to bring the flows down to
2369 * the limit.
2370 *
2371 * - If the number of datapath flows climbs above twice
2372 * 'flow_limit', delete all the datapath flows as an emergency
2373 * measure. (We reassess this condition for the next batch of
2374 * datapath flows, so we will recover before all the flows are
2375 * gone.) */
2376 n_dp_flows = udpif_get_n_flows(udpif);
2377 kill_them_all = n_dp_flows > flow_limit * 2;
2378 max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;
2379
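/* Example of the policy above: with flow_limit = 10000, dumping 15000
 * datapath flows drops max_idle to 100 ms, while 25000 flows (more than
 * twice the limit) sets kill_them_all and deletes every flow in this
 * batch. */
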
2380 for (f = flows; f < &flows[n_dumped]; f++) {
2381 long long int used = f->stats.used;
2382 struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
2383 enum reval_result result;
2384 struct udpif_key *ukey;
2385 bool already_dumped;
2386 int error;
2387
2388 if (ukey_acquire(udpif, f, &ukey, &error)) {
2389 if (error == EBUSY) {
2390 /* Another thread is processing this flow, so don't bother
2391 * processing it. */
2392 COVERAGE_INC(upcall_ukey_contention);
2393 } else {
2394 log_unexpected_flow(f, error);
2395 if (error != ENOENT) {
2396 delete_op_init__(udpif, &ops[n_ops++], f);
2397 }
2398 }
2399 continue;
2400 }
2401
2402 already_dumped = ukey->dump_seq == dump_seq;
2403 if (already_dumped) {
2404 /* The flow has already been handled during this flow dump
2405 * operation. Skip it. */
2406 if (ukey->xcache) {
2407 COVERAGE_INC(dumped_duplicate_flow);
2408 } else {
2409 COVERAGE_INC(dumped_new_flow);
2410 }
2411 ovs_mutex_unlock(&ukey->mutex);
2412 continue;
2413 }
2414
2415 if (ukey->state <= UKEY_OPERATIONAL) {
2416 /* The flow is now confirmed to be in the datapath. */
2417 transition_ukey(ukey, UKEY_OPERATIONAL);
2418 } else {
2419 VLOG_INFO("Unexpected ukey transition from state %d "
2420 "(last transitioned from thread %u at %s)",
2421 ukey->state, ukey->state_thread, ukey->state_where);
2422 ovs_mutex_unlock(&ukey->mutex);
2423 continue;
2424 }
2425
2426 if (!used) {
2427 used = ukey->created;
2428 }
2429 if (kill_them_all || (used && used < now - max_idle)) {
2430 result = UKEY_DELETE;
2431 } else {
2432 result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
2433 reval_seq, &recircs);
2434 }
2435 ukey->dump_seq = dump_seq;
2436
2437 if (result != UKEY_KEEP) {
2438 /* Takes ownership of 'recircs'. */
2439 reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
2440 &odp_actions);
2441 }
2442 ovs_mutex_unlock(&ukey->mutex);
2443 }
2444
2445 if (n_ops) {
2446 /* Push datapath ops but defer ukey deletion to 'sweep' phase. */
2447 push_dp_ops(udpif, ops, n_ops);
2448 }
2449 ovsrcu_quiesce();
2450 }
2451 dpif_flow_dump_thread_destroy(dump_thread);
2452 ofpbuf_uninit(&odp_actions);
2453 }
2454
2455 /* Pauses the 'revalidator'; it can proceed only after the main thread
2456 * calls udpif_resume_revalidators(). */
2457 static void
2458 revalidator_pause(struct revalidator *revalidator)
2459 {
2460 /* The first block is for sync'ing the pause with the main thread. */
2461 ovs_barrier_block(&revalidator->udpif->pause_barrier);
2462 /* The second block is for pausing until main thread resumes. */
2463 ovs_barrier_block(&revalidator->udpif->pause_barrier);
2464 }
2465
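/* The main thread's side of this handshake (a sketch; see
 * udpif_pause_revalidators() and udpif_resume_revalidators() earlier in
 * this file) blocks on the same barrier once for each step:
 *
 *     udpif_pause_revalidators(udpif);    (meets the first block above)
 *     ...reconfigure...
 *     udpif_resume_revalidators(udpif);   (releases the second block)
 */
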
2466 static void
2467 revalidator_sweep__(struct revalidator *revalidator, bool purge)
2468 {
2469 struct udpif *udpif;
2470 uint64_t dump_seq, reval_seq;
2471 int slice;
2472
2473 udpif = revalidator->udpif;
2474 dump_seq = seq_read(udpif->dump_seq);
2475 reval_seq = seq_read(udpif->reval_seq);
2476 slice = revalidator - udpif->revalidators;
2477 ovs_assert(slice < udpif->n_revalidators);
2478
2479 for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) {
2480 uint64_t odp_actions_stub[1024 / 8];
2481 struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
2482
2483 struct ukey_op ops[REVALIDATE_MAX_BATCH];
2484 struct udpif_key *ukey;
2485 struct umap *umap = &udpif->ukeys[i];
2486 size_t n_ops = 0;
2487
2488 CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
2489 enum ukey_state ukey_state;
2490
2491 /* A handler thread could be holding this ukey's lock while it installs a
2492 * new flow, so don't hang around waiting for access to it. */
2493 if (ovs_mutex_trylock(&ukey->mutex)) {
2494 continue;
2495 }
2496 ukey_state = ukey->state;
2497 if (ukey_state == UKEY_OPERATIONAL
2498 || (ukey_state == UKEY_VISIBLE && purge)) {
2499 struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
2500 bool seq_mismatch = (ukey->dump_seq != dump_seq
2501 && ukey->reval_seq != reval_seq);
2502 enum reval_result result;
2503
2504 if (purge) {
2505 result = UKEY_DELETE;
2506 } else if (!seq_mismatch) {
2507 result = UKEY_KEEP;
2508 } else {
2509 struct dpif_flow_stats stats;
2510 COVERAGE_INC(revalidate_missed_dp_flow);
2511 memset(&stats, 0, sizeof stats);
2512 result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
2513 reval_seq, &recircs);
2514 }
2515 if (result != UKEY_KEEP) {
2516 /* Clears 'recircs' if filled by revalidate_ukey(). */
2517 reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
2518 &odp_actions);
2519 }
2520 }
2521 ovs_mutex_unlock(&ukey->mutex);
2522
2523 if (ukey_state == UKEY_EVICTED) {
2524 /* The common flow deletion case involves deletion of the flow
2525 * during the dump phase and ukey deletion here. */
2526 ovs_mutex_lock(&umap->mutex);
2527 ukey_delete(umap, ukey);
2528 ovs_mutex_unlock(&umap->mutex);
2529 }
2530
2531 if (n_ops == REVALIDATE_MAX_BATCH) {
2532 /* Update/delete missed flows and clean up corresponding ukeys
2533 * if necessary. */
2534 push_ukey_ops(udpif, umap, ops, n_ops);
2535 n_ops = 0;
2536 }
2537 }
2538
2539 if (n_ops) {
2540 push_ukey_ops(udpif, umap, ops, n_ops);
2541 }
2542
2543 ofpbuf_uninit(&odp_actions);
2544 ovsrcu_quiesce();
2545 }
2546 }
2547
2548 static void
2549 revalidator_sweep(struct revalidator *revalidator)
2550 {
2551 revalidator_sweep__(revalidator, false);
2552 }
2553
2554 static void
2555 revalidator_purge(struct revalidator *revalidator)
2556 {
2557 revalidator_sweep__(revalidator, true);
2558 }
2559
2560 /* In reaction to a dpif purge, purges all 'ukey's with the same 'pmd_id'. */
2561 static void
2562 dp_purge_cb(void *aux, unsigned pmd_id)
2563 OVS_NO_THREAD_SAFETY_ANALYSIS
2564 {
2565 struct udpif *udpif = aux;
2566 size_t i;
2567
2568 udpif_pause_revalidators(udpif);
2569 for (i = 0; i < N_UMAPS; i++) {
2570 struct ukey_op ops[REVALIDATE_MAX_BATCH];
2571 struct udpif_key *ukey;
2572 struct umap *umap = &udpif->ukeys[i];
2573 size_t n_ops = 0;
2574
2575 CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
2576 if (ukey->pmd_id == pmd_id) {
2577 delete_op_init(udpif, &ops[n_ops++], ukey);
2578 transition_ukey(ukey, UKEY_EVICTING);
2579
2580 if (n_ops == REVALIDATE_MAX_BATCH) {
2581 push_ukey_ops(udpif, umap, ops, n_ops);
2582 n_ops = 0;
2583 }
2584 }
2585 }
2586
2587 if (n_ops) {
2588 push_ukey_ops(udpif, umap, ops, n_ops);
2589 }
2590
2591 ovsrcu_quiesce();
2592 }
2593 udpif_resume_revalidators(udpif);
2594 }
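
/* dp_purge_cb() is registered with the datapath when the udpif is created
 * (see udpif_create() earlier in this file), along the lines of:
 *
 *     dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);
 *
 * dpif-netdev invokes it, for example, when a PMD thread is removed. */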
2595 \f
2596 static void
2597 upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
2598 const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
2599 {
2600 struct ds ds = DS_EMPTY_INITIALIZER;
2601 struct udpif *udpif;
2602
2603 LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
2604 unsigned int flow_limit;
2605 bool ufid_enabled;
2606 size_t i;
2607
2608 atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
2609 ufid_enabled = udpif_use_ufid(udpif);
2610
2611 ds_put_format(&ds, "%s:\n", dpif_name(udpif->dpif));
2612 ds_put_format(&ds, "\tflows : (current %lu)"
2613 " (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif),
2614 udpif->avg_n_flows, udpif->max_n_flows, flow_limit);
2615 ds_put_format(&ds, "\tdump duration : %lldms\n", udpif->dump_duration);
2616 ds_put_format(&ds, "\tufid enabled : ");
2617 if (ufid_enabled) {
2618 ds_put_format(&ds, "true\n");
2619 } else {
2620 ds_put_format(&ds, "false\n");
2621 }
2622 ds_put_char(&ds, '\n');
2623
2624 for (i = 0; i < n_revalidators; i++) {
2625 struct revalidator *revalidator = &udpif->revalidators[i];
2626 int j, elements = 0;
2627
2628 for (j = i; j < N_UMAPS; j += n_revalidators) {
2629 elements += cmap_count(&udpif->ukeys[j].cmap);
2630 }
2631 ds_put_format(&ds, "\t%u: (keys %d)\n", revalidator->id, elements);
2632 }
2633 }
2634
2635 unixctl_command_reply(conn, ds_cstr(&ds));
2636 ds_destroy(&ds);
2637 }
2638
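/* Example invocation, with output reconstructed from the format strings
 * above (the values shown are illustrative):
 *
 *     $ ovs-appctl upcall/show
 *     system@ovs-system:
 *         flows : (current 13) (avg 9) (max 121) (limit 10000)
 *         dump duration : 12ms
 *         ufid enabled : true
 *
 *         0: (keys 7)
 *         1: (keys 6)
 */
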
2639 /* Disable using megaflows.
2640 *
2641 * This command is only needed for advanced debugging, so it's not
2642 * documented in the man page. */
2643 static void
2644 upcall_unixctl_disable_megaflows(struct unixctl_conn *conn,
2645 int argc OVS_UNUSED,
2646 const char *argv[] OVS_UNUSED,
2647 void *aux OVS_UNUSED)
2648 {
2649 atomic_store_relaxed(&enable_megaflows, false);
2650 udpif_flush_all_datapaths();
2651 unixctl_command_reply(conn, "megaflows disabled");
2652 }
2653
2654 /* Re-enable using megaflows.
2655 *
2656 * This command is only needed for advanced debugging, so it's not
2657 * documented in the man page. */
2658 static void
2659 upcall_unixctl_enable_megaflows(struct unixctl_conn *conn,
2660 int argc OVS_UNUSED,
2661 const char *argv[] OVS_UNUSED,
2662 void *aux OVS_UNUSED)
2663 {
2664 atomic_store_relaxed(&enable_megaflows, true);
2665 udpif_flush_all_datapaths();
2666 unixctl_command_reply(conn, "megaflows enabled");
2667 }
2668
2669 /* Disable skipping flow attributes during flow dump.
2670 *
2671 * This command is only needed for advanced debugging, so it's not
2672 * documented in the man page. */
2673 static void
2674 upcall_unixctl_disable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
2675 const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
2676 {
2677 atomic_store_relaxed(&enable_ufid, false);
2678 unixctl_command_reply(conn, "Datapath dumping tersely using UFID disabled");
2679 }
2680
2681 /* Re-enable skipping flow attributes during flow dump.
2682 *
2683 * This command is only needed for advanced debugging, so it's not documented
2684 * in the man page. */
2685 static void
2686 upcall_unixctl_enable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
2687 const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
2688 {
2689 atomic_store_relaxed(&enable_ufid, true);
2690 unixctl_command_reply(conn, "Datapath dumping tersely using UFID enabled "
2691 "for supported datapaths");
2692 }
2693
2694 /* Set the flow limit.
2695 *
2696 * This command is only needed for advanced debugging, so it's not
2697 * documented in the man page. */
2698 static void
2699 upcall_unixctl_set_flow_limit(struct unixctl_conn *conn,
2700 int argc OVS_UNUSED,
2701 const char *argv[],
2702 void *aux OVS_UNUSED)
2703 {
2704 struct ds ds = DS_EMPTY_INITIALIZER;
2705 struct udpif *udpif;
2706 unsigned int flow_limit = atoi(argv[1]);
2707
2708 LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
2709 atomic_store_relaxed(&udpif->flow_limit, flow_limit);
2710 }
2711 ds_put_format(&ds, "set flow_limit to %u\n", flow_limit);
2712 unixctl_command_reply(conn, ds_cstr(&ds));
2713 ds_destroy(&ds);
2714 }
2715
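/* Example (the "upcall/set-flow-limit" registration lives with the other
 * unixctl commands in this file):
 *
 *     $ ovs-appctl upcall/set-flow-limit 10000
 *     set flow_limit to 10000
 */
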
2716 static void
2717 upcall_unixctl_dump_wait(struct unixctl_conn *conn,
2718 int argc OVS_UNUSED,
2719 const char *argv[] OVS_UNUSED,
2720 void *aux OVS_UNUSED)
2721 {
2722 if (ovs_list_is_singleton(&all_udpifs)) {
2723 struct udpif *udpif = NULL;
2724 size_t len;
2725
2726 udpif = OBJECT_CONTAINING(ovs_list_front(&all_udpifs), udpif, list_node);
2727 len = (udpif->n_conns + 1) * sizeof *udpif->conns;
2728 udpif->conn_seq = seq_read(udpif->dump_seq);
2729 udpif->conns = xrealloc(udpif->conns, len);
2730 udpif->conns[udpif->n_conns++] = conn;
2731 } else {
2732 unixctl_command_reply_error(conn, "can't wait on multiple udpifs.");
2733 }
2734 }
2735
2736 static void
2737 upcall_unixctl_purge(struct unixctl_conn *conn, int argc OVS_UNUSED,
2738 const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
2739 {
2740 struct udpif *udpif;
2741
2742 LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
2743 int n;
2744
2745 for (n = 0; n < udpif->n_revalidators; n++) {
2746 revalidator_purge(&udpif->revalidators[n]);
2747 }
2748 }
2749 unixctl_command_reply(conn, "");
2750 }