]>
Commit | Line | Data |
---|---|---|
e1ec7dd4 EJ |
1 | /* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
2 | * | |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at: | |
6 | * | |
7 | * http://www.apache.org/licenses/LICENSE-2.0 | |
8 | * | |
9 | * Unless required by applicable law or agreed to in writing, software | |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. */ | |
14 | ||
15 | #include <config.h> | |
16 | #include "ofproto-dpif-upcall.h" | |
17 | ||
18 | #include <errno.h> | |
19 | #include <stdbool.h> | |
20 | #include <inttypes.h> | |
21 | ||
0fb7792a | 22 | #include "connmgr.h" |
e1ec7dd4 | 23 | #include "coverage.h" |
e1ec7dd4 | 24 | #include "dpif.h" |
e22d52ee | 25 | #include "dynamic-string.h" |
e1ec7dd4 | 26 | #include "fail-open.h" |
05067881 | 27 | #include "guarded-list.h" |
e1ec7dd4 | 28 | #include "latch.h" |
e1ec7dd4 EJ |
29 | #include "list.h" |
30 | #include "netlink.h" | |
31 | #include "ofpbuf.h" | |
10e57640 EJ |
32 | #include "ofproto-dpif-ipfix.h" |
33 | #include "ofproto-dpif-sflow.h" | |
e79a6c83 | 34 | #include "ofproto-dpif-xlate.h" |
e1ec7dd4 EJ |
35 | #include "packets.h" |
36 | #include "poll-loop.h" | |
e22d52ee EJ |
37 | #include "seq.h" |
38 | #include "unixctl.h" | |
e1ec7dd4 EJ |
39 | #include "vlog.h" |
40 | ||
41 | #define MAX_QUEUE_LENGTH 512 | |
e79a6c83 EJ |
42 | #define FLOW_MISS_MAX_BATCH 50 |
43 | #define REVALIDATE_MAX_BATCH 50 | |
e1ec7dd4 EJ |
44 | |
45 | VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall); | |
46 | ||
10e57640 | 47 | COVERAGE_DEFINE(upcall_queue_overflow); |
e1ec7dd4 EJ |
48 | |
/* A thread that processes each upcall handed to it by the dispatcher thread,
 * forwards the upcall's packet, and possibly sets up a kernel flow as a
 * cache.
 *
 * The dispatcher appends to 'upcalls' and the handler thread drains it; both
 * do so while holding 'mutex'. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    char *name;                        /* Thread name. */

    struct ovs_mutex mutex;            /* Mutex guarding the following. */

    /* Queue of unprocessed upcalls, protected by 'mutex'. */
    struct list upcalls OVS_GUARDED;
    size_t n_upcalls OVS_GUARDED;      /* Number of elements in 'upcalls'. */

    bool need_signal;                  /* Only changed by the dispatcher. */

    pthread_cond_t wake_cond;          /* Wakes 'thread' while holding
                                          'mutex'. */
};
68 | ||
/* A thread that processes each kernel flow handed to it by the flow_dumper
 * thread, updates OpenFlow statistics, and updates or removes the kernel flow
 * as necessary.
 *
 * 'wake_cond' is shared in both directions: the flow dumper signals it after
 * queuing a udump, and the revalidator signals it after draining a batch so
 * that a flow dumper blocked on a full queue can continue. */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    char *name;                        /* Thread name. */

    pthread_t thread;                  /* Thread ID. */
    struct hmap ukeys;                 /* Datapath flow keys. */

    uint64_t dump_seq;                 /* Parent's 'dump_seq' as of this
                                          revalidator's latest sweep. */

    struct ovs_mutex mutex;            /* Mutex guarding the following. */
    pthread_cond_t wake_cond;          /* Waited on by both this thread and
                                          the flow dumper. */
    struct list udumps OVS_GUARDED;    /* Unprocessed udumps. */
    size_t n_udumps OVS_GUARDED;       /* Number of unprocessed udumps. */
};
86 | ||
/* An upcall handler for ofproto_dpif.
 *
 * udpif has two logically separate pieces:
 *
 *    - A "dispatcher" thread that reads upcalls from the kernel and dispatches
 *      them to one of several "handler" threads (see struct handler).
 *
 *    - A "flow_dumper" thread that reads the kernel flow table and dispatches
 *      flows to one of several "revalidator" threads (see struct
 *      revalidator). */
struct udpif {
    struct list list_node;             /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    uint32_t secret;                   /* Random seed for upcall hash. */

    pthread_t dispatcher;              /* Dispatcher thread ID. */
    pthread_t flow_dumper;             /* Flow dumper thread ID. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;                 /* Number of elements in 'handlers'. */

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;             /* Number of elements in
                                          'revalidators'. */

    uint64_t last_reval_seq;           /* 'reval_seq' at last revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */

    struct seq *dump_seq;              /* Increments each dump iteration. */

    struct latch exit_latch;           /* Tells child threads to exit. */

    long long int dump_duration;       /* Duration of the last flow dump. */

    /* Datapath flow statistics, updated by the flow dumper at the start of
     * each dump: the largest flow count seen so far and a running average
     * that weights the newest sample by one half. */
    unsigned int max_n_flows;
    unsigned int avg_n_flows;

    /* Following fields are accessed and modified by different threads. */
    atomic_llong max_idle;             /* Maximum datapath flow idle time. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */
};
131 | ||
/* How udpif categorizes an upcall; classify_upcall() maps a raw dpif upcall
 * (and, for "action" upcalls, its userdata cookie) onto one of these. */
enum upcall_type {
    BAD_UPCALL,                 /* Some kind of bug somewhere. */
    MISS_UPCALL,                /* A flow miss. */
    SFLOW_UPCALL,               /* sFlow sample. */
    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
    IPFIX_UPCALL                /* Per-bridge sampling. */
};
139 | ||
/* One upcall received from the datapath, together with the storage that backs
 * it.  Allocated by the dispatcher in recv_upcalls() and released with
 * upcall_destroy(). */
struct upcall {
    struct list list_node;          /* For queuing upcalls. */
    struct flow_miss *flow_miss;    /* This upcall's flow_miss. */

    /* Raw upcall plus data for keeping track of the memory backing it. */
    struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */
    struct ofpbuf upcall_buf;       /* Owns some data in 'dpif_upcall'. */
    uint64_t upcall_stub[512 / 8];  /* Buffer to reduce need for malloc(). */
};
149 | ||
/* 'udpif_key's track the small amount of state that udpif needs for flow
 * expiration and that cannot be pulled directly from the datapath.  Each one
 * is created, owned, maintained, and destroyed by a single revalidator, which
 * makes them easy to handle efficiently with multiple threads. */
struct udpif_key {
    struct hmap_node hmap_node;     /* In parent revalidator 'ukeys' map. */

    struct nlattr *key;             /* Datapath flow key. */
    size_t key_len;                 /* Length of 'key'. */

    struct dpif_flow_stats stats;   /* Stats at most recent flow dump. */
    long long int created;          /* Estimation of creation time. */

    bool mark;                      /* Used by mark and sweep GC algorithm. */

    struct odputil_keybuf key_buf;  /* Memory for 'key'. */
};
167 | ||
/* 'udpif_flow_dump's hold the state associated with one iteration in a flow
 * dump operation.  This is created by the flow_dumper thread and handed to the
 * appropriate revalidator thread to be processed.
 *
 * The flow dumper copies the key and mask into 'key_buf' and 'mask_buf' and
 * points 'key' and 'mask' at those copies. */
struct udpif_flow_dump {
    struct list list_node;          /* In a revalidator's 'udumps' list. */

    struct nlattr *key;             /* Datapath flow key. */
    size_t key_len;                 /* Length of 'key'. */
    uint32_t key_hash;              /* Hash of 'key'. */

    struct odputil_keybuf mask_buf; /* Memory for 'mask'. */
    struct nlattr *mask;            /* Datapath mask for 'key'. */
    size_t mask_len;                /* Length of 'mask'. */

    struct dpif_flow_stats stats;   /* Stats pulled from the datapath. */

    bool need_revalidate;           /* Key needs revalidation? */

    struct odputil_keybuf key_buf;  /* Memory for 'key'. */
};
188 | ||
/* Flow miss batching.
 *
 * Some dpifs implement operations faster when you hand them off in a batch.
 * To allow batching, "struct flow_miss" queues the dpif-related work needed
 * for a given flow.  Each "struct flow_miss" corresponds to sending one or
 * more packets, plus possibly installing the flow in the dpif.
 *
 * NOTE(review): the code that fills in and consumes these fields is outside
 * this part of the file, so the per-field notes below are limited to what the
 * declarations themselves show. */
struct flow_miss {
    struct hmap_node hmap_node;         /* Presumably links the miss into a
                                           per-batch map -- TODO confirm. */
    struct ofproto_dpif *ofproto;

    struct flow flow;
    enum odp_key_fitness key_fitness;
    const struct nlattr *key;           /* Not owned by this structure as far
                                           as the const pointer suggests --
                                           verify against the code that sets
                                           it. */
    size_t key_len;
    enum dpif_upcall_type upcall_type;
    struct dpif_flow_stats stats;
    odp_port_t odp_in_port;

    uint64_t slow_path_buf[128 / 8];    /* 128 bytes of 64-bit aligned
                                           storage. */
    struct odputil_keybuf mask_buf;

    struct xlate_out xout;
};
212 | ||
static void upcall_destroy(struct upcall *);

/* Rate limiter shared by the rate-limited log messages in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Every udpif that currently exists, linked through 'list_node'.  Entries are
 * added by udpif_create() and removed by udpif_destroy(). */
static struct list all_udpifs = LIST_INITIALIZER(&all_udpifs);

/* Thread entry points and the helpers they rely on. */
static void recv_upcalls(struct udpif *);
static void handle_upcalls(struct handler *handler, struct list *upcalls);
static void *udpif_flow_dumper(void *);
static void *udpif_dispatcher(void *);
static void *udpif_upcall_handler(void *);
static void *udpif_revalidator(void *);
static uint64_t udpif_get_n_flows(const struct udpif *);
static void revalidate_udumps(struct revalidator *, struct list *udumps);
static void revalidator_sweep(struct revalidator *);

/* unixctl command callbacks, registered by udpif_create(). */
static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
                                const char *argv[], void *aux);
static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
                                             const char *argv[], void *aux);
static void upcall_unixctl_enable_megaflows(struct unixctl_conn *, int argc,
                                            const char *argv[], void *aux);
static void ukey_delete(struct revalidator *, struct udpif_key *);

/* NOTE(review): presumably toggled by the "upcall/enable-megaflows" and
 * "upcall/disable-megaflows" commands; their handlers are outside this part
 * of the file -- confirm. */
static atomic_bool enable_megaflows = ATOMIC_VAR_INIT(true);
e1ec7dd4 EJ |
236 | |
237 | struct udpif * | |
238 | udpif_create(struct dpif_backer *backer, struct dpif *dpif) | |
239 | { | |
e22d52ee | 240 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
e1ec7dd4 EJ |
241 | struct udpif *udpif = xzalloc(sizeof *udpif); |
242 | ||
e22d52ee EJ |
243 | if (ovsthread_once_start(&once)) { |
244 | unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show, | |
245 | NULL); | |
e79a6c83 EJ |
246 | unixctl_command_register("upcall/disable-megaflows", "", 0, 0, |
247 | upcall_unixctl_disable_megaflows, NULL); | |
248 | unixctl_command_register("upcall/enable-megaflows", "", 0, 0, | |
249 | upcall_unixctl_enable_megaflows, NULL); | |
e22d52ee EJ |
250 | ovsthread_once_done(&once); |
251 | } | |
252 | ||
e1ec7dd4 EJ |
253 | udpif->dpif = dpif; |
254 | udpif->backer = backer; | |
e79a6c83 EJ |
255 | atomic_init(&udpif->max_idle, 5000); |
256 | atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000)); | |
e1ec7dd4 | 257 | udpif->secret = random_uint32(); |
d7285d74 | 258 | udpif->reval_seq = seq_create(); |
e79a6c83 | 259 | udpif->dump_seq = seq_create(); |
e1ec7dd4 | 260 | latch_init(&udpif->exit_latch); |
e22d52ee | 261 | list_push_back(&all_udpifs, &udpif->list_node); |
e1ec7dd4 EJ |
262 | |
263 | return udpif; | |
264 | } | |
265 | ||
266 | void | |
267 | udpif_destroy(struct udpif *udpif) | |
268 | { | |
e79a6c83 EJ |
269 | udpif_set_threads(udpif, 0, 0); |
270 | udpif_flush(); | |
e1ec7dd4 | 271 | |
e22d52ee | 272 | list_remove(&udpif->list_node); |
e1ec7dd4 | 273 | latch_destroy(&udpif->exit_latch); |
d7285d74 | 274 | seq_destroy(udpif->reval_seq); |
e79a6c83 | 275 | seq_destroy(udpif->dump_seq); |
e1ec7dd4 EJ |
276 | free(udpif); |
277 | } | |
278 | ||
6567010f | 279 | /* Tells 'udpif' how many threads it should use to handle upcalls. Disables |
e79a6c83 EJ |
280 | * all threads if 'n_handlers' and 'n_revalidators' is zero. 'udpif''s |
281 | * datapath handle must have packet reception enabled before starting threads. | |
282 | */ | |
e1ec7dd4 | 283 | void |
e79a6c83 EJ |
284 | udpif_set_threads(struct udpif *udpif, size_t n_handlers, |
285 | size_t n_revalidators) | |
e1ec7dd4 | 286 | { |
e1ec7dd4 | 287 | /* Stop the old threads (if any). */ |
e79a6c83 EJ |
288 | if (udpif->handlers && |
289 | (udpif->n_handlers != n_handlers | |
290 | || udpif->n_revalidators != n_revalidators)) { | |
e1ec7dd4 EJ |
291 | size_t i; |
292 | ||
293 | latch_set(&udpif->exit_latch); | |
294 | ||
e1ec7dd4 EJ |
295 | for (i = 0; i < udpif->n_handlers; i++) { |
296 | struct handler *handler = &udpif->handlers[i]; | |
297 | ||
298 | ovs_mutex_lock(&handler->mutex); | |
299 | xpthread_cond_signal(&handler->wake_cond); | |
300 | ovs_mutex_unlock(&handler->mutex); | |
e79a6c83 EJ |
301 | xpthread_join(handler->thread, NULL); |
302 | } | |
303 | ||
304 | for (i = 0; i < udpif->n_revalidators; i++) { | |
305 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
306 | ||
307 | ovs_mutex_lock(&revalidator->mutex); | |
308 | xpthread_cond_signal(&revalidator->wake_cond); | |
309 | ovs_mutex_unlock(&revalidator->mutex); | |
310 | xpthread_join(revalidator->thread, NULL); | |
e1ec7dd4 EJ |
311 | } |
312 | ||
e79a6c83 | 313 | xpthread_join(udpif->flow_dumper, NULL); |
e1ec7dd4 | 314 | xpthread_join(udpif->dispatcher, NULL); |
e79a6c83 EJ |
315 | |
316 | for (i = 0; i < udpif->n_revalidators; i++) { | |
317 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
318 | struct udpif_flow_dump *udump, *next_udump; | |
319 | struct udpif_key *ukey, *next_ukey; | |
320 | ||
321 | LIST_FOR_EACH_SAFE (udump, next_udump, list_node, | |
322 | &revalidator->udumps) { | |
323 | list_remove(&udump->list_node); | |
324 | free(udump); | |
325 | } | |
326 | ||
327 | HMAP_FOR_EACH_SAFE (ukey, next_ukey, hmap_node, | |
328 | &revalidator->ukeys) { | |
329 | ukey_delete(revalidator, ukey); | |
330 | } | |
331 | hmap_destroy(&revalidator->ukeys); | |
332 | ovs_mutex_destroy(&revalidator->mutex); | |
333 | ||
334 | free(revalidator->name); | |
335 | } | |
336 | ||
e1ec7dd4 EJ |
337 | for (i = 0; i < udpif->n_handlers; i++) { |
338 | struct handler *handler = &udpif->handlers[i]; | |
339 | struct upcall *miss, *next; | |
340 | ||
e1ec7dd4 EJ |
341 | LIST_FOR_EACH_SAFE (miss, next, list_node, &handler->upcalls) { |
342 | list_remove(&miss->list_node); | |
343 | upcall_destroy(miss); | |
344 | } | |
e1ec7dd4 EJ |
345 | ovs_mutex_destroy(&handler->mutex); |
346 | ||
347 | xpthread_cond_destroy(&handler->wake_cond); | |
e22d52ee | 348 | free(handler->name); |
e1ec7dd4 EJ |
349 | } |
350 | latch_poll(&udpif->exit_latch); | |
351 | ||
e79a6c83 EJ |
352 | free(udpif->revalidators); |
353 | udpif->revalidators = NULL; | |
354 | udpif->n_revalidators = 0; | |
355 | ||
e1ec7dd4 EJ |
356 | free(udpif->handlers); |
357 | udpif->handlers = NULL; | |
358 | udpif->n_handlers = 0; | |
359 | } | |
360 | ||
361 | /* Start new threads (if necessary). */ | |
362 | if (!udpif->handlers && n_handlers) { | |
363 | size_t i; | |
364 | ||
365 | udpif->n_handlers = n_handlers; | |
e79a6c83 EJ |
366 | udpif->n_revalidators = n_revalidators; |
367 | ||
e1ec7dd4 EJ |
368 | udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers); |
369 | for (i = 0; i < udpif->n_handlers; i++) { | |
370 | struct handler *handler = &udpif->handlers[i]; | |
371 | ||
372 | handler->udpif = udpif; | |
373 | list_init(&handler->upcalls); | |
9b32ece6 | 374 | handler->need_signal = false; |
e1ec7dd4 | 375 | xpthread_cond_init(&handler->wake_cond, NULL); |
834d6caf | 376 | ovs_mutex_init(&handler->mutex); |
10e57640 EJ |
377 | xpthread_create(&handler->thread, NULL, udpif_upcall_handler, |
378 | handler); | |
e1ec7dd4 | 379 | } |
e1ec7dd4 | 380 | |
e79a6c83 EJ |
381 | udpif->revalidators = xzalloc(udpif->n_revalidators |
382 | * sizeof *udpif->revalidators); | |
383 | for (i = 0; i < udpif->n_revalidators; i++) { | |
384 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
385 | ||
386 | revalidator->udpif = udpif; | |
387 | list_init(&revalidator->udumps); | |
388 | hmap_init(&revalidator->ukeys); | |
389 | ovs_mutex_init(&revalidator->mutex); | |
390 | xpthread_cond_init(&revalidator->wake_cond, NULL); | |
391 | xpthread_create(&revalidator->thread, NULL, udpif_revalidator, | |
392 | revalidator); | |
393 | } | |
394 | xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif); | |
395 | xpthread_create(&udpif->flow_dumper, NULL, udpif_flow_dumper, udpif); | |
e1ec7dd4 | 396 | } |
e1ec7dd4 EJ |
397 | } |
398 | ||
399 | /* Notifies 'udpif' that something changed which may render previous | |
400 | * xlate_actions() results invalid. */ | |
401 | void | |
402 | udpif_revalidate(struct udpif *udpif) | |
403 | { | |
d7285d74 | 404 | seq_change(udpif->reval_seq); |
e79a6c83 | 405 | } |
05067881 | 406 | |
e79a6c83 EJ |
407 | /* Returns a seq which increments every time 'udpif' pulls stats from the |
408 | * datapath. Callers can use this to get a sense of when might be a good time | |
409 | * to do periodic work which relies on relatively up to date statistics. */ | |
410 | struct seq * | |
411 | udpif_dump_seq(struct udpif *udpif) | |
412 | { | |
413 | return udpif->dump_seq; | |
e1ec7dd4 EJ |
414 | } |
415 | ||
1c030aa5 EJ |
416 | void |
417 | udpif_get_memory_usage(struct udpif *udpif, struct simap *usage) | |
418 | { | |
419 | size_t i; | |
420 | ||
421 | simap_increase(usage, "dispatchers", 1); | |
422 | simap_increase(usage, "flow_dumpers", 1); | |
423 | ||
424 | simap_increase(usage, "handlers", udpif->n_handlers); | |
425 | for (i = 0; i < udpif->n_handlers; i++) { | |
426 | struct handler *handler = &udpif->handlers[i]; | |
427 | ovs_mutex_lock(&handler->mutex); | |
428 | simap_increase(usage, "handler upcalls", handler->n_upcalls); | |
429 | ovs_mutex_unlock(&handler->mutex); | |
430 | } | |
e79a6c83 EJ |
431 | |
432 | simap_increase(usage, "revalidators", udpif->n_revalidators); | |
433 | for (i = 0; i < udpif->n_revalidators; i++) { | |
434 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
435 | ovs_mutex_lock(&revalidator->mutex); | |
436 | simap_increase(usage, "revalidator dumps", revalidator->n_udumps); | |
437 | ||
438 | /* XXX: This isn't technically thread safe because the revalidator | |
439 | * ukeys maps isn't protected by a mutex since it's per thread. */ | |
440 | simap_increase(usage, "revalidator keys", | |
441 | hmap_count(&revalidator->ukeys)); | |
442 | ovs_mutex_unlock(&revalidator->mutex); | |
443 | } | |
1c030aa5 EJ |
444 | } |
445 | ||
e79a6c83 EJ |
446 | /* Removes all flows from all datapaths. */ |
447 | void | |
448 | udpif_flush(void) | |
449 | { | |
450 | struct udpif *udpif; | |
451 | ||
452 | LIST_FOR_EACH (udpif, list_node, &all_udpifs) { | |
453 | dpif_flow_flush(udpif->dpif); | |
454 | } | |
455 | } | |
456 | \f | |
e1ec7dd4 | 457 | /* Destroys and deallocates 'upcall'. */ |
10e57640 | 458 | static void |
e1ec7dd4 EJ |
459 | upcall_destroy(struct upcall *upcall) |
460 | { | |
461 | if (upcall) { | |
da546e07 | 462 | ofpbuf_uninit(&upcall->dpif_upcall.packet); |
e1ec7dd4 EJ |
463 | ofpbuf_uninit(&upcall->upcall_buf); |
464 | free(upcall); | |
465 | } | |
466 | } | |
467 | ||
e79a6c83 EJ |
468 | static uint64_t |
469 | udpif_get_n_flows(const struct udpif *udpif) | |
e1ec7dd4 | 470 | { |
e79a6c83 | 471 | struct dpif_dp_stats stats; |
05067881 | 472 | |
e79a6c83 EJ |
473 | dpif_get_dp_stats(udpif->dpif, &stats); |
474 | return stats.n_flows; | |
475 | } | |
e1ec7dd4 | 476 | |
e79a6c83 EJ |
477 | /* The dispatcher thread is responsible for receiving upcalls from the kernel, |
478 | * assigning them to a upcall_handler thread. */ | |
479 | static void * | |
480 | udpif_dispatcher(void *arg) | |
481 | { | |
482 | struct udpif *udpif = arg; | |
05067881 | 483 | |
e79a6c83 EJ |
484 | set_subprogram_name("dispatcher"); |
485 | while (!latch_is_set(&udpif->exit_latch)) { | |
486 | recv_upcalls(udpif); | |
487 | dpif_recv_wait(udpif->dpif); | |
488 | latch_wait(&udpif->exit_latch); | |
489 | poll_block(); | |
e1ec7dd4 | 490 | } |
05067881 BP |
491 | |
492 | return NULL; | |
e1ec7dd4 EJ |
493 | } |
494 | ||
e79a6c83 EJ |
495 | static void * |
496 | udpif_flow_dumper(void *arg) | |
e1ec7dd4 | 497 | { |
e79a6c83 | 498 | struct udpif *udpif = arg; |
ddeca9a4 | 499 | |
e79a6c83 EJ |
500 | set_subprogram_name("flow_dumper"); |
501 | while (!latch_is_set(&udpif->exit_latch)) { | |
502 | const struct dpif_flow_stats *stats; | |
503 | long long int start_time, duration; | |
504 | const struct nlattr *key, *mask; | |
505 | struct dpif_flow_dump dump; | |
506 | size_t key_len, mask_len; | |
507 | unsigned int flow_limit; | |
508 | long long int max_idle; | |
509 | bool need_revalidate; | |
510 | uint64_t reval_seq; | |
511 | size_t n_flows, i; | |
512 | ||
513 | reval_seq = seq_read(udpif->reval_seq); | |
514 | need_revalidate = udpif->last_reval_seq != reval_seq; | |
515 | udpif->last_reval_seq = reval_seq; | |
516 | ||
517 | n_flows = udpif_get_n_flows(udpif); | |
518 | udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows); | |
519 | udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2; | |
520 | ||
521 | atomic_read(&udpif->flow_limit, &flow_limit); | |
522 | if (n_flows < flow_limit / 8) { | |
523 | max_idle = 5000; | |
524 | } else if (n_flows < flow_limit / 4) { | |
525 | max_idle = 2000; | |
526 | } else if (n_flows < flow_limit / 2) { | |
527 | max_idle = 1000; | |
528 | } else { | |
529 | max_idle = 500; | |
530 | } | |
531 | atomic_store(&udpif->max_idle, max_idle); | |
532 | ||
533 | start_time = time_msec(); | |
534 | dpif_flow_dump_start(&dump, udpif->dpif); | |
535 | while (dpif_flow_dump_next(&dump, &key, &key_len, &mask, &mask_len, | |
536 | NULL, NULL, &stats) | |
537 | && !latch_is_set(&udpif->exit_latch)) { | |
538 | struct udpif_flow_dump *udump = xmalloc(sizeof *udump); | |
539 | struct revalidator *revalidator; | |
540 | ||
541 | udump->key_hash = hash_bytes(key, key_len, udpif->secret); | |
542 | memcpy(&udump->key_buf, key, key_len); | |
543 | udump->key = (struct nlattr *) &udump->key_buf; | |
544 | udump->key_len = key_len; | |
545 | ||
546 | memcpy(&udump->mask_buf, mask, mask_len); | |
547 | udump->mask = (struct nlattr *) &udump->mask_buf; | |
548 | udump->mask_len = mask_len; | |
549 | ||
550 | udump->stats = *stats; | |
551 | udump->need_revalidate = need_revalidate; | |
552 | ||
553 | revalidator = &udpif->revalidators[udump->key_hash | |
554 | % udpif->n_revalidators]; | |
555 | ||
556 | ovs_mutex_lock(&revalidator->mutex); | |
557 | while (revalidator->n_udumps >= REVALIDATE_MAX_BATCH * 3 | |
558 | && !latch_is_set(&udpif->exit_latch)) { | |
559 | ovs_mutex_cond_wait(&revalidator->wake_cond, | |
560 | &revalidator->mutex); | |
561 | } | |
562 | list_push_back(&revalidator->udumps, &udump->list_node); | |
563 | revalidator->n_udumps++; | |
564 | xpthread_cond_signal(&revalidator->wake_cond); | |
565 | ovs_mutex_unlock(&revalidator->mutex); | |
566 | } | |
567 | dpif_flow_dump_done(&dump); | |
568 | ||
569 | /* Let all the revalidators finish and garbage collect. */ | |
570 | seq_change(udpif->dump_seq); | |
571 | for (i = 0; i < udpif->n_revalidators; i++) { | |
572 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
573 | ovs_mutex_lock(&revalidator->mutex); | |
574 | xpthread_cond_signal(&revalidator->wake_cond); | |
575 | ovs_mutex_unlock(&revalidator->mutex); | |
576 | } | |
e1ec7dd4 | 577 | |
e79a6c83 EJ |
578 | for (i = 0; i < udpif->n_revalidators; i++) { |
579 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
e1ec7dd4 | 580 | |
e79a6c83 EJ |
581 | ovs_mutex_lock(&revalidator->mutex); |
582 | while (revalidator->dump_seq != seq_read(udpif->dump_seq) | |
583 | && !latch_is_set(&udpif->exit_latch)) { | |
584 | ovs_mutex_cond_wait(&revalidator->wake_cond, | |
585 | &revalidator->mutex); | |
586 | } | |
587 | ovs_mutex_unlock(&revalidator->mutex); | |
588 | } | |
e1ec7dd4 | 589 | |
e79a6c83 EJ |
590 | duration = time_msec() - start_time; |
591 | udpif->dump_duration = duration; | |
592 | if (duration > 2000) { | |
593 | flow_limit /= duration / 1000; | |
594 | } else if (duration > 1300) { | |
595 | flow_limit = flow_limit * 3 / 4; | |
596 | } else if (duration < 1000 && n_flows > 2000 | |
597 | && flow_limit < n_flows * 1000 / duration) { | |
598 | flow_limit += 1000; | |
599 | } | |
600 | flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000)); | |
601 | atomic_store(&udpif->flow_limit, flow_limit); | |
e1ec7dd4 | 602 | |
e79a6c83 | 603 | if (duration > 2000) { |
f8b92eb8 | 604 | VLOG_INFO("Spent an unreasonably long %lldms dumping flows", |
e79a6c83 EJ |
605 | duration); |
606 | } | |
e1ec7dd4 | 607 | |
e79a6c83 EJ |
608 | poll_timer_wait_until(start_time + MIN(max_idle, 500)); |
609 | seq_wait(udpif->reval_seq, udpif->last_reval_seq); | |
e1ec7dd4 EJ |
610 | latch_wait(&udpif->exit_latch); |
611 | poll_block(); | |
612 | } | |
613 | ||
614 | return NULL; | |
615 | } | |
616 | ||
5f37b938 | 617 | /* The miss handler thread is responsible for processing miss upcalls retrieved |
e1ec7dd4 EJ |
618 | * by the dispatcher thread. Once finished it passes the processed miss |
619 | * upcalls to ofproto-dpif where they're installed in the datapath. */ | |
620 | static void * | |
10e57640 | 621 | udpif_upcall_handler(void *arg) |
e1ec7dd4 | 622 | { |
e1ec7dd4 EJ |
623 | struct handler *handler = arg; |
624 | ||
e22d52ee EJ |
625 | handler->name = xasprintf("handler_%u", ovsthread_id_self()); |
626 | set_subprogram_name("%s", handler->name); | |
627 | ||
e1ec7dd4 | 628 | for (;;) { |
04a19fb8 | 629 | struct list misses = LIST_INITIALIZER(&misses); |
e1ec7dd4 EJ |
630 | size_t i; |
631 | ||
632 | ovs_mutex_lock(&handler->mutex); | |
633 | ||
634 | if (latch_is_set(&handler->udpif->exit_latch)) { | |
635 | ovs_mutex_unlock(&handler->mutex); | |
636 | return NULL; | |
637 | } | |
638 | ||
639 | if (!handler->n_upcalls) { | |
640 | ovs_mutex_cond_wait(&handler->wake_cond, &handler->mutex); | |
641 | } | |
642 | ||
643 | for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) { | |
644 | if (handler->n_upcalls) { | |
645 | handler->n_upcalls--; | |
646 | list_push_back(&misses, list_pop_front(&handler->upcalls)); | |
647 | } else { | |
648 | break; | |
649 | } | |
650 | } | |
651 | ovs_mutex_unlock(&handler->mutex); | |
652 | ||
e79a6c83 | 653 | handle_upcalls(handler, &misses); |
de80e4b6 BP |
654 | |
655 | coverage_clear(); | |
e1ec7dd4 EJ |
656 | } |
657 | } | |
e79a6c83 EJ |
658 | |
659 | static void * | |
660 | udpif_revalidator(void *arg) | |
e1ec7dd4 | 661 | { |
e79a6c83 | 662 | struct revalidator *revalidator = arg; |
e1ec7dd4 | 663 | |
e79a6c83 EJ |
664 | revalidator->name = xasprintf("revalidator_%u", ovsthread_id_self()); |
665 | set_subprogram_name("%s", revalidator->name); | |
666 | for (;;) { | |
667 | struct list udumps = LIST_INITIALIZER(&udumps); | |
668 | struct udpif *udpif = revalidator->udpif; | |
669 | size_t i; | |
670 | ||
671 | ovs_mutex_lock(&revalidator->mutex); | |
672 | if (latch_is_set(&udpif->exit_latch)) { | |
673 | ovs_mutex_unlock(&revalidator->mutex); | |
674 | return NULL; | |
675 | } | |
676 | ||
677 | if (!revalidator->n_udumps) { | |
678 | if (revalidator->dump_seq != seq_read(udpif->dump_seq)) { | |
679 | revalidator->dump_seq = seq_read(udpif->dump_seq); | |
680 | revalidator_sweep(revalidator); | |
681 | } else { | |
682 | ovs_mutex_cond_wait(&revalidator->wake_cond, | |
683 | &revalidator->mutex); | |
684 | } | |
685 | } | |
686 | ||
687 | for (i = 0; i < REVALIDATE_MAX_BATCH && revalidator->n_udumps; i++) { | |
688 | list_push_back(&udumps, list_pop_front(&revalidator->udumps)); | |
689 | revalidator->n_udumps--; | |
690 | } | |
691 | ||
692 | /* Wake up the flow dumper. */ | |
693 | xpthread_cond_signal(&revalidator->wake_cond); | |
694 | ovs_mutex_unlock(&revalidator->mutex); | |
695 | ||
696 | if (!list_is_empty(&udumps)) { | |
697 | revalidate_udumps(revalidator, &udumps); | |
698 | } | |
699 | } | |
700 | ||
701 | return NULL; | |
702 | } | |
703 | \f | |
e1ec7dd4 EJ |
704 | static enum upcall_type |
705 | classify_upcall(const struct upcall *upcall) | |
706 | { | |
707 | const struct dpif_upcall *dpif_upcall = &upcall->dpif_upcall; | |
708 | union user_action_cookie cookie; | |
709 | size_t userdata_len; | |
710 | ||
711 | /* First look at the upcall type. */ | |
712 | switch (dpif_upcall->type) { | |
713 | case DPIF_UC_ACTION: | |
714 | break; | |
715 | ||
716 | case DPIF_UC_MISS: | |
717 | return MISS_UPCALL; | |
718 | ||
719 | case DPIF_N_UC_TYPES: | |
720 | default: | |
721 | VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, | |
722 | dpif_upcall->type); | |
723 | return BAD_UPCALL; | |
724 | } | |
725 | ||
726 | /* "action" upcalls need a closer look. */ | |
727 | if (!dpif_upcall->userdata) { | |
728 | VLOG_WARN_RL(&rl, "action upcall missing cookie"); | |
729 | return BAD_UPCALL; | |
730 | } | |
731 | userdata_len = nl_attr_get_size(dpif_upcall->userdata); | |
732 | if (userdata_len < sizeof cookie.type | |
733 | || userdata_len > sizeof cookie) { | |
34582733 | 734 | VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE, |
e1ec7dd4 EJ |
735 | userdata_len); |
736 | return BAD_UPCALL; | |
737 | } | |
738 | memset(&cookie, 0, sizeof cookie); | |
739 | memcpy(&cookie, nl_attr_get(dpif_upcall->userdata), userdata_len); | |
740 | if (userdata_len == sizeof cookie.sflow | |
741 | && cookie.type == USER_ACTION_COOKIE_SFLOW) { | |
742 | return SFLOW_UPCALL; | |
743 | } else if (userdata_len == sizeof cookie.slow_path | |
744 | && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) { | |
745 | return MISS_UPCALL; | |
746 | } else if (userdata_len == sizeof cookie.flow_sample | |
747 | && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) { | |
748 | return FLOW_SAMPLE_UPCALL; | |
749 | } else if (userdata_len == sizeof cookie.ipfix | |
750 | && cookie.type == USER_ACTION_COOKIE_IPFIX) { | |
751 | return IPFIX_UPCALL; | |
752 | } else { | |
753 | VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16 | |
34582733 | 754 | " and size %"PRIuSIZE, cookie.type, userdata_len); |
e1ec7dd4 EJ |
755 | return BAD_UPCALL; |
756 | } | |
757 | } | |
758 | ||
759 | static void | |
760 | recv_upcalls(struct udpif *udpif) | |
761 | { | |
caf6491f JR |
762 | int n; |
763 | ||
e1ec7dd4 | 764 | for (;;) { |
10e57640 EJ |
765 | uint32_t hash = udpif->secret; |
766 | struct handler *handler; | |
e1ec7dd4 | 767 | struct upcall *upcall; |
10e57640 EJ |
768 | size_t n_bytes, left; |
769 | struct nlattr *nla; | |
e1ec7dd4 EJ |
770 | int error; |
771 | ||
772 | upcall = xmalloc(sizeof *upcall); | |
773 | ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub, | |
774 | sizeof upcall->upcall_stub); | |
775 | error = dpif_recv(udpif->dpif, &upcall->dpif_upcall, | |
776 | &upcall->upcall_buf); | |
777 | if (error) { | |
837a88dc JR |
778 | /* upcall_destroy() can only be called on successfully received |
779 | * upcalls. */ | |
780 | ofpbuf_uninit(&upcall->upcall_buf); | |
781 | free(upcall); | |
e1ec7dd4 EJ |
782 | break; |
783 | } | |
784 | ||
10e57640 EJ |
785 | n_bytes = 0; |
786 | NL_ATTR_FOR_EACH (nla, left, upcall->dpif_upcall.key, | |
787 | upcall->dpif_upcall.key_len) { | |
788 | enum ovs_key_attr type = nl_attr_type(nla); | |
789 | if (type == OVS_KEY_ATTR_IN_PORT | |
790 | || type == OVS_KEY_ATTR_TCP | |
791 | || type == OVS_KEY_ATTR_UDP) { | |
792 | if (nl_attr_get_size(nla) == 4) { | |
be58eabb | 793 | hash = mhash_add(hash, nl_attr_get_u32(nla)); |
10e57640 EJ |
794 | n_bytes += 4; |
795 | } else { | |
796 | VLOG_WARN_RL(&rl, | |
797 | "Netlink attribute with incorrect size."); | |
e1ec7dd4 EJ |
798 | } |
799 | } | |
10e57640 EJ |
800 | } |
801 | hash = mhash_finish(hash, n_bytes); | |
dfbdea46 | 802 | |
10e57640 | 803 | handler = &udpif->handlers[hash % udpif->n_handlers]; |
dfbdea46 | 804 | |
10e57640 EJ |
805 | ovs_mutex_lock(&handler->mutex); |
806 | if (handler->n_upcalls < MAX_QUEUE_LENGTH) { | |
807 | list_push_back(&handler->upcalls, &upcall->list_node); | |
9b32ece6 YT |
808 | if (handler->n_upcalls == 0) { |
809 | handler->need_signal = true; | |
810 | } | |
811 | handler->n_upcalls++; | |
812 | if (handler->need_signal && | |
813 | handler->n_upcalls >= FLOW_MISS_MAX_BATCH) { | |
814 | handler->need_signal = false; | |
10e57640 | 815 | xpthread_cond_signal(&handler->wake_cond); |
dfbdea46 | 816 | } |
10e57640 EJ |
817 | ovs_mutex_unlock(&handler->mutex); |
818 | if (!VLOG_DROP_DBG(&rl)) { | |
819 | struct ds ds = DS_EMPTY_INITIALIZER; | |
820 | ||
821 | odp_flow_key_format(upcall->dpif_upcall.key, | |
822 | upcall->dpif_upcall.key_len, | |
823 | &ds); | |
824 | VLOG_DBG("dispatcher: enqueue (%s)", ds_cstr(&ds)); | |
825 | ds_destroy(&ds); | |
e1ec7dd4 | 826 | } |
10e57640 EJ |
827 | } else { |
828 | ovs_mutex_unlock(&handler->mutex); | |
829 | COVERAGE_INC(upcall_queue_overflow); | |
830 | upcall_destroy(upcall); | |
e1ec7dd4 EJ |
831 | } |
832 | } | |
10e57640 | 833 | |
caf6491f | 834 | for (n = 0; n < udpif->n_handlers; ++n) { |
10e57640 EJ |
835 | struct handler *handler = &udpif->handlers[n]; |
836 | ||
9b32ece6 YT |
837 | if (handler->need_signal) { |
838 | handler->need_signal = false; | |
caf6491f JR |
839 | ovs_mutex_lock(&handler->mutex); |
840 | xpthread_cond_signal(&handler->wake_cond); | |
841 | ovs_mutex_unlock(&handler->mutex); | |
842 | } | |
843 | } | |
e1ec7dd4 EJ |
844 | } |
845 | ||
e79a6c83 EJ |
846 | /* Calculates slow path actions for 'xout'. 'buf' must statically be |
847 | * initialized with at least 128 bytes of space. */ | |
848 | static void | |
849 | compose_slow_path(struct udpif *udpif, struct xlate_out *xout, | |
850 | odp_port_t odp_in_port, struct ofpbuf *buf) | |
851 | { | |
852 | union user_action_cookie cookie; | |
853 | odp_port_t port; | |
854 | uint32_t pid; | |
855 | ||
856 | cookie.type = USER_ACTION_COOKIE_SLOW_PATH; | |
857 | cookie.slow_path.unused = 0; | |
858 | cookie.slow_path.reason = xout->slow; | |
859 | ||
860 | port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP) | |
861 | ? ODPP_NONE | |
862 | : odp_in_port; | |
863 | pid = dpif_port_get_pid(udpif->dpif, port); | |
864 | odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf); | |
865 | } | |
866 | ||
e1ec7dd4 EJ |
867 | static struct flow_miss * |
868 | flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto, | |
869 | const struct flow *flow, uint32_t hash) | |
870 | { | |
871 | struct flow_miss *miss; | |
872 | ||
873 | HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) { | |
874 | if (miss->ofproto == ofproto && flow_equal(&miss->flow, flow)) { | |
875 | return miss; | |
876 | } | |
877 | } | |
878 | ||
879 | return NULL; | |
880 | } | |
881 | ||
e1ec7dd4 | 882 | static void |
e79a6c83 | 883 | handle_upcalls(struct handler *handler, struct list *upcalls) |
e1ec7dd4 | 884 | { |
e79a6c83 EJ |
885 | struct hmap misses = HMAP_INITIALIZER(&misses); |
886 | struct udpif *udpif = handler->udpif; | |
887 | ||
888 | struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH]; | |
889 | struct dpif_op *opsp[FLOW_MISS_MAX_BATCH * 2]; | |
890 | struct dpif_op ops[FLOW_MISS_MAX_BATCH * 2]; | |
891 | struct flow_miss *miss, *next_miss; | |
e1ec7dd4 | 892 | struct upcall *upcall, *next; |
ddeca9a4 | 893 | size_t n_misses, n_ops, i; |
e79a6c83 EJ |
894 | unsigned int flow_limit; |
895 | bool fail_open, may_put; | |
10e57640 | 896 | enum upcall_type type; |
e1ec7dd4 | 897 | |
e79a6c83 EJ |
898 | atomic_read(&udpif->flow_limit, &flow_limit); |
899 | may_put = udpif_get_n_flows(udpif) < flow_limit; | |
900 | ||
901 | /* Extract the flow from each upcall. Construct in 'misses' a hash table | |
902 | * that maps each unique flow to a 'struct flow_miss'. | |
04a19fb8 BP |
903 | * |
904 | * Most commonly there is a single packet per flow_miss, but there are | |
905 | * several reasons why there might be more than one, e.g.: | |
906 | * | |
907 | * - The dpif packet interface does not support TSO (or UFO, etc.), so a | |
908 | * large packet sent to userspace is split into a sequence of smaller | |
909 | * ones. | |
e1ec7dd4 | 910 | * |
04a19fb8 BP |
911 | * - A stream of quickly arriving packets in an established "slow-pathed" |
912 | * flow. | |
913 | * | |
914 | * - Rarely, a stream of quickly arriving packets in a flow not yet | |
915 | * established. (This is rare because most protocols do not send | |
916 | * multiple back-to-back packets before receiving a reply from the | |
917 | * other end of the connection, which gives OVS a chance to set up a | |
918 | * datapath flow.) | |
919 | */ | |
ddeca9a4 | 920 | n_misses = 0; |
e1ec7dd4 EJ |
921 | LIST_FOR_EACH_SAFE (upcall, next, list_node, upcalls) { |
922 | struct dpif_upcall *dupcall = &upcall->dpif_upcall; | |
e79a6c83 | 923 | struct flow_miss *miss = &miss_buf[n_misses]; |
da546e07 | 924 | struct ofpbuf *packet = &dupcall->packet; |
e1ec7dd4 EJ |
925 | struct flow_miss *existing_miss; |
926 | struct ofproto_dpif *ofproto; | |
10e57640 EJ |
927 | struct dpif_sflow *sflow; |
928 | struct dpif_ipfix *ipfix; | |
e1ec7dd4 EJ |
929 | odp_port_t odp_in_port; |
930 | struct flow flow; | |
e1ec7dd4 EJ |
931 | int error; |
932 | ||
04a19fb8 | 933 | error = xlate_receive(udpif->backer, packet, dupcall->key, |
e1ec7dd4 | 934 | dupcall->key_len, &flow, &miss->key_fitness, |
1dfdb9b3 | 935 | &ofproto, &ipfix, &sflow, NULL, &odp_in_port); |
10e57640 EJ |
936 | if (error) { |
937 | if (error == ENODEV) { | |
10e57640 EJ |
938 | /* Received packet on datapath port for which we couldn't |
939 | * associate an ofproto. This can happen if a port is removed | |
940 | * while traffic is being received. Print a rate-limited | |
941 | * message in case it happens frequently. Install a drop flow | |
942 | * so that future packets of the flow are inexpensively dropped | |
943 | * in the kernel. */ | |
944 | VLOG_INFO_RL(&rl, "received packet on unassociated datapath " | |
945 | "port %"PRIu32, odp_in_port); | |
e79a6c83 EJ |
946 | dpif_flow_put(udpif->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY, |
947 | dupcall->key, dupcall->key_len, NULL, 0, NULL, 0, | |
948 | NULL); | |
10e57640 EJ |
949 | } |
950 | list_remove(&upcall->list_node); | |
951 | upcall_destroy(upcall); | |
952 | continue; | |
953 | } | |
954 | ||
955 | type = classify_upcall(upcall); | |
956 | if (type == MISS_UPCALL) { | |
04a19fb8 BP |
957 | uint32_t hash; |
958 | ||
959 | flow_extract(packet, flow.skb_priority, flow.pkt_mark, | |
960 | &flow.tunnel, &flow.in_port, &miss->flow); | |
961 | ||
962 | hash = flow_hash(&miss->flow, 0); | |
e79a6c83 | 963 | existing_miss = flow_miss_find(&misses, ofproto, &miss->flow, |
04a19fb8 BP |
964 | hash); |
965 | if (!existing_miss) { | |
e79a6c83 | 966 | hmap_insert(&misses, &miss->hmap_node, hash); |
04a19fb8 BP |
967 | miss->ofproto = ofproto; |
968 | miss->key = dupcall->key; | |
969 | miss->key_len = dupcall->key_len; | |
970 | miss->upcall_type = dupcall->type; | |
971 | miss->stats.n_packets = 0; | |
972 | miss->stats.n_bytes = 0; | |
973 | miss->stats.used = time_msec(); | |
974 | miss->stats.tcp_flags = 0; | |
e79a6c83 | 975 | miss->odp_in_port = odp_in_port; |
04a19fb8 | 976 | |
ddeca9a4 | 977 | n_misses++; |
e1ec7dd4 | 978 | } else { |
04a19fb8 | 979 | miss = existing_miss; |
e1ec7dd4 | 980 | } |
04a19fb8 BP |
981 | miss->stats.tcp_flags |= packet_get_tcp_flags(packet, &miss->flow); |
982 | miss->stats.n_bytes += packet->size; | |
983 | miss->stats.n_packets++; | |
e1ec7dd4 | 984 | |
04a19fb8 | 985 | upcall->flow_miss = miss; |
10e57640 EJ |
986 | continue; |
987 | } | |
04a19fb8 | 988 | |
10e57640 EJ |
989 | switch (type) { |
990 | case SFLOW_UPCALL: | |
10e57640 EJ |
991 | if (sflow) { |
992 | union user_action_cookie cookie; | |
993 | ||
994 | memset(&cookie, 0, sizeof cookie); | |
995 | memcpy(&cookie, nl_attr_get(dupcall->userdata), | |
996 | sizeof cookie.sflow); | |
da546e07 | 997 | dpif_sflow_received(sflow, packet, &flow, odp_in_port, |
10e57640 | 998 | &cookie); |
04a19fb8 | 999 | } |
10e57640 EJ |
1000 | break; |
1001 | case IPFIX_UPCALL: | |
10e57640 | 1002 | if (ipfix) { |
da546e07 | 1003 | dpif_ipfix_bridge_sample(ipfix, packet, &flow); |
10e57640 EJ |
1004 | } |
1005 | break; | |
1006 | case FLOW_SAMPLE_UPCALL: | |
10e57640 EJ |
1007 | if (ipfix) { |
1008 | union user_action_cookie cookie; | |
1009 | ||
1010 | memset(&cookie, 0, sizeof cookie); | |
1011 | memcpy(&cookie, nl_attr_get(dupcall->userdata), | |
1012 | sizeof cookie.flow_sample); | |
1013 | ||
1014 | /* The flow reflects exactly the contents of the packet. | |
1015 | * Sample the packet using it. */ | |
da546e07 | 1016 | dpif_ipfix_flow_sample(ipfix, packet, &flow, |
10e57640 EJ |
1017 | cookie.flow_sample.collector_set_id, |
1018 | cookie.flow_sample.probability, | |
1019 | cookie.flow_sample.obs_domain_id, | |
1020 | cookie.flow_sample.obs_point_id); | |
10e57640 EJ |
1021 | } |
1022 | break; | |
1023 | case BAD_UPCALL: | |
1024 | break; | |
1025 | case MISS_UPCALL: | |
428b2edd | 1026 | OVS_NOT_REACHED(); |
e1ec7dd4 | 1027 | } |
10e57640 | 1028 | |
1dfdb9b3 EJ |
1029 | dpif_ipfix_unref(ipfix); |
1030 | dpif_sflow_unref(sflow); | |
1031 | ||
10e57640 EJ |
1032 | list_remove(&upcall->list_node); |
1033 | upcall_destroy(upcall); | |
e1ec7dd4 EJ |
1034 | } |
1035 | ||
04a19fb8 BP |
1036 | /* Initialize each 'struct flow_miss's ->xout. |
1037 | * | |
1038 | * We do this per-flow_miss rather than per-packet because, most commonly, | |
1039 | * all the packets in a flow can use the same translation. | |
1040 | * | |
1041 | * We can't do this in the previous loop because we need the TCP flags for | |
1042 | * all the packets in each miss. */ | |
1043 | fail_open = false; | |
e79a6c83 | 1044 | HMAP_FOR_EACH (miss, hmap_node, &misses) { |
04a19fb8 BP |
1045 | struct xlate_in xin; |
1046 | ||
10c44245 | 1047 | xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL, |
04a19fb8 BP |
1048 | miss->stats.tcp_flags, NULL); |
1049 | xin.may_learn = true; | |
e79a6c83 EJ |
1050 | |
1051 | if (miss->upcall_type == DPIF_UC_MISS) { | |
1052 | xin.resubmit_stats = &miss->stats; | |
1053 | } else { | |
1054 | /* For non-miss upcalls, there's a flow in the datapath which this | |
1055 | * packet was accounted to. Presumably the revalidators will deal | |
1056 | * with pushing its stats eventually. */ | |
1057 | } | |
1058 | ||
04a19fb8 | 1059 | xlate_actions(&xin, &miss->xout); |
10c44245 | 1060 | fail_open = fail_open || miss->xout.fail_open; |
04a19fb8 BP |
1061 | } |
1062 | ||
1063 | /* Now handle the packets individually in order of arrival. In the common | |
1064 | * case each packet of a miss can share the same actions, but slow-pathed | |
1065 | * packets need to be translated individually: | |
1066 | * | |
1067 | * - For SLOW_CFM, SLOW_LACP, SLOW_STP, and SLOW_BFD, translation is what | |
1068 | * processes received packets for these protocols. | |
1069 | * | |
1070 | * - For SLOW_CONTROLLER, translation sends the packet to the OpenFlow | |
1071 | * controller. | |
1072 | * | |
1073 | * The loop fills 'ops' with an array of operations to execute in the | |
1074 | * datapath. */ | |
1075 | n_ops = 0; | |
1076 | LIST_FOR_EACH (upcall, list_node, upcalls) { | |
1077 | struct flow_miss *miss = upcall->flow_miss; | |
da546e07 | 1078 | struct ofpbuf *packet = &upcall->dpif_upcall.packet; |
e79a6c83 EJ |
1079 | struct ofpbuf mask; |
1080 | struct dpif_op *op; | |
1081 | bool megaflow; | |
04a19fb8 BP |
1082 | |
1083 | if (miss->xout.slow) { | |
04a19fb8 BP |
1084 | struct xlate_in xin; |
1085 | ||
10c44245 | 1086 | xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL, 0, packet); |
04a19fb8 | 1087 | xlate_actions_for_side_effects(&xin); |
04a19fb8 BP |
1088 | } |
1089 | ||
e79a6c83 EJ |
1090 | atomic_read(&enable_megaflows, &megaflow); |
1091 | ofpbuf_use_stack(&mask, &miss->mask_buf, sizeof miss->mask_buf); | |
1092 | if (megaflow) { | |
1093 | odp_flow_key_from_mask(&mask, &miss->xout.wc.masks, &miss->flow, | |
1094 | UINT32_MAX); | |
1095 | } | |
04a19fb8 | 1096 | |
e79a6c83 EJ |
1097 | if (may_put) { |
1098 | op = &ops[n_ops++]; | |
1099 | op->type = DPIF_OP_FLOW_PUT; | |
1100 | op->u.flow_put.flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; | |
1101 | op->u.flow_put.key = miss->key; | |
1102 | op->u.flow_put.key_len = miss->key_len; | |
1103 | op->u.flow_put.mask = mask.data; | |
1104 | op->u.flow_put.mask_len = mask.size; | |
1105 | op->u.flow_put.stats = NULL; | |
1106 | ||
1107 | if (!miss->xout.slow) { | |
1108 | op->u.flow_put.actions = miss->xout.odp_actions.data; | |
1109 | op->u.flow_put.actions_len = miss->xout.odp_actions.size; | |
1110 | } else { | |
1111 | struct ofpbuf buf; | |
1112 | ||
1113 | ofpbuf_use_stack(&buf, miss->slow_path_buf, | |
1114 | sizeof miss->slow_path_buf); | |
1115 | compose_slow_path(udpif, &miss->xout, miss->odp_in_port, &buf); | |
1116 | op->u.flow_put.actions = buf.data; | |
1117 | op->u.flow_put.actions_len = buf.size; | |
1118 | } | |
1119 | } | |
1120 | ||
1121 | if (miss->xout.odp_actions.size) { | |
04a19fb8 BP |
1122 | if (miss->flow.in_port.ofp_port |
1123 | != vsp_realdev_to_vlandev(miss->ofproto, | |
1124 | miss->flow.in_port.ofp_port, | |
1125 | miss->flow.vlan_tci)) { | |
1126 | /* This packet was received on a VLAN splinter port. We | |
1127 | * added a VLAN to the packet to make the packet resemble | |
1128 | * the flow, but the actions were composed assuming that | |
1129 | * the packet contained no VLAN. So, we must remove the | |
1130 | * VLAN header from the packet before trying to execute the | |
1131 | * actions. */ | |
1132 | eth_pop_vlan(packet); | |
1133 | } | |
1134 | ||
1135 | op = &ops[n_ops++]; | |
1136 | op->type = DPIF_OP_EXECUTE; | |
1137 | op->u.execute.key = miss->key; | |
1138 | op->u.execute.key_len = miss->key_len; | |
1139 | op->u.execute.packet = packet; | |
1140 | op->u.execute.actions = miss->xout.odp_actions.data; | |
1141 | op->u.execute.actions_len = miss->xout.odp_actions.size; | |
7fd91025 | 1142 | op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) != 0; |
04a19fb8 | 1143 | } |
e1ec7dd4 | 1144 | } |
e1ec7dd4 | 1145 | |
04a19fb8 BP |
1146 | /* Special case for fail-open mode. |
1147 | * | |
1148 | * If we are in fail-open mode, but we are connected to a controller too, | |
1149 | * then we should send the packet up to the controller in the hope that it | |
1150 | * will try to set up a flow and thereby allow us to exit fail-open. | |
1151 | * | |
da546e07 JR |
1152 | * See the top-level comment in fail-open.c for more information. |
1153 | * | |
1154 | * Copy packets before they are modified by execution. */ | |
04a19fb8 BP |
1155 | if (fail_open) { |
1156 | LIST_FOR_EACH (upcall, list_node, upcalls) { | |
1157 | struct flow_miss *miss = upcall->flow_miss; | |
da546e07 | 1158 | struct ofpbuf *packet = &upcall->dpif_upcall.packet; |
0fb7792a | 1159 | struct ofproto_packet_in *pin; |
04a19fb8 BP |
1160 | |
1161 | pin = xmalloc(sizeof *pin); | |
0fb7792a BP |
1162 | pin->up.packet = xmemdup(packet->data, packet->size); |
1163 | pin->up.packet_len = packet->size; | |
1164 | pin->up.reason = OFPR_NO_MATCH; | |
0fb7792a | 1165 | pin->up.table_id = 0; |
d4fa4e79 | 1166 | pin->up.cookie = OVS_BE64_MAX; |
0fb7792a | 1167 | flow_get_metadata(&miss->flow, &pin->up.fmd); |
d38a3c7b | 1168 | pin->send_len = 0; /* Not used for flow table misses. */ |
cfa955b0 | 1169 | pin->generated_by_table_miss = false; |
04a19fb8 BP |
1170 | ofproto_dpif_send_packet_in(miss->ofproto, pin); |
1171 | } | |
1172 | } | |
1173 | ||
da546e07 JR |
1174 | /* Execute batch. */ |
1175 | for (i = 0; i < n_ops; i++) { | |
1176 | opsp[i] = &ops[i]; | |
1177 | } | |
1178 | dpif_operate(udpif->dpif, opsp, n_ops); | |
1179 | ||
e79a6c83 EJ |
1180 | HMAP_FOR_EACH_SAFE (miss, next_miss, hmap_node, &misses) { |
1181 | hmap_remove(&misses, &miss->hmap_node); | |
1182 | xlate_out_uninit(&miss->xout); | |
1183 | } | |
1184 | hmap_destroy(&misses); | |
1185 | ||
1186 | LIST_FOR_EACH_SAFE (upcall, next, list_node, upcalls) { | |
1187 | list_remove(&upcall->list_node); | |
1188 | upcall_destroy(upcall); | |
1189 | } | |
1190 | } | |
1191 | ||
1192 | static struct udpif_key * | |
1193 | ukey_lookup(struct revalidator *revalidator, struct udpif_flow_dump *udump) | |
1194 | { | |
1195 | struct udpif_key *ukey; | |
1196 | ||
1197 | HMAP_FOR_EACH_WITH_HASH (ukey, hmap_node, udump->key_hash, | |
1198 | &revalidator->ukeys) { | |
1199 | if (ukey->key_len == udump->key_len | |
1200 | && !memcmp(ukey->key, udump->key, udump->key_len)) { | |
1201 | return ukey; | |
1202 | } | |
1203 | } | |
1204 | return NULL; | |
1205 | } | |
1206 | ||
1207 | static void | |
1208 | ukey_delete(struct revalidator *revalidator, struct udpif_key *ukey) | |
1209 | { | |
1210 | hmap_remove(&revalidator->ukeys, &ukey->hmap_node); | |
1211 | free(ukey); | |
1212 | } | |
1213 | ||
1214 | static bool | |
1215 | revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump, | |
1216 | struct udpif_key *ukey) | |
1217 | { | |
1218 | struct ofpbuf xout_actions, *actions; | |
1219 | uint64_t slow_path_buf[128 / 8]; | |
1220 | struct xlate_out xout, *xoutp; | |
1221 | struct flow flow, udump_mask; | |
1222 | struct ofproto_dpif *ofproto; | |
1223 | struct dpif_flow_stats push; | |
1224 | uint32_t *udump32, *xout32; | |
1225 | odp_port_t odp_in_port; | |
1226 | struct xlate_in xin; | |
1227 | int error; | |
1228 | size_t i; | |
1229 | bool ok; | |
1230 | ||
1231 | ok = false; | |
1232 | xoutp = NULL; | |
1233 | actions = NULL; | |
1234 | ||
1235 | /* If we don't need to revalidate, we can simply push the stats contained | |
1236 | * in the udump, otherwise we'll have to get the actions so we can check | |
1237 | * them. */ | |
1238 | if (udump->need_revalidate) { | |
1239 | if (dpif_flow_get(udpif->dpif, ukey->key, ukey->key_len, &actions, | |
1240 | &udump->stats)) { | |
1241 | goto exit; | |
1242 | } | |
1243 | } | |
1244 | ||
1245 | push.used = udump->stats.used; | |
1246 | push.tcp_flags = udump->stats.tcp_flags; | |
1247 | push.n_packets = udump->stats.n_packets > ukey->stats.n_packets | |
1248 | ? udump->stats.n_packets - ukey->stats.n_packets | |
1249 | : 0; | |
1250 | push.n_bytes = udump->stats.n_bytes > ukey->stats.n_bytes | |
1251 | ? udump->stats.n_bytes - ukey->stats.n_bytes | |
1252 | : 0; | |
1253 | ukey->stats = udump->stats; | |
1254 | ||
1255 | if (!push.n_packets && !udump->need_revalidate) { | |
1256 | ok = true; | |
1257 | goto exit; | |
1258 | } | |
1259 | ||
1260 | error = xlate_receive(udpif->backer, NULL, ukey->key, ukey->key_len, &flow, | |
1261 | NULL, &ofproto, NULL, NULL, NULL, &odp_in_port); | |
1262 | if (error) { | |
1263 | goto exit; | |
1264 | } | |
1265 | ||
1266 | xlate_in_init(&xin, ofproto, &flow, NULL, push.tcp_flags, NULL); | |
1267 | xin.resubmit_stats = push.n_packets ? &push : NULL; | |
1268 | xin.may_learn = push.n_packets > 0; | |
1269 | xin.skip_wildcards = !udump->need_revalidate; | |
1270 | xlate_actions(&xin, &xout); | |
1271 | xoutp = &xout; | |
ddeca9a4 | 1272 | |
e79a6c83 EJ |
1273 | if (!udump->need_revalidate) { |
1274 | ok = true; | |
1275 | goto exit; | |
1276 | } | |
1277 | ||
1278 | if (!xout.slow) { | |
1279 | ofpbuf_use_const(&xout_actions, xout.odp_actions.data, | |
1280 | xout.odp_actions.size); | |
05067881 | 1281 | } else { |
e79a6c83 EJ |
1282 | ofpbuf_use_stack(&xout_actions, slow_path_buf, sizeof slow_path_buf); |
1283 | compose_slow_path(udpif, &xout, odp_in_port, &xout_actions); | |
1284 | } | |
1285 | ||
1286 | if (!ofpbuf_equal(&xout_actions, actions)) { | |
1287 | goto exit; | |
1288 | } | |
1289 | ||
1290 | if (odp_flow_key_to_mask(udump->mask, udump->mask_len, &udump_mask, &flow) | |
1291 | == ODP_FIT_ERROR) { | |
1292 | goto exit; | |
1293 | } | |
1294 | ||
1295 | /* Since the kernel is free to ignore wildcarded bits in the mask, we can't | |
1296 | * directly check that the masks are the same. Instead we check that the | |
1297 | * mask in the kernel is more specific i.e. less wildcarded, than what | |
1298 | * we've calculated here. This guarantees we don't catch any packets we | |
1299 | * shouldn't with the megaflow. */ | |
1300 | udump32 = (uint32_t *) &udump_mask; | |
1301 | xout32 = (uint32_t *) &xout.wc.masks; | |
1302 | for (i = 0; i < FLOW_U32S; i++) { | |
1303 | if ((udump32[i] | xout32[i]) != udump32[i]) { | |
1304 | goto exit; | |
1305 | } | |
1306 | } | |
1307 | ok = true; | |
1308 | ||
1309 | exit: | |
1310 | ofpbuf_delete(actions); | |
1311 | xlate_out_uninit(xoutp); | |
1312 | return ok; | |
1313 | } | |
1314 | ||
1315 | static void | |
1316 | revalidate_udumps(struct revalidator *revalidator, struct list *udumps) | |
1317 | { | |
1318 | struct udpif *udpif = revalidator->udpif; | |
1319 | ||
1320 | struct { | |
1321 | struct dpif_flow_stats ukey_stats; /* Stats stored in the ukey. */ | |
1322 | struct dpif_flow_stats stats; /* Stats for 'op'. */ | |
1323 | struct dpif_op op; /* Flow del operation. */ | |
1324 | } ops[REVALIDATE_MAX_BATCH]; | |
1325 | ||
1326 | struct dpif_op *opsp[REVALIDATE_MAX_BATCH]; | |
1327 | struct udpif_flow_dump *udump, *next_udump; | |
1328 | size_t n_ops, i, n_flows; | |
1329 | unsigned int flow_limit; | |
1330 | long long int max_idle; | |
1331 | bool must_del; | |
1332 | ||
1333 | atomic_read(&udpif->max_idle, &max_idle); | |
1334 | atomic_read(&udpif->flow_limit, &flow_limit); | |
1335 | ||
1336 | n_flows = udpif_get_n_flows(udpif); | |
1337 | ||
1338 | must_del = false; | |
1339 | if (n_flows > flow_limit) { | |
1340 | must_del = n_flows > 2 * flow_limit; | |
1341 | max_idle = 100; | |
1342 | } | |
1343 | ||
1344 | n_ops = 0; | |
1345 | LIST_FOR_EACH_SAFE (udump, next_udump, list_node, udumps) { | |
1346 | long long int used, now; | |
1347 | struct udpif_key *ukey; | |
1348 | ||
1349 | now = time_msec(); | |
1350 | ukey = ukey_lookup(revalidator, udump); | |
1351 | ||
1352 | used = udump->stats.used; | |
1353 | if (!used && ukey) { | |
1354 | used = ukey->created; | |
1355 | } | |
1356 | ||
1357 | if (must_del || (used && used < now - max_idle)) { | |
1358 | struct dpif_flow_stats *ukey_stats = &ops[n_ops].ukey_stats; | |
1359 | struct dpif_op *op = &ops[n_ops].op; | |
1360 | ||
1361 | op->type = DPIF_OP_FLOW_DEL; | |
1362 | op->u.flow_del.key = udump->key; | |
1363 | op->u.flow_del.key_len = udump->key_len; | |
1364 | op->u.flow_del.stats = &ops[n_ops].stats; | |
1365 | n_ops++; | |
1366 | ||
1367 | if (ukey) { | |
1368 | *ukey_stats = ukey->stats; | |
1369 | ukey_delete(revalidator, ukey); | |
1370 | } else { | |
1371 | memset(ukey_stats, 0, sizeof *ukey_stats); | |
1372 | } | |
1373 | ||
1374 | continue; | |
1375 | } | |
1376 | ||
1377 | if (!ukey) { | |
1378 | ukey = xmalloc(sizeof *ukey); | |
1379 | ||
1380 | ukey->key = (struct nlattr *) &ukey->key_buf; | |
1381 | memcpy(ukey->key, udump->key, udump->key_len); | |
1382 | ukey->key_len = udump->key_len; | |
1383 | ||
1384 | ukey->created = used ? used : now; | |
1385 | memset(&ukey->stats, 0, sizeof ukey->stats); | |
1386 | ||
1387 | ukey->mark = false; | |
1388 | ||
1389 | hmap_insert(&revalidator->ukeys, &ukey->hmap_node, | |
1390 | udump->key_hash); | |
1391 | } | |
1392 | ukey->mark = true; | |
1393 | ||
1394 | if (!revalidate_ukey(udpif, udump, ukey)) { | |
1395 | dpif_flow_del(udpif->dpif, udump->key, udump->key_len, NULL); | |
1396 | ukey_delete(revalidator, ukey); | |
1397 | } | |
1398 | ||
1399 | list_remove(&udump->list_node); | |
1400 | free(udump); | |
1401 | } | |
1402 | ||
1403 | for (i = 0; i < n_ops; i++) { | |
1404 | opsp[i] = &ops[i].op; | |
1405 | } | |
1406 | dpif_operate(udpif->dpif, opsp, n_ops); | |
1407 | ||
1408 | for (i = 0; i < n_ops; i++) { | |
1409 | struct dpif_flow_stats push, *stats, *ukey_stats; | |
1410 | ||
1411 | ukey_stats = &ops[i].ukey_stats; | |
1412 | stats = ops[i].op.u.flow_del.stats; | |
1413 | push.used = MAX(stats->used, ukey_stats->used); | |
1414 | push.tcp_flags = stats->tcp_flags | ukey_stats->tcp_flags; | |
1415 | push.n_packets = stats->n_packets - ukey_stats->n_packets; | |
1416 | push.n_bytes = stats->n_bytes - ukey_stats->n_bytes; | |
1417 | ||
1418 | if (push.n_packets || netflow_exists()) { | |
1419 | struct ofproto_dpif *ofproto; | |
1420 | struct netflow *netflow; | |
1421 | struct flow flow; | |
1422 | ||
1423 | if (!xlate_receive(udpif->backer, NULL, ops[i].op.u.flow_del.key, | |
1424 | ops[i].op.u.flow_del.key_len, &flow, NULL, | |
1425 | &ofproto, NULL, NULL, &netflow, NULL)) { | |
1426 | struct xlate_in xin; | |
1427 | ||
1428 | xlate_in_init(&xin, ofproto, &flow, NULL, push.tcp_flags, | |
1429 | NULL); | |
1430 | xin.resubmit_stats = push.n_packets ? &push : NULL; | |
1431 | xin.may_learn = push.n_packets > 0; | |
1432 | xin.skip_wildcards = true; | |
1433 | xlate_actions_for_side_effects(&xin); | |
1434 | ||
1435 | if (netflow) { | |
1436 | netflow_expire(netflow, &flow); | |
1437 | netflow_flow_clear(netflow, &flow); | |
1438 | netflow_unref(netflow); | |
1439 | } | |
1440 | } | |
1441 | } | |
1442 | } | |
1443 | ||
1444 | LIST_FOR_EACH_SAFE (udump, next_udump, list_node, udumps) { | |
1445 | list_remove(&udump->list_node); | |
1446 | free(udump); | |
1447 | } | |
1448 | } | |
1449 | ||
1450 | static void | |
1451 | revalidator_sweep(struct revalidator *revalidator) | |
1452 | { | |
1453 | struct udpif_key *ukey, *next; | |
1454 | ||
1455 | HMAP_FOR_EACH_SAFE (ukey, next, hmap_node, &revalidator->ukeys) { | |
1456 | if (ukey->mark) { | |
1457 | ukey->mark = false; | |
1458 | } else { | |
1459 | ukey_delete(revalidator, ukey); | |
1460 | } | |
e1ec7dd4 EJ |
1461 | } |
1462 | } | |
e22d52ee EJ |
1463 | \f |
1464 | static void | |
1465 | upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
1466 | const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) | |
1467 | { | |
1468 | struct ds ds = DS_EMPTY_INITIALIZER; | |
1469 | struct udpif *udpif; | |
1470 | ||
1471 | LIST_FOR_EACH (udpif, list_node, &all_udpifs) { | |
e79a6c83 EJ |
1472 | unsigned int flow_limit; |
1473 | long long int max_idle; | |
e22d52ee EJ |
1474 | size_t i; |
1475 | ||
e79a6c83 EJ |
1476 | atomic_read(&udpif->flow_limit, &flow_limit); |
1477 | atomic_read(&udpif->max_idle, &max_idle); | |
1478 | ||
e22d52ee | 1479 | ds_put_format(&ds, "%s:\n", dpif_name(udpif->dpif)); |
e79a6c83 EJ |
1480 | ds_put_format(&ds, "\tflows : (current %"PRIu64")" |
1481 | " (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif), | |
1482 | udpif->avg_n_flows, udpif->max_n_flows, flow_limit); | |
1483 | ds_put_format(&ds, "\tmax idle : %lldms\n", max_idle); | |
1484 | ds_put_format(&ds, "\tdump duration : %lldms\n", udpif->dump_duration); | |
1485 | ||
1486 | ds_put_char(&ds, '\n'); | |
e22d52ee EJ |
1487 | for (i = 0; i < udpif->n_handlers; i++) { |
1488 | struct handler *handler = &udpif->handlers[i]; | |
1489 | ||
1490 | ovs_mutex_lock(&handler->mutex); | |
1491 | ds_put_format(&ds, "\t%s: (upcall queue %"PRIuSIZE")\n", | |
1492 | handler->name, handler->n_upcalls); | |
1493 | ovs_mutex_unlock(&handler->mutex); | |
1494 | } | |
e79a6c83 EJ |
1495 | |
1496 | ds_put_char(&ds, '\n'); | |
1497 | for (i = 0; i < n_revalidators; i++) { | |
1498 | struct revalidator *revalidator = &udpif->revalidators[i]; | |
1499 | ||
1500 | /* XXX: The result of hmap_count(&revalidator->ukeys) may not be | |
1501 | * accurate because it's not protected by the revalidator mutex. */ | |
1502 | ovs_mutex_lock(&revalidator->mutex); | |
1503 | ds_put_format(&ds, "\t%s: (dump queue %"PRIuSIZE") (keys %"PRIuSIZE | |
1504 | ")\n", revalidator->name, revalidator->n_udumps, | |
1505 | hmap_count(&revalidator->ukeys)); | |
1506 | ovs_mutex_unlock(&revalidator->mutex); | |
1507 | } | |
e22d52ee EJ |
1508 | } |
1509 | ||
1510 | unixctl_command_reply(conn, ds_cstr(&ds)); | |
1511 | ds_destroy(&ds); | |
1512 | } | |
e79a6c83 EJ |
1513 | |
1514 | /* Disable using the megaflows. | |
1515 | * | |
1516 | * This command is only needed for advanced debugging, so it's not | |
1517 | * documented in the man page. */ | |
1518 | static void | |
1519 | upcall_unixctl_disable_megaflows(struct unixctl_conn *conn, | |
1520 | int argc OVS_UNUSED, | |
1521 | const char *argv[] OVS_UNUSED, | |
1522 | void *aux OVS_UNUSED) | |
1523 | { | |
1524 | atomic_store(&enable_megaflows, false); | |
1525 | udpif_flush(); | |
1526 | unixctl_command_reply(conn, "megaflows disabled"); | |
1527 | } | |
1528 | ||
1529 | /* Re-enable using megaflows. | |
1530 | * | |
1531 | * This command is only needed for advanced debugging, so it's not | |
1532 | * documented in the man page. */ | |
1533 | static void | |
1534 | upcall_unixctl_enable_megaflows(struct unixctl_conn *conn, | |
1535 | int argc OVS_UNUSED, | |
1536 | const char *argv[] OVS_UNUSED, | |
1537 | void *aux OVS_UNUSED) | |
1538 | { | |
1539 | atomic_store(&enable_megaflows, true); | |
1540 | udpif_flush(); | |
1541 | unixctl_command_reply(conn, "megaflows enabled"); | |
1542 | } |