]> git.proxmox.com Git - ovs.git/blame - lib/dpif.c
datapath-windows: Avoid BSOD when switch context is NULL
[ovs.git] / lib / dpif.c
CommitLineData
064af421 1/*
2f51a7eb 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
96fba48f 18#include "dpif-provider.h"
064af421 19
064af421
BP
20#include <ctype.h>
21#include <errno.h>
064af421 22#include <inttypes.h>
064af421
BP
23#include <stdlib.h>
24#include <string.h>
064af421
BP
25
26#include "coverage.h"
fceef209 27#include "dpctl.h"
064af421
BP
28#include "dynamic-string.h"
29#include "flow.h"
c3827f61 30#include "netdev.h"
064af421 31#include "netlink.h"
7fd91025 32#include "odp-execute.h"
064af421 33#include "odp-util.h"
90bf1e07 34#include "ofp-errors.h"
064af421 35#include "ofp-print.h"
71ce9235 36#include "ofp-util.h"
064af421 37#include "ofpbuf.h"
91088554 38#include "packet-dpif.h"
064af421
BP
39#include "packets.h"
40#include "poll-loop.h"
999401aa 41#include "shash.h"
d0c23a1a 42#include "sset.h"
c97fb132 43#include "timeval.h"
a36de779
PS
44#include "tnl-arp-cache.h"
45#include "tnl-ports.h"
064af421
BP
46#include "util.h"
47#include "valgrind.h"
064af421 48#include "vlog.h"
5136ce49 49
d98e6007 50VLOG_DEFINE_THIS_MODULE(dpif);
064af421 51
d76f09ea
BP
52COVERAGE_DEFINE(dpif_destroy);
53COVERAGE_DEFINE(dpif_port_add);
54COVERAGE_DEFINE(dpif_port_del);
55COVERAGE_DEFINE(dpif_flow_flush);
56COVERAGE_DEFINE(dpif_flow_get);
57COVERAGE_DEFINE(dpif_flow_put);
58COVERAGE_DEFINE(dpif_flow_del);
d76f09ea
BP
59COVERAGE_DEFINE(dpif_execute);
60COVERAGE_DEFINE(dpif_purge);
7fd91025 61COVERAGE_DEFINE(dpif_execute_with_help);
d76f09ea 62
999401aa 63static const struct dpif_class *base_dpif_classes[] = {
93451a0a
AS
64#if defined(__linux__) || defined(_WIN32)
65 &dpif_netlink_class,
c83cdd30 66#endif
72865317 67 &dpif_netdev_class,
c228a364 68};
999401aa
JG
69
/* An entry in 'dpif_classes': a provider together with its usage count. */
struct registered_dpif_class {
    const struct dpif_class *dpif_class;    /* The registered provider. */
    int refcount;                           /* Incremented by
                                             * dp_class_lookup(), decremented
                                             * by dp_class_unref(). */
};
74static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
579a77e0 75static struct sset dpif_blacklist = SSET_INITIALIZER(&dpif_blacklist);
c228a364 76
5703b15f 77/* Protects 'dpif_classes', including the refcount, and 'dpif_blacklist'. */
97be1538 78static struct ovs_mutex dpif_mutex = OVS_MUTEX_INITIALIZER;
5703b15f 79
064af421
BP
80/* Rate limit for individual messages going to or from the datapath, output at
81 * DBG level. This is very high because, if these are enabled, it is because
82 * we really need to see them. */
83static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
84
85/* Not really much point in logging many dpif errors. */
e2781405 86static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
064af421 87
feebdea2
BP
/* Forward declarations of the file-local logging helpers. */
static void log_flow_message(const struct dpif *dpif, int error,
                             const char *operation,
                             const struct nlattr *key, size_t key_len,
                             const struct nlattr *mask, size_t mask_len,
                             const struct dpif_flow_stats *stats,
                             const struct nlattr *actions,
                             size_t actions_len);
static void log_operation(const struct dpif *dpif, const char *operation,
                          int error);
static bool should_log_flow_message(int error);
static void log_flow_put_message(struct dpif *dpif,
                                 const struct dpif_flow_put *put,
                                 int error);
static void log_flow_del_message(struct dpif *dpif,
                                 const struct dpif_flow_del *del,
                                 int error);
static void log_execute_message(struct dpif *dpif,
                                const struct dpif_execute *execute,
                                bool subexecute, int error);
static void log_flow_get_message(const struct dpif *dpif,
                                 const struct dpif_flow_get *get,
                                 int error);
064af421 105
999401aa
JG
106static void
107dp_initialize(void)
108{
eb8ed438 109 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
999401aa 110
eb8ed438 111 if (ovsthread_once_start(&once)) {
999401aa
JG
112 int i;
113
999401aa
JG
114 for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
115 dp_register_provider(base_dpif_classes[i]);
116 }
fceef209 117 dpctl_unixctl_register();
eb8ed438 118 ovsthread_once_done(&once);
a36de779
PS
119 tnl_port_map_init();
120 tnl_arp_cache_init();
999401aa
JG
121 }
122}
123
5703b15f
BP
124static int
125dp_register_provider__(const struct dpif_class *new_class)
999401aa
JG
126{
127 struct registered_dpif_class *registered_class;
128
579a77e0
EJ
129 if (sset_contains(&dpif_blacklist, new_class->type)) {
130 VLOG_DBG("attempted to register blacklisted provider: %s",
131 new_class->type);
132 return EINVAL;
133 }
134
999401aa
JG
135 if (shash_find(&dpif_classes, new_class->type)) {
136 VLOG_WARN("attempted to register duplicate datapath provider: %s",
137 new_class->type);
138 return EEXIST;
139 }
1a6f1e2a 140
999401aa 141 registered_class = xmalloc(sizeof *registered_class);
d2d8fbeb 142 registered_class->dpif_class = new_class;
999401aa
JG
143 registered_class->refcount = 0;
144
145 shash_add(&dpif_classes, new_class->type, registered_class);
146
147 return 0;
148}
149
5703b15f
BP
150/* Registers a new datapath provider. After successful registration, new
151 * datapaths of that type can be opened using dpif_open(). */
152int
153dp_register_provider(const struct dpif_class *new_class)
154{
155 int error;
156
97be1538 157 ovs_mutex_lock(&dpif_mutex);
5703b15f 158 error = dp_register_provider__(new_class);
97be1538 159 ovs_mutex_unlock(&dpif_mutex);
5703b15f
BP
160
161 return error;
162}
163
999401aa
JG
164/* Unregisters a datapath provider. 'type' must have been previously
165 * registered and not currently be in use by any dpifs. After unregistration
166 * new datapaths of that type cannot be opened using dpif_open(). */
5703b15f
BP
167static int
168dp_unregister_provider__(const char *type)
999401aa
JG
169{
170 struct shash_node *node;
171 struct registered_dpif_class *registered_class;
172
173 node = shash_find(&dpif_classes, type);
174 if (!node) {
175 VLOG_WARN("attempted to unregister a datapath provider that is not "
176 "registered: %s", type);
177 return EAFNOSUPPORT;
178 }
179
180 registered_class = node->data;
181 if (registered_class->refcount) {
182 VLOG_WARN("attempted to unregister in use datapath provider: %s", type);
183 return EBUSY;
184 }
185
186 shash_delete(&dpif_classes, node);
187 free(registered_class);
188
189 return 0;
190}
191
5703b15f
BP
192/* Unregisters a datapath provider. 'type' must have been previously
193 * registered and not currently be in use by any dpifs. After unregistration
194 * new datapaths of that type cannot be opened using dpif_open(). */
195int
196dp_unregister_provider(const char *type)
197{
198 int error;
199
200 dp_initialize();
201
97be1538 202 ovs_mutex_lock(&dpif_mutex);
5703b15f 203 error = dp_unregister_provider__(type);
97be1538 204 ovs_mutex_unlock(&dpif_mutex);
5703b15f
BP
205
206 return error;
207}
208
579a77e0
EJ
209/* Blacklists a provider. Causes future calls of dp_register_provider() with
210 * a dpif_class which implements 'type' to fail. */
211void
212dp_blacklist_provider(const char *type)
213{
97be1538 214 ovs_mutex_lock(&dpif_mutex);
579a77e0 215 sset_add(&dpif_blacklist, type);
97be1538 216 ovs_mutex_unlock(&dpif_mutex);
579a77e0
EJ
217}
218
999401aa 219/* Clears 'types' and enumerates the types of all currently registered datapath
d0c23a1a 220 * providers into it. The caller must first initialize the sset. */
1a6f1e2a 221void
d0c23a1a 222dp_enumerate_types(struct sset *types)
1a6f1e2a 223{
999401aa 224 struct shash_node *node;
1a6f1e2a 225
999401aa 226 dp_initialize();
d0c23a1a 227 sset_clear(types);
1a6f1e2a 228
97be1538 229 ovs_mutex_lock(&dpif_mutex);
999401aa
JG
230 SHASH_FOR_EACH(node, &dpif_classes) {
231 const struct registered_dpif_class *registered_class = node->data;
d0c23a1a 232 sset_add(types, registered_class->dpif_class->type);
1a6f1e2a 233 }
97be1538 234 ovs_mutex_unlock(&dpif_mutex);
5703b15f
BP
235}
236
237static void
238dp_class_unref(struct registered_dpif_class *rc)
239{
97be1538 240 ovs_mutex_lock(&dpif_mutex);
5703b15f
BP
241 ovs_assert(rc->refcount);
242 rc->refcount--;
97be1538 243 ovs_mutex_unlock(&dpif_mutex);
5703b15f
BP
244}
245
246static struct registered_dpif_class *
247dp_class_lookup(const char *type)
248{
249 struct registered_dpif_class *rc;
250
97be1538 251 ovs_mutex_lock(&dpif_mutex);
5703b15f
BP
252 rc = shash_find_data(&dpif_classes, type);
253 if (rc) {
254 rc->refcount++;
255 }
97be1538 256 ovs_mutex_unlock(&dpif_mutex);
5703b15f
BP
257
258 return rc;
1a6f1e2a
JG
259}
260
261/* Clears 'names' and enumerates the names of all known created datapaths with
d0c23a1a 262 * the given 'type'. The caller must first initialize the sset. Returns 0 if
1a6f1e2a 263 * successful, otherwise a positive errno value.
d3d22744
BP
264 *
265 * Some kinds of datapaths might not be practically enumerable. This is not
266 * considered an error. */
267int
d0c23a1a 268dp_enumerate_names(const char *type, struct sset *names)
d3d22744 269{
5703b15f 270 struct registered_dpif_class *registered_class;
999401aa
JG
271 const struct dpif_class *dpif_class;
272 int error;
d3d22744 273
999401aa 274 dp_initialize();
d0c23a1a 275 sset_clear(names);
1a6f1e2a 276
5703b15f 277 registered_class = dp_class_lookup(type);
999401aa
JG
278 if (!registered_class) {
279 VLOG_WARN("could not enumerate unknown type: %s", type);
280 return EAFNOSUPPORT;
281 }
1a6f1e2a 282
d2d8fbeb 283 dpif_class = registered_class->dpif_class;
2240af25
DDP
284 error = (dpif_class->enumerate
285 ? dpif_class->enumerate(names, dpif_class)
286 : 0);
999401aa
JG
287 if (error) {
288 VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type,
10a89ef0 289 ovs_strerror(error));
d3d22744 290 }
5703b15f 291 dp_class_unref(registered_class);
1a6f1e2a 292
999401aa 293 return error;
1a6f1e2a
JG
294}
295
54ed8a5d
BP
/* Parses 'datapath_name_', which is of the form [type@]name, into its
 * component pieces.  On return '*name' and '*type' each point to a separately
 * allocated string that the caller must free.
 *
 * The returned 'type' is normalized, as if by dpif_normalize_type(): a missing
 * or empty type becomes the default type.  The returned 'name' is exactly the
 * text after the first '@' (or the whole input if there is no '@'); it is
 * never rewritten. */
void
dp_parse_name(const char *datapath_name_, char **name, char **type)
{
    char *datapath_name = xstrdup(datapath_name_);
    char *separator = strchr(datapath_name, '@');

    if (separator) {
        /* Split the copy in place at the '@': the type is before it, the name
         * after it.  Normalization applies to the type only. */
        *separator = '\0';
        *type = xstrdup(dpif_normalize_type(datapath_name));
        *name = xstrdup(separator + 1);
        free(datapath_name);
    } else {
        /* No explicit type: the whole string is the name. */
        *name = datapath_name;
        *type = xstrdup(dpif_normalize_type(NULL));
    }
}
316
96fba48f 317static int
1a6f1e2a 318do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
064af421 319{
96fba48f 320 struct dpif *dpif = NULL;
064af421 321 int error;
999401aa
JG
322 struct registered_dpif_class *registered_class;
323
324 dp_initialize();
064af421 325
3a225db7 326 type = dpif_normalize_type(type);
5703b15f 327 registered_class = dp_class_lookup(type);
999401aa
JG
328 if (!registered_class) {
329 VLOG_WARN("could not create datapath %s of unknown type %s", name,
330 type);
331 error = EAFNOSUPPORT;
332 goto exit;
333 }
334
4a387741
BP
335 error = registered_class->dpif_class->open(registered_class->dpif_class,
336 name, create, &dpif);
999401aa 337 if (!error) {
cb22974d 338 ovs_assert(dpif->dpif_class == registered_class->dpif_class);
5703b15f
BP
339 } else {
340 dp_class_unref(registered_class);
064af421 341 }
064af421 342
96fba48f
BP
343exit:
344 *dpifp = error ? NULL : dpif;
345 return error;
064af421
BP
346}
347
1a6f1e2a
JG
/* Opens the existing datapath with the given 'name' and 'type'; fails if no
 * such datapath exists.  A NULL or empty 'type' selects the default system
 * type.  Returns 0 if successful, otherwise a positive errno value.  Stores a
 * pointer to the datapath in '*dpifp' on success, a null pointer on
 * failure. */
int
dpif_open(const char *name, const char *type, struct dpif **dpifp)
{
    const bool create = false;

    return do_open(name, type, create, dpifp);
}
358
1a6f1e2a
JG
/* Creates and opens a new datapath with the given 'name' and 'type'; fails if
 * a datapath with that name and type already exists.  A NULL or empty 'type'
 * selects the default system type.  Returns 0 if successful, otherwise a
 * positive errno value.  Stores a pointer to the datapath in '*dpifp' on
 * success, a null pointer on failure. */
int
dpif_create(const char *name, const char *type, struct dpif **dpifp)
{
    const bool create = true;

    return do_open(name, type, create, dpifp);
}
064af421 369
1a6f1e2a
JG
/* Opens the datapath with the given 'name' and 'type', creating it first if
 * it does not exist.  A NULL or empty 'type' selects the default system type.
 * Returns 0 if successful, otherwise a positive errno value.  Stores a pointer
 * to the datapath in '*dpifp' on success, a null pointer on failure.
 *
 * Creation is attempted first; only an EEXIST or EBUSY result from it leads to
 * an attempt to open the existing datapath. */
int
dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp)
{
    int error = dpif_create(name, type, dpifp);

    if (!error) {
        return 0;
    }

    if (error != EEXIST && error != EBUSY) {
        VLOG_WARN("failed to create datapath %s: %s",
                  name, ovs_strerror(error));
        return error;
    }

    error = dpif_open(name, type, dpifp);
    if (error) {
        VLOG_WARN("datapath %s already exists but cannot be opened: %s",
                  name, ovs_strerror(error));
    }
    return error;
}
393
96fba48f
BP
394/* Closes and frees the connection to 'dpif'. Does not destroy the datapath
395 * itself; call dpif_delete() first, instead, if that is desirable. */
396void
397dpif_close(struct dpif *dpif)
398{
399 if (dpif) {
5703b15f 400 struct registered_dpif_class *rc;
999401aa 401
5703b15f 402 rc = shash_find_data(&dpif_classes, dpif->dpif_class->type);
999401aa 403 dpif_uninit(dpif, true);
5703b15f 404 dp_class_unref(rc);
064af421
BP
405 }
406}
407
640e1b20 408/* Performs periodic work needed by 'dpif'. */
a36de779 409bool
640e1b20
BP
410dpif_run(struct dpif *dpif)
411{
412 if (dpif->dpif_class->run) {
a36de779 413 return dpif->dpif_class->run(dpif);
640e1b20 414 }
a36de779 415 return false;
640e1b20
BP
416}
417
418/* Arranges for poll_block() to wake up when dp_run() needs to be called for
419 * 'dpif'. */
420void
421dpif_wait(struct dpif *dpif)
422{
423 if (dpif->dpif_class->wait) {
424 dpif->dpif_class->wait(dpif);
425 }
426}
427
1a6f1e2a
JG
428/* Returns the name of datapath 'dpif' prefixed with the type
429 * (for use in log messages). */
b29ba128
BP
430const char *
431dpif_name(const struct dpif *dpif)
432{
1a6f1e2a
JG
433 return dpif->full_name;
434}
435
436/* Returns the name of datapath 'dpif' without the type
437 * (for use in device names). */
438const char *
439dpif_base_name(const struct dpif *dpif)
440{
441 return dpif->base_name;
b29ba128
BP
442}
443
c7a26215
JP
444/* Returns the type of datapath 'dpif'. */
445const char *
446dpif_type(const struct dpif *dpif)
447{
448 return dpif->dpif_class->type;
449}
450
3a225db7
BP
/* Returns the fully spelled out name for the given datapath 'type': "system"
 * if 'type' is NULL or the empty string, otherwise 'type' itself.
 *
 * Normalized type strings can be compared with strcmp().  Unnormalized type
 * strings might refer to the same type even if they are spelled
 * differently. */
const char *
dpif_normalize_type(const char *type)
{
    if (type == NULL || type[0] == '\0') {
        return "system";
    }
    return type;
}
460
96fba48f
BP
461/* Destroys the datapath that 'dpif' is connected to, first removing all of its
462 * ports. After calling this function, it does not make sense to pass 'dpif'
463 * to any functions other than dpif_name() or dpif_close(). */
064af421
BP
464int
465dpif_delete(struct dpif *dpif)
466{
96fba48f
BP
467 int error;
468
064af421 469 COVERAGE_INC(dpif_destroy);
96fba48f 470
1acb6baa 471 error = dpif->dpif_class->destroy(dpif);
96fba48f
BP
472 log_operation(dpif, "delete", error);
473 return error;
064af421
BP
474}
475
96fba48f
BP
476/* Retrieves statistics for 'dpif' into 'stats'. Returns 0 if successful,
477 * otherwise a positive errno value. */
064af421 478int
a8d9304d 479dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
064af421 480{
1acb6baa 481 int error = dpif->dpif_class->get_stats(dpif, stats);
96fba48f
BP
482 if (error) {
483 memset(stats, 0, sizeof *stats);
484 }
485 log_operation(dpif, "get_stats", error);
486 return error;
064af421
BP
487}
488
0aeaabc8
JP
489const char *
490dpif_port_open_type(const char *datapath_type, const char *port_type)
491{
5703b15f 492 struct registered_dpif_class *rc;
0aeaabc8
JP
493
494 datapath_type = dpif_normalize_type(datapath_type);
495
97be1538 496 ovs_mutex_lock(&dpif_mutex);
5703b15f
BP
497 rc = shash_find_data(&dpif_classes, datapath_type);
498 if (rc && rc->dpif_class->port_open_type) {
499 port_type = rc->dpif_class->port_open_type(rc->dpif_class, port_type);
0aeaabc8 500 }
97be1538 501 ovs_mutex_unlock(&dpif_mutex);
0aeaabc8 502
5703b15f 503 return port_type;
0aeaabc8
JP
504}
505
232dfa4a 506/* Attempts to add 'netdev' as a port on 'dpif'. If 'port_nop' is
4e022ec0 507 * non-null and its value is not ODPP_NONE, then attempts to use the
232dfa4a
JP
508 * value as the port number.
509 *
510 * If successful, returns 0 and sets '*port_nop' to the new port's port
511 * number (if 'port_nop' is non-null). On failure, returns a positive
4e022ec0 512 * errno value and sets '*port_nop' to ODPP_NONE (if 'port_nop' is
232dfa4a 513 * non-null). */
064af421 514int
4e022ec0 515dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)
064af421 516{
c3827f61 517 const char *netdev_name = netdev_get_name(netdev);
4e022ec0 518 odp_port_t port_no = ODPP_NONE;
9ee3ae3e 519 int error;
064af421
BP
520
521 COVERAGE_INC(dpif_port_add);
9ee3ae3e 522
232dfa4a
JP
523 if (port_nop) {
524 port_no = *port_nop;
525 }
526
c3827f61 527 error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
9ee3ae3e 528 if (!error) {
9b56fe13 529 VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32,
c3827f61 530 dpif_name(dpif), netdev_name, port_no);
064af421 531 } else {
9ee3ae3e 532 VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
10a89ef0 533 dpif_name(dpif), netdev_name, ovs_strerror(error));
4e022ec0 534 port_no = ODPP_NONE;
9ee3ae3e
BP
535 }
536 if (port_nop) {
537 *port_nop = port_no;
064af421 538 }
9ee3ae3e 539 return error;
064af421
BP
540}
541
96fba48f
BP
542/* Attempts to remove 'dpif''s port number 'port_no'. Returns 0 if successful,
543 * otherwise a positive errno value. */
064af421 544int
4e022ec0 545dpif_port_del(struct dpif *dpif, odp_port_t port_no)
064af421 546{
96fba48f
BP
547 int error;
548
064af421 549 COVERAGE_INC(dpif_port_del);
96fba48f 550
1acb6baa 551 error = dpif->dpif_class->port_del(dpif, port_no);
a1811296 552 if (!error) {
9b56fe13 553 VLOG_DBG_RL(&dpmsg_rl, "%s: port_del(%"PRIu32")",
a1811296
BP
554 dpif_name(dpif), port_no);
555 } else {
556 log_operation(dpif, "port_del", error);
557 }
96fba48f 558 return error;
064af421
BP
559}
560
4c738a8d
BP
561/* Makes a deep copy of 'src' into 'dst'. */
562void
563dpif_port_clone(struct dpif_port *dst, const struct dpif_port *src)
564{
565 dst->name = xstrdup(src->name);
566 dst->type = xstrdup(src->type);
567 dst->port_no = src->port_no;
568}
569
570/* Frees memory allocated to members of 'dpif_port'.
571 *
572 * Do not call this function on a dpif_port obtained from
573 * dpif_port_dump_next(): that function retains ownership of the data in the
574 * dpif_port. */
575void
576dpif_port_destroy(struct dpif_port *dpif_port)
577{
578 free(dpif_port->name);
579 free(dpif_port->type);
580}
581
4afba28d
JP
582/* Checks if port named 'devname' exists in 'dpif'. If so, returns
583 * true; otherwise, returns false. */
584bool
585dpif_port_exists(const struct dpif *dpif, const char *devname)
586{
587 int error = dpif->dpif_class->port_query_by_name(dpif, devname, NULL);
bee6b8bc 588 if (error != 0 && error != ENOENT && error != ENODEV) {
4afba28d 589 VLOG_WARN_RL(&error_rl, "%s: failed to query port %s: %s",
10a89ef0 590 dpif_name(dpif), devname, ovs_strerror(error));
4afba28d
JP
591 }
592
593 return !error;
594}
595
96fba48f
BP
596/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and
597 * initializes '*port' appropriately; on failure, returns a positive errno
4c738a8d
BP
598 * value.
599 *
600 * The caller owns the data in 'port' and must free it with
601 * dpif_port_destroy() when it is no longer needed. */
064af421 602int
4e022ec0 603dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 604 struct dpif_port *port)
064af421 605{
1acb6baa 606 int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port);
96fba48f 607 if (!error) {
9b56fe13 608 VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu32" is device %s",
4c738a8d 609 dpif_name(dpif), port_no, port->name);
064af421 610 } else {
96fba48f 611 memset(port, 0, sizeof *port);
9b56fe13 612 VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s",
10a89ef0 613 dpif_name(dpif), port_no, ovs_strerror(error));
064af421 614 }
96fba48f 615 return error;
064af421
BP
616}
617
96fba48f
BP
618/* Looks up port named 'devname' in 'dpif'. On success, returns 0 and
619 * initializes '*port' appropriately; on failure, returns a positive errno
4c738a8d
BP
620 * value.
621 *
622 * The caller owns the data in 'port' and must free it with
623 * dpif_port_destroy() when it is no longer needed. */
064af421
BP
624int
625dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 626 struct dpif_port *port)
064af421 627{
1acb6baa 628 int error = dpif->dpif_class->port_query_by_name(dpif, devname, port);
96fba48f 629 if (!error) {
9b56fe13 630 VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu32,
4c738a8d 631 dpif_name(dpif), devname, port->port_no);
064af421 632 } else {
96fba48f
BP
633 memset(port, 0, sizeof *port);
634
d647f0a7
BP
635 /* For ENOENT or ENODEV we use DBG level because the caller is probably
636 * interested in whether 'dpif' actually has a port 'devname', so that
637 * it's not an issue worth logging if it doesn't. Other errors are
638 * uncommon and more likely to indicate a real problem. */
639 VLOG_RL(&error_rl,
640 error == ENOENT || error == ENODEV ? VLL_DBG : VLL_WARN,
641 "%s: failed to query port %s: %s",
10a89ef0 642 dpif_name(dpif), devname, ovs_strerror(error));
064af421 643 }
96fba48f 644 return error;
064af421
BP
645}
646
1954e6bb
AW
647/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
648 * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
649 * flows whose packets arrived on port 'port_no'. In the case where the
650 * provider allocates multiple Netlink PIDs to a single port, it may use
651 * 'hash' to spread load among them. The caller need not use a particular
652 * hash function; a 5-tuple hash is suitable.
653 *
654 * (The datapath implementation might use some different hash function for
655 * distributing packets received via flow misses among PIDs. This means
656 * that packets received via flow misses might be reordered relative to
657 * packets received via userspace actions. This is not ordinarily a
658 * problem.)
98403001 659 *
4e022ec0 660 * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
625b0720
BP
661 * allocated to any port, that the client may use for special purposes.
662 *
98403001
BP
663 * The return value is only meaningful when DPIF_UC_ACTION has been enabled in
664 * the 'dpif''s listen mask. It is allowed to change when DPIF_UC_ACTION is
665 * disabled and then re-enabled, so a client that does that must be prepared to
666 * update all of the flows that it installed that contain
667 * OVS_ACTION_ATTR_USERSPACE actions. */
668uint32_t
1954e6bb 669dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash)
98403001
BP
670{
671 return (dpif->dpif_class->port_get_pid
1954e6bb 672 ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash)
98403001
BP
673 : 0);
674}
675
96fba48f
BP
676/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies
677 * the port's name into the 'name_size' bytes in 'name', ensuring that the
678 * result is null-terminated. On failure, returns a positive errno value and
679 * makes 'name' the empty string. */
335562c0 680int
4e022ec0 681dpif_port_get_name(struct dpif *dpif, odp_port_t port_no,
335562c0
BP
682 char *name, size_t name_size)
683{
4c738a8d 684 struct dpif_port port;
335562c0
BP
685 int error;
686
cb22974d 687 ovs_assert(name_size > 0);
335562c0
BP
688
689 error = dpif_port_query_by_number(dpif, port_no, &port);
690 if (!error) {
4c738a8d
BP
691 ovs_strlcpy(name, port.name, name_size);
692 dpif_port_destroy(&port);
335562c0
BP
693 } else {
694 *name = '\0';
695 }
696 return error;
697}
698
b0ec0f27 699/* Initializes 'dump' to begin dumping the ports in a dpif.
96fba48f 700 *
b0ec0f27
BP
701 * This function provides no status indication. An error status for the entire
702 * dump operation is provided when it is completed by calling
703 * dpif_port_dump_done().
704 */
705void
706dpif_port_dump_start(struct dpif_port_dump *dump, const struct dpif *dpif)
707{
708 dump->dpif = dpif;
709 dump->error = dpif->dpif_class->port_dump_start(dpif, &dump->state);
710 log_operation(dpif, "port_dump_start", dump->error);
711}
712
713/* Attempts to retrieve another port from 'dump', which must have been
4c738a8d 714 * initialized with dpif_port_dump_start(). On success, stores a new dpif_port
b0ec0f27 715 * into 'port' and returns true. On failure, returns false.
96fba48f 716 *
b0ec0f27
BP
717 * Failure might indicate an actual error or merely that the last port has been
718 * dumped. An error status for the entire dump operation is provided when it
4c738a8d
BP
719 * is completed by calling dpif_port_dump_done().
720 *
721 * The dpif owns the data stored in 'port'. It will remain valid until at
722 * least the next time 'dump' is passed to dpif_port_dump_next() or
723 * dpif_port_dump_done(). */
b0ec0f27 724bool
4c738a8d 725dpif_port_dump_next(struct dpif_port_dump *dump, struct dpif_port *port)
064af421 726{
b0ec0f27 727 const struct dpif *dpif = dump->dpif;
064af421 728
b0ec0f27
BP
729 if (dump->error) {
730 return false;
731 }
f4ba4c4f 732
b0ec0f27
BP
733 dump->error = dpif->dpif_class->port_dump_next(dpif, dump->state, port);
734 if (dump->error == EOF) {
735 VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all ports", dpif_name(dpif));
736 } else {
737 log_operation(dpif, "port_dump_next", dump->error);
738 }
064af421 739
b0ec0f27
BP
740 if (dump->error) {
741 dpif->dpif_class->port_dump_done(dpif, dump->state);
742 return false;
f4ba4c4f 743 }
b0ec0f27
BP
744 return true;
745}
064af421 746
b0ec0f27
BP
747/* Completes port table dump operation 'dump', which must have been initialized
748 * with dpif_port_dump_start(). Returns 0 if the dump operation was
749 * error-free, otherwise a positive errno value describing the problem. */
750int
751dpif_port_dump_done(struct dpif_port_dump *dump)
752{
753 const struct dpif *dpif = dump->dpif;
754 if (!dump->error) {
755 dump->error = dpif->dpif_class->port_dump_done(dpif, dump->state);
756 log_operation(dpif, "port_dump_done", dump->error);
f4ba4c4f 757 }
b0ec0f27 758 return dump->error == EOF ? 0 : dump->error;
064af421
BP
759}
760
e9e28be3
BP
761/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
762 * 'dpif' has changed, this function does one of the following:
763 *
764 * - Stores the name of the device that was added to or deleted from 'dpif' in
765 * '*devnamep' and returns 0. The caller is responsible for freeing
766 * '*devnamep' (with free()) when it no longer needs it.
767 *
768 * - Returns ENOBUFS and sets '*devnamep' to NULL.
769 *
770 * This function may also return 'false positives', where it returns 0 and
771 * '*devnamep' names a device that was not actually added or deleted or it
772 * returns ENOBUFS without any change.
773 *
774 * Returns EAGAIN if the set of ports in 'dpif' has not changed. May also
775 * return other positive errno values to indicate that something has gone
776 * wrong. */
777int
778dpif_port_poll(const struct dpif *dpif, char **devnamep)
779{
1acb6baa 780 int error = dpif->dpif_class->port_poll(dpif, devnamep);
e9e28be3
BP
781 if (error) {
782 *devnamep = NULL;
783 }
784 return error;
785}
786
787/* Arranges for the poll loop to wake up when port_poll(dpif) will return a
788 * value other than EAGAIN. */
789void
790dpif_port_poll_wait(const struct dpif *dpif)
791{
1acb6baa 792 dpif->dpif_class->port_poll_wait(dpif);
e9e28be3
BP
793}
794
572b7068 795/* Extracts the flow stats for a packet. The 'flow' and 'packet'
a7752d4a
BP
796 * arguments must have been initialized through a call to flow_extract().
797 * 'used' is stored into stats->used. */
572b7068 798void
a39edbd4 799dpif_flow_stats_extract(const struct flow *flow, const struct ofpbuf *packet,
a7752d4a 800 long long int used, struct dpif_flow_stats *stats)
572b7068 801{
e0eecb1c 802 stats->tcp_flags = ntohs(flow->tcp_flags);
1f317cb5 803 stats->n_bytes = ofpbuf_size(packet);
572b7068 804 stats->n_packets = 1;
a7752d4a 805 stats->used = used;
572b7068
BP
806}
807
c97fb132
BP
808/* Appends a human-readable representation of 'stats' to 's'. */
809void
810dpif_flow_stats_format(const struct dpif_flow_stats *stats, struct ds *s)
811{
812 ds_put_format(s, "packets:%"PRIu64", bytes:%"PRIu64", used:",
813 stats->n_packets, stats->n_bytes);
814 if (stats->used) {
815 ds_put_format(s, "%.3fs", (time_msec() - stats->used) / 1000.0);
816 } else {
817 ds_put_format(s, "never");
818 }
7393104d
BP
819 if (stats->tcp_flags) {
820 ds_put_cstr(s, ", flags:");
821 packet_format_tcp_flags(s, stats->tcp_flags);
822 }
c97fb132
BP
823}
824
96fba48f
BP
825/* Deletes all flows from 'dpif'. Returns 0 if successful, otherwise a
826 * positive errno value. */
827int
828dpif_flow_flush(struct dpif *dpif)
064af421 829{
96fba48f
BP
830 int error;
831
832 COVERAGE_INC(dpif_flow_flush);
833
1acb6baa 834 error = dpif->dpif_class->flow_flush(dpif);
96fba48f
BP
835 log_operation(dpif, "flow_flush", error);
836 return error;
064af421
BP
837}
838
6fe09f8c 839/* A dpif_operate() wrapper for performing a single DPIF_OP_FLOW_GET. */
96fba48f 840int
6fe09f8c 841dpif_flow_get(struct dpif *dpif,
feebdea2 842 const struct nlattr *key, size_t key_len,
6fe09f8c 843 struct ofpbuf *buf, struct dpif_flow *flow)
064af421 844{
6fe09f8c
JS
845 struct dpif_op *opp;
846 struct dpif_op op;
96fba48f 847
6fe09f8c
JS
848 op.type = DPIF_OP_FLOW_GET;
849 op.u.flow_get.key = key;
850 op.u.flow_get.key_len = key_len;
851 op.u.flow_get.buffer = buf;
852 op.u.flow_get.flow = flow;
853 op.u.flow_get.flow->key = key;
854 op.u.flow_get.flow->key_len = key_len;
96fba48f 855
6fe09f8c
JS
856 opp = &op;
857 dpif_operate(dpif, &opp, 1);
858
859 return op.error;
064af421
BP
860}
861
1a0c894a 862/* A dpif_operate() wrapper for performing a single DPIF_OP_FLOW_PUT. */
064af421 863int
ba25b8f4 864dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags,
feebdea2 865 const struct nlattr *key, size_t key_len,
e6cc0bab 866 const struct nlattr *mask, size_t mask_len,
feebdea2 867 const struct nlattr *actions, size_t actions_len,
c97fb132 868 struct dpif_flow_stats *stats)
064af421 869{
1a0c894a
BP
870 struct dpif_op *opp;
871 struct dpif_op op;
872
873 op.type = DPIF_OP_FLOW_PUT;
874 op.u.flow_put.flags = flags;
875 op.u.flow_put.key = key;
876 op.u.flow_put.key_len = key_len;
877 op.u.flow_put.mask = mask;
878 op.u.flow_put.mask_len = mask_len;
879 op.u.flow_put.actions = actions;
880 op.u.flow_put.actions_len = actions_len;
881 op.u.flow_put.stats = stats;
882
883 opp = &op;
884 dpif_operate(dpif, &opp, 1);
885
886 return op.error;
064af421
BP
887}
888
1a0c894a 889/* A dpif_operate() wrapper for performing a single DPIF_OP_FLOW_DEL. */
064af421 890int
feebdea2
BP
891dpif_flow_del(struct dpif *dpif,
892 const struct nlattr *key, size_t key_len,
c97fb132 893 struct dpif_flow_stats *stats)
064af421 894{
1a0c894a
BP
895 struct dpif_op *opp;
896 struct dpif_op op;
897
898 op.type = DPIF_OP_FLOW_DEL;
899 op.u.flow_del.key = key;
900 op.u.flow_del.key_len = key_len;
901 op.u.flow_del.stats = stats;
f1aa2072 902
1a0c894a
BP
903 opp = &op;
904 dpif_operate(dpif, &opp, 1);
905
906 return op.error;
064af421
BP
907}
908
ac64794a
BP
909/* Creates and returns a new 'struct dpif_flow_dump' for iterating through the
910 * flows in 'dpif'.
911 *
912 * This function always successfully returns a dpif_flow_dump. Error
913 * reporting is deferred to dpif_flow_dump_destroy(). */
914struct dpif_flow_dump *
915dpif_flow_dump_create(const struct dpif *dpif)
e723fd32 916{
ac64794a 917 return dpif->dpif_class->flow_dump_create(dpif);
e723fd32
JS
918}
919
ac64794a
BP
920/* Destroys 'dump', which must have been created with dpif_flow_dump_create().
921 * All dpif_flow_dump_thread structures previously created for 'dump' must
922 * previously have been destroyed.
923 *
924 * Returns 0 if the dump operation was error-free, otherwise a positive errno
925 * value describing the problem. */
926int
927dpif_flow_dump_destroy(struct dpif_flow_dump *dump)
e723fd32 928{
ac64794a
BP
929 const struct dpif *dpif = dump->dpif;
930 int error = dpif->dpif_class->flow_dump_destroy(dump);
931 log_operation(dpif, "flow_dump_destroy", error);
932 return error == EOF ? 0 : error;
e723fd32
JS
933}
934
ac64794a
BP
935/* Returns new thread-local state for use with dpif_flow_dump_next(). */
936struct dpif_flow_dump_thread *
937dpif_flow_dump_thread_create(struct dpif_flow_dump *dump)
064af421 938{
ac64794a 939 return dump->dpif->dpif_class->flow_dump_thread_create(dump);
064af421
BP
940}
941
ac64794a
BP
942/* Releases 'thread'. */
943void
944dpif_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread)
704a1e09 945{
ac64794a 946 thread->dpif->dpif_class->flow_dump_thread_destroy(thread);
704a1e09
BP
947}
948
ac64794a
BP
949/* Attempts to retrieve up to 'max_flows' more flows from 'thread'. Returns 0
950 * if and only if no flows remained to be retrieved, otherwise a positive
951 * number reflecting the number of elements in 'flows[]' that were updated.
952 * The number of flows returned might be less than 'max_flows' because
953 * fewer than 'max_flows' remained, because this particular datapath does not
954 * benefit from batching, or because an error occurred partway through
955 * retrieval. Thus, the caller should continue calling until a 0 return value,
956 * even if intermediate return values are less than 'max_flows'.
bdeadfdd 957 *
ac64794a
BP
958 * No error status is immediately provided. An error status for the entire
959 * dump operation is provided when it is completed by calling
960 * dpif_flow_dump_destroy().
bdeadfdd 961 *
ac64794a
BP
962 * All of the data stored into 'flows' is owned by the datapath, not by the
963 * caller, and the caller must not modify or free it. The datapath guarantees
58df55ce
JS
964 * that it remains accessible and unchanged until the first of:
965 * - The next call to dpif_flow_dump_next() for 'thread', or
966 * - The next rcu quiescent period. */
704a1e09 967int
ac64794a
BP
968dpif_flow_dump_next(struct dpif_flow_dump_thread *thread,
969 struct dpif_flow *flows, int max_flows)
704a1e09 970{
ac64794a
BP
971 struct dpif *dpif = thread->dpif;
972 int n;
973
974 ovs_assert(max_flows > 0);
975 n = dpif->dpif_class->flow_dump_next(thread, flows, max_flows);
976 if (n > 0) {
977 struct dpif_flow *f;
978
979 for (f = flows; f < &flows[n] && should_log_flow_message(0); f++) {
980 log_flow_message(dpif, 0, "flow_dump",
981 f->key, f->key_len, f->mask, f->mask_len,
982 &f->stats, f->actions, f->actions_len);
983 }
984 } else {
985 VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
986 }
987 return n;
064af421
BP
988}
989
7fd91025
BP
/* Context passed as 'aux' to dpif_execute_helper_cb() by
 * dpif_execute_with_help(). */
struct dpif_execute_helper_aux {
    struct dpif *dpif;          /* Datapath on which to execute actions. */
    int error;                  /* Result of the callback's dpif_execute(). */
};
994
09f9da0b
JR
995/* This is called for actions that need the context of the datapath to be
996 * meaningful. */
7fd91025 997static void
8cbf4f47 998dpif_execute_helper_cb(void *aux_, struct dpif_packet **packets, int cnt,
09f9da0b 999 const struct nlattr *action, bool may_steal OVS_UNUSED)
7fd91025 1000{
758c456d 1001 struct dpif_execute_helper_aux *aux = aux_;
09f9da0b 1002 int type = nl_attr_type(action);
41ccaa24
PS
1003 struct ofpbuf *packet = &packets[0]->ofpbuf;
1004 struct pkt_metadata *md = &packets[0]->md;
8cbf4f47
DDP
1005
1006 ovs_assert(cnt == 1);
758c456d 1007
09f9da0b
JR
1008 switch ((enum ovs_action_attr)type) {
1009 case OVS_ACTION_ATTR_OUTPUT:
a36de779
PS
1010 case OVS_ACTION_ATTR_TUNNEL_PUSH:
1011 case OVS_ACTION_ATTR_TUNNEL_POP:
09f9da0b 1012 case OVS_ACTION_ATTR_USERSPACE:
2b651e44
BP
1013 case OVS_ACTION_ATTR_RECIRC: {
1014 struct dpif_execute execute;
1015 struct ofpbuf execute_actions;
1016 uint64_t stub[256 / 8];
1017
1018 if (md->tunnel.ip_dst) {
1019 /* The Linux kernel datapath throws away the tunnel information
1020 * that we supply as metadata. We have to use a "set" action to
1021 * supply it. */
1022 ofpbuf_use_stub(&execute_actions, stub, sizeof stub);
1023 odp_put_tunnel_action(&md->tunnel, &execute_actions);
1024 ofpbuf_put(&execute_actions, action, NLA_ALIGN(action->nla_len));
1025
1026 execute.actions = ofpbuf_data(&execute_actions);
1027 execute.actions_len = ofpbuf_size(&execute_actions);
1028 } else {
1029 execute.actions = action;
1030 execute.actions_len = NLA_ALIGN(action->nla_len);
1031 }
1032
8cbf4f47 1033 execute.packet = packet;
758c456d
JR
1034 execute.md = *md;
1035 execute.needs_help = false;
43f9ac0a 1036 execute.probe = false;
1a0c894a 1037 aux->error = dpif_execute(aux->dpif, &execute);
fc65bafc
BP
1038 log_execute_message(aux->dpif, &execute, true, aux->error);
1039
2b651e44
BP
1040 if (md->tunnel.ip_dst) {
1041 ofpbuf_uninit(&execute_actions);
1042 }
09f9da0b 1043 break;
2b651e44 1044 }
758c456d 1045
c6bf49f3 1046 case OVS_ACTION_ATTR_HASH:
09f9da0b
JR
1047 case OVS_ACTION_ATTR_PUSH_VLAN:
1048 case OVS_ACTION_ATTR_POP_VLAN:
1049 case OVS_ACTION_ATTR_PUSH_MPLS:
1050 case OVS_ACTION_ATTR_POP_MPLS:
1051 case OVS_ACTION_ATTR_SET:
6d670e7f 1052 case OVS_ACTION_ATTR_SET_MASKED:
09f9da0b
JR
1053 case OVS_ACTION_ATTR_SAMPLE:
1054 case OVS_ACTION_ATTR_UNSPEC:
1055 case __OVS_ACTION_ATTR_MAX:
1056 OVS_NOT_REACHED();
1057 }
7fd91025
BP
1058}
1059
1060/* Executes 'execute' by performing most of the actions in userspace and
1061 * passing the fully constructed packets to 'dpif' for output and userspace
1062 * actions.
1063 *
1064 * This helps with actions that a given 'dpif' doesn't implement directly. */
1065static int
758c456d 1066dpif_execute_with_help(struct dpif *dpif, struct dpif_execute *execute)
7fd91025 1067{
758c456d 1068 struct dpif_execute_helper_aux aux = {dpif, 0};
8cbf4f47 1069 struct dpif_packet packet, *pp;
7fd91025
BP
1070
1071 COVERAGE_INC(dpif_execute_with_help);
1072
91088554 1073 packet.ofpbuf = *execute->packet;
41ccaa24 1074 packet.md = execute->md;
8cbf4f47 1075 pp = &packet;
91088554 1076
41ccaa24 1077 odp_execute_actions(&aux, &pp, 1, false, execute->actions,
91088554
DDP
1078 execute->actions_len, dpif_execute_helper_cb);
1079
1080 /* Even though may_steal is set to false, some actions could modify or
1081 * reallocate the ofpbuf memory. We need to pass those changes to the
1082 * caller */
1083 *execute->packet = packet.ofpbuf;
41ccaa24 1084 execute->md = packet.md;
91088554 1085
7fd91025
BP
1086 return aux.error;
1087}
1088
87e5119b
BP
1089/* Returns true if the datapath needs help executing 'execute'. */
1090static bool
1091dpif_execute_needs_help(const struct dpif_execute *execute)
1092{
1093 return execute->needs_help || nl_attr_oversized(execute->actions_len);
1094}
1095
1a0c894a 1096/* A dpif_operate() wrapper for performing a single DPIF_OP_EXECUTE. */
758c456d
JR
1097int
1098dpif_execute(struct dpif *dpif, struct dpif_execute *execute)
89625d1e 1099{
1a0c894a
BP
1100 if (execute->actions_len) {
1101 struct dpif_op *opp;
1102 struct dpif_op op;
89625d1e 1103
1a0c894a
BP
1104 op.type = DPIF_OP_EXECUTE;
1105 op.u.execute = *execute;
89625d1e 1106
1a0c894a
BP
1107 opp = &op;
1108 dpif_operate(dpif, &opp, 1);
89625d1e 1109
1a0c894a
BP
1110 return op.error;
1111 } else {
1112 return 0;
1113 }
89625d1e
BP
1114}
1115
6bc60024 1116/* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in
1a0c894a
BP
1117 * which they are specified. Places each operation's results in the "output"
1118 * members documented in comments, and 0 in the 'error' member on success or a
1119 * positive errno on failure. */
6bc60024 1120void
c2b565b5 1121dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
6bc60024 1122{
1a0c894a
BP
1123 while (n_ops > 0) {
1124 size_t chunk;
7fd91025 1125
1a0c894a
BP
1126 /* Count 'chunk', the number of ops that can be executed without
1127 * needing any help. Ops that need help should be rare, so we
1128 * expect this to ordinarily be 'n_ops', that is, all the ops. */
1129 for (chunk = 0; chunk < n_ops; chunk++) {
1130 struct dpif_op *op = ops[chunk];
1131
1132 if (op->type == DPIF_OP_EXECUTE
1133 && dpif_execute_needs_help(&op->u.execute)) {
1134 break;
1135 }
1136 }
7fd91025 1137
1a0c894a
BP
1138 if (chunk) {
1139 /* Execute a chunk full of ops that the dpif provider can
1140 * handle itself, without help. */
1141 size_t i;
7fd91025 1142
1a0c894a 1143 dpif->dpif_class->operate(dpif, ops, chunk);
7fd91025 1144
1a0c894a
BP
1145 for (i = 0; i < chunk; i++) {
1146 struct dpif_op *op = ops[i];
1147 int error = op->error;
7fd91025 1148
1a0c894a
BP
1149 switch (op->type) {
1150 case DPIF_OP_FLOW_PUT: {
1151 struct dpif_flow_put *put = &op->u.flow_put;
7fd91025 1152
1a0c894a
BP
1153 COVERAGE_INC(dpif_flow_put);
1154 log_flow_put_message(dpif, put, error);
1155 if (error && put->stats) {
1156 memset(put->stats, 0, sizeof *put->stats);
7fd91025 1157 }
1a0c894a 1158 break;
7fd91025
BP
1159 }
1160
6fe09f8c
JS
1161 case DPIF_OP_FLOW_GET: {
1162 struct dpif_flow_get *get = &op->u.flow_get;
1163
1164 COVERAGE_INC(dpif_flow_get);
6fe09f8c
JS
1165 if (error) {
1166 memset(get->flow, 0, sizeof *get->flow);
1167 }
72d52166
MC
1168 log_flow_get_message(dpif, get, error);
1169
6fe09f8c
JS
1170 break;
1171 }
1172
1a0c894a
BP
1173 case DPIF_OP_FLOW_DEL: {
1174 struct dpif_flow_del *del = &op->u.flow_del;
7fd91025 1175
1a0c894a
BP
1176 COVERAGE_INC(dpif_flow_del);
1177 log_flow_del_message(dpif, del, error);
1178 if (error && del->stats) {
1179 memset(del->stats, 0, sizeof *del->stats);
1180 }
1181 break;
1182 }
f23d2845 1183
1a0c894a
BP
1184 case DPIF_OP_EXECUTE:
1185 COVERAGE_INC(dpif_execute);
1186 log_execute_message(dpif, &op->u.execute, false, error);
1187 break;
1188 }
1189 }
b99d3cee 1190
1a0c894a
BP
1191 ops += chunk;
1192 n_ops -= chunk;
1193 } else {
1194 /* Help the dpif provider to execute one op. */
1195 struct dpif_op *op = ops[0];
b99d3cee 1196
1a0c894a
BP
1197 COVERAGE_INC(dpif_execute);
1198 op->error = dpif_execute_with_help(dpif, &op->u.execute);
1199 ops++;
1200 n_ops--;
6bc60024
BP
1201 }
1202 }
1203}
1204
01545c1a
BP
1205/* Returns a string that represents 'type', for use in log messages. */
1206const char *
1207dpif_upcall_type_to_string(enum dpif_upcall_type type)
1208{
1209 switch (type) {
1210 case DPIF_UC_MISS: return "miss";
1211 case DPIF_UC_ACTION: return "action";
01545c1a
BP
1212 case DPIF_N_UC_TYPES: default: return "<unknown>";
1213 }
1214}
1215
a12b3ead
BP
1216/* Enables or disables receiving packets with dpif_recv() on 'dpif'. Returns 0
1217 * if successful, otherwise a positive errno value.
98403001 1218 *
a12b3ead 1219 * Turning packet receive off and then back on may change the Netlink PID
98403001
BP
1220 * assignments returned by dpif_port_get_pid(). If the client does this, it
1221 * must update all of the flows that have OVS_ACTION_ATTR_USERSPACE actions
1222 * using the new PID assignment. */
8f24562a 1223int
a12b3ead 1224dpif_recv_set(struct dpif *dpif, bool enable)
8f24562a 1225{
6b31e073
RW
1226 int error = 0;
1227
1228 if (dpif->dpif_class->recv_set) {
1229 error = dpif->dpif_class->recv_set(dpif, enable);
1230 log_operation(dpif, "recv_set", error);
1231 }
96fba48f 1232 return error;
8f24562a
BP
1233}
1234
1954e6bb
AW
1235/* Refreshes the poll loops and Netlink sockets associated to each port,
1236 * when the number of upcall handlers (upcall receiving thread) is changed
1237 * to 'n_handlers' and receiving packets for 'dpif' is enabled by
1238 * recv_set().
1239 *
1240 * Since multiple upcall handlers can read upcalls simultaneously from
1241 * 'dpif', each port can have multiple Netlink sockets, one per upcall
1242 * handler. So, handlers_set() is responsible for the following tasks:
1243 *
1244 * When receiving upcall is enabled, extends or creates the
1245 * configuration to support:
1246 *
1247 * - 'n_handlers' Netlink sockets for each port.
1248 *
1249 * - 'n_handlers' poll loops, one for each upcall handler.
1250 *
1251 * - registering the Netlink sockets for the same upcall handler to
1252 * the corresponding poll loop.
1253 *
1254 * Returns 0 if successful, otherwise a positive errno value. */
1255int
1256dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers)
1257{
6b31e073
RW
1258 int error = 0;
1259
1260 if (dpif->dpif_class->handlers_set) {
1261 error = dpif->dpif_class->handlers_set(dpif, n_handlers);
1262 log_operation(dpif, "handlers_set", error);
1263 }
1954e6bb
AW
1264 return error;
1265}
1266
6b31e073 1267void
623540e4 1268dpif_register_upcall_cb(struct dpif *dpif, upcall_callback *cb, void *aux)
6b31e073
RW
1269{
1270 if (dpif->dpif_class->register_upcall_cb) {
623540e4 1271 dpif->dpif_class->register_upcall_cb(dpif, cb, aux);
6b31e073
RW
1272 }
1273}
1274
1275void
1276dpif_enable_upcall(struct dpif *dpif)
1277{
1278 if (dpif->dpif_class->enable_upcall) {
1279 dpif->dpif_class->enable_upcall(dpif);
1280 }
1281}
1282
1283void
1284dpif_disable_upcall(struct dpif *dpif)
1285{
1286 if (dpif->dpif_class->disable_upcall) {
1287 dpif->dpif_class->disable_upcall(dpif);
1288 }
1289}
1290
1291void
1292dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
1293{
1294 if (!VLOG_DROP_DBG(&dpmsg_rl)) {
1295 struct ds flow;
1296 char *packet;
1297
1298 packet = ofp_packet_to_string(ofpbuf_data(&upcall->packet),
1299 ofpbuf_size(&upcall->packet));
1300
1301 ds_init(&flow);
1302 odp_flow_key_format(upcall->key, upcall->key_len, &flow);
1303
1304 VLOG_DBG("%s: %s upcall:\n%s\n%s",
1305 dpif_name(dpif), dpif_upcall_type_to_string(upcall->type),
1306 ds_cstr(&flow), packet);
1307
1308 ds_destroy(&flow);
1309 free(packet);
1310 }
1311}
1312
f2eee189
AW
1313/* If 'dpif' creates its own I/O polling threads, refreshes poll threads
1314 * configuration. */
1315int
1316dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs,
1317 const char *cmask)
1318{
1319 int error = 0;
1320
1321 if (dpif->dpif_class->poll_threads_set) {
1322 error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask);
1323 if (error) {
1324 log_operation(dpif, "poll_threads_set", error);
1325 }
1326 }
1327
1328 return error;
1329}
1330
1954e6bb
AW
1331/* Polls for an upcall from 'dpif' for an upcall handler. Since there
1332 * there can be multiple poll loops, 'handler_id' is needed as index to
1333 * identify the corresponding poll loop. If successful, stores the upcall
1334 * into '*upcall', using 'buf' for storage. Should only be called if
1335 * 'recv_set' has been used to enable receiving packets from 'dpif'.
96fba48f 1336 *
da546e07
JR
1337 * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
1338 * 'buf', so their memory cannot be freed separately from 'buf'.
856081f6 1339 *
837a88dc
JR
1340 * The caller owns the data of 'upcall->packet' and may modify it. If
1341 * packet's headroom is exhausted as it is manipulated, 'upcall->packet'
1342 * will be reallocated. This requires the data of 'upcall->packet' to be
1343 * released with ofpbuf_uninit() before 'upcall' is destroyed. However,
1344 * when an error is returned, the 'upcall->packet' may be uninitialized
1345 * and should not be released.
1346 *
96fba48f 1347 * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
856081f6 1348 * if no upcall is immediately available. */
064af421 1349int
1954e6bb
AW
1350dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall,
1351 struct ofpbuf *buf)
064af421 1352{
6b31e073 1353 int error = EAGAIN;
01545c1a 1354
6b31e073
RW
1355 if (dpif->dpif_class->recv) {
1356 error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf);
1357 if (!error) {
1358 dpif_print_packet(dpif, upcall);
1359 } else if (error != EAGAIN) {
1360 log_operation(dpif, "recv", error);
1361 }
064af421 1362 }
064af421
BP
1363 return error;
1364}
1365
96fba48f 1366/* Discards all messages that would otherwise be received by dpif_recv() on
1ba530f4
BP
1367 * 'dpif'. */
1368void
96fba48f
BP
1369dpif_recv_purge(struct dpif *dpif)
1370{
96fba48f 1371 COVERAGE_INC(dpif_purge);
1ba530f4
BP
1372 if (dpif->dpif_class->recv_purge) {
1373 dpif->dpif_class->recv_purge(dpif);
96fba48f 1374 }
96fba48f
BP
1375}
1376
1954e6bb
AW
1377/* Arranges for the poll loop for an upcall handler to wake up when 'dpif'
1378 * 'dpif' has a message queued to be received with the recv member
1379 * function. Since there can be multiple poll loops, 'handler_id' is
1380 * needed as index to identify the corresponding poll loop. */
064af421 1381void
1954e6bb 1382dpif_recv_wait(struct dpif *dpif, uint32_t handler_id)
064af421 1383{
6b31e073
RW
1384 if (dpif->dpif_class->recv_wait) {
1385 dpif->dpif_class->recv_wait(dpif, handler_id);
1386 }
064af421 1387}
53a4218d 1388
b5cbbcf6
AZ
1389/*
1390 * Return the datapath version. Caller is responsible for freeing
1391 * the string.
1392 */
1393char *
1394dpif_get_dp_version(const struct dpif *dpif)
1395{
1396 char *version = NULL;
1397
1398 if (dpif->dpif_class->get_datapath_version) {
1399 version = dpif->dpif_class->get_datapath_version();
1400 }
1401
1402 return version;
1403}
1404
96fba48f
BP
1405/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
1406 * and '*engine_id', respectively. */
53a4218d
BP
1407void
1408dpif_get_netflow_ids(const struct dpif *dpif,
1409 uint8_t *engine_type, uint8_t *engine_id)
1410{
96fba48f
BP
1411 *engine_type = dpif->netflow_engine_type;
1412 *engine_id = dpif->netflow_engine_id;
1413}
aae51f53
BP
1414
1415/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority
abff858b
PS
1416 * value used for setting packet priority.
1417 * On success, returns 0 and stores the priority into '*priority'.
1418 * On failure, returns a positive errno value and stores 0 into '*priority'. */
aae51f53
BP
1419int
1420dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
1421 uint32_t *priority)
1422{
1423 int error = (dpif->dpif_class->queue_to_priority
1424 ? dpif->dpif_class->queue_to_priority(dpif, queue_id,
1425 priority)
1426 : EOPNOTSUPP);
1427 if (error) {
1428 *priority = 0;
1429 }
1430 log_operation(dpif, "queue_to_priority", error);
1431 return error;
1432}
96fba48f
BP
1433\f
1434void
1acb6baa
BP
1435dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
1436 const char *name,
96fba48f
BP
1437 uint8_t netflow_engine_type, uint8_t netflow_engine_id)
1438{
1acb6baa 1439 dpif->dpif_class = dpif_class;
1a6f1e2a 1440 dpif->base_name = xstrdup(name);
a4af0040 1441 dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
96fba48f
BP
1442 dpif->netflow_engine_type = netflow_engine_type;
1443 dpif->netflow_engine_id = netflow_engine_id;
1444}
999401aa
JG
1445
1446/* Undoes the results of initialization.
1447 *
1448 * Normally this function only needs to be called from dpif_close().
1449 * However, it may be called by providers due to an error on opening
1450 * that occurs after initialization. It this case dpif_close() would
1451 * never be called. */
1452void
1453dpif_uninit(struct dpif *dpif, bool close)
1454{
1455 char *base_name = dpif->base_name;
1456 char *full_name = dpif->full_name;
1457
1458 if (close) {
a4af0040 1459 dpif->dpif_class->close(dpif);
999401aa
JG
1460 }
1461
1462 free(base_name);
1463 free(full_name);
1464}
96fba48f
BP
1465\f
1466static void
1467log_operation(const struct dpif *dpif, const char *operation, int error)
1468{
1469 if (!error) {
1470 VLOG_DBG_RL(&dpmsg_rl, "%s: %s success", dpif_name(dpif), operation);
90bf1e07 1471 } else if (ofperr_is_valid(error)) {
96fba48f 1472 VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
90bf1e07 1473 dpif_name(dpif), operation, ofperr_get_name(error));
71ce9235 1474 } else {
90bf1e07 1475 VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
10a89ef0 1476 dpif_name(dpif), operation, ovs_strerror(error));
96fba48f
BP
1477 }
1478}
1479
1480static enum vlog_level
1481flow_message_log_level(int error)
1482{
9b1a48c2
JP
1483 /* If flows arrive in a batch, userspace may push down multiple
1484 * unique flow definitions that overlap when wildcards are applied.
1485 * Kernels that support flow wildcarding will reject these flows as
1486 * duplicates (EEXIST), so lower the log level to debug for these
1487 * types of messages. */
1488 return (error && error != EEXIST) ? VLL_WARN : VLL_DBG;
96fba48f
BP
1489}
1490
1491static bool
1492should_log_flow_message(int error)
1493{
1494 return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
1495 error ? &error_rl : &dpmsg_rl);
1496}
1497
1498static void
1499log_flow_message(const struct dpif *dpif, int error, const char *operation,
36956a7d 1500 const struct nlattr *key, size_t key_len,
61fb711d 1501 const struct nlattr *mask, size_t mask_len,
c97fb132 1502 const struct dpif_flow_stats *stats,
cf22f8cb 1503 const struct nlattr *actions, size_t actions_len)
96fba48f
BP
1504{
1505 struct ds ds = DS_EMPTY_INITIALIZER;
1506 ds_put_format(&ds, "%s: ", dpif_name(dpif));
1507 if (error) {
1508 ds_put_cstr(&ds, "failed to ");
1509 }
1510 ds_put_format(&ds, "%s ", operation);
1511 if (error) {
10a89ef0 1512 ds_put_format(&ds, "(%s) ", ovs_strerror(error));
96fba48f 1513 }
0a37839c 1514 odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true);
96fba48f
BP
1515 if (stats) {
1516 ds_put_cstr(&ds, ", ");
c97fb132 1517 dpif_flow_stats_format(stats, &ds);
96fba48f 1518 }
cdee00fd 1519 if (actions || actions_len) {
96fba48f 1520 ds_put_cstr(&ds, ", actions:");
cdee00fd 1521 format_odp_actions(&ds, actions, actions_len);
96fba48f
BP
1522 }
1523 vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
1524 ds_destroy(&ds);
1525}
89625d1e
BP
1526
1527static void
1528log_flow_put_message(struct dpif *dpif, const struct dpif_flow_put *put,
1529 int error)
1530{
43f9ac0a 1531 if (should_log_flow_message(error) && !(put->flags & DPIF_FP_PROBE)) {
89625d1e
BP
1532 struct ds s;
1533
1534 ds_init(&s);
1535 ds_put_cstr(&s, "put");
1536 if (put->flags & DPIF_FP_CREATE) {
1537 ds_put_cstr(&s, "[create]");
1538 }
1539 if (put->flags & DPIF_FP_MODIFY) {
1540 ds_put_cstr(&s, "[modify]");
1541 }
1542 if (put->flags & DPIF_FP_ZERO_STATS) {
1543 ds_put_cstr(&s, "[zero]");
1544 }
1545 log_flow_message(dpif, error, ds_cstr(&s),
61fb711d
JP
1546 put->key, put->key_len, put->mask, put->mask_len,
1547 put->stats, put->actions, put->actions_len);
89625d1e
BP
1548 ds_destroy(&s);
1549 }
1550}
1551
b99d3cee
BP
1552static void
1553log_flow_del_message(struct dpif *dpif, const struct dpif_flow_del *del,
1554 int error)
1555{
1556 if (should_log_flow_message(error)) {
1557 log_flow_message(dpif, error, "flow_del", del->key, del->key_len,
61fb711d 1558 NULL, 0, !error ? del->stats : NULL, NULL, 0);
b99d3cee
BP
1559 }
1560}
1561
f0fe12fc
BP
1562/* Logs that 'execute' was executed on 'dpif' and completed with errno 'error'
1563 * (0 for success). 'subexecute' should be true if the execution is a result
1564 * of breaking down a larger execution that needed help, false otherwise.
1565 *
1566 *
1567 * XXX In theory, the log message could be deceptive because this function is
1568 * called after the dpif_provider's '->execute' function, which is allowed to
1569 * modify execute->packet and execute->md. In practice, though:
1570 *
93451a0a 1571 * - dpif-netlink doesn't modify execute->packet or execute->md.
f0fe12fc
BP
1572 *
1573 * - dpif-netdev does modify them but it is less likely to have problems
1574 * because it is built into ovs-vswitchd and cannot have version skew,
1575 * etc.
1576 *
1577 * It would still be better to avoid the potential problem. I don't know of a
1578 * good way to do that, though, that isn't expensive. */
89625d1e
BP
1579static void
1580log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
fc65bafc 1581 bool subexecute, int error)
89625d1e 1582{
43f9ac0a
JR
1583 if (!(error ? VLOG_DROP_WARN(&error_rl) : VLOG_DROP_DBG(&dpmsg_rl))
1584 && !execute->probe) {
89625d1e
BP
1585 struct ds ds = DS_EMPTY_INITIALIZER;
1586 char *packet;
1587
1f317cb5
PS
1588 packet = ofp_packet_to_string(ofpbuf_data(execute->packet),
1589 ofpbuf_size(execute->packet));
fc65bafc
BP
1590 ds_put_format(&ds, "%s: %sexecute ",
1591 dpif_name(dpif),
1592 (subexecute ? "sub-"
1593 : dpif_execute_needs_help(execute) ? "super-"
1594 : ""));
89625d1e
BP
1595 format_odp_actions(&ds, execute->actions, execute->actions_len);
1596 if (error) {
10a89ef0 1597 ds_put_format(&ds, " failed (%s)", ovs_strerror(error));
89625d1e
BP
1598 }
1599 ds_put_format(&ds, " on packet %s", packet);
1600 vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds));
1601 ds_destroy(&ds);
1602 free(packet);
1603 }
1604}
6fe09f8c
JS
1605
1606static void
1607log_flow_get_message(const struct dpif *dpif, const struct dpif_flow_get *get,
1608 int error)
1609{
1610 if (should_log_flow_message(error)) {
1611 log_flow_message(dpif, error, "flow_get",
1612 get->key, get->key_len,
1613 get->flow->mask, get->flow->mask_len,
1614 &get->flow->stats,
1615 get->flow->actions, get->flow->actions_len);
1616 }
1617}
a36de779
PS
1618
1619bool
1620dpif_supports_tnl_push_pop(const struct dpif *dpif)
1621{
1622 return !strcmp(dpif->dpif_class->type, "netdev") ||
1623 !strcmp(dpif->dpif_class->type, "dummy");
1624}