]> git.proxmox.com Git - ovs.git/blame - lib/dpif.c
vlog: Make client supply semicolon for VLOG_DEFINE_THIS_MODULE.
[ovs.git] / lib / dpif.c
CommitLineData
064af421 1/*
1a6f1e2a 2 * Copyright (c) 2008, 2009, 2010 Nicira Networks.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
96fba48f 18#include "dpif-provider.h"
064af421
BP
19
20#include <assert.h>
21#include <ctype.h>
22#include <errno.h>
064af421 23#include <inttypes.h>
064af421
BP
24#include <stdlib.h>
25#include <string.h>
064af421
BP
26
27#include "coverage.h"
28#include "dynamic-string.h"
29#include "flow.h"
30#include "netlink.h"
31#include "odp-util.h"
32#include "ofp-print.h"
33#include "ofpbuf.h"
34#include "packets.h"
35#include "poll-loop.h"
999401aa 36#include "shash.h"
d3d22744 37#include "svec.h"
064af421
BP
38#include "util.h"
39#include "valgrind.h"
064af421 40#include "vlog.h"
5136ce49 41
d98e6007 42VLOG_DEFINE_THIS_MODULE(dpif);
064af421 43
999401aa 44static const struct dpif_class *base_dpif_classes[] = {
c83cdd30 45#ifdef HAVE_NETLINK
96fba48f 46 &dpif_linux_class,
c83cdd30 47#endif
72865317 48 &dpif_netdev_class,
c228a364 49};
999401aa
JG
50
51struct registered_dpif_class {
52 struct dpif_class dpif_class;
53 int refcount;
54};
55static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
c228a364 56
064af421
BP
57/* Rate limit for individual messages going to or from the datapath, output at
58 * DBG level. This is very high because, if these are enabled, it is because
59 * we really need to see them. */
60static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
61
62/* Not really much point in logging many dpif errors. */
e2781405 63static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
064af421 64
96fba48f
BP
/* Helpers defined later in this file. */
static bool should_log_flow_message(int error);
static void check_rw_odp_flow(struct odp_flow *);
static void log_operation(const struct dpif *,
                          const char *operation, int error);
static void log_flow_operation(const struct dpif *,
                               const char *operation, int error,
                               struct odp_flow *flow);
static void log_flow_put(struct dpif *, int error,
                         const struct odp_flow_put *);
73
999401aa
JG
74static void
75dp_initialize(void)
76{
77 static int status = -1;
78
79 if (status < 0) {
80 int i;
81
82 status = 0;
83 for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
84 dp_register_provider(base_dpif_classes[i]);
85 }
86 }
87}
88
5792c5c6
BP
89/* Performs periodic work needed by all the various kinds of dpifs.
90 *
8b61709d
BP
91 * If your program opens any dpifs, it must call both this function and
92 * netdev_run() within its main poll loop. */
5792c5c6
BP
93void
94dp_run(void)
95{
999401aa
JG
96 struct shash_node *node;
97 SHASH_FOR_EACH(node, &dpif_classes) {
98 const struct registered_dpif_class *registered_class = node->data;
99 if (registered_class->dpif_class.run) {
100 registered_class->dpif_class.run();
5792c5c6
BP
101 }
102 }
103}
104
105/* Arranges for poll_block() to wake up when dp_run() needs to be called.
106 *
8b61709d
BP
107 * If your program opens any dpifs, it must call both this function and
108 * netdev_wait() within its main poll loop. */
5792c5c6
BP
109void
110dp_wait(void)
111{
999401aa
JG
112 struct shash_node *node;
113 SHASH_FOR_EACH(node, &dpif_classes) {
114 const struct registered_dpif_class *registered_class = node->data;
115 if (registered_class->dpif_class.wait) {
116 registered_class->dpif_class.wait();
5792c5c6
BP
117 }
118 }
119}
120
999401aa
JG
121/* Registers a new datapath provider. After successful registration, new
122 * datapaths of that type can be opened using dpif_open(). */
123int
124dp_register_provider(const struct dpif_class *new_class)
125{
126 struct registered_dpif_class *registered_class;
127
128 if (shash_find(&dpif_classes, new_class->type)) {
129 VLOG_WARN("attempted to register duplicate datapath provider: %s",
130 new_class->type);
131 return EEXIST;
132 }
1a6f1e2a 133
999401aa
JG
134 registered_class = xmalloc(sizeof *registered_class);
135 memcpy(&registered_class->dpif_class, new_class,
136 sizeof registered_class->dpif_class);
137 registered_class->refcount = 0;
138
139 shash_add(&dpif_classes, new_class->type, registered_class);
140
141 return 0;
142}
143
144/* Unregisters a datapath provider. 'type' must have been previously
145 * registered and not currently be in use by any dpifs. After unregistration
146 * new datapaths of that type cannot be opened using dpif_open(). */
147int
148dp_unregister_provider(const char *type)
149{
150 struct shash_node *node;
151 struct registered_dpif_class *registered_class;
152
153 node = shash_find(&dpif_classes, type);
154 if (!node) {
155 VLOG_WARN("attempted to unregister a datapath provider that is not "
156 "registered: %s", type);
157 return EAFNOSUPPORT;
158 }
159
160 registered_class = node->data;
161 if (registered_class->refcount) {
162 VLOG_WARN("attempted to unregister in use datapath provider: %s", type);
163 return EBUSY;
164 }
165
166 shash_delete(&dpif_classes, node);
167 free(registered_class);
168
169 return 0;
170}
171
172/* Clears 'types' and enumerates the types of all currently registered datapath
173 * providers into it. The caller must first initialize the svec. */
1a6f1e2a
JG
174void
175dp_enumerate_types(struct svec *types)
176{
999401aa 177 struct shash_node *node;
1a6f1e2a 178
999401aa 179 dp_initialize();
1a6f1e2a
JG
180 svec_clear(types);
181
999401aa
JG
182 SHASH_FOR_EACH(node, &dpif_classes) {
183 const struct registered_dpif_class *registered_class = node->data;
184 svec_add(types, registered_class->dpif_class.type);
1a6f1e2a
JG
185 }
186}
187
188/* Clears 'names' and enumerates the names of all known created datapaths with
189 * the given 'type'. The caller must first initialize the svec. Returns 0 if
190 * successful, otherwise a positive errno value.
d3d22744
BP
191 *
192 * Some kinds of datapaths might not be practically enumerable. This is not
193 * considered an error. */
194int
1a6f1e2a 195dp_enumerate_names(const char *type, struct svec *names)
d3d22744 196{
999401aa
JG
197 const struct registered_dpif_class *registered_class;
198 const struct dpif_class *dpif_class;
199 int error;
d3d22744 200
999401aa 201 dp_initialize();
1a6f1e2a
JG
202 svec_clear(names);
203
999401aa
JG
204 registered_class = shash_find_data(&dpif_classes, type);
205 if (!registered_class) {
206 VLOG_WARN("could not enumerate unknown type: %s", type);
207 return EAFNOSUPPORT;
208 }
1a6f1e2a 209
999401aa
JG
210 dpif_class = &registered_class->dpif_class;
211 error = dpif_class->enumerate ? dpif_class->enumerate(names) : 0;
1a6f1e2a 212
999401aa
JG
213 if (error) {
214 VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type,
215 strerror(error));
d3d22744 216 }
1a6f1e2a 217
999401aa 218 return error;
1a6f1e2a
JG
219}
220
/* Splits 'datapath_name_', which has the form "type@name" or just "name",
 * into its component pieces.
 *
 * On return '*name' holds the part after the first '@', or the whole string
 * if there is no '@'.  '*type' holds the part before the first '@', or NULL
 * if there is no '@'.  Both results are separately allocated and the caller
 * must free them. */
void
dp_parse_name(const char *datapath_name_, char **name, char **type)
{
    char *copy = xstrdup(datapath_name_);
    char *at = strchr(copy, '@');

    if (!at) {
        /* No type prefix: the whole copy is the name. */
        *name = copy;
        *type = NULL;
        return;
    }

    /* Cut the copy at '@' so it becomes the type, and duplicate the remainder
     * so that the name can be freed independently. */
    *at = '\0';
    *type = copy;
    *name = xstrdup(at + 1);
}
239
96fba48f 240static int
1a6f1e2a 241do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
064af421 242{
96fba48f 243 struct dpif *dpif = NULL;
064af421 244 int error;
999401aa
JG
245 struct registered_dpif_class *registered_class;
246
247 dp_initialize();
064af421 248
1a6f1e2a
JG
249 if (!type || *type == '\0') {
250 type = "system";
064af421
BP
251 }
252
999401aa
JG
253 registered_class = shash_find_data(&dpif_classes, type);
254 if (!registered_class) {
255 VLOG_WARN("could not create datapath %s of unknown type %s", name,
256 type);
257 error = EAFNOSUPPORT;
258 goto exit;
259 }
260
261 error = registered_class->dpif_class.open(name, type, create, &dpif);
262 if (!error) {
263 registered_class->refcount++;
064af421 264 }
064af421 265
96fba48f
BP
266exit:
267 *dpifp = error ? NULL : dpif;
268 return error;
064af421
BP
269}
270
1a6f1e2a
JG
/* Opens the existing datapath with the given 'name' and 'type'; fails if no
 * such datapath exists.  Pass NULL or the empty string as 'type' to select
 * the default system type.
 *
 * Returns 0 if successful, otherwise a positive errno value.  '*dpifp'
 * receives the datapath on success and a null pointer on failure. */
int
dpif_open(const char *name, const char *type, struct dpif **dpifp)
{
    const bool create = false;

    return do_open(name, type, create, dpifp);
}
281
1a6f1e2a
JG
/* Creates and opens a new datapath with the given 'name' and 'type'; fails if
 * a datapath with that 'name' and 'type' already exists.  Pass NULL or the
 * empty string as 'type' to select the default system type.
 *
 * Returns 0 if successful, otherwise a positive errno value.  '*dpifp'
 * receives the datapath on success and a null pointer on failure. */
int
dpif_create(const char *name, const char *type, struct dpif **dpifp)
{
    const bool create = true;

    return do_open(name, type, create, dpifp);
}
064af421 292
1a6f1e2a
JG
/* Opens the datapath with the given 'name' and 'type', creating it first if
 * it does not exist.  Pass NULL or the empty string as 'type' to select the
 * default system type.
 *
 * Creation is attempted first.  If it fails with EEXIST or EBUSY, the
 * datapath is opened instead.  Any failure is logged as a warning.
 *
 * Returns 0 if successful, otherwise a positive errno value.  '*dpifp'
 * receives the datapath on success and a null pointer on failure. */
int
dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp)
{
    int error = dpif_create(name, type, dpifp);

    if (!error) {
        return 0;
    }

    if (error != EEXIST && error != EBUSY) {
        VLOG_WARN("failed to create datapath %s: %s", name, strerror(error));
        return error;
    }

    /* The datapath is already there, so fall back to opening it. */
    error = dpif_open(name, type, dpifp);
    if (error) {
        VLOG_WARN("datapath %s already exists but cannot be opened: %s",
                  name, strerror(error));
    }
    return error;
}
315
96fba48f
BP
316/* Closes and frees the connection to 'dpif'. Does not destroy the datapath
317 * itself; call dpif_delete() first, instead, if that is desirable. */
318void
319dpif_close(struct dpif *dpif)
320{
321 if (dpif) {
999401aa
JG
322 struct registered_dpif_class *registered_class;
323
d295e8e9 324 registered_class = shash_find_data(&dpif_classes,
a4af0040 325 dpif->dpif_class->type);
999401aa
JG
326 assert(registered_class);
327 assert(registered_class->refcount);
328
329 registered_class->refcount--;
330 dpif_uninit(dpif, true);
064af421
BP
331 }
332}
333
1a6f1e2a
JG
334/* Returns the name of datapath 'dpif' prefixed with the type
335 * (for use in log messages). */
b29ba128
BP
336const char *
337dpif_name(const struct dpif *dpif)
338{
1a6f1e2a
JG
339 return dpif->full_name;
340}
341
342/* Returns the name of datapath 'dpif' without the type
343 * (for use in device names). */
344const char *
345dpif_base_name(const struct dpif *dpif)
346{
347 return dpif->base_name;
b29ba128
BP
348}
349
d3d22744
BP
350/* Enumerates all names that may be used to open 'dpif' into 'all_names'. The
351 * Linux datapath, for example, supports opening a datapath both by number,
352 * e.g. "dp0", and by the name of the datapath's local port. For some
353 * datapaths, this might be an infinite set (e.g. in a file name, slashes may
354 * be duplicated any number of times), in which case only the names most likely
355 * to be used will be enumerated.
356 *
357 * The caller must already have initialized 'all_names'. Any existing names in
358 * 'all_names' will not be disturbed. */
359int
360dpif_get_all_names(const struct dpif *dpif, struct svec *all_names)
361{
1acb6baa
BP
362 if (dpif->dpif_class->get_all_names) {
363 int error = dpif->dpif_class->get_all_names(dpif, all_names);
d3d22744
BP
364 if (error) {
365 VLOG_WARN_RL(&error_rl,
366 "failed to retrieve names for datpath %s: %s",
367 dpif_name(dpif), strerror(error));
368 }
369 return error;
370 } else {
1a6f1e2a 371 svec_add(all_names, dpif_base_name(dpif));
d3d22744
BP
372 return 0;
373 }
374}
375
96fba48f
BP
376/* Destroys the datapath that 'dpif' is connected to, first removing all of its
377 * ports. After calling this function, it does not make sense to pass 'dpif'
378 * to any functions other than dpif_name() or dpif_close(). */
064af421
BP
379int
380dpif_delete(struct dpif *dpif)
381{
96fba48f
BP
382 int error;
383
064af421 384 COVERAGE_INC(dpif_destroy);
96fba48f 385
1acb6baa 386 error = dpif->dpif_class->destroy(dpif);
96fba48f
BP
387 log_operation(dpif, "delete", error);
388 return error;
064af421
BP
389}
390
96fba48f
BP
391/* Retrieves statistics for 'dpif' into 'stats'. Returns 0 if successful,
392 * otherwise a positive errno value. */
064af421
BP
393int
394dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats)
395{
1acb6baa 396 int error = dpif->dpif_class->get_stats(dpif, stats);
96fba48f
BP
397 if (error) {
398 memset(stats, 0, sizeof *stats);
399 }
400 log_operation(dpif, "get_stats", error);
401 return error;
064af421
BP
402}
403
96fba48f
BP
404/* Retrieves the current IP fragment handling policy for 'dpif' into
405 * '*drop_frags': true indicates that fragments are dropped, false indicates
406 * that fragments are treated in the same way as other IP packets (except that
407 * the L4 header cannot be read). Returns 0 if successful, otherwise a
408 * positive errno value. */
064af421
BP
409int
410dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
411{
1acb6baa 412 int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags);
96fba48f
BP
413 if (error) {
414 *drop_frags = false;
415 }
416 log_operation(dpif, "get_drop_frags", error);
064af421
BP
417 return error;
418}
419
96fba48f
BP
420/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose meaning is
421 * the same as for the get_drop_frags member function. Returns 0 if
422 * successful, otherwise a positive errno value. */
064af421
BP
423int
424dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
425{
1acb6baa 426 int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags);
96fba48f
BP
427 log_operation(dpif, "set_drop_frags", error);
428 return error;
064af421
BP
429}
430
96fba48f
BP
431/* Attempts to add 'devname' as a port on 'dpif', given the combination of
432 * ODP_PORT_* flags in 'flags'. If successful, returns 0 and sets '*port_nop'
433 * to the new port's port number (if 'port_nop' is non-null). On failure,
434 * returns a positive errno value and sets '*port_nop' to UINT16_MAX (if
435 * 'port_nop' is non-null). */
064af421 436int
9ee3ae3e
BP
437dpif_port_add(struct dpif *dpif, const char *devname, uint16_t flags,
438 uint16_t *port_nop)
064af421 439{
9ee3ae3e
BP
440 uint16_t port_no;
441 int error;
064af421
BP
442
443 COVERAGE_INC(dpif_port_add);
9ee3ae3e 444
1acb6baa 445 error = dpif->dpif_class->port_add(dpif, devname, flags, &port_no);
9ee3ae3e 446 if (!error) {
b29ba128
BP
447 VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu16,
448 dpif_name(dpif), devname, port_no);
064af421 449 } else {
9ee3ae3e 450 VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
96fba48f
BP
451 dpif_name(dpif), devname, strerror(error));
452 port_no = UINT16_MAX;
9ee3ae3e
BP
453 }
454 if (port_nop) {
455 *port_nop = port_no;
064af421 456 }
9ee3ae3e 457 return error;
064af421
BP
458}
459
96fba48f
BP
460/* Attempts to remove 'dpif''s port number 'port_no'. Returns 0 if successful,
461 * otherwise a positive errno value. */
064af421
BP
462int
463dpif_port_del(struct dpif *dpif, uint16_t port_no)
464{
96fba48f
BP
465 int error;
466
064af421 467 COVERAGE_INC(dpif_port_del);
96fba48f 468
1acb6baa 469 error = dpif->dpif_class->port_del(dpif, port_no);
96fba48f
BP
470 log_operation(dpif, "port_del", error);
471 return error;
064af421
BP
472}
473
96fba48f
BP
474/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and
475 * initializes '*port' appropriately; on failure, returns a positive errno
476 * value. */
064af421
BP
477int
478dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
479 struct odp_port *port)
480{
1acb6baa 481 int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port);
96fba48f 482 if (!error) {
b29ba128
BP
483 VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu16" is device %s",
484 dpif_name(dpif), port_no, port->devname);
064af421 485 } else {
96fba48f 486 memset(port, 0, sizeof *port);
b29ba128 487 VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s",
96fba48f 488 dpif_name(dpif), port_no, strerror(error));
064af421 489 }
96fba48f 490 return error;
064af421
BP
491}
492
96fba48f
BP
493/* Looks up port named 'devname' in 'dpif'. On success, returns 0 and
494 * initializes '*port' appropriately; on failure, returns a positive errno
495 * value. */
064af421
BP
496int
497dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
498 struct odp_port *port)
499{
1acb6baa 500 int error = dpif->dpif_class->port_query_by_name(dpif, devname, port);
96fba48f 501 if (!error) {
b29ba128
BP
502 VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu16,
503 dpif_name(dpif), devname, port->port);
064af421 504 } else {
96fba48f
BP
505 memset(port, 0, sizeof *port);
506
5c6d2a3f
BP
507 /* Log level is DBG here because all the current callers are interested
508 * in whether 'dpif' actually has a port 'devname', so that it's not an
509 * issue worth logging if it doesn't. */
510 VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s",
96fba48f 511 dpif_name(dpif), devname, strerror(error));
064af421 512 }
96fba48f 513 return error;
064af421
BP
514}
515
96fba48f
BP
516/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies
517 * the port's name into the 'name_size' bytes in 'name', ensuring that the
518 * result is null-terminated. On failure, returns a positive errno value and
519 * makes 'name' the empty string. */
335562c0
BP
520int
521dpif_port_get_name(struct dpif *dpif, uint16_t port_no,
522 char *name, size_t name_size)
523{
524 struct odp_port port;
525 int error;
526
527 assert(name_size > 0);
528
529 error = dpif_port_query_by_number(dpif, port_no, &port);
530 if (!error) {
531 ovs_strlcpy(name, port.devname, name_size);
532 } else {
533 *name = '\0';
534 }
535 return error;
536}
537
96fba48f
BP
538/* Obtains a list of all the ports in 'dpif'.
539 *
540 * If successful, returns 0 and sets '*portsp' to point to an array of
541 * appropriately initialized port structures and '*n_portsp' to the number of
542 * ports in the array. The caller is responsible for freeing '*portp' by
543 * calling free().
544 *
545 * On failure, returns a positive errno value and sets '*portsp' to NULL and
546 * '*n_portsp' to 0. */
064af421
BP
547int
548dpif_port_list(const struct dpif *dpif,
f4ba4c4f 549 struct odp_port **portsp, size_t *n_portsp)
064af421 550{
f4ba4c4f 551 struct odp_port *ports;
b9b0ce61 552 size_t n_ports = 0;
064af421
BP
553 int error;
554
f4ba4c4f
BP
555 for (;;) {
556 struct odp_stats stats;
96fba48f 557 int retval;
f4ba4c4f 558
064af421
BP
559 error = dpif_get_dp_stats(dpif, &stats);
560 if (error) {
f4ba4c4f 561 goto exit;
064af421
BP
562 }
563
f4ba4c4f 564 ports = xcalloc(stats.n_ports, sizeof *ports);
1acb6baa 565 retval = dpif->dpif_class->port_list(dpif, ports, stats.n_ports);
96fba48f 566 if (retval < 0) {
f4ba4c4f 567 /* Hard error. */
96fba48f 568 error = -retval;
f4ba4c4f
BP
569 free(ports);
570 goto exit;
96fba48f 571 } else if (retval <= stats.n_ports) {
f4ba4c4f
BP
572 /* Success. */
573 error = 0;
96fba48f 574 n_ports = retval;
f4ba4c4f
BP
575 goto exit;
576 } else {
577 /* Soft error: port count increased behind our back. Try again. */
578 free(ports);
064af421 579 }
f4ba4c4f 580 }
064af421 581
f4ba4c4f
BP
582exit:
583 if (error) {
584 *portsp = NULL;
585 *n_portsp = 0;
586 } else {
587 *portsp = ports;
588 *n_portsp = n_ports;
589 }
96fba48f 590 log_operation(dpif, "port_list", error);
064af421
BP
591 return error;
592}
593
e9e28be3
BP
594/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
595 * 'dpif' has changed, this function does one of the following:
596 *
597 * - Stores the name of the device that was added to or deleted from 'dpif' in
598 * '*devnamep' and returns 0. The caller is responsible for freeing
599 * '*devnamep' (with free()) when it no longer needs it.
600 *
601 * - Returns ENOBUFS and sets '*devnamep' to NULL.
602 *
603 * This function may also return 'false positives', where it returns 0 and
604 * '*devnamep' names a device that was not actually added or deleted or it
605 * returns ENOBUFS without any change.
606 *
607 * Returns EAGAIN if the set of ports in 'dpif' has not changed. May also
608 * return other positive errno values to indicate that something has gone
609 * wrong. */
610int
611dpif_port_poll(const struct dpif *dpif, char **devnamep)
612{
1acb6baa 613 int error = dpif->dpif_class->port_poll(dpif, devnamep);
e9e28be3
BP
614 if (error) {
615 *devnamep = NULL;
616 }
617 return error;
618}
619
620/* Arranges for the poll loop to wake up when port_poll(dpif) will return a
621 * value other than EAGAIN. */
622void
623dpif_port_poll_wait(const struct dpif *dpif)
624{
1acb6baa 625 dpif->dpif_class->port_poll_wait(dpif);
e9e28be3
BP
626}
627
96fba48f
BP
628/* Deletes all flows from 'dpif'. Returns 0 if successful, otherwise a
629 * positive errno value. */
630int
631dpif_flow_flush(struct dpif *dpif)
064af421 632{
96fba48f
BP
633 int error;
634
635 COVERAGE_INC(dpif_flow_flush);
636
1acb6baa 637 error = dpif->dpif_class->flow_flush(dpif);
96fba48f
BP
638 log_operation(dpif, "flow_flush", error);
639 return error;
064af421
BP
640}
641
96fba48f
BP
642/* Queries 'dpif' for a flow entry matching 'flow->key'.
643 *
644 * If a flow matching 'flow->key' exists in 'dpif', stores statistics for the
645 * flow into 'flow->stats'. If 'flow->n_actions' is zero, then 'flow->actions'
646 * is ignored. If 'flow->n_actions' is nonzero, then 'flow->actions' should
647 * point to an array of the specified number of actions. At most that many of
648 * the flow's actions will be copied into that array. 'flow->n_actions' will
649 * be updated to the number of actions actually present in the flow, which may
650 * be greater than the number stored if the flow has more actions than space
651 * available in the array.
652 *
653 * If no flow matching 'flow->key' exists in 'dpif', returns ENOENT. On other
654 * failure, returns a positive errno value. */
655int
656dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow)
064af421 657{
96fba48f
BP
658 int error;
659
660 COVERAGE_INC(dpif_flow_get);
661
662 check_rw_odp_flow(flow);
1acb6baa 663 error = dpif->dpif_class->flow_get(dpif, flow, 1);
96fba48f
BP
664 if (!error) {
665 error = flow->stats.error;
064af421 666 }
b843fa1b
BP
667 if (error) {
668 /* Make the results predictable on error. */
669 memset(&flow->stats, 0, sizeof flow->stats);
670 flow->n_actions = 0;
671 }
96fba48f
BP
672 if (should_log_flow_message(error)) {
673 log_flow_operation(dpif, "flow_get", error, flow);
064af421 674 }
96fba48f 675 return error;
064af421
BP
676}
677
96fba48f
BP
678/* For each flow 'flow' in the 'n' flows in 'flows':
679 *
680 * - If a flow matching 'flow->key' exists in 'dpif':
681 *
682 * Stores 0 into 'flow->stats.error' and stores statistics for the flow
683 * into 'flow->stats'.
684 *
685 * If 'flow->n_actions' is zero, then 'flow->actions' is ignored. If
686 * 'flow->n_actions' is nonzero, then 'flow->actions' should point to an
687 * array of the specified number of actions. At most that many of the
688 * flow's actions will be copied into that array. 'flow->n_actions' will
689 * be updated to the number of actions actually present in the flow, which
690 * may be greater than the number stored if the flow has more actions than
691 * space available in the array.
692 *
693 * - Flow-specific errors are indicated by a positive errno value in
694 * 'flow->stats.error'. In particular, ENOENT indicates that no flow
695 * matching 'flow->key' exists in 'dpif'. When an error value is stored, the
696 * contents of 'flow->key' are preserved but other members of 'flow' should
697 * be treated as indeterminate.
698 *
699 * Returns 0 if all 'n' flows in 'flows' were updated (whether they were
700 * individually successful or not is indicated by 'flow->stats.error',
701 * however). Returns a positive errno value if an error that prevented this
702 * update occurred, in which the caller must not depend on any elements in
703 * 'flows' being updated or not updated.
704 */
705int
706dpif_flow_get_multiple(const struct dpif *dpif,
707 struct odp_flow flows[], size_t n)
064af421 708{
96fba48f
BP
709 int error;
710 size_t i;
711
712 COVERAGE_ADD(dpif_flow_get, n);
713
714 for (i = 0; i < n; i++) {
715 check_rw_odp_flow(&flows[i]);
064af421 716 }
96fba48f 717
1acb6baa 718 error = dpif->dpif_class->flow_get(dpif, flows, n);
96fba48f 719 log_operation(dpif, "flow_get_multiple", error);
064af421
BP
720 return error;
721}
722
96fba48f
BP
723/* Adds or modifies a flow in 'dpif' as specified in 'put':
724 *
725 * - If the flow specified in 'put->flow' does not exist in 'dpif', then
726 * behavior depends on whether ODPPF_CREATE is specified in 'put->flags': if
727 * it is, the flow will be added, otherwise the operation will fail with
728 * ENOENT.
729 *
730 * - Otherwise, the flow specified in 'put->flow' does exist in 'dpif'.
731 * Behavior in this case depends on whether ODPPF_MODIFY is specified in
732 * 'put->flags': if it is, the flow's actions will be updated, otherwise the
733 * operation will fail with EEXIST. If the flow's actions are updated, then
734 * its statistics will be zeroed if ODPPF_ZERO_STATS is set in 'put->flags',
735 * left as-is otherwise.
736 *
737 * Returns 0 if successful, otherwise a positive errno value.
738 */
064af421
BP
739int
740dpif_flow_put(struct dpif *dpif, struct odp_flow_put *put)
741{
96fba48f
BP
742 int error;
743
064af421 744 COVERAGE_INC(dpif_flow_put);
96fba48f 745
1acb6baa 746 error = dpif->dpif_class->flow_put(dpif, put);
064af421 747 if (should_log_flow_message(error)) {
96fba48f 748 log_flow_put(dpif, error, put);
064af421
BP
749 }
750 return error;
751}
752
96fba48f
BP
753/* Deletes a flow matching 'flow->key' from 'dpif' or returns ENOENT if 'dpif'
754 * does not contain such a flow.
755 *
756 * If successful, updates 'flow->stats', 'flow->n_actions', and 'flow->actions'
757 * as described for dpif_flow_get(). */
064af421
BP
758int
759dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
760{
f1aa2072
BP
761 int error;
762
96fba48f 763 COVERAGE_INC(dpif_flow_del);
f1aa2072 764
064af421 765 check_rw_odp_flow(flow);
96fba48f 766 memset(&flow->stats, 0, sizeof flow->stats);
064af421 767
1acb6baa 768 error = dpif->dpif_class->flow_del(dpif, flow);
96fba48f
BP
769 if (should_log_flow_message(error)) {
770 log_flow_operation(dpif, "delete flow", error, flow);
064af421 771 }
96fba48f 772 return error;
064af421
BP
773}
774
96fba48f
BP
775/* Stores up to 'n' flows in 'dpif' into 'flows', including their statistics
776 * but not including any information about their actions. If successful,
777 * returns 0 and sets '*n_out' to the number of flows actually present in
778 * 'dpif', which might be greater than the number stored (if 'dpif' has more
779 * than 'n' flows). On failure, returns a negative errno value and sets
780 * '*n_out' to 0. */
064af421
BP
781int
782dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
783 size_t *n_out)
784{
064af421 785 uint32_t i;
96fba48f 786 int retval;
064af421
BP
787
788 COVERAGE_INC(dpif_flow_query_list);
064af421
BP
789 if (RUNNING_ON_VALGRIND) {
790 memset(flows, 0, n * sizeof *flows);
791 } else {
792 for (i = 0; i < n; i++) {
793 flows[i].actions = NULL;
794 flows[i].n_actions = 0;
795 }
796 }
1acb6baa 797 retval = dpif->dpif_class->flow_list(dpif, flows, n);
96fba48f 798 if (retval < 0) {
064af421 799 *n_out = 0;
b29ba128 800 VLOG_WARN_RL(&error_rl, "%s: flow list failed (%s)",
96fba48f
BP
801 dpif_name(dpif), strerror(-retval));
802 return -retval;
064af421 803 } else {
96fba48f
BP
804 COVERAGE_ADD(dpif_flow_query_list_n, retval);
805 *n_out = MIN(n, retval);
806 VLOG_DBG_RL(&dpmsg_rl, "%s: listed %zu flows (of %d)",
807 dpif_name(dpif), *n_out, retval);
808 return 0;
064af421 809 }
064af421
BP
810}
811
96fba48f
BP
812/* Retrieves all of the flows in 'dpif'.
813 *
814 * If successful, returns 0 and stores in '*flowsp' a pointer to a newly
815 * allocated array of flows, including their statistics but not including any
816 * information about their actions, and sets '*np' to the number of flows in
817 * '*flowsp'. The caller is responsible for freeing '*flowsp' by calling
818 * free().
819 *
820 * On failure, returns a positive errno value and sets '*flowsp' to NULL and
821 * '*np' to 0. */
064af421
BP
822int
823dpif_flow_list_all(const struct dpif *dpif,
824 struct odp_flow **flowsp, size_t *np)
825{
826 struct odp_stats stats;
827 struct odp_flow *flows;
828 size_t n_flows;
829 int error;
830
831 *flowsp = NULL;
832 *np = 0;
833
834 error = dpif_get_dp_stats(dpif, &stats);
835 if (error) {
836 return error;
837 }
838
839 flows = xmalloc(sizeof *flows * stats.n_flows);
840 error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows);
841 if (error) {
842 free(flows);
843 return error;
844 }
845
846 if (stats.n_flows != n_flows) {
b29ba128 847 VLOG_WARN_RL(&error_rl, "%s: datapath stats reported %"PRIu32" "
064af421 848 "flows but flow listing reported %zu",
b29ba128 849 dpif_name(dpif), stats.n_flows, n_flows);
064af421
BP
850 }
851 *flowsp = flows;
852 *np = n_flows;
853 return 0;
854}
855
96fba48f
BP
856/* Causes 'dpif' to perform the 'n_actions' actions in 'actions' on the
857 * Ethernet frame specified in 'packet'.
858 *
96fba48f 859 * Returns 0 if successful, otherwise a positive errno value. */
064af421 860int
f1588b1f 861dpif_execute(struct dpif *dpif,
064af421
BP
862 const union odp_action actions[], size_t n_actions,
863 const struct ofpbuf *buf)
864{
865 int error;
866
867 COVERAGE_INC(dpif_execute);
868 if (n_actions > 0) {
f1588b1f 869 error = dpif->dpif_class->execute(dpif, actions, n_actions, buf);
064af421
BP
870 } else {
871 error = 0;
872 }
873
874 if (!(error ? VLOG_DROP_WARN(&error_rl) : VLOG_DROP_DBG(&dpmsg_rl))) {
875 struct ds ds = DS_EMPTY_INITIALIZER;
876 char *packet = ofp_packet_to_string(buf->data, buf->size, buf->size);
b29ba128 877 ds_put_format(&ds, "%s: execute ", dpif_name(dpif));
064af421
BP
878 format_odp_actions(&ds, actions, n_actions);
879 if (error) {
880 ds_put_format(&ds, " failed (%s)", strerror(error));
881 }
882 ds_put_format(&ds, " on packet %s", packet);
883 vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds));
884 ds_destroy(&ds);
885 free(packet);
886 }
887 return error;
888}
889
96fba48f
BP
890/* Retrieves 'dpif''s "listen mask" into '*listen_mask'. Each ODPL_* bit set
891 * in '*listen_mask' indicates that dpif_recv() will receive messages of that
892 * type. Returns 0 if successful, otherwise a positive errno value. */
8f24562a
BP
893int
894dpif_recv_get_mask(const struct dpif *dpif, int *listen_mask)
895{
1acb6baa 896 int error = dpif->dpif_class->recv_get_mask(dpif, listen_mask);
8f24562a
BP
897 if (error) {
898 *listen_mask = 0;
899 }
96fba48f 900 log_operation(dpif, "recv_get_mask", error);
8f24562a
BP
901 return error;
902}
903
96fba48f
BP
904/* Sets 'dpif''s "listen mask" to 'listen_mask'. Each ODPL_* bit set in
905 * '*listen_mask' requests that dpif_recv() receive messages of that type.
906 * Returns 0 if successful, otherwise a positive errno value. */
8f24562a
BP
907int
908dpif_recv_set_mask(struct dpif *dpif, int listen_mask)
909{
1acb6baa 910 int error = dpif->dpif_class->recv_set_mask(dpif, listen_mask);
96fba48f
BP
911 log_operation(dpif, "recv_set_mask", error);
912 return error;
8f24562a
BP
913}
914
b4a7a3f3
BP
915/* Retrieve the sFlow sampling probability. '*probability' is expressed as the
916 * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is
917 * the probability of sampling a given packet.
72b06300
BP
918 *
919 * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
920 * indicates that 'dpif' does not support sFlow sampling. */
921int
922dpif_get_sflow_probability(const struct dpif *dpif, uint32_t *probability)
923{
49c36903
BP
924 int error = (dpif->dpif_class->get_sflow_probability
925 ? dpif->dpif_class->get_sflow_probability(dpif, probability)
72b06300
BP
926 : EOPNOTSUPP);
927 if (error) {
928 *probability = 0;
929 }
930 log_operation(dpif, "get_sflow_probability", error);
931 return error;
932}
933
b4a7a3f3
BP
934/* Set the sFlow sampling probability. 'probability' is expressed as the
935 * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is
936 * the probability of sampling a given packet.
72b06300
BP
937 *
938 * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
939 * indicates that 'dpif' does not support sFlow sampling. */
940int
941dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability)
942{
49c36903
BP
943 int error = (dpif->dpif_class->set_sflow_probability
944 ? dpif->dpif_class->set_sflow_probability(dpif, probability)
72b06300
BP
945 : EOPNOTSUPP);
946 log_operation(dpif, "set_sflow_probability", error);
947 return error;
948}
949
96fba48f
BP
950/* Attempts to receive a message from 'dpif'. If successful, stores the
951 * message into '*packetp'. The message, if one is received, will begin with
43253595
BP
952 * 'struct odp_msg' as a header, and will have at least DPIF_RECV_MSG_PADDING
953 * bytes of headroom. Only messages of the types selected with
96fba48f
BP
954 * dpif_set_listen_mask() will ordinarily be received (but if a message type is
955 * enabled and then later disabled, some stragglers might pop up).
956 *
957 * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
958 * if no message is immediately available. */
064af421 959int
96fba48f 960dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
064af421 961{
1acb6baa 962 int error = dpif->dpif_class->recv(dpif, packetp);
96fba48f 963 if (!error) {
43253595
BP
964 struct ofpbuf *buf = *packetp;
965
966 assert(ofpbuf_headroom(buf) >= DPIF_RECV_MSG_PADDING);
96fba48f 967 if (VLOG_IS_DBG_ENABLED()) {
96fba48f
BP
968 struct odp_msg *msg = buf->data;
969 void *payload = msg + 1;
970 size_t payload_len = buf->size - sizeof *msg;
971 char *s = ofp_packet_to_string(payload, payload_len, payload_len);
972 VLOG_DBG_RL(&dpmsg_rl, "%s: received %s message of length "
973 "%zu on port %"PRIu16": %s", dpif_name(dpif),
974 (msg->type == _ODPL_MISS_NR ? "miss"
975 : msg->type == _ODPL_ACTION_NR ? "action"
72b06300 976 : msg->type == _ODPL_SFLOW_NR ? "sFlow"
96fba48f
BP
977 : "<unknown>"),
978 payload_len, msg->port, s);
979 free(s);
064af421 980 }
064af421 981 } else {
96fba48f 982 *packetp = NULL;
064af421 983 }
064af421
BP
984 return error;
985}
986
96fba48f
BP
987/* Discards all messages that would otherwise be received by dpif_recv() on
988 * 'dpif'. Returns 0 if successful, otherwise a positive errno value. */
989int
990dpif_recv_purge(struct dpif *dpif)
991{
992 struct odp_stats stats;
993 unsigned int i;
994 int error;
995
996 COVERAGE_INC(dpif_purge);
997
998 error = dpif_get_dp_stats(dpif, &stats);
999 if (error) {
1000 return error;
1001 }
1002
72b06300 1003 for (i = 0; i < stats.max_miss_queue + stats.max_action_queue + stats.max_sflow_queue; i++) {
96fba48f
BP
1004 struct ofpbuf *buf;
1005 error = dpif_recv(dpif, &buf);
1006 if (error) {
1007 return error == EAGAIN ? 0 : error;
1008 }
1009 ofpbuf_delete(buf);
1010 }
1011 return 0;
1012}
1013
1014/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
1015 * received with dpif_recv(). */
064af421
BP
1016void
1017dpif_recv_wait(struct dpif *dpif)
1018{
1acb6baa 1019 dpif->dpif_class->recv_wait(dpif);
064af421 1020}
53a4218d 1021
96fba48f
BP
1022/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
1023 * and '*engine_id', respectively. */
53a4218d
BP
1024void
1025dpif_get_netflow_ids(const struct dpif *dpif,
1026 uint8_t *engine_type, uint8_t *engine_id)
1027{
96fba48f
BP
1028 *engine_type = dpif->netflow_engine_type;
1029 *engine_id = dpif->netflow_engine_id;
1030}
aae51f53
BP
1031
1032/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority
1033 * value for use in the ODPAT_SET_PRIORITY action. On success, returns 0 and
1034 * stores the priority into '*priority'. On failure, returns a positive errno
1035 * value and stores 0 into '*priority'. */
1036int
1037dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
1038 uint32_t *priority)
1039{
1040 int error = (dpif->dpif_class->queue_to_priority
1041 ? dpif->dpif_class->queue_to_priority(dpif, queue_id,
1042 priority)
1043 : EOPNOTSUPP);
1044 if (error) {
1045 *priority = 0;
1046 }
1047 log_operation(dpif, "queue_to_priority", error);
1048 return error;
1049}
96fba48f
BP
1050\f
1051void
1acb6baa
BP
1052dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
1053 const char *name,
96fba48f
BP
1054 uint8_t netflow_engine_type, uint8_t netflow_engine_id)
1055{
1acb6baa 1056 dpif->dpif_class = dpif_class;
1a6f1e2a 1057 dpif->base_name = xstrdup(name);
a4af0040 1058 dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
96fba48f
BP
1059 dpif->netflow_engine_type = netflow_engine_type;
1060 dpif->netflow_engine_id = netflow_engine_id;
1061}
999401aa
JG
1062
1063/* Undoes the results of initialization.
1064 *
1065 * Normally this function only needs to be called from dpif_close().
1066 * However, it may be called by providers due to an error on opening
1067 * that occurs after initialization. It this case dpif_close() would
1068 * never be called. */
1069void
1070dpif_uninit(struct dpif *dpif, bool close)
1071{
1072 char *base_name = dpif->base_name;
1073 char *full_name = dpif->full_name;
1074
1075 if (close) {
a4af0040 1076 dpif->dpif_class->close(dpif);
999401aa
JG
1077 }
1078
1079 free(base_name);
1080 free(full_name);
1081}
96fba48f
BP
1082\f
1083static void
1084log_operation(const struct dpif *dpif, const char *operation, int error)
1085{
1086 if (!error) {
1087 VLOG_DBG_RL(&dpmsg_rl, "%s: %s success", dpif_name(dpif), operation);
1088 } else {
1089 VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
1090 dpif_name(dpif), operation, strerror(error));
1091 }
1092}
1093
1094static enum vlog_level
1095flow_message_log_level(int error)
1096{
1097 return error ? VLL_WARN : VLL_DBG;
1098}
1099
1100static bool
1101should_log_flow_message(int error)
1102{
1103 return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
1104 error ? &error_rl : &dpmsg_rl);
1105}
1106
1107static void
1108log_flow_message(const struct dpif *dpif, int error, const char *operation,
14608a15
BP
1109 const struct odp_flow_key *flow,
1110 const struct odp_flow_stats *stats,
96fba48f
BP
1111 const union odp_action *actions, size_t n_actions)
1112{
1113 struct ds ds = DS_EMPTY_INITIALIZER;
1114 ds_put_format(&ds, "%s: ", dpif_name(dpif));
1115 if (error) {
1116 ds_put_cstr(&ds, "failed to ");
1117 }
1118 ds_put_format(&ds, "%s ", operation);
1119 if (error) {
1120 ds_put_format(&ds, "(%s) ", strerror(error));
1121 }
14608a15 1122 format_odp_flow_key(&ds, flow);
96fba48f
BP
1123 if (stats) {
1124 ds_put_cstr(&ds, ", ");
1125 format_odp_flow_stats(&ds, stats);
1126 }
1127 if (actions || n_actions) {
1128 ds_put_cstr(&ds, ", actions:");
1129 format_odp_actions(&ds, actions, n_actions);
1130 }
1131 vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
1132 ds_destroy(&ds);
1133}
1134
1135static void
1136log_flow_operation(const struct dpif *dpif, const char *operation, int error,
1137 struct odp_flow *flow)
1138{
1139 if (error) {
1140 flow->n_actions = 0;
1141 }
1142 log_flow_message(dpif, error, operation, &flow->key,
1143 !error ? &flow->stats : NULL,
1144 flow->actions, flow->n_actions);
1145}
1146
1147static void
1148log_flow_put(struct dpif *dpif, int error, const struct odp_flow_put *put)
1149{
1150 enum { ODPPF_ALL = ODPPF_CREATE | ODPPF_MODIFY | ODPPF_ZERO_STATS };
1151 struct ds s;
1152
1153 ds_init(&s);
1154 ds_put_cstr(&s, "put");
1155 if (put->flags & ODPPF_CREATE) {
1156 ds_put_cstr(&s, "[create]");
1157 }
1158 if (put->flags & ODPPF_MODIFY) {
1159 ds_put_cstr(&s, "[modify]");
1160 }
1161 if (put->flags & ODPPF_ZERO_STATS) {
1162 ds_put_cstr(&s, "[zero]");
1163 }
1164 if (put->flags & ~ODPPF_ALL) {
1165 ds_put_format(&s, "[%x]", put->flags & ~ODPPF_ALL);
1166 }
1167 log_flow_message(dpif, error, ds_cstr(&s), &put->flow.key,
1168 !error ? &put->flow.stats : NULL,
1169 put->flow.actions, put->flow.n_actions);
1170 ds_destroy(&s);
1171}
1172
1173/* There is a tendency to construct odp_flow objects on the stack and to
1174 * forget to properly initialize their "actions" and "n_actions" members.
1175 * When this happens, we get memory corruption because the kernel
1176 * writes through the random pointer that is in the "actions" member.
1177 *
1178 * This function attempts to combat the problem by:
1179 *
1180 * - Forcing a segfault if "actions" points to an invalid region (instead
1181 * of just getting back EFAULT, which can be easily missed in the log).
1182 *
1183 * - Storing a distinctive value that is likely to cause an
1184 * easy-to-identify error later if it is dereferenced, etc.
1185 *
1186 * - Triggering a warning on uninitialized memory from Valgrind if
1187 * "actions" or "n_actions" was not initialized.
1188 */
1189static void
1190check_rw_odp_flow(struct odp_flow *flow)
1191{
1192 if (flow->n_actions) {
1193 memset(&flow->actions[0], 0xcc, sizeof flow->actions[0]);
1194 }
53a4218d 1195}