/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * dpif, the DataPath InterFace.
 *
 * In Open vSwitch terminology, a "datapath" is a flow-based software switch.
 * A datapath has no intelligence of its own. Rather, it relies entirely on
 * its client to set up flows. The datapath layer is core to the Open vSwitch
 * software switch: one could say, without much exaggeration, that everything
 * in ovs-vswitchd above dpif exists only to make the correct decisions
 * interacting with dpif.
 *
 * Typically, the client of a datapath is the software switch module in
 * "ovs-vswitchd", but other clients can be written. The "ovs-dpctl" utility
 * is also a (simple) client.
 *
 *
 * Overview
 * ========
 *
 * The terms written in quotes below are defined in later sections.
 *
 * When a datapath "port" receives a packet, it extracts the headers (the
 * "flow"). If the datapath's "flow table" contains a "flow entry" whose flow
 * is the same as the packet's, then it executes the "actions" in the flow
 * entry and increments the flow's statistics. If there is no matching flow
 * entry, the datapath instead appends the packet to an "upcall" queue.
 *
 *
 * Ports
 * =====
 *
 * A datapath has a set of ports that are analogous to the ports on an Ethernet
 * switch. At the datapath level, each port has the following information
 * associated with it:
 *
 *    - A name, a short string that must be unique within the host. This is
 *      typically a name that would be familiar to the system administrator,
 *      e.g. "eth0" or "vif1.1", but it is otherwise arbitrary.
 *
 *    - A 32-bit port number that must be unique within the datapath but is
 *      otherwise arbitrary. The port number is the most important identifier
 *      for a port in the datapath interface.
 *
 *    - A type, a short string that identifies the kind of port. On a Linux
 *      host, typical types are "system" (for a network device such as eth0),
 *      "internal" (for a simulated port used to connect to the TCP/IP stack),
 *      and "gre" (for a GRE tunnel).
 *
 *    - A Netlink PID (see "Upcall Queuing and Ordering" below).
 *
 * The dpif interface has functions for adding and deleting ports. When a
 * datapath implements these (e.g. as the Linux and netdev datapaths do), then
 * Open vSwitch's ovs-vswitchd daemon can directly control what ports are used
 * for switching. Some datapaths might not implement them, or implement them
 * with restrictions on the types of ports that can be added or removed
 * (e.g. on ESX), on systems where port membership can only be changed by some
 * external entity.
 *
 * Each datapath must have a port, sometimes called the "local port", whose
 * name is the same as the datapath itself, with port number 0. The local port
 * cannot be deleted.
 *
 * Ports are available as "struct netdev"s. To obtain a "struct netdev *" for
 * a port named 'name' with type 'port_type', in a datapath of type
 * 'datapath_type', call netdev_open(name, dpif_port_open_type(datapath_type,
 * port_type)). The netdev can be used to get and set important data related
 * to the port, such as:
 *
 *    - MTU (netdev_get_mtu(), netdev_set_mtu()).
 *
 *    - Ethernet address (netdev_get_etheraddr(), netdev_set_etheraddr()).
 *
 *    - Statistics such as the number of packets and bytes transmitted and
 *      received (netdev_get_stats()).
 *
 *    - Carrier status (netdev_get_carrier()).
 *
 *    - Speed (netdev_get_features()).
 *
 *    - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and
 *      related functions.)
 *
 *    - Arbitrary port-specific configuration parameters (netdev_get_config(),
 *      netdev_set_config()). An example of such a parameter is the IP
 *      endpoint for a GRE tunnel.
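 *
 * For example, a rough sketch of opening the netdev for a datapath port and
 * reading its MTU (the name "eth0" and the "system" types are illustrative
 * only; error handling is omitted):
 *
 *     struct netdev *netdev;
 *     int mtu;
 *
 *     netdev_open("eth0", dpif_port_open_type("system", "system"), &netdev);
 *     netdev_get_mtu(netdev, &mtu);
 *     netdev_close(netdev);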
101 *
102 *
103 * Flow Table
104 * ==========
105 *
106 * The flow table is a hash table of "flow entries". Each flow entry contains:
107 *
108 * - A "flow", that is, a summary of the headers in an Ethernet packet. The
109 * flow is the hash key and thus must be unique within the flow table.
110 * Flows are fine-grained entities that include L2, L3, and L4 headers. A
111 * single TCP connection consists of two flows, one in each direction.
112 *
113 * In Open vSwitch userspace, "struct flow" is the typical way to describe
114 * a flow, but the datapath interface uses a different data format to
115 * allow ABI forward- and backward-compatibility. datapath/README
116 * describes the rationale and design. Refer to OVS_KEY_ATTR_* and
117 * "struct ovs_key_*" in include/linux/openvswitch.h for details.
118 * lib/odp-util.h defines several functions for working with these flows.
119 *
120 * (In case you are familiar with OpenFlow, datapath flows are analogous
121 * to OpenFlow flow matches. The most important difference is that
122 * OpenFlow allows fields to be wildcarded and prioritized, whereas a
123 * datapath's flow table is a hash table so every flow must be
124 * exact-match, thus without priorities.)
125 *
126 * - A list of "actions" that tell the datapath what to do with packets
127 * within a flow. Some examples of actions are OVS_ACTION_ATTR_OUTPUT,
128 * which transmits the packet out a port, and OVS_ACTION_ATTR_SET, which
129 * modifies packet headers. Refer to OVS_ACTION_ATTR_* and "struct
130 * ovs_action_*" in include/linux/openvswitch.h for details.
131 * lib/odp-util.h defines several functions for working with datapath
132 * actions.
133 *
134 * The actions list may be empty. This indicates that nothing should be
135 * done to matching packets, that is, they should be dropped.
136 *
137 * (In case you are familiar with OpenFlow, datapath actions are analogous
138 * to OpenFlow actions.)
139 *
140 * - Statistics: the number of packets and bytes that the flow has
141 * processed, the last time that the flow processed a packet, and the
142 * union of all the TCP flags in packets processed by the flow. (The
143 * latter is 0 if the flow is not a TCP flow.)
144 *
145 * The datapath's client manages the flow table, primarily in reaction to
146 * "upcalls" (see below).
147 *
148 *
149 * Upcalls
150 * =======
151 *
152 * A datapath sometimes needs to notify its client that a packet was received.
153 * The datapath mechanism to do this is called an "upcall".
154 *
155 * Upcalls are used in two situations:
156 *
157 * - When a packet is received, but there is no matching flow entry in its
158 * flow table (a flow table "miss"), this causes an upcall of type
159 * DPIF_UC_MISS. These are called "miss" upcalls.
160 *
161 * - A datapath action of type OVS_ACTION_ATTR_USERSPACE causes an upcall of
162 * type DPIF_UC_ACTION. These are called "action" upcalls.
163 *
164 * An upcall contains an entire packet. There is no attempt to, e.g., copy
165 * only as much of the packet as normally needed to make a forwarding decision.
166 * Such an optimization is doable, but experimental prototypes showed it to be
167 * of little benefit because an upcall typically contains the first packet of a
168 * flow, which is usually short (e.g. a TCP SYN). Also, the entire packet can
169 * sometimes really be needed.
170 *
171 * After a client reads a given upcall, the datapath is finished with it, that
172 * is, the datapath doesn't maintain any lingering state past that point.
173 *
174 * The latency from the time that a packet arrives at a port to the time that
175 * it is received from dpif_recv() is critical in some benchmarks. For
176 * example, if this latency is 1 ms, then a netperf TCP_CRR test, which opens
177 * and closes TCP connections one at a time as quickly as it can, cannot
178 * possibly achieve more than 500 transactions per second, since every
179 * connection consists of two flows with 1-ms latency to set up each one.
180 *
181 * To receive upcalls, a client has to enable them with dpif_recv_set(). A
182 * datapath should generally support multiple clients at once (e.g. so that one
183 * may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" is
184 * also running) but need not support multiple clients enabling upcalls at
185 * once.
186 *
187 *
188 * Upcall Queuing and Ordering
189 * ---------------------------
190 *
191 * The datapath's client reads upcalls one at a time by calling dpif_recv().
192 * When more than one upcall is pending, the order in which the datapath
193 * presents upcalls to its client is important. The datapath's client does not
194 * directly control this order, so the datapath implementer must take care
195 * during design.
196 *
197 * The minimal behavior, suitable for initial testing of a datapath
198 * implementation, is that all upcalls are appended to a single queue, which is
199 * delivered to the client in order.
200 *
201 * The datapath should ensure that a high rate of upcalls from one particular
202 * port cannot cause upcalls from other sources to be dropped or unreasonably
203 * delayed. Otherwise, one port conducting a port scan or otherwise initiating
204 * high-rate traffic spanning many flows could suppress other traffic.
205 * Ideally, the datapath should present upcalls from each port in a "round
206 * robin" manner, to ensure fairness.
207 *
208 * The client has no control over "miss" upcalls and no insight into the
209 * datapath's implementation, so the datapath is entirely responsible for
210 * queuing and delivering them. On the other hand, the datapath has
211 * considerable freedom of implementation. One good approach is to maintain a
212 * separate queue for each port, to prevent any given port's upcalls from
213 * interfering with other ports' upcalls. If this is impractical, then another
214 * reasonable choice is to maintain some fixed number of queues and assign each
215 * port to one of them. Ports assigned to the same queue can then interfere
216 * with each other, but not with ports assigned to different queues. Other
217 * approaches are also possible.
218 *
219 * The client has some control over "action" upcalls: it can specify a 32-bit
220 * "Netlink PID" as part of the action. This terminology comes from the Linux
221 * datapath implementation, which uses a protocol called Netlink in which a PID
222 * designates a particular socket and the upcall data is delivered to the
223 * socket's receive queue. Generically, though, a Netlink PID identifies a
224 * queue for upcalls. The basic requirements on the datapath are:
225 *
226 * - The datapath must provide a Netlink PID associated with each port. The
227 * client can retrieve the PID with dpif_port_get_pid().
228 *
229 * - The datapath must provide a "special" Netlink PID not associated with
230 * any port. dpif_port_get_pid() also provides this PID. (ovs-vswitchd
231 * uses this PID to queue special packets that must not be lost even if a
232 * port is otherwise busy, such as packets used for tunnel monitoring.)
233 *
234 * The minimal behavior of dpif_port_get_pid() and the treatment of the Netlink
235 * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value
236 * and all upcalls are appended to a single queue.
237 *
238 * The ideal behavior is:
239 *
240 * - Each port has a PID that identifies the queue used for "miss" upcalls
241 * on that port. (Thus, if each port has its own queue for "miss"
242 * upcalls, then each port has a different Netlink PID.)
243 *
244 * - "miss" upcalls for a given port and "action" upcalls that specify that
245 * port's Netlink PID add their upcalls to the same queue. The upcalls
246 * are delivered to the datapath's client in the order that the packets
247 * were received, regardless of whether the upcalls are "miss" or "action"
248 * upcalls.
249 *
250 * - Upcalls that specify the "special" Netlink PID are queued separately.
251 *
252 *
253 * Packet Format
254 * =============
255 *
256 * The datapath interface works with packets in a particular form. This is the
257 * form taken by packets received via upcalls (i.e. by dpif_recv()). Packets
258 * supplied to the datapath for processing (i.e. to dpif_execute()) also take
259 * this form.
260 *
261 * A VLAN tag is represented by an 802.1Q header. If the layer below the
262 * datapath interface uses another representation, then the datapath interface
263 * must perform conversion.
264 *
265 * The datapath interface requires all packets to fit within the MTU. Some
266 * operating systems internally process packets larger than MTU, with features
267 * such as TSO and UFO. When such a packet passes through the datapath
268 * interface, it must be broken into multiple MTU or smaller sized packets for
269 * presentation as upcalls. (This does not happen often, because an upcall
270 * typically contains the first packet of a flow, which is usually short.)
271 *
272 * Some operating system TCP/IP stacks maintain packets in an unchecksummed or
273 * partially checksummed state until transmission. The datapath interface
274 * requires all host-generated packets to be fully checksummed (e.g. IP and TCP
275 * checksums must be correct). On such an OS, the datapath interface must fill
276 * in these checksums.
277 *
278 * Packets passed through the datapath interface must be at least 14 bytes
279 * long, that is, they must have a complete Ethernet header. They are not
280 * required to be padded to the minimum Ethernet length.
281 *
282 *
283 * Typical Usage
284 * =============
285 *
286 * Typically, the client of a datapath begins by configuring the datapath with
287 * a set of ports. Afterward, the client runs in a loop polling for upcalls to
288 * arrive.
289 *
290 * For each upcall received, the client examines the enclosed packet and
291 * figures out what should be done with it. For example, if the client
292 * implements a MAC-learning switch, then it searches the forwarding database
293 * for the packet's destination MAC and VLAN and determines the set of ports to
294 * which it should be sent. In any case, the client composes a set of datapath
295 * actions to properly dispatch the packet and then directs the datapath to
296 * execute those actions on the packet (e.g. with dpif_execute()).
297 *
298 * Most of the time, the actions that the client executed on the packet apply
299 * to every packet with the same flow. For example, the flow includes both
300 * destination MAC and VLAN ID (and much more), so this is true for the
301 * MAC-learning switch example above. In such a case, the client can also
302 * direct the datapath to treat any further packets in the flow in the same
303 * way, using dpif_flow_put() to add a new flow entry.
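 *
 * For instance, a rough sketch of that last step (it assumes 'upcall' was
 * filled in by dpif_recv() and that 'actions'/'actions_len' were composed
 * with the helpers in lib/odp-util.h; the null mask requests an exact-match
 * entry and error handling is omitted):
 *
 *     struct dpif_flow_stats stats;
 *
 *     dpif_flow_put(dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
 *                   upcall->key, upcall->key_len,
 *                   NULL, 0,
 *                   actions, actions_len, &stats);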
 *
 * Other tasks the client might need to perform, in addition to reacting to
 * upcalls, include:
 *
 *    - Periodically polling flow statistics, perhaps to supply to its own
 *      clients.
 *
 *    - Deleting flow entries from the datapath that haven't been used
 *      recently, to save memory.
 *
 *    - Updating flow entries whose actions should change. For example, if a
 *      MAC learning switch learns that a MAC has moved, then it must update
 *      the actions of flow entries that sent packets to the MAC at its old
 *      location.
 *
 *    - Adding and removing ports to achieve a new configuration.
 *
 *
 * Thread-safety
 * =============
 *
 * Most of the dpif functions are fully thread-safe: they may be called from
 * any number of threads on the same or different dpif objects. The exceptions
 * are:
 *
 *    - dpif_port_poll() and dpif_port_poll_wait() are conditionally
 *      thread-safe: they may be called from different threads only on
 *      different dpif objects.
 *
 *    - Functions that operate on struct dpif_port_dump or struct
 *      dpif_flow_dump are conditionally thread-safe with respect to those
 *      objects. That is, one may dump ports or flows from any number of
 *      threads at once, but each thread must use its own struct dpif_port_dump
 *      or dpif_flow_dump.
 */
#ifndef DPIF_H
#define DPIF_H 1

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "openflow/openflow.h"
#include "netdev.h"
#include "util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct dpif;
struct ds;
struct flow;
struct nlattr;
struct ofpbuf;
struct sset;
struct dpif_class;

int dp_register_provider(const struct dpif_class *);
int dp_unregister_provider(const char *type);
void dp_blacklist_provider(const char *type);
void dp_enumerate_types(struct sset *types);
const char *dpif_normalize_type(const char *);

int dp_enumerate_names(const char *type, struct sset *names);
void dp_parse_name(const char *datapath_name, char **name, char **type);

int dpif_open(const char *name, const char *type, struct dpif **);
int dpif_create(const char *name, const char *type, struct dpif **);
int dpif_create_and_open(const char *name, const char *type, struct dpif **);
void dpif_close(struct dpif *);

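/* A rough sketch of opening and closing a datapath (the name "ovs-system"
 * and type "system" are illustrative; error handling is abbreviated):
 *
 *     struct dpif *dpif;
 *
 *     int error = dpif_open("ovs-system", "system", &dpif);
 *     if (!error) {
 *         ...use 'dpif'...
 *         dpif_close(dpif);
 *     }
 */
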
void dpif_run(struct dpif *);
void dpif_wait(struct dpif *);

const char *dpif_name(const struct dpif *);
const char *dpif_base_name(const struct dpif *);
const char *dpif_type(const struct dpif *);

int dpif_delete(struct dpif *);

/* Statistics for a dpif as a whole. */
struct dpif_dp_stats {
    uint64_t n_hit;             /* Number of flow table matches. */
    uint64_t n_missed;          /* Number of flow table misses. */
    uint64_t n_lost;            /* Number of misses not sent to userspace. */
    uint64_t n_flows;           /* Number of flows present. */
};
int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
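
/* For example, a rough sketch that reports a datapath's overall hit and miss
 * counts (assumes the caller includes <stdio.h> and <inttypes.h>):
 *
 *     struct dpif_dp_stats stats;
 *
 *     if (!dpif_get_dp_stats(dpif, &stats)) {
 *         printf("%"PRIu64" hits, %"PRIu64" misses, %"PRIu64" lost\n",
 *                stats.n_hit, stats.n_missed, stats.n_lost);
 *     }
 */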

/* Port operations. */

const char *dpif_port_open_type(const char *datapath_type,
                                const char *port_type);
int dpif_port_add(struct dpif *, struct netdev *, odp_port_t *port_nop);
int dpif_port_del(struct dpif *, odp_port_t port_no);

/* A port within a datapath.
 *
 * 'name' and 'type' are suitable for passing to netdev_open(). */
struct dpif_port {
    char *name;                 /* Network device name, e.g. "eth0". */
    char *type;                 /* Network device type, e.g. "system". */
    odp_port_t port_no;         /* Port number within datapath. */
};
void dpif_port_clone(struct dpif_port *, const struct dpif_port *);
void dpif_port_destroy(struct dpif_port *);
bool dpif_port_exists(const struct dpif *dpif, const char *devname);
int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no,
                              struct dpif_port *);
int dpif_port_query_by_name(const struct dpif *, const char *devname,
                            struct dpif_port *);
int dpif_port_get_name(struct dpif *, odp_port_t port_no,
                       char *name, size_t name_size);
uint32_t dpif_get_max_ports(const struct dpif *);
uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);

struct dpif_port_dump {
    const struct dpif *dpif;
    int error;
    void *state;
};
void dpif_port_dump_start(struct dpif_port_dump *, const struct dpif *);
bool dpif_port_dump_next(struct dpif_port_dump *, struct dpif_port *);
int dpif_port_dump_done(struct dpif_port_dump *);

/* Iterates through each DPIF_PORT in DPIF, using DUMP as state.
 *
 * Arguments all have pointer type.
 *
 * If you break out of the loop, then you need to free the dump structure by
 * hand using dpif_port_dump_done(). */
#define DPIF_PORT_FOR_EACH(DPIF_PORT, DUMP, DPIF)   \
    for (dpif_port_dump_start(DUMP, DPIF);          \
         (dpif_port_dump_next(DUMP, DPIF_PORT)      \
          ? true                                    \
          : (dpif_port_dump_done(DUMP), false));    \
        )
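
/* For example, a rough sketch that prints the name and type of every port in
 * 'dpif' (printf output is illustrative only; real callers typically use VLOG
 * or a struct ds):
 *
 *     struct dpif_port_dump dump;
 *     struct dpif_port port;
 *
 *     DPIF_PORT_FOR_EACH (&port, &dump, dpif) {
 *         printf("%s (%s)\n", port.name, port.type);
 *     }
 */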

int dpif_port_poll(const struct dpif *, char **devnamep);
void dpif_port_poll_wait(const struct dpif *);

/* Flow table operations. */

struct dpif_flow_stats {
    uint64_t n_packets;
    uint64_t n_bytes;
    long long int used;
    uint8_t tcp_flags;
};

void dpif_flow_stats_extract(const struct flow *, const struct ofpbuf *packet,
                             long long int used, struct dpif_flow_stats *);
void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *);
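
/* A rough sketch of formatting flow statistics into a string (the struct ds
 * helpers used here are declared in lib/dynamic-string.h):
 *
 *     struct ds s = DS_EMPTY_INITIALIZER;
 *
 *     dpif_flow_stats_format(&stats, &s);
 *     printf("%s\n", ds_cstr(&s));
 *     ds_destroy(&s);
 */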

enum dpif_flow_put_flags {
    DPIF_FP_CREATE = 1 << 0,    /* Allow creating a new flow. */
    DPIF_FP_MODIFY = 1 << 1,    /* Allow modifying an existing flow. */
    DPIF_FP_ZERO_STATS = 1 << 2 /* Zero the stats of an existing flow. */
};

int dpif_flow_flush(struct dpif *);
int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags,
                  const struct nlattr *key, size_t key_len,
                  const struct nlattr *mask, size_t mask_len,
                  const struct nlattr *actions, size_t actions_len,
                  struct dpif_flow_stats *);
int dpif_flow_del(struct dpif *,
                  const struct nlattr *key, size_t key_len,
                  struct dpif_flow_stats *);
int dpif_flow_get(const struct dpif *,
                  const struct nlattr *key, size_t key_len,
                  struct ofpbuf **actionsp, struct dpif_flow_stats *);

struct dpif_flow_dump {
    const struct dpif *dpif;
    int error;
    void *state;
};
void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *);
bool dpif_flow_dump_next(struct dpif_flow_dump *,
                         const struct nlattr **key, size_t *key_len,
                         const struct nlattr **mask, size_t *mask_len,
                         const struct nlattr **actions, size_t *actions_len,
                         const struct dpif_flow_stats **);
int dpif_flow_dump_done(struct dpif_flow_dump *);
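
/* A rough sketch of iterating over every flow in 'dpif' (what to do with each
 * flow is left open; lib/odp-util.h has helpers for formatting 'key', 'mask',
 * and 'actions'):
 *
 *     struct dpif_flow_dump dump;
 *     const struct nlattr *key, *mask, *actions;
 *     size_t key_len, mask_len, actions_len;
 *     const struct dpif_flow_stats *stats;
 *
 *     dpif_flow_dump_start(&dump, dpif);
 *     while (dpif_flow_dump_next(&dump, &key, &key_len, &mask, &mask_len,
 *                                &actions, &actions_len, &stats)) {
 *         ...examine the flow...
 *     }
 *     dpif_flow_dump_done(&dump);
 */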

/* Packet operations. */

int dpif_execute(struct dpif *,
                 const struct nlattr *key, size_t key_len,
                 const struct nlattr *actions, size_t actions_len,
                 const struct ofpbuf *,
                 bool needs_help);

/* Operation batching interface.
 *
 * Some datapaths are faster at performing N operations together than the same
 * N operations individually, hence an interface for batching.
 */

enum dpif_op_type {
    DPIF_OP_FLOW_PUT = 1,
    DPIF_OP_FLOW_DEL,
    DPIF_OP_EXECUTE,
};

struct dpif_flow_put {
    /* Input. */
    enum dpif_flow_put_flags flags; /* DPIF_FP_*. */
    const struct nlattr *key;       /* Flow to put. */
    size_t key_len;                 /* Length of 'key' in bytes. */
    const struct nlattr *mask;      /* Mask to put. */
    size_t mask_len;                /* Length of 'mask' in bytes. */
    const struct nlattr *actions;   /* Actions to perform on flow. */
    size_t actions_len;             /* Length of 'actions' in bytes. */

    /* Output. */
    struct dpif_flow_stats *stats;  /* Optional flow statistics. */
};

struct dpif_flow_del {
    /* Input. */
    const struct nlattr *key;       /* Flow to delete. */
    size_t key_len;                 /* Length of 'key' in bytes. */

    /* Output. */
    struct dpif_flow_stats *stats;  /* Optional flow statistics. */
};

struct dpif_execute {
    /* Raw support for execute passed along to the provider. */
    const struct nlattr *key;       /* Partial flow key (only for metadata). */
    size_t key_len;                 /* Length of 'key' in bytes. */
    const struct nlattr *actions;   /* Actions to execute on packet. */
    size_t actions_len;             /* Length of 'actions' in bytes. */
    const struct ofpbuf *packet;    /* Packet to execute. */

    /* Some dpif providers do not implement every action. The Linux kernel
     * datapath, in particular, does not implement ARP field modification.
     *
     * If this member is set to true, the dpif layer executes in userspace all
     * of the actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
     * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the
     * dpif implementation. */
    bool needs_help;
};

struct dpif_op {
    enum dpif_op_type type;
    int error;
    union {
        struct dpif_flow_put flow_put;
        struct dpif_flow_del flow_del;
        struct dpif_execute execute;
    } u;
};

void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops);
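
/* A rough sketch of batching two flow deletions ('key1'/'key1_len' and
 * 'key2'/'key2_len' stand for Netlink-formatted flow keys built elsewhere):
 *
 *     struct dpif_op op1, op2;
 *     struct dpif_op *ops[] = { &op1, &op2 };
 *
 *     op1.type = DPIF_OP_FLOW_DEL;
 *     op1.u.flow_del.key = key1;
 *     op1.u.flow_del.key_len = key1_len;
 *     op1.u.flow_del.stats = NULL;
 *
 *     op2.type = DPIF_OP_FLOW_DEL;
 *     op2.u.flow_del.key = key2;
 *     op2.u.flow_del.key_len = key2_len;
 *     op2.u.flow_del.stats = NULL;
 *
 *     dpif_operate(dpif, ops, 2);
 *
 * After dpif_operate() returns, each op's 'error' member holds that
 * operation's result.
 */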

/* Upcalls. */

enum dpif_upcall_type {
    DPIF_UC_MISS,               /* Miss in flow table. */
    DPIF_UC_ACTION,             /* OVS_ACTION_ATTR_USERSPACE action. */
    DPIF_N_UC_TYPES
};

const char *dpif_upcall_type_to_string(enum dpif_upcall_type);

/* A packet passed up from the datapath to userspace.
 *
 * If 'key', 'actions', or 'userdata' is nonnull, then it points into data
 * owned by 'packet', so their memory cannot be freed separately. (This is
 * hardly a great way to do things but it works out OK for the dpif providers
 * and clients that exist so far.)
 */
struct dpif_upcall {
    /* All types. */
    enum dpif_upcall_type type;
    struct ofpbuf *packet;      /* Packet data. */
    struct nlattr *key;         /* Flow key. */
    size_t key_len;             /* Length of 'key' in bytes. */

    /* DPIF_UC_ACTION only. */
    struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
};

int dpif_recv_set(struct dpif *, bool enable);
int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
void dpif_recv_wait(struct dpif *);
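
/* A rough sketch of a basic upcall receive loop (poll_block() comes from
 * lib/poll-loop.h; the stub size and what "handle" means are up to the
 * client):
 *
 *     dpif_recv_set(dpif, true);
 *     for (;;) {
 *         struct dpif_upcall upcall;
 *         struct ofpbuf buf;
 *         uint64_t stub[4096 / 8];
 *
 *         ofpbuf_use_stub(&buf, stub, sizeof stub);
 *         if (!dpif_recv(dpif, &upcall, &buf)) {
 *             ...handle 'upcall', whose data points into 'buf'...
 *         } else {
 *             dpif_recv_wait(dpif);
 *             poll_block();
 *         }
 *     }
 */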

/* Miscellaneous. */

void dpif_get_netflow_ids(const struct dpif *,
                          uint8_t *engine_type, uint8_t *engine_id);

int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id,
                           uint32_t *priority);

#ifdef __cplusplus
}
#endif

#endif /* dpif.h */