]> git.proxmox.com Git - ovs.git/blame - lib/dpif.h
dpif-netdev: Polling threads directly call ofproto upcall functions.
[ovs.git] / lib / dpif.h
CommitLineData
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
ffcb9f6e
BP
17/*
18 * dpif, the DataPath InterFace.
19 *
20 * In Open vSwitch terminology, a "datapath" is a flow-based software switch.
21 * A datapath has no intelligence of its own. Rather, it relies entirely on
22 * its client to set up flows. The datapath layer is core to the Open vSwitch
23 * software switch: one could say, without much exaggeration, that everything
24 * in ovs-vswitchd above dpif exists only to make the correct decisions
25 * interacting with dpif.
26 *
27 * Typically, the client of a datapath is the software switch module in
28 * "ovs-vswitchd", but other clients can be written. The "ovs-dpctl" utility
29 * is also a (simple) client.
30 *
31 *
32 * Overview
33 * ========
34 *
35 * The terms written in quotes below are defined in later sections.
36 *
37 * When a datapath "port" receives a packet, it extracts the headers (the
da546e07
JR
38 * "flow"). If the datapath's "flow table" contains a "flow entry" matching
39 * the packet, then it executes the "actions" in the flow entry and increments
40 * the flow's statistics. If there is no matching flow entry, the datapath
41 * instead appends the packet to an "upcall" queue.
ffcb9f6e
BP
42 *
43 *
44 * Ports
45 * =====
46 *
47 * A datapath has a set of ports that are analogous to the ports on an Ethernet
48 * switch. At the datapath level, each port has the following information
49 * associated with it:
50 *
51 * - A name, a short string that must be unique within the host. This is
52 * typically a name that would be familiar to the system administrator,
53 * e.g. "eth0" or "vif1.1", but it is otherwise arbitrary.
54 *
55 * - A 32-bit port number that must be unique within the datapath but is
56 * otherwise arbitrary. The port number is the most important identifier
57 * for a port in the datapath interface.
58 *
59 * - A type, a short string that identifies the kind of port. On a Linux
60 * host, typical types are "system" (for a network device such as eth0),
61 * "internal" (for a simulated port used to connect to the TCP/IP stack),
62 * and "gre" (for a GRE tunnel).
63 *
1954e6bb
AW
64 * - A Netlink PID for each upcall reading thread (see "Upcall Queuing and
65 * Ordering" below).
ffcb9f6e
BP
66 *
67 * The dpif interface has functions for adding and deleting ports. When a
68 * datapath implements these (e.g. as the Linux and netdev datapaths do), then
69 * Open vSwitch's ovs-vswitchd daemon can directly control what ports are used
70 * for switching. Some datapaths might not implement them, or implement them
71 * with restrictions on the types of ports that can be added or removed
72 * (e.g. on ESX), on systems where port membership can only be changed by some
73 * external entity.
74 *
75 * Each datapath must have a port, sometimes called the "local port", whose
76 * name is the same as the datapath itself, with port number 0. The local port
77 * cannot be deleted.
78 *
79 * Ports are available as "struct netdev"s. To obtain a "struct netdev *" for
80 * a port named 'name' with type 'port_type', in a datapath of type
81 * 'datapath_type', call netdev_open(name, dpif_port_open_type(datapath_type,
82 * port_type). The netdev can be used to get and set important data related to
83 * the port, such as:
84 *
85 * - MTU (netdev_get_mtu(), netdev_set_mtu()).
86 *
87 * - Ethernet address (netdev_get_etheraddr(), netdev_set_etheraddr()).
88 *
89 * - Statistics such as the number of packets and bytes transmitted and
90 * received (netdev_get_stats()).
91 *
92 * - Carrier status (netdev_get_carrier()).
93 *
94 * - Speed (netdev_get_features()).
95 *
96 * - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and
97 * related functions.)
98 *
99 * - Arbitrary port-specific configuration parameters (netdev_get_config(),
100 * netdev_set_config()). An example of such a parameter is the IP
101 * endpoint for a GRE tunnel.
102 *
103 *
104 * Flow Table
105 * ==========
106 *
ee75c546 107 * The flow table is a collection of "flow entries". Each flow entry contains:
ffcb9f6e
BP
108 *
109 * - A "flow", that is, a summary of the headers in an Ethernet packet. The
ee75c546
BP
110 * flow must be unique within the flow table. Flows are fine-grained
111 * entities that include L2, L3, and L4 headers. A single TCP connection
112 * consists of two flows, one in each direction.
ffcb9f6e
BP
113 *
114 * In Open vSwitch userspace, "struct flow" is the typical way to describe
115 * a flow, but the datapath interface uses a different data format to
116 * allow ABI forward- and backward-compatibility. datapath/README
117 * describes the rationale and design. Refer to OVS_KEY_ATTR_* and
118 * "struct ovs_key_*" in include/linux/openvswitch.h for details.
119 * lib/odp-util.h defines several functions for working with these flows.
120 *
ee75c546
BP
121 * - A "mask" that, for each bit in the flow, specifies whether the datapath
122 * should consider the corresponding flow bit when deciding whether a
123 * given packet matches the flow entry. The original datapath design did
124 * not support matching: every flow entry was exact match. With the
125 * addition of a mask, the interface supports datapaths with a spectrum of
126 * wildcard matching capabilities, from those that only support exact
127 * matches to those that support bitwise wildcarding on the entire flow
128 * key, as well as datapaths with capabilities somewhere in between.
129 *
130 * Datapaths do not provide a way to query their wildcarding capabilities,
131 * nor is it expected that the client should attempt to probe for the
132 * details of their support. Instead, a client installs flows with masks
133 * that wildcard as many bits as acceptable. The datapath then actually
134 * wildcards as many of those bits as it can and changes the wildcard bits
135 * that it does not support into exact match bits. A datapath that can
136 * wildcard any bit, for example, would install the supplied mask, an
137 * exact-match only datapath would install an exact-match mask regardless
138 * of what mask the client supplied, and a datapath in the middle of the
139 * spectrum would selectively change some wildcard bits into exact match
140 * bits.
141 *
142 * Regardless of the requested or installed mask, the datapath retains the
143 * original flow supplied by the client. (It does not, for example, "zero
144 * out" the wildcarded bits.) This allows the client to unambiguously
145 * identify the flow entry in later flow table operations.
146 *
147 * The flow table does not have priorities; that is, all flow entries have
148 * equal priority. Detecting overlapping flow entries is expensive in
149 * general, so the datapath is not required to do it. It is primarily the
150 * client's responsibility not to install flow entries whose flow and mask
151 * combinations overlap.
ffcb9f6e
BP
152 *
153 * - A list of "actions" that tell the datapath what to do with packets
154 * within a flow. Some examples of actions are OVS_ACTION_ATTR_OUTPUT,
155 * which transmits the packet out a port, and OVS_ACTION_ATTR_SET, which
156 * modifies packet headers. Refer to OVS_ACTION_ATTR_* and "struct
157 * ovs_action_*" in include/linux/openvswitch.h for details.
158 * lib/odp-util.h defines several functions for working with datapath
159 * actions.
160 *
161 * The actions list may be empty. This indicates that nothing should be
162 * done to matching packets, that is, they should be dropped.
163 *
164 * (In case you are familiar with OpenFlow, datapath actions are analogous
165 * to OpenFlow actions.)
166 *
167 * - Statistics: the number of packets and bytes that the flow has
168 * processed, the last time that the flow processed a packet, and the
169 * union of all the TCP flags in packets processed by the flow. (The
170 * latter is 0 if the flow is not a TCP flow.)
171 *
172 * The datapath's client manages the flow table, primarily in reaction to
173 * "upcalls" (see below).
174 *
175 *
176 * Upcalls
177 * =======
178 *
179 * A datapath sometimes needs to notify its client that a packet was received.
180 * The datapath mechanism to do this is called an "upcall".
181 *
182 * Upcalls are used in two situations:
183 *
184 * - When a packet is received, but there is no matching flow entry in its
185 * flow table (a flow table "miss"), this causes an upcall of type
186 * DPIF_UC_MISS. These are called "miss" upcalls.
187 *
188 * - A datapath action of type OVS_ACTION_ATTR_USERSPACE causes an upcall of
189 * type DPIF_UC_ACTION. These are called "action" upcalls.
190 *
191 * An upcall contains an entire packet. There is no attempt to, e.g., copy
192 * only as much of the packet as normally needed to make a forwarding decision.
193 * Such an optimization is doable, but experimental prototypes showed it to be
194 * of little benefit because an upcall typically contains the first packet of a
195 * flow, which is usually short (e.g. a TCP SYN). Also, the entire packet can
196 * sometimes really be needed.
197 *
198 * After a client reads a given upcall, the datapath is finished with it, that
199 * is, the datapath doesn't maintain any lingering state past that point.
200 *
201 * The latency from the time that a packet arrives at a port to the time that
202 * it is received from dpif_recv() is critical in some benchmarks. For
203 * example, if this latency is 1 ms, then a netperf TCP_CRR test, which opens
204 * and closes TCP connections one at a time as quickly as it can, cannot
205 * possibly achieve more than 500 transactions per second, since every
206 * connection consists of two flows with 1-ms latency to set up each one.
207 *
208 * To receive upcalls, a client has to enable them with dpif_recv_set(). A
1954e6bb
AW
209 * datapath should generally support being opened multiple times (e.g. so that
210 * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd"
211 * is also running) but need not support more than one of these clients
212 * enabling upcalls at once.
ffcb9f6e
BP
213 *
214 *
215 * Upcall Queuing and Ordering
216 * ---------------------------
217 *
218 * The datapath's client reads upcalls one at a time by calling dpif_recv().
219 * When more than one upcall is pending, the order in which the datapath
220 * presents upcalls to its client is important. The datapath's client does not
221 * directly control this order, so the datapath implementer must take care
222 * during design.
223 *
224 * The minimal behavior, suitable for initial testing of a datapath
225 * implementation, is that all upcalls are appended to a single queue, which is
226 * delivered to the client in order.
227 *
228 * The datapath should ensure that a high rate of upcalls from one particular
229 * port cannot cause upcalls from other sources to be dropped or unreasonably
230 * delayed. Otherwise, one port conducting a port scan or otherwise initiating
231 * high-rate traffic spanning many flows could suppress other traffic.
232 * Ideally, the datapath should present upcalls from each port in a "round
233 * robin" manner, to ensure fairness.
234 *
235 * The client has no control over "miss" upcalls and no insight into the
236 * datapath's implementation, so the datapath is entirely responsible for
237 * queuing and delivering them. On the other hand, the datapath has
238 * considerable freedom of implementation. One good approach is to maintain a
239 * separate queue for each port, to prevent any given port's upcalls from
240 * interfering with other ports' upcalls. If this is impractical, then another
241 * reasonable choice is to maintain some fixed number of queues and assign each
242 * port to one of them. Ports assigned to the same queue can then interfere
243 * with each other, but not with ports assigned to different queues. Other
244 * approaches are also possible.
245 *
246 * The client has some control over "action" upcalls: it can specify a 32-bit
247 * "Netlink PID" as part of the action. This terminology comes from the Linux
248 * datapath implementation, which uses a protocol called Netlink in which a PID
249 * designates a particular socket and the upcall data is delivered to the
250 * socket's receive queue. Generically, though, a Netlink PID identifies a
251 * queue for upcalls. The basic requirements on the datapath are:
252 *
253 * - The datapath must provide a Netlink PID associated with each port. The
254 * client can retrieve the PID with dpif_port_get_pid().
255 *
256 * - The datapath must provide a "special" Netlink PID not associated with
257 * any port. dpif_port_get_pid() also provides this PID. (ovs-vswitchd
258 * uses this PID to queue special packets that must not be lost even if a
259 * port is otherwise busy, such as packets used for tunnel monitoring.)
260 *
261 * The minimal behavior of dpif_port_get_pid() and the treatment of the Netlink
262 * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value
263 * and all upcalls are appended to a single queue.
264 *
1954e6bb 265 * The preferred behavior is:
ffcb9f6e
BP
266 *
267 * - Each port has a PID that identifies the queue used for "miss" upcalls
268 * on that port. (Thus, if each port has its own queue for "miss"
269 * upcalls, then each port has a different Netlink PID.)
270 *
271 * - "miss" upcalls for a given port and "action" upcalls that specify that
272 * port's Netlink PID add their upcalls to the same queue. The upcalls
273 * are delivered to the datapath's client in the order that the packets
274 * were received, regardless of whether the upcalls are "miss" or "action"
275 * upcalls.
276 *
277 * - Upcalls that specify the "special" Netlink PID are queued separately.
278 *
1954e6bb
AW
279 * Multiple threads may want to read upcalls simultaneously from a single
280 * datapath. To support multiple threads well, one extends the above preferred
281 * behavior:
282 *
283 * - Each port has multiple PIDs. The datapath distributes "miss" upcalls
284 * across the PIDs, ensuring that a given flow is mapped in a stable way
285 * to a single PID.
286 *
287 * - For "action" upcalls, the thread can specify its own Netlink PID or
288 * other threads' Netlink PID of the same port for offloading purpose
289 * (e.g. in a "round robin" manner).
290 *
ffcb9f6e
BP
291 *
292 * Packet Format
293 * =============
294 *
295 * The datapath interface works with packets in a particular form. This is the
296 * form taken by packets received via upcalls (i.e. by dpif_recv()). Packets
297 * supplied to the datapath for processing (i.e. to dpif_execute()) also take
298 * this form.
299 *
300 * A VLAN tag is represented by an 802.1Q header. If the layer below the
301 * datapath interface uses another representation, then the datapath interface
302 * must perform conversion.
303 *
304 * The datapath interface requires all packets to fit within the MTU. Some
305 * operating systems internally process packets larger than MTU, with features
306 * such as TSO and UFO. When such a packet passes through the datapath
307 * interface, it must be broken into multiple MTU or smaller sized packets for
308 * presentation as upcalls. (This does not happen often, because an upcall
309 * typically contains the first packet of a flow, which is usually short.)
310 *
311 * Some operating system TCP/IP stacks maintain packets in an unchecksummed or
312 * partially checksummed state until transmission. The datapath interface
313 * requires all host-generated packets to be fully checksummed (e.g. IP and TCP
314 * checksums must be correct). On such an OS, the datapath interface must fill
315 * in these checksums.
316 *
317 * Packets passed through the datapath interface must be at least 14 bytes
318 * long, that is, they must have a complete Ethernet header. They are not
319 * required to be padded to the minimum Ethernet length.
320 *
321 *
322 * Typical Usage
323 * =============
324 *
325 * Typically, the client of a datapath begins by configuring the datapath with
326 * a set of ports. Afterward, the client runs in a loop polling for upcalls to
327 * arrive.
328 *
329 * For each upcall received, the client examines the enclosed packet and
330 * figures out what should be done with it. For example, if the client
331 * implements a MAC-learning switch, then it searches the forwarding database
332 * for the packet's destination MAC and VLAN and determines the set of ports to
333 * which it should be sent. In any case, the client composes a set of datapath
334 * actions to properly dispatch the packet and then directs the datapath to
335 * execute those actions on the packet (e.g. with dpif_execute()).
336 *
337 * Most of the time, the actions that the client executed on the packet apply
338 * to every packet with the same flow. For example, the flow includes both
339 * destination MAC and VLAN ID (and much more), so this is true for the
340 * MAC-learning switch example above. In such a case, the client can also
341 * direct the datapath to treat any further packets in the flow in the same
342 * way, using dpif_flow_put() to add a new flow entry.
343 *
344 * Other tasks the client might need to perform, in addition to reacting to
345 * upcalls, include:
346 *
347 * - Periodically polling flow statistics, perhaps to supply to its own
348 * clients.
349 *
350 * - Deleting flow entries from the datapath that haven't been used
351 * recently, to save memory.
352 *
353 * - Updating flow entries whose actions should change. For example, if a
354 * MAC learning switch learns that a MAC has moved, then it must update
355 * the actions of flow entries that sent packets to the MAC at its old
356 * location.
357 *
358 * - Adding and removing ports to achieve a new configuration.
5703b15f
BP
359 *
360 *
361 * Thread-safety
362 * =============
363 *
364 * Most of the dpif functions are fully thread-safe: they may be called from
365 * any number of threads on the same or different dpif objects. The exceptions
366 * are:
367 *
368 * - dpif_port_poll() and dpif_port_poll_wait() are conditionally
369 * thread-safe: they may be called from different threads only on
370 * different dpif objects.
371 *
d2ad7ef1
JS
372 * - dpif_flow_dump_next() is conditionally thread-safe: It may be called
373 * from different threads with the same 'struct dpif_flow_dump', but all
374 * other parameters must be different for each thread.
375 *
376 * - dpif_flow_dump_done() is conditionally thread-safe: All threads that
377 * share the same 'struct dpif_flow_dump' must have finished using it.
378 * This function must then be called exactly once for a particular
379 * dpif_flow_dump to finish the corresponding flow dump operation.
380 *
381 * - Functions that operate on 'struct dpif_port_dump' are conditionally
382 * thread-safe with respect to those objects. That is, one may dump ports
383 * from any number of threads at once, but each thread must use its own
384 * struct dpif_port_dump.
ffcb9f6e 385 */
064af421
BP
386#ifndef DPIF_H
387#define DPIF_H 1
388
064af421
BP
389#include <stdbool.h>
390#include <stddef.h>
391#include <stdint.h>
758c456d 392#include "netdev.h"
da546e07 393#include "ofpbuf.h"
9dbb9d5e 394#include "openflow/openflow.h"
758c456d 395#include "packets.h"
9dbb9d5e 396#include "util.h"
064af421 397
03292c46
JG
398#ifdef __cplusplus
399extern "C" {
400#endif
401
/* Forward declarations for types used only through pointers below.
 * Their full definitions live in other headers (or later in this file). */
struct dpif;
struct ds;
struct flow;
struct nlattr;
struct sset;
struct dpif_class;
struct dpif_flow;

/* Datapath provider registration and enumeration. */
int dp_register_provider(const struct dpif_class *);
int dp_unregister_provider(const char *type);
void dp_blacklist_provider(const char *type);
void dp_enumerate_types(struct sset *types);
const char *dpif_normalize_type(const char *);

int dp_enumerate_names(const char *type, struct sset *names);
void dp_parse_name(const char *datapath_name, char **name, char **type);

/* Opening, creating, and closing datapaths. */
int dpif_open(const char *name, const char *type, struct dpif **);
int dpif_create(const char *name, const char *type, struct dpif **);
int dpif_create_and_open(const char *name, const char *type, struct dpif **);
void dpif_close(struct dpif *);

/* NOTE(review): presumably called from the client's main poll loop —
 * confirm against the implementations in dpif.c. */
void dpif_run(struct dpif *);
void dpif_wait(struct dpif *);

/* Identification. */
const char *dpif_name(const struct dpif *);
const char *dpif_base_name(const struct dpif *);
const char *dpif_type(const struct dpif *);

int dpif_delete(struct dpif *);

/* Statistics for a dpif as a whole. */
struct dpif_dp_stats {
    uint64_t n_hit;             /* Number of flow table matches. */
    uint64_t n_missed;          /* Number of flow table misses. */
    uint64_t n_lost;            /* Number of misses not sent to userspace. */
    uint64_t n_flows;           /* Number of flows present. */
    uint64_t n_mask_hit;        /* Number of mega flow masks visited for
                                   flow table matches. */
    uint32_t n_masks;           /* Number of mega flow masks. */
};
int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);

6bc60024
BP
445\f
446/* Port operations. */
064af421 447
0aeaabc8
JP
448const char *dpif_port_open_type(const char *datapath_type,
449 const char *port_type);
4e022ec0
AW
450int dpif_port_add(struct dpif *, struct netdev *, odp_port_t *port_nop);
451int dpif_port_del(struct dpif *, odp_port_t port_no);
4c738a8d
BP
452
453/* A port within a datapath.
454 *
455 * 'name' and 'type' are suitable for passing to netdev_open(). */
456struct dpif_port {
457 char *name; /* Network device name, e.g. "eth0". */
458 char *type; /* Network device type, e.g. "system". */
4e022ec0 459 odp_port_t port_no; /* Port number within datapath. */
4c738a8d
BP
460};
461void dpif_port_clone(struct dpif_port *, const struct dpif_port *);
462void dpif_port_destroy(struct dpif_port *);
4afba28d 463bool dpif_port_exists(const struct dpif *dpif, const char *devname);
4e022ec0 464int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no,
4c738a8d 465 struct dpif_port *);
064af421 466int dpif_port_query_by_name(const struct dpif *, const char *devname,
4c738a8d 467 struct dpif_port *);
4e022ec0 468int dpif_port_get_name(struct dpif *, odp_port_t port_no,
335562c0 469 char *name, size_t name_size);
1954e6bb
AW
470uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no,
471 uint32_t hash);

/* Iteration state for dumping a datapath's ports; normally driven via
 * the dpif_port_dump_*() functions or the DPIF_PORT_FOR_EACH macro. */
struct dpif_port_dump {
    const struct dpif *dpif;    /* Datapath being dumped. */
    int error;                  /* 0, or error code once the dump fails. */
    void *state;                /* Provider-private iteration state. */
};
void dpif_port_dump_start(struct dpif_port_dump *, const struct dpif *);
bool dpif_port_dump_next(struct dpif_port_dump *, struct dpif_port *);
int dpif_port_dump_done(struct dpif_port_dump *);

/* Iterates through each DPIF_PORT in DPIF, using DUMP as state.
 *
 * Arguments all have pointer type.
 *
 * If you break out of the loop, then you need to free the dump structure by
 * hand using dpif_port_dump_done(). */
#define DPIF_PORT_FOR_EACH(DPIF_PORT, DUMP, DPIF)   \
    for (dpif_port_dump_start(DUMP, DPIF);          \
         (dpif_port_dump_next(DUMP, DPIF_PORT)      \
          ? true                                    \
          : (dpif_port_dump_done(DUMP), false));    \
        )

int dpif_port_poll(const struct dpif *, char **devnamep);
void dpif_port_poll_wait(const struct dpif *);

/* Flow table operations. */

/* Per-flow statistics (see the "Flow Table" section of the file comment). */
struct dpif_flow_stats {
    uint64_t n_packets;         /* Number of packets processed by the flow. */
    uint64_t n_bytes;           /* Number of bytes processed by the flow. */
    long long int used;         /* Last time the flow processed a packet. */
    uint16_t tcp_flags;         /* Union of TCP flags seen (0 if not TCP). */
};

void dpif_flow_stats_extract(const struct flow *, const struct ofpbuf *packet,
                             long long int used, struct dpif_flow_stats *);
void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *);

/* Flags controlling the behavior of a flow put operation. */
enum dpif_flow_put_flags {
    DPIF_FP_CREATE = 1 << 0,    /* Allow creating a new flow. */
    DPIF_FP_MODIFY = 1 << 1,    /* Allow modifying an existing flow. */
    DPIF_FP_ZERO_STATS = 1 << 2 /* Zero the stats of an existing flow. */
};

int dpif_flow_flush(struct dpif *);
int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags,
                  const struct nlattr *key, size_t key_len,
                  const struct nlattr *mask, size_t mask_len,
                  const struct nlattr *actions, size_t actions_len,
                  struct dpif_flow_stats *);
int dpif_flow_del(struct dpif *,
                  const struct nlattr *key, size_t key_len,
                  struct dpif_flow_stats *);
int dpif_flow_get(const struct dpif *,
                  const struct nlattr *key, size_t key_len,
                  struct ofpbuf **, struct dpif_flow *);

/* Flow dumping interface
 * ======================
 *
 * This interface allows iteration through all of the flows currently installed
 * in a datapath.  It is somewhat complicated by two requirements:
 *
 *    - Efficient support for dumping flows in parallel from multiple threads.
 *
 *    - Allow callers to avoid making unnecessary copies of data returned by
 *      the interface across several flows in cases where the dpif
 *      implementation has to maintain a copy of that information anyhow.
 *      (That is, allow the client visibility into any underlying batching as
 *      part of its own batching.)
 *
 *
 * Usage
 * -----
 *
 * 1. Call dpif_flow_dump_create().
 * 2. In each thread that participates in the dump (which may be just a single
 *    thread if parallelism isn't important):
 *    (a) Call dpif_flow_dump_thread_create().
 *    (b) Call dpif_flow_dump_next() repeatedly until it returns 0.
 *    (c) Call dpif_flow_dump_thread_destroy().
 * 3. Call dpif_flow_dump_destroy().
 *
 * All error reporting is deferred to the call to dpif_flow_dump_destroy().
 */
struct dpif_flow_dump *dpif_flow_dump_create(const struct dpif *);
int dpif_flow_dump_destroy(struct dpif_flow_dump *);

struct dpif_flow_dump_thread *dpif_flow_dump_thread_create(
    struct dpif_flow_dump *);
void dpif_flow_dump_thread_destroy(struct dpif_flow_dump_thread *);

565/* A datapath flow as dumped by dpif_flow_dump_next(). */
566struct dpif_flow {
567 const struct nlattr *key; /* Flow key, as OVS_KEY_ATTR_* attrs. */
568 size_t key_len; /* 'key' length in bytes. */
569 const struct nlattr *mask; /* Flow mask, as OVS_KEY_ATTR_* attrs. */
570 size_t mask_len; /* 'mask' length in bytes. */
571 const struct nlattr *actions; /* Actions, as OVS_ACTION_ATTR_ */
572 size_t actions_len; /* 'actions' length in bytes. */
573 struct dpif_flow_stats stats; /* Flow statistics. */
704a1e09 574};
ac64794a
BP
575int dpif_flow_dump_next(struct dpif_flow_dump_thread *,
576 struct dpif_flow *flows, int max_flows);

/* Operation batching interface.
 *
 * Some datapaths are faster at performing N operations together than the same
 * N operations individually, hence an interface for batching.
 */

enum dpif_op_type {
    DPIF_OP_FLOW_PUT = 1,
    DPIF_OP_FLOW_DEL,
    DPIF_OP_EXECUTE,
};
589
590struct dpif_flow_put {
6bc60024
BP
591 /* Input. */
592 enum dpif_flow_put_flags flags; /* DPIF_FP_*. */
593 const struct nlattr *key; /* Flow to put. */
594 size_t key_len; /* Length of 'key' in bytes. */
e6cc0bab
AZ
595 const struct nlattr *mask; /* Mask to put. */
596 size_t mask_len; /* Length of 'mask' in bytes. */
6bc60024
BP
597 const struct nlattr *actions; /* Actions to perform on flow. */
598 size_t actions_len; /* Length of 'actions' in bytes. */
599
600 /* Output. */
601 struct dpif_flow_stats *stats; /* Optional flow statistics. */
6bc60024
BP
602};
603
/* Arguments for a single DPIF_OP_FLOW_DEL batched operation; mirrors the
 * parameters of dpif_flow_del(). */
struct dpif_flow_del {
    /* Input. */
    const struct nlattr *key;      /* Flow to delete. */
    size_t key_len;                /* Length of 'key' in bytes. */

    /* Output. */
    struct dpif_flow_stats *stats; /* Optional flow statistics. */
};

6bc60024 613struct dpif_execute {
7fd91025 614 /* Raw support for execute passed along to the provider. */
6bc60024
BP
615 const struct nlattr *actions; /* Actions to execute on packet. */
616 size_t actions_len; /* Length of 'actions' in bytes. */
da546e07 617 struct ofpbuf *packet; /* Packet to execute. */
758c456d 618 struct pkt_metadata md; /* Packet metadata. */
7fd91025
BP
619
620 /* Some dpif providers do not implement every action. The Linux kernel
621 * datapath, in particular, does not implement ARP field modification.
622 *
623 * If this member is set to true, the dpif layer executes in userspace all
624 * of the actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
625 * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the
626 * dpif implementation. */
627 bool needs_help;
6bc60024
BP
628};
629
758c456d
JR
630int dpif_execute(struct dpif *, struct dpif_execute *);
631
c2b565b5 632struct dpif_op {
6bc60024 633 enum dpif_op_type type;
c2b565b5
BP
634 int error;
635 union {
636 struct dpif_flow_put flow_put;
b99d3cee 637 struct dpif_flow_del flow_del;
c2b565b5
BP
638 struct dpif_execute execute;
639 } u;
6bc60024
BP
640};
641
c2b565b5 642void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops);

/* Upcalls. */

enum dpif_upcall_type {
    DPIF_UC_MISS,               /* Miss in flow table. */
    DPIF_UC_ACTION,             /* OVS_ACTION_ATTR_USERSPACE action. */
    DPIF_N_UC_TYPES
};

const char *dpif_upcall_type_to_string(enum dpif_upcall_type);

856081f6
BP
654/* A packet passed up from the datapath to userspace.
655 *
da546e07
JR
656 * The 'packet', 'key' and 'userdata' may point into data in a buffer
657 * provided by the caller, so the buffer should be released only after the
658 * upcall processing has been finished.
659 *
660 * While being processed, the 'packet' may be reallocated, so the packet must
661 * be separately released with ofpbuf_uninit().
856081f6
BP
662 */
663struct dpif_upcall {
856081f6 664 /* All types. */
82272ede 665 enum dpif_upcall_type type;
da546e07 666 struct ofpbuf packet; /* Packet data. */
856081f6
BP
667 struct nlattr *key; /* Flow key. */
668 size_t key_len; /* Length of 'key' in bytes. */
669
82272ede 670 /* DPIF_UC_ACTION only. */
e995e3df 671 struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
856081f6 672};

/* Callback type used by providers to hand upcalls directly to the client;
 * registered with dpif_register_upcall_cb(). */
typedef void exec_upcall_cb(struct dpif *, struct dpif_upcall *,
                            struct ofpbuf *, int cnt);

int dpif_recv_set(struct dpif *, bool enable);
int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
              struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
void dpif_recv_wait(struct dpif *, uint32_t handler_id);
void dpif_register_upcall_cb(struct dpif *, exec_upcall_cb *);
void dpif_enable_upcall(struct dpif *);
void dpif_disable_upcall(struct dpif *);

void dpif_print_packet(struct dpif *, struct dpif_upcall *);

/* Miscellaneous. */

void dpif_get_netflow_ids(const struct dpif *,
                          uint8_t *engine_type, uint8_t *engine_id);

int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id,
                           uint32_t *priority);

03292c46
JG
697#ifdef __cplusplus
698}
699#endif
700
064af421 701#endif /* dpif.h */