]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
6409e008 | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc. |
a14bc59f BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #ifndef OPENFLOW_NICIRA_EXT_H | |
18 | #define OPENFLOW_NICIRA_EXT_H 1 | |
19 | ||
1825f2ec TG |
20 | #include <openflow/openflow.h> |
21 | #include <openvswitch/types.h> | |
064af421 | 22 | |
e0edde6f | 23 | /* The following vendor extensions, proposed by Nicira, are not yet |
26c112c2 BP |
24 | * standardized, so they are not included in openflow.h. Some of them may be |
25 | * suitable for standardization; others we never expect to standardize. */ | |
064af421 | 26 | |
26c112c2 BP |
27 | \f |
28 | /* Nicira vendor-specific error messages extension. | |
29 | * | |
30 | * OpenFlow 1.0 has a set of predefined error types (OFPET_*) and codes (which | |
31 | * are specific to each type). It does not have any provision for | |
32 | * vendor-specific error codes, and it does not even provide "generic" error | |
33 | * codes that can apply to problems not anticipated by the OpenFlow | |
34 | * specification authors. | |
35 | * | |
36 | * This extension attempts to address the problem by adding a generic "error | |
37 | * vendor extension". The extension works as follows: use NXET_VENDOR as type | |
6eb59a8f | 38 | * and NXVC_VENDOR_ERROR as code, followed by struct nx_vendor_error with |
26c112c2 BP |
39 | * vendor-specific details, followed by at least 64 bytes of the failed |
40 | * request. | |
41 | * | |
a23aab1f | 42 | * It would be better to have a type-specific vendor extension, e.g. so that |
26c112c2 BP |
43 | * OFPET_BAD_ACTION could be used with vendor-specific code values. But |
44 | * OFPET_BAD_ACTION and most other standardized types already specify that | |
45 | * their 'data' values are (the start of) the OpenFlow message being replied | |
46 | * to, so there is no room to insert a vendor ID. | |
47 | * | |
48 | * Currently this extension is only implemented by Open vSwitch, but it seems | |
49 | * like a reasonable candidate for future standardization. | |
50 | */ | |
51 | ||
52 | /* This is a random number to avoid accidental collision with any other | |
53 | * vendor's extension. */ | |
54 | #define NXET_VENDOR 0xb0c2 | |
55 | ||
56 | /* ofp_error msg 'code' values for NXET_VENDOR. */ | |
57 | enum nx_vendor_code { | |
58 | NXVC_VENDOR_ERROR /* 'data' contains struct nx_vendor_error. */ | |
59 | }; | |
60 | ||
61 | /* 'data' for 'type' == NXET_VENDOR, 'code' == NXVC_VENDOR_ERROR. */ | |
62 | struct nx_vendor_error { | |
63 | ovs_be32 vendor; /* Vendor ID as in struct ofp_vendor_header. */ | |
64 | ovs_be16 type; /* Vendor-defined type. */ | |
65 | ovs_be16 code; /* Vendor-defined subtype. */ | |
66 | /* Followed by at least the first 64 bytes of the failed request. */ | |
67 | }; | |
68 | \f | |
69 | /* Nicira vendor requests and replies. */ | |
064af421 | 70 | |
520e9a2a EJ |
71 | /* Fields to use when hashing flows. */ |
72 | enum nx_hash_fields { | |
73 | /* Ethernet source address (NXM_OF_ETH_SRC) only. */ | |
74 | NX_HASH_FIELDS_ETH_SRC, | |
75 | ||
76 | /* L2 through L4, symmetric across src/dst. Specifically, each of the | |
77 | * following fields, if present, is hashed (slashes separate symmetric | |
78 | * pairs): | |
79 | * | |
80 | * - NXM_OF_ETH_DST / NXM_OF_ETH_SRC | |
81 | * - NXM_OF_ETH_TYPE | |
82 | * - The VID bits from NXM_OF_VLAN_TCI, ignoring PCP and CFI. | |
83 | * - NXM_OF_IP_PROTO | |
84 | * - NXM_OF_IP_SRC / NXM_OF_IP_DST | |
85 | * - NXM_OF_TCP_SRC / NXM_OF_TCP_DST | |
86 | */ | |
4249b547 JB |
87 | NX_HASH_FIELDS_SYMMETRIC_L4, |
88 | ||
89 | /* L3+L4 only, including the following fields: | |
90 | * | |
91 | * - NXM_OF_IP_PROTO | |
92 | * - NXM_OF_IP_SRC / NXM_OF_IP_DST | |
93 | * - NXM_OF_SCTP_SRC / NXM_OF_SCTP_DST | |
94 | * - NXM_OF_TCP_SRC / NXM_OF_TCP_DST | |
95 | */ | |
96 | NX_HASH_FIELDS_SYMMETRIC_L3L4, | |
97 | ||
98 | /* L3+L4 only with UDP ports, including the following fields: | |
99 | * | |
100 | * - NXM_OF_IP_PROTO | |
101 | * - NXM_OF_IP_SRC / NXM_OF_IP_DST | |
102 | * - NXM_OF_SCTP_SRC / NXM_OF_SCTP_DST | |
103 | * - NXM_OF_TCP_SRC / NXM_OF_TCP_DST | |
104 | * - NXM_OF_UDP_SRC / NXM_OF_UDP_DST | |
105 | */ | |
106 | NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP | |
107 | ||
108 | ||
520e9a2a EJ |
109 | }; |
110 | ||
6c1491fb BP |
111 | /* This command enables or disables an Open vSwitch extension that allows a |
112 | * controller to specify the OpenFlow table to which a flow should be added, | |
113 | * instead of having the switch decide which table is most appropriate as | |
63f7ef64 BP |
114 | * required by OpenFlow 1.0. Because NXM was designed as an extension to |
115 | * OpenFlow 1.0, the extension applies equally to ofp10_flow_mod and | |
116 | * nx_flow_mod. By default, the extension is disabled. | |
6c1491fb | 117 | * |
35805806 | 118 | * When this feature is enabled, Open vSwitch treats struct ofp10_flow_mod's |
63f7ef64 BP |
119 | * and struct nx_flow_mod's 16-bit 'command' member as two separate fields. |
120 | * The upper 8 bits are used as the table ID, the lower 8 bits specify the | |
121 | * command as usual. A table ID of 0xff is treated like a wildcarded table ID. | |
6c1491fb BP |
122 | * |
123 | * The specific treatment of the table ID depends on the type of flow mod: | |
124 | * | |
125 | * - OFPFC_ADD: Given a specific table ID, the flow is always placed in that | |
126 | * table. If an identical flow already exists in that table only, then it | |
127 | * is replaced. If the flow cannot be placed in the specified table, | |
128 | * either because the table is full or because the table cannot support | |
be2b69d1 BP |
129 | * flows of the given type, the switch replies with an OFPFMFC_TABLE_FULL |
130 | * error. (A controller can distinguish these cases by comparing the | |
131 | * current and maximum number of entries reported in ofp_table_stats.) | |
6c1491fb BP |
132 | * |
133 | * If the table ID is wildcarded, the switch picks an appropriate table | |
134 | * itself. If an identical flow already exists in the selected flow table, | |
135 | * then it is replaced. The choice of table might depend on the flows | |
136 | * that are already in the switch; for example, if one table fills up then | |
137 | * the switch might fall back to another one. | |
138 | * | |
139 | * - OFPFC_MODIFY, OFPFC_DELETE: Given a specific table ID, only flows | |
140 | * within that table are matched and modified or deleted. If the table ID | |
141 | * is wildcarded, flows within any table may be matched and modified or | |
142 | * deleted. | |
143 | * | |
144 | * - OFPFC_MODIFY_STRICT, OFPFC_DELETE_STRICT: Given a specific table ID, | |
145 | * only a flow within that table may be matched and modified or deleted. | |
146 | * If the table ID is wildcarded and exactly one flow within any table | |
147 | * matches, then it is modified or deleted; if flows in more than one | |
148 | * table match, then none is modified or deleted. | |
149 | */ | |
73dbf4ab | 150 | struct nx_flow_mod_table_id { |
6c1491fb BP |
151 | uint8_t set; /* Nonzero to enable, zero to disable. */ |
152 | uint8_t pad[7]; | |
153 | }; | |
982697a4 | 154 | OFP_ASSERT(sizeof(struct nx_flow_mod_table_id) == 8); |
6c1491fb | 155 | |
54834960 | 156 | enum nx_packet_in_format { |
6409e008 BP |
157 | NXPIF_STANDARD = 0, /* OFPT_PACKET_IN for this OpenFlow version. */ |
158 | NXPIF_NXT_PACKET_IN = 1, /* NXT_PACKET_IN (since OVS v1.1). */ | |
159 | NXPIF_NXT_PACKET_IN2 = 2, /* NXT_PACKET_IN2 (since OVS v2.6). */ | |
54834960 EJ |
160 | }; |
161 | ||
6409e008 BP |
162 | /* NXT_SET_PACKET_IN_FORMAT request. |
163 | * | |
164 | * For any given OpenFlow version, Open vSwitch supports multiple formats for | |
165 | * "packet-in" messages. The default is always the standard format for the | |
166 | * OpenFlow version in question, but NXT_SET_PACKET_IN_FORMAT can be used to | |
167 | * set an alternative format. | |
168 | * | |
169 | * From OVS v1.1 to OVS v2.5, this request was only honored for OpenFlow 1.0. | |
170 | * Requests to set format NXPIF_NXT_PACKET_IN were accepted for OF1.1+ but they | |
171 | * had no effect. (Requests to set formats other than NXPIF_STANDARD or | |
172 | * NXPIF_NXT_PACKET_IN were rejected with OFPBRC_EPERM.) | |
173 | * | |
174 | * From OVS v2.6 onward, this request is honored for all OpenFlow versions. | |
175 | */ | |
73dbf4ab | 176 | struct nx_set_packet_in_format { |
54834960 EJ |
177 | ovs_be32 format; /* One of NXPIF_*. */ |
178 | }; | |
982697a4 | 179 | OFP_ASSERT(sizeof(struct nx_set_packet_in_format) == 4); |
54834960 EJ |
180 | |
181 | /* NXT_PACKET_IN (analogous to OFPT_PACKET_IN). | |
182 | * | |
42edbe39 BP |
183 | * NXT_PACKET_IN is similar to the OpenFlow 1.2 OFPT_PACKET_IN. The |
184 | * differences are: | |
185 | * | |
186 | * - NXT_PACKET_IN includes the cookie of the rule that triggered the | |
187 | * message. (OpenFlow 1.3 OFPT_PACKET_IN also includes the cookie.) | |
188 | * | |
189 | * - The metadata fields use NXM (instead of OXM) field numbers. | |
190 | * | |
191 | * Open vSwitch 1.9.0 and later omits metadata fields that are zero (as allowed | |
192 | * by OpenFlow 1.2). Earlier versions included all implemented metadata | |
193 | * fields. | |
194 | * | |
195 | * Open vSwitch does not include non-metadata in the nx_match, because by | |
196 | * definition that information can be found in the packet itself. The format | |
197 | * and the standards allow this, however, so controllers should be prepared to | |
198 | * tolerate future changes. | |
199 | * | |
200 | * The NXM format is convenient for reporting metadata values, but it is | |
201 | * important not to interpret the format as matching against a flow, because it | |
202 | * does not. Nothing is being matched; arbitrary metadata masks would not be | |
203 | * meaningful. | |
54834960 EJ |
204 | * |
205 | * Whereas in most cases a controller can expect to only get back NXM fields | |
206 | * that it set up itself (e.g. flow dumps will ordinarily report only NXM | |
207 | * fields from flows that the controller added), NXT_PACKET_IN messages might | |
208 | * contain fields that the controller does not understand, because the switch | |
209 | * might support fields (new registers, new protocols, etc.) that the | |
42edbe39 | 210 | * controller does not. The controller must be prepared to tolerate these. |
54834960 | 211 | * |
d4fa4e79 BP |
212 | * The 'cookie' field has no meaning when 'reason' is OFPR_NO_MATCH. In this |
213 | * case it should be UINT64_MAX. */ | |
73dbf4ab | 214 | struct nx_packet_in { |
54834960 EJ |
215 | ovs_be32 buffer_id; /* ID assigned by datapath. */ |
216 | ovs_be16 total_len; /* Full length of frame. */ | |
217 | uint8_t reason; /* Reason packet is sent (one of OFPR_*). */ | |
218 | uint8_t table_id; /* ID of the table that was looked up. */ | |
219 | ovs_be64 cookie; /* Cookie of the rule that was looked up. */ | |
220 | ovs_be16 match_len; /* Size of nx_match. */ | |
221 | uint8_t pad[6]; /* Align to 64-bits. */ | |
222 | /* Followed by: | |
223 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
224 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
225 | * all-zero bytes, then | |
226 | * - Exactly 2 all-zero padding bytes, then | |
227 | * - An Ethernet frame whose length is inferred from nxh.header.length. | |
228 | * | |
229 | * The padding bytes preceding the Ethernet frame ensure that the IP | |
230 | * header (if any) following the Ethernet header is 32-bit aligned. */ | |
231 | ||
42edbe39 | 232 | /* uint8_t nxm_fields[...]; */ /* NXM headers. */ |
54834960 EJ |
233 | /* uint8_t pad[2]; */ /* Align to 64 bit + 16 bit. */ |
234 | /* uint8_t data[0]; */ /* Ethernet frame. */ | |
235 | }; | |
982697a4 | 236 | OFP_ASSERT(sizeof(struct nx_packet_in) == 24); |
54834960 | 237 | |
77ab5fd2 BP |
238 | /* NXT_PACKET_IN2 |
239 | * ============== | |
6409e008 BP |
240 | * |
241 | * NXT_PACKET_IN2 is conceptually similar to OFPT_PACKET_IN but it is expressed | |
242 | * as an extensible set of properties instead of using a fixed structure. | |
243 | * | |
77ab5fd2 BP |
244 | * Added in Open vSwitch 2.6 |
245 | * | |
246 | * | |
247 | * Continuations | |
248 | * ------------- | |
249 | * | |
250 | * When a "controller" action specifies the "pause" flag, the controller action | |
251 | * freezes the packet's trip through Open vSwitch flow tables and serializes | |
252 | * that state into the packet-in message as a "continuation". The controller | |
253 | * can later send the continuation back to the switch, which will restart the | |
254 | * packet's traversal from the point where it was interrupted. This permits an | |
255 | * OpenFlow controller to interpose on a packet midway through processing in | |
256 | * Open vSwitch. | |
257 | * | |
258 | * Continuations fit into packet processing this way: | |
259 | * | |
260 | * 1. A packet ingresses into Open vSwitch, which runs it through the OpenFlow | |
261 | * tables. | |
262 | * | |
263 | * 2. An OpenFlow flow executes a "controller" action that includes the "pause" | |
264 | * flag. Open vSwitch serializes the packet processing state and sends it, | |
265 | * as an NXT_PACKET_IN2 that includes an additional NXPINT_CONTINUATION | |
266 | * property (the continuation), to the OpenFlow controller. | |
267 | * | |
268 | * (The controller must use NXAST_CONTROLLER2 to generate the packet-in, | |
269 | * because only this form of the "controller" action has a "pause" flag. | |
270 | * Similarly, the controller must use NXT_SET_PACKET_IN_FORMAT to select | |
271 | * NXT_PACKET_IN2 as the packet-in format, because this is the only format | |
272 | * that supports continuation passing.) | |
273 | * | |
274 | * 3. The controller receives the NXT_PACKET_IN2 and processes it. The | |
275 | * controller can interpret and, if desired, modify some of the contents of | |
276 | * the packet-in, such as the packet and the metadata being processed. | |
277 | * | |
278 | * 4. The controller sends the continuation back to the switch, using an | |
279 | * NXT_RESUME message. Packet processing resumes where it left off. | |
280 | * | |
281 | * The controller might change the pipeline configuration concurrently with | |
282 | * steps 2 through 4. For example, it might add or remove OpenFlow flows. If | |
283 | * that happens, then the packet will experience a mix of processing from the | |
284 | * two configurations, that is, the initial processing (before | |
285 | * NXAST_CONTROLLER2) uses the initial flow table, and the later processing | |
286 | * (after NXT_RESUME) uses the later flow table. This means that the | |
287 | * controller needs to take care to avoid incompatible pipeline changes while | |
288 | * processing continuations. | |
289 | * | |
290 | * External side effects (e.g. "output") of OpenFlow actions processed before | |
291 | * NXAST_CONTROLLER2 is encountered might be executed during step 2 or step 4, | |
292 | * and the details may vary among Open vSwitch features and versions. Thus, a | |
293 | * controller that wants to make sure that side effects are executed must pass | |
294 | * the continuation back to the switch, that is, must not skip step 4. | |
295 | * | |
296 | * Architecturally, continuations may be "stateful" or "stateless", that is, | |
297 | * they may or may not refer to buffered state maintained in Open vSwitch. | |
298 | * This means that a controller should not attempt to resume a given | |
299 | * continuation more than once (because the switch might have discarded the | |
300 | * buffered state after the first use). For the same reason, continuations | |
301 | * might become "stale" if the controller takes too long to resume them | |
302 | * (because the switch might have discarded old buffered state). Taken | |
303 | * together with the previous note, this means that a controller should resume | |
304 | * each continuation exactly once (and promptly). | |
305 | * | |
306 | * Without the information in NXPINT_CONTINUATION, the controller can (with | |
307 | * careful design, and help from the flow cookie) determine where the packet is | |
308 | * in the pipeline, but in the general case it can't determine what nested | |
309 | * "resubmit"s may be in progress, or what data is on the stack maintained | |
310 | * by NXAST_STACK_PUSH and NXAST_STACK_POP actions, what is in the OpenFlow | |
311 | * action set, etc. | |
312 | * | |
313 | * Continuations are expensive because they require a round trip between the | |
314 | * switch and the controller. Thus, they should not be used to implement | |
315 | * processing that needs to happen at "line rate". | |
316 | * | |
317 | * The contents of NXPINT_CONTINUATION are private to the switch, may change | |
318 | * unpredictably from one version of Open vSwitch to another, and are not | |
319 | * documented here. The contents are also tied to a given Open vSwitch process | |
320 | * and bridge, so that restarting Open vSwitch or deleting and recreating a | |
321 | * bridge will cause the corresponding NXT_RESUME to be rejected. | |
322 | * | |
323 | * In the current implementation, Open vSwitch forks the packet processing | |
324 | * pipeline across patch ports. Suppose, for example, that the pipeline for | |
325 | * br0 outputs to a patch port whose peer belongs to br1, and that the pipeline | |
326 | * for br1 executes a controller action with the "pause" flag. This only | |
327 | * pauses processing within br1, and processing in br0 continues and possibly | |
328 | * completes with visible side effects, such as outputting to ports, before | |
329 | * br1's controller receives or processes the continuation. This | |
330 | * implementation maintains the independence of separate bridges and, since | |
331 | * processing in br1 cannot affect the behavior of br0 anyway, should not cause | |
332 | * visible behavioral changes. | |
333 | * | |
334 | * A stateless implementation of continuations may ignore the "controller" | |
335 | * action max_len, always sending the whole packet, because the full packet is | |
336 | * required to continue traversal. | |
337 | */ | |
6409e008 BP |
338 | enum nx_packet_in2_prop_type { |
339 | /* Packet. */ | |
340 | NXPINT_PACKET, /* Raw packet data. */ | |
341 | NXPINT_FULL_LEN, /* ovs_be32: Full packet len, if truncated. */ | |
342 | NXPINT_BUFFER_ID, /* ovs_be32: Buffer ID, if buffered. */ | |
343 | ||
344 | /* Information about the flow that triggered the packet-in. */ | |
345 | NXPINT_TABLE_ID, /* uint8_t: Table ID. */ | |
346 | NXPINT_COOKIE, /* ovs_be64: Flow cookie. */ | |
347 | ||
348 | /* Other. */ | |
349 | NXPINT_REASON, /* uint8_t, one of OFPR_*. */ | |
350 | NXPINT_METADATA, /* NXM or OXM for metadata fields. */ | |
bdcad671 | 351 | NXPINT_USERDATA, /* From NXAST_CONTROLLER2 userdata. */ |
77ab5fd2 | 352 | NXPINT_CONTINUATION, /* Private data for continuing processing. */ |
6409e008 BP |
353 | }; |
354 | ||
9deba63b BP |
355 | /* Configures the "role" of the sending controller. The default role is: |
356 | * | |
357 | * - Other (NX_ROLE_OTHER), which allows the controller access to all | |
358 | * OpenFlow features. | |
359 | * | |
360 | * The other possible roles are a related pair: | |
361 | * | |
362 | * - Master (NX_ROLE_MASTER) is equivalent to Other, except that there may | |
363 | * be at most one Master controller at a time: when a controller | |
364 | * configures itself as Master, any existing Master is demoted to the | |
365 | * Slave role. | |
366 | * | |
367 | * - Slave (NX_ROLE_SLAVE) allows the controller read-only access to | |
368 | * OpenFlow features. In particular attempts to modify the flow table | |
369 | * will be rejected with an OFPBRC_EPERM error. | |
370 | * | |
197a992f BP |
371 | * Slave controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED |
372 | * messages, but they do receive OFPT_PORT_STATUS messages. | |
9deba63b BP |
373 | */ |
374 | struct nx_role_request { | |
459749fe | 375 | ovs_be32 role; /* One of NX_ROLE_*. */ |
9deba63b | 376 | }; |
982697a4 | 377 | OFP_ASSERT(sizeof(struct nx_role_request) == 4); |
9deba63b BP |
378 | |
379 | enum nx_role { | |
380 | NX_ROLE_OTHER, /* Default role, full access. */ | |
381 | NX_ROLE_MASTER, /* Full access, at most one. */ | |
382 | NX_ROLE_SLAVE /* Read-only access. */ | |
383 | }; | |
80d5aefd BP |
384 | |
385 | /* NXT_SET_ASYNC_CONFIG. | |
386 | * | |
387 | * Sent by a controller, this message configures the asynchronous messages that | |
388 | * the controller wants to receive. Element 0 in each array specifies messages | |
389 | * of interest when the controller has an "other" or "master" role; element 1, | |
390 | * when the controller has a "slave" role. | |
391 | * | |
392 | * Each array element is a bitmask in which a 0-bit disables receiving a | |
393 | * particular message and a 1-bit enables receiving it. Each bit controls the | |
394 | * message whose 'reason' corresponds to the bit index. For example, the bit | |
395 | * with value 1<<2 == 4 in port_status_mask[1] determines whether the | |
396 | * controller will receive OFPT_PORT_STATUS messages with reason OFPPR_MODIFY | |
397 | * (value 2) when the controller has a "slave" role. | |
4550b647 MM |
398 | * |
399 | * As a side effect, for service controllers, this message changes the | |
400 | * miss_send_len from default of zero to OFP_DEFAULT_MISS_SEND_LEN (128). | |
80d5aefd BP |
401 | */ |
402 | struct nx_async_config { | |
80d5aefd BP |
403 | ovs_be32 packet_in_mask[2]; /* Bitmasks of OFPR_* values. */ |
404 | ovs_be32 port_status_mask[2]; /* Bitmasks of OFPPR_* values. */ | |
405 | ovs_be32 flow_removed_mask[2]; /* Bitmasks of OFPRR_* values. */ | |
406 | }; | |
982697a4 | 407 | OFP_ASSERT(sizeof(struct nx_async_config) == 24); |
26c112c2 | 408 | \f |
09246b99 BP |
409 | /* Flexible flow specifications (aka NXM = Nicira Extended Match). |
410 | * | |
eec25dc1 | 411 | * OpenFlow 1.0 has "struct ofp10_match" for specifying flow matches. This |
09246b99 BP |
412 | * structure is fixed-length and hence difficult to extend. This section |
413 | * describes a more flexible, variable-length flow match, called "nx_match" for | |
414 | * short, that is also supported by Open vSwitch. This section also defines a | |
eec25dc1 | 415 | * replacement for each OpenFlow message that includes struct ofp10_match. |
09246b99 | 416 | * |
16c35c1d YT |
417 | * OpenFlow 1.2+ introduced OpenFlow Extensible Match (OXM), adapting |
418 | * the design of NXM. The formats of NXM and OXM are compatible. | |
419 | * | |
09246b99 BP |
420 | * |
421 | * Format | |
422 | * ====== | |
423 | * | |
424 | * An nx_match is a sequence of zero or more "nxm_entry"s, which are | |
425 | * type-length-value (TLV) entries, each 5 to 259 (inclusive) bytes long. | |
426 | * "nxm_entry"s are not aligned on or padded to any multibyte boundary. The | |
427 | * first 4 bytes of an nxm_entry are its "header", followed by the entry's | |
428 | * "body". | |
429 | * | |
430 | * An nxm_entry's header is interpreted as a 32-bit word in network byte order: | |
431 | * | |
432 | * |<-------------------- nxm_type ------------------>| | |
433 | * | | | |
434 | * |31 16 15 9| 8 7 0 | |
435 | * +----------------------------------+---------------+--+------------------+ | |
436 | * | nxm_vendor | nxm_field |hm| nxm_length | | |
437 | * +----------------------------------+---------------+--+------------------+ | |
438 | * | |
439 | * The most-significant 23 bits of the header are collectively "nxm_type". | |
16c35c1d YT |
440 | * Bits 16...31 are "nxm_vendor", one of OFPXMC12_* values. In case of |
441 | * NXM, it's either OFPXMC12_NXM_0 or OFPXMC12_NXM_1. | |
442 | * Bits 9...15 are "nxm_field", which is a vendor-specific value. nxm_type | |
443 | * normally designates a protocol header, such as the Ethernet type, but it | |
444 | * can also refer to packet metadata, such as the switch port on which a packet | |
445 | * arrived. | |
09246b99 BP |
446 | * |
447 | * Bit 8 is "nxm_hasmask" (labeled "hm" above for space reasons). The meaning | |
448 | * of this bit is explained later. | |
449 | * | |
450 | * The least-significant 8 bits are "nxm_length", a positive integer. The | |
451 | * length of the nxm_entry, including the header, is exactly 4 + nxm_length | |
452 | * bytes. | |
453 | * | |
454 | * For a given nxm_vendor, nxm_field, and nxm_hasmask value, nxm_length is a | |
455 | * constant. It is included only to allow software to minimally parse | |
456 | * "nxm_entry"s of unknown types. (Similarly, for a given nxm_vendor, | |
457 | * nxm_field, and nxm_length, nxm_hasmask is a constant.) | |
458 | * | |
459 | * | |
460 | * Semantics | |
461 | * ========= | |
462 | * | |
463 | * A zero-length nx_match (one with no "nxm_entry"s) matches every packet. | |
464 | * | |
465 | * An nxm_entry places a constraint on the packets matched by the nx_match: | |
466 | * | |
467 | * - If nxm_hasmask is 0, the nxm_entry's body contains a value for the | |
468 | * field, called "nxm_value". The nx_match matches only packets in which | |
469 | * the field equals nxm_value. | |
470 | * | |
471 | * - If nxm_hasmask is 1, then the nxm_entry's body contains a value for the | |
472 | * field (nxm_value), followed by a bitmask of the same length as the | |
473 | * value, called "nxm_mask". For each 1-bit in position J in nxm_mask, the | |
474 | * nx_match matches only packets for which bit J in the given field's value | |
475 | * matches bit J in nxm_value. A 0-bit in nxm_mask means that the | |
e1cfc4e4 BP |
476 | * corresponding bit in nxm_value is ignored (it should be 0; Open vSwitch |
477 | * may enforce this someday), as is the corresponding bit in the field's | |
478 | * value. (The sense of the nxm_mask bits is the opposite of that used by | |
479 | * the "wildcards" member of struct ofp10_match.) | |
09246b99 BP |
480 | * |
481 | * When nxm_hasmask is 1, nxm_length is always even. | |
482 | * | |
483 | * An all-zero-bits nxm_mask is equivalent to omitting the nxm_entry | |
484 | * entirely. An all-one-bits nxm_mask is equivalent to specifying 0 for | |
485 | * nxm_hasmask. | |
486 | * | |
487 | * When there are multiple "nxm_entry"s, all of the constraints must be met. | |
488 | * | |
489 | * | |
490 | * Mask Restrictions | |
491 | * ================= | |
492 | * | |
493 | * Masks may be restricted: | |
494 | * | |
495 | * - Some nxm_types may not support masked wildcards, that is, nxm_hasmask | |
496 | * must always be 0 when these fields are specified. For example, the | |
497 | * field that identifies the port on which a packet was received may not be | |
498 | * masked. | |
499 | * | |
500 | * - Some nxm_types that do support masked wildcards may only support certain | |
501 | * nxm_mask patterns. For example, fields that have IPv4 address values | |
502 | * may be restricted to CIDR masks. | |
503 | * | |
504 | * These restrictions should be noted in specifications for individual fields. | |
505 | * A switch may accept an nxm_hasmask or nxm_mask value that the specification | |
506 | * disallows, if the switch correctly implements support for that nxm_hasmask | |
507 | * or nxm_mask value. A switch must reject an attempt to set up a flow that | |
508 | * contains a nxm_hasmask or nxm_mask value that it does not support. | |
509 | * | |
510 | * | |
511 | * Prerequisite Restrictions | |
512 | * ========================= | |
513 | * | |
514 | * The presence of an nxm_entry with a given nxm_type may be restricted based | |
515 | * on the presence of or values of other "nxm_entry"s. For example: | |
516 | * | |
517 | * - An nxm_entry for nxm_type=NXM_OF_IP_TOS is allowed only if it is | |
518 | * preceded by another entry with nxm_type=NXM_OF_ETH_TYPE, nxm_hasmask=0, | |
519 | * and nxm_value=0x0800. That is, matching on the IP ToS field is | |
520 | * allowed only if the Ethernet type is explicitly set to IP. | |
521 | * | |
e51df1a0 BP |
522 | * - An nxm_entry for nxm_type=NXM_OF_TCP_SRC is allowed only if it is |
523 | * preceded by an entry with nxm_type=NXM_OF_ETH_TYPE, nxm_hasmask=0, and | |
524 | * nxm_value either 0x0800 or 0x86dd, and another with | |
525 | * nxm_type=NXM_OF_IP_PROTO, nxm_hasmask=0, nxm_value=6, in that order. | |
526 | * That is, matching on the TCP source port is allowed only if the Ethernet | |
527 | * type is IP or IPv6 and the IP protocol is TCP. | |
09246b99 BP |
528 | * |
529 | * These restrictions should be noted in specifications for individual fields. | |
530 | * A switch may implement relaxed versions of these restrictions. A switch | |
531 | * must reject an attempt to set up a flow that violates its restrictions. | |
532 | * | |
533 | * | |
534 | * Ordering Restrictions | |
535 | * ===================== | |
536 | * | |
537 | * An nxm_entry that has prerequisite restrictions must appear after the | |
538 | * "nxm_entry"s for its prerequisites. Ordering of "nxm_entry"s within an | |
539 | * nx_match is not otherwise constrained. | |
540 | * | |
541 | * Any given nxm_type may appear in an nx_match at most once. | |
542 | * | |
543 | * | |
544 | * nxm_entry Examples | |
545 | * ================== | |
546 | * | |
547 | * These examples show the format of a single nxm_entry with particular | |
548 | * nxm_hasmask and nxm_length values. The diagrams are labeled with field | |
549 | * numbers and byte indexes. | |
550 | * | |
551 | * | |
be86ea7a | 552 | * 8-bit nxm_value, nxm_hasmask=1, nxm_length=2: |
09246b99 BP |
553 | * |
554 | * 0 3 4 5 | |
555 | * +------------+---+---+ | |
556 | * | header | v | m | | |
557 | * +------------+---+---+ | |
558 | * | |
559 | * | |
560 | * 16-bit nxm_value, nxm_hasmask=0, nxm_length=2: | |
561 | * | |
562 | * 0 3 4 5 | |
563 | * +------------+------+ | |
564 | * | header | value| | |
565 | * +------------+------+ | |
566 | * | |
567 | * | |
568 | * 32-bit nxm_value, nxm_hasmask=0, nxm_length=4: | |
569 | * | |
570 | * 0 3 4 7 | |
571 | * +------------+-------------+ | |
572 | * | header | nxm_value | | |
573 | * +------------+-------------+ | |
574 | * | |
575 | * | |
576 | * 48-bit nxm_value, nxm_hasmask=0, nxm_length=6: | |
577 | * | |
578 | * 0 3 4 9 | |
579 | * +------------+------------------+ | |
580 | * | header | nxm_value | | |
581 | * +------------+------------------+ | |
582 | * | |
583 | * | |
584 | * 48-bit nxm_value, nxm_hasmask=1, nxm_length=12: | |
585 | * | |
586 | * 0 3 4 9 10 15 | |
587 | * +------------+------------------+------------------+ | |
588 | * | header | nxm_value | nxm_mask | | |
589 | * +------------+------------------+------------------+ | |
590 | * | |
591 | * | |
592 | * Error Reporting | |
593 | * =============== | |
594 | * | |
595 | * A switch should report an error in an nx_match using error type | |
596 | * OFPET_BAD_REQUEST and one of the NXBRC_NXM_* codes. Ideally the switch | |
597 | * should report a specific error code, if one is assigned for the particular | |
598 | * problem, but NXBRC_NXM_INVALID is also available to report a generic | |
599 | * nx_match error. | |
600 | */ | |
601 | ||
a4ce8b25 | 602 | /* Number of registers allocated NXM field IDs. */ |
b6c9e612 | 603 | #define NXM_NX_MAX_REGS 16 |
7257b535 BP |
604 | |
605 | /* Bits in the value of NXM_NX_IP_FRAG. */ | |
606 | #define NX_IP_FRAG_ANY (1 << 0) /* Is this a fragment? */ | |
607 | #define NX_IP_FRAG_LATER (1 << 1) /* Is this a fragment with nonzero offset? */ | |
d31f1109 | 608 | |
b666962b JG |
609 | /* Bits in the value of NXM_NX_TUN_FLAGS. */ |
610 | #define NX_TUN_FLAG_OAM (1 << 0) /* Is this an OAM packet? */ | |
611 | ||
09246b99 BP |
612 | /* ## --------------------- ## */ |
613 | /* ## Requests and replies. ## */ | |
614 | /* ## --------------------- ## */ | |
615 | ||
492f7572 | 616 | enum nx_flow_format { |
09246b99 | 617 | NXFF_OPENFLOW10 = 0, /* Standard OpenFlow 1.0 compatible. */ |
310f3699 | 618 | NXFF_NXM = 2 /* Nicira extended match. */ |
09246b99 BP |
619 | }; |
620 | ||
621 | /* NXT_SET_FLOW_FORMAT request. */ | |
73dbf4ab | 622 | struct nx_set_flow_format { |
09246b99 BP |
623 | ovs_be32 format; /* One of NXFF_*. */ |
624 | }; | |
982697a4 | 625 | OFP_ASSERT(sizeof(struct nx_set_flow_format) == 4); |
09246b99 | 626 | |
e729e793 JP |
627 | /* NXT_FLOW_MOD (analogous to OFPT_FLOW_MOD). |
628 | * | |
629 | * It is possible to limit flow deletions and modifications to certain | |
623e1caf JP |
630 | * cookies by using the NXM_NX_COOKIE(_W) matches. The "cookie" field |
631 | * is used only to add or modify flow cookies. | |
e729e793 | 632 | */ |
09246b99 | 633 | struct nx_flow_mod { |
09246b99 | 634 | ovs_be64 cookie; /* Opaque controller-issued identifier. */ |
63f7ef64 BP |
635 | ovs_be16 command; /* OFPFC_* + possibly a table ID (see comment |
636 | * on struct nx_flow_mod_table_id). */ | |
09246b99 BP |
637 | ovs_be16 idle_timeout; /* Idle time before discarding (seconds). */ |
638 | ovs_be16 hard_timeout; /* Max time before discarding (seconds). */ | |
639 | ovs_be16 priority; /* Priority level of flow entry. */ | |
640 | ovs_be32 buffer_id; /* Buffered packet to apply to (or -1). | |
641 | Not meaningful for OFPFC_DELETE*. */ | |
642 | ovs_be16 out_port; /* For OFPFC_DELETE* commands, require | |
643 | matching entries to include this as an | |
644 | output port. A value of OFPP_NONE | |
645 | indicates no restriction. */ | |
646 | ovs_be16 flags; /* One of OFPFF_*. */ | |
647 | ovs_be16 match_len; /* Size of nx_match. */ | |
648 | uint8_t pad[6]; /* Align to 64-bits. */ | |
649 | /* Followed by: | |
650 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
651 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
652 | * all-zero bytes, then | |
653 | * - Actions to fill out the remainder of the message length (always a | |
654 | * multiple of 8). | |
655 | */ | |
656 | }; | |
982697a4 | 657 | OFP_ASSERT(sizeof(struct nx_flow_mod) == 32); |
09246b99 | 658 | |
745bfd5e BP |
659 | /* NXT_FLOW_REMOVED (analogous to OFPT_FLOW_REMOVED). |
660 | * | |
661 | * 'table_id' is present only in Open vSwitch 1.11 and later. In earlier | |
662 | * versions of Open vSwitch, this is a padding byte that is always zeroed. | |
663 | * Therefore, a 'table_id' value of 0 indicates that the table ID is not known, | |
664 | * and other values may be interpreted as one more than the flow's former table | |
665 | * ID. */ | |
09246b99 | 666 | struct nx_flow_removed { |
09246b99 BP |
667 | ovs_be64 cookie; /* Opaque controller-issued identifier. */ |
668 | ovs_be16 priority; /* Priority level of flow entry. */ | |
669 | uint8_t reason; /* One of OFPRR_*. */ | |
745bfd5e | 670 | uint8_t table_id; /* Flow's former table ID, plus one. */ |
09246b99 BP |
671 | ovs_be32 duration_sec; /* Time flow was alive in seconds. */ |
672 | ovs_be32 duration_nsec; /* Time flow was alive in nanoseconds beyond | |
673 | duration_sec. */ | |
674 | ovs_be16 idle_timeout; /* Idle timeout from original flow mod. */ | |
675 | ovs_be16 match_len; /* Size of nx_match. */ | |
676 | ovs_be64 packet_count; | |
677 | ovs_be64 byte_count; | |
678 | /* Followed by: | |
679 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
680 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
681 | * all-zero bytes. */ | |
682 | }; | |
982697a4 | 683 | OFP_ASSERT(sizeof(struct nx_flow_removed) == 40); |
09246b99 BP |
684 | |
685 | /* Nicira vendor stats request of type NXST_FLOW (analogous to OFPST_FLOW | |
e729e793 JP |
686 | * request). |
687 | * | |
688 | * It is possible to limit matches to certain cookies by using the | |
689 | * NXM_NX_COOKIE and NXM_NX_COOKIE_W matches. | |
690 | */ | |
09246b99 | 691 | struct nx_flow_stats_request { |
09246b99 BP |
692 | ovs_be16 out_port; /* Require matching entries to include this |
693 | as an output port. A value of OFPP_NONE | |
694 | indicates no restriction. */ | |
695 | ovs_be16 match_len; /* Length of nx_match. */ | |
696 | uint8_t table_id; /* ID of table to read (from ofp_table_stats) | |
697 | or 0xff for all tables. */ | |
698 | uint8_t pad[3]; /* Align to 64 bits. */ | |
699 | /* Followed by: | |
700 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
701 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
702 | * all-zero bytes, which must also exactly fill out the length of the | |
703 | * message. | |
704 | */ | |
705 | }; | |
982697a4 | 706 | OFP_ASSERT(sizeof(struct nx_flow_stats_request) == 8); |
09246b99 BP |
707 | |
708 | /* Body for Nicira vendor stats reply of type NXST_FLOW (analogous to | |
f27f2134 BP |
709 | * OFPST_FLOW reply). |
710 | * | |
711 | * The values of 'idle_age' and 'hard_age' are only meaningful when talking to | |
712 | * a switch that implements the NXT_FLOW_AGE extension. Zero means that the | |
713 | * true value is unknown, perhaps because hardware does not track the value. | |
714 | * (Zero is also the value that one should ordinarily expect to see talking to | |
715 | * a switch that does not implement NXT_FLOW_AGE, since those switches zero the | |
716 | * padding bytes that these fields replaced.) A nonzero value X represents X-1 | |
717 | * seconds. A value of 65535 represents 65534 or more seconds. | |
718 | * | |
719 | * 'idle_age' is the number of seconds that the flow has been idle, that is, | |
720 | * the number of seconds since a packet passed through the flow. 'hard_age' is | |
721 | * the number of seconds since the flow was last modified (e.g. OFPFC_MODIFY or | |
722 | * OFPFC_MODIFY_STRICT). (The 'duration_*' fields are the elapsed time since | |
723 | * the flow was added, regardless of subsequent modifications.) | |
724 | * | |
725 | * For a flow with an idle or hard timeout, 'idle_age' or 'hard_age', | |
726 | * respectively, will ordinarily be smaller than the timeout, but flow | |
727 | * expiration times are only approximate and so one must be prepared to | |
728 | * tolerate expirations that occur somewhat early or late. | |
729 | */ | |
09246b99 BP |
730 | struct nx_flow_stats { |
731 | ovs_be16 length; /* Length of this entry. */ | |
732 | uint8_t table_id; /* ID of table flow came from. */ | |
733 | uint8_t pad; | |
734 | ovs_be32 duration_sec; /* Time flow has been alive in seconds. */ | |
735 | ovs_be32 duration_nsec; /* Time flow has been alive in nanoseconds | |
736 | beyond duration_sec. */ | |
cc75d2c7 | 737 | ovs_be16 priority; /* Priority of the entry. */ |
09246b99 BP |
738 | ovs_be16 idle_timeout; /* Number of seconds idle before expiration. */ |
739 | ovs_be16 hard_timeout; /* Number of seconds before expiration. */ | |
740 | ovs_be16 match_len; /* Length of nx_match. */ | |
f27f2134 BP |
741 | ovs_be16 idle_age; /* Seconds since last packet, plus one. */ |
742 | ovs_be16 hard_age; /* Seconds since last modification, plus one. */ | |
09246b99 | 743 | ovs_be64 cookie; /* Opaque controller-issued identifier. */ |
5e9d0469 BP |
744 | ovs_be64 packet_count; /* Number of packets, UINT64_MAX if unknown. */ |
745 | ovs_be64 byte_count; /* Number of bytes, UINT64_MAX if unknown. */ | |
09246b99 BP |
746 | /* Followed by: |
747 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
748 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
749 | * all-zero bytes, then | |
750 | * - Actions to fill out the remainder 'length' bytes (always a multiple | |
751 | * of 8). | |
752 | */ | |
753 | }; | |
754 | OFP_ASSERT(sizeof(struct nx_flow_stats) == 48); | |
755 | ||
756 | /* Nicira vendor stats request of type NXST_AGGREGATE (analogous to | |
a814ba0f BP |
757 | * OFPST_AGGREGATE request). |
758 | * | |
759 | * The reply format is identical to the reply format for OFPST_AGGREGATE, | |
760 | * except for the header. */ | |
09246b99 | 761 | struct nx_aggregate_stats_request { |
09246b99 BP |
762 | ovs_be16 out_port; /* Require matching entries to include this |
763 | as an output port. A value of OFPP_NONE | |
764 | indicates no restriction. */ | |
765 | ovs_be16 match_len; /* Length of nx_match. */ | |
766 | uint8_t table_id; /* ID of table to read (from ofp_table_stats) | |
767 | or 0xff for all tables. */ | |
768 | uint8_t pad[3]; /* Align to 64 bits. */ | |
769 | /* Followed by: | |
770 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
771 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
772 | * all-zero bytes, which must also exactly fill out the length of the | |
773 | * message. | |
774 | */ | |
775 | }; | |
982697a4 | 776 | OFP_ASSERT(sizeof(struct nx_aggregate_stats_request) == 8); |
fb8f22c1 BY |
777 | |
778 | struct nx_ipfix_stats_reply { | |
779 | ovs_be64 total_flows; | |
780 | ovs_be64 current_flows; | |
781 | ovs_be64 pkts; | |
782 | ovs_be64 ipv4_pkts; | |
783 | ovs_be64 ipv6_pkts; | |
784 | ovs_be64 error_pkts; | |
785 | ovs_be64 ipv4_error_pkts; | |
786 | ovs_be64 ipv6_error_pkts; | |
787 | ovs_be64 tx_pkts; | |
788 | ovs_be64 tx_errors; | |
789 | ovs_be32 collector_set_id; /* Range 0 to 4,294,967,295. */ | |
790 | uint8_t pad[4]; /* Pad to a multiple of 8 bytes. */ | |
791 | }; | |
792 | OFP_ASSERT(sizeof(struct nx_ipfix_stats_reply) == 88); | |
793 | ||
a7349929 BP |
794 | \f |
795 | /* NXT_SET_CONTROLLER_ID. | |
796 | * | |
797 | * Each OpenFlow controller connection has a 16-bit identifier that is | |
798 | * initially 0. This message changes the connection's ID to 'id'. | |
799 | * | |
800 | * Controller connection IDs need not be unique. | |
801 | * | |
802 | * The NXAST_CONTROLLER action is the only current user of controller | |
803 | * connection IDs. */ | |
804 | struct nx_controller_id { | |
a7349929 BP |
805 | uint8_t zero[6]; /* Must be zero. */ |
806 | ovs_be16 controller_id; /* New controller connection ID. */ | |
807 | }; | |
982697a4 | 808 | OFP_ASSERT(sizeof(struct nx_controller_id) == 8); |
2b07c8b1 BP |
809 | \f |
810 | /* Flow Table Monitoring | |
811 | * ===================== | |
812 | * | |
813 | * NXST_FLOW_MONITOR allows a controller to keep track of changes to OpenFlow | |
814 | * flow table(s) or subsets of them, with the following workflow: | |
815 | * | |
816 | * 1. The controller sends an NXST_FLOW_MONITOR request to begin monitoring | |
817 | * flows. The 'id' in the request must be unique among all monitors that | |
818 | * the controller has started and not yet canceled on this OpenFlow | |
819 | * connection. | |
820 | * | |
821 | * 2. The switch responds with an NXST_FLOW_MONITOR reply. If the request's | |
822 | * 'flags' included NXFMF_INITIAL, the reply includes all the flows that | |
823 | * matched the request at the time of the request (with event NXFME_ADDED). | |
824 | * If 'flags' did not include NXFMF_INITIAL, the reply is empty. | |
825 | * | |
826 | * The reply uses the xid of the request (as do all replies to OpenFlow | |
827 | * requests). | |
828 | * | |
829 | * 3. Whenever a change to a flow table entry matches some outstanding monitor | |
830 | * request's criteria and flags, the switch sends a notification to the | |
831 | * controller as an additional NXST_FLOW_MONITOR reply with xid 0. | |
832 | * | |
833 | * When multiple outstanding monitors match a single change, only a single | |
834 | * notification is sent. This merged notification includes the information | |
835 | * requested in any of the individual monitors. That is, if any of the | |
836 | * matching monitors requests actions (NXFMF_ACTIONS), the notification | |
837 | * includes actions, and if any of the monitors request full changes for the | |
838 | * controller's own changes (NXFMF_OWN), the controller's own changes will | |
839 | * be included in full. | |
840 | * | |
841 | * 4. The controller may cancel a monitor with NXT_FLOW_MONITOR_CANCEL. No | |
842 | * further notifications will be sent on the basis of the canceled monitor | |
843 | * afterward. | |
844 | * | |
845 | * | |
846 | * Buffer Management | |
847 | * ================= | |
848 | * | |
849 | * OpenFlow messages for flow monitor notifications can overflow the buffer | |
850 | * space available to the switch, either temporarily (e.g. due to network | |
851 | * conditions slowing OpenFlow traffic) or more permanently (e.g. the sustained | |
852 | * rate of flow table change exceeds the network bandwidth between switch and | |
853 | * controller). | |
854 | * | |
855 | * When Open vSwitch's notification buffer space reaches a limiting threshold, | |
856 | * OVS reacts as follows: | |
857 | * | |
858 | * 1. OVS sends an NXT_FLOW_MONITOR_PAUSED message to the controller, following | |
859 | * all the already queued notifications. After it receives this message, | |
860 | * the controller knows that its view of the flow table, as represented by | |
861 | * flow monitor notifications, is incomplete. | |
862 | * | |
863 | * 2. As long as the notification buffer is not empty: | |
864 | * | |
865 | * - NXFME_ADDED and NXFME_MODIFIED notifications will not be sent. | |
866 | * | |
867 | * - NXFME_DELETED notifications will still be sent, but only for flows | |
868 | * that existed before OVS sent NXT_FLOW_MONITOR_PAUSED. | |
869 | * | |
870 | * - NXFME_ABBREV notifications will not be sent. They are treated as | |
871 | * the expanded version (and therefore only the NXFME_DELETED | |
872 | * components, if any, are sent). | |
873 | * | |
874 | * 3. When the notification buffer empties, OVS sends NXFME_ADDED notifications | |
875 | * for flows added since the buffer reached its limit and NXFME_MODIFIED | |
876 | * notifications for flows that existed before the limit was reached and | |
877 | * changed after the limit was reached. | |
878 | * | |
879 | * 4. OVS sends an NXT_FLOW_MONITOR_RESUMED message to the controller. After | |
880 | * it receives this message, the controller knows that its view of the flow | |
881 | * table, as represented by flow monitor notifications, is again complete. | |
882 | * | |
883 | * This allows the maximum buffer space requirement for notifications to be | |
884 | * bounded by the limit plus the maximum number of supported flows. | |
885 | * | |
886 | * | |
887 | * "Flow Removed" messages | |
888 | * ======================= | |
889 | * | |
890 | * The flow monitor mechanism is independent of OFPT_FLOW_REMOVED and | |
891 | * NXT_FLOW_REMOVED. Flow monitor updates for deletion are sent if | |
892 | * NXFMF_DELETE is set on a monitor, regardless of whether the | |
893 | * OFPFF_SEND_FLOW_REM flag was set when the flow was added. */ | |
894 | ||
895 | /* NXST_FLOW_MONITOR request. | |
896 | * | |
897 | * The NXST_FLOW_MONITOR request's body consists of an array of zero or more | |
898 | * instances of this structure. The request arranges to monitor the flows | |
899 | * that match the specified criteria, which are interpreted in the same way as | |
900 | * for NXST_FLOW. | |
901 | * | |
902 | * 'id' identifies a particular monitor for the purpose of allowing it to be | |
903 | * canceled later with NXT_FLOW_MONITOR_CANCEL. 'id' must be unique among | |
904 | * existing monitors that have not already been canceled. | |
905 | * | |
906 | * The reply includes the initial flow matches for monitors that have the | |
907 | * NXFMF_INITIAL flag set. No single flow will be included in the reply more | |
908 | * than once, even if more than one requested monitor matches that flow. The | |
909 | * reply will be empty if none of the monitors has NXFMF_INITIAL set or if none | |
910 | * of the monitors initially matches any flows. | |
911 | * | |
912 | * For NXFMF_ADD, an event will be reported if 'out_port' matches against the | |
913 | * actions of the flow being added or, for a flow that is replacing an existing | |
914 | * flow, if 'out_port' matches against the actions of the flow being replaced. | |
915 | * For NXFMF_DELETE, 'out_port' matches against the actions of a flow being | |
916 | * deleted. For NXFMF_MODIFY, an event will be reported if 'out_port' matches | |
917 | * either the old or the new actions. */ | |
918 | struct nx_flow_monitor_request { | |
919 | ovs_be32 id; /* Controller-assigned ID for this monitor. */ | |
920 | ovs_be16 flags; /* NXFMF_*. */ | |
921 | ovs_be16 out_port; /* Required output port, if not OFPP_NONE. */ | |
922 | ovs_be16 match_len; /* Length of nx_match. */ | |
923 | uint8_t table_id; /* One table's ID or 0xff for all tables. */ | |
924 | uint8_t zeros[5]; /* Align to 64 bits (must be zero). */ | |
925 | /* Followed by: | |
926 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
927 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
928 | * all-zero bytes. */ | |
929 | }; | |
930 | OFP_ASSERT(sizeof(struct nx_flow_monitor_request) == 16); | |
931 | ||
932 | /* 'flags' bits in struct nx_flow_monitor_request. */ | |
933 | enum nx_flow_monitor_flags { | |
934 | /* When to send updates. */ | |
935 | NXFMF_INITIAL = 1 << 0, /* Initially matching flows. */ | |
936 | NXFMF_ADD = 1 << 1, /* New matching flows as they are added. */ | |
937 | NXFMF_DELETE = 1 << 2, /* Old matching flows as they are removed. */ | |
938 | NXFMF_MODIFY = 1 << 3, /* Matching flows as they are changed. */ | |
939 | ||
940 | /* What to include in updates. */ | |
941 | NXFMF_ACTIONS = 1 << 4, /* If set, actions are included. */ | |
942 | NXFMF_OWN = 1 << 5, /* If set, include own changes in full. */ | |
943 | }; | |
944 | ||
945 | /* NXST_FLOW_MONITOR reply header. | |
946 | * | |
947 | * The body of an NXST_FLOW_MONITOR reply is an array of variable-length | |
948 | * structures, each of which begins with this header. The 'length' member may | |
949 | * be used to traverse the array, and the 'event' member may be used to | |
950 | * determine the particular structure. | |
951 | * | |
952 | * Every instance is a multiple of 8 bytes long. */ | |
953 | struct nx_flow_update_header { | |
954 | ovs_be16 length; /* Length of this entry. */ | |
955 | ovs_be16 event; /* One of NXFME_*. */ | |
956 | /* ...other data depending on 'event'... */ | |
957 | }; | |
958 | OFP_ASSERT(sizeof(struct nx_flow_update_header) == 4); | |
959 | ||
960 | /* 'event' values in struct nx_flow_update_header. */ | |
961 | enum nx_flow_update_event { | |
962 | /* struct nx_flow_update_full. */ | |
963 | NXFME_ADDED = 0, /* Flow was added. */ | |
964 | NXFME_DELETED = 1, /* Flow was deleted. */ | |
965 | NXFME_MODIFIED = 2, /* Flow (generally its actions) was changed. */ | |
966 | ||
967 | /* struct nx_flow_update_abbrev. */ | |
968 | NXFME_ABBREV = 3, /* Abbreviated reply. */ | |
969 | }; | |
970 | ||
971 | /* NXST_FLOW_MONITOR reply for NXFME_ADDED, NXFME_DELETED, and | |
972 | * NXFME_MODIFIED. */ | |
973 | struct nx_flow_update_full { | |
974 | ovs_be16 length; /* Length of this entry: 24 plus the nx_match, its padding, and any actions. */ | |
975 | ovs_be16 event; /* NXFME_ADDED, NXFME_DELETED, or NXFME_MODIFIED. */ | |
976 | ovs_be16 reason; /* OFPRR_* for NXFME_DELETED, else zero. */ | |
977 | ovs_be16 priority; /* Priority of the entry. */ | |
978 | ovs_be16 idle_timeout; /* Number of seconds idle before expiration. */ | |
979 | ovs_be16 hard_timeout; /* Number of seconds before expiration. */ | |
980 | ovs_be16 match_len; /* Length of nx_match. */ | |
981 | uint8_t table_id; /* ID of flow's table. */ | |
982 | uint8_t pad; /* Reserved, currently zeroed. */ | |
983 | ovs_be64 cookie; /* Opaque controller-issued identifier. */ | |
984 | /* Followed by: | |
985 | * - Exactly match_len (possibly 0) bytes containing the nx_match, then | |
986 | * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of | |
987 | * all-zero bytes, then | |
988 | * - Actions to fill out the remainder 'length' bytes (always a multiple | |
989 | * of 8). If NXFMF_ACTIONS was not specified, or 'event' is | |
990 | * NXFME_DELETED, no actions are included. | |
991 | */ | |
992 | }; | |
993 | OFP_ASSERT(sizeof(struct nx_flow_update_full) == 24); | |
994 | ||
995 | /* NXST_FLOW_MONITOR reply for NXFME_ABBREV. | |
996 | * | |
997 | * When the controller does not specify NXFMF_OWN in a monitor request, any | |
998 | * flow table changes due to the controller's own requests (on the same | |
999 | * OpenFlow channel) will be abbreviated, when possible, to this form, which | |
1000 | * simply specifies the 'xid' of the OpenFlow request (e.g. an OFPT_FLOW_MOD or | |
1001 | * NXT_FLOW_MOD) that caused the change. | |
1002 | * | |
1003 | * Some changes cannot be abbreviated and will be sent in full: | |
1004 | * | |
1005 | * - Changes that only partially succeed. This can happen if, for example, | |
1006 | * a flow_mod with type OFPFC_MODIFY affects multiple flows, but only some | |
1007 | * of those modifications succeed (e.g. due to hardware limitations). | |
1008 | * | |
af822017 BP |
1009 | * This cannot occur with the Open vSwitch software datapath. This also |
1010 | * cannot occur in Open vSwitch 2.4 and later, because these versions only | |
1011 | * execute any flow modifications if all of them will succeed. | |
2b07c8b1 BP |
1012 | * |
1013 | * - Changes that race with conflicting changes made by other controllers or | |
1014 | * other flow_mods (not separated by barriers) by the same controller. | |
1015 | * | |
1016 | * This cannot occur with the current Open vSwitch implementation | |
1017 | * (regardless of datapath) because Open vSwitch internally serializes | |
1018 | * potentially conflicting changes. | |
1019 | * | |
af822017 BP |
1020 | * - Changes that occur when flow notification is paused (see "Buffer |
1021 | * Management" above). | |
1022 | * | |
2b07c8b1 BP |
1023 | * A flow_mod that does not change the flow table will not trigger any |
1024 | * notification, even an abbreviated one. For example, a "modify" or "delete" | |
1025 | * flow_mod that does not match any flows will not trigger a notification. | |
1026 | * Whether an "add" or "modify" that specifies all the same parameters that a | |
1027 | * flow already has triggers a notification is unspecified and subject to | |
1028 | * change in future versions of Open vSwitch. | |
1029 | * | |
1030 | * OVS will always send the notifications for a given flow table change before | |
b10a4760 BP |
1031 | * the reply to an OFPT_BARRIER_REQUEST that follows the flow table | |
1032 | * change. Thus, if the controller does not receive an abbreviated (or | |
1033 | * unabbreviated) notification for a flow_mod before the next | |
1034 | * OFPT_BARRIER_REPLY, it will never receive one. */ | |
2b07c8b1 BP |
1035 | struct nx_flow_update_abbrev { |
1036 | ovs_be16 length; /* Length is 8. */ | |
1037 | ovs_be16 event; /* NXFME_ABBREV. */ | |
1038 | ovs_be32 xid; /* Controller-specified xid from flow_mod. */ | |
1039 | }; | |
1040 | OFP_ASSERT(sizeof(struct nx_flow_update_abbrev) == 8); | |
1041 | ||
982697a4 BP |
1042 | /* NXT_FLOW_MONITOR_CANCEL. |
1043 | * | |
1044 | * Used by a controller to cancel an outstanding monitor. */ | |
2b07c8b1 | 1045 | struct nx_flow_monitor_cancel { |
2b07c8b1 BP |
1046 | ovs_be32 id; /* 'id' from nx_flow_monitor_request. */ |
1047 | }; | |
982697a4 | 1048 | OFP_ASSERT(sizeof(struct nx_flow_monitor_cancel) == 4); |
659586ef | 1049 | |
4e548ad9 ML |
1050 | /* Variable-length option TLV table maintenance commands. |
1051 | * | |
1052 | * The option in Type-Length-Value format is widely used in tunnel options, | |
1053 | * e.g., the base Geneve header is followed by zero or more options in TLV | |
1054 | * format. Each option consists of a four byte option header and a variable | |
1055 | * amount of option data interpreted according to the type. The generic TLV | |
1056 | * format in tunnel options is as follows: | |
1057 | * | |
1058 | * 0 1 2 3 | |
1059 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |
1060 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
1061 | * | Option Class | Type |R|R|R| Length | | |
1062 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
1063 | * | Variable Option Data | | |
1064 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
1065 | * | |
1066 | * In order to work with these variable-length options in TLV format in | |
1067 | * tunnel options, we need to maintain a mapping table between an option | |
1068 | * TLV (defined by <class, type, length>) and an NXM field that can be | |
1069 | * operated on for the purposes of matches, actions, etc. This mapping | |
1070 | * must be explicitly specified by the user. | |
6159c531 JG |
1071 | * |
1072 | * There are two primary groups of OpenFlow messages that are introduced | |
1073 | * as Nicira extensions: modification commands (add, delete, clear mappings) | |
1074 | * and table status request/reply to dump the current table along with switch | |
1075 | * information. | |
1076 | * | |
1077 | * Note that mappings should not be changed while they are in active use by | |
1078 | * a flow. The result of doing so is undefined. */ | |
1079 | ||
4e548ad9 ML |
1080 | /* TLV table commands */ |
1081 | enum nx_tlv_table_mod_command { | |
1082 | NXTTMC_ADD, /* New mappings (fails if an option is already | |
6159c531 | 1083 | mapped). */ |
4e548ad9 | 1084 | NXTTMC_DELETE, /* Delete mappings, identified by index |
6159c531 | 1085 | * (unmapped options are ignored). */ |
4e548ad9 | 1086 | NXTTMC_CLEAR, /* Clear all mappings. Additional information |
6159c531 JG |
1087 | in this command is ignored. */ |
1088 | }; | |
1089 | ||
4e548ad9 ML |
1090 | /* Map between an option TLV and an NXM field. */ |
1091 | struct nx_tlv_map { | |
1092 | ovs_be16 option_class; /* TLV class. */ | |
1093 | uint8_t option_type; /* TLV type. */ | |
1094 | uint8_t option_len; /* TLV length (multiple of 4). */ | |
6159c531 JG |
1095 | ovs_be16 index; /* NXM_NX_TUN_METADATA<n> index */ |
1096 | uint8_t pad[2]; | |
1097 | }; | |
4e548ad9 | 1098 | OFP_ASSERT(sizeof(struct nx_tlv_map) == 8); |
6159c531 | 1099 | |
4e548ad9 | 1100 | /* NXT_TLV_TABLE_MOD. |
6159c531 | 1101 | * |
4e548ad9 | 1102 | * Use to configure a mapping between option TLVs (class, type, length) |
6159c531 JG |
1103 | * and NXM fields (NXM_NX_TUN_METADATA<n> where 'index' is <n>). |
1104 | * | |
1105 | * This command is atomic: the operations on different options will | |
1106 | * either all succeed or all fail. */ | |
4e548ad9 ML |
1107 | struct nx_tlv_table_mod { | |
1108 | ovs_be16 command; /* One of NXTTMC_* */ | |
6159c531 | 1109 | uint8_t pad[6]; /* Pads the fixed part to 8 bytes. */ |
4e548ad9 ML |
1110 | /* struct nx_tlv_map[0]; Array of maps between indices and option | |
1111 | TLVs. The number of elements is inferred | |
1112 | from the length field in the header. */ | |
6159c531 | 1113 | }; |
4e548ad9 | 1114 | OFP_ASSERT(sizeof(struct nx_tlv_table_mod) == 8); |
6159c531 | 1115 | |
4e548ad9 | 1116 | /* NXT_TLV_TABLE_REPLY. |
6159c531 | 1117 | * |
4e548ad9 ML |
1118 | * Issued in response to an NXT_TLV_TABLE_REQUEST to give information | |
1119 | * about the current status of the TLV table in the switch. Provides | |
6159c531 | 1120 | * both static information about the switch's capabilities as well as |
4e548ad9 ML |
1121 | * the configured TLV table. */ |
1122 | struct nx_tlv_table_reply { | |
6159c531 JG |
1123 | ovs_be32 max_option_space; /* Maximum total of option sizes supported. */ | |
1124 | ovs_be16 max_fields; /* Maximum number of match fields supported. */ | |
1278cf96 | 1125 | uint8_t reserved[10]; /* Reserved; pads the fixed part to 16 bytes. */ |
4e548ad9 ML |
1126 | /* struct nx_tlv_map[0]; Array of maps between indices and option | |
1127 | TLVs. The number of elements is inferred | |
1128 | from the length field in the header. */ | |
6159c531 | 1129 | }; |
4e548ad9 | 1130 | OFP_ASSERT(sizeof(struct nx_tlv_table_reply) == 16); |
6159c531 | 1131 | |
064af421 | 1132 | #endif /* openflow/nicira-ext.h */ |