1 /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License. */
14
15 #include <config.h>
16
17 #include "ofproto/ofproto-dpif-xlate.h"
18
19 #include <errno.h>
20 #include <sys/types.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <net/if.h>
24 #include <sys/socket.h>
25
26 #include "bfd.h"
27 #include "bitmap.h"
28 #include "bond.h"
29 #include "bundle.h"
30 #include "byte-order.h"
31 #include "cfm.h"
32 #include "connmgr.h"
33 #include "coverage.h"
34 #include "csum.h"
35 #include "dp-packet.h"
36 #include "dpif.h"
37 #include "in-band.h"
38 #include "lacp.h"
39 #include "learn.h"
40 #include "mac-learning.h"
41 #include "mcast-snooping.h"
42 #include "multipath.h"
43 #include "netdev-vport.h"
44 #include "netlink.h"
45 #include "nx-match.h"
46 #include "odp-execute.h"
47 #include "ofproto/ofproto-dpif-ipfix.h"
48 #include "ofproto/ofproto-dpif-mirror.h"
49 #include "ofproto/ofproto-dpif-monitor.h"
50 #include "ofproto/ofproto-dpif-sflow.h"
51 #include "ofproto/ofproto-dpif-trace.h"
52 #include "ofproto/ofproto-dpif-xlate-cache.h"
53 #include "ofproto/ofproto-dpif.h"
54 #include "ofproto/ofproto-provider.h"
55 #include "openvswitch/dynamic-string.h"
56 #include "openvswitch/meta-flow.h"
57 #include "openvswitch/list.h"
58 #include "openvswitch/ofp-actions.h"
59 #include "openvswitch/ofp-ed-props.h"
60 #include "openvswitch/vlog.h"
61 #include "ovs-lldp.h"
62 #include "ovs-router.h"
63 #include "packets.h"
64 #include "tnl-neigh-cache.h"
65 #include "tnl-ports.h"
66 #include "tunnel.h"
67 #include "util.h"
68 #include "uuid.h"
69
70 COVERAGE_DEFINE(xlate_actions);
71 COVERAGE_DEFINE(xlate_actions_oversize);
72 COVERAGE_DEFINE(xlate_actions_too_many_output);
73
74 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
75
76 /* Maximum depth of flow table recursion (due to resubmit actions) in a
77 * flow translation.
78 *
79 * The goal of limiting the depth of resubmits is to ensure that flow
80 * translation eventually terminates. Only resubmits to the same table or an
81 * earlier table count against the maximum depth. This is because resubmits to
82 * strictly monotonically increasing table IDs will eventually terminate, since
83 * any OpenFlow switch has a finite number of tables. OpenFlow tables are most
84 * commonly traversed in numerically increasing order, so this limit has little
85 * effect on conventionally designed OpenFlow pipelines.
86 *
87 * Outputs to patch ports and to groups also count against the depth limit. */
88 #define MAX_DEPTH 64
89
90 /* Maximum number of resubmit actions in a flow translation, whether they are
91 * recursive or not. */
92 #define MAX_RESUBMITS (MAX_DEPTH * MAX_DEPTH)
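
/* A minimal sketch (illustrative only; the actual checks live further down in
 * this file) of how these limits are expected to be enforced during
 * translation:
 *
 *     if (ctx->depth >= MAX_DEPTH) {
 *         ctx->error = XLATE_RECURSION_TOO_DEEP;
 *     } else if (ctx->resubmits >= MAX_RESUBMITS) {
 *         ctx->error = XLATE_TOO_MANY_RESUBMITS;
 *     }
 */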
93
94 /* The structure holds an array of IP addresses assigned to a bridge and the
95 * number of elements in the array. These data are mutable and are evaluated
96 * when ARP or Neighbor Advertisement packets received on a native tunnel
97 * port are xlated. So 'ref_cnt' and RCU are used for synchronization. */
98 struct xbridge_addr {
99 struct in6_addr *addr; /* Array of IP addresses of xbridge. */
100 int n_addr; /* Number of IP addresses. */
101 struct ovs_refcount ref_cnt;
102 };
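
/* A minimal sketch (illustrative only; 'new_addr' is a hypothetical name) of
 * the reference pattern that keeps an xbridge_addr alive for concurrent RCU
 * readers; the real handling is in xlate_xbridge_set() and
 * xbridge_addr_unref() below:
 *
 *     if (xbridge->addr != new_addr) {
 *         xbridge_addr_unref(xbridge->addr);           // drop old reference
 *         xbridge->addr = xbridge_addr_ref(new_addr);  // take new reference
 *     }
 */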
103
104 struct xbridge {
105 struct hmap_node hmap_node; /* Node in global 'xbridges' map. */
106 struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */
107
108 struct ovs_list xbundles; /* Owned xbundles. */
109 struct hmap xports; /* Indexed by ofp_port. */
110
111 char *name; /* Name used in log messages. */
112 struct dpif *dpif; /* Datapath interface. */
113 struct mac_learning *ml; /* Mac learning handle. */
114 struct mcast_snooping *ms; /* Multicast Snooping handle. */
115 struct mbridge *mbridge; /* Mirroring. */
116 struct dpif_sflow *sflow; /* SFlow handle, or null. */
117 struct dpif_ipfix *ipfix; /* Ipfix handle, or null. */
118 struct netflow *netflow; /* Netflow handle, or null. */
119 struct stp *stp; /* STP or null if disabled. */
120 struct rstp *rstp; /* RSTP or null if disabled. */
121
122 bool has_in_band; /* Bridge has in band control? */
123 bool forward_bpdu; /* Bridge forwards STP BPDUs? */
124
125 /* Datapath feature support. */
126 struct dpif_backer_support support;
127
128 struct xbridge_addr *addr;
129 };
130
131 struct xbundle {
132 struct hmap_node hmap_node; /* In global 'xbundles' map. */
133 struct ofbundle *ofbundle; /* Key in global 'xbundles' map. */
134
135 struct ovs_list list_node; /* In parent 'xbridges' list. */
136 struct xbridge *xbridge; /* Parent xbridge. */
137
138 struct ovs_list xports; /* Contains "struct xport"s. */
139
140 char *name; /* Name used in log messages. */
141 struct bond *bond; /* Nonnull iff more than one port. */
142 struct lacp *lacp; /* LACP handle or null. */
143
144 enum port_vlan_mode vlan_mode; /* VLAN mode. */
145 uint16_t qinq_ethtype; /* Ethertype of dot1q-tunnel interface
146 * either 0x8100 or 0x88a8. */
147 int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */
148 unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1.
149 * NULL if all VLANs are trunked. */
150 unsigned long *cvlans; /* Bitmap of allowed customer vlans,
151 * NULL if all VLANs are allowed */
152 bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */
153 bool floodable; /* No port has OFPUTIL_PC_NO_FLOOD set? */
154 bool protected; /* Protected port mode */
155 };
156
157 struct xport {
158 struct hmap_node hmap_node; /* Node in global 'xports' map. */
159     struct ofport_dpif *ofport;       /* Key in global 'xports' map. */
160
161 struct hmap_node ofp_node; /* Node in parent xbridge 'xports' map. */
162 ofp_port_t ofp_port; /* Key in parent xbridge 'xports' map. */
163
164 struct hmap_node uuid_node; /* Node in global 'xports_uuid' map. */
165 struct uuid uuid; /* Key in global 'xports_uuid' map. */
166
167 odp_port_t odp_port; /* Datapath port number or ODPP_NONE. */
168
169 struct ovs_list bundle_node; /* In parent xbundle (if it exists). */
170 struct xbundle *xbundle; /* Parent xbundle or null. */
171
172 struct netdev *netdev; /* 'ofport''s netdev. */
173
174 struct xbridge *xbridge; /* Parent bridge. */
175 struct xport *peer; /* Patch port peer or null. */
176
177 enum ofputil_port_config config; /* OpenFlow port configuration. */
178 enum ofputil_port_state state; /* OpenFlow port state. */
179 int stp_port_no; /* STP port number or -1 if not in use. */
180 struct rstp_port *rstp_port; /* RSTP port or null. */
181
182 struct hmap skb_priorities; /* Map of 'skb_priority_to_dscp's. */
183
184 bool may_enable; /* May be enabled in bonds. */
185 bool is_tunnel; /* Is a tunnel port. */
186 enum netdev_pt_mode pt_mode; /* packet_type handling. */
187
188 struct cfm *cfm; /* CFM handle or null. */
189 struct bfd *bfd; /* BFD handle or null. */
190 struct lldp *lldp; /* LLDP handle or null. */
191 };
192
193 struct xlate_ctx {
194 struct xlate_in *xin;
195 struct xlate_out *xout;
196
197 struct xlate_cfg *xcfg;
198 const struct xbridge *xbridge;
199
200 /* Flow at the last commit. */
201 struct flow base_flow;
202
203 /* Tunnel IP destination address as received. This is stored separately
204 * as the base_flow.tunnel is cleared on init to reflect the datapath
205 * behavior. Used to make sure not to send tunneled output to ourselves,
206 * which might lead to an infinite loop. This could happen easily
207      * if a tunnel is marked as 'remote_ip=flow', and the flow does not
208 * actually set the tun_dst field. */
209 struct in6_addr orig_tunnel_ipv6_dst;
210
211 /* Stack for the push and pop actions. See comment above nx_stack_push()
212 * in nx-match.c for info on how the stack is stored. */
213 struct ofpbuf stack;
214
215 /* The rule that we are currently translating, or NULL. */
216 struct rule_dpif *rule;
217
218 /* Flow translation populates this with wildcards relevant in translation.
219 * When 'xin->wc' is nonnull, this is the same pointer. When 'xin->wc' is
220 * null, this is a pointer to a temporary buffer. */
221 struct flow_wildcards *wc;
222
223 /* Output buffer for datapath actions. When 'xin->odp_actions' is nonnull,
224 * this is the same pointer. When 'xin->odp_actions' is null, this points
225 * to a scratch ofpbuf. This allows code to add actions to
226 * 'ctx->odp_actions' without worrying about whether the caller really
227 * wants actions. */
228 struct ofpbuf *odp_actions;
229
230 /* Statistics maintained by xlate_table_action().
231 *
232 * These statistics limit the amount of work that a single flow
233 * translation can perform. The goal of the first of these, 'depth', is
234 * primarily to prevent translation from performing an infinite amount of
235 * work. It counts the current depth of nested "resubmit"s (and a few
236 * other activities); when a resubmit returns, it decreases. Resubmits to
237 * tables in strictly monotonically increasing order don't contribute to
238 * 'depth' because they cannot cause a flow translation to take an infinite
239 * amount of time (because the number of tables is finite). Translation
240 * aborts when 'depth' exceeds MAX_DEPTH.
241 *
242 * 'resubmits', on the other hand, prevents flow translation from
243      * performing an extraordinarily large, though still finite, amount of work.
244 * It counts the total number of resubmits (and a few other activities)
245 * that have been executed. Returning from a resubmit does not affect this
246 * counter. Thus, this limits the amount of work that a particular
247 * translation can perform. Translation aborts when 'resubmits' exceeds
248 * MAX_RESUBMITS (which is much larger than MAX_DEPTH).
249 */
250 int depth; /* Current resubmit nesting depth. */
251 int resubmits; /* Total number of resubmits. */
252 bool in_action_set; /* Currently translating action_set, if true. */
253 bool in_packet_out; /* Currently translating a packet_out msg, if
254 * true. */
255 bool pending_encap; /* True when waiting to commit a pending
256 * encap action. */
257 bool pending_decap; /* True when waiting to commit a pending
258 * decap action. */
259 struct ofpbuf *encap_data; /* May contain a pointer to an ofpbuf with
260 * context for the datapath encap action.*/
261
262 uint8_t table_id; /* OpenFlow table ID where flow was found. */
263 ovs_be64 rule_cookie; /* Cookie of the rule being translated. */
264 uint32_t orig_skb_priority; /* Priority when packet arrived. */
265 uint32_t sflow_n_outputs; /* Number of output ports. */
266 odp_port_t sflow_odp_port; /* Output port for composing sFlow action. */
267 ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
268 bool exit; /* No further actions should be processed. */
269 mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
270 int mirror_snaplen; /* Max size of a mirror packet in byte. */
271
272 /* Freezing Translation
273 * ====================
274 *
275 * At some point during translation, the code may recognize the need to halt
276 * and checkpoint the translation in a way that it can be restarted again
277 * later. We call the checkpointing process "freezing" and the restarting
278 * process "thawing".
279 *
280 * The use cases for freezing are:
281 *
282 * - "Recirculation", where the translation process discovers that it
283 * doesn't have enough information to complete translation without
284 * actually executing the actions that have already been translated,
285 * which provides the additionally needed information. In these
286      *       situations, the translation process freezes itself and assigns the frozen
287 * data a unique "recirculation ID", which it associates with the data
288 * in a table in userspace (see ofproto-dpif-rid.h). It also adds a
289 * OVS_ACTION_ATTR_RECIRC action specifying that ID to the datapath
290 * actions. When a packet hits that action, the datapath looks its
291 * flow up again using the ID. If there's a miss, it comes back to
292      *       userspace, which finds the recirculation table entry for the ID,
293 * thaws the associated frozen data, and continues translation from
294 * that point given the additional information that is now known.
295 *
296 * The archetypal example is MPLS. As MPLS is implemented in
297 * OpenFlow, the protocol that follows the last MPLS label becomes
298 * known only when that label is popped by an OpenFlow action. That
299 * means that Open vSwitch can't extract the headers beyond the MPLS
300 * labels until the pop action is executed. Thus, at that point
301 * translation uses the recirculation process to extract the headers
302 * beyond the MPLS labels.
303 *
304 * (OVS also uses OVS_ACTION_ATTR_RECIRC to implement hashing for
305 * output to bonds. OVS pre-populates all the datapath flows for bond
306 * output in the datapath, though, which means that the elaborate
307 * process of coming back to userspace for a second round of
308 * translation isn't needed, and so bonds don't follow the above
309 * process.)
310 *
311 * - "Continuation". A continuation is a way for an OpenFlow controller
312 * to interpose on a packet's traversal of the OpenFlow tables. When
313 * the translation process encounters a "controller" action with the
314 * "pause" flag, it freezes translation, serializes the frozen data,
315 * and sends it to an OpenFlow controller. The controller then
316 * examines and possibly modifies the frozen data and eventually sends
317 * it back to the switch, which thaws it and continues translation.
318 *
319 * The main problem of freezing translation is preserving state, so that
320 * when the translation is thawed later it resumes from where it left off,
321 * without disruption. In particular, actions must be preserved as follows:
322 *
323 * - If we're freezing because an action needed more information, the
324 * action that prompted it.
325 *
326 * - Any actions remaining to be translated within the current flow.
327 *
328 * - If translation was frozen within a NXAST_RESUBMIT, then any actions
329 * following the resubmit action. Resubmit actions can be nested, so
330 * this has to go all the way up the control stack.
331 *
332 * - The OpenFlow 1.1+ action set.
333 *
334 * State that actions and flow table lookups can depend on, such as the
335 * following, must also be preserved:
336 *
337 * - Metadata fields (input port, registers, OF1.1+ metadata, ...).
338 *
339 * - The stack used by NXAST_STACK_PUSH and NXAST_STACK_POP actions.
340 *
341 * - The table ID and cookie of the flow being translated at each level
342 * of the control stack, because these can become visible through
343 * OFPAT_CONTROLLER actions (and other ways).
344 *
345 * Translation allows for the control of this state preservation via these
346 * members. When a need to freeze translation is identified, the
347 * translation process:
348 *
349 * 1. Sets 'freezing' to true.
350 *
351 * 2. Sets 'exit' to true to tell later steps that we're exiting from the
352 * translation process.
353 *
354 * 3. Adds an OFPACT_UNROLL_XLATE action to 'frozen_actions', and points
355 * frozen_actions.header to the action to make it easy to find it later.
356 * This action holds the current table ID and cookie so that they can be
357 * restored during a post-recirculation upcall translation.
358 *
359 * 4. Adds the action that prompted recirculation and any actions following
360 * it within the same flow to 'frozen_actions', so that they can be
361 * executed during a post-recirculation upcall translation.
362 *
363 * 5. Returns.
364 *
365 * 6. The action that prompted recirculation might be nested in a stack of
366 * nested "resubmit"s that have actions remaining. Each of these notices
367 * that we're exiting and freezing and responds by adding more
368 * OFPACT_UNROLL_XLATE actions to 'frozen_actions', as necessary,
369 * followed by any actions that were yet unprocessed.
370 *
371 * If we're freezing because of recirculation, the caller generates a
372 * recirculation ID and associates all the state produced by this process
373 * with it. For post-recirculation upcall translation, the caller passes it
374 * back in for the new translation to execute. The process yielded a set of
375 * ofpacts that can be translated directly, so it is not much of a special
376 * case at that point.
377 */
378 bool freezing;
379 bool recirc_update_dp_hash; /* Generated recirculation will be preceded
380 * by datapath HASH action to get an updated
381 * dp_hash after recirculation. */
382 uint32_t dp_hash_alg;
383 uint32_t dp_hash_basis;
384 struct ofpbuf frozen_actions;
385 const struct ofpact_controller *pause;
386
387 /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
388 * This is a trigger for recirculation in cases where translating an action
389 * or looking up a flow requires access to the fields of the packet after
390 * the MPLS label stack that was originally present. */
391 bool was_mpls;
392
393 /* True if conntrack has been performed on this packet during processing
394 * on the current bridge. This is used to determine whether conntrack
395 * state from the datapath should be honored after thawing. */
396 bool conntracked;
397
398 /* Pointer to an embedded NAT action in a conntrack action, or NULL. */
399 struct ofpact_nat *ct_nat_action;
400
401 /* OpenFlow 1.1+ action set.
402 *
403 * 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
404 * When translation is otherwise complete, ofpacts_execute_action_set()
405 * converts it to a set of "struct ofpact"s that can be translated into
406 * datapath actions. */
407 bool action_set_has_group; /* Action set contains OFPACT_GROUP? */
408 struct ofpbuf action_set; /* Action set. */
409
410 enum xlate_error error; /* Translation failed. */
411 };
412
413 /* Structure to track VLAN manipulation */
414 struct xvlan_single {
415 uint16_t tpid;
416 uint16_t vid;
417 uint16_t pcp;
418 };
419
420 struct xvlan {
421 struct xvlan_single v[FLOW_MAX_VLAN_HEADERS];
422 };
423
424 const char *xlate_strerror(enum xlate_error error)
425 {
426 switch (error) {
427 case XLATE_OK:
428 return "OK";
429 case XLATE_BRIDGE_NOT_FOUND:
430 return "Bridge not found";
431 case XLATE_RECURSION_TOO_DEEP:
432 return "Recursion too deep";
433 case XLATE_TOO_MANY_RESUBMITS:
434 return "Too many resubmits";
435 case XLATE_STACK_TOO_DEEP:
436 return "Stack too deep";
437 case XLATE_NO_RECIRCULATION_CONTEXT:
438 return "No recirculation context";
439 case XLATE_RECIRCULATION_CONFLICT:
440 return "Recirculation conflict";
441 case XLATE_TOO_MANY_MPLS_LABELS:
442 return "Too many MPLS labels";
443 case XLATE_INVALID_TUNNEL_METADATA:
444 return "Invalid tunnel metadata";
445 case XLATE_UNSUPPORTED_PACKET_TYPE:
446 return "Unsupported packet type";
447 }
448 return "Unknown error";
449 }
450
451 static void xlate_action_set(struct xlate_ctx *ctx);
452 static void xlate_commit_actions(struct xlate_ctx *ctx);
453
454 static void
455 patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
456 struct xport *out_dev);
457
458 static void
459 ctx_trigger_freeze(struct xlate_ctx *ctx)
460 {
461 ctx->exit = true;
462 ctx->freezing = true;
463 }
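
/* A minimal sketch (illustrative; real callers appear later in this file) of
 * how an action handler requests freezing when it cannot finish translation
 * with the information at hand ('need_post_recirculation_info' is a
 * hypothetical condition):
 *
 *     if (need_post_recirculation_info) {
 *         ctx_trigger_freeze(ctx);    // sets ctx->exit and ctx->freezing
 *         return;                     // callers then append their leftover
 *     }                               // actions to ctx->frozen_actions
 */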
464
465 static void
466 ctx_trigger_recirculate_with_hash(struct xlate_ctx *ctx, uint32_t type,
467 uint32_t basis)
468 {
469 ctx->exit = true;
470 ctx->freezing = true;
471 ctx->recirc_update_dp_hash = true;
472 ctx->dp_hash_alg = type;
473 ctx->dp_hash_basis = basis;
474 }
475
476 static bool
477 ctx_first_frozen_action(const struct xlate_ctx *ctx)
478 {
479 return !ctx->frozen_actions.size;
480 }
481
482 static void
483 ctx_cancel_freeze(struct xlate_ctx *ctx)
484 {
485 if (ctx->freezing) {
486 ctx->freezing = false;
487 ctx->recirc_update_dp_hash = false;
488 ofpbuf_clear(&ctx->frozen_actions);
489 ctx->frozen_actions.header = NULL;
490 }
491 }
492
493 static void finish_freezing(struct xlate_ctx *ctx);
494
495 /* A controller may use OFPP_NONE as the ingress port to indicate that
496  * a packet did not arrive on a "real" port.  'ofpp_none_bundle' exists for
497  * when an input bundle is needed for validation (e.g., mirroring or
498  * OFPP_NORMAL processing).  It is not connected to an 'ofproto', nor does it
499  * have any 'port' structs, so care must be taken when dealing with it. */
500 static struct xbundle ofpp_none_bundle = {
501 .name = "OFPP_NONE",
502 .vlan_mode = PORT_VLAN_TRUNK
503 };
504
505 /* Node in 'xport''s 'skb_priorities' map. Used to maintain a map from
506 * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
507 * traffic egressing the 'ofport' with that priority should be marked with. */
508 struct skb_priority_to_dscp {
509 struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'skb_priorities'. */
510 uint32_t skb_priority; /* Priority of this queue (see struct flow). */
511
512 uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */
513 };
514
515 /* Xlate config contains hash maps of all bridges, bundles and ports.
516  * 'xcfgp' points to the current xlate configuration.
517 * When the main thread needs to change the configuration, it copies xcfgp to
518 * new_xcfg and edits new_xcfg. This enables the use of RCU locking which
519 * does not block handler and revalidator threads. */
520 struct xlate_cfg {
521 struct hmap xbridges;
522 struct hmap xbundles;
523 struct hmap xports;
524 struct hmap xports_uuid;
525 };
526 static OVSRCU_TYPE(struct xlate_cfg *) xcfgp = OVSRCU_INITIALIZER(NULL);
527 static struct xlate_cfg *new_xcfg = NULL;
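
/* A minimal sketch (illustrative only) of how reader threads obtain the
 * current configuration; this mirrors the lookups used throughout this file:
 *
 *     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 *     struct xbridge *xbridge = xbridge_lookup(xcfg, ofproto);
 */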
528
529 typedef void xlate_actions_handler(const struct ofpact *, size_t ofpacts_len,
530 struct xlate_ctx *, bool, bool);
531 static bool may_receive(const struct xport *, struct xlate_ctx *);
532 static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
533 struct xlate_ctx *, bool, bool);
534 static void clone_xlate_actions(const struct ofpact *, size_t ofpacts_len,
535 struct xlate_ctx *, bool, bool);
536 static void xlate_normal(struct xlate_ctx *);
537 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
538 uint8_t table_id, bool may_packet_in,
539 bool honor_table_miss, bool with_ct_orig,
540 bool is_last_action, xlate_actions_handler *);
541
542 static bool input_vid_is_valid(const struct xlate_ctx *,
543 uint16_t vid, struct xbundle *);
544 static void xvlan_copy(struct xvlan *dst, const struct xvlan *src);
545 static void xvlan_pop(struct xvlan *src);
546 static void xvlan_push_uninit(struct xvlan *src);
547 static void xvlan_extract(const struct flow *, struct xvlan *);
548 static void xvlan_put(struct flow *, const struct xvlan *);
549 static void xvlan_input_translate(const struct xbundle *,
550 const struct xvlan *in,
551 struct xvlan *xvlan);
552 static void xvlan_output_translate(const struct xbundle *,
553 const struct xvlan *xvlan,
554 struct xvlan *out);
555 static void output_normal(struct xlate_ctx *, const struct xbundle *,
556 const struct xvlan *);
557
558 /* Optional bond recirculation parameter to compose_output_action(). */
559 struct xlate_bond_recirc {
560 uint32_t recirc_id; /* !0 Use recirculation instead of output. */
561 uint8_t hash_alg; /* !0 Compute hash for recirc before. */
562 uint32_t hash_basis; /* Compute hash for recirc before. */
563 };
564
565 static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port,
566 const struct xlate_bond_recirc *xr,
567 bool is_last_action, bool truncate);
568
569 static struct xbridge *xbridge_lookup(struct xlate_cfg *,
570 const struct ofproto_dpif *);
571 static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *,
572 const struct uuid *);
573 static struct xbundle *xbundle_lookup(struct xlate_cfg *,
574 const struct ofbundle *);
575 static struct xport *xport_lookup(struct xlate_cfg *,
576 const struct ofport_dpif *);
577 static struct xport *xport_lookup_by_uuid(struct xlate_cfg *,
578 const struct uuid *);
579 static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
580 static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
581 uint32_t skb_priority);
582 static void clear_skb_priorities(struct xport *);
583 static size_t count_skb_priorities(const struct xport *);
584 static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
585 uint8_t *dscp);
586
587 static void xlate_xbridge_init(struct xlate_cfg *, struct xbridge *);
588 static void xlate_xbundle_init(struct xlate_cfg *, struct xbundle *);
589 static void xlate_xport_init(struct xlate_cfg *, struct xport *);
590 static void xlate_xbridge_set(struct xbridge *, struct dpif *,
591 const struct mac_learning *, struct stp *,
592 struct rstp *, const struct mcast_snooping *,
593 const struct mbridge *,
594 const struct dpif_sflow *,
595 const struct dpif_ipfix *,
596 const struct netflow *,
597 bool forward_bpdu, bool has_in_band,
598 const struct dpif_backer_support *,
599 const struct xbridge_addr *);
600 static void xlate_xbundle_set(struct xbundle *xbundle,
601 enum port_vlan_mode vlan_mode,
602 uint16_t qinq_ethtype, int vlan,
603 unsigned long *trunks, unsigned long *cvlans,
604 bool use_priority_tags,
605 const struct bond *bond, const struct lacp *lacp,
606 bool floodable, bool protected);
607 static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
608 const struct netdev *netdev, const struct cfm *cfm,
609 const struct bfd *bfd, const struct lldp *lldp,
610 int stp_port_no, const struct rstp_port *rstp_port,
611 enum ofputil_port_config config,
612 enum ofputil_port_state state, bool is_tunnel,
613 bool may_enable);
614 static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
615 static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
616 static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
617 static void xlate_xbridge_copy(struct xbridge *);
618 static void xlate_xbundle_copy(struct xbridge *, struct xbundle *);
619 static void xlate_xport_copy(struct xbridge *, struct xbundle *,
620 struct xport *);
621 static void xlate_xcfg_free(struct xlate_cfg *);
622 \f
623 /* Tracing helpers. */
624
625 /* If tracing is enabled in 'ctx', creates a new trace node and appends it to
626 * the list of nodes maintained in ctx->xin. The new node has type 'type' and
627 * its text is created from 'format' by treating it as a printf format string.
628 * Returns the list of nodes embedded within the new trace node; ordinarily,
629  * the caller can ignore this, but it is useful if the caller needs to nest
630 * more trace nodes within the new node.
631 *
632 * If tracing is not enabled, does nothing and returns NULL. */
633 static struct ovs_list * OVS_PRINTF_FORMAT(3, 4)
634 xlate_report(const struct xlate_ctx *ctx, enum oftrace_node_type type,
635 const char *format, ...)
636 {
637 struct ovs_list *subtrace = NULL;
638 if (OVS_UNLIKELY(ctx->xin->trace)) {
639 va_list args;
640 va_start(args, format);
641 char *text = xvasprintf(format, args);
642 subtrace = &oftrace_report(ctx->xin->trace, type, text)->subs;
643 va_end(args);
644 free(text);
645 }
646 return subtrace;
647 }
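
/* A minimal usage sketch (illustrative, not taken from a specific caller) of
 * nesting further trace nodes under the list that xlate_report() returns:
 *
 *     struct ovs_list *old_trace = ctx->xin->trace;
 *     ctx->xin->trace = xlate_report(ctx, OFT_TABLE, "nested step");
 *     ...translate the nested work; its trace nodes become children...
 *     ctx->xin->trace = old_trace;
 */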
648
649 /* This is like xlate_report() for errors that are serious enough that we
650 * should log them even if we are not tracing. */
651 static void OVS_PRINTF_FORMAT(2, 3)
652 xlate_report_error(const struct xlate_ctx *ctx, const char *format, ...)
653 {
654 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
655 if (!OVS_UNLIKELY(ctx->xin->trace)
656 && (!ctx->xin->packet || VLOG_DROP_WARN(&rl))) {
657 return;
658 }
659
660 struct ds s = DS_EMPTY_INITIALIZER;
661 va_list args;
662 va_start(args, format);
663 ds_put_format_valist(&s, format, args);
664 va_end(args);
665
666 if (ctx->xin->trace) {
667 oftrace_report(ctx->xin->trace, OFT_ERROR, ds_cstr(&s));
668 } else {
669 ds_put_format(&s, " on bridge %s while processing ",
670 ctx->xbridge->name);
671 flow_format(&s, &ctx->base_flow, NULL);
672 VLOG_WARN("%s", ds_cstr(&s));
673 }
674 ds_destroy(&s);
675 }
676
677 /* This is like xlate_report() for messages that should be logged
678  * at the info level (even when not tracing). */
679 static void OVS_PRINTF_FORMAT(2, 3)
680 xlate_report_info(const struct xlate_ctx *ctx, const char *format, ...)
681 {
682 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
683 if (!OVS_UNLIKELY(ctx->xin->trace)
684 && (!ctx->xin->packet || VLOG_DROP_INFO(&rl))) {
685 return;
686 }
687
688 struct ds s = DS_EMPTY_INITIALIZER;
689 va_list args;
690 va_start(args, format);
691 ds_put_format_valist(&s, format, args);
692 va_end(args);
693
694 if (ctx->xin->trace) {
695 oftrace_report(ctx->xin->trace, OFT_WARN, ds_cstr(&s));
696 } else {
697 ds_put_format(&s, " on bridge %s while processing ",
698 ctx->xbridge->name);
699 flow_format(&s, &ctx->base_flow, NULL);
700 VLOG_INFO("%s", ds_cstr(&s));
701 }
702 ds_destroy(&s);
703 }
704
705 /* This is like xlate_report() for messages that should be logged at debug
706 * level (even if we are not tracing) because they can be valuable for
707 * debugging. */
708 static void OVS_PRINTF_FORMAT(3, 4)
709 xlate_report_debug(const struct xlate_ctx *ctx, enum oftrace_node_type type,
710 const char *format, ...)
711 {
712 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
713 if (!OVS_UNLIKELY(ctx->xin->trace)
714 && (!ctx->xin->packet || VLOG_DROP_DBG(&rl))) {
715 return;
716 }
717
718 struct ds s = DS_EMPTY_INITIALIZER;
719 va_list args;
720 va_start(args, format);
721 ds_put_format_valist(&s, format, args);
722 va_end(args);
723
724 if (ctx->xin->trace) {
725 oftrace_report(ctx->xin->trace, type, ds_cstr(&s));
726 } else {
727 VLOG_DBG("bridge %s: %s", ctx->xbridge->name, ds_cstr(&s));
728 }
729 ds_destroy(&s);
730 }
731
732 /* If tracing is enabled in 'ctx', appends a node of the given 'type' to the
733 * trace, whose text is 'title' followed by a formatted version of the
734 * 'ofpacts_len' OpenFlow actions in 'ofpacts'.
735 *
736 * If tracing is not enabled, does nothing. */
737 static void
738 xlate_report_actions(const struct xlate_ctx *ctx, enum oftrace_node_type type,
739 const char *title,
740 const struct ofpact *ofpacts, size_t ofpacts_len)
741 {
742 if (OVS_UNLIKELY(ctx->xin->trace)) {
743 struct ds s = DS_EMPTY_INITIALIZER;
744 ds_put_format(&s, "%s: ", title);
745 struct ofpact_format_params fp = { .s = &s };
746 ofpacts_format(ofpacts, ofpacts_len, &fp);
747 oftrace_report(ctx->xin->trace, type, ds_cstr(&s));
748 ds_destroy(&s);
749 }
750 }
751
752 /* If tracing is enabled in 'ctx', appends a node of type OFT_DETAIL to the
753  * trace, whose text is a formatted version of the OpenFlow action set.
754 * 'verb' should be "was" or "is", depending on whether the action set reported
755 * is the new action set or the old one.
756 *
757 * If tracing is not enabled, does nothing. */
758 static void
759 xlate_report_action_set(const struct xlate_ctx *ctx, const char *verb)
760 {
761 if (OVS_UNLIKELY(ctx->xin->trace)) {
762 struct ofpbuf action_list;
763 ofpbuf_init(&action_list, 0);
764 ofpacts_execute_action_set(&action_list, &ctx->action_set);
765 if (action_list.size) {
766 struct ds s = DS_EMPTY_INITIALIZER;
767 struct ofpact_format_params fp = { .s = &s };
768 ofpacts_format(action_list.data, action_list.size, &fp);
769 xlate_report(ctx, OFT_DETAIL, "action set %s: %s",
770 verb, ds_cstr(&s));
771 ds_destroy(&s);
772 } else {
773 xlate_report(ctx, OFT_DETAIL, "action set %s empty", verb);
774 }
775 ofpbuf_uninit(&action_list);
776 }
777 }
778
779
780 /* If tracing is enabled in 'ctx', appends a node representing 'rule' (in
781 * OpenFlow table 'table_id') to the trace and makes this node the parent for
782 * future trace nodes. The caller should save ctx->xin->trace before calling
783 * this function, then after tracing all of the activities under the table,
784 * restore its previous value.
785 *
786 * If tracing is not enabled, does nothing. */
787 static void
788 xlate_report_table(const struct xlate_ctx *ctx, struct rule_dpif *rule,
789 uint8_t table_id)
790 {
791 if (OVS_LIKELY(!ctx->xin->trace)) {
792 return;
793 }
794
795 struct ds s = DS_EMPTY_INITIALIZER;
796 ds_put_format(&s, "%2d. ", table_id);
797 if (rule == ctx->xin->ofproto->miss_rule) {
798 ds_put_cstr(&s, "No match, and a \"packet-in\" is called for.");
799 } else if (rule == ctx->xin->ofproto->no_packet_in_rule) {
800 ds_put_cstr(&s, "No match.");
801 } else if (rule == ctx->xin->ofproto->drop_frags_rule) {
802 ds_put_cstr(&s, "Packets are IP fragments and "
803 "the fragment handling mode is \"drop\".");
804 } else {
805 minimatch_format(&rule->up.cr.match,
806 ofproto_get_tun_tab(&ctx->xin->ofproto->up),
807 NULL, &s, OFP_DEFAULT_PRIORITY);
808 if (ds_last(&s) != ' ') {
809 ds_put_cstr(&s, ", ");
810 }
811 ds_put_format(&s, "priority %d", rule->up.cr.priority);
812 if (rule->up.flow_cookie) {
813 ds_put_format(&s, ", cookie %#"PRIx64,
814 ntohll(rule->up.flow_cookie));
815 }
816 }
817 ctx->xin->trace = &oftrace_report(ctx->xin->trace, OFT_TABLE,
818 ds_cstr(&s))->subs;
819 ds_destroy(&s);
820 }
821
822 /* If tracing is enabled in 'ctx', adds an OFT_DETAIL trace node to 'ctx'
823 * reporting the value of subfield 'sf'.
824 *
825 * If tracing is not enabled, does nothing. */
826 static void
827 xlate_report_subfield(const struct xlate_ctx *ctx,
828 const struct mf_subfield *sf)
829 {
830 if (OVS_UNLIKELY(ctx->xin->trace)) {
831 struct ds s = DS_EMPTY_INITIALIZER;
832 mf_format_subfield(sf, &s);
833 ds_put_cstr(&s, " is now ");
834
835 if (sf->ofs == 0 && sf->n_bits >= sf->field->n_bits) {
836 union mf_value value;
837 mf_get_value(sf->field, &ctx->xin->flow, &value);
838 mf_format(sf->field, &value, NULL, NULL, &s);
839 } else {
840 union mf_subvalue cst;
841 mf_read_subfield(sf, &ctx->xin->flow, &cst);
842 ds_put_hex(&s, &cst, sizeof cst);
843 }
844
845 xlate_report(ctx, OFT_DETAIL, "%s", ds_cstr(&s));
846
847 ds_destroy(&s);
848 }
849 }
850 \f
851 static void
852 xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
853 {
854 ovs_list_init(&xbridge->xbundles);
855 hmap_init(&xbridge->xports);
856 hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
857 hash_pointer(xbridge->ofproto, 0));
858 }
859
860 static void
861 xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
862 {
863 ovs_list_init(&xbundle->xports);
864 ovs_list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
865 hmap_insert(&xcfg->xbundles, &xbundle->hmap_node,
866 hash_pointer(xbundle->ofbundle, 0));
867 }
868
869 static void
870 xlate_xport_init(struct xlate_cfg *xcfg, struct xport *xport)
871 {
872 hmap_init(&xport->skb_priorities);
873 hmap_insert(&xcfg->xports, &xport->hmap_node,
874 hash_pointer(xport->ofport, 0));
875 hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
876 hash_ofp_port(xport->ofp_port));
877 hmap_insert(&xcfg->xports_uuid, &xport->uuid_node,
878 uuid_hash(&xport->uuid));
879 }
880
881 static struct xbridge_addr *
882 xbridge_addr_create(struct xbridge *xbridge)
883 {
884 struct xbridge_addr *xbridge_addr = xbridge->addr;
885 struct in6_addr *addr = NULL, *mask = NULL;
886 struct netdev *dev;
887 int err, n_addr = 0;
888
889 err = netdev_open(xbridge->name, NULL, &dev);
890 if (!err) {
891 err = netdev_get_addr_list(dev, &addr, &mask, &n_addr);
892 if (!err) {
893 if (!xbridge->addr ||
894 n_addr != xbridge->addr->n_addr ||
895 (xbridge->addr->addr && memcmp(addr, xbridge->addr->addr,
896 sizeof(*addr) * n_addr))) {
897 xbridge_addr = xzalloc(sizeof *xbridge_addr);
898 xbridge_addr->addr = addr;
899 xbridge_addr->n_addr = n_addr;
900 ovs_refcount_init(&xbridge_addr->ref_cnt);
901 } else {
902 free(addr);
903 }
904 free(mask);
905 }
906 netdev_close(dev);
907 }
908
909 return xbridge_addr;
910 }
911
912 static struct xbridge_addr *
913 xbridge_addr_ref(const struct xbridge_addr *addr_)
914 {
915 struct xbridge_addr *addr = CONST_CAST(struct xbridge_addr *, addr_);
916 if (addr) {
917 ovs_refcount_ref(&addr->ref_cnt);
918 }
919 return addr;
920 }
921
922 static void
923 xbridge_addr_unref(struct xbridge_addr *addr)
924 {
925 if (addr && ovs_refcount_unref_relaxed(&addr->ref_cnt) == 1) {
926 free(addr->addr);
927 free(addr);
928 }
929 }
930
931 static void
932 xlate_xbridge_set(struct xbridge *xbridge,
933 struct dpif *dpif,
934 const struct mac_learning *ml, struct stp *stp,
935 struct rstp *rstp, const struct mcast_snooping *ms,
936 const struct mbridge *mbridge,
937 const struct dpif_sflow *sflow,
938 const struct dpif_ipfix *ipfix,
939 const struct netflow *netflow,
940 bool forward_bpdu, bool has_in_band,
941 const struct dpif_backer_support *support,
942 const struct xbridge_addr *addr)
943 {
944 if (xbridge->ml != ml) {
945 mac_learning_unref(xbridge->ml);
946 xbridge->ml = mac_learning_ref(ml);
947 }
948
949 if (xbridge->ms != ms) {
950 mcast_snooping_unref(xbridge->ms);
951 xbridge->ms = mcast_snooping_ref(ms);
952 }
953
954 if (xbridge->mbridge != mbridge) {
955 mbridge_unref(xbridge->mbridge);
956 xbridge->mbridge = mbridge_ref(mbridge);
957 }
958
959 if (xbridge->sflow != sflow) {
960 dpif_sflow_unref(xbridge->sflow);
961 xbridge->sflow = dpif_sflow_ref(sflow);
962 }
963
964 if (xbridge->ipfix != ipfix) {
965 dpif_ipfix_unref(xbridge->ipfix);
966 xbridge->ipfix = dpif_ipfix_ref(ipfix);
967 }
968
969 if (xbridge->stp != stp) {
970 stp_unref(xbridge->stp);
971 xbridge->stp = stp_ref(stp);
972 }
973
974 if (xbridge->rstp != rstp) {
975 rstp_unref(xbridge->rstp);
976 xbridge->rstp = rstp_ref(rstp);
977 }
978
979 if (xbridge->netflow != netflow) {
980 netflow_unref(xbridge->netflow);
981 xbridge->netflow = netflow_ref(netflow);
982 }
983
984 if (xbridge->addr != addr) {
985 xbridge_addr_unref(xbridge->addr);
986 xbridge->addr = xbridge_addr_ref(addr);
987 }
988
989 xbridge->dpif = dpif;
990 xbridge->forward_bpdu = forward_bpdu;
991 xbridge->has_in_band = has_in_band;
992 xbridge->support = *support;
993 }
994
995 static void
996 xlate_xbundle_set(struct xbundle *xbundle,
997 enum port_vlan_mode vlan_mode, uint16_t qinq_ethtype,
998 int vlan, unsigned long *trunks, unsigned long *cvlans,
999 bool use_priority_tags,
1000 const struct bond *bond, const struct lacp *lacp,
1001 bool floodable, bool protected)
1002 {
1003 ovs_assert(xbundle->xbridge);
1004
1005 xbundle->vlan_mode = vlan_mode;
1006 xbundle->qinq_ethtype = qinq_ethtype;
1007 xbundle->vlan = vlan;
1008 xbundle->trunks = trunks;
1009 xbundle->cvlans = cvlans;
1010 xbundle->use_priority_tags = use_priority_tags;
1011 xbundle->floodable = floodable;
1012 xbundle->protected = protected;
1013
1014 if (xbundle->bond != bond) {
1015 bond_unref(xbundle->bond);
1016 xbundle->bond = bond_ref(bond);
1017 }
1018
1019 if (xbundle->lacp != lacp) {
1020 lacp_unref(xbundle->lacp);
1021 xbundle->lacp = lacp_ref(lacp);
1022 }
1023 }
1024
1025 static void
1026 xlate_xport_set(struct xport *xport, odp_port_t odp_port,
1027 const struct netdev *netdev, const struct cfm *cfm,
1028 const struct bfd *bfd, const struct lldp *lldp, int stp_port_no,
1029 const struct rstp_port* rstp_port,
1030 enum ofputil_port_config config, enum ofputil_port_state state,
1031 bool is_tunnel, bool may_enable)
1032 {
1033 xport->config = config;
1034 xport->state = state;
1035 xport->stp_port_no = stp_port_no;
1036 xport->is_tunnel = is_tunnel;
1037 xport->pt_mode = netdev_get_pt_mode(netdev);
1038 xport->may_enable = may_enable;
1039 xport->odp_port = odp_port;
1040
1041 if (xport->rstp_port != rstp_port) {
1042 rstp_port_unref(xport->rstp_port);
1043 xport->rstp_port = rstp_port_ref(rstp_port);
1044 }
1045
1046 if (xport->cfm != cfm) {
1047 cfm_unref(xport->cfm);
1048 xport->cfm = cfm_ref(cfm);
1049 }
1050
1051 if (xport->bfd != bfd) {
1052 bfd_unref(xport->bfd);
1053 xport->bfd = bfd_ref(bfd);
1054 }
1055
1056 if (xport->lldp != lldp) {
1057 lldp_unref(xport->lldp);
1058 xport->lldp = lldp_ref(lldp);
1059 }
1060
1061 if (xport->netdev != netdev) {
1062 netdev_close(xport->netdev);
1063 xport->netdev = netdev_ref(netdev);
1064 }
1065 }
1066
1067 static void
1068 xlate_xbridge_copy(struct xbridge *xbridge)
1069 {
1070 struct xbundle *xbundle;
1071 struct xport *xport;
1072 struct xbridge *new_xbridge = xzalloc(sizeof *xbridge);
1073 new_xbridge->ofproto = xbridge->ofproto;
1074 new_xbridge->name = xstrdup(xbridge->name);
1075 xlate_xbridge_init(new_xcfg, new_xbridge);
1076
1077 xlate_xbridge_set(new_xbridge,
1078 xbridge->dpif, xbridge->ml, xbridge->stp,
1079 xbridge->rstp, xbridge->ms, xbridge->mbridge,
1080 xbridge->sflow, xbridge->ipfix, xbridge->netflow,
1081 xbridge->forward_bpdu, xbridge->has_in_band,
1082 &xbridge->support, xbridge->addr);
1083 LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
1084 xlate_xbundle_copy(new_xbridge, xbundle);
1085 }
1086
1087 /* Copy xports which are not part of a xbundle */
1088 HMAP_FOR_EACH (xport, ofp_node, &xbridge->xports) {
1089 if (!xport->xbundle) {
1090 xlate_xport_copy(new_xbridge, NULL, xport);
1091 }
1092 }
1093 }
1094
1095 static void
1096 xlate_xbundle_copy(struct xbridge *xbridge, struct xbundle *xbundle)
1097 {
1098 struct xport *xport;
1099 struct xbundle *new_xbundle = xzalloc(sizeof *xbundle);
1100 new_xbundle->ofbundle = xbundle->ofbundle;
1101 new_xbundle->xbridge = xbridge;
1102 new_xbundle->name = xstrdup(xbundle->name);
1103 xlate_xbundle_init(new_xcfg, new_xbundle);
1104
1105 xlate_xbundle_set(new_xbundle, xbundle->vlan_mode, xbundle->qinq_ethtype,
1106 xbundle->vlan, xbundle->trunks, xbundle->cvlans,
1107 xbundle->use_priority_tags, xbundle->bond, xbundle->lacp,
1108 xbundle->floodable, xbundle->protected);
1109 LIST_FOR_EACH (xport, bundle_node, &xbundle->xports) {
1110 xlate_xport_copy(xbridge, new_xbundle, xport);
1111 }
1112 }
1113
1114 static void
1115 xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
1116 struct xport *xport)
1117 {
1118 struct skb_priority_to_dscp *pdscp, *new_pdscp;
1119 struct xport *new_xport = xzalloc(sizeof *xport);
1120 new_xport->ofport = xport->ofport;
1121 new_xport->ofp_port = xport->ofp_port;
1122 new_xport->xbridge = xbridge;
1123 new_xport->uuid = xport->uuid;
1124 xlate_xport_init(new_xcfg, new_xport);
1125
1126 xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm,
1127 xport->bfd, xport->lldp, xport->stp_port_no,
1128 xport->rstp_port, xport->config, xport->state,
1129 xport->is_tunnel, xport->may_enable);
1130
1131 if (xport->peer) {
1132 struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport);
1133 if (peer) {
1134 new_xport->peer = peer;
1135 new_xport->peer->peer = new_xport;
1136 }
1137 }
1138
1139 if (xbundle) {
1140 new_xport->xbundle = xbundle;
1141 ovs_list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
1142 }
1143
1144 HMAP_FOR_EACH (pdscp, hmap_node, &xport->skb_priorities) {
1145 new_pdscp = xmalloc(sizeof *pdscp);
1146 new_pdscp->skb_priority = pdscp->skb_priority;
1147 new_pdscp->dscp = pdscp->dscp;
1148 hmap_insert(&new_xport->skb_priorities, &new_pdscp->hmap_node,
1149 hash_int(new_pdscp->skb_priority, 0));
1150 }
1151 }
1152
1153 /* Sets the current xlate configuration to new_xcfg and frees the old xlate
1154 * configuration in xcfgp.
1155 *
1156 * This needs to be called after editing the xlate configuration.
1157 *
1158 * Functions that edit the new xlate configuration are
1159 * xlate_<ofproto/bundle/ofport>_set and xlate_<ofproto/bundle/ofport>_remove.
1160 *
1161 * A sample workflow:
1162 *
1163 * xlate_txn_start();
1164 * ...
1165 * edit_xlate_configuration();
1166 * ...
1167 * xlate_txn_commit(); */
1168 void
1169 xlate_txn_commit(void)
1170 {
1171 struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1172
1173 ovsrcu_set(&xcfgp, new_xcfg);
1174 ovsrcu_synchronize();
1175 xlate_xcfg_free(xcfg);
1176 new_xcfg = NULL;
1177 }
1178
1179 /* Copies the current xlate configuration in xcfgp to new_xcfg.
1180 *
1181 * This needs to be called prior to editing the xlate configuration. */
1182 void
1183 xlate_txn_start(void)
1184 {
1185 struct xbridge *xbridge;
1186 struct xlate_cfg *xcfg;
1187
1188 ovs_assert(!new_xcfg);
1189
1190 new_xcfg = xmalloc(sizeof *new_xcfg);
1191 hmap_init(&new_xcfg->xbridges);
1192 hmap_init(&new_xcfg->xbundles);
1193 hmap_init(&new_xcfg->xports);
1194 hmap_init(&new_xcfg->xports_uuid);
1195
1196 xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1197 if (!xcfg) {
1198 return;
1199 }
1200
1201 HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
1202 xlate_xbridge_copy(xbridge);
1203 }
1204 }
1205
1206
1207 static void
1208 xlate_xcfg_free(struct xlate_cfg *xcfg)
1209 {
1210 struct xbridge *xbridge, *next_xbridge;
1211
1212 if (!xcfg) {
1213 return;
1214 }
1215
1216 HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) {
1217 xlate_xbridge_remove(xcfg, xbridge);
1218 }
1219
1220 hmap_destroy(&xcfg->xbridges);
1221 hmap_destroy(&xcfg->xbundles);
1222 hmap_destroy(&xcfg->xports);
1223 hmap_destroy(&xcfg->xports_uuid);
1224 free(xcfg);
1225 }
1226
1227 void
1228 xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
1229 struct dpif *dpif,
1230 const struct mac_learning *ml, struct stp *stp,
1231 struct rstp *rstp, const struct mcast_snooping *ms,
1232 const struct mbridge *mbridge,
1233 const struct dpif_sflow *sflow,
1234 const struct dpif_ipfix *ipfix,
1235 const struct netflow *netflow,
1236 bool forward_bpdu, bool has_in_band,
1237 const struct dpif_backer_support *support)
1238 {
1239 struct xbridge *xbridge;
1240 struct xbridge_addr *xbridge_addr, *old_addr;
1241
1242 ovs_assert(new_xcfg);
1243
1244 xbridge = xbridge_lookup(new_xcfg, ofproto);
1245 if (!xbridge) {
1246 xbridge = xzalloc(sizeof *xbridge);
1247 xbridge->ofproto = ofproto;
1248
1249 xlate_xbridge_init(new_xcfg, xbridge);
1250 }
1251
1252 free(xbridge->name);
1253 xbridge->name = xstrdup(name);
1254
1255 xbridge_addr = xbridge_addr_create(xbridge);
1256 old_addr = xbridge->addr;
1257
1258 xlate_xbridge_set(xbridge, dpif, ml, stp, rstp, ms, mbridge, sflow, ipfix,
1259 netflow, forward_bpdu, has_in_band, support,
1260 xbridge_addr);
1261
1262 if (xbridge_addr != old_addr) {
1263 xbridge_addr_unref(xbridge_addr);
1264 }
1265 }
1266
1267 static void
1268 xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge)
1269 {
1270 struct xbundle *xbundle, *next_xbundle;
1271 struct xport *xport, *next_xport;
1272
1273 if (!xbridge) {
1274 return;
1275 }
1276
1277 HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) {
1278 xlate_xport_remove(xcfg, xport);
1279 }
1280
1281 LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) {
1282 xlate_xbundle_remove(xcfg, xbundle);
1283 }
1284
1285 hmap_remove(&xcfg->xbridges, &xbridge->hmap_node);
1286 mac_learning_unref(xbridge->ml);
1287 mcast_snooping_unref(xbridge->ms);
1288 mbridge_unref(xbridge->mbridge);
1289 dpif_sflow_unref(xbridge->sflow);
1290 dpif_ipfix_unref(xbridge->ipfix);
1291 netflow_unref(xbridge->netflow);
1292 stp_unref(xbridge->stp);
1293 rstp_unref(xbridge->rstp);
1294 xbridge_addr_unref(xbridge->addr);
1295 hmap_destroy(&xbridge->xports);
1296 free(xbridge->name);
1297 free(xbridge);
1298 }
1299
1300 void
1301 xlate_remove_ofproto(struct ofproto_dpif *ofproto)
1302 {
1303 struct xbridge *xbridge;
1304
1305 ovs_assert(new_xcfg);
1306
1307 xbridge = xbridge_lookup(new_xcfg, ofproto);
1308 xlate_xbridge_remove(new_xcfg, xbridge);
1309 }
1310
1311 void
1312 xlate_bundle_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
1313 const char *name, enum port_vlan_mode vlan_mode,
1314 uint16_t qinq_ethtype, int vlan,
1315 unsigned long *trunks, unsigned long *cvlans,
1316 bool use_priority_tags,
1317 const struct bond *bond, const struct lacp *lacp,
1318 bool floodable, bool protected)
1319 {
1320 struct xbundle *xbundle;
1321
1322 ovs_assert(new_xcfg);
1323
1324 xbundle = xbundle_lookup(new_xcfg, ofbundle);
1325 if (!xbundle) {
1326 xbundle = xzalloc(sizeof *xbundle);
1327 xbundle->ofbundle = ofbundle;
1328 xbundle->xbridge = xbridge_lookup(new_xcfg, ofproto);
1329
1330 xlate_xbundle_init(new_xcfg, xbundle);
1331 }
1332
1333 free(xbundle->name);
1334 xbundle->name = xstrdup(name);
1335
1336 xlate_xbundle_set(xbundle, vlan_mode, qinq_ethtype, vlan, trunks, cvlans,
1337 use_priority_tags, bond, lacp, floodable, protected);
1338 }
1339
1340 static void
1341 xlate_xbundle_remove(struct xlate_cfg *xcfg, struct xbundle *xbundle)
1342 {
1343 struct xport *xport;
1344
1345 if (!xbundle) {
1346 return;
1347 }
1348
1349 LIST_FOR_EACH_POP (xport, bundle_node, &xbundle->xports) {
1350 xport->xbundle = NULL;
1351 }
1352
1353 hmap_remove(&xcfg->xbundles, &xbundle->hmap_node);
1354 ovs_list_remove(&xbundle->list_node);
1355 bond_unref(xbundle->bond);
1356 lacp_unref(xbundle->lacp);
1357 free(xbundle->name);
1358 free(xbundle);
1359 }
1360
1361 void
1362 xlate_bundle_remove(struct ofbundle *ofbundle)
1363 {
1364 struct xbundle *xbundle;
1365
1366 ovs_assert(new_xcfg);
1367
1368 xbundle = xbundle_lookup(new_xcfg, ofbundle);
1369 xlate_xbundle_remove(new_xcfg, xbundle);
1370 }
1371
1372 void
1373 xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
1374 struct ofport_dpif *ofport, ofp_port_t ofp_port,
1375 odp_port_t odp_port, const struct netdev *netdev,
1376 const struct cfm *cfm, const struct bfd *bfd,
1377 const struct lldp *lldp, struct ofport_dpif *peer,
1378 int stp_port_no, const struct rstp_port *rstp_port,
1379 const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
1380 enum ofputil_port_config config,
1381 enum ofputil_port_state state, bool is_tunnel,
1382 bool may_enable)
1383 {
1384 size_t i;
1385 struct xport *xport;
1386
1387 ovs_assert(new_xcfg);
1388
1389 xport = xport_lookup(new_xcfg, ofport);
1390 if (!xport) {
1391 xport = xzalloc(sizeof *xport);
1392 xport->ofport = ofport;
1393 xport->xbridge = xbridge_lookup(new_xcfg, ofproto);
1394 xport->ofp_port = ofp_port;
1395 uuid_generate(&xport->uuid);
1396
1397 xlate_xport_init(new_xcfg, xport);
1398 }
1399
1400 ovs_assert(xport->ofp_port == ofp_port);
1401
1402 xlate_xport_set(xport, odp_port, netdev, cfm, bfd, lldp,
1403 stp_port_no, rstp_port, config, state, is_tunnel,
1404 may_enable);
1405
1406 if (xport->peer) {
1407 xport->peer->peer = NULL;
1408 }
1409 xport->peer = xport_lookup(new_xcfg, peer);
1410 if (xport->peer) {
1411 xport->peer->peer = xport;
1412 }
1413
1414 if (xport->xbundle) {
1415 ovs_list_remove(&xport->bundle_node);
1416 }
1417 xport->xbundle = xbundle_lookup(new_xcfg, ofbundle);
1418 if (xport->xbundle) {
1419 ovs_list_insert(&xport->xbundle->xports, &xport->bundle_node);
1420 }
1421
1422 clear_skb_priorities(xport);
1423 for (i = 0; i < n_qdscp; i++) {
1424 struct skb_priority_to_dscp *pdscp;
1425 uint32_t skb_priority;
1426
1427 if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue,
1428 &skb_priority)) {
1429 continue;
1430 }
1431
1432 pdscp = xmalloc(sizeof *pdscp);
1433 pdscp->skb_priority = skb_priority;
1434 pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
1435 hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
1436 hash_int(pdscp->skb_priority, 0));
1437 }
1438 }
1439
1440 static void
1441 xlate_xport_remove(struct xlate_cfg *xcfg, struct xport *xport)
1442 {
1443 if (!xport) {
1444 return;
1445 }
1446
1447 if (xport->peer) {
1448 xport->peer->peer = NULL;
1449 xport->peer = NULL;
1450 }
1451
1452 if (xport->xbundle) {
1453 ovs_list_remove(&xport->bundle_node);
1454 }
1455
1456 clear_skb_priorities(xport);
1457 hmap_destroy(&xport->skb_priorities);
1458
1459 hmap_remove(&xcfg->xports, &xport->hmap_node);
1460 hmap_remove(&xcfg->xports_uuid, &xport->uuid_node);
1461 hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
1462
1463 netdev_close(xport->netdev);
1464 rstp_port_unref(xport->rstp_port);
1465 cfm_unref(xport->cfm);
1466 bfd_unref(xport->bfd);
1467 lldp_unref(xport->lldp);
1468 free(xport);
1469 }
1470
1471 void
1472 xlate_ofport_remove(struct ofport_dpif *ofport)
1473 {
1474 struct xport *xport;
1475
1476 ovs_assert(new_xcfg);
1477
1478 xport = xport_lookup(new_xcfg, ofport);
1479 xlate_xport_remove(new_xcfg, xport);
1480 }
1481
1482 static struct ofproto_dpif *
1483 xlate_lookup_ofproto_(const struct dpif_backer *backer, const struct flow *flow,
1484 ofp_port_t *ofp_in_port, const struct xport **xportp)
1485 {
1486 struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1487 const struct xport *xport;
1488
1489 /* If packet is recirculated, xport can be retrieved from frozen state. */
1490 if (flow->recirc_id) {
1491 const struct recirc_id_node *recirc_id_node;
1492
1493 recirc_id_node = recirc_id_node_find(flow->recirc_id);
1494
1495 if (OVS_UNLIKELY(!recirc_id_node)) {
1496 return NULL;
1497 }
1498
1499 /* If recirculation was initiated due to bond (in_port = OFPP_NONE)
1500 * then frozen state is static and xport_uuid is not defined, so xport
1501 * cannot be restored from frozen state. */
1502 if (recirc_id_node->state.metadata.in_port != OFPP_NONE) {
1503 struct uuid xport_uuid = recirc_id_node->state.xport_uuid;
1504 xport = xport_lookup_by_uuid(xcfg, &xport_uuid);
1505 if (xport && xport->xbridge && xport->xbridge->ofproto) {
1506 goto out;
1507 }
1508 }
1509 }
1510
1511 xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
1512 ? tnl_port_receive(flow)
1513 : odp_port_to_ofport(backer, flow->in_port.odp_port));
1514 if (OVS_UNLIKELY(!xport)) {
1515 return NULL;
1516 }
1517
1518 out:
1519 *xportp = xport;
1520 if (ofp_in_port) {
1521 *ofp_in_port = xport->ofp_port;
1522 }
1523 return xport->xbridge->ofproto;
1524 }
1525
1526 /* Given a datapath and flow metadata ('backer' and 'flow', respectively),
1527 * returns the corresponding struct ofproto_dpif and OpenFlow port number. */
1528 struct ofproto_dpif *
1529 xlate_lookup_ofproto(const struct dpif_backer *backer, const struct flow *flow,
1530 ofp_port_t *ofp_in_port)
1531 {
1532 const struct xport *xport;
1533
1534 return xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1535 }
1536
1537 /* Given a datapath and flow metadata ('backer' and 'flow', respectively),
1538 * optionally populates 'ofprotop' with the ofproto_dpif, 'ofp_in_port' with the
1539 * openflow in_port, and 'ipfix', 'sflow', and 'netflow' with the appropriate
1540 * handles for those protocols if they're enabled. Caller may use the returned
1541 * pointers until quiescing, for longer term use additional references must
1542 * be taken.
1543 *
1544 * Returns 0 if successful, ENODEV if the parsed flow has no associated ofproto.
1545 */
1546 int
1547 xlate_lookup(const struct dpif_backer *backer, const struct flow *flow,
1548 struct ofproto_dpif **ofprotop, struct dpif_ipfix **ipfix,
1549 struct dpif_sflow **sflow, struct netflow **netflow,
1550 ofp_port_t *ofp_in_port)
1551 {
1552 struct ofproto_dpif *ofproto;
1553 const struct xport *xport;
1554
1555 ofproto = xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1556
1557 if (!ofproto) {
1558 return ENODEV;
1559 }
1560
1561 if (ofprotop) {
1562 *ofprotop = ofproto;
1563 }
1564
1565 if (ipfix) {
1566 *ipfix = xport ? xport->xbridge->ipfix : NULL;
1567 }
1568
1569 if (sflow) {
1570 *sflow = xport ? xport->xbridge->sflow : NULL;
1571 }
1572
1573 if (netflow) {
1574 *netflow = xport ? xport->xbridge->netflow : NULL;
1575 }
1576
1577 return 0;
1578 }
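
/* A minimal caller sketch (hypothetical) for xlate_lookup(); the returned
 * pointers are only safe to use until the calling thread quiesces:
 *
 *     struct ofproto_dpif *ofproto;
 *     struct netflow *netflow;
 *     ofp_port_t in_port;
 *
 *     if (!xlate_lookup(backer, flow, &ofproto, NULL, NULL, &netflow,
 *                       &in_port) && netflow) {
 *         netflow = netflow_ref(netflow);  // reference for longer-term use
 *     }
 */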
1579
1580 static struct xbridge *
1581 xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto)
1582 {
1583 struct hmap *xbridges;
1584 struct xbridge *xbridge;
1585
1586 if (!ofproto || !xcfg) {
1587 return NULL;
1588 }
1589
1590 xbridges = &xcfg->xbridges;
1591
1592 HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
1593 xbridges) {
1594 if (xbridge->ofproto == ofproto) {
1595 return xbridge;
1596 }
1597 }
1598 return NULL;
1599 }
1600
1601 static struct xbridge *
1602 xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
1603 {
1604 struct xbridge *xbridge;
1605
1606 HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
1607 if (uuid_equals(&xbridge->ofproto->uuid, uuid)) {
1608 return xbridge;
1609 }
1610 }
1611 return NULL;
1612 }
1613
1614 static struct xbundle *
1615 xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
1616 {
1617 struct hmap *xbundles;
1618 struct xbundle *xbundle;
1619
1620 if (!ofbundle || !xcfg) {
1621 return NULL;
1622 }
1623
1624 xbundles = &xcfg->xbundles;
1625
1626 HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0),
1627 xbundles) {
1628 if (xbundle->ofbundle == ofbundle) {
1629 return xbundle;
1630 }
1631 }
1632 return NULL;
1633 }
1634
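/* Returns the xport in 'xcfg' that corresponds to 'ofport', or NULL if there
 * is none. */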
1635 static struct xport *
1636 xport_lookup(struct xlate_cfg *xcfg, const struct ofport_dpif *ofport)
1637 {
1638 struct hmap *xports;
1639 struct xport *xport;
1640
1641 if (!ofport || !xcfg) {
1642 return NULL;
1643 }
1644
1645 xports = &xcfg->xports;
1646
1647 HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0),
1648 xports) {
1649 if (xport->ofport == ofport) {
1650 return xport;
1651 }
1652 }
1653 return NULL;
1654 }
1655
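/* Returns the xport in 'xcfg' with UUID 'uuid', or NULL if there is none. */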
1656 static struct xport *
1657 xport_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
1658 {
1659 struct hmap *xports;
1660 struct xport *xport;
1661
1662 if (uuid_is_zero(uuid) || !xcfg) {
1663 return NULL;
1664 }
1665
1666 xports = &xcfg->xports_uuid;
1667
1668 HMAP_FOR_EACH_IN_BUCKET (xport, uuid_node, uuid_hash(uuid), xports) {
1669 if (uuid_equals(&xport->uuid, uuid)) {
1670 return xport;
1671 }
1672 }
1673 return NULL;
1674 }
1675
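/* Returns the STP port for 'xport', or NULL if STP is not enabled on its
 * bridge or 'xport' has no STP port number assigned. */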
1676 static struct stp_port *
1677 xport_get_stp_port(const struct xport *xport)
1678 {
1679 return xport->xbridge->stp && xport->stp_port_no != -1
1680 ? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
1681 : NULL;
1682 }
1683
1684 static bool
1685 xport_stp_learn_state(const struct xport *xport)
1686 {
1687 struct stp_port *sp = xport_get_stp_port(xport);
1688 return sp
1689 ? stp_learn_in_state(stp_port_get_state(sp))
1690 : true;
1691 }
1692
1693 static bool
1694 xport_stp_forward_state(const struct xport *xport)
1695 {
1696 struct stp_port *sp = xport_get_stp_port(xport);
1697 return sp
1698 ? stp_forward_in_state(stp_port_get_state(sp))
1699 : true;
1700 }
1701
1702 static bool
1703 xport_stp_should_forward_bpdu(const struct xport *xport)
1704 {
1705 struct stp_port *sp = xport_get_stp_port(xport);
1706 return stp_should_forward_bpdu(sp ? stp_port_get_state(sp) : STP_DISABLED);
1707 }
1708
1709 /* Returns true if STP should process 'flow'. Sets fields in 'wc' that
1710 * were used to make the determination. */
1711 static bool
1712 stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
1713 {
1714 /* is_stp() also checks dl_type, but dl_type is always set in 'wc'. */
1715 memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1716 return is_stp(flow);
1717 }
1718
1719 static void
1720 stp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1721 {
1722 struct stp_port *sp = xport_get_stp_port(xport);
1723 struct dp_packet payload = *packet;
1724 struct eth_header *eth = dp_packet_data(&payload);
1725
1726 /* Sink packets on ports that have STP disabled when the bridge has
1727 * STP enabled. */
1728 if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
1729 return;
1730 }
1731
1732 /* Trim off padding on payload. */
1733 if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1734 dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1735 }
1736
1737 if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1738 stp_received_bpdu(sp, dp_packet_data(&payload), dp_packet_size(&payload));
1739 }
1740 }
1741
1742 static enum rstp_state
1743 xport_get_rstp_port_state(const struct xport *xport)
1744 {
1745 return xport->rstp_port
1746 ? rstp_port_get_state(xport->rstp_port)
1747 : RSTP_DISABLED;
1748 }
1749
1750 static bool
1751 xport_rstp_learn_state(const struct xport *xport)
1752 {
1753 return xport->xbridge->rstp && xport->rstp_port
1754 ? rstp_learn_in_state(xport_get_rstp_port_state(xport))
1755 : true;
1756 }
1757
1758 static bool
1759 xport_rstp_forward_state(const struct xport *xport)
1760 {
1761 return xport->xbridge->rstp && xport->rstp_port
1762 ? rstp_forward_in_state(xport_get_rstp_port_state(xport))
1763 : true;
1764 }
1765
1766 static bool
1767 xport_rstp_should_manage_bpdu(const struct xport *xport)
1768 {
1769 return rstp_should_manage_bpdu(xport_get_rstp_port_state(xport));
1770 }
1771
1772 static void
1773 rstp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1774 {
1775 struct dp_packet payload = *packet;
1776 struct eth_header *eth = dp_packet_data(&payload);
1777
1778 /* Sink packets on ports that have no RSTP. */
1779 if (!xport->rstp_port) {
1780 return;
1781 }
1782
1783 /* Trim off padding on payload. */
1784 if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1785 dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1786 }
1787
1788 if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1789 rstp_port_received_bpdu(xport->rstp_port, dp_packet_data(&payload),
1790 dp_packet_size(&payload));
1791 }
1792 }
1793
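/* Returns the xport on 'xbridge' with OpenFlow port number 'ofp_port', or
 * NULL if there is none. */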
1794 static struct xport *
1795 get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1796 {
1797 struct xport *xport;
1798
1799 HMAP_FOR_EACH_IN_BUCKET (xport, ofp_node, hash_ofp_port(ofp_port),
1800 &xbridge->xports) {
1801 if (xport->ofp_port == ofp_port) {
1802 return xport;
1803 }
1804 }
1805 return NULL;
1806 }
1807
1808 static odp_port_t
1809 ofp_port_to_odp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1810 {
1811 const struct xport *xport = get_ofp_port(xbridge, ofp_port);
1812 return xport ? xport->odp_port : ODPP_NONE;
1813 }
1814
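/* Returns true if OpenFlow port 'ofp_port' exists on ctx's bridge and may be
 * enabled.  Used to judge the liveness of group buckets' watch ports. */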
1815 static bool
1816 odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
1817 {
1818 struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
1819 return xport && xport->may_enable;
1820 }
1821
1822 static struct ofputil_bucket *
1823 group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
1824 int depth);
1825
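/* Returns true if group 'group_id' exists on ctx's bridge and has at least
 * one live bucket. */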
1826 static bool
1827 group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
1828 {
1829 struct group_dpif *group;
1830
1831 group = group_dpif_lookup(ctx->xbridge->ofproto, group_id,
1832 ctx->xin->tables_version, false);
1833 if (group) {
1834 return group_first_live_bucket(ctx, group, depth) != NULL;
1835 }
1836
1837 return false;
1838 }
1839
1840 #define MAX_LIVENESS_RECURSION 128 /* Arbitrary limit */
1841
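/* Returns true if 'bucket' may be used: it has no liveness requirement, its
 * watch port is live, or its watch group is live.  'depth' limits recursion
 * through chained watch groups. */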
1842 static bool
1843 bucket_is_alive(const struct xlate_ctx *ctx,
1844 struct ofputil_bucket *bucket, int depth)
1845 {
1846 if (depth >= MAX_LIVENESS_RECURSION) {
1847 xlate_report_error(ctx, "bucket chaining exceeded %d links",
1848 MAX_LIVENESS_RECURSION);
1849 return false;
1850 }
1851
1852 return (!ofputil_bucket_has_liveness(bucket)
1853 || (bucket->watch_port != OFPP_ANY
1854 && odp_port_is_alive(ctx, bucket->watch_port))
1855 || (bucket->watch_group != OFPG_ANY
1856 && group_is_alive(ctx, bucket->watch_group, depth + 1)));
1857 }
1858
1859 static void
1860 xlate_report_bucket_not_live(const struct xlate_ctx *ctx,
1861 const struct ofputil_bucket *bucket)
1862 {
1863 if (OVS_UNLIKELY(ctx->xin->trace)) {
1864 struct ds s = DS_EMPTY_INITIALIZER;
1865 if (bucket->watch_port != OFPP_ANY) {
1866 ds_put_cstr(&s, "port ");
1867 ofputil_format_port(bucket->watch_port, NULL, &s);
1868 }
1869 if (bucket->watch_group != OFPG_ANY) {
1870 if (s.length) {
1871 ds_put_cstr(&s, " and ");
1872 }
1873 ds_put_format(&s, "group %"PRIu32, bucket->watch_group);
1874 }
1875
1876 xlate_report(ctx, OFT_DETAIL, "bucket %"PRIu32": not live due to %s",
1877 bucket->bucket_id, ds_cstr(&s));
1878
1879 ds_destroy(&s);
1880 }
1881 }
1882
1883 static struct ofputil_bucket *
1884 group_first_live_bucket(const struct xlate_ctx *ctx,
1885 const struct group_dpif *group, int depth)
1886 {
1887 struct ofputil_bucket *bucket;
1888 LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
1889 if (bucket_is_alive(ctx, bucket, depth)) {
1890 return bucket;
1891 }
1892 xlate_report_bucket_not_live(ctx, bucket);
1893 }
1894
1895 return NULL;
1896 }
1897
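/* Returns the live bucket in 'group' with the highest weighted hash score
 * for 'basis', or NULL if no bucket is live. */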
1898 static struct ofputil_bucket *
1899 group_best_live_bucket(const struct xlate_ctx *ctx,
1900 const struct group_dpif *group,
1901 uint32_t basis)
1902 {
1903 struct ofputil_bucket *best_bucket = NULL;
1904 uint32_t best_score = 0;
1905
1906 struct ofputil_bucket *bucket;
1907 LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
1908 if (bucket_is_alive(ctx, bucket, 0)) {
1909 uint32_t score =
1910 (hash_int(bucket->bucket_id, basis) & 0xffff) * bucket->weight;
1911 if (score >= best_score) {
1912 best_bucket = bucket;
1913 best_score = score;
1914 }
1915 xlate_report(ctx, OFT_DETAIL, "bucket %"PRIu32": score %"PRIu32,
1916 bucket->bucket_id, score);
1917 } else {
1918 xlate_report_bucket_not_live(ctx, bucket);
1919 }
1920 }
1921
1922 return best_bucket;
1923 }
1924
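/* Returns true if 'bundle' carries 'vlan' as a trunked VLAN: the bundle is
 * not an access port and either trunks all VLANs or has 'vlan' set in its
 * trunk bitmap. */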
1925 static bool
1926 xbundle_trunks_vlan(const struct xbundle *bundle, uint16_t vlan)
1927 {
1928 return (bundle->vlan_mode != PORT_VLAN_ACCESS
1929 && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan)));
1930 }
1931
1932 static bool
1933 xbundle_allows_cvlan(const struct xbundle *bundle, uint16_t vlan)
1934 {
1935 return (!bundle->cvlans || bitmap_is_set(bundle->cvlans, vlan));
1936 }
1937
1938 static bool
1939 xbundle_includes_vlan(const struct xbundle *xbundle, const struct xvlan *xvlan)
1940 {
1941 switch (xbundle->vlan_mode) {
1942 case PORT_VLAN_ACCESS:
1943 return xvlan->v[0].vid == xbundle->vlan && xvlan->v[1].vid == 0;
1944
1945 case PORT_VLAN_TRUNK:
1946 case PORT_VLAN_NATIVE_UNTAGGED:
1947 case PORT_VLAN_NATIVE_TAGGED:
1948 return xbundle_trunks_vlan(xbundle, xvlan->v[0].vid);
1949
1950 case PORT_VLAN_DOT1Q_TUNNEL:
1951 return xvlan->v[0].vid == xbundle->vlan &&
1952 xbundle_allows_cvlan(xbundle, xvlan->v[1].vid);
1953
1954 default:
1955 OVS_NOT_REACHED();
1956 }
1957 }
1958
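/* Returns the mirrors for which 'xbundle' is the output destination on
 * 'xbridge', or 0 for the OFPP_NONE pseudo-bundle.  The two functions that
 * follow are the analogous lookups for mirror ingress and egress selection. */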
1959 static mirror_mask_t
1960 xbundle_mirror_out(const struct xbridge *xbridge, struct xbundle *xbundle)
1961 {
1962 return xbundle != &ofpp_none_bundle
1963 ? mirror_bundle_out(xbridge->mbridge, xbundle->ofbundle)
1964 : 0;
1965 }
1966
1967 static mirror_mask_t
1968 xbundle_mirror_src(const struct xbridge *xbridge, struct xbundle *xbundle)
1969 {
1970 return xbundle != &ofpp_none_bundle
1971 ? mirror_bundle_src(xbridge->mbridge, xbundle->ofbundle)
1972 : 0;
1973 }
1974
1975 static mirror_mask_t
1976 xbundle_mirror_dst(const struct xbridge *xbridge, struct xbundle *xbundle)
1977 {
1978 return xbundle != &ofpp_none_bundle
1979 ? mirror_bundle_dst(xbridge->mbridge, xbundle->ofbundle)
1980 : 0;
1981 }
1982
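/* Returns the bundle that contains OpenFlow port 'in_port' on 'xbridge',
 * storing the port itself (or NULL) in '*in_xportp' if it is nonnull.
 * Returns the special OFPP_NONE pseudo-bundle for OFPP_NONE and
 * OFPP_CONTROLLER, and NULL for unknown ports. */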
1983 static struct xbundle *
1984 lookup_input_bundle__(const struct xbridge *xbridge,
1985 ofp_port_t in_port, struct xport **in_xportp)
1986 {
1987 struct xport *xport;
1988
1989 /* Find the port and bundle for the received packet. */
1990 xport = get_ofp_port(xbridge, in_port);
1991 if (in_xportp) {
1992 *in_xportp = xport;
1993 }
1994 if (xport && xport->xbundle) {
1995 return xport->xbundle;
1996 }
1997
1998 /* Special-case OFPP_NONE (OF1.0) and OFPP_CONTROLLER (OF1.1+),
1999 * which a controller may use as the ingress port for traffic that
2000 * it is sourcing. */
2001 if (in_port == OFPP_CONTROLLER || in_port == OFPP_NONE) {
2002 return &ofpp_none_bundle;
2003 }
2004 return NULL;
2005 }
2006
2007 static struct xbundle *
2008 lookup_input_bundle(const struct xlate_ctx *ctx,
2009 ofp_port_t in_port, struct xport **in_xportp)
2010 {
2011 struct xbundle *xbundle = lookup_input_bundle__(ctx->xbridge,
2012 in_port, in_xportp);
2013 if (!xbundle) {
2014 /* Odd. A few possible reasons here:
2015 *
2016 * - We deleted a port but there are still a few packets queued up
2017 * from it.
2018 *
2019 * - Someone externally added a port (e.g. "ovs-dpctl add-if") that
2020 * we don't know about.
2021 *
2022 * - The ofproto client didn't configure the port as part of a bundle.
2023 * This is particularly likely to happen if a packet was received on
2024 * the port after it was created, but before the client had a chance
2025 * to configure its bundle.
2026 */
2027 xlate_report_error(ctx, "received packet on unknown port %"PRIu32,
2028 in_port);
2029 }
2030 return xbundle;
2031 }
2032
2033 /* Mirrors the packet represented by 'ctx' to appropriate mirror destinations,
2034 * given the packet is ingressing or egressing on 'xbundle', which has ingress
2035 * or egress (as appropriate) mirrors 'mirrors'. */
2036 static void
2037 mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
2038 mirror_mask_t mirrors)
2039 {
2040 struct xvlan in_xvlan;
2041 struct xvlan xvlan;
2042
2043 /* Figure out what VLAN the packet is in (because mirrors can select
2044 * packets on basis of VLAN). */
2045 xvlan_extract(&ctx->xin->flow, &in_xvlan);
2046 if (!input_vid_is_valid(ctx, in_xvlan.v[0].vid, xbundle)) {
2047 return;
2048 }
2049 xvlan_input_translate(xbundle, &in_xvlan, &xvlan);
2050
2051 const struct xbridge *xbridge = ctx->xbridge;
2052
2053 /* Don't mirror to destinations that we've already mirrored to. */
2054 mirrors &= ~ctx->mirrors;
2055 if (!mirrors) {
2056 return;
2057 }
2058
2059 if (ctx->xin->resubmit_stats) {
2060 mirror_update_stats(xbridge->mbridge, mirrors,
2061 ctx->xin->resubmit_stats->n_packets,
2062 ctx->xin->resubmit_stats->n_bytes);
2063 }
2064 if (ctx->xin->xcache) {
2065 struct xc_entry *entry;
2066
2067 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_MIRROR);
2068 entry->mirror.mbridge = mbridge_ref(xbridge->mbridge);
2069 entry->mirror.mirrors = mirrors;
2070 }
2071
2072 /* 'mirrors' is a bit-mask of candidates for mirroring. Iterate as long as
2073 * some candidates remain. */
2074 while (mirrors) {
2075 const unsigned long *vlans;
2076 mirror_mask_t dup_mirrors;
2077 struct ofbundle *out;
2078 int out_vlan;
2079 int snaplen;
2080
2081 /* Get the details of the mirror represented by the rightmost 1-bit. */
2082 ovs_assert(mirror_get(xbridge->mbridge, raw_ctz(mirrors),
2083 &vlans, &dup_mirrors,
2084 &out, &snaplen, &out_vlan));
2085
2086
2087 /* If this mirror selects on the basis of VLAN, and it does not select
2088 * 'vlan', then discard this mirror and go on to the next one. */
2089 if (vlans) {
2090 ctx->wc->masks.vlans[0].tci |= htons(VLAN_CFI | VLAN_VID_MASK);
2091 }
2092 if (vlans && !bitmap_is_set(vlans, xvlan.v[0].vid)) {
2093 mirrors = zero_rightmost_1bit(mirrors);
2094 continue;
2095 }
2096
2097 /* Record the mirror, and the mirrors that output to the same
2098 * destination, so that we don't mirror to them again. This must be
2099 * done now to ensure that output_normal(), below, doesn't recursively
2100 * output to the same mirrors. */
2101 ctx->mirrors |= dup_mirrors;
2102 ctx->mirror_snaplen = snaplen;
2103
2104 /* Send the packet to the mirror. */
2105 if (out) {
2106 struct xbundle *out_xbundle = xbundle_lookup(ctx->xcfg, out);
2107 if (out_xbundle) {
2108 output_normal(ctx, out_xbundle, &xvlan);
2109 }
2110 } else if (xvlan.v[0].vid != out_vlan
2111 && !eth_addr_is_reserved(ctx->xin->flow.dl_dst)) {
2112 struct xbundle *xb;
2113 uint16_t old_vid = xvlan.v[0].vid;
2114
2115 xvlan.v[0].vid = out_vlan;
2116 LIST_FOR_EACH (xb, list_node, &xbridge->xbundles) {
2117 if (xbundle_includes_vlan(xb, &xvlan)
2118 && !xbundle_mirror_out(xbridge, xb)) {
2119 output_normal(ctx, xb, &xvlan);
2120 }
2121 }
2122 xvlan.v[0].vid = old_vid;
2123 }
2124
2125 /* output_normal() could have recursively output (to different
2126 * mirrors), so make sure that we don't send duplicates. */
2127 mirrors &= ~ctx->mirrors;
2128 ctx->mirror_snaplen = 0;
2129 }
2130 }
2131
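/* Mirrors the packet represented by 'ctx' to any ingress mirrors configured
 * on its input bundle. */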
2132 static void
2133 mirror_ingress_packet(struct xlate_ctx *ctx)
2134 {
2135 if (mbridge_has_mirrors(ctx->xbridge->mbridge)) {
2136 struct xbundle *xbundle = lookup_input_bundle(
2137 ctx, ctx->xin->flow.in_port.ofp_port, NULL);
2138 if (xbundle) {
2139 mirror_packet(ctx, xbundle,
2140 xbundle_mirror_src(ctx->xbridge, xbundle));
2141 }
2142 }
2143 }
2144
2145 /* Checks whether a packet with the given 'vid' may ingress on 'in_xbundle'.
2146 * If so, returns true. Otherwise, returns false.
2147 *
2148 * 'vid' should be the VID obtained from the 802.1Q header that was received as
2149 * part of a packet (specify 0 if there was no 802.1Q header), in the range
2150 * 0...4095. */
2151 static bool
2152 input_vid_is_valid(const struct xlate_ctx *ctx,
2153 uint16_t vid, struct xbundle *in_xbundle)
2154 {
2155 /* Allow any VID on the OFPP_NONE port. */
2156 if (in_xbundle == &ofpp_none_bundle) {
2157 return true;
2158 }
2159
2160 switch (in_xbundle->vlan_mode) {
2161 case PORT_VLAN_ACCESS:
2162 if (vid) {
2163 xlate_report_error(ctx, "dropping VLAN %"PRIu16" tagged "
2164 "packet received on port %s configured as VLAN "
2165 "%d access port", vid, in_xbundle->name,
2166 in_xbundle->vlan);
2167 return false;
2168 }
2169 return true;
2170
2171 case PORT_VLAN_NATIVE_UNTAGGED:
2172 case PORT_VLAN_NATIVE_TAGGED:
2173 if (!vid) {
2174 /* Port must always carry its native VLAN. */
2175 return true;
2176 }
2177 /* Fall through. */
2178 case PORT_VLAN_TRUNK:
2179 if (!xbundle_trunks_vlan(in_xbundle, vid)) {
2180 xlate_report_error(ctx, "dropping VLAN %"PRIu16" packet "
2181 "received on port %s not configured for "
2182 "trunking VLAN %"PRIu16,
2183 vid, in_xbundle->name, vid);
2184 return false;
2185 }
2186 return true;
2187
2188 case PORT_VLAN_DOT1Q_TUNNEL:
2189 if (!xbundle_allows_cvlan(in_xbundle, vid)) {
2190 xlate_report_error(ctx, "dropping VLAN %"PRIu16" packet received "
2191 "on dot1q-tunnel port %s that excludes this "
2192 "VLAN", vid, in_xbundle->name);
2193 return false;
2194 }
2195 return true;
2196
2197 default:
2198 OVS_NOT_REACHED();
2199 }
2200
2201 }
2202
2203 static void
2204 xvlan_copy(struct xvlan *dst, const struct xvlan *src)
2205 {
2206 *dst = *src;
2207 }
2208
2209 static void
2210 xvlan_pop(struct xvlan *src)
2211 {
2212 memmove(&src->v[0], &src->v[1], sizeof(src->v) - sizeof(src->v[0]));
2213 memset(&src->v[FLOW_MAX_VLAN_HEADERS - 1], 0,
2214 sizeof(src->v[FLOW_MAX_VLAN_HEADERS - 1]));
2215 }
2216
2217 static void
2218 xvlan_push_uninit(struct xvlan *src)
2219 {
2220 memmove(&src->v[1], &src->v[0], sizeof(src->v) - sizeof(src->v[0]));
2221 memset(&src->v[0], 0, sizeof(src->v[0]));
2222 }
2223
2224 /* Extracts VLAN information (headers) from 'flow'. */
2225 static void
2226 xvlan_extract(const struct flow *flow, struct xvlan *xvlan)
2227 {
2228 int i;
2229 memset(xvlan, 0, sizeof(*xvlan));
2230 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2231 if (!eth_type_vlan(flow->vlans[i].tpid) ||
2232 !(flow->vlans[i].tci & htons(VLAN_CFI))) {
2233 break;
2234 }
2235 xvlan->v[i].tpid = ntohs(flow->vlans[i].tpid);
2236 xvlan->v[i].vid = vlan_tci_to_vid(flow->vlans[i].tci);
2237 xvlan->v[i].pcp = ntohs(flow->vlans[i].tci) & VLAN_PCP_MASK;
2238 }
2239 }
2240
2241 /* Puts VLAN information (headers) into 'flow'. */
2242 static void
2243 xvlan_put(struct flow *flow, const struct xvlan *xvlan)
2244 {
2245 ovs_be16 tci;
2246 int i;
2247 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2248 tci = htons(xvlan->v[i].vid | (xvlan->v[i].pcp & VLAN_PCP_MASK));
2249 if (tci) {
2250 tci |= htons(VLAN_CFI);
2251 flow->vlans[i].tpid = xvlan->v[i].tpid ?
2252 htons(xvlan->v[i].tpid) :
2253 htons(ETH_TYPE_VLAN_8021Q);
2254 }
2255 flow->vlans[i].tci = tci;
2256 }
2257 }
2258
2259 /* Given 'in_xvlan', extracted from the input 802.1Q headers received as part
2260 * of a packet, and 'in_xbundle', the bundle on which the packet was received,
2261 * returns the VLANs of the packet during bridge internal processing. */
2262 static void
2263 xvlan_input_translate(const struct xbundle *in_xbundle,
2264 const struct xvlan *in_xvlan, struct xvlan *xvlan)
2265 {
2266
2267 switch (in_xbundle->vlan_mode) {
2268 case PORT_VLAN_ACCESS:
2269 memset(xvlan, 0, sizeof(*xvlan));
2270 xvlan->v[0].tpid = in_xvlan->v[0].tpid ? in_xvlan->v[0].tpid :
2271 ETH_TYPE_VLAN_8021Q;
2272 xvlan->v[0].vid = in_xbundle->vlan;
2273 xvlan->v[0].pcp = in_xvlan->v[0].pcp;
2274 break;
2275
2276 case PORT_VLAN_TRUNK:
2277 xvlan_copy(xvlan, in_xvlan);
2278 break;
2279
2280 case PORT_VLAN_NATIVE_UNTAGGED:
2281 case PORT_VLAN_NATIVE_TAGGED:
2282 xvlan_copy(xvlan, in_xvlan);
2283 if (!in_xvlan->v[0].vid) {
2284 xvlan->v[0].tpid = in_xvlan->v[0].tpid ? in_xvlan->v[0].tpid :
2285 ETH_TYPE_VLAN_8021Q;
2286 xvlan->v[0].vid = in_xbundle->vlan;
2287 xvlan->v[0].pcp = in_xvlan->v[0].pcp;
2288 }
2289 break;
2290
2291 case PORT_VLAN_DOT1Q_TUNNEL:
2292 xvlan_copy(xvlan, in_xvlan);
2293 xvlan_push_uninit(xvlan);
2294 xvlan->v[0].tpid = in_xbundle->qinq_ethtype;
2295 xvlan->v[0].vid = in_xbundle->vlan;
2296 xvlan->v[0].pcp = 0;
2297 break;
2298
2299 default:
2300 OVS_NOT_REACHED();
2301 }
2302 }
2303
2304 /* Given 'xvlan', the VLANs of a packet during internal processing, and
2305 * 'out_xbundle', a bundle on which the packet is to be output, returns the
2306 * VLANs that should be included in output packet. */
2307 static void
2308 xvlan_output_translate(const struct xbundle *out_xbundle,
2309 const struct xvlan *xvlan, struct xvlan *out_xvlan)
2310 {
2311 switch (out_xbundle->vlan_mode) {
2312 case PORT_VLAN_ACCESS:
2313 memset(out_xvlan, 0, sizeof(*out_xvlan));
2314 break;
2315
2316 case PORT_VLAN_TRUNK:
2317 case PORT_VLAN_NATIVE_TAGGED:
2318 xvlan_copy(out_xvlan, xvlan);
2319 break;
2320
2321 case PORT_VLAN_NATIVE_UNTAGGED:
2322 xvlan_copy(out_xvlan, xvlan);
2323 if (xvlan->v[0].vid == out_xbundle->vlan) {
2324 xvlan_pop(out_xvlan);
2325 }
2326 break;
2327
2328 case PORT_VLAN_DOT1Q_TUNNEL:
2329 xvlan_copy(out_xvlan, xvlan);
2330 xvlan_pop(out_xvlan);
2331 break;
2332
2333 default:
2334 OVS_NOT_REACHED();
2335 }
2336 }
2337
2338 /* If output xbundle is dot1q-tunnel, set mask bits of cvlan */
2339 static void
2340 check_and_set_cvlan_mask(struct flow_wildcards *wc,
2341 const struct xbundle *xbundle)
2342 {
2343 if (xbundle->vlan_mode == PORT_VLAN_DOT1Q_TUNNEL && xbundle->cvlans) {
2344 wc->masks.vlans[1].tci = htons(0xffff);
2345 }
2346 }
2347
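/* Outputs the packet represented by 'ctx' on 'out_xbundle', translating
 * 'xvlan' into the VLAN headers appropriate for that bundle's mode and, if
 * the bundle is bonded, choosing (or recirculating to choose) an output
 * slave. */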
2348 static void
2349 output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
2350 const struct xvlan *xvlan)
2351 {
2352 uint16_t vid;
2353 union flow_vlan_hdr old_vlans[FLOW_MAX_VLAN_HEADERS];
2354 struct xport *xport;
2355 struct xlate_bond_recirc xr;
2356 bool use_recirc = false;
2357 struct xvlan out_xvlan;
2358
2359 check_and_set_cvlan_mask(ctx->wc, out_xbundle);
2360
2361 xvlan_output_translate(out_xbundle, xvlan, &out_xvlan);
2362 if (out_xbundle->use_priority_tags) {
2363 out_xvlan.v[0].pcp = ntohs(ctx->xin->flow.vlans[0].tci) &
2364 VLAN_PCP_MASK;
2365 }
2366 vid = out_xvlan.v[0].vid;
2367 if (ovs_list_is_empty(&out_xbundle->xports)) {
2368 /* Partially configured bundle with no slaves. Drop the packet. */
2369 return;
2370 } else if (!out_xbundle->bond) {
2371 xport = CONTAINER_OF(ovs_list_front(&out_xbundle->xports), struct xport,
2372 bundle_node);
2373 } else {
2374 struct flow_wildcards *wc = ctx->wc;
2375 struct ofport_dpif *ofport;
2376
2377 if (ctx->xbridge->support.odp.recirc) {
2378 /* If recirculation is not actually in use, 'xr.recirc_id'
2379 * will be set to '0', since a valid 'recirc_id' cannot
2380 * be zero. */
2381 bond_update_post_recirc_rules(out_xbundle->bond,
2382 &xr.recirc_id,
2383 &xr.hash_basis);
2384 if (xr.recirc_id) {
2385 /* Use recirculation instead of output. */
2386 use_recirc = true;
2387 xr.hash_alg = OVS_HASH_ALG_L4;
2388 /* Recirculation does not require unmasking hash fields. */
2389 wc = NULL;
2390 }
2391 }
2392
2393 ofport = bond_choose_output_slave(out_xbundle->bond,
2394 &ctx->xin->flow, wc, vid);
2395 xport = xport_lookup(ctx->xcfg, ofport);
2396
2397 if (!xport) {
2398 /* No slaves enabled, so drop packet. */
2399 return;
2400 }
2401
2402 /* If use_recirc is set, the main thread will handle stats
2403 * accounting for this bond. */
2404 if (!use_recirc) {
2405 if (ctx->xin->resubmit_stats) {
2406 bond_account(out_xbundle->bond, &ctx->xin->flow, vid,
2407 ctx->xin->resubmit_stats->n_bytes);
2408 }
2409 if (ctx->xin->xcache) {
2410 struct xc_entry *entry;
2411 struct flow *flow;
2412
2413 flow = &ctx->xin->flow;
2414 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_BOND);
2415 entry->bond.bond = bond_ref(out_xbundle->bond);
2416 entry->bond.flow = xmemdup(flow, sizeof *flow);
2417 entry->bond.vid = vid;
2418 }
2419 }
2420 }
2421
2422 memcpy(&old_vlans, &ctx->xin->flow.vlans, sizeof(old_vlans));
2423 xvlan_put(&ctx->xin->flow, &out_xvlan);
2424
2425 compose_output_action(ctx, xport->ofp_port, use_recirc ? &xr : NULL,
2426 false, false);
2427 memcpy(&ctx->xin->flow.vlans, &old_vlans, sizeof(old_vlans));
2428 }
2429
2430 /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
2431 * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to
2432 * indicate this; newer upstream kernels use gratuitous ARP requests. */
2433 static bool
2434 is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc)
2435 {
2436 if (flow->dl_type != htons(ETH_TYPE_ARP)) {
2437 return false;
2438 }
2439
2440 memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2441 if (!eth_addr_is_broadcast(flow->dl_dst)) {
2442 return false;
2443 }
2444
2445 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
2446 if (flow->nw_proto == ARP_OP_REPLY) {
2447 return true;
2448 } else if (flow->nw_proto == ARP_OP_REQUEST) {
2449 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2450 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2451
2452 return flow->nw_src == flow->nw_dst;
2453 } else {
2454 return false;
2455 }
2456 }
2457
2458 /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
2459 * dropped. Returns true if they may be forwarded, false if they should be
2460 * dropped.
2461 *
2462 * 'in_port' must be the xport that corresponds to flow->in_port.
2463 * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull).
2464 *
2465 * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as
2466 * returned by input_vid_to_vlan(). It must be a valid VLAN for 'in_port', as
2467 * checked by input_vid_is_valid().
2468 *
2469 * May also add tags to '*tags', although the current implementation only does
2470 * so in one special case.
2471 */
2472 static bool
2473 is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
2474 uint16_t vlan)
2475 {
2476 struct xbundle *in_xbundle = in_port->xbundle;
2477 const struct xbridge *xbridge = ctx->xbridge;
2478 struct flow *flow = &ctx->xin->flow;
2479
2480 /* Drop frames for reserved multicast addresses
2481 * only if forward_bpdu option is absent. */
2482 if (!xbridge->forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
2483 xlate_report(ctx, OFT_DETAIL,
2484 "packet has reserved destination MAC, dropping");
2485 return false;
2486 }
2487
2488 if (in_xbundle->bond) {
2489 struct mac_entry *mac;
2490
2491 switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
2492 flow->dl_dst)) {
2493 case BV_ACCEPT:
2494 break;
2495
2496 case BV_DROP:
2497 xlate_report(ctx, OFT_DETAIL,
2498 "bonding refused admissibility, dropping");
2499 return false;
2500
2501 case BV_DROP_IF_MOVED:
2502 ovs_rwlock_rdlock(&xbridge->ml->rwlock);
2503 mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
2504 if (mac
2505 && mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle
2506 && (!is_gratuitous_arp(flow, ctx->wc)
2507 || mac_entry_is_grat_arp_locked(mac))) {
2508 ovs_rwlock_unlock(&xbridge->ml->rwlock);
2509 xlate_report(ctx, OFT_DETAIL,
2510 "SLB bond thinks this packet looped back, "
2511 "dropping");
2512 return false;
2513 }
2514 ovs_rwlock_unlock(&xbridge->ml->rwlock);
2515 break;
2516 }
2517 }
2518
2519 return true;
2520 }
2521
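/* Updates the MAC learning table with 'dl_src' seen on 'in_xbundle' in
 * 'vlan'.  Returns false if this changed the table (so that the caller may
 * log it), true if nothing was updated or 'in_xbundle' is the OFPP_NONE
 * pseudo-bundle. */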
2522 static bool
2523 update_learning_table__(const struct xbridge *xbridge,
2524 struct xbundle *in_xbundle, struct eth_addr dl_src,
2525 int vlan, bool is_grat_arp)
2526 {
2527 return (in_xbundle == &ofpp_none_bundle
2528 || !mac_learning_update(xbridge->ml, dl_src, vlan,
2529 is_grat_arp,
2530 in_xbundle->bond != NULL,
2531 in_xbundle->ofbundle));
2532 }
2533
2534 static void
2535 update_learning_table(const struct xlate_ctx *ctx,
2536 struct xbundle *in_xbundle, struct eth_addr dl_src,
2537 int vlan, bool is_grat_arp)
2538 {
2539 if (!update_learning_table__(ctx->xbridge, in_xbundle, dl_src, vlan,
2540 is_grat_arp)) {
2541 xlate_report_debug(ctx, OFT_DETAIL, "learned that "ETH_ADDR_FMT" is "
2542 "on port %s in VLAN %d",
2543 ETH_ADDR_ARGS(dl_src), in_xbundle->name, vlan);
2544 }
2545 }
2546
2547 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2548 * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
2549 static void
2550 update_mcast_snooping_table4__(const struct xlate_ctx *ctx,
2551 const struct flow *flow,
2552 struct mcast_snooping *ms, int vlan,
2553 struct xbundle *in_xbundle,
2554 const struct dp_packet *packet)
2555 OVS_REQ_WRLOCK(ms->rwlock)
2556 {
2557 const struct igmp_header *igmp;
2558 int count;
2559 size_t offset;
2560 ovs_be32 ip4 = flow->igmp_group_ip4;
2561
2562 offset = (char *) dp_packet_l4(packet) - (char *) dp_packet_data(packet);
2563 igmp = dp_packet_at(packet, offset, IGMP_HEADER_LEN);
2564 if (!igmp || csum(igmp, dp_packet_l4_size(packet)) != 0) {
2565 xlate_report_debug(ctx, OFT_DETAIL,
2566 "multicast snooping received bad IGMP "
2567 "checksum on port %s in VLAN %d",
2568 in_xbundle->name, vlan);
2569 return;
2570 }
2571
2572 switch (ntohs(flow->tp_src)) {
2573 case IGMP_HOST_MEMBERSHIP_REPORT:
2574 case IGMPV2_HOST_MEMBERSHIP_REPORT:
2575 if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2576 xlate_report_debug(ctx, OFT_DETAIL,
2577 "multicast snooping learned that "
2578 IP_FMT" is on port %s in VLAN %d",
2579 IP_ARGS(ip4), in_xbundle->name, vlan);
2580 }
2581 break;
2582 case IGMP_HOST_LEAVE_MESSAGE:
2583 if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2584 xlate_report_debug(ctx, OFT_DETAIL, "multicast snooping leaving "
2585 IP_FMT" is on port %s in VLAN %d",
2586 IP_ARGS(ip4), in_xbundle->name, vlan);
2587 }
2588 break;
2589 case IGMP_HOST_MEMBERSHIP_QUERY:
2590 if (flow->nw_src && mcast_snooping_add_mrouter(ms, vlan,
2591 in_xbundle->ofbundle)) {
2592 xlate_report_debug(ctx, OFT_DETAIL, "multicast snooping query "
2593 "from "IP_FMT" is on port %s in VLAN %d",
2594 IP_ARGS(flow->nw_src), in_xbundle->name, vlan);
2595 }
2596 break;
2597 case IGMPV3_HOST_MEMBERSHIP_REPORT:
2598 count = mcast_snooping_add_report(ms, packet, vlan,
2599 in_xbundle->ofbundle);
2600 if (count) {
2601 xlate_report_debug(ctx, OFT_DETAIL, "multicast snooping processed "
2602 "%d addresses on port %s in VLAN %d",
2603 count, in_xbundle->name, vlan);
2604 }
2605 break;
2606 }
2607 }
2608
2609 static void
2610 update_mcast_snooping_table6__(const struct xlate_ctx *ctx,
2611 const struct flow *flow,
2612 struct mcast_snooping *ms, int vlan,
2613 struct xbundle *in_xbundle,
2614 const struct dp_packet *packet)
2615 OVS_REQ_WRLOCK(ms->rwlock)
2616 {
2617 const struct mld_header *mld;
2618 int count;
2619 size_t offset;
2620
2621 offset = (char *) dp_packet_l4(packet) - (char *) dp_packet_data(packet);
2622 mld = dp_packet_at(packet, offset, MLD_HEADER_LEN);
2623
2624 if (!mld ||
2625 packet_csum_upperlayer6(dp_packet_l3(packet),
2626 mld, IPPROTO_ICMPV6,
2627 dp_packet_l4_size(packet)) != 0) {
2628 xlate_report_debug(ctx, OFT_DETAIL, "multicast snooping received "
2629 "bad MLD checksum on port %s in VLAN %d",
2630 in_xbundle->name, vlan);
2631 return;
2632 }
2633
2634 switch (ntohs(flow->tp_src)) {
2635 case MLD_QUERY:
2636 if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
2637 && mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
2638 xlate_report_debug(ctx, OFT_DETAIL, "multicast snooping query on "
2639 "port %s in VLAN %d", in_xbundle->name, vlan);
2640 }
2641 break;
2642 case MLD_REPORT:
2643 case MLD_DONE:
2644 case MLD2_REPORT:
2645 count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
2646 if (count) {
2647 xlate_report_debug(ctx, OFT_DETAIL, "multicast snooping processed "
2648 "%d addresses on port %s in VLAN %d",
2649 count, in_xbundle->name, vlan);
2650 }
2651 break;
2652 }
2653 }
2654
2655 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2656 * was received on 'in_xbundle' in 'vlan'. */
2657 static void
2658 update_mcast_snooping_table(const struct xlate_ctx *ctx,
2659 const struct flow *flow, int vlan,
2660 struct xbundle *in_xbundle,
2661 const struct dp_packet *packet)
2662 {
2663 struct mcast_snooping *ms = ctx->xbridge->ms;
2664 struct xbundle *mcast_xbundle;
2665 struct mcast_port_bundle *fport;
2666
2667 /* Don't learn the OFPP_NONE port. */
2668 if (in_xbundle == &ofpp_none_bundle) {
2669 return;
2670 }
2671
2672 /* Don't learn from flood ports */
2673 mcast_xbundle = NULL;
2674 ovs_rwlock_wrlock(&ms->rwlock);
2675 LIST_FOR_EACH(fport, node, &ms->fport_list) {
2676 mcast_xbundle = xbundle_lookup(ctx->xcfg, fport->port);
2677 if (mcast_xbundle == in_xbundle) {
2678 break;
2679 }
2680 }
2681
2682 if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
2683 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2684 update_mcast_snooping_table4__(ctx, flow, ms, vlan,
2685 in_xbundle, packet);
2686 } else {
2687 update_mcast_snooping_table6__(ctx, flow, ms, vlan,
2688 in_xbundle, packet);
2689 }
2690 }
2691 ovs_rwlock_unlock(&ms->rwlock);
2692 }
2693
2694 /* Sends the packet to the ports on which the multicast group was learned. */
2695 static void
2696 xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
2697 struct mcast_snooping *ms OVS_UNUSED,
2698 struct mcast_group *grp,
2699 struct xbundle *in_xbundle,
2700 const struct xvlan *xvlan)
2701 OVS_REQ_RDLOCK(ms->rwlock)
2702 {
2703 struct mcast_group_bundle *b;
2704 struct xbundle *mcast_xbundle;
2705
2706 LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
2707 mcast_xbundle = xbundle_lookup(ctx->xcfg, b->port);
2708 if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2709 xlate_report(ctx, OFT_DETAIL, "forwarding to mcast group port");
2710 output_normal(ctx, mcast_xbundle, xvlan);
2711 } else if (!mcast_xbundle) {
2712 xlate_report(ctx, OFT_WARN,
2713 "mcast group port is unknown, dropping");
2714 } else {
2715 xlate_report(ctx, OFT_DETAIL,
2716 "mcast group port is input port, dropping");
2717 }
2718 }
2719 }
2720
2721 /* send the packet to ports connected to multicast routers */
2722 static void
2723 xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
2724 struct mcast_snooping *ms,
2725 struct xbundle *in_xbundle,
2726 const struct xvlan *xvlan)
2727 OVS_REQ_RDLOCK(ms->rwlock)
2728 {
2729 struct mcast_mrouter_bundle *mrouter;
2730 struct xbundle *mcast_xbundle;
2731
2732 LIST_FOR_EACH(mrouter, mrouter_node, &ms->mrouter_lru) {
2733 mcast_xbundle = xbundle_lookup(ctx->xcfg, mrouter->port);
2734 if (mcast_xbundle && mcast_xbundle != in_xbundle
2735 && mrouter->vlan == xvlan->v[0].vid) {
2736 xlate_report(ctx, OFT_DETAIL, "forwarding to mcast router port");
2737 output_normal(ctx, mcast_xbundle, xvlan);
2738 } else if (!mcast_xbundle) {
2739 xlate_report(ctx, OFT_WARN,
2740 "mcast router port is unknown, dropping");
2741 } else if (mrouter->vlan != xvlan->v[0].vid) {
2742 xlate_report(ctx, OFT_DETAIL,
2743 "mcast router is on another vlan, dropping");
2744 } else {
2745 xlate_report(ctx, OFT_DETAIL,
2746 "mcast router port is input port, dropping");
2747 }
2748 }
2749 }
2750
2751 /* send the packet to ports flagged to be flooded */
2752 static void
2753 xlate_normal_mcast_send_fports(struct xlate_ctx *ctx,
2754 struct mcast_snooping *ms,
2755 struct xbundle *in_xbundle,
2756 const struct xvlan *xvlan)
2757 OVS_REQ_RDLOCK(ms->rwlock)
2758 {
2759 struct mcast_port_bundle *fport;
2760 struct xbundle *mcast_xbundle;
2761
2762 LIST_FOR_EACH(fport, node, &ms->fport_list) {
2763 mcast_xbundle = xbundle_lookup(ctx->xcfg, fport->port);
2764 if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2765 xlate_report(ctx, OFT_DETAIL, "forwarding to mcast flood port");
2766 output_normal(ctx, mcast_xbundle, xvlan);
2767 } else if (!mcast_xbundle) {
2768 xlate_report(ctx, OFT_WARN,
2769 "mcast flood port is unknown, dropping");
2770 } else {
2771 xlate_report(ctx, OFT_DETAIL,
2772 "mcast flood port is input port, dropping");
2773 }
2774 }
2775 }
2776
2777 /* forward the Reports to configured ports */
2778 static void
2779 xlate_normal_mcast_send_rports(struct xlate_ctx *ctx,
2780 struct mcast_snooping *ms,
2781 struct xbundle *in_xbundle,
2782 const struct xvlan *xvlan)
2783 OVS_REQ_RDLOCK(ms->rwlock)
2784 {
2785 struct mcast_port_bundle *rport;
2786 struct xbundle *mcast_xbundle;
2787
2788 LIST_FOR_EACH(rport, node, &ms->rport_list) {
2789 mcast_xbundle = xbundle_lookup(ctx->xcfg, rport->port);
2790 if (mcast_xbundle
2791 && mcast_xbundle != in_xbundle
2792 && mcast_xbundle->ofbundle != in_xbundle->ofbundle) {
2793 xlate_report(ctx, OFT_DETAIL,
2794 "forwarding report to mcast flagged port");
2795 output_normal(ctx, mcast_xbundle, xvlan);
2796 } else if (!mcast_xbundle) {
2797 xlate_report(ctx, OFT_WARN,
2798 "mcast port is unknown, dropping the report");
2799 } else {
2800 xlate_report(ctx, OFT_DETAIL,
2801 "mcast port is input port, dropping the Report");
2802 }
2803 }
2804 }
2805
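/* Floods the packet to every floodable bundle on the bridge that carries
 * 'xvlan', except the input bundle and bundles reserved for mirror output. */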
2806 static void
2807 xlate_normal_flood(struct xlate_ctx *ctx, struct xbundle *in_xbundle,
2808 struct xvlan *xvlan)
2809 {
2810 struct xbundle *xbundle;
2811
2812 LIST_FOR_EACH (xbundle, list_node, &ctx->xbridge->xbundles) {
2813 if (xbundle != in_xbundle
2814 && xbundle->ofbundle != in_xbundle->ofbundle
2815 && xbundle_includes_vlan(xbundle, xvlan)
2816 && xbundle->floodable
2817 && !xbundle_mirror_out(ctx->xbridge, xbundle)) {
2818 output_normal(ctx, xbundle, xvlan);
2819 }
2820 }
2821 ctx->nf_output_iface = NF_OUT_FLOOD;
2822 }
2823
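/* Returns true if 'flow' is destined to a link-local multicast address
 * (the 224.0.0.x range for IPv4 or the all-hosts group for IPv6),
 * unwildcarding the destination address in 'wc'. */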
2824 static bool
2825 is_ip_local_multicast(const struct flow *flow, struct flow_wildcards *wc)
2826 {
2827 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2828 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2829 return ip_is_local_multicast(flow->nw_dst);
2830 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2831 memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2832 return ipv6_is_all_hosts(&flow->ipv6_dst);
2833 } else {
2834 return false;
2835 }
2836 }
2837
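/* Implements the OFPP_NORMAL output action: standard L2 learning-switch
 * processing with VLAN handling, MAC learning, multicast snooping, and
 * mirroring. */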
2838 static void
2839 xlate_normal(struct xlate_ctx *ctx)
2840 {
2841 struct flow_wildcards *wc = ctx->wc;
2842 struct flow *flow = &ctx->xin->flow;
2843 struct xbundle *in_xbundle;
2844 struct xport *in_port;
2845 struct mac_entry *mac;
2846 void *mac_port;
2847 struct xvlan in_xvlan;
2848 struct xvlan xvlan;
2849 uint16_t vlan;
2850
2851 memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2852 memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2853 wc->masks.vlans[0].tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2854
2855 in_xbundle = lookup_input_bundle(ctx, flow->in_port.ofp_port, &in_port);
2856 if (!in_xbundle) {
2857 xlate_report(ctx, OFT_WARN, "no input bundle, dropping");
2858 return;
2859 }
2860
2861 /* Drop malformed frames. */
2862 if (eth_type_vlan(flow->dl_type) &&
2863 !(flow->vlans[0].tci & htons(VLAN_CFI))) {
2864 if (ctx->xin->packet != NULL) {
2865 xlate_report_error(ctx, "dropping packet with partial "
2866 "VLAN tag received on port %s",
2867 in_xbundle->name);
2868 }
2869 xlate_report(ctx, OFT_WARN, "partial VLAN tag, dropping");
2870 return;
2871 }
2872
2873 /* Drop frames on bundles reserved for mirroring. */
2874 if (xbundle_mirror_out(ctx->xbridge, in_xbundle)) {
2875 if (ctx->xin->packet != NULL) {
2876 xlate_report_error(ctx, "dropping packet received on port %s, "
2877 "which is reserved exclusively for mirroring",
2878 in_xbundle->name);
2879 }
2880 xlate_report(ctx, OFT_WARN,
2881 "input port is mirror output port, dropping");
2882 return;
2883 }
2884
2885 /* Check VLAN. */
2886 xvlan_extract(flow, &in_xvlan);
2887 if (!input_vid_is_valid(ctx, in_xvlan.v[0].vid, in_xbundle)) {
2888 xlate_report(ctx, OFT_WARN,
2889 "disallowed VLAN VID for this input port, dropping");
2890 return;
2891 }
2892 xvlan_input_translate(in_xbundle, &in_xvlan, &xvlan);
2893 vlan = xvlan.v[0].vid;
2894
2895 /* Check other admissibility requirements. */
2896 if (in_port && !is_admissible(ctx, in_port, vlan)) {
2897 return;
2898 }
2899
2900 /* Learn source MAC. */
2901 bool is_grat_arp = is_gratuitous_arp(flow, wc);
2902 if (ctx->xin->allow_side_effects
2903 && flow->packet_type == htonl(PT_ETH)
2904 && in_port->pt_mode != NETDEV_PT_LEGACY_L3
2905 ) {
2906 update_learning_table(ctx, in_xbundle, flow->dl_src, vlan,
2907 is_grat_arp);
2908 }
2909 if (ctx->xin->xcache && in_xbundle != &ofpp_none_bundle) {
2910 struct xc_entry *entry;
2911
2912 /* Save just enough info to update mac learning table later. */
2913 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
2914 entry->normal.ofproto = ctx->xbridge->ofproto;
2915 entry->normal.in_port = flow->in_port.ofp_port;
2916 entry->normal.dl_src = flow->dl_src;
2917 entry->normal.vlan = vlan;
2918 entry->normal.is_gratuitous_arp = is_grat_arp;
2919 }
2920
2921 /* Determine output bundle. */
2922 if (mcast_snooping_enabled(ctx->xbridge->ms)
2923 && !eth_addr_is_broadcast(flow->dl_dst)
2924 && eth_addr_is_multicast(flow->dl_dst)
2925 && is_ip_any(flow)) {
2926 struct mcast_snooping *ms = ctx->xbridge->ms;
2927 struct mcast_group *grp = NULL;
2928
2929 if (is_igmp(flow, wc)) {
2930 /*
2931 * IGMP packets need to take the slow path so that they are
2932 * processed for mdb updates.  Otherwise, group entries could
2933 * expire even though hosts are still sending reports.
2934 */
2935 ctx->xout->slow |= SLOW_ACTION;
2936
2937 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2938 if (mcast_snooping_is_membership(flow->tp_src) ||
2939 mcast_snooping_is_query(flow->tp_src)) {
2940 if (ctx->xin->allow_side_effects && ctx->xin->packet) {
2941 update_mcast_snooping_table(ctx, flow, vlan,
2942 in_xbundle, ctx->xin->packet);
2943 }
2944 }
2945
2946 if (mcast_snooping_is_membership(flow->tp_src)) {
2947 ovs_rwlock_rdlock(&ms->rwlock);
2948 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, &xvlan);
2949 /* RFC4541: section 2.1.1, item 1: A snooping switch should
2950 * forward IGMP Membership Reports only to those ports where
2951 * multicast routers are attached. Alternatively stated: a
2952 * snooping switch should not forward IGMP Membership Reports
2953 * to ports on which only hosts are attached.
2954 * An administrative control may be provided to override this
2955 * restriction, allowing the report messages to be flooded to
2956 * other ports. */
2957 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, &xvlan);
2958 ovs_rwlock_unlock(&ms->rwlock);
2959 } else {
2960 xlate_report(ctx, OFT_DETAIL, "multicast traffic, flooding");
2961 xlate_normal_flood(ctx, in_xbundle, &xvlan);
2962 }
2963 return;
2964 } else if (is_mld(flow, wc)) {
2965 ctx->xout->slow |= SLOW_ACTION;
2966 if (ctx->xin->allow_side_effects && ctx->xin->packet) {
2967 update_mcast_snooping_table(ctx, flow, vlan,
2968 in_xbundle, ctx->xin->packet);
2969 }
2970 if (is_mld_report(flow, wc)) {
2971 ovs_rwlock_rdlock(&ms->rwlock);
2972 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, &xvlan);
2973 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, &xvlan);
2974 ovs_rwlock_unlock(&ms->rwlock);
2975 } else {
2976 xlate_report(ctx, OFT_DETAIL, "MLD query, flooding");
2977 xlate_normal_flood(ctx, in_xbundle, &xvlan);
2978 }
2979 } else {
2980 if (is_ip_local_multicast(flow, wc)) {
2981 /* RFC4541: section 2.1.2, item 2: Packets with a dst IP
2982 * address in the 224.0.0.x range which are not IGMP must
2983 * be forwarded on all ports */
2984 xlate_report(ctx, OFT_DETAIL,
2985 "RFC4541: section 2.1.2, item 2, flooding");
2986 xlate_normal_flood(ctx, in_xbundle, &xvlan);
2987 return;
2988 }
2989 }
2990
2991 /* Forward to the ports on which the destination multicast group was learned. */
2992 ovs_rwlock_rdlock(&ms->rwlock);
2993 if (flow->dl_type == htons(ETH_TYPE_IP)) {
2994 grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
2995 } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2996 grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
2997 }
2998 if (grp) {
2999 xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, &xvlan);
3000 xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, &xvlan);
3001 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, &xvlan);
3002 } else {
3003 if (mcast_snooping_flood_unreg(ms)) {
3004 xlate_report(ctx, OFT_DETAIL,
3005 "unregistered multicast, flooding");
3006 xlate_normal_flood(ctx, in_xbundle, &xvlan);
3007 } else {
3008 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, &xvlan);
3009 xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, &xvlan);
3010 }
3011 }
3012 ovs_rwlock_unlock(&ms->rwlock);
3013 } else {
3014 ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
3015 mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
3016 mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
3017 ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
3018
3019 if (mac_port) {
3020 struct xbundle *mac_xbundle = xbundle_lookup(ctx->xcfg, mac_port);
3021 if (mac_xbundle
3022 && mac_xbundle != in_xbundle
3023 && mac_xbundle->ofbundle != in_xbundle->ofbundle) {
3024 xlate_report(ctx, OFT_DETAIL, "forwarding to learned port");
3025 output_normal(ctx, mac_xbundle, &xvlan);
3026 } else if (!mac_xbundle) {
3027 xlate_report(ctx, OFT_WARN,
3028 "learned port is unknown, dropping");
3029 } else {
3030 xlate_report(ctx, OFT_DETAIL,
3031 "learned port is input port, dropping");
3032 }
3033 } else {
3034 xlate_report(ctx, OFT_DETAIL,
3035 "no learned MAC for destination, flooding");
3036 xlate_normal_flood(ctx, in_xbundle, &xvlan);
3037 }
3038 }
3039 }
3040
3041 /* Appends a "sample" action for sFlow or IPFIX to 'ctx->odp_actions'. The
3042 * 'probability' is the number of packets out of UINT32_MAX to sample. The
3043 * 'cookie' is passed back in the callback for each sampled packet.
3044 * 'tunnel_out_port', if not ODPP_NONE, is added as the
3045 * OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute. If 'include_actions',
3046 * an OVS_USERSPACE_ATTR_ACTIONS attribute is added. If
3047 * 'emit_set_tunnel', sample(sampling_port=1) would translate into
3048 * datapath sample action set(tunnel(...)), sample(...) and it is used
3049 * for sampling egress tunnel information.
3050 */
3051 static size_t
3052 compose_sample_action(struct xlate_ctx *ctx,
3053 const uint32_t probability,
3054 const struct user_action_cookie *cookie,
3055 const odp_port_t tunnel_out_port,
3056 bool include_actions)
3057 {
3058 if (probability == 0) {
3059 /* No need to generate sampling or the inner action. */
3060 return 0;
3061 }
3062
3063 /* If the slow path meter is configured by the controller,
3064 * insert a meter action before the user space action. */
3065 struct ofproto *ofproto = &ctx->xin->ofproto->up;
3066 uint32_t meter_id = ofproto->slowpath_meter_id;
3067
3068 /* When a meter action is not required, avoid generating a sample action
3069 * for a 100% sampling rate. */
3070 bool is_sample = probability < UINT32_MAX || meter_id != UINT32_MAX;
3071 size_t sample_offset, actions_offset;
3072 if (is_sample) {
3073 sample_offset = nl_msg_start_nested(ctx->odp_actions,
3074 OVS_ACTION_ATTR_SAMPLE);
3075 nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY,
3076 probability);
3077 actions_offset = nl_msg_start_nested(ctx->odp_actions,
3078 OVS_SAMPLE_ATTR_ACTIONS);
3079 }
3080
3081 if (meter_id != UINT32_MAX) {
3082 nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_METER, meter_id);
3083 }
3084
3085 odp_port_t odp_port = ofp_port_to_odp_port(
3086 ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
3087 uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
3088 flow_hash_5tuple(&ctx->xin->flow, 0));
3089 size_t cookie_offset = odp_put_userspace_action(pid, cookie,
3090 sizeof *cookie,
3091 tunnel_out_port,
3092 include_actions,
3093 ctx->odp_actions);
3094
3095 if (is_sample) {
3096 nl_msg_end_nested(ctx->odp_actions, actions_offset);
3097 nl_msg_end_nested(ctx->odp_actions, sample_offset);
3098 }
3099
3100 return cookie_offset;
3101 }
3102
3103 /* If sFlow is not enabled, returns 0 without doing anything.
3104 *
3105 * If sFlow is enabled, appends a template "sample" action to the ODP actions
3106 * in 'ctx'. This action is a template because some of the information needed
3107 * to fill it out is not available until flow translation is complete. In this
3108 * case, this functions returns an offset, which is always nonzero, to pass
3109 * later to fix_sflow_action() to fill in the rest of the template. */
3110 static size_t
3111 compose_sflow_action(struct xlate_ctx *ctx)
3112 {
3113 struct dpif_sflow *sflow = ctx->xbridge->sflow;
3114 if (!sflow || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
3115 return 0;
3116 }
3117
3118 struct user_action_cookie cookie = {
3119 .type = USER_ACTION_COOKIE_SFLOW,
3120 .ofp_in_port = ctx->xin->flow.in_port.ofp_port,
3121 .ofproto_uuid = ctx->xbridge->ofproto->uuid
3122 };
3123 return compose_sample_action(ctx, dpif_sflow_get_probability(sflow),
3124 &cookie, ODPP_NONE, true);
3125 }
3126
3127 /* If flow IPFIX is enabled, make sure IPFIX flow sample action
3128 * at egress point of tunnel port is just in front of corresponding
3129 * output action. If bridge IPFIX is enabled, this appends an IPFIX
3130 * sample action to 'ctx->odp_actions'. */
3131 static void
3132 compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
3133 {
3134 struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
3135 odp_port_t tunnel_out_port = ODPP_NONE;
3136
3137 if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
3138 return;
3139 }
3140
3141 /* For input case, output_odp_port is ODPP_NONE, which is an invalid port
3142 * number. */
3143 if (output_odp_port == ODPP_NONE &&
3144 !dpif_ipfix_get_bridge_exporter_input_sampling(ipfix)) {
3145 return;
3146 }
3147
3148 /* For output case, output_odp_port is valid. */
3149 if (output_odp_port != ODPP_NONE) {
3150 if (!dpif_ipfix_get_bridge_exporter_output_sampling(ipfix)) {
3151 return;
3152 }
3153 /* If tunnel sampling is enabled, put an additional option attribute:
3154 * OVS_USERSPACE_ATTR_TUNNEL_OUT_PORT
3155 */
3156 if (dpif_ipfix_get_bridge_exporter_tunnel_sampling(ipfix) &&
3157 dpif_ipfix_is_tunnel_port(ipfix, output_odp_port) ) {
3158 tunnel_out_port = output_odp_port;
3159 }
3160 }
3161
3162 struct user_action_cookie cookie = {
3163 .type = USER_ACTION_COOKIE_IPFIX,
3164 .ofp_in_port = ctx->xin->flow.in_port.ofp_port,
3165 .ofproto_uuid = ctx->xbridge->ofproto->uuid,
3166 .ipfix.output_odp_port = output_odp_port
3167 };
3168 compose_sample_action(ctx,
3169 dpif_ipfix_get_bridge_exporter_probability(ipfix),
3170 &cookie, tunnel_out_port, false);
3171 }
3172
3173 /* Fix "sample" action according to data collected while composing ODP actions,
3174 * as described in compose_sflow_action().
3175 *
3176 * 'user_cookie_offset' must be the offset returned by
3177 * compose_sflow_action(). */
3178 static void
3179 fix_sflow_action(struct xlate_ctx *ctx, unsigned int user_cookie_offset)
3180 {
3181 const struct flow *base = &ctx->base_flow;
3182 struct user_action_cookie *cookie;
3183
3184 cookie = ofpbuf_at(ctx->odp_actions, user_cookie_offset, sizeof *cookie);
3185 ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW);
3186
3187 cookie->sflow.vlan_tci = base->vlans[0].tci;
3188
3189 /* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
3190 * port information") for the interpretation of cookie->output. */
3191 switch (ctx->sflow_n_outputs) {
3192 case 0:
3193 /* 0x40000000 | 256 means "packet dropped for unknown reason". */
3194 cookie->sflow.output = 0x40000000 | 256;
3195 break;
3196
3197 case 1:
3198 cookie->sflow.output = dpif_sflow_odp_port_to_ifindex(
3199 ctx->xbridge->sflow, ctx->sflow_odp_port);
3200 if (cookie->sflow.output) {
3201 break;
3202 }
3203 /* Fall through. */
3204 default:
3205 /* 0x80000000 means "multiple output ports". */
3206 cookie->sflow.output = 0x80000000 | ctx->sflow_n_outputs;
3207 break;
3208 }
3209 }
3210
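/* Gives the connectivity-monitoring and spanning-tree protocols (CFM, BFD,
 * LACP, STP/RSTP, LLDP) a chance to consume the packet.  Returns true, after
 * recording a slow-path reason in ctx->xout, if 'xport' handles 'flow' as one
 * of these protocols; otherwise returns false. */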
3211 static bool
3212 process_special(struct xlate_ctx *ctx, const struct xport *xport)
3213 {
3214 const struct flow *flow = &ctx->xin->flow;
3215 struct flow_wildcards *wc = ctx->wc;
3216 const struct xbridge *xbridge = ctx->xbridge;
3217 const struct dp_packet *packet = ctx->xin->packet;
3218 enum slow_path_reason slow;
3219
3220 if (!xport) {
3221 slow = 0;
3222 } else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) {
3223 if (packet) {
3224 cfm_process_heartbeat(xport->cfm, packet);
3225 }
3226 slow = SLOW_CFM;
3227 } else if (xport->bfd && bfd_should_process_flow(xport->bfd, flow, wc)) {
3228 if (packet) {
3229 bfd_process_packet(xport->bfd, flow, packet);
3230 /* If POLL received, immediately sends FINAL back. */
3231 if (bfd_should_send_packet(xport->bfd)) {
3232 ofproto_dpif_monitor_port_send_soon(xport->ofport);
3233 }
3234 }
3235 slow = SLOW_BFD;
3236 } else if (xport->xbundle && xport->xbundle->lacp
3237 && flow->dl_type == htons(ETH_TYPE_LACP)) {
3238 if (packet) {
3239 lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
3240 }
3241 slow = SLOW_LACP;
3242 } else if ((xbridge->stp || xbridge->rstp) &&
3243 stp_should_process_flow(flow, wc)) {
3244 if (packet) {
3245 xbridge->stp
3246 ? stp_process_packet(xport, packet)
3247 : rstp_process_packet(xport, packet);
3248 }
3249 slow = SLOW_STP;
3250 } else if (xport->lldp && lldp_should_process_flow(xport->lldp, flow)) {
3251 if (packet) {
3252 lldp_process_packet(xport->lldp, packet);
3253 }
3254 slow = SLOW_LLDP;
3255 } else {
3256 slow = 0;
3257 }
3258
3259 if (slow) {
3260 ctx->xout->slow |= slow;
3261 return true;
3262 } else {
3263 return false;
3264 }
3265 }
3266
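/* Looks up a route for the tunnel destination in 'oflow'.  On success,
 * stores the next-hop address (or the destination itself, if directly
 * reachable) in '*ip', a suitable source address in '*src', the output port
 * in '*out_port', and returns 0.  Returns -ENOENT if no route or no matching
 * output port is found. */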
3267 static int
3268 tnl_route_lookup_flow(const struct xlate_ctx *ctx,
3269 const struct flow *oflow,
3270 struct in6_addr *ip, struct in6_addr *src,
3271 struct xport **out_port)
3272 {
3273 char out_dev[IFNAMSIZ];
3274 struct xbridge *xbridge;
3275 struct in6_addr gw;
3276 struct in6_addr dst;
3277
3278 dst = flow_tnl_dst(&oflow->tunnel);
3279 if (!ovs_router_lookup(oflow->pkt_mark, &dst, out_dev, src, &gw)) {
3280 return -ENOENT;
3281 }
3282
3283 if (ipv6_addr_is_set(&gw) &&
3284 (!IN6_IS_ADDR_V4MAPPED(&gw) || in6_addr_get_mapped_ipv4(&gw))) {
3285 *ip = gw;
3286 } else {
3287 *ip = dst;
3288 }
3289
3290 HMAP_FOR_EACH (xbridge, hmap_node, &ctx->xcfg->xbridges) {
3291 if (!strncmp(xbridge->name, out_dev, IFNAMSIZ)) {
3292 struct xport *port;
3293
3294 HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
3295 if (!strncmp(netdev_get_name(port->netdev), out_dev, IFNAMSIZ)) {
3296 *out_port = port;
3297 return 0;
3298 }
3299 }
3300 }
3301 }
3302 return -ENOENT;
3303 }
3304
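/* Injects 'packet' into 'out_dev''s bridge as if it had arrived on that
 * port, executing an output to OFPP_TABLE so that the packet traverses the
 * bridge's OpenFlow tables. */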
3305 static int
3306 compose_table_xlate(struct xlate_ctx *ctx, const struct xport *out_dev,
3307 struct dp_packet *packet)
3308 {
3309 struct xbridge *xbridge = out_dev->xbridge;
3310 struct ofpact_output output;
3311 struct flow flow;
3312
3313 ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
3314 flow_extract(packet, &flow);
3315 flow.in_port.ofp_port = out_dev->ofp_port;
3316 output.port = OFPP_TABLE;
3317 output.max_len = 0;
3318
3319 return ofproto_dpif_execute_actions__(xbridge->ofproto,
3320 ctx->xin->tables_version, &flow,
3321 NULL, &output.ofpact, sizeof output,
3322 ctx->depth, ctx->resubmits, packet);
3323 }
3324
3325 static void
3326 tnl_send_nd_request(struct xlate_ctx *ctx, const struct xport *out_dev,
3327 const struct eth_addr eth_src,
3328 struct in6_addr * ipv6_src, struct in6_addr * ipv6_dst)
3329 {
3330 struct dp_packet packet;
3331
3332 dp_packet_init(&packet, 0);
3333 compose_nd_ns(&packet, eth_src, ipv6_src, ipv6_dst);
3334 compose_table_xlate(ctx, out_dev, &packet);
3335 dp_packet_uninit(&packet);
3336 }
3337
3338 static void
3339 tnl_send_arp_request(struct xlate_ctx *ctx, const struct xport *out_dev,
3340 const struct eth_addr eth_src,
3341 ovs_be32 ip_src, ovs_be32 ip_dst)
3342 {
3343 struct dp_packet packet;
3344
3345 dp_packet_init(&packet, 0);
3346 compose_arp(&packet, ARP_OP_REQUEST,
3347 eth_src, eth_addr_zero, true, ip_src, ip_dst);
3348
3349 compose_table_xlate(ctx, out_dev, &packet);
3350 dp_packet_uninit(&packet);
3351 }
3352
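/* Fills in the L2, L3, and L4 header fields of 'dst_flow' from the tunnel
 * metadata in 'src_flow', using 'dmac' and 'smac' as the Ethernet addresses
 * and falling back to 's_ip6' or 's_ip' when the tunnel has no source
 * address configured. */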
3353 static void
3354 propagate_tunnel_data_to_flow__(struct flow *dst_flow,
3355 const struct flow *src_flow,
3356 struct eth_addr dmac, struct eth_addr smac,
3357 struct in6_addr s_ip6, ovs_be32 s_ip,
3358 bool is_tnl_ipv6, uint8_t nw_proto)
3359 {
3360 dst_flow->dl_dst = dmac;
3361 dst_flow->dl_src = smac;
3362
3363 dst_flow->packet_type = htonl(PT_ETH);
3364 dst_flow->nw_dst = src_flow->tunnel.ip_dst;
3365 dst_flow->nw_src = src_flow->tunnel.ip_src;
3366 dst_flow->ipv6_dst = src_flow->tunnel.ipv6_dst;
3367 dst_flow->ipv6_src = src_flow->tunnel.ipv6_src;
3368
3369 dst_flow->nw_frag = 0; /* Tunnel packets are unfragmented. */
3370 dst_flow->nw_tos = src_flow->tunnel.ip_tos;
3371 dst_flow->nw_ttl = src_flow->tunnel.ip_ttl;
3372 dst_flow->tp_dst = src_flow->tunnel.tp_dst;
3373 dst_flow->tp_src = src_flow->tunnel.tp_src;
3374
3375 if (is_tnl_ipv6) {
3376 dst_flow->dl_type = htons(ETH_TYPE_IPV6);
3377 if (ipv6_mask_is_any(&dst_flow->ipv6_src)
3378 && !ipv6_mask_is_any(&s_ip6)) {
3379 dst_flow->ipv6_src = s_ip6;
3380 }
3381 } else {
3382 dst_flow->dl_type = htons(ETH_TYPE_IP);
3383 if (dst_flow->nw_src == 0 && s_ip) {
3384 dst_flow->nw_src = s_ip;
3385 }
3386 }
3387 dst_flow->nw_proto = nw_proto;
3388 }
3389
3390 /*
3391 * Populate the 'flow' and 'base_flow' L3 fields to do the post tunnel push
3392 * translations.
3393 */
3394 static void
3395 propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac,
3396 struct eth_addr smac, struct in6_addr s_ip6,
3397 ovs_be32 s_ip, bool is_tnl_ipv6,
3398 enum ovs_vport_type tnl_type)
3399 {
3400 struct flow *base_flow, *flow;
3401 flow = &ctx->xin->flow;
3402 base_flow = &ctx->base_flow;
3403 uint8_t nw_proto = 0;
3404
3405 switch (tnl_type) {
3406 case OVS_VPORT_TYPE_GRE:
3407 case OVS_VPORT_TYPE_ERSPAN:
3408 case OVS_VPORT_TYPE_IP6ERSPAN:
3409 case OVS_VPORT_TYPE_IP6GRE:
3410 nw_proto = IPPROTO_GRE;
3411 break;
3412 case OVS_VPORT_TYPE_VXLAN:
3413 case OVS_VPORT_TYPE_GENEVE:
3414 nw_proto = IPPROTO_UDP;
3415 break;
3416 case OVS_VPORT_TYPE_LISP:
3417 case OVS_VPORT_TYPE_STT:
3418 case OVS_VPORT_TYPE_UNSPEC:
3419 case OVS_VPORT_TYPE_NETDEV:
3420 case OVS_VPORT_TYPE_INTERNAL:
3421 case __OVS_VPORT_TYPE_MAX:
3422 default:
3423 OVS_NOT_REACHED();
3424 }
3425     /*
3426      * Update 'base_flow' first and 'flow' second: the function modifies its
3427      * destination flow, and 'flow' is the source argument for both calls.
3428      */
3429 propagate_tunnel_data_to_flow__(base_flow, flow, dmac, smac, s_ip6, s_ip,
3430 is_tnl_ipv6, nw_proto);
3431 propagate_tunnel_data_to_flow__(flow, flow, dmac, smac, s_ip6, s_ip,
3432 is_tnl_ipv6, nw_proto);
3433 }
3434
3435 static int
3436 native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
3437 const struct flow *flow, odp_port_t tunnel_odp_port,
3438 bool truncate)
3439 {
3440 struct netdev_tnl_build_header_params tnl_params;
3441 struct ovs_action_push_tnl tnl_push_data;
3442 struct xport *out_dev = NULL;
3443 ovs_be32 s_ip = 0, d_ip = 0;
3444 struct in6_addr s_ip6 = in6addr_any;
3445 struct in6_addr d_ip6 = in6addr_any;
3446 struct eth_addr smac;
3447 struct eth_addr dmac;
3448 int err;
3449 char buf_sip6[INET6_ADDRSTRLEN];
3450 char buf_dip6[INET6_ADDRSTRLEN];
3451
3452 /* Store sFlow data. */
3453 uint32_t sflow_n_outputs = ctx->sflow_n_outputs;
3454
3455 /* Structures to backup Ethernet and IP of base_flow. */
3456 struct flow old_base_flow;
3457 struct flow old_flow;
3458
3459 /* Backup flow & base_flow data. */
3460 memcpy(&old_base_flow, &ctx->base_flow, sizeof old_base_flow);
3461 memcpy(&old_flow, &ctx->xin->flow, sizeof old_flow);
3462
3463 if (flow->tunnel.ip_src) {
3464 in6_addr_set_mapped_ipv4(&s_ip6, flow->tunnel.ip_src);
3465 }
3466
3467 err = tnl_route_lookup_flow(ctx, flow, &d_ip6, &s_ip6, &out_dev);
3468 if (err) {
3469 xlate_report(ctx, OFT_WARN, "native tunnel routing failed");
3470 return err;
3471 }
3472
3473 xlate_report(ctx, OFT_DETAIL, "tunneling to %s via %s",
3474 ipv6_string_mapped(buf_dip6, &d_ip6),
3475 netdev_get_name(out_dev->netdev));
3476
3477 /* Use mac addr of bridge port of the peer. */
3478 err = netdev_get_etheraddr(out_dev->netdev, &smac);
3479 if (err) {
3480 xlate_report(ctx, OFT_WARN,
3481 "tunnel output device lacks Ethernet address");
3482 return err;
3483 }
3484
3485 d_ip = in6_addr_get_mapped_ipv4(&d_ip6);
3486 if (d_ip) {
3487 s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
3488 }
3489
3490 err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
3491 if (err) {
3492 xlate_report(ctx, OFT_DETAIL,
3493 "neighbor cache miss for %s on bridge %s, "
3494 "sending %s request",
3495 buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
3496 if (d_ip) {
3497 tnl_send_arp_request(ctx, out_dev, smac, s_ip, d_ip);
3498 } else {
3499 tnl_send_nd_request(ctx, out_dev, smac, &s_ip6, &d_ip6);
3500 }
3501 return err;
3502 }
3503
3504 if (ctx->xin->xcache) {
3505 struct xc_entry *entry;
3506
3507 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_NEIGH);
3508 ovs_strlcpy(entry->tnl_neigh_cache.br_name, out_dev->xbridge->name,
3509 sizeof entry->tnl_neigh_cache.br_name);
3510 entry->tnl_neigh_cache.d_ipv6 = d_ip6;
3511 }
3512
3513 xlate_report(ctx, OFT_DETAIL, "tunneling from "ETH_ADDR_FMT" %s"
3514 " to "ETH_ADDR_FMT" %s",
3515 ETH_ADDR_ARGS(smac), ipv6_string_mapped(buf_sip6, &s_ip6),
3516 ETH_ADDR_ARGS(dmac), buf_dip6);
3517
3518 netdev_init_tnl_build_header_params(&tnl_params, flow, &s_ip6, dmac, smac);
3519 err = tnl_port_build_header(xport->ofport, &tnl_push_data, &tnl_params);
3520 if (err) {
3521 return err;
3522 }
3523 tnl_push_data.tnl_port = tunnel_odp_port;
3524 tnl_push_data.out_port = out_dev->odp_port;
3525
3526     /* After the tunnel header has been added, the MAC and IP data of 'flow'
3527      * and 'base_flow' need to be set properly, since there is no
3528      * recirculation any more when sending the packet to the tunnel. */
3529
3530 propagate_tunnel_data_to_flow(ctx, dmac, smac, s_ip6,
3531 s_ip, tnl_params.is_ipv6,
3532 tnl_push_data.tnl_type);
3533
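    /* Wrap the tunnel push and everything translated after it in a datapath
     * clone action, so that the encapsulation and the underlay bridge
     * processing do not affect the remaining actions of the original flow. */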
3534 size_t clone_ofs = 0;
3535 size_t push_action_size;
3536
3537 clone_ofs = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CLONE);
3538 odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
3539 push_action_size = ctx->odp_actions->size;
3540
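    /* Without truncation, the underlay bridge is translated inline: back up
     * the translation state and use a temporary xcache so that the nested
     * translation does not disturb stats or cause side effects.  With
     * truncation, recirculate instead so that stats stay accurate. */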
3541 if (!truncate) {
3542 const struct dpif_flow_stats *backup_resubmit_stats;
3543 struct xlate_cache *backup_xcache;
3544 struct flow_wildcards *backup_wc, wc;
3545 bool backup_side_effects;
3546 const struct dp_packet *backup_packet;
3547
3548         memset(&wc, 0, sizeof wc);
3549 backup_wc = ctx->wc;
3550 ctx->wc = &wc;
3551 ctx->xin->wc = NULL;
3552 backup_resubmit_stats = ctx->xin->resubmit_stats;
3553 backup_xcache = ctx->xin->xcache;
3554 backup_side_effects = ctx->xin->allow_side_effects;
3555 backup_packet = ctx->xin->packet;
3556
3557 ctx->xin->resubmit_stats = NULL;
3558 ctx->xin->xcache = xlate_cache_new(); /* Use new temporary cache. */
3559 ctx->xin->allow_side_effects = false;
3560 ctx->xin->packet = NULL;
3561
3562 /* Push the cache entry for the tunnel first. */
3563 struct xc_entry *entry;
3564 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TUNNEL_HEADER);
3565 entry->tunnel_hdr.hdr_size = tnl_push_data.header_len;
3566 entry->tunnel_hdr.operation = ADD;
3567
3568 patch_port_output(ctx, xport, out_dev);
3569
3570             /* Similar to the stats update in revalidation, the xcache
3571              * entries populated by the previous translation are used to
3572              * update the stats correctly.
3573              */
3574 if (backup_resubmit_stats) {
3575 struct dpif_flow_stats stats = *backup_resubmit_stats;
3576 xlate_push_stats(ctx->xin->xcache, &stats);
3577 }
3578 xlate_cache_steal_entries(backup_xcache, ctx->xin->xcache);
3579
3580 if (ctx->odp_actions->size > push_action_size) {
3581 nl_msg_end_non_empty_nested(ctx->odp_actions, clone_ofs);
3582 } else {
3583 nl_msg_cancel_nested(ctx->odp_actions, clone_ofs);
3584             /* XXX: There is no real use-case for a tunnel push without
3585              * any post actions.  However, keep it for now to make
3586              * 'make check' happy.  This should be removed once all the
3587              * 'make check' tunnel test cases do something meaningful with
3588              * tunnel-encapsulated packets.
3589              */
3590 odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
3591 }
3592
3593 /* Restore context status. */
3594 ctx->xin->resubmit_stats = backup_resubmit_stats;
3595 xlate_cache_delete(ctx->xin->xcache);
3596 ctx->xin->xcache = backup_xcache;
3597 ctx->xin->allow_side_effects = backup_side_effects;
3598 ctx->xin->packet = backup_packet;
3599 ctx->wc = backup_wc;
3600 } else {
3601         /* In order to maintain accurate stats, use recirc for
3602          * native tunneling. */
3603 nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, 0);
3604 nl_msg_end_nested(ctx->odp_actions, clone_ofs);
3605 }
3606
3607 /* Restore the flows after the translation. */
3608 memcpy(&ctx->xin->flow, &old_flow, sizeof ctx->xin->flow);
3609 memcpy(&ctx->base_flow, &old_base_flow, sizeof ctx->base_flow);
3610
3611 /* Restore sFlow data. */
3612 ctx->sflow_n_outputs = sflow_n_outputs;
3613
3614 return 0;
3615 }
3616
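/* Commits pending modifications to the packet, i.e. the difference between
 * 'ctx->xin->flow' and 'ctx->base_flow', as datapath set/encap/decap actions
 * so that they take effect before the next output or recirculation. */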
3617 static void
3618 xlate_commit_actions(struct xlate_ctx *ctx)
3619 {
3620 bool use_masked = ctx->xbridge->support.masked_set_action;
3621
3622 ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
3623 ctx->odp_actions, ctx->wc,
3624 use_masked, ctx->pending_encap,
3625 ctx->pending_decap, ctx->encap_data);
3626 ctx->pending_encap = false;
3627 ctx->pending_decap = false;
3628 ofpbuf_delete(ctx->encap_data);
3629 ctx->encap_data = NULL;
3630 }
3631
3632 static void
3633 clear_conntrack(struct xlate_ctx *ctx)
3634 {
3635 ctx->conntracked = false;
3636 flow_clear_conntrack(&ctx->xin->flow);
3637 }
3638
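/* Returns true if both the ingress port of 'flow' and the candidate output
 * port 'xport_out' belong to protected bundles, in which case forwarding
 * between them must be prevented. */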
3639 static bool
3640 xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, const struct xport *xport_out)
3641 {
3642 const struct xport *xport_in;
3643
3644 if (!xport_out) {
3645 return false;
3646 }
3647
3648 xport_in = get_ofp_port(ctx->xbridge, flow->in_port.ofp_port);
3649
3650 return (xport_in && xport_in->xbundle && xport_out->xbundle &&
3651 xport_in->xbundle->protected && xport_out->xbundle->protected);
3652 }
3653
3654 /* Handles a packet that is sent from one bridge to another bridge.
3655 *
3656 * The bridges are internally connected, either with patch ports or with
3657 * tunnel ports.
3658 *
3659 * The output action to another bridge causes translation to continue within
3660 * the next bridge. This process can be recursive; the next bridge can
3661 * output yet to another bridge.
3662 *
3663 * The translated actions from the second bridge onwards are enclosed within
3664 * the clone action, so that any modification to the packet will not be visible
3665 * to the remaining actions of the originating bridge.
3666 */
3667 static void
3668 patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
3669 struct xport *out_dev)
3670 {
3671 struct flow *flow = &ctx->xin->flow;
3672 struct flow old_flow = ctx->xin->flow;
3673 struct flow_tnl old_flow_tnl_wc = ctx->wc->masks.tunnel;
3674 bool old_conntrack = ctx->conntracked;
3675 bool old_was_mpls = ctx->was_mpls;
3676 ovs_version_t old_version = ctx->xin->tables_version;
3677 struct ofpbuf old_stack = ctx->stack;
3678 uint8_t new_stack[1024];
3679 struct ofpbuf old_action_set = ctx->action_set;
3680 struct ovs_list *old_trace = ctx->xin->trace;
3681 uint64_t actset_stub[1024 / 8];
3682
3683 ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
3684 ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
3685 flow->in_port.ofp_port = out_dev->ofp_port;
3686 flow->metadata = htonll(0);
3687 memset(&flow->tunnel, 0, sizeof flow->tunnel);
3688 memset(&ctx->wc->masks.tunnel, 0, sizeof ctx->wc->masks.tunnel);
3689 flow->tunnel.metadata.tab =
3690 ofproto_get_tun_tab(&out_dev->xbridge->ofproto->up);
3691 ctx->wc->masks.tunnel.metadata.tab = flow->tunnel.metadata.tab;
3692 memset(flow->regs, 0, sizeof flow->regs);
3693 flow->actset_output = OFPP_UNSET;
3694 clear_conntrack(ctx);
3695 ctx->xin->trace = xlate_report(ctx, OFT_BRIDGE, "bridge(\"%s\")",
3696 out_dev->xbridge->name);
3697 mirror_mask_t old_mirrors = ctx->mirrors;
3698 bool independent_mirrors = out_dev->xbridge != ctx->xbridge;
3699 if (independent_mirrors) {
3700 ctx->mirrors = 0;
3701 }
3702 ctx->xbridge = out_dev->xbridge;
3703
3704 /* The bridge is now known so obtain its table version. */
3705 ctx->xin->tables_version
3706 = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
3707
3708 if (!process_special(ctx, out_dev) && may_receive(out_dev, ctx)) {
3709 if (xport_stp_forward_state(out_dev) &&
3710 xport_rstp_forward_state(out_dev)) {
3711 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
3712 false, true, clone_xlate_actions);
3713 if (!ctx->freezing) {
3714 xlate_action_set(ctx);
3715 }
3716 if (ctx->freezing) {
3717 finish_freezing(ctx);
3718 }
3719 } else {
3720 /* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and
3721 * the learning action look at the packet, then drop it. */
3722 struct flow old_base_flow = ctx->base_flow;
3723 size_t old_size = ctx->odp_actions->size;
3724 mirror_mask_t old_mirrors2 = ctx->mirrors;
3725
3726 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
3727 false, true, clone_xlate_actions);
3728 ctx->mirrors = old_mirrors2;
3729 ctx->base_flow = old_base_flow;
3730 ctx->odp_actions->size = old_size;
3731
3732 /* Undo changes that may have been done for freezing. */
3733 ctx_cancel_freeze(ctx);
3734 }
3735 }
3736
3737 ctx->xin->trace = old_trace;
3738 if (independent_mirrors) {
3739 ctx->mirrors = old_mirrors;
3740 }
3741 ctx->xin->flow = old_flow;
3742 ctx->xbridge = in_dev->xbridge;
3743 ofpbuf_uninit(&ctx->action_set);
3744 ctx->action_set = old_action_set;
3745 ofpbuf_uninit(&ctx->stack);
3746 ctx->stack = old_stack;
3747
3748 /* Restore calling bridge's lookup version. */
3749 ctx->xin->tables_version = old_version;
3750
3751     /* Restore the calling bridge's tunneling information. */
3752 ctx->wc->masks.tunnel = old_flow_tnl_wc;
3753
3754 /* The out bridge popping MPLS should have no effect on the original
3755 * bridge. */
3756 ctx->was_mpls = old_was_mpls;
3757
3758 /* The out bridge's conntrack execution should have no effect on the
3759 * original bridge. */
3760 ctx->conntracked = old_conntrack;
3761
3762 /* The fact that the out bridge exits (for any reason) does not mean
3763 * that the original bridge should exit. Specifically, if the out
3764 * bridge freezes translation, the original bridge must continue
3765 * processing with the original, not the frozen packet! */
3766 ctx->exit = false;
3767
3768 /* Out bridge errors do not propagate back. */
3769 ctx->error = XLATE_OK;
3770
3771 if (ctx->xin->resubmit_stats) {
3772 netdev_vport_inc_tx(in_dev->netdev, ctx->xin->resubmit_stats);
3773 netdev_vport_inc_rx(out_dev->netdev, ctx->xin->resubmit_stats);
3774 if (out_dev->bfd) {
3775 bfd_account_rx(out_dev->bfd, ctx->xin->resubmit_stats);
3776 }
3777 }
3778 if (ctx->xin->xcache) {
3779 struct xc_entry *entry;
3780
3781 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
3782 entry->dev.tx = netdev_ref(in_dev->netdev);
3783 entry->dev.rx = netdev_ref(out_dev->netdev);
3784 entry->dev.bfd = bfd_ref(out_dev->bfd);
3785 }
3786 }
3787
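/* Returns true if output to 'xport' is allowed: the port must exist, must not
 * have OFPPC_NO_FWD set, must not violate protected-port isolation and, when
 * 'check_stp' is set, must be in an STP/RSTP state that permits forwarding.
 * Non-Ethernet packets are also refused on legacy L2 ports. */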
3788 static bool
3789 check_output_prerequisites(struct xlate_ctx *ctx,
3790 const struct xport *xport,
3791 struct flow *flow,
3792 bool check_stp)
3793 {
3794 struct flow_wildcards *wc = ctx->wc;
3795
3796 if (!xport) {
3797 xlate_report(ctx, OFT_WARN, "Nonexistent output port");
3798 return false;
3799 } else if (xport->config & OFPUTIL_PC_NO_FWD) {
3800 xlate_report(ctx, OFT_DETAIL, "OFPPC_NO_FWD set, skipping output");
3801 return false;
3802 } else if (ctx->mirror_snaplen != 0 && xport->odp_port == ODPP_NONE) {
3803 xlate_report(ctx, OFT_WARN,
3804 "Mirror truncate to ODPP_NONE, skipping output");
3805 return false;
3806 } else if (xlate_flow_is_protected(ctx, flow, xport)) {
3807 xlate_report(ctx, OFT_WARN,
3808 "Flow is between protected ports, skipping output.");
3809 return false;
3810 } else if (check_stp) {
3811 if (is_stp(&ctx->base_flow)) {
3812 if (!xport_stp_should_forward_bpdu(xport) &&
3813 !xport_rstp_should_manage_bpdu(xport)) {
3814 if (ctx->xbridge->stp != NULL) {
3815 xlate_report(ctx, OFT_WARN,
3816 "STP not in listening state, "
3817 "skipping bpdu output");
3818 } else if (ctx->xbridge->rstp != NULL) {
3819 xlate_report(ctx, OFT_WARN,
3820 "RSTP not managing BPDU in this state, "
3821 "skipping bpdu output");
3822 }
3823 return false;
3824 }
3825 } else if ((xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc))
3826 || (xport->bfd && bfd_should_process_flow(xport->bfd, flow,
3827 wc))) {
3828 /* Pass; STP should not block link health detection. */
3829 } else if (!xport_stp_forward_state(xport) ||
3830 !xport_rstp_forward_state(xport)) {
3831 if (ctx->xbridge->stp != NULL) {
3832 xlate_report(ctx, OFT_WARN,
3833 "STP not in forwarding state, skipping output");
3834 } else if (ctx->xbridge->rstp != NULL) {
3835 xlate_report(ctx, OFT_WARN,
3836 "RSTP not in forwarding state, skipping output");
3837 }
3838 return false;
3839 }
3840 }
3841
3842 if (xport->pt_mode == NETDEV_PT_LEGACY_L2 &&
3843 flow->packet_type != htonl(PT_ETH)) {
3844 xlate_report(ctx, OFT_WARN, "Trying to send non-Ethernet packet "
3845 "through legacy L2 port. Dropping packet.");
3846 return false;
3847 }
3848
3849 return true;
3850 }
3851
3852 /* Verifies whether the destination address of the received Neighbor
3853  * Advertisement message stored in 'flow' is correct.  It should be either
3854  * FF02::1:FFXX:XXXX, where XX:XXXX stands for the last 24 bits of
3855  * 'ipv6_addr', or it should match 'ipv6_addr'. */
3856 static bool
3857 is_nd_dst_correct(const struct flow *flow, const struct in6_addr *ipv6_addr)
3858 {
3859 const uint8_t *flow_ipv6_addr = (uint8_t *) &flow->ipv6_dst;
3860 const uint8_t *addr = (uint8_t *) ipv6_addr;
3861
3862 return (IN6_IS_ADDR_MC_LINKLOCAL(&flow->ipv6_dst) &&
3863 flow_ipv6_addr[11] == 0x01 &&
3864 flow_ipv6_addr[12] == 0xff &&
3865 flow_ipv6_addr[13] == addr[13] &&
3866 flow_ipv6_addr[14] == addr[14] &&
3867 flow_ipv6_addr[15] == addr[15]) ||
3868 IN6_ARE_ADDR_EQUAL(&flow->ipv6_dst, ipv6_addr);
3869 }
3870
3871 /* Verifies whether the ARP reply or Neighbor Advertisement represented by
3872  * 'flow' addresses the 'xbridge' of 'ctx'.  Returns true if the ARP TA or
3873 * neighbor discovery destination is in the list of configured IP addresses of
3874 * the bridge. Otherwise, it returns false. */
3875 static bool
3876 is_neighbor_reply_correct(const struct xlate_ctx *ctx, const struct flow *flow)
3877 {
3878 bool ret = false;
3879 int i;
3880 struct xbridge_addr *xbridge_addr = xbridge_addr_ref(ctx->xbridge->addr);
3881
3882 /* Verify if 'nw_dst' of ARP or 'ipv6_dst' of ICMPV6 is in the list. */
3883 for (i = 0; xbridge_addr && i < xbridge_addr->n_addr; i++) {
3884 struct in6_addr *ip_addr = &xbridge_addr->addr[i];
3885 if ((IN6_IS_ADDR_V4MAPPED(ip_addr) &&
3886 flow->dl_type == htons(ETH_TYPE_ARP) &&
3887 in6_addr_get_mapped_ipv4(ip_addr) == flow->nw_dst) ||
3888 (!IN6_IS_ADDR_V4MAPPED(ip_addr) &&
3889 is_nd_dst_correct(flow, ip_addr))) {
3890 /* Found a match. */
3891 ret = true;
3892 break;
3893 }
3894 }
3895
3896 xbridge_addr_unref(xbridge_addr);
3897 return ret;
3898 }
3899
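/* Checks whether a packet output to OFPP_LOCAL with native tunneling enabled
 * should instead be received on a native tunnel port.  On a match, '*tnl_port'
 * is set to the datapath tunnel port and true is returned so that the caller
 * emits a TUNNEL_POP action.  ARP and ND replies addressed to the bridge are
 * also snooped here to populate the tunnel neighbor cache. */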
3900 static bool
3901 terminate_native_tunnel(struct xlate_ctx *ctx, ofp_port_t ofp_port,
3902 struct flow *flow, struct flow_wildcards *wc,
3903 odp_port_t *tnl_port)
3904 {
3905 *tnl_port = ODPP_NONE;
3906
3907     /* XXX: Write a better filter for the tunnel port.  We could use the
3908      * in_port in the tunnel-port flow to avoid these checks completely. */
3909 if (ofp_port == OFPP_LOCAL &&
3910 ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
3911 *tnl_port = tnl_port_map_lookup(flow, wc);
3912
3913 /* If no tunnel port was found and it's about an ARP or ICMPv6 packet,
3914 * do tunnel neighbor snooping. */
3915 if (*tnl_port == ODPP_NONE &&
3916 (flow->dl_type == htons(ETH_TYPE_ARP) ||
3917 flow->nw_proto == IPPROTO_ICMPV6) &&
3918 is_neighbor_reply_correct(ctx, flow)) {
3919 tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
3920 }
3921 }
3922
3923 return *tnl_port != ODPP_NONE;
3924 }
3925
3926 static void
3927 compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
3928 const struct xlate_bond_recirc *xr, bool check_stp,
3929 bool is_last_action OVS_UNUSED, bool truncate)
3930 {
3931 const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
3932 struct flow_wildcards *wc = ctx->wc;
3933 struct flow *flow = &ctx->xin->flow;
3934 struct flow_tnl flow_tnl;
3935 union flow_vlan_hdr flow_vlans[FLOW_MAX_VLAN_HEADERS];
3936 uint8_t flow_nw_tos;
3937 odp_port_t out_port, odp_port, odp_tnl_port;
3938 bool is_native_tunnel = false;
3939 uint8_t dscp;
3940 struct eth_addr flow_dl_dst = flow->dl_dst;
3941 struct eth_addr flow_dl_src = flow->dl_src;
3942 ovs_be32 flow_packet_type = flow->packet_type;
3943 ovs_be16 flow_dl_type = flow->dl_type;
3944
3945 /* If 'struct flow' gets additional metadata, we'll need to zero it out
3946 * before traversing a patch port. */
3947 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
3948 memset(&flow_tnl, 0, sizeof flow_tnl);
3949
3950 if (!check_output_prerequisites(ctx, xport, flow, check_stp)) {
3951 return;
3952 }
3953
3954 if (flow->packet_type == htonl(PT_ETH)) {
3955 /* Strip Ethernet header for legacy L3 port. */
3956 if (xport->pt_mode == NETDEV_PT_LEGACY_L3) {
3957 flow->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
3958 ntohs(flow->dl_type));
3959 }
3960 }
3961
3962 if (xport->peer) {
3963 if (truncate) {
3964 xlate_report_error(ctx, "Cannot truncate output to patch port");
3965 }
3966 patch_port_output(ctx, xport, xport->peer);
3967 return;
3968 }
3969
3970 memcpy(flow_vlans, flow->vlans, sizeof flow_vlans);
3971 flow_nw_tos = flow->nw_tos;
3972
3973 if (count_skb_priorities(xport)) {
3974 memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
3975 if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
3976 wc->masks.nw_tos |= IP_DSCP_MASK;
3977 flow->nw_tos &= ~IP_DSCP_MASK;
3978 flow->nw_tos |= dscp;
3979 }
3980 }
3981
3982 if (xport->is_tunnel) {
3983 struct in6_addr dst;
3984 /* Save tunnel metadata so that changes made due to
3985 * the Logical (tunnel) Port are not visible for any further
3986 * matches, while explicit set actions on tunnel metadata are.
3987 */
3988 flow_tnl = flow->tunnel;
3989 odp_port = tnl_port_send(xport->ofport, flow, ctx->wc);
3990 if (odp_port == ODPP_NONE) {
3991 xlate_report(ctx, OFT_WARN, "Tunneling decided against output");
3992 goto out; /* restore flow_nw_tos */
3993 }
3994 dst = flow_tnl_dst(&flow->tunnel);
3995 if (ipv6_addr_equals(&dst, &ctx->orig_tunnel_ipv6_dst)) {
3996 xlate_report(ctx, OFT_WARN, "Not tunneling to our own address");
3997 goto out; /* restore flow_nw_tos */
3998 }
3999 if (ctx->xin->resubmit_stats) {
4000 netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
4001 }
4002 if (ctx->xin->xcache) {
4003 struct xc_entry *entry;
4004
4005 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
4006 entry->dev.tx = netdev_ref(xport->netdev);
4007 }
4008 out_port = odp_port;
4009 if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
4010 xlate_report(ctx, OFT_DETAIL, "output to native tunnel");
4011 is_native_tunnel = true;
4012 } else {
4013 const char *tnl_type;
4014
4015 xlate_report(ctx, OFT_DETAIL, "output to kernel tunnel");
4016 tnl_type = tnl_port_get_type(xport->ofport);
4017 commit_odp_tunnel_action(flow, &ctx->base_flow,
4018 ctx->odp_actions, tnl_type);
4019 flow->tunnel = flow_tnl; /* Restore tunnel metadata */
4020 }
4021 } else {
4022 odp_port = xport->odp_port;
4023 out_port = odp_port;
4024 }
4025
4026 if (out_port != ODPP_NONE) {
4027 /* Commit accumulated flow updates before output. */
4028 xlate_commit_actions(ctx);
4029
4030 if (xr) {
4031 /* Recirculate the packet. */
4032 struct ovs_action_hash *act_hash;
4033
4034 /* Hash action. */
4035 enum ovs_hash_alg hash_alg = xr->hash_alg;
4036 if (hash_alg > ctx->xbridge->support.max_hash_alg) {
4037 /* Algorithm supported by all datapaths. */
4038 hash_alg = OVS_HASH_ALG_L4;
4039 }
4040 act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
4041 OVS_ACTION_ATTR_HASH,
4042 sizeof *act_hash);
4043 act_hash->hash_alg = hash_alg;
4044 act_hash->hash_basis = xr->hash_basis;
4045
4046 /* Recirc action. */
4047 nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
4048 xr->recirc_id);
4049 } else if (is_native_tunnel) {
4050 /* Output to native tunnel port. */
4051 native_tunnel_output(ctx, xport, flow, odp_port, truncate);
4052 flow->tunnel = flow_tnl; /* Restore tunnel metadata */
4053
4054 } else if (terminate_native_tunnel(ctx, ofp_port, flow, wc,
4055 &odp_tnl_port)) {
4056 /* Intercept packet to be received on native tunnel port. */
4057 nl_msg_put_odp_port(ctx->odp_actions, OVS_ACTION_ATTR_TUNNEL_POP,
4058 odp_tnl_port);
4059
4060 } else {
4061 /* Tunnel push-pop action is not compatible with
4062 * IPFIX action. */
4063 compose_ipfix_action(ctx, out_port);
4064
4065 /* Handle truncation of the mirrored packet. */
4066 if (ctx->mirror_snaplen > 0 &&
4067 ctx->mirror_snaplen < UINT16_MAX) {
4068 struct ovs_action_trunc *trunc;
4069
4070 trunc = nl_msg_put_unspec_uninit(ctx->odp_actions,
4071 OVS_ACTION_ATTR_TRUNC,
4072 sizeof *trunc);
4073 trunc->max_len = ctx->mirror_snaplen;
4074 if (!ctx->xbridge->support.trunc) {
4075 ctx->xout->slow |= SLOW_ACTION;
4076 }
4077 }
4078
4079 nl_msg_put_odp_port(ctx->odp_actions,
4080 OVS_ACTION_ATTR_OUTPUT,
4081 out_port);
4082 }
4083
4084 ctx->sflow_odp_port = odp_port;
4085 ctx->sflow_n_outputs++;
4086 ctx->nf_output_iface = ofp_port;
4087 }
4088
4089 if (mbridge_has_mirrors(ctx->xbridge->mbridge) && xport->xbundle) {
4090 mirror_packet(ctx, xport->xbundle,
4091 xbundle_mirror_dst(xport->xbundle->xbridge,
4092 xport->xbundle));
4093 }
4094
4095 out:
4096 /* Restore flow */
4097 memcpy(flow->vlans, flow_vlans, sizeof flow->vlans);
4098 flow->nw_tos = flow_nw_tos;
4099 flow->dl_dst = flow_dl_dst;
4100 flow->dl_src = flow_dl_src;
4101 flow->packet_type = flow_packet_type;
4102 flow->dl_type = flow_dl_type;
4103 }
4104
4105 static void
4106 compose_output_action(struct xlate_ctx *ctx, ofp_port_t ofp_port,
4107 const struct xlate_bond_recirc *xr,
4108 bool is_last_action, bool truncate)
4109 {
4110 compose_output_action__(ctx, ofp_port, xr, true,
4111 is_last_action, truncate);
4112 }
4113
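/* Translates the actions of 'rule' as a nested (resubmitted) translation,
 * crediting the rule's statistics.  The translation depth is increased only
 * when 'deepens' is true, i.e. when the resubmit goes to the same or an
 * earlier table. */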
4114 static void
4115 xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule,
4116 bool deepens, bool is_last_action,
4117 xlate_actions_handler *actions_xlator)
4118 {
4119 struct rule_dpif *old_rule = ctx->rule;
4120 ovs_be64 old_cookie = ctx->rule_cookie;
4121 const struct rule_actions *actions;
4122
4123 if (ctx->xin->resubmit_stats) {
4124 rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
4125 }
4126
4127 ctx->resubmits++;
4128
4129 ctx->depth += deepens;
4130 ctx->rule = rule;
4131 ctx->rule_cookie = rule->up.flow_cookie;
4132 actions = rule_get_actions(&rule->up);
4133 actions_xlator(actions->ofpacts, actions->ofpacts_len, ctx,
4134 is_last_action, false);
4135 ctx->rule_cookie = old_cookie;
4136 ctx->rule = old_rule;
4137 ctx->depth -= deepens;
4138 }
4139
4140 static bool
4141 xlate_resubmit_resource_check(struct xlate_ctx *ctx)
4142 {
4143 if (ctx->depth >= MAX_DEPTH) {
4144 xlate_report_error(ctx, "over max translation depth %d", MAX_DEPTH);
4145 ctx->error = XLATE_RECURSION_TOO_DEEP;
4146 } else if (ctx->resubmits >= MAX_RESUBMITS) {
4147 xlate_report_error(ctx, "over %d resubmit actions", MAX_RESUBMITS);
4148 ctx->error = XLATE_TOO_MANY_RESUBMITS;
4149 } else if (ctx->odp_actions->size > UINT16_MAX) {
4150 xlate_report_error(ctx, "resubmits yielded over 64 kB of actions");
4151 /* NOT an error, as we'll be slow-pathing the flow in this case? */
4152 ctx->exit = true; /* XXX: translation still terminated! */
4153 } else if (ctx->stack.size >= 65536) {
4154 xlate_report_error(ctx, "resubmits yielded over 64 kB of stack");
4155 ctx->error = XLATE_STACK_TOO_DEEP;
4156 } else {
4157 return true;
4158 }
4159
4160 return false;
4161 }
4162
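/* Swaps the packet's current 5-tuple with the conntrack original-direction
 * tuple (the 'ct_*' fields) in 'flow'.  Used to implement resubmit with
 * 'ct' so that the lookup matches on the original connection tuple. */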
4163 static void
4164 tuple_swap_flow(struct flow *flow, bool ipv4)
4165 {
4166 uint8_t nw_proto = flow->nw_proto;
4167 flow->nw_proto = flow->ct_nw_proto;
4168 flow->ct_nw_proto = nw_proto;
4169
4170 if (ipv4) {
4171 ovs_be32 nw_src = flow->nw_src;
4172 flow->nw_src = flow->ct_nw_src;
4173 flow->ct_nw_src = nw_src;
4174
4175 ovs_be32 nw_dst = flow->nw_dst;
4176 flow->nw_dst = flow->ct_nw_dst;
4177 flow->ct_nw_dst = nw_dst;
4178 } else {
4179 struct in6_addr ipv6_src = flow->ipv6_src;
4180 flow->ipv6_src = flow->ct_ipv6_src;
4181 flow->ct_ipv6_src = ipv6_src;
4182
4183 struct in6_addr ipv6_dst = flow->ipv6_dst;
4184 flow->ipv6_dst = flow->ct_ipv6_dst;
4185 flow->ct_ipv6_dst = ipv6_dst;
4186 }
4187
4188 ovs_be16 tp_src = flow->tp_src;
4189 flow->tp_src = flow->ct_tp_src;
4190 flow->ct_tp_src = tp_src;
4191
4192 ovs_be16 tp_dst = flow->tp_dst;
4193 flow->tp_dst = flow->ct_tp_dst;
4194 flow->ct_tp_dst = tp_dst;
4195 }
4196
4197 static void
4198 tuple_swap(struct flow *flow, struct flow_wildcards *wc)
4199 {
4200 bool ipv4 = (flow->dl_type == htons(ETH_TYPE_IP));
4201
4202 tuple_swap_flow(flow, ipv4);
4203 tuple_swap_flow(&wc->masks, ipv4);
4204 }
4205
4206 static void
4207 xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
4208 bool may_packet_in, bool honor_table_miss,
4209 bool with_ct_orig, bool is_last_action,
4210 xlate_actions_handler *xlator)
4211 {
4212 /* Check if we need to recirculate before matching in a table. */
4213 if (ctx->was_mpls) {
4214 ctx_trigger_freeze(ctx);
4215 return;
4216 }
4217 if (xlate_resubmit_resource_check(ctx)) {
4218 uint8_t old_table_id = ctx->table_id;
4219 struct rule_dpif *rule;
4220
4221 ctx->table_id = table_id;
4222
4223 /* Swap packet fields with CT 5-tuple if requested. */
4224 if (with_ct_orig) {
4225 /* Do not swap if there is no CT tuple, or if key is not IP. */
4226 if (ctx->xin->flow.ct_nw_proto == 0 ||
4227 !is_ip_any(&ctx->xin->flow)) {
4228 xlate_report_error(ctx,
4229 "resubmit(ct) with non-tracked or non-IP packet!");
4230 return;
4231 }
4232 tuple_swap(&ctx->xin->flow, ctx->wc);
4233 }
4234 rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
4235 ctx->xin->tables_version,
4236 &ctx->xin->flow, ctx->wc,
4237 ctx->xin->resubmit_stats,
4238 &ctx->table_id, in_port,
4239 may_packet_in, honor_table_miss,
4240 ctx->xin->xcache);
4241 /* Swap back. */
4242 if (with_ct_orig) {
4243 tuple_swap(&ctx->xin->flow, ctx->wc);
4244 }
4245
4246 if (rule) {
4247 /* Fill in the cache entry here instead of xlate_recursively
4248 * to make the reference counting more explicit. We take a
4249 * reference in the lookups above if we are going to cache the
4250 * rule. */
4251 if (ctx->xin->xcache) {
4252 struct xc_entry *entry;
4253
4254 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_RULE);
4255 entry->rule = rule;
4256 ofproto_rule_ref(&rule->up);
4257 }
4258
4259 struct ovs_list *old_trace = ctx->xin->trace;
4260 xlate_report_table(ctx, rule, table_id);
4261 xlate_recursively(ctx, rule, table_id <= old_table_id,
4262 is_last_action, xlator);
4263 ctx->xin->trace = old_trace;
4264 }
4265
4266 ctx->table_id = old_table_id;
4267 return;
4268 }
4269 }
4270
4271 /* Consumes the group reference, which is only taken if xcache exists. */
4272 static void
4273 xlate_group_stats(struct xlate_ctx *ctx, struct group_dpif *group,
4274 struct ofputil_bucket *bucket)
4275 {
4276 if (ctx->xin->resubmit_stats) {
4277 group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
4278 }
4279 if (ctx->xin->xcache) {
4280 struct xc_entry *entry;
4281
4282 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
4283 entry->group.group = group;
4284 entry->group.bucket = bucket;
4285 }
4286 }
4287
4288 static void
4289 xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket,
4290 bool is_last_action)
4291 {
4292 struct ovs_list *old_trace = ctx->xin->trace;
4293 if (OVS_UNLIKELY(ctx->xin->trace)) {
4294 char *s = xasprintf("bucket %"PRIu32, bucket->bucket_id);
4295 ctx->xin->trace = &oftrace_report(ctx->xin->trace, OFT_BUCKET,
4296 s)->subs;
4297 free(s);
4298 }
4299
4300 uint64_t action_list_stub[1024 / 8];
4301 struct ofpbuf action_list = OFPBUF_STUB_INITIALIZER(action_list_stub);
4302 struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
4303 bucket->ofpacts_len);
4304 struct flow old_flow = ctx->xin->flow;
4305 bool old_was_mpls = ctx->was_mpls;
4306
4307 ofpacts_execute_action_set(&action_list, &action_set);
4308 ctx->depth++;
4309 do_xlate_actions(action_list.data, action_list.size, ctx, is_last_action,
4310 true);
4311 ctx->depth--;
4312
4313 ofpbuf_uninit(&action_list);
4314
4315 /* Check if need to freeze. */
4316 if (ctx->freezing) {
4317 finish_freezing(ctx);
4318 }
4319
4320 /* Roll back flow to previous state.
4321 * This is equivalent to cloning the packet for each bucket.
4322 *
4323 * As a side effect any subsequently applied actions will
4324 * also effectively be applied to a clone of the packet taken
4325 * just before applying the all or indirect group.
4326 *
4327 * Note that group buckets are action sets, hence they cannot modify the
4328 * main action set. Also any stack actions are ignored when executing an
4329 * action set, so group buckets cannot change the stack either.
4330      * However, we do allow resubmit actions in group buckets, which could
4331      * break the above assumptions.  It is up to the controller not to
4332      * interfere with the action_set and stack in the tables resubmitted to
4333      * from group buckets. */
4334 ctx->xin->flow = old_flow;
4335
4336 /* The group bucket popping MPLS should have no effect after bucket
4337 * execution. */
4338 ctx->was_mpls = old_was_mpls;
4339
4340 /* The fact that the group bucket exits (for any reason) does not mean that
4341 * the translation after the group action should exit. Specifically, if
4342 * the group bucket freezes translation, the actions after the group action
4343 * must continue processing with the original, not the frozen packet! */
4344 ctx->exit = false;
4345
4346     /* A context error in a bucket should not impact processing of other
4347      * buckets or actions.  This is similar to cloning a packet for group
4348      * buckets.  There is no need to restore the error to its old value,
4349      * because the group action is only processed when there is no previous
4350      * context error.
4351      *
4352      * The exception is errors that enforce system limits, protecting the
4353      * translation from running too long or occupying too much space.  These
4354      * errors should not be masked.  XLATE_RECURSION_TOO_DEEP,
4355      * XLATE_TOO_MANY_RESUBMITS and XLATE_STACK_TOO_DEEP fall in this category. */
4356 if (ctx->error == XLATE_TOO_MANY_MPLS_LABELS ||
4357 ctx->error == XLATE_UNSUPPORTED_PACKET_TYPE) {
4358 /* reset the error and continue processing other buckets */
4359 ctx->error = XLATE_OK;
4360 }
4361
4362 ctx->xin->trace = old_trace;
4363 }
4364
4365 static struct ofputil_bucket *
4366 pick_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
4367 {
4368 return group_first_live_bucket(ctx, group, 0);
4369 }
4370
4371 static struct ofputil_bucket *
4372 pick_default_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
4373 {
4374 flow_mask_hash_fields(&ctx->xin->flow, ctx->wc,
4375 NX_HASH_FIELDS_SYMMETRIC_L4);
4376 return group_best_live_bucket(ctx, group,
4377 flow_hash_symmetric_l4(&ctx->xin->flow, 0));
4378 }
4379
4380 static struct ofputil_bucket *
4381 pick_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
4382 {
4383 const struct field_array *fields = &group->up.props.fields;
4384 const uint8_t *mask_values = fields->values;
4385 uint32_t basis = hash_uint64(group->up.props.selection_method_param);
4386
4387 size_t i;
4388 BITMAP_FOR_EACH_1 (i, MFF_N_IDS, fields->used.bm) {
4389 const struct mf_field *mf = mf_from_id(i);
4390
4391 /* Skip fields for which prerequisites are not met. */
4392 if (!mf_are_prereqs_ok(mf, &ctx->xin->flow, ctx->wc)) {
4393 /* Skip the mask bytes for this field. */
4394 mask_values += mf->n_bytes;
4395 continue;
4396 }
4397
4398 union mf_value value;
4399 union mf_value mask;
4400
4401 mf_get_value(mf, &ctx->xin->flow, &value);
4402 /* Mask the value. */
4403 for (int j = 0; j < mf->n_bytes; j++) {
4404 mask.b[j] = *mask_values++;
4405 value.b[j] &= mask.b[j];
4406 }
4407 basis = hash_bytes(&value, mf->n_bytes, basis);
4408
4409 /* For tunnels, hash in whether the field is present. */
4410 if (mf_is_tun_metadata(mf)) {
4411 basis = hash_boolean(mf_is_set(mf, &ctx->xin->flow), basis);
4412 }
4413
4414 mf_mask_field_masked(mf, &mask, ctx->wc);
4415 }
4416
4417 return group_best_live_bucket(ctx, group, basis);
4418 }
4419
4420 static struct ofputil_bucket *
4421 pick_dp_hash_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
4422 {
4423 uint32_t dp_hash = ctx->xin->flow.dp_hash;
4424
4425 /* dp_hash value 0 is special since it means that the dp_hash has not been
4426 * computed, as all computed dp_hash values are non-zero. Therefore
4427 * compare to zero can be used to decide if the dp_hash value is valid
4428 * without masking the dp_hash field. */
4429 if (!dp_hash) {
4430 enum ovs_hash_alg hash_alg = group->hash_alg;
4431 if (hash_alg > ctx->xbridge->support.max_hash_alg) {
4432 /* Algorithm supported by all datapaths. */
4433 hash_alg = OVS_HASH_ALG_L4;
4434 }
4435 ctx_trigger_recirculate_with_hash(ctx, hash_alg, group->hash_basis);
4436 return NULL;
4437 } else {
4438 uint32_t hash_mask = group->hash_mask;
4439 ctx->wc->masks.dp_hash |= hash_mask;
4440
4441 /* Starting from the original masked dp_hash value iterate over the
4442 * hash mapping table to find the first live bucket. As the buckets
4443 * are quasi-randomly spread over the hash values, this maintains
4444 * a distribution according to bucket weights even when some buckets
4445 * are non-live. */
4446 for (int i = 0; i <= hash_mask; i++) {
4447 struct ofputil_bucket *b =
4448 group->hash_map[(dp_hash + i) & hash_mask];
4449 if (bucket_is_alive(ctx, b, 0)) {
4450 return b;
4451 }
4452 }
4453
4454 return NULL;
4455 }
4456 }
4457
4458 static struct ofputil_bucket *
4459 pick_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
4460 {
4461 /* Select groups may access flow keys beyond L2 in order to
4462 * select a bucket. Recirculate as appropriate to make this possible.
4463 */
4464 if (ctx->was_mpls) {
4465 ctx_trigger_freeze(ctx);
4466 }
4467
4468 switch (group->selection_method) {
4469 case SEL_METHOD_DEFAULT:
4470 return pick_default_select_group(ctx, group);
4471 break;
4472 case SEL_METHOD_HASH:
4473 return pick_hash_fields_select_group(ctx, group);
4474 break;
4475 case SEL_METHOD_DP_HASH:
4476 return pick_dp_hash_select_group(ctx, group);
4477 break;
4478 default:
4479 /* Parsing of groups ensures this never happens */
4480 OVS_NOT_REACHED();
4481 }
4482
4483 return NULL;
4484 }
4485
4486 static void
4487 xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group,
4488 bool is_last_action)
4489 {
4490 if (group->up.type == OFPGT11_ALL || group->up.type == OFPGT11_INDIRECT) {
4491 struct ovs_list *last_bucket = ovs_list_back(&group->up.buckets);
4492 struct ofputil_bucket *bucket;
4493 LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
4494 bool is_last_bucket = &bucket->list_node == last_bucket;
4495 xlate_group_bucket(ctx, bucket, is_last_action && is_last_bucket);
4496 }
4497 xlate_group_stats(ctx, group, NULL);
4498 } else {
4499 struct ofputil_bucket *bucket;
4500 if (group->up.type == OFPGT11_SELECT) {
4501 bucket = pick_select_group(ctx, group);
4502 } else if (group->up.type == OFPGT11_FF) {
4503 bucket = pick_ff_group(ctx, group);
4504 } else {
4505 OVS_NOT_REACHED();
4506 }
4507
4508 if (bucket) {
4509 xlate_report(ctx, OFT_DETAIL, "using bucket %"PRIu32,
4510 bucket->bucket_id);
4511 xlate_group_bucket(ctx, bucket, is_last_action);
4512 xlate_group_stats(ctx, group, bucket);
4513 } else {
4514 xlate_report(ctx, OFT_DETAIL, "no live bucket");
4515 if (ctx->xin->xcache) {
4516 ofproto_group_unref(&group->up);
4517 }
4518 }
4519 }
4520 }
4521
4522 static bool
4523 xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id,
4524 bool is_last_action)
4525 {
4526 if (xlate_resubmit_resource_check(ctx)) {
4527 struct group_dpif *group;
4528
4529 /* Take ref only if xcache exists. */
4530 group = group_dpif_lookup(ctx->xbridge->ofproto, group_id,
4531 ctx->xin->tables_version, ctx->xin->xcache);
4532 if (!group) {
4533 /* XXX: Should set ctx->error ? */
4534 xlate_report(ctx, OFT_WARN, "output to nonexistent group %"PRIu32,
4535 group_id);
4536 return true;
4537 }
4538 xlate_group_action__(ctx, group, is_last_action);
4539 }
4540
4541 return false;
4542 }
4543
4544 static void
4545 xlate_ofpact_resubmit(struct xlate_ctx *ctx,
4546 const struct ofpact_resubmit *resubmit,
4547 bool is_last_action)
4548 {
4549 ofp_port_t in_port;
4550 uint8_t table_id;
4551 bool may_packet_in = false;
4552 bool honor_table_miss = false;
4553
4554 if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
4555 /* Still allow missed packets to be sent to the controller
4556 * if resubmitting from an internal table. */
4557 may_packet_in = true;
4558 honor_table_miss = true;
4559 }
4560
4561 in_port = resubmit->in_port;
4562 if (in_port == OFPP_IN_PORT) {
4563 in_port = ctx->xin->flow.in_port.ofp_port;
4564 }
4565
4566 table_id = resubmit->table_id;
4567 if (table_id == 255) {
4568 table_id = ctx->table_id;
4569 }
4570
4571 xlate_table_action(ctx, in_port, table_id, may_packet_in,
4572 honor_table_miss, resubmit->with_ct_orig,
4573 is_last_action, do_xlate_actions);
4574 }
4575
4576 static void
4577 flood_packet_to_port(struct xlate_ctx *ctx, const struct xport *xport,
4578 bool all, bool is_last_action)
4579 {
4580 if (!xport) {
4581 return;
4582 }
4583
4584 if (all) {
4585 compose_output_action__(ctx, xport->ofp_port, NULL, false,
4586 is_last_action, false);
4587 } else {
4588 compose_output_action(ctx, xport->ofp_port, NULL, is_last_action,
4589 false);
4590 }
4591 }
4592
4593 static void
4594 flood_packets(struct xlate_ctx *ctx, bool all, bool is_last_action)
4595 {
4596 const struct xport *xport, *last = NULL;
4597
4598     /* Use 'last' to keep track of the last output port. */
4599 HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) {
4600 if (xport->ofp_port == ctx->xin->flow.in_port.ofp_port) {
4601 continue;
4602 }
4603
4604 if (all || !(xport->config & OFPUTIL_PC_NO_FLOOD)) {
4605 /* 'last' is not the last port, send a packet out, and
4606 * update 'last'. */
4607 flood_packet_to_port(ctx, last, all, false);
4608 last = xport;
4609 }
4610 }
4611
4612 /* Send the packet to the 'last' port. */
4613 flood_packet_to_port(ctx, last, all, is_last_action);
4614 ctx->nf_output_iface = NF_OUT_FLOOD;
4615 }
4616
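/* Composes a datapath userspace action carrying a controller cookie that
 * describes how to build the packet-in: reason, controller id, maximum
 * length, continuation flag and the recirculation id needed to resume the
 * pipeline. */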
4617 static void
4618 put_controller_user_action(struct xlate_ctx *ctx,
4619 bool dont_send, bool continuation,
4620 uint32_t recirc_id, int len,
4621 enum ofp_packet_in_reason reason,
4622 uint16_t controller_id)
4623 {
4624 struct user_action_cookie cookie;
4625
4626 memset(&cookie, 0, sizeof cookie);
4627 cookie.type = USER_ACTION_COOKIE_CONTROLLER;
4628     cookie.ofp_in_port = OFPP_NONE;
4629 cookie.ofproto_uuid = ctx->xbridge->ofproto->uuid;
4630 cookie.controller.dont_send = dont_send;
4631 cookie.controller.continuation = continuation;
4632 cookie.controller.reason = reason;
4633 cookie.controller.recirc_id = recirc_id;
4634 put_32aligned_be64(&cookie.controller.rule_cookie, ctx->rule_cookie);
4635 cookie.controller.controller_id = controller_id;
4636 cookie.controller.max_len = len;
4637
4638 odp_port_t odp_port = ofp_port_to_odp_port(ctx->xbridge,
4639 ctx->xin->flow.in_port.ofp_port);
4640 uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
4641 flow_hash_5tuple(&ctx->xin->flow, 0));
4642 odp_put_userspace_action(pid, &cookie, sizeof cookie, ODPP_NONE,
4643 false, ctx->odp_actions);
4644 }
4645
4646 static void
4647 xlate_controller_action(struct xlate_ctx *ctx, int len,
4648 enum ofp_packet_in_reason reason,
4649 uint16_t controller_id,
4650 uint32_t provider_meter_id,
4651 const uint8_t *userdata, size_t userdata_len)
4652 {
4653 xlate_commit_actions(ctx);
4654
4655 /* A packet sent by an action in a table-miss rule is considered an
4656 * explicit table miss. OpenFlow before 1.3 doesn't have that concept so
4657 * it will get translated back to OFPR_ACTION for those versions. */
4658 if (reason == OFPR_ACTION
4659 && ctx->rule && rule_is_table_miss(&ctx->rule->up)) {
4660 reason = OFPR_EXPLICIT_MISS;
4661 }
4662
4663 struct frozen_state state = {
4664 .table_id = ctx->table_id,
4665 .ofproto_uuid = ctx->xbridge->ofproto->uuid,
4666 .stack = ctx->stack.data,
4667 .stack_size = ctx->stack.size,
4668 .mirrors = ctx->mirrors,
4669 .conntracked = ctx->conntracked,
4670 .ofpacts = NULL,
4671 .ofpacts_len = 0,
4672 .action_set = NULL,
4673 .action_set_len = 0,
4674 .userdata = CONST_CAST(uint8_t *, userdata),
4675 .userdata_len = userdata_len,
4676 };
4677 frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
4678
4679 uint32_t recirc_id = recirc_alloc_id_ctx(&state);
4680 if (!recirc_id) {
4681 xlate_report_error(ctx, "Failed to allocate recirculation id");
4682 ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
4683 return;
4684 }
4685 recirc_refs_add(&ctx->xout->recircs, recirc_id);
4686
4687 /* If the controller action didn't request a meter (indicated by a
4688 * 'meter_id' argument other than NX_CTLR_NO_METER), see if one was
4689 * configured through the "controller" virtual meter.
4690 *
4691 * Internally, ovs-vswitchd uses UINT32_MAX to indicate no meter is
4692 * configured. */
4693 uint32_t meter_id;
4694 if (provider_meter_id == UINT32_MAX) {
4695 meter_id = ctx->xbridge->ofproto->up.controller_meter_id;
4696 } else {
4697 meter_id = provider_meter_id;
4698 }
4699
4700 size_t offset;
4701 size_t ac_offset;
4702 if (meter_id != UINT32_MAX) {
4703 /* If controller meter is configured, generate clone(meter, userspace)
4704 * action. */
4705 offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_SAMPLE);
4706 nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY,
4707 UINT32_MAX);
4708 ac_offset = nl_msg_start_nested(ctx->odp_actions,
4709 OVS_SAMPLE_ATTR_ACTIONS);
4710 nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_METER, meter_id);
4711 }
4712
4713 /* Generate the datapath flows even if we don't send the packet-in
4714 * so that debugging more closely represents normal state. */
4715 bool dont_send = false;
4716 if (!ctx->xin->allow_side_effects && !ctx->xin->xcache) {
4717 dont_send = true;
4718 }
4719 put_controller_user_action(ctx, dont_send, false, recirc_id, len,
4720 reason, controller_id);
4721
4722 if (meter_id != UINT32_MAX) {
4723 nl_msg_end_nested(ctx->odp_actions, ac_offset);
4724 nl_msg_end_nested(ctx->odp_actions, offset);
4725 }
4726 }
4727
4728 /* Creates a frozen state, and allocates a unique recirc id for the given
4729 * state. Returns a non-zero recirc id if it is allocated successfully.
4730 * Returns 0 otherwise.
4731  */
4732 static uint32_t
4733 finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
4734 {
4735 ovs_assert(ctx->freezing);
4736
4737 struct frozen_state state = {
4738 .table_id = table,
4739 .ofproto_uuid = ctx->xbridge->ofproto->uuid,
4740 .stack = ctx->stack.data,
4741 .stack_size = ctx->stack.size,
4742 .mirrors = ctx->mirrors,
4743 .conntracked = ctx->conntracked,
4744 .xport_uuid = ctx->xin->xport_uuid,
4745 .ofpacts = ctx->frozen_actions.data,
4746 .ofpacts_len = ctx->frozen_actions.size,
4747 .action_set = ctx->action_set.data,
4748 .action_set_len = ctx->action_set.size,
4749 .userdata = ctx->pause ? CONST_CAST(uint8_t *,ctx->pause->userdata)
4750 : NULL,
4751 .userdata_len = ctx->pause ? ctx->pause->userdata_len : 0,
4752 };
4753 frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
4754
4755 /* Allocate a unique recirc id for the given metadata state in the
4756 * flow. An existing id, with a new reference to the corresponding
4757 * recirculation context, will be returned if possible.
4758 * The life-cycle of this recirc id is managed by associating it
4759 * with the udpif key ('ukey') created for each new datapath flow. */
4760 uint32_t recirc_id = recirc_alloc_id_ctx(&state);
4761 if (!recirc_id) {
4762 xlate_report_error(ctx, "Failed to allocate recirculation id");
4763 ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
4764 return 0;
4765 }
4766 recirc_refs_add(&ctx->xout->recircs, recirc_id);
4767
4768 if (ctx->pause) {
4769 if (!ctx->xin->allow_side_effects && !ctx->xin->xcache) {
4770 return 0;
4771 }
4772
4773 put_controller_user_action(ctx, false, true, recirc_id,
4774 ctx->pause->max_len,
4775 ctx->pause->reason,
4776 ctx->pause->controller_id);
4777 } else {
4778 if (ctx->recirc_update_dp_hash) {
4779 struct ovs_action_hash *act_hash;
4780
4781 /* Hash action. */
4782 act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
4783 OVS_ACTION_ATTR_HASH,
4784 sizeof *act_hash);
4785 act_hash->hash_alg = ctx->dp_hash_alg;
4786 act_hash->hash_basis = ctx->dp_hash_basis;
4787 }
4788 nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, recirc_id);
4789 }
4790
4791 /* Undo changes done by freezing. */
4792 ctx_cancel_freeze(ctx);
4793 return recirc_id;
4794 }
4795
4796 /* Called only when we're freezing. */
4797 static void
4798 finish_freezing(struct xlate_ctx *ctx)
4799 {
4800 xlate_commit_actions(ctx);
4801 finish_freezing__(ctx, 0);
4802 }
4803
4804 /* Fork the pipeline here. The current packet will continue processing the
4805 * current action list. A clone of the current packet will recirculate, skip
4806 * the remainder of the current action list and asynchronously resume pipeline
4807 * processing in 'table' with the current metadata and action set. */
4808 static void
4809 compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table,
4810 const uint16_t zone)
4811 {
4812 uint32_t recirc_id;
4813 ctx->freezing = true;
4814 recirc_id = finish_freezing__(ctx, table);
4815
4816 if (OVS_UNLIKELY(ctx->xin->trace) && recirc_id) {
4817 if (oftrace_add_recirc_node(ctx->xin->recirc_queue,
4818 OFT_RECIRC_CONNTRACK, &ctx->xin->flow,
4819 ctx->xin->packet, recirc_id, zone)) {
4820 xlate_report(ctx, OFT_DETAIL, "A clone of the packet is forked to "
4821 "recirculate. The forked pipeline will be resumed at "
4822 "table %u.", table);
4823 } else {
4824 xlate_report(ctx, OFT_DETAIL, "Failed to trace the conntrack "
4825 "forked pipeline with recirc_id = %d.", recirc_id);
4826 }
4827 }
4828 }
4829
4830 static void
4831 compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls)
4832 {
4833 struct flow *flow = &ctx->xin->flow;
4834 int n;
4835
4836 ovs_assert(eth_type_mpls(mpls->ethertype));
4837
4838 n = flow_count_mpls_labels(flow, ctx->wc);
4839 if (!n) {
4840 xlate_commit_actions(ctx);
4841 } else if (n >= FLOW_MAX_MPLS_LABELS) {
4842 if (ctx->xin->packet != NULL) {
4843 xlate_report_error(ctx, "dropping packet on which an MPLS push "
4844 "action can't be performed as it would have "
4845 "more MPLS LSEs than the %d supported.",
4846 FLOW_MAX_MPLS_LABELS);
4847 }
4848 ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
4849 return;
4850 }
4851
4852 /* Update flow's MPLS stack, and clear L3/4 fields to mark them invalid. */
4853 flow_push_mpls(flow, n, mpls->ethertype, ctx->wc, true);
4854 }
4855
4856 static void
4857 compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
4858 {
4859 struct flow *flow = &ctx->xin->flow;
4860 int n = flow_count_mpls_labels(flow, ctx->wc);
4861
4862 if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
4863 if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
4864 ctx->was_mpls = true;
4865 }
4866 } else if (n >= FLOW_MAX_MPLS_LABELS) {
4867 if (ctx->xin->packet != NULL) {
4868 xlate_report_error(ctx, "dropping packet on which an "
4869 "MPLS pop action can't be performed as it has "
4870 "more MPLS LSEs than the %d supported.",
4871 FLOW_MAX_MPLS_LABELS);
4872 }
4873 ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
4874 ofpbuf_clear(ctx->odp_actions);
4875 }
4876 }
4877
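/* Decrements the IP TTL in the flow.  Returns false if processing should
 * continue; returns true, after sending packet-ins to the controllers listed
 * in 'ids', when the TTL has reached its limit and processing for the current
 * table must stop. */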
4878 static bool
4879 compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids)
4880 {
4881 struct flow *flow = &ctx->xin->flow;
4882
4883 if (!is_ip_any(flow)) {
4884 return false;
4885 }
4886
4887 ctx->wc->masks.nw_ttl = 0xff;
4888 if (flow->nw_ttl > 1) {
4889 flow->nw_ttl--;
4890 return false;
4891 } else {
4892 size_t i;
4893
4894 for (i = 0; i < ids->n_controllers; i++) {
4895 xlate_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
4896 ids->cnt_ids[i], UINT32_MAX, NULL, 0);
4897 }
4898
4899 /* Stop processing for current table. */
4900 xlate_report(ctx, OFT_WARN, "IPv%d decrement TTL exception",
4901 flow->dl_type == htons(ETH_TYPE_IP) ? 4 : 6);
4902 return true;
4903 }
4904 }
4905
4906 static void
4907 compose_set_mpls_label_action(struct xlate_ctx *ctx, ovs_be32 label)
4908 {
4909 if (eth_type_mpls(ctx->xin->flow.dl_type)) {
4910 ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_LABEL_MASK);
4911 set_mpls_lse_label(&ctx->xin->flow.mpls_lse[0], label);
4912 }
4913 }
4914
4915 static void
4916 compose_set_mpls_tc_action(struct xlate_ctx *ctx, uint8_t tc)
4917 {
4918 if (eth_type_mpls(ctx->xin->flow.dl_type)) {
4919 ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TC_MASK);
4920 set_mpls_lse_tc(&ctx->xin->flow.mpls_lse[0], tc);
4921 }
4922 }
4923
4924 static bool
4925 compose_dec_nsh_ttl_action(struct xlate_ctx *ctx)
4926 {
4927 struct flow *flow = &ctx->xin->flow;
4928
4929 if ((flow->packet_type == htonl(PT_NSH)) ||
4930 (flow->dl_type == htons(ETH_TYPE_NSH))) {
4931 ctx->wc->masks.nsh.ttl = 0xff;
4932 if (flow->nsh.ttl > 1) {
4933 flow->nsh.ttl--;
4934 return false;
4935 } else {
4936 xlate_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
4937 0, UINT32_MAX, NULL, 0);
4938 }
4939 }
4940
4941 /* Stop processing for current table. */
4942 xlate_report(ctx, OFT_WARN, "NSH decrement TTL exception");
4943 return true;
4944 }
4945
4946 static void
4947 compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
4948 {
4949 if (eth_type_mpls(ctx->xin->flow.dl_type)) {
4950 ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
4951 set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse[0], ttl);
4952 }
4953 }
4954
4955 static bool
4956 compose_dec_mpls_ttl_action(struct xlate_ctx *ctx)
4957 {
4958 struct flow *flow = &ctx->xin->flow;
4959
4960 if (eth_type_mpls(flow->dl_type)) {
4961 uint8_t ttl = mpls_lse_to_ttl(flow->mpls_lse[0]);
4962
4963 ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
4964 if (ttl > 1) {
4965 ttl--;
4966 set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
4967 return false;
4968 } else {
4969 xlate_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0,
4970 UINT32_MAX, NULL, 0);
4971 }
4972 }
4973
4974 /* Stop processing for current table. */
4975 xlate_report(ctx, OFT_WARN, "MPLS decrement TTL exception");
4976 return true;
4977 }
4978
4979 /* Emits an action that outputs to 'port', within 'ctx'.
4980 *
4981 * 'controller_len' affects only packets sent to an OpenFlow controller. It
4982 * is the maximum number of bytes of the packet to send. UINT16_MAX means to
4983 * send the whole packet (and 0 means to omit the packet entirely).
4984 *
4985 * 'may_packet_in' determines whether the packet may be sent to an OpenFlow
4986 * controller. If it is false, then the packet is never sent to the OpenFlow
4987 * controller.
4988 *
4989 * 'is_last_action' should be true if this output is the last OpenFlow action
4990 * to be processed, which enables certain optimizations.
4991 *
4992 * 'truncate' should be true if the packet to be output is being truncated,
4993 * which suppresses certain optimizations. */
4994 static void
4995 xlate_output_action(struct xlate_ctx *ctx, ofp_port_t port,
4996 uint16_t controller_len, bool may_packet_in,
4997 bool is_last_action, bool truncate,
4998 bool group_bucket_action)
4999 {
5000 ofp_port_t prev_nf_output_iface = ctx->nf_output_iface;
5001
5002 ctx->nf_output_iface = NF_OUT_DROP;
5003
5004 switch (port) {
5005 case OFPP_IN_PORT:
5006 compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL,
5007 is_last_action, truncate);
5008 break;
5009 case OFPP_TABLE:
5010 xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
5011 0, may_packet_in, true, false, false,
5012 do_xlate_actions);
5013 break;
5014 case OFPP_NORMAL:
5015 xlate_normal(ctx);
5016 break;
5017 case OFPP_FLOOD:
5018 flood_packets(ctx, false, is_last_action);
5019 break;
5020 case OFPP_ALL:
5021 flood_packets(ctx, true, is_last_action);
5022 break;
5023 case OFPP_CONTROLLER:
5024 xlate_controller_action(ctx, controller_len,
5025 (ctx->in_packet_out ? OFPR_PACKET_OUT
5026 : group_bucket_action ? OFPR_GROUP
5027 : ctx->in_action_set ? OFPR_ACTION_SET
5028 : OFPR_ACTION),
5029 0, UINT32_MAX, NULL, 0);
5030 break;
5031 case OFPP_NONE:
5032 break;
5033 case OFPP_LOCAL:
5034 default:
5035 if (port != ctx->xin->flow.in_port.ofp_port) {
5036 compose_output_action(ctx, port, NULL, is_last_action, truncate);
5037 } else {
5038 xlate_report_info(ctx, "skipping output to input port");
5039 }
5040 break;
5041 }
5042
5043 if (prev_nf_output_iface == NF_OUT_FLOOD) {
5044 ctx->nf_output_iface = NF_OUT_FLOOD;
5045 } else if (ctx->nf_output_iface == NF_OUT_DROP) {
5046 ctx->nf_output_iface = prev_nf_output_iface;
5047 } else if (prev_nf_output_iface != NF_OUT_DROP &&
5048 ctx->nf_output_iface != NF_OUT_FLOOD) {
5049 ctx->nf_output_iface = NF_OUT_MULTI;
5050 }
5051 }
5052
5053 static void
5054 xlate_output_reg_action(struct xlate_ctx *ctx,
5055 const struct ofpact_output_reg *or,
5056 bool is_last_action,
5057 bool group_bucket_action)
5058 {
5059 uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow);
5060 if (port <= UINT16_MAX) {
5061 xlate_report(ctx, OFT_DETAIL, "output port is %"PRIu64, port);
5062
5063 union mf_subvalue value;
5064
5065 memset(&value, 0xff, sizeof value);
5066 mf_write_subfield_flow(&or->src, &value, &ctx->wc->masks);
5067 xlate_output_action(ctx, u16_to_ofp(port), or->max_len,
5068 false, is_last_action, false,
5069 group_bucket_action);
5070 } else {
5071 xlate_report(ctx, OFT_WARN, "output port %"PRIu64" is out of range",
5072 port);
5073 }
5074 }
5075
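/* Translates an output action with truncation: emits an OVS_ACTION_ATTR_TRUNC
 * of 'max_len' bytes immediately before the output to 'port'.  Reserved ports
 * and patch ports are rejected with a report, and if the datapath lacks
 * native truncation support the resulting flow takes the slow path. */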
5076 static void
5077 xlate_output_trunc_action(struct xlate_ctx *ctx,
5078 ofp_port_t port, uint32_t max_len,
5079 bool is_last_action,
5080 bool group_bucket_action)
5081 {
5082 bool support_trunc = ctx->xbridge->support.trunc;
5083 struct ovs_action_trunc *trunc;
5084 char name[OFP10_MAX_PORT_NAME_LEN];
5085
5086 switch (port) {
5087 case OFPP_TABLE:
5088 case OFPP_NORMAL:
5089 case OFPP_FLOOD:
5090 case OFPP_ALL:
5091 case OFPP_CONTROLLER:
5092 case OFPP_NONE:
5093 ofputil_port_to_string(port, NULL, name, sizeof name);
5094 xlate_report(ctx, OFT_WARN,
5095 "output_trunc does not support port: %s", name);
5096 break;
5097 case OFPP_LOCAL:
5098 case OFPP_IN_PORT:
5099 default:
5100 if (port != ctx->xin->flow.in_port.ofp_port) {
5101 const struct xport *xport = get_ofp_port(ctx->xbridge, port);
5102
5103 if (xport == NULL || xport->odp_port == ODPP_NONE) {
5104 /* Since truncation happens at the output action that follows, the
5105 * behavior is somewhat unpredictable if the output port is a patch
5106 * port. For simplicity, disallow this case. */
5107 ofputil_port_to_string(port, NULL, name, sizeof name);
5108 xlate_report_error(ctx, "output_trunc does not support "
5109 "patch port %s", name);
5110 break;
5111 }
5112
5113 trunc = nl_msg_put_unspec_uninit(ctx->odp_actions,
5114 OVS_ACTION_ATTR_TRUNC,
5115 sizeof *trunc);
5116 trunc->max_len = max_len;
5117 xlate_output_action(ctx, port, 0, false, is_last_action, true,
5118 group_bucket_action);
5119 if (!support_trunc) {
5120 ctx->xout->slow |= SLOW_ACTION;
5121 }
5122 } else {
5123 xlate_report_info(ctx, "skipping output to input port");
5124 }
5125 break;
5126 }
5127 }
5128
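/* Translates an OpenFlow 1.0 "enqueue" action.  The queue is mapped to a
 * datapath priority; if that fails, this falls back to an ordinary output.
 * Otherwise the flow's skb_priority is overridden while the output is
 * composed and then restored, and the NetFlow output port is updated. */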
5129 static void
5130 xlate_enqueue_action(struct xlate_ctx *ctx,
5131 const struct ofpact_enqueue *enqueue,
5132 bool is_last_action,
5133 bool group_bucket_action)
5134 {
5135 ofp_port_t ofp_port = enqueue->port;
5136 uint32_t queue_id = enqueue->queue;
5137 uint32_t flow_priority, priority;
5138 int error;
5139
5140 /* Translate queue to priority. */
5141 error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority);
5142 if (error) {
5143 /* Fall back to ordinary output action. */
5144 xlate_output_action(ctx, enqueue->port, 0, false,
5145 is_last_action, false,
5146 group_bucket_action);
5147 return;
5148 }
5149
5150 /* Check output port. */
5151 if (ofp_port == OFPP_IN_PORT) {
5152 ofp_port = ctx->xin->flow.in_port.ofp_port;
5153 } else if (ofp_port == ctx->xin->flow.in_port.ofp_port) {
5154 return;
5155 }
5156
5157 /* Add datapath actions. */
5158 flow_priority = ctx->xin->flow.skb_priority;
5159 ctx->xin->flow.skb_priority = priority;
5160 compose_output_action(ctx, ofp_port, NULL, is_last_action, false);
5161 ctx->xin->flow.skb_priority = flow_priority;
5162
5163 /* Update NetFlow output port. */
5164 if (ctx->nf_output_iface == NF_OUT_DROP) {
5165 ctx->nf_output_iface = ofp_port;
5166 } else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
5167 ctx->nf_output_iface = NF_OUT_MULTI;
5168 }
5169 }
5170
5171 static void
5172 xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
5173 {
5174 uint32_t skb_priority;
5175
5176 if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) {
5177 ctx->xin->flow.skb_priority = skb_priority;
5178 } else {
5179 /* Couldn't translate queue to a priority. Nothing to do. A warning
5180 * has already been logged. */
5181 }
5182 }
5183
5184 static bool
5185 slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
5186 {
5187 const struct xbridge *xbridge = xbridge_;
5188 struct xport *port;
5189
5190 switch (ofp_port) {
5191 case OFPP_IN_PORT:
5192 case OFPP_TABLE:
5193 case OFPP_NORMAL:
5194 case OFPP_FLOOD:
5195 case OFPP_ALL:
5196 case OFPP_NONE:
5197 return true;
5198 case OFPP_CONTROLLER: /* Not supported by the bundle action. */
5199 return false;
5200 default:
5201 port = get_ofp_port(xbridge, ofp_port);
5202 return port ? port->may_enable : false;
5203 }
5204 }
5205
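/* Translates a "bundle" action: runs the bundle's selection algorithm over
 * the enabled member ports (see slave_enabled_cb) and then either loads the
 * chosen port number into the 'bundle->dst' field, if one is set, or outputs
 * to the chosen port directly. */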
5206 static void
5207 xlate_bundle_action(struct xlate_ctx *ctx,
5208 const struct ofpact_bundle *bundle,
5209 bool is_last_action,
5210 bool group_bucket_action)
5211 {
5212 ofp_port_t port;
5213
5214 port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb,
5215 CONST_CAST(struct xbridge *, ctx->xbridge));
5216 if (bundle->dst.field) {
5217 nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc);
5218 xlate_report_subfield(ctx, &bundle->dst);
5219 } else {
5220 xlate_output_action(ctx, port, 0, false, is_last_action, false,
5221 group_bucket_action);
5222 }
5223 }
5224
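/* Translates a "learn" action.  The flow wildcards are always updated from
 * the learn specification.  If side effects are allowed or an xlate cache is
 * in use, a flow_mod is built from the current flow and the learned flow is
 * installed or refreshed, honoring the action's limit and optional result
 * destination; otherwise the action is reported and ignored. */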
5225 static void
5226 xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
5227 {
5228 learn_mask(learn, ctx->wc);
5229
5230 if (ctx->xin->xcache || ctx->xin->allow_side_effects) {
5231 uint64_t ofpacts_stub[1024 / 8];
5232 struct ofputil_flow_mod fm;
5233 struct ofproto_flow_mod ofm__, *ofm;
5234 struct ofpbuf ofpacts;
5235 enum ofperr error;
5236
5237 if (ctx->xin->xcache) {
5238 ofm = xmalloc(sizeof *ofm);
5239 } else {
5240 ofm = &ofm__;
5241 }
5242
5243 ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
5244 learn_execute(learn, &ctx->xin->flow, &fm, &ofpacts);
5245 if (OVS_UNLIKELY(ctx->xin->trace)) {
5246 struct ds s = DS_EMPTY_INITIALIZER;
5247 ds_put_format(&s, "table=%"PRIu8" ", fm.table_id);
5248 minimatch_format(&fm.match,
5249 ofproto_get_tun_tab(&ctx->xin->ofproto->up),
5250 NULL, &s, OFP_DEFAULT_PRIORITY);
5251 ds_chomp(&s, ' ');
5252 ds_put_format(&s, " priority=%d", fm.priority);
5253 if (fm.new_cookie) {
5254 ds_put_format(&s, " cookie=%#"PRIx64, ntohll(fm.new_cookie));
5255 }
5256 if (fm.idle_timeout != OFP_FLOW_PERMANENT) {
5257 ds_put_format(&s, " idle=%"PRIu16, fm.idle_timeout);
5258 }
5259 if (fm.hard_timeout != OFP_FLOW_PERMANENT) {
5260 ds_put_format(&s, " hard=%"PRIu16, fm.hard_timeout);
5261 }
5262 if (fm.flags & NX_LEARN_F_SEND_FLOW_REM) {
5263 ds_put_cstr(&s, " send_flow_rem");
5264 }
5265 ds_put_cstr(&s, " actions=");
5266 struct ofpact_format_params fp = { .s = &s };
5267 ofpacts_format(fm.ofpacts, fm.ofpacts_len, &fp);
5268 xlate_report(ctx, OFT_DETAIL, "%s", ds_cstr(&s));
5269 ds_destroy(&s);
5270 }
5271 error = ofproto_dpif_flow_mod_init_for_learn(ctx->xbridge->ofproto,
5272 &fm, ofm);
5273 ofpbuf_uninit(&ofpacts);
5274
5275 if (!error) {
5276 bool success = true;
5277 if (ctx->xin->allow_side_effects) {
5278 error = ofproto_flow_mod_learn(ofm, ctx->xin->xcache != NULL,
5279 learn->limit, &success);
5280 } else if (learn->limit) {
5281 if (!ofm->temp_rule
5282 || ofm->temp_rule->state != RULE_INSERTED) {
5283 /* The learned rule expired and there are no packets, so
5284 * we cannot learn again. Since the translated actions
5285 * depend on the result of learning, we tell the caller
5286 * that there's no point in caching this result. */
5287 ctx->xout->avoid_caching = true;
5288 }
5289 }
5290
5291 if (learn->flags & NX_LEARN_F_WRITE_RESULT) {
5292 nxm_reg_load(&learn->result_dst, success ? 1 : 0,
5293 &ctx->xin->flow, ctx->wc);
5294 xlate_report_subfield(ctx, &learn->result_dst);
5295 }
5296
5297 if (success && ctx->xin->xcache) {
5298 struct xc_entry *entry;
5299
5300 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_LEARN);
5301 entry->learn.ofm = ofm;
5302 entry->learn.limit = learn->limit;
5303 ofm = NULL;
5304 } else {
5305 ofproto_flow_mod_uninit(ofm);
5306 }
5307
5308 if (OVS_UNLIKELY(ctx->xin->trace && !success)) {
5309 xlate_report(ctx, OFT_DETAIL, "Limit exceeded, learn failed");
5310 }
5311 }
5312
5313 if (ofm != &ofm__) {
5314 free(ofm);
5315 }
5316
5317 if (error) {
5318 xlate_report_error(ctx, "LEARN action execution failed (%s).",
5319 ofperr_to_string(error));
5320 }
5321
5322 minimatch_destroy(&fm.match);
5323 } else {
5324 xlate_report(ctx, OFT_WARN,
5325 "suppressing side effects, so learn action ignored");
5326 }
5327 }
5328
5329 static void
5330 xlate_fin_timeout__(struct rule_dpif *rule, uint16_t tcp_flags,
5331 uint16_t idle_timeout, uint16_t hard_timeout)
5332 {
5333 if (tcp_flags & (TCP_FIN | TCP_RST)) {
5334 ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
5335 }
5336 }
5337
5338 static void
5339 xlate_fin_timeout(struct xlate_ctx *ctx,
5340 const struct ofpact_fin_timeout *oft)
5341 {
5342 if (ctx->rule) {
5343 if (ctx->xin->allow_side_effects) {
5344 xlate_fin_timeout__(ctx->rule, ctx->xin->tcp_flags,
5345 oft->fin_idle_timeout, oft->fin_hard_timeout);
5346 }
5347 if (ctx->xin->xcache) {
5348 struct xc_entry *entry;
5349
5350 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_FIN_TIMEOUT);
5351 /* XC_RULE already holds a reference on the rule, none is taken
5352 * here. */
5353 entry->fin.rule = ctx->rule;
5354 entry->fin.idle = oft->fin_idle_timeout;
5355 entry->fin.hard = oft->fin_hard_timeout;
5356 }
5357 }
5358 }
5359
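/* Translates a "sample" action used for IPFIX flow sampling.  This is a
 * no-op unless an IPFIX exporter is configured.  It resolves the optional
 * sampling port, commits pending actions, emits a set(tunnel(...)) when
 * sampling an egress tunnel port, and finally composes a datapath sample
 * action whose userspace cookie carries the collector set and observation
 * domain/point IDs. */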
5360 static void
5361 xlate_sample_action(struct xlate_ctx *ctx,
5362 const struct ofpact_sample *os)
5363 {
5364 odp_port_t output_odp_port = ODPP_NONE;
5365 odp_port_t tunnel_out_port = ODPP_NONE;
5366 struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
5367 bool emit_set_tunnel = false;
5368
5369 if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
5370 return;
5371 }
5372
5373 /* Scale the probability from 16-bit to 32-bit while representing
5374 * the same percentage. */
5375 uint32_t probability = (os->probability << 16) | os->probability;
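/* Illustrative example (not part of the original source): the OpenFlow
 * probability is a fraction of UINT16_MAX, so 0x8000 is about 50%.  Since
 * (p << 16) | p == p * 0x10001 and UINT32_MAX == 0xffff * 0x10001, the
 * scaled value 0x80008000 is exactly the same fraction of UINT32_MAX. */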
5376
5377 /* If the sampling port in the flow sample action is equal to the input
5378 * port, this sample action is an input port action. */
5379 if (os->sampling_port != OFPP_NONE &&
5380 os->sampling_port != ctx->xin->flow.in_port.ofp_port) {
5381 output_odp_port = ofp_port_to_odp_port(ctx->xbridge,
5382 os->sampling_port);
5383 if (output_odp_port == ODPP_NONE) {
5384 xlate_report_error(ctx, "can't use unknown port %d in flow sample "
5385 "action", os->sampling_port);
5386 return;
5387 }
5388
5389 if (dpif_ipfix_get_flow_exporter_tunnel_sampling(ipfix,
5390 os->collector_set_id)
5391 && dpif_ipfix_is_tunnel_port(ipfix, output_odp_port)) {
5392 tunnel_out_port = output_odp_port;
5393 emit_set_tunnel = true;
5394 }
5395 }
5396
5397 xlate_commit_actions(ctx);
5398 /* If 'emit_set_tunnel' is set, a sample(sampling_port=1) action translates
5399 * into the datapath actions set(tunnel(...)),sample(...), which are used
5400 * for sampling egress tunnel information. */
5401 if (emit_set_tunnel) {
5402 const struct xport *xport = get_ofp_port(ctx->xbridge,
5403 os->sampling_port);
5404
5405 if (xport && xport->is_tunnel) {
5406 struct flow *flow = &ctx->xin->flow;
5407 tnl_port_send(xport->ofport, flow, ctx->wc);
5408 if (!ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
5409 struct flow_tnl flow_tnl = flow->tunnel;
5410 const char *tnl_type;
5411
5412 tnl_type = tnl_port_get_type(xport->ofport);
5413 commit_odp_tunnel_action(flow, &ctx->base_flow,
5414 ctx->odp_actions, tnl_type);
5415 flow->tunnel = flow_tnl;
5416 }
5417 } else {
5418 xlate_report_error(ctx,
5419 "sampling_port:%d should be a tunnel port.",
5420 os->sampling_port);
5421 }
5422 }
5423
5424 struct user_action_cookie cookie = {
5425 .type = USER_ACTION_COOKIE_FLOW_SAMPLE,
5426 .ofp_in_port = ctx->xin->flow.in_port.ofp_port,
5427 .ofproto_uuid = ctx->xbridge->ofproto->uuid,
5428 .flow_sample = {
5429 .probability = os->probability,
5430 .collector_set_id = os->collector_set_id,
5431 .obs_domain_id = os->obs_domain_id,
5432 .obs_point_id = os->obs_point_id,
5433 .output_odp_port = output_odp_port,
5434 .direction = os->direction,
5435 }
5436 };
5437 compose_sample_action(ctx, probability, &cookie, tunnel_out_port, false);
5438 }
5439
5440 /* Determines whether a datapath action translated from the OpenFlow action
5441 * can be reversed by another datapath action.
5442 *
5443 * OpenFlow actions that do not emit datapath actions are trivially
5444 * reversible. Reversibility of other actions depends on the nature of the
5445 * action and its translation. */
5446 static bool
5447 reversible_actions(const struct ofpact *ofpacts, size_t ofpacts_len)
5448 {
5449 const struct ofpact *a;
5450
5451 OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
5452 switch (a->type) {
5453 case OFPACT_BUNDLE:
5454 case OFPACT_CLEAR_ACTIONS:
5455 case OFPACT_CLONE:
5456 case OFPACT_CONJUNCTION:
5457 case OFPACT_CONTROLLER:
5458 case OFPACT_CT_CLEAR:
5459 case OFPACT_DEBUG_RECIRC:
5460 case OFPACT_DEBUG_SLOW:
5461 case OFPACT_DEC_MPLS_TTL:
5462 case OFPACT_DEC_TTL:
5463 case OFPACT_ENQUEUE:
5464 case OFPACT_EXIT:
5465 case OFPACT_FIN_TIMEOUT:
5466 case OFPACT_GOTO_TABLE:
5467 case OFPACT_GROUP:
5468 case OFPACT_LEARN:
5469 case OFPACT_MULTIPATH:
5470 case OFPACT_NOTE:
5471 case OFPACT_OUTPUT:
5472 case OFPACT_OUTPUT_REG:
5473 case OFPACT_POP_MPLS:
5474 case OFPACT_POP_QUEUE:
5475 case OFPACT_PUSH_MPLS:
5476 case OFPACT_PUSH_VLAN:
5477 case OFPACT_REG_MOVE:
5478 case OFPACT_RESUBMIT:
5479 case OFPACT_SAMPLE:
5480 case OFPACT_SET_ETH_DST:
5481 case OFPACT_SET_ETH_SRC:
5482 case OFPACT_SET_FIELD:
5483 case OFPACT_SET_IP_DSCP:
5484 case OFPACT_SET_IP_ECN:
5485 case OFPACT_SET_IP_TTL:
5486 case OFPACT_SET_IPV4_DST:
5487 case OFPACT_SET_IPV4_SRC:
5488 case OFPACT_SET_L4_DST_PORT:
5489 case OFPACT_SET_L4_SRC_PORT:
5490 case OFPACT_SET_MPLS_LABEL:
5491 case OFPACT_SET_MPLS_TC:
5492 case OFPACT_SET_MPLS_TTL:
5493 case OFPACT_SET_QUEUE:
5494 case OFPACT_SET_TUNNEL:
5495 case OFPACT_SET_VLAN_PCP:
5496 case OFPACT_SET_VLAN_VID:
5497 case OFPACT_STACK_POP:
5498 case OFPACT_STACK_PUSH:
5499 case OFPACT_STRIP_VLAN:
5500 case OFPACT_UNROLL_XLATE:
5501 case OFPACT_WRITE_ACTIONS:
5502 case OFPACT_WRITE_METADATA:
5503 break;
5504
5505 case OFPACT_CT:
5506 case OFPACT_METER:
5507 case OFPACT_NAT:
5508 case OFPACT_OUTPUT_TRUNC:
5509 case OFPACT_ENCAP:
5510 case OFPACT_DECAP:
5511 case OFPACT_DEC_NSH_TTL:
5512 return false;
5513 }
5514 }
5515 return true;
5516 }
5517
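/* Translates 'actions' as a clone, i.e. without letting them affect how later
 * actions see the packet.  The stack, action set, and flow are saved and
 * restored around the nested translation.  Reversible actions (or the last
 * action) are translated inline; otherwise they are wrapped in a datapath
 * clone action, or in a 100% sample action when clone is not supported. */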
5518 static void
5519 clone_xlate_actions(const struct ofpact *actions, size_t actions_len,
5520 struct xlate_ctx *ctx, bool is_last_action,
5521 bool group_bucket_action OVS_UNUSED)
5522 {
5523 struct ofpbuf old_stack = ctx->stack;
5524 union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
5525 ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
5526 ofpbuf_put(&ctx->stack, old_stack.data, old_stack.size);
5527
5528 struct ofpbuf old_action_set = ctx->action_set;
5529 uint64_t actset_stub[1024 / 8];
5530 ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
5531 ofpbuf_put(&ctx->action_set, old_action_set.data, old_action_set.size);
5532
5533 size_t offset, ac_offset;
5534 struct flow old_flow = ctx->xin->flow;
5535
5536 if (reversible_actions(actions, actions_len) || is_last_action) {
5537 old_flow = ctx->xin->flow;
5538 do_xlate_actions(actions, actions_len, ctx, is_last_action, false);
5539 if (!ctx->freezing) {
5540 xlate_action_set(ctx);
5541 }
5542 if (ctx->freezing) {
5543 finish_freezing(ctx);
5544 }
5545 goto xlate_done;
5546 }
5547
5548 /* Commit datapath actions before emitting the clone action to avoid
5549 * emitting those actions twice: once inside the clone and again for the
5550 * actions that follow the clone. */
5551 xlate_commit_actions(ctx);
5552 struct flow old_base = ctx->base_flow;
5553 bool old_was_mpls = ctx->was_mpls;
5554 bool old_conntracked = ctx->conntracked;
5555
5556 /* The actions are not reversible; a datapath clone action is required to
5557 * encode the translation. Select the clone action based on datapath
5558 * capabilities. */
5559 if (ctx->xbridge->support.clone) {
5560 /* Use the clone action as the datapath clone. */
5561 offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CLONE);
5562 do_xlate_actions(actions, actions_len, ctx, true, false);
5563 if (!ctx->freezing) {
5564 xlate_action_set(ctx);
5565 }
5566 if (ctx->freezing) {
5567 finish_freezing(ctx);
5568 }
5569 nl_msg_end_non_empty_nested(ctx->odp_actions, offset);
5570 goto dp_clone_done;
5571 }
5572
5573 if (ctx->xbridge->support.sample_nesting > 3) {
5574 /* Use sample action as datapath clone. */
5575 offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_SAMPLE);
5576 ac_offset = nl_msg_start_nested(ctx->odp_actions,
5577 OVS_SAMPLE_ATTR_ACTIONS);
5578 do_xlate_actions(actions, actions_len, ctx, true, false);
5579 if (!ctx->freezing) {
5580 xlate_action_set(ctx);
5581 }
5582 if (ctx->freezing) {
5583 finish_freezing(ctx);
5584 }
5585 if (nl_msg_end_non_empty_nested(ctx->odp_actions, ac_offset)) {
5586 nl_msg_cancel_nested(ctx->odp_actions, offset);
5587 } else {
5588 nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY,
5589 UINT32_MAX); /* 100% probability. */
5590 nl_msg_end_nested(ctx->odp_actions, offset);
5591 }
5592 goto dp_clone_done;
5593 }
5594
5595 /* The datapath supports neither clone nor deep enough sample nesting;
5596 * skip translating the nested actions and report an error. */
5597 xlate_report_error(ctx, "Failed to compose clone action");
5598
5599 dp_clone_done:
5600 /* The clone's conntrack execution should have no effect on the original
5601 * packet. */
5602 ctx->conntracked = old_conntracked;
5603
5604 /* Popping MPLS from the clone should have no effect on the original
5605 * packet. */
5606 ctx->was_mpls = old_was_mpls;
5607
5608 /* Restore the 'base_flow' for the next action. */
5609 ctx->base_flow = old_base;
5610
5611 xlate_done:
5612 ofpbuf_uninit(&ctx->action_set);
5613 ctx->action_set = old_action_set;
5614 ofpbuf_uninit(&ctx->stack);
5615 ctx->stack = old_stack;
5616 ctx->xin->flow = old_flow;
5617 }
5618
5619 static void
5620 compose_clone(struct xlate_ctx *ctx, const struct ofpact_nest *oc,
5621 bool is_last_action)
5622 {
5623 size_t oc_actions_len = ofpact_nest_get_action_len(oc);
5624
5625 clone_xlate_actions(oc->actions, oc_actions_len, ctx, is_last_action,
5626 false);
5627 }
5628
5629 static void
5630 xlate_meter_action(struct xlate_ctx *ctx, const struct ofpact_meter *meter)
5631 {
5632 if (meter->provider_meter_id != UINT32_MAX) {
5633 nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_METER,
5634 meter->provider_meter_id);
5635 }
5636 }
5637
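/* Returns true if 'xport' may receive the packet being translated, i.e. it is
 * not configured to drop received packets (OFPUTIL_PC_NO_RECV, or
 * OFPUTIL_PC_NO_RECV_STP for STP packets) and its STP/RSTP state permits at
 * least learning or forwarding. */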
5638 static bool
5639 may_receive(const struct xport *xport, struct xlate_ctx *ctx)
5640 {
5641 if (xport->config & (is_stp(&ctx->xin->flow)
5642 ? OFPUTIL_PC_NO_RECV_STP
5643 : OFPUTIL_PC_NO_RECV)) {
5644 return false;
5645 }
5646
5647 /* Only drop packets here if both forwarding and learning are
5648 * disabled. If just learning is enabled, we need to have
5649 * OFPP_NORMAL and the learning action have a look at the packet
5650 * before we can drop it. */
5651 if ((!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) ||
5652 (!xport_rstp_forward_state(xport) && !xport_rstp_learn_state(xport))) {
5653 return false;
5654 }
5655
5656 return true;
5657 }
5658
5659 static void
5660 xlate_write_actions__(struct xlate_ctx *ctx,
5661 const struct ofpact *ofpacts, size_t ofpacts_len)
5662 {
5663 /* Maintain actset_output depending on the contents of the action set:
5664 *
5665 * - OFPP_UNSET, if there is no "output" action.
5666 *
5667 * - The output port, if there is an "output" action and no "group"
5668 * action.
5669 *
5670 * - OFPP_UNSET, if there is a "group" action.
5671 */
5672 if (!ctx->action_set_has_group) {
5673 const struct ofpact *a;
5674 OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
5675 if (a->type == OFPACT_OUTPUT) {
5676 ctx->xin->flow.actset_output = ofpact_get_OUTPUT(a)->port;
5677 } else if (a->type == OFPACT_GROUP) {
5678 ctx->xin->flow.actset_output = OFPP_UNSET;
5679 ctx->action_set_has_group = true;
5680 break;
5681 }
5682 }
5683 }
5684
5685 ofpbuf_put(&ctx->action_set, ofpacts, ofpacts_len);
5686 }
5687
5688 static void
5689 xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact_nest *a)
5690 {
5691 xlate_write_actions__(ctx, a->actions, ofpact_nest_get_action_len(a));
5692 }
5693
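/* Translates the current action set: converts it into an equivalent action
 * list in the OpenFlow-defined execution order, clears the action set, and
 * translates the resulting list with 'in_action_set' set for the duration. */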
5694 static void
5695 xlate_action_set(struct xlate_ctx *ctx)
5696 {
5697 uint64_t action_list_stub[1024 / 8];
5698 struct ofpbuf action_list = OFPBUF_STUB_INITIALIZER(action_list_stub);
5699 ofpacts_execute_action_set(&action_list, &ctx->action_set);
5700 /* Clear the action set, as it is not needed any more. */
5701 ofpbuf_clear(&ctx->action_set);
5702 if (action_list.size) {
5703 ctx->in_action_set = true;
5704
5705 struct ovs_list *old_trace = ctx->xin->trace;
5706 ctx->xin->trace = xlate_report(ctx, OFT_TABLE,
5707 "--. Executing action set:");
5708 do_xlate_actions(action_list.data, action_list.size, ctx, true, false);
5709 ctx->xin->trace = old_trace;
5710
5711 ctx->in_action_set = false;
5712 }
5713 ofpbuf_uninit(&action_list);
5714 }
5715
5716 static void
5717 freeze_put_unroll_xlate(struct xlate_ctx *ctx)
5718 {
5719 struct ofpact_unroll_xlate *unroll = ctx->frozen_actions.header;
5720
5721 /* Record the current table_id and rule cookie so that they can be
5722 * restored for a potential PACKET_IN after thawing, if needed. */
5723 if (!unroll ||
5724 (ctx->table_id != unroll->rule_table_id
5725 || ctx->rule_cookie != unroll->rule_cookie)) {
5726 unroll = ofpact_put_UNROLL_XLATE(&ctx->frozen_actions);
5727 unroll->rule_table_id = ctx->table_id;
5728 unroll->rule_cookie = ctx->rule_cookie;
5729 ctx->frozen_actions.header = unroll;
5730 }
5731 }
5732
5733
5734 /* Copy actions 'a' through 'end' to ctx->frozen_actions, which will be
5735 * executed after thawing. Inserts an UNROLL_XLATE action, if none is already
5736 * present, before any action that may depend on the current table ID or flow
5737 * cookie. */
5738 static void
5739 freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end,
5740 struct xlate_ctx *ctx)
5741 {
5742 for (; a < end; a = ofpact_next(a)) {
5743 switch (a->type) {
5744 case OFPACT_OUTPUT_REG:
5745 case OFPACT_OUTPUT_TRUNC:
5746 case OFPACT_GROUP:
5747 case OFPACT_OUTPUT:
5748 case OFPACT_CONTROLLER:
5749 case OFPACT_DEC_MPLS_TTL:
5750 case OFPACT_DEC_NSH_TTL:
5751 case OFPACT_DEC_TTL:
5752 /* These actions may generate asynchronous messages, which include
5753 * table ID and flow cookie information. */
5754 freeze_put_unroll_xlate(ctx);
5755 break;
5756
5757 case OFPACT_RESUBMIT:
5758 if (ofpact_get_RESUBMIT(a)->table_id == 0xff) {
5759 /* This resubmit action is relative to the current table, so we
5760 * need to track what table that is. */
5761 freeze_put_unroll_xlate(ctx);
5762 }
5763 break;
5764
5765 case OFPACT_SET_TUNNEL:
5766 case OFPACT_REG_MOVE:
5767 case OFPACT_SET_FIELD:
5768 case OFPACT_STACK_PUSH:
5769 case OFPACT_STACK_POP:
5770 case OFPACT_LEARN:
5771 case OFPACT_WRITE_METADATA:
5772 case OFPACT_GOTO_TABLE:
5773 case OFPACT_ENQUEUE:
5774 case OFPACT_SET_VLAN_VID:
5775 case OFPACT_SET_VLAN_PCP:
5776 case OFPACT_STRIP_VLAN:
5777 case OFPACT_PUSH_VLAN:
5778 case OFPACT_SET_ETH_SRC:
5779 case OFPACT_SET_ETH_DST:
5780 case OFPACT_SET_IPV4_SRC:
5781 case OFPACT_SET_IPV4_DST:
5782 case OFPACT_SET_IP_DSCP:
5783 case OFPACT_SET_IP_ECN:
5784 case OFPACT_SET_IP_TTL:
5785 case OFPACT_SET_L4_SRC_PORT:
5786 case OFPACT_SET_L4_DST_PORT:
5787 case OFPACT_SET_QUEUE:
5788 case OFPACT_POP_QUEUE:
5789 case OFPACT_PUSH_MPLS:
5790 case OFPACT_POP_MPLS:
5791 case OFPACT_SET_MPLS_LABEL:
5792 case OFPACT_SET_MPLS_TC:
5793 case OFPACT_SET_MPLS_TTL:
5794 case OFPACT_MULTIPATH:
5795 case OFPACT_BUNDLE:
5796 case OFPACT_EXIT:
5797 case OFPACT_UNROLL_XLATE:
5798 case OFPACT_FIN_TIMEOUT:
5799 case OFPACT_CLEAR_ACTIONS:
5800 case OFPACT_WRITE_ACTIONS:
5801 case OFPACT_METER:
5802 case OFPACT_SAMPLE:
5803 case OFPACT_CLONE:
5804 case OFPACT_ENCAP:
5805 case OFPACT_DECAP:
5806 case OFPACT_DEBUG_RECIRC:
5807 case OFPACT_DEBUG_SLOW:
5808 case OFPACT_CT:
5809 case OFPACT_CT_CLEAR:
5810 case OFPACT_NAT:
5811 /* These may not generate PACKET INs. */
5812 break;
5813
5814 case OFPACT_NOTE:
5815 case OFPACT_CONJUNCTION:
5816 /* These need not be copied for restoration. */
5817 continue;
5818 }
5819 /* Copy the action over. */
5820 ofpbuf_put(&ctx->frozen_actions, a, OFPACT_ALIGN(a->len));
5821 }
5822 }
5823
5824 static void
5825 put_ct_mark(const struct flow *flow, struct ofpbuf *odp_actions,
5826 struct flow_wildcards *wc)
5827 {
5828 if (wc->masks.ct_mark) {
5829 struct {
5830 uint32_t key;
5831 uint32_t mask;
5832 } *odp_ct_mark;
5833
5834 odp_ct_mark = nl_msg_put_unspec_uninit(odp_actions, OVS_CT_ATTR_MARK,
5835 sizeof(*odp_ct_mark));
5836 odp_ct_mark->key = flow->ct_mark & wc->masks.ct_mark;
5837 odp_ct_mark->mask = wc->masks.ct_mark;
5838 }
5839 }
5840
5841 static void
5842 put_ct_label(const struct flow *flow, struct ofpbuf *odp_actions,
5843 struct flow_wildcards *wc)
5844 {
5845 if (!ovs_u128_is_zero(wc->masks.ct_label)) {
5846 struct {
5847 ovs_u128 key;
5848 ovs_u128 mask;
5849 } odp_ct_label;
5850
5851 odp_ct_label.key = ovs_u128_and(flow->ct_label, wc->masks.ct_label);
5852 odp_ct_label.mask = wc->masks.ct_label;
5853 nl_msg_put_unspec(odp_actions, OVS_CT_ATTR_LABELS,
5854 &odp_ct_label, sizeof odp_ct_label);
5855 }
5856 }
5857
5858 static void
5859 put_ct_helper(struct xlate_ctx *ctx,
5860 struct ofpbuf *odp_actions, struct ofpact_conntrack *ofc)
5861 {
5862 if (ofc->alg) {
5863 switch(ofc->alg) {
5864 case IPPORT_FTP:
5865 nl_msg_put_string(odp_actions, OVS_CT_ATTR_HELPER, "ftp");
5866 break;
5867 case IPPORT_TFTP:
5868 nl_msg_put_string(odp_actions, OVS_CT_ATTR_HELPER, "tftp");
5869 break;
5870 default:
5871 xlate_report_error(ctx, "cannot serialize ct_helper %d", ofc->alg);
5872 break;
5873 }
5874 }
5875 }
5876
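/* Appends a nested OVS_CT_ATTR_NAT attribute to the datapath ct action being
 * composed, based on the NAT action saved in 'ctx->ct_nat_action', if any:
 * the SNAT/DNAT selection, the persistence and hashing flags, and the
 * optional address and protocol port ranges. */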
5877 static void
5878 put_ct_nat(struct xlate_ctx *ctx)
5879 {
5880 struct ofpact_nat *ofn = ctx->ct_nat_action;
5881 size_t nat_offset;
5882
5883 if (!ofn) {
5884 return;
5885 }
5886
5887 nat_offset = nl_msg_start_nested(ctx->odp_actions, OVS_CT_ATTR_NAT);
5888 if (ofn->flags & NX_NAT_F_SRC || ofn->flags & NX_NAT_F_DST) {
5889 nl_msg_put_flag(ctx->odp_actions, ofn->flags & NX_NAT_F_SRC
5890 ? OVS_NAT_ATTR_SRC : OVS_NAT_ATTR_DST);
5891 if (ofn->flags & NX_NAT_F_PERSISTENT) {
5892 nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PERSISTENT);
5893 }
5894 if (ofn->flags & NX_NAT_F_PROTO_HASH) {
5895 nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_HASH);
5896 } else if (ofn->flags & NX_NAT_F_PROTO_RANDOM) {
5897 nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_RANDOM);
5898 }
5899 if (ofn->range_af == AF_INET) {
5900 nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
5901 ofn->range.addr.ipv4.min);
5902 if (ofn->range.addr.ipv4.max &&
5903 (ntohl(ofn->range.addr.ipv4.max)
5904 > ntohl(ofn->range.addr.ipv4.min))) {
5905 nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
5906 ofn->range.addr.ipv4.max);
5907 }
5908 } else if (ofn->range_af == AF_INET6) {
5909 nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
5910 &ofn->range.addr.ipv6.min,
5911 sizeof ofn->range.addr.ipv6.min);
5912 if (!ipv6_mask_is_any(&ofn->range.addr.ipv6.max) &&
5913 memcmp(&ofn->range.addr.ipv6.max, &ofn->range.addr.ipv6.min,
5914 sizeof ofn->range.addr.ipv6.max) > 0) {
5915 nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
5916 &ofn->range.addr.ipv6.max,
5917 sizeof ofn->range.addr.ipv6.max);
5918 }
5919 }
5920 if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) {
5921 nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MIN,
5922 ofn->range.proto.min);
5923 if (ofn->range.proto.max &&
5924 ofn->range.proto.max > ofn->range.proto.min) {
5925 nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MAX,
5926 ofn->range.proto.max);
5927 }
5928 }
5929 }
5930 nl_msg_end_nested(ctx->odp_actions, nat_offset);
5931 }
5932
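/* Composes a datapath ct action for the OpenFlow ct action 'ofc'.  Pending
 * actions are committed first, the nested actions are translated to pick up
 * ct_mark, ct_label, and NAT settings, and then a nested OVS_ACTION_ATTR_CT
 * carrying the zone, commit flag, mark, label, helper, and NAT attributes is
 * emitted.  If a recirculation table is specified the packet is recirculated
 * there; afterwards the conntrack fields are cleared in this context. */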
5933 static void
5934 compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
5935 bool is_last_action)
5936 {
5937 ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
5938 uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
5939 size_t ct_offset;
5940 uint16_t zone;
5941
5942 /* Ensure that any prior actions are applied before composing the new
5943 * conntrack action. */
5944 xlate_commit_actions(ctx);
5945
5946 /* Process nested actions first, to populate the key. */
5947 ctx->ct_nat_action = NULL;
5948 ctx->wc->masks.ct_mark = 0;
5949 ctx->wc->masks.ct_label = OVS_U128_ZERO;
5950 do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx,
5951 is_last_action, false);
5952
5953 if (ofc->zone_src.field) {
5954 zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
5955 } else {
5956 zone = ofc->zone_imm;
5957 }
5958
5959 ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
5960 if (ofc->flags & NX_CT_F_COMMIT) {
5961 nl_msg_put_flag(ctx->odp_actions, ofc->flags & NX_CT_F_FORCE ?
5962 OVS_CT_ATTR_FORCE_COMMIT : OVS_CT_ATTR_COMMIT);
5963 if (ctx->xbridge->support.ct_eventmask) {
5964 nl_msg_put_u32(ctx->odp_actions, OVS_CT_ATTR_EVENTMASK,
5965 OVS_CT_EVENTMASK_DEFAULT);
5966 }
5967 }
5968 nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
5969 put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
5970 put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
5971 put_ct_helper(ctx, ctx->odp_actions, ofc);
5972 put_ct_nat(ctx);
5973 ctx->ct_nat_action = NULL;
5974 nl_msg_end_nested(ctx->odp_actions, ct_offset);
5975
5976 ctx->wc->masks.ct_mark = old_ct_mark_mask;
5977 ctx->wc->masks.ct_label = old_ct_label_mask;
5978
5979 if (ofc->recirc_table != NX_CT_RECIRC_NONE) {
5980 ctx->conntracked = true;
5981 compose_recirculate_and_fork(ctx, ofc->recirc_table, zone);
5982 }
5983
5984 /* The ct_* fields are only available in the scope of the 'recirc_table'
5985 * call chain. */
5986 flow_clear_conntrack(&ctx->xin->flow);
5987 xlate_report(ctx, OFT_DETAIL, "Sets the packet to an untracked state, "
5988 "and clears all the conntrack fields.");
5989 ctx->conntracked = false;
5990 }
5991
5992 static void
5993 compose_ct_clear_action(struct xlate_ctx *ctx)
5994 {
5995 clear_conntrack(ctx);
5996 /* This action originally existed without dpif support. So to preserve
5997 * compatibility, only append it if the dpif supports it. */
5998 if (ctx->xbridge->support.ct_clear) {
5999 nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_CT_CLEAR);
6000 }
6001 }
6002
6003 static void
6004 rewrite_flow_encap_ethernet(struct xlate_ctx *ctx,
6005 struct flow *flow,
6006 struct flow_wildcards *wc)
6007 {
6008 wc->masks.packet_type = OVS_BE32_MAX;
6009 if (pt_ns(flow->packet_type) == OFPHTN_ETHERTYPE) {
6010 /* Only adjust the packet_type and zero the dummy Ethernet addresses. */
6011 ovs_be16 ethertype = pt_ns_type_be(flow->packet_type);
6012 flow->packet_type = htonl(PT_ETH);
6013 flow->dl_src = eth_addr_zero;
6014 flow->dl_dst = eth_addr_zero;
6015 flow->dl_type = ethertype;
6016 } else {
6017 /* Error handling: drop packet. */
6018 xlate_report_debug(ctx, OFT_ACTION,
6019 "Dropping packet as encap(ethernet) is not "
6020 "supported for packet type ethernet.");
6021 ctx->error = XLATE_UNSUPPORTED_PACKET_TYPE;
6022 }
6023 }
6024
6025 /* For an MD2 NSH header, returns a pointer to an ofpbuf with the encoded
6026 * MD2 TLVs that were provided as encap properties to the encap operation.
6027 * This will be stored as encap_data in the ctx and copied into the push_nsh
6028 * action at the next commit. */
6029 static struct ofpbuf *
6030 rewrite_flow_push_nsh(struct xlate_ctx *ctx,
6031 const struct ofpact_encap *encap,
6032 struct flow *flow,
6033 struct flow_wildcards *wc)
6034 {
6035 ovs_be32 packet_type = flow->packet_type;
6036 const char *ptr = (char *) encap->props;
6037 struct ofpbuf *buf = ofpbuf_new(NSH_CTX_HDRS_MAX_LEN);
6038 uint8_t md_type = NSH_M_TYPE1;
6039 uint8_t np = 0;
6040 int i;
6041
6042 /* Scan the optional NSH encap TLV properties, if any. */
6043 for (i = 0; i < encap->n_props; i++) {
6044 struct ofpact_ed_prop *prop_ptr =
6045 ALIGNED_CAST(struct ofpact_ed_prop *, ptr);
6046 if (prop_ptr->prop_class == OFPPPC_NSH) {
6047 switch (prop_ptr->type) {
6048 case OFPPPT_PROP_NSH_MDTYPE: {
6049 struct ofpact_ed_prop_nsh_md_type *prop_md_type =
6050 ALIGNED_CAST(struct ofpact_ed_prop_nsh_md_type *,
6051 prop_ptr);
6052 md_type = prop_md_type->md_type;
6053 break;
6054 }
6055 case OFPPPT_PROP_NSH_TLV: {
6056 struct ofpact_ed_prop_nsh_tlv *tlv_prop =
6057 ALIGNED_CAST(struct ofpact_ed_prop_nsh_tlv *,
6058 prop_ptr);
6059 struct nsh_md2_tlv *md2_ctx =
6060 ofpbuf_put_uninit(buf, sizeof(*md2_ctx));
6061 md2_ctx->md_class = tlv_prop->tlv_class;
6062 md2_ctx->type = tlv_prop->tlv_type;
6063 md2_ctx->length = tlv_prop->tlv_len;
6064 size_t len = ROUND_UP(md2_ctx->length, 4);
6065 size_t padding = len - md2_ctx->length;
6066 ofpbuf_put(buf, tlv_prop->data, md2_ctx->length);
6067 ofpbuf_put_zeros(buf, padding);
6068 break;
6069 }
6070 default:
6071 /* No other NSH encap properties defined yet. */
6072 break;
6073 }
6074 }
6075 ptr += ROUND_UP(prop_ptr->len, 8);
6076 }
6077 if (buf->size == 0 || buf->size > NSH_CTX_HDRS_MAX_LEN) {
6078 ofpbuf_delete(buf);
6079 buf = NULL;
6080 }
6081
6082 /* Determine the Next Protocol field for NSH header. */
6083 switch (ntohl(packet_type)) {
6084 case PT_ETH:
6085 np = NSH_P_ETHERNET;
6086 break;
6087 case PT_IPV4:
6088 np = NSH_P_IPV4;
6089 break;
6090 case PT_IPV6:
6091 np = NSH_P_IPV6;
6092 break;
6093 case PT_NSH:
6094 np = NSH_P_NSH;
6095 break;
6096 default:
6097 /* Error handling: drop packet. */
6098 xlate_report_debug(ctx, OFT_ACTION,
6099 "Dropping packet as encap(nsh) is not "
6100 "supported for packet type (%d,0x%x)",
6101 pt_ns(packet_type), pt_ns_type(packet_type));
6102 ctx->error = XLATE_UNSUPPORTED_PACKET_TYPE;
6103 return buf;
6104 }
6105 /* Note that we have matched on packet_type! */
6106 wc->masks.packet_type = OVS_BE32_MAX;
6107
6108 /* Reset all current flow packet headers. */
6109 memset(&flow->dl_dst, 0,
6110 sizeof(struct flow) - offsetof(struct flow, dl_dst));
6111
6112 /* Populate the flow with the new NSH header. */
6113 flow->packet_type = htonl(PT_NSH);
6114 flow->dl_type = htons(ETH_TYPE_NSH);
6115 flow->nsh.flags = 0;
6116 flow->nsh.ttl = 63;
6117 flow->nsh.np = np;
6118 flow->nsh.path_hdr = htonl(255);
6119
6120 if (md_type == NSH_M_TYPE1) {
6121 flow->nsh.mdtype = NSH_M_TYPE1;
6122 memset(flow->nsh.context, 0, sizeof flow->nsh.context);
6123 if (buf) {
6124 /* Drop any MD2 context TLVs. */
6125 ofpbuf_delete(buf);
6126 buf = NULL;
6127 }
6128 } else if (md_type == NSH_M_TYPE2) {
6129 flow->nsh.mdtype = NSH_M_TYPE2;
6130 }
6131 flow->nsh.mdtype &= NSH_MDTYPE_MASK;
6132
6133 return buf;
6134 }
6135
6136 static void
6137 xlate_generic_encap_action(struct xlate_ctx *ctx,
6138 const struct ofpact_encap *encap)
6139 {
6140 struct flow *flow = &ctx->xin->flow;
6141 struct flow_wildcards *wc = ctx->wc;
6142 struct ofpbuf *encap_data = NULL;
6143
6144 /* Ensure that any pending actions on the inner packet are applied before
6145 * rewriting the flow. */
6146 xlate_commit_actions(ctx);
6147
6148 /* Rewrite the flow to reflect the effect of pushing the new encap header. */
6149 switch (ntohl(encap->new_pkt_type)) {
6150 case PT_ETH:
6151 rewrite_flow_encap_ethernet(ctx, flow, wc);
6152 break;
6153 case PT_NSH:
6154 encap_data = rewrite_flow_push_nsh(ctx, encap, flow, wc);
6155 break;
6156 default:
6157 /* New packet type was checked during decoding. */
6158 OVS_NOT_REACHED();
6159 }
6160
6161 if (!ctx->error) {
6162 /* The actual encap datapath action will be generated at next commit. */
6163 ctx->pending_encap = true;
6164 ctx->encap_data = encap_data;
6165 }
6166 }
6167
6168 /* Returns true if packet must be recirculated after decapsulation. */
6169 static bool
6170 xlate_generic_decap_action(struct xlate_ctx *ctx,
6171 const struct ofpact_decap *decap OVS_UNUSED)
6172 {
6173 struct flow *flow = &ctx->xin->flow;
6174
6175 /* Ensure that any pending actions on the current packet are applied
6176 * before generating the decap action. */
6177 xlate_commit_actions(ctx);
6178
6179 /* We assume for now that the new_pkt_type is PT_USE_NEXT_PROTO. */
6180 switch (ntohl(flow->packet_type)) {
6181 case PT_ETH:
6182 if (flow->vlans[0].tci & htons(VLAN_CFI)) {
6183 /* Error handling: drop packet. */
6184 xlate_report_debug(ctx, OFT_ACTION, "Dropping packet, cannot "
6185 "decap Ethernet if VLAN is present.");
6186 ctx->error = XLATE_UNSUPPORTED_PACKET_TYPE;
6187 } else {
6188 /* Just change the packet_type.
6189 * Delay generating pop_eth until the next commit. */
6190 flow->packet_type = htonl(PACKET_TYPE(OFPHTN_ETHERTYPE,
6191 ntohs(flow->dl_type)));
6192 ctx->wc->masks.dl_type = OVS_BE16_MAX;
6193 }
6194 return false;
6195 case PT_NSH:
6196 /* The pop_nsh action is generated at the commit executed as
6197 * part of freezing the ctx for recirculation. Here we just set
6198 * the new packet type based on the NSH next protocol field. */
6199 switch (flow->nsh.np) {
6200 case NSH_P_ETHERNET:
6201 flow->packet_type = htonl(PT_ETH);
6202 break;
6203 case NSH_P_IPV4:
6204 flow->packet_type = htonl(PT_IPV4);
6205 break;
6206 case NSH_P_IPV6:
6207 flow->packet_type = htonl(PT_IPV6);
6208 break;
6209 case NSH_P_NSH:
6210 flow->packet_type = htonl(PT_NSH);
6211 break;
6212 default:
6213 /* Error handling: drop packet. */
6214 xlate_report_debug(ctx, OFT_ACTION,
6215 "Dropping packet as NSH next protocol %d "
6216 "is not supported", flow->nsh.np);
6217 ctx->error = XLATE_UNSUPPORTED_PACKET_TYPE;
6218 return false;
6220 }
6221 ctx->wc->masks.nsh.np = UINT8_MAX;
6222 ctx->pending_decap = true;
6223 /* Trigger recirculation. */
6224 return true;
6225 default:
6226 /* Error handling: drop packet. */
6227 xlate_report_debug(
6228 ctx, OFT_ACTION,
6229 "Dropping packet as the decap() does not support "
6230 "packet type (%d,0x%x)",
6231 pt_ns(flow->packet_type), pt_ns_type(flow->packet_type));
6232 ctx->error = XLATE_UNSUPPORTED_PACKET_TYPE;
6233 return false;
6234 }
6235 }
6236
6237 static void
6238 recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
6239 {
6240 /* No need to recirculate if already exiting. */
6241 if (ctx->exit) {
6242 return;
6243 }
6244
6245 /* Do not consider recirculating unless the packet was previously MPLS. */
6246 if (!ctx->was_mpls) {
6247 return;
6248 }
6249
6250 /* Special case these actions, only recirculating if necessary.
6251 * This avoids the overhead of recirculation in common use-cases.
6252 */
6253 switch (a->type) {
6254
6255 /* Output actions do not require recirculation. */
6256 case OFPACT_OUTPUT:
6257 case OFPACT_OUTPUT_TRUNC:
6258 case OFPACT_ENQUEUE:
6259 case OFPACT_OUTPUT_REG:
6260 /* Set actions that don't touch L3+ fields do not require recirculation. */
6261 case OFPACT_SET_VLAN_VID:
6262 case OFPACT_SET_VLAN_PCP:
6263 case OFPACT_SET_ETH_SRC:
6264 case OFPACT_SET_ETH_DST:
6265 case OFPACT_SET_TUNNEL:
6266 case OFPACT_SET_QUEUE:
6267 /* If a group's actions require recirculation, that can be detected
6268 * when they are translated. */
6269 case OFPACT_GROUP:
6270 return;
6271
6272 /* Set-field actions that don't touch L3+ fields don't require recirculation. */
6273 case OFPACT_SET_FIELD:
6274 if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) {
6275 break;
6276 }
6277 return;
6278
6279 /* For simplicity, recirculate in all other cases. */
6280 case OFPACT_CONTROLLER:
6281 case OFPACT_BUNDLE:
6282 case OFPACT_STRIP_VLAN:
6283 case OFPACT_PUSH_VLAN:
6284 case OFPACT_SET_IPV4_SRC:
6285 case OFPACT_SET_IPV4_DST:
6286 case OFPACT_SET_IP_DSCP:
6287 case OFPACT_SET_IP_ECN:
6288 case OFPACT_SET_IP_TTL:
6289 case OFPACT_SET_L4_SRC_PORT:
6290 case OFPACT_SET_L4_DST_PORT:
6291 case OFPACT_REG_MOVE:
6292 case OFPACT_STACK_PUSH:
6293 case OFPACT_STACK_POP:
6294 case OFPACT_DEC_TTL:
6295 case OFPACT_SET_MPLS_LABEL:
6296 case OFPACT_SET_MPLS_TC:
6297 case OFPACT_SET_MPLS_TTL:
6298 case OFPACT_DEC_MPLS_TTL:
6299 case OFPACT_PUSH_MPLS:
6300 case OFPACT_POP_MPLS:
6301 case OFPACT_POP_QUEUE:
6302 case OFPACT_FIN_TIMEOUT:
6303 case OFPACT_RESUBMIT:
6304 case OFPACT_LEARN:
6305 case OFPACT_CONJUNCTION:
6306 case OFPACT_MULTIPATH:
6307 case OFPACT_NOTE:
6308 case OFPACT_EXIT:
6309 case OFPACT_SAMPLE:
6310 case OFPACT_CLONE:
6311 case OFPACT_ENCAP:
6312 case OFPACT_DECAP:
6313 case OFPACT_DEC_NSH_TTL:
6314 case OFPACT_UNROLL_XLATE:
6315 case OFPACT_CT:
6316 case OFPACT_CT_CLEAR:
6317 case OFPACT_NAT:
6318 case OFPACT_DEBUG_RECIRC:
6319 case OFPACT_DEBUG_SLOW:
6320 case OFPACT_METER:
6321 case OFPACT_CLEAR_ACTIONS:
6322 case OFPACT_WRITE_ACTIONS:
6323 case OFPACT_WRITE_METADATA:
6324 case OFPACT_GOTO_TABLE:
6325 default:
6326 break;
6327 }
6328
6329 /* Recirculate */
6330 ctx_trigger_freeze(ctx);
6331 }
6332
6333 static void
6334 xlate_ofpact_reg_move(struct xlate_ctx *ctx, const struct ofpact_reg_move *a)
6335 {
6336 mf_subfield_copy(&a->src, &a->dst, &ctx->xin->flow, ctx->wc);
6337 xlate_report_subfield(ctx, &a->dst);
6338 }
6339
6340 static void
6341 xlate_ofpact_stack_pop(struct xlate_ctx *ctx, const struct ofpact_stack *a)
6342 {
6343 if (nxm_execute_stack_pop(a, &ctx->xin->flow, ctx->wc, &ctx->stack)) {
6344 xlate_report_subfield(ctx, &a->subfield);
6345 } else {
6346 xlate_report_error(ctx, "stack underflow");
6347 }
6348 }
6349
6350 /* Restore translation context data that was stored earlier. */
6351 static void
6352 xlate_ofpact_unroll_xlate(struct xlate_ctx *ctx,
6353 const struct ofpact_unroll_xlate *a)
6354 {
6355 ctx->table_id = a->rule_table_id;
6356 ctx->rule_cookie = a->rule_cookie;
6357 xlate_report(ctx, OFT_THAW, "restored state: table=%"PRIu8", "
6358 "cookie=%#"PRIx64, a->rule_table_id, a->rule_cookie);
6359 }
6360
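/* Translates the OpenFlow actions in 'ofpacts' into datapath actions,
 * updating the flow and its wildcards as fields are examined or modified.
 * 'is_last_action' and 'group_bucket_action' qualify how output-like actions
 * are translated.  Translation stops early on error, on an "exit" action, or
 * when the context freezes for recirculation, in which case the remaining
 * actions are saved so that they can be executed after thawing. */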
6361 static void
6362 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
6363 struct xlate_ctx *ctx, bool is_last_action,
6364 bool group_bucket_action)
6365 {
6366 struct flow_wildcards *wc = ctx->wc;
6367 struct flow *flow = &ctx->xin->flow;
6368 const struct ofpact *a;
6369
6370 /* dl_type already in the mask, not set below. */
6371
6372 if (!ofpacts_len) {
6373 xlate_report(ctx, OFT_ACTION, "drop");
6374 return;
6375 }
6376
6377 OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
6378 struct ofpact_controller *controller;
6379 const struct ofpact_metadata *metadata;
6380 const struct ofpact_set_field *set_field;
6381 const struct mf_field *mf;
6382 bool last = is_last_action && ofpact_last(a, ofpacts, ofpacts_len)
6383 && ctx->action_set.size;
6384
6385 if (ctx->error) {
6386 break;
6387 }
6388
6389 recirc_for_mpls(a, ctx);
6390
6391 if (ctx->exit) {
6392 /* Check if we need to store the remaining actions for later
6393 * execution. */
6394 if (ctx->freezing) {
6395 freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len),
6396 ctx);
6397 }
6398 break;
6399 }
6400
6401 if (OVS_UNLIKELY(ctx->xin->trace)) {
6402 struct ds s = DS_EMPTY_INITIALIZER;
6403 struct ofpact_format_params fp = { .s = &s };
6404 ofpacts_format(a, OFPACT_ALIGN(a->len), &fp);
6405 xlate_report(ctx, OFT_ACTION, "%s", ds_cstr(&s));
6406 ds_destroy(&s);
6407 }
6408
6409 switch (a->type) {
6410 case OFPACT_OUTPUT:
6411 xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
6412 ofpact_get_OUTPUT(a)->max_len, true, last,
6413 false, group_bucket_action);
6414 break;
6415
6416 case OFPACT_GROUP:
6417 if (xlate_group_action(ctx, ofpact_get_GROUP(a)->group_id, last)) {
6418 /* Group could not be found. */
6419
6420 /* XXX: Terminates action list translation, but does not
6421 * terminate the pipeline. */
6422 return;
6423 }
6424 break;
6425
6426 case OFPACT_CONTROLLER:
6427 controller = ofpact_get_CONTROLLER(a);
6428 if (controller->pause) {
6429 ctx->pause = controller;
6430 ctx_trigger_freeze(ctx);
6431 a = ofpact_next(a);
6432 } else {
6433 xlate_controller_action(ctx, controller->max_len,
6434 controller->reason,
6435 controller->controller_id,
6436 controller->provider_meter_id,
6437 controller->userdata,
6438 controller->userdata_len);
6439 }
6440 break;
6441
6442 case OFPACT_ENQUEUE:
6443 memset(&wc->masks.skb_priority, 0xff,
6444 sizeof wc->masks.skb_priority);
6445 xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a), last,
6446 group_bucket_action);
6447 break;
6448
6449 case OFPACT_SET_VLAN_VID:
6450 wc->masks.vlans[0].tci |= htons(VLAN_VID_MASK | VLAN_CFI);
6451 if (flow->vlans[0].tci & htons(VLAN_CFI) ||
6452 ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
6453 if (!flow->vlans[0].tpid) {
6454 flow->vlans[0].tpid = htons(ETH_TYPE_VLAN);
6455 }
6456 flow->vlans[0].tci &= ~htons(VLAN_VID_MASK);
6457 flow->vlans[0].tci |=
6458 (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid) |
6459 htons(VLAN_CFI));
6460 }
6461 break;
6462
6463 case OFPACT_SET_VLAN_PCP:
6464 wc->masks.vlans[0].tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
6465 if (flow->vlans[0].tci & htons(VLAN_CFI) ||
6466 ofpact_get_SET_VLAN_PCP(a)->push_vlan_if_needed) {
6467 if (!flow->vlans[0].tpid) {
6468 flow->vlans[0].tpid = htons(ETH_TYPE_VLAN);
6469 }
6470 flow->vlans[0].tci &= ~htons(VLAN_PCP_MASK);
6471 flow->vlans[0].tci |=
6472 htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp
6473 << VLAN_PCP_SHIFT) | VLAN_CFI);
6474 }
6475 break;
6476
6477 case OFPACT_STRIP_VLAN:
6478 flow_pop_vlan(flow, wc);
6479 break;
6480
6481 case OFPACT_PUSH_VLAN:
6482 flow_push_vlan_uninit(flow, wc);
6483 flow->vlans[0].tpid = ofpact_get_PUSH_VLAN(a)->ethertype;
6484 flow->vlans[0].tci = htons(VLAN_CFI);
6485 break;
6486
6487 case OFPACT_SET_ETH_SRC:
6488 WC_MASK_FIELD(wc, dl_src);
6489 flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac;
6490 break;
6491
6492 case OFPACT_SET_ETH_DST:
6493 WC_MASK_FIELD(wc, dl_dst);
6494 flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac;
6495 break;
6496
6497 case OFPACT_SET_IPV4_SRC:
6498 if (flow->dl_type == htons(ETH_TYPE_IP)) {
6499 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
6500 flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
6501 }
6502 break;
6503
6504 case OFPACT_SET_IPV4_DST:
6505 if (flow->dl_type == htons(ETH_TYPE_IP)) {
6506 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
6507 flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
6508 }
6509 break;
6510
6511 case OFPACT_SET_IP_DSCP:
6512 if (is_ip_any(flow)) {
6513 wc->masks.nw_tos |= IP_DSCP_MASK;
6514 flow->nw_tos &= ~IP_DSCP_MASK;
6515 flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp;
6516 }
6517 break;
6518
6519 case OFPACT_SET_IP_ECN:
6520 if (is_ip_any(flow)) {
6521 wc->masks.nw_tos |= IP_ECN_MASK;
6522 flow->nw_tos &= ~IP_ECN_MASK;
6523 flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn;
6524 }
6525 break;
6526
6527 case OFPACT_SET_IP_TTL:
6528 if (is_ip_any(flow)) {
6529 wc->masks.nw_ttl = 0xff;
6530 flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
6531 }
6532 break;
6533
6534 case OFPACT_SET_L4_SRC_PORT:
6535 if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
6536 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
6537 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
6538 flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
6539 }
6540 break;
6541
6542 case OFPACT_SET_L4_DST_PORT:
6543 if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
6544 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
6545 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
6546 flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
6547 }
6548 break;
6549
6550 case OFPACT_RESUBMIT:
6551 /* Freezing complicates resubmit. Some action in the flow
6552 * entry found by resubmit might trigger freezing. If that
6553 * happens, we do not want to execute the resubmit again during
6554 * thawing, so we skip back to the head of the loop to avoid
6555 * that, adding only the actions that follow the resubmit to
6556 * the frozen actions.
6557 */
6558 xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a), last);
6559 continue;
6560
6561 case OFPACT_SET_TUNNEL:
6562 flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
6563 break;
6564
6565 case OFPACT_SET_QUEUE:
6566 memset(&wc->masks.skb_priority, 0xff,
6567 sizeof wc->masks.skb_priority);
6568 xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
6569 break;
6570
6571 case OFPACT_POP_QUEUE:
6572 memset(&wc->masks.skb_priority, 0xff,
6573 sizeof wc->masks.skb_priority);
6574 if (flow->skb_priority != ctx->orig_skb_priority) {
6575 flow->skb_priority = ctx->orig_skb_priority;
6576 xlate_report(ctx, OFT_DETAIL, "queue = %#"PRIx32,
6577 flow->skb_priority);
6578 }
6579 break;
6580
6581 case OFPACT_REG_MOVE:
6582 xlate_ofpact_reg_move(ctx, ofpact_get_REG_MOVE(a));
6583 break;
6584
6585 case OFPACT_SET_FIELD:
6586 set_field = ofpact_get_SET_FIELD(a);
6587 mf = set_field->field;
6588
6589 /* Set the field only if the packet actually has it. */
6590 if (mf_are_prereqs_ok(mf, flow, wc)) {
6591 mf_mask_field_masked(mf, ofpact_set_field_mask(set_field), wc);
6592 mf_set_flow_value_masked(mf, set_field->value,
6593 ofpact_set_field_mask(set_field),
6594 flow);
6595 } else {
6596 xlate_report(ctx, OFT_WARN,
6597 "unmet prerequisites for %s, set_field ignored",
6598 mf->name);
6599
6600 }
6601 break;
6602
6603 case OFPACT_STACK_PUSH:
6604 nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
6605 &ctx->stack);
6606 break;
6607
6608 case OFPACT_STACK_POP:
6609 xlate_ofpact_stack_pop(ctx, ofpact_get_STACK_POP(a));
6610 break;
6611
6612 case OFPACT_PUSH_MPLS:
6613 compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
6614 break;
6615
6616 case OFPACT_POP_MPLS:
6617 compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
6618 break;
6619
6620 case OFPACT_SET_MPLS_LABEL:
6621 compose_set_mpls_label_action(
6622 ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
6623 break;
6624
6625 case OFPACT_SET_MPLS_TC:
6626 compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
6627 break;
6628
6629 case OFPACT_SET_MPLS_TTL:
6630 compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
6631 break;
6632
6633 case OFPACT_DEC_MPLS_TTL:
6634 if (compose_dec_mpls_ttl_action(ctx)) {
6635 return;
6636 }
6637 break;
6638
6639 case OFPACT_DEC_NSH_TTL:
6640 if (compose_dec_nsh_ttl_action(ctx)) {
6641 return;
6642 }
6643 break;
6644
6645 case OFPACT_DEC_TTL:
6646 wc->masks.nw_ttl = 0xff;
6647 if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
6648 return;
6649 }
6650 break;
6651
6652 case OFPACT_NOTE:
6653 /* Nothing to do. */
6654 break;
6655
6656 case OFPACT_MULTIPATH:
6657 multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
6658 xlate_report_subfield(ctx, &ofpact_get_MULTIPATH(a)->dst);
6659 break;
6660
6661 case OFPACT_BUNDLE:
6662 xlate_bundle_action(ctx, ofpact_get_BUNDLE(a), last,
6663 group_bucket_action);
6664 break;
6665
6666 case OFPACT_OUTPUT_REG:
6667 xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a), last,
6668 group_bucket_action);
6669 break;
6670
6671 case OFPACT_OUTPUT_TRUNC:
6672 xlate_output_trunc_action(ctx, ofpact_get_OUTPUT_TRUNC(a)->port,
6673 ofpact_get_OUTPUT_TRUNC(a)->max_len, last,
6674 group_bucket_action);
6675 break;
6676
6677 case OFPACT_LEARN:
6678 xlate_learn_action(ctx, ofpact_get_LEARN(a));
6679 break;
6680
6681 case OFPACT_CONJUNCTION:
6682 /* A flow with a "conjunction" action represents part of a special
6683 * kind of "set membership match". Such a flow should not actually
6684 * get executed, but it could via, say, a "packet-out", even though
6685 * that wouldn't be useful. Log it to help debugging. */
6686 xlate_report_error(ctx, "executing no-op conjunction action");
6687 break;
6688
6689 case OFPACT_EXIT:
6690 ctx->exit = true;
6691 break;
6692
6693 case OFPACT_UNROLL_XLATE:
6694 xlate_ofpact_unroll_xlate(ctx, ofpact_get_UNROLL_XLATE(a));
6695 break;
6696
6697 case OFPACT_FIN_TIMEOUT:
6698 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
6699 xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
6700 break;
6701
6702 case OFPACT_CLEAR_ACTIONS:
6703 xlate_report_action_set(ctx, "was");
6704 ofpbuf_clear(&ctx->action_set);
6705 ctx->xin->flow.actset_output = OFPP_UNSET;
6706 ctx->action_set_has_group = false;
6707 break;
6708
6709 case OFPACT_WRITE_ACTIONS:
6710 xlate_write_actions(ctx, ofpact_get_WRITE_ACTIONS(a));
6711 xlate_report_action_set(ctx, "is");
6712 break;
6713
6714 case OFPACT_WRITE_METADATA:
6715 metadata = ofpact_get_WRITE_METADATA(a);
6716 flow->metadata &= ~metadata->mask;
6717 flow->metadata |= metadata->metadata & metadata->mask;
6718 break;
6719
6720 case OFPACT_METER:
6721 xlate_meter_action(ctx, ofpact_get_METER(a));
6722 break;
6723
6724 case OFPACT_GOTO_TABLE: {
6725 struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
6726
6727 ovs_assert(ctx->table_id < ogt->table_id);
6728
6729 xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
6730 ogt->table_id, true, true, false, last,
6731 do_xlate_actions);
6732 break;
6733 }
6734
6735 case OFPACT_SAMPLE:
6736 xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
6737 break;
6738
6739 case OFPACT_CLONE:
6740 compose_clone(ctx, ofpact_get_CLONE(a), last);
6741 break;
6742
6743 case OFPACT_ENCAP:
6744 xlate_generic_encap_action(ctx, ofpact_get_ENCAP(a));
6745 break;
6746
6747 case OFPACT_DECAP: {
6748 bool recirc_needed =
6749 xlate_generic_decap_action(ctx, ofpact_get_DECAP(a));
6750 if (!ctx->error && recirc_needed) {
6751 /* Recirculate for parsing of inner packet. */
6752 ctx_trigger_freeze(ctx);
6753 /* Then continue with next action. */
6754 a = ofpact_next(a);
6755 }
6756 break;
6757 }
6758
6759 case OFPACT_CT:
6760 compose_conntrack_action(ctx, ofpact_get_CT(a), last);
6761 break;
6762
6763 case OFPACT_CT_CLEAR:
6764 compose_ct_clear_action(ctx);
6765 break;
6766
6767 case OFPACT_NAT:
6768 /* This will be processed by compose_conntrack_action(). */
6769 ctx->ct_nat_action = ofpact_get_NAT(a);
6770 break;
6771
6772 case OFPACT_DEBUG_RECIRC:
6773 ctx_trigger_freeze(ctx);
6774 a = ofpact_next(a);
6775 break;
6776
6777 case OFPACT_DEBUG_SLOW:
6778 ctx->xout->slow |= SLOW_ACTION;
6779 break;
6780 }
6781
6782 /* Check if we need to store this and the remaining actions for
6783 * later execution. */
6784 if (!ctx->error && ctx->exit && ctx_first_frozen_action(ctx)) {
6785 freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx);
6786 break;
6787 }
6788 }
6789 }
6790
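/* Initializes 'xin' for translating 'flow', received on 'in_port' of bridge
 * 'ofproto', against table version 'version'.  'packet' may be null; side
 * effects are allowed only when a packet is present.  If the flow carries a
 * recirculation ID, the corresponding frozen state is looked up so that
 * translation can resume where it left off. */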
6791 void
6792 xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
6793 ovs_version_t version, const struct flow *flow,
6794 ofp_port_t in_port, struct rule_dpif *rule, uint16_t tcp_flags,
6795 const struct dp_packet *packet, struct flow_wildcards *wc,
6796 struct ofpbuf *odp_actions)
6797 {
6798 xin->ofproto = ofproto;
6799 xin->tables_version = version;
6800 xin->flow = *flow;
6801 xin->upcall_flow = flow;
6802 xin->flow.in_port.ofp_port = in_port;
6803 xin->flow.actset_output = OFPP_UNSET;
6804 xin->packet = packet;
6805 xin->allow_side_effects = packet != NULL;
6806 xin->rule = rule;
6807 xin->xcache = NULL;
6808 xin->ofpacts = NULL;
6809 xin->ofpacts_len = 0;
6810 xin->tcp_flags = tcp_flags;
6811 xin->trace = NULL;
6812 xin->resubmit_stats = NULL;
6813 xin->depth = 0;
6814 xin->resubmits = 0;
6815 xin->wc = wc;
6816 xin->odp_actions = odp_actions;
6817 xin->in_packet_out = false;
6818 xin->recirc_queue = NULL;
6819 xin->xport_uuid = UUID_ZERO;
6820
6821 /* Do recirc lookup. */
6822 xin->frozen_state = NULL;
6823 if (flow->recirc_id) {
6824 const struct recirc_id_node *node
6825 = recirc_id_node_find(flow->recirc_id);
6826 if (node) {
6827 xin->frozen_state = &node->state;
6828 }
6829 }
6830 }
6831
6832 void
6833 xlate_out_uninit(struct xlate_out *xout)
6834 {
6835 if (xout) {
6836 recirc_refs_unref(&xout->recircs);
6837 }
6838 }
6839 \f
6840 static struct skb_priority_to_dscp *
6841 get_skb_priority(const struct xport *xport, uint32_t skb_priority)
6842 {
6843 struct skb_priority_to_dscp *pdscp;
6844 uint32_t hash;
6845
6846 hash = hash_int(skb_priority, 0);
6847 HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) {
6848 if (pdscp->skb_priority == skb_priority) {
6849 return pdscp;
6850 }
6851 }
6852 return NULL;
6853 }
6854
6855 static bool
6856 dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
6857 uint8_t *dscp)
6858 {
6859 struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
6860 *dscp = pdscp ? pdscp->dscp : 0;
6861 return pdscp != NULL;
6862 }
6863
6864 static size_t
6865 count_skb_priorities(const struct xport *xport)
6866 {
6867 return hmap_count(&xport->skb_priorities);
6868 }
6869
6870 static void
6871 clear_skb_priorities(struct xport *xport)
6872 {
6873 struct skb_priority_to_dscp *pdscp;
6874
6875 HMAP_FOR_EACH_POP (pdscp, hmap_node, &xport->skb_priorities) {
6876 free(pdscp);
6877 }
6878 }
6879
6880 static bool
6881 actions_output_to_local_port(const struct xlate_ctx *ctx)
6882 {
6883 odp_port_t local_odp_port = ofp_port_to_odp_port(ctx->xbridge, OFPP_LOCAL);
6884 const struct nlattr *a;
6885 unsigned int left;
6886
6887 NL_ATTR_FOR_EACH_UNSAFE (a, left, ctx->odp_actions->data,
6888 ctx->odp_actions->size) {
6889 if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT
6890 && nl_attr_get_odp_port(a) == local_odp_port) {
6891 return true;
6892 }
6893 }
6894 return false;
6895 }
6896
6897 #if defined(__linux__)
6898 /* Returns the maximum number of packets that the Linux kernel is willing to
6899 * queue up internally to certain kinds of software-implemented ports, or the
6900 * default (and rarely modified) value if it cannot be determined. */
6901 static int
6902 netdev_max_backlog(void)
6903 {
6904 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
6905 static int max_backlog = 1000; /* The normal default value. */
6906
6907 if (ovsthread_once_start(&once)) {
6908 static const char filename[] = "/proc/sys/net/core/netdev_max_backlog";
6909 FILE *stream;
6910 int n;
6911
6912 stream = fopen(filename, "r");
6913 if (!stream) {
6914 VLOG_INFO("%s: open failed (%s)", filename, ovs_strerror(errno));
6915 } else {
6916 if (fscanf(stream, "%d", &n) != 1) {
6917 VLOG_WARN("%s: read error", filename);
6918 } else if (n <= 100) {
6919 VLOG_WARN("%s: unexpectedly small value %d", filename, n);
6920 } else {
6921 max_backlog = n;
6922 }
6923 fclose(stream);
6924 }
6925 ovsthread_once_done(&once);
6926
6927 VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
6928 }
6929
6930 return max_backlog;
6931 }
6932
6933 /* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
6934 * 'odp_actions'. */
6935 static int
6936 count_output_actions(const struct ofpbuf *odp_actions)
6937 {
6938 const struct nlattr *a;
6939 size_t left;
6940 int n = 0;
6941
6942 NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
6943 if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
6944 n++;
6945 }
6946 }
6947 return n;
6948 }
6949 #endif /* defined(__linux__) */
6950
6951 /* Returns true if 'odp_actions' contains more output actions than the datapath
6952 * can reliably handle in one go. On Linux, this is the value of the
6953 * net.core.netdev_max_backlog sysctl, which limits the maximum number of
6954 * packets that the kernel is willing to queue up for processing while the
6955 * datapath is processing a set of actions. */
6956 static bool
6957 too_many_output_actions(const struct ofpbuf *odp_actions OVS_UNUSED)
6958 {
6959 #ifdef __linux__
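    /* Cheap pre-check first: an OVS_ACTION_ATTR_OUTPUT attribute occupies at
     * least NL_A_U32_SIZE bytes, so the buffer cannot hold more than
     * netdev_max_backlog() output actions unless it is larger than that many
     * NL_A_U32_SIZE-sized attributes.  Only then is the linear count worth
     * doing. */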
6960 return (odp_actions->size / NL_A_U32_SIZE > netdev_max_backlog()
6961 && count_output_actions(odp_actions) > netdev_max_backlog());
6962 #else
6963 /* OSes other than Linux might have similar limits, but we don't know how
6964 * to determine them. */
6965 return false;
6966 #endif
6967 }
6968
6969 static void
6970 xlate_wc_init(struct xlate_ctx *ctx)
6971 {
6972 flow_wildcards_init_catchall(ctx->wc);
6973
6974 /* Some fields we consider to always be examined. */
6975 WC_MASK_FIELD(ctx->wc, packet_type);
6976 WC_MASK_FIELD(ctx->wc, in_port);
6977 if (is_ethernet(&ctx->xin->flow, NULL)) {
6978 WC_MASK_FIELD(ctx->wc, dl_type);
6979 }
6980 if (is_ip_any(&ctx->xin->flow)) {
6981 WC_MASK_FIELD_MASK(ctx->wc, nw_frag, FLOW_NW_FRAG_MASK);
6982 }
6983
6984 if (ctx->xbridge->support.odp.recirc) {
6985 /* Always exactly match recirc_id when the datapath supports
6986 * recirculation. */
6987 WC_MASK_FIELD(ctx->wc, recirc_id);
6988 }
6989
6990 if (ctx->xbridge->netflow) {
6991 netflow_mask_wc(&ctx->xin->flow, ctx->wc);
6992 }
6993
6994 tnl_wc_init(&ctx->xin->flow, ctx->wc);
6995 }
6996
6997 static void
6998 xlate_wc_finish(struct xlate_ctx *ctx)
6999 {
7000 int i;
7001
7002 /* Clear the metadata and register wildcard masks, because we won't
7003 * use non-header fields as part of the cache. */
7004 flow_wildcards_clear_non_packet_fields(ctx->wc);
7005
7006 /* Wildcard Ethernet fields if the original packet type was not
7007 * Ethernet. */
7008 if (ctx->xin->upcall_flow->packet_type != htonl(PT_ETH)) {
7009 ctx->wc->masks.dl_dst = eth_addr_zero;
7010 ctx->wc->masks.dl_src = eth_addr_zero;
7011 ctx->wc->masks.dl_type = 0;
7012 }
7013
7014 /* ICMPv4 and ICMPv6 have 8-bit "type" and "code" fields. struct flow
7015 * uses the low 8 bits of the 16-bit tp_src and tp_dst members to
7016 * represent these fields. The datapath interface, on the other hand,
7017 * represents them with just 8 bits each. This means that if the high
7018 * 8 bits of the masks for these fields somehow become set, then they
7019 * will get chopped off by a round trip through the datapath, and
7020 * revalidation will spot that as an inconsistency and delete the flow.
7021 * Avoid the problem here by making sure that only the low 8 bits of
7022 * either field can be unwildcarded for ICMP.
7023 */
7024 if (is_icmpv4(&ctx->xin->flow, NULL) || is_icmpv6(&ctx->xin->flow, NULL)) {
7025 ctx->wc->masks.tp_src &= htons(UINT8_MAX);
7026 ctx->wc->masks.tp_dst &= htons(UINT8_MAX);
7027 }
7028 /* VLAN_TCI CFI bit must be matched if any of the TCI is matched. */
7029 for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
7030 if (ctx->wc->masks.vlans[i].tci) {
7031 ctx->wc->masks.vlans[i].tci |= htons(VLAN_CFI);
7032 }
7033 }
7034
7035 /* The classifier might return masks that match on tp_src and tp_dst even
7036 * for later fragments. This happens because there might be flows that
7037 * match on tp_src or tp_dst without matching on the frag bits, since that
7038 * is not a prerequisite in OpenFlow. It is a prerequisite for datapath
7039 * flows, though, and tp_src and tp_dst are always going to be 0 for later
7040 * fragments, so wildcard the fields here. */
7041 if (ctx->xin->flow.nw_frag & FLOW_NW_FRAG_LATER) {
7042 ctx->wc->masks.tp_src = 0;
7043 ctx->wc->masks.tp_dst = 0;
7044 }
7045 }
7046
7047 /* Translates the flow, actions, or rule in 'xin' into datapath actions in
7048 * 'xout'.
7049 * The caller must take responsibility for eventually freeing 'xout', with
7050 * xlate_out_uninit().
7051 * Returns 'XLATE_OK' if translation was successful. In case of an error, an
7052 * empty set of actions will be returned in 'xin->odp_actions' (if non-NULL),
7053 * so that most callers may ignore the return value and transparently install a
7054 * drop flow when the translation fails. */
7055 enum xlate_error
7056 xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
7057 {
7058 *xout = (struct xlate_out) {
7059 .slow = 0,
7060 .recircs = RECIRC_REFS_EMPTY_INITIALIZER,
7061 };
7062
7063 struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
7064 struct xbridge *xbridge = xbridge_lookup(xcfg, xin->ofproto);
7065 if (!xbridge) {
7066 return XLATE_BRIDGE_NOT_FOUND;
7067 }
7068
7069 struct flow *flow = &xin->flow;
7070
7071 uint8_t stack_stub[1024];
7072 uint64_t action_set_stub[1024 / 8];
7073 uint64_t frozen_actions_stub[1024 / 8];
7074 uint64_t actions_stub[256 / 8];
7075 struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
7076 struct xlate_ctx ctx = {
7077 .xin = xin,
7078 .xout = xout,
7079 .base_flow = *flow,
7080 .orig_tunnel_ipv6_dst = flow_tnl_dst(&flow->tunnel),
7081 .xcfg = xcfg,
7082 .xbridge = xbridge,
7083 .stack = OFPBUF_STUB_INITIALIZER(stack_stub),
7084 .rule = xin->rule,
7085 .wc = (xin->wc
7086 ? xin->wc
7087 : &(struct flow_wildcards) { .masks = { .dl_type = 0 } }),
7088 .odp_actions = xin->odp_actions ? xin->odp_actions : &scratch_actions,
7089
7090 .depth = xin->depth,
7091 .resubmits = xin->resubmits,
7092 .in_action_set = false,
7093 .in_packet_out = xin->in_packet_out,
7094 .pending_encap = false,
7095 .pending_decap = false,
7096 .encap_data = NULL,
7097
7098 .table_id = 0,
7099 .rule_cookie = OVS_BE64_MAX,
7100 .orig_skb_priority = flow->skb_priority,
7101 .sflow_n_outputs = 0,
7102 .sflow_odp_port = 0,
7103 .nf_output_iface = NF_OUT_DROP,
7104 .exit = false,
7105 .error = XLATE_OK,
7106 .mirrors = 0,
7107
7108 .freezing = false,
7109 .recirc_update_dp_hash = false,
7110 .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
7111 .pause = NULL,
7112
7113 .was_mpls = false,
7114 .conntracked = false,
7115
7116 .ct_nat_action = NULL,
7117
7118 .action_set_has_group = false,
7119 .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
7120 };
7121
7122 /* 'base_flow' reflects the packet as it came in, but we need it to reflect
7123 * the packet as the datapath will treat it for output actions. Our
7124 * datapath doesn't retain tunneling information without us re-setting
7125 * it, so clear the tunnel data.
7126 */
7127
7128 memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel);
7129
7130 ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
7131 xlate_wc_init(&ctx);
7132
7133 COVERAGE_INC(xlate_actions);
7134
7135 xin->trace = xlate_report(&ctx, OFT_BRIDGE, "bridge(\"%s\")",
7136 xbridge->name);
7137 if (xin->frozen_state) {
7138 const struct frozen_state *state = xin->frozen_state;
7139
7140 struct ovs_list *old_trace = xin->trace;
7141 xin->trace = xlate_report(&ctx, OFT_THAW, "thaw");
7142
7143 if (xin->ofpacts_len > 0 || ctx.rule) {
7144 xlate_report_error(&ctx, "Recirculation conflict (%s)!",
7145 xin->ofpacts_len ? "actions" : "rule");
7146 ctx.error = XLATE_RECIRCULATION_CONFLICT;
7147 goto exit;
7148 }
7149
7150 /* Set the bridge for post-recirculation processing if needed. */
7151 if (!uuid_equals(&ctx.xbridge->ofproto->uuid, &state->ofproto_uuid)) {
7152 const struct xbridge *new_bridge
7153 = xbridge_lookup_by_uuid(xcfg, &state->ofproto_uuid);
7154
7155 if (OVS_UNLIKELY(!new_bridge)) {
7156 /* Drop the packet if the bridge cannot be found. */
7157 xlate_report_error(&ctx, "Frozen bridge no longer exists.");
7158 ctx.error = XLATE_BRIDGE_NOT_FOUND;
7159 xin->trace = old_trace;
7160 goto exit;
7161 }
7162 ctx.xbridge = new_bridge;
7163 /* The bridge is now known, so obtain its table version. */
7164 ctx.xin->tables_version
7165 = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);
7166 }
7167
7168 /* Set the thawed table id. Note: A table lookup is done only if there
7169 * are no frozen actions. */
7170 ctx.table_id = state->table_id;
7171 xlate_report(&ctx, OFT_THAW,
7172 "Resuming from table %"PRIu8, ctx.table_id);
7173
7174 ctx.conntracked = state->conntracked;
7175 if (!state->conntracked) {
7176 clear_conntrack(&ctx);
7177 }
7178
7179 /* Restore pipeline metadata. May change flow's in_port and other
7180 * metadata to the values that existed when freezing was triggered. */
7181 frozen_metadata_to_flow(&state->metadata, flow);
7182
7183 /* Restore stack, if any. */
7184 if (state->stack) {
7185 ofpbuf_put(&ctx.stack, state->stack, state->stack_size);
7186 }
7187
7188 /* Restore mirror state. */
7189 ctx.mirrors = state->mirrors;
7190
7191 /* Restore action set, if any. */
7192 if (state->action_set_len) {
7193 xlate_report_actions(&ctx, OFT_THAW, "Restoring action set",
7194 state->action_set, state->action_set_len);
7195
7196 flow->actset_output = OFPP_UNSET;
7197 xlate_write_actions__(&ctx, state->action_set,
7198 state->action_set_len);
7199 }
7200
7201 /* Restore frozen actions. If there are no actions, processing will
7202 * start with a lookup in the table set above. */
7203 xin->ofpacts = state->ofpacts;
7204 xin->ofpacts_len = state->ofpacts_len;
7205 if (state->ofpacts_len) {
7206 xlate_report_actions(&ctx, OFT_THAW, "Restoring actions",
7207 xin->ofpacts, xin->ofpacts_len);
7208 }
7209
7210 xin->trace = old_trace;
7211 } else if (OVS_UNLIKELY(flow->recirc_id)) {
7212 xlate_report_error(&ctx,
7213 "Recirculation context not found for ID %"PRIx32,
7214 flow->recirc_id);
7215 ctx.error = XLATE_NO_RECIRCULATION_CONTEXT;
7216 goto exit;
7217 }
7218
7219 /* Tunnel metadata in udpif format must be normalized before translation. */
7220 if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
7221 const struct tun_table *tun_tab = ofproto_get_tun_tab(
7222 &ctx.xbridge->ofproto->up);
7223 int err;
7224
7225 err = tun_metadata_from_geneve_udpif(tun_tab, &xin->upcall_flow->tunnel,
7226 &xin->upcall_flow->tunnel,
7227 &flow->tunnel);
7228 if (err) {
7229 xlate_report_error(&ctx, "Invalid Geneve tunnel metadata");
7230 ctx.error = XLATE_INVALID_TUNNEL_METADATA;
7231 goto exit;
7232 }
7233 } else if (!flow->tunnel.metadata.tab || xin->frozen_state) {
7234 /* If the original flow did not come in on a tunnel, then it won't have
7235 * FLOW_TNL_F_UDPIF set. However, we still need to have a metadata
7236 * table in case we generate tunnel actions. */
7237 /* If the translation is from a frozen state, we use the latest TLV map
7238 * to avoid a segmentation fault in case the old TLV map has been
7239 * replaced by a new one.
7240 * XXX: It would be better to abort translation if the table has changed. */
7241 flow->tunnel.metadata.tab = ofproto_get_tun_tab(
7242 &ctx.xbridge->ofproto->up);
7243 }
7244 ctx.wc->masks.tunnel.metadata.tab = flow->tunnel.metadata.tab;
7245
7246 /* Get the proximate input port of the packet. (If xin->frozen_state,
7247 * flow->in_port is the ultimate input port of the packet.) */
7248 struct xport *in_port = get_ofp_port(xbridge,
7249 ctx.base_flow.in_port.ofp_port);
7250 if (in_port && !in_port->peer) {
7251 ctx.xin->xport_uuid = in_port->uuid;
7252 }
7253
7254 if (flow->packet_type != htonl(PT_ETH) && in_port &&
7255 in_port->pt_mode == NETDEV_PT_LEGACY_L3 && ctx.table_id == 0) {
7256 /* Add a dummy Ethernet header to a non-L2 packet if it is coming in on
7257 * an L3 port, so that all packets are L2 packets for the lookup.
7258 * The dl_type has already been set from the packet_type. */
7259 flow->packet_type = htonl(PT_ETH);
7260 flow->dl_src = eth_addr_zero;
7261 flow->dl_dst = eth_addr_zero;
7262 ctx.pending_encap = true;
7263 }
7264
7265 if (!xin->ofpacts && !ctx.rule) {
7266 ctx.rule = rule_dpif_lookup_from_table(
7267 ctx.xbridge->ofproto, ctx.xin->tables_version, flow, ctx.wc,
7268 ctx.xin->resubmit_stats, &ctx.table_id,
7269 flow->in_port.ofp_port, true, true, ctx.xin->xcache);
7270 if (ctx.xin->resubmit_stats) {
7271 rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
7272 }
7273 if (ctx.xin->xcache) {
7274 struct xc_entry *entry;
7275
7276 entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
7277 entry->rule = ctx.rule;
7278 ofproto_rule_ref(&ctx.rule->up);
7279 }
7280
7281 xlate_report_table(&ctx, ctx.rule, ctx.table_id);
7282 }
7283
7284 /* Tunnel stats only for non-thawed packets. */
7285 if (!xin->frozen_state && in_port && in_port->is_tunnel) {
7286 if (ctx.xin->resubmit_stats) {
7287 netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
7288 if (in_port->bfd) {
7289 bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
7290 }
7291 }
7292 if (ctx.xin->xcache) {
7293 struct xc_entry *entry;
7294
7295 entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
7296 entry->dev.rx = netdev_ref(in_port->netdev);
7297 entry->dev.bfd = bfd_ref(in_port->bfd);
7298 }
7299 }
7300
7301 if (!xin->frozen_state && process_special(&ctx, in_port)) {
7302 /* process_special() did all the processing for this packet.
7303 *
7304 * We do not perform special processing on thawed packets, since that
7305 * was done before they were frozen and should not be redone. */
7306 mirror_ingress_packet(&ctx);
7307 } else if (in_port && in_port->xbundle
7308 && xbundle_mirror_out(xbridge, in_port->xbundle)) {
7309 xlate_report_error(&ctx, "dropping packet received on port "
7310 "%s, which is reserved exclusively for mirroring",
7311 in_port->xbundle->name);
7312 } else {
7313 /* Sampling is done on initial reception; don't redo after thawing. */
7314 unsigned int user_cookie_offset = 0;
7315 if (!xin->frozen_state) {
7316 user_cookie_offset = compose_sflow_action(&ctx);
7317 compose_ipfix_action(&ctx, ODPP_NONE);
7318 }
7319 size_t sample_actions_len = ctx.odp_actions->size;
7320
7321 if (tnl_process_ecn(flow)
7322 && (!in_port || may_receive(in_port, &ctx))) {
7323 const struct ofpact *ofpacts;
7324 size_t ofpacts_len;
7325
7326 if (xin->ofpacts) {
7327 ofpacts = xin->ofpacts;
7328 ofpacts_len = xin->ofpacts_len;
7329 } else if (ctx.rule) {
7330 const struct rule_actions *actions
7331 = rule_get_actions(&ctx.rule->up);
7332 ofpacts = actions->ofpacts;
7333 ofpacts_len = actions->ofpacts_len;
7334 ctx.rule_cookie = ctx.rule->up.flow_cookie;
7335 } else {
7336 OVS_NOT_REACHED();
7337 }
7338
7339 mirror_ingress_packet(&ctx);
7340 do_xlate_actions(ofpacts, ofpacts_len, &ctx, true, false);
7341 if (ctx.error) {
7342 goto exit;
7343 }
7344
7345 /* We've let OFPP_NORMAL and the learning action look at the
7346 * packet, so cancel all actions and freezing if forwarding is
7347 * disabled. */
7348 if (in_port && (!xport_stp_forward_state(in_port) ||
7349 !xport_rstp_forward_state(in_port))) {
7350 ctx.odp_actions->size = sample_actions_len;
7351 ctx_cancel_freeze(&ctx);
7352 ofpbuf_clear(&ctx.action_set);
7353 }
7354
7355 if (!ctx.freezing) {
7356 xlate_action_set(&ctx);
7357 }
7358 if (ctx.freezing) {
7359 finish_freezing(&ctx);
7360 }
7361 }
7362
7363 /* Output only fully processed packets. */
7364 if (!ctx.freezing
7365 && xbridge->has_in_band
7366 && in_band_must_output_to_local_port(flow)
7367 && !actions_output_to_local_port(&ctx)) {
7368 compose_output_action(&ctx, OFPP_LOCAL, NULL, false, false);
7369 }
7370
7371 if (user_cookie_offset) {
7372 fix_sflow_action(&ctx, user_cookie_offset);
7373 }
7374 }
7375
7376 if (nl_attr_oversized(ctx.odp_actions->size)) {
7377 /* These datapath actions are too big for a Netlink attribute, so we
7378 * can't hand them to the kernel directly. dpif_execute() can execute
7379 * them one by one with help, so just mark the result as SLOW_ACTION to
7380 * prevent the flow from being installed. */
7381 COVERAGE_INC(xlate_actions_oversize);
7382 ctx.xout->slow |= SLOW_ACTION;
7383 } else if (too_many_output_actions(ctx.odp_actions)) {
7384 COVERAGE_INC(xlate_actions_too_many_output);
7385 ctx.xout->slow |= SLOW_ACTION;
7386 }
7387
7388 /* Update NetFlow for non-frozen traffic. */
7389 if (xbridge->netflow && !xin->frozen_state) {
7390 if (ctx.xin->resubmit_stats) {
7391 netflow_flow_update(xbridge->netflow, flow,
7392 ctx.nf_output_iface,
7393 ctx.xin->resubmit_stats);
7394 }
7395 if (ctx.xin->xcache) {
7396 struct xc_entry *entry;
7397
7398 entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETFLOW);
7399 entry->nf.netflow = netflow_ref(xbridge->netflow);
7400 entry->nf.flow = xmemdup(flow, sizeof *flow);
7401 entry->nf.iface = ctx.nf_output_iface;
7402 }
7403 }
7404
7405 /* Translate tunnel metadata masks to udpif format if necessary. */
7406 if (xin->upcall_flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
7407 if (ctx.wc->masks.tunnel.metadata.present.map) {
7408 const struct flow_tnl *upcall_tnl = &xin->upcall_flow->tunnel;
7409 struct geneve_opt opts[TLV_TOT_OPT_SIZE /
7410 sizeof(struct geneve_opt)];
7411
7412 tun_metadata_to_geneve_udpif_mask(&flow->tunnel,
7413 &ctx.wc->masks.tunnel,
7414 upcall_tnl->metadata.opts.gnv,
7415 upcall_tnl->metadata.present.len,
7416 opts);
7417 memset(&ctx.wc->masks.tunnel.metadata, 0,
7418 sizeof ctx.wc->masks.tunnel.metadata);
7419 memcpy(&ctx.wc->masks.tunnel.metadata.opts.gnv, opts,
7420 upcall_tnl->metadata.present.len);
7421 }
7422 ctx.wc->masks.tunnel.metadata.present.len = 0xff;
7423 ctx.wc->masks.tunnel.metadata.tab = NULL;
7424 ctx.wc->masks.tunnel.flags |= FLOW_TNL_F_UDPIF;
7425 } else if (!xin->upcall_flow->tunnel.metadata.tab) {
7426 /* If we didn't have options in UDPIF format and didn't have an existing
7427 * metadata table, then it means that there were no options at all when
7428 * we started processing and any wildcards we picked up were from
7429 * action generation. Without options on the incoming packet, wildcards
7430 * aren't meaningful. To avoid them possibly getting misinterpreted,
7431 * just clear everything. */
7432 if (ctx.wc->masks.tunnel.metadata.present.map) {
7433 memset(&ctx.wc->masks.tunnel.metadata, 0,
7434 sizeof ctx.wc->masks.tunnel.metadata);
7435 } else {
7436 ctx.wc->masks.tunnel.metadata.tab = NULL;
7437 }
7438 }
7439
7440 xlate_wc_finish(&ctx);
7441
7442 exit:
7443 /* Reset the table to what it was when we came in. If we only fetched
7444 * it locally, then it has no meaning outside of flow translation. */
7445 flow->tunnel.metadata.tab = xin->upcall_flow->tunnel.metadata.tab;
7446
7447 ofpbuf_uninit(&ctx.stack);
7448 ofpbuf_uninit(&ctx.action_set);
7449 ofpbuf_uninit(&ctx.frozen_actions);
7450 ofpbuf_uninit(&scratch_actions);
7451 ofpbuf_delete(ctx.encap_data);
7452
7453 /* Make sure we return a "drop flow" in case of an error. */
7454 if (ctx.error) {
7455 xout->slow = 0;
7456 if (xin->odp_actions) {
7457 ofpbuf_clear(xin->odp_actions);
7458 }
7459 }
7460 return ctx.error;
7461 }
7462
7463 enum ofperr
7464 xlate_resume(struct ofproto_dpif *ofproto,
7465 const struct ofputil_packet_in_private *pin,
7466 struct ofpbuf *odp_actions,
7467 enum slow_path_reason *slow)
7468 {
7469 struct dp_packet packet;
7470 dp_packet_use_const(&packet, pin->base.packet,
7471 pin->base.packet_len);
7472
7473 struct flow flow;
7474 flow_extract(&packet, &flow);
7475
7476 struct xlate_in xin;
7477 xlate_in_init(&xin, ofproto, ofproto_dpif_get_tables_version(ofproto),
7478 &flow, 0, NULL, ntohs(flow.tcp_flags),
7479 &packet, NULL, odp_actions);
7480
7481 struct ofpact_note noop;
7482 ofpact_init_NOTE(&noop);
7483 noop.length = 0;
7484
7485 bool any_actions = pin->actions_len > 0;
7486 struct frozen_state state = {
7487 .table_id = 0, /* Not the table where NXAST_PAUSE was executed. */
7488 .ofproto_uuid = pin->bridge,
7489 .stack = pin->stack,
7490 .stack_size = pin->stack_size,
7491 .mirrors = pin->mirrors,
7492 .conntracked = pin->conntracked,
7493 .xport_uuid = UUID_ZERO,
7494
7495 /* When there are no actions, xlate_actions() will search the flow
7496 * table. We don't want it to do that (we want it to resume), so
7497 * supply a no-op action if there aren't any.
7498 *
7499 * (We can't necessarily avoid translating actions entirely if there
7500 * aren't any actions, because there might be some finishing-up to do
7501 * at the end of the pipeline, and we don't check for those
7502 * conditions.) */
7503 .ofpacts = any_actions ? pin->actions : &noop.ofpact,
7504 .ofpacts_len = any_actions ? pin->actions_len : sizeof noop,
7505
7506 .action_set = pin->action_set,
7507 .action_set_len = pin->action_set_len,
7508 };
7509 frozen_metadata_from_flow(&state.metadata,
7510 &pin->base.flow_metadata.flow);
7511 xin.frozen_state = &state;
7512
7513 struct xlate_out xout;
7514 enum xlate_error error = xlate_actions(&xin, &xout);
7515 *slow = xout.slow;
7516 xlate_out_uninit(&xout);
7517
7518 /* xlate_actions() can generate a number of errors, but only
7519 * XLATE_BRIDGE_NOT_FOUND clearly stands out as one that we should be
7520 * sure to report over OpenFlow. The others could come up in packet-outs
7521 * or regular flow translation, and it is not likely to be very
7522 * useful to report them to the controller. */
7523 return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0;
7524 }
7525
7526 /* Sends 'packet' out 'ofport'. If 'ofport' is a tunnel and that tunnel type
7527 * supports a notion of an OAM flag, sets it if 'oam' is true.
7528 * May modify 'packet'.
7529 * Returns 0 if successful, otherwise a positive errno value. */
7530 int
7531 xlate_send_packet(const struct ofport_dpif *ofport, bool oam,
7532 struct dp_packet *packet)
7533 {
7534 struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
7535 struct xport *xport;
7536 uint64_t ofpacts_stub[1024 / 8];
7537 struct ofpbuf ofpacts;
7538 struct flow flow;
7539
7540 ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
7541 /* Use OFPP_NONE as the in_port to avoid special packet processing. */
7542 flow_extract(packet, &flow);
7543 flow.in_port.ofp_port = OFPP_NONE;
7544
7545 xport = xport_lookup(xcfg, ofport);
7546 if (!xport) {
7547 return EINVAL;
7548 }
7549
7550 if (oam) {
7551 const ovs_be16 flag = htons(NX_TUN_FLAG_OAM);
7552 ofpact_put_set_field(&ofpacts, mf_from_id(MFF_TUN_FLAGS),
7553 &flag, &flag);
7554 }
7555
7556 ofpact_put_OUTPUT(&ofpacts)->port = xport->ofp_port;
7557
7558 /* The actions here do not refer to anything versionable (flow tables or
7559 * groups), so we don't need to worry about the version. */
7560 return ofproto_dpif_execute_actions(xport->xbridge->ofproto,
7561 OVS_VERSION_MAX, &flow, NULL,
7562 ofpacts.data, ofpacts.size, packet);
7563 }
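/* Illustrative only: a minimal sketch of sending a locally generated packet
 * out a port with the tunnel OAM flag requested, assuming the caller has
 * already built 'packet' and looked up 'ofport' (hypothetical caller context,
 * not part of this file).  Kept under "#if 0" so it is not compiled. */
#if 0
static void
example_send_oam_probe(const struct ofport_dpif *ofport,
                       struct dp_packet *packet)
{
    /* 'true' requests the OAM flag if 'ofport' is a tunnel that supports
     * one; a nonzero return is a positive errno value. */
    int error = xlate_send_packet(ofport, true, packet);
    if (error) {
        VLOG_WARN("failed to send OAM probe (%s)", ovs_strerror(error));
    }
}
#endif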
7564
7565 void
7566 xlate_mac_learning_update(const struct ofproto_dpif *ofproto,
7567 ofp_port_t in_port, struct eth_addr dl_src,
7568 int vlan, bool is_grat_arp)
7569 {
7570 struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
7571 struct xbridge *xbridge;
7572 struct xbundle *xbundle;
7573
7574 xbridge = xbridge_lookup(xcfg, ofproto);
7575 if (!xbridge) {
7576 return;
7577 }
7578
7579 xbundle = lookup_input_bundle__(xbridge, in_port, NULL);
7580 if (!xbundle) {
7581 return;
7582 }
7583
7584 update_learning_table__(xbridge, xbundle, dl_src, vlan, is_grat_arp);
7585 }
7586
7587 void
7588 xlate_set_support(const struct ofproto_dpif *ofproto,
7589 const struct dpif_backer_support *support)
7590 {
7591 struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
7592 struct xbridge *xbridge = xbridge_lookup(xcfg, ofproto);
7593
7594 if (xbridge) {
7595 xbridge->support = *support;
7596 }
7597 }