]> git.proxmox.com Git - ovs.git/blob - ovn/controller/physical.c
ovn-controller: Fix leak in patched_datapaths processing.
[ovs.git] / ovn / controller / physical.c
1 /* Copyright (c) 2015, 2016 Nicira, Inc.
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <config.h>
17 #include "binding.h"
18 #include "byte-order.h"
19 #include "flow.h"
20 #include "lflow.h"
21 #include "lib/poll-loop.h"
22 #include "ofctrl.h"
23 #include "openvswitch/hmap.h"
24 #include "openvswitch/match.h"
25 #include "openvswitch/ofp-actions.h"
26 #include "openvswitch/ofpbuf.h"
27 #include "openvswitch/vlog.h"
28 #include "ovn-controller.h"
29 #include "ovn/lib/ovn-sb-idl.h"
30 #include "ovn/lib/ovn-util.h"
31 #include "physical.h"
32 #include "openvswitch/shash.h"
33 #include "simap.h"
34 #include "smap.h"
35 #include "sset.h"
36 #include "util.h"
37 #include "vswitch-idl.h"
38
39 VLOG_DEFINE_THIS_MODULE(physical);
40
41 void
42 physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
43 {
44 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_bridge);
45 ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports);
46
47 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_port);
48 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_name);
49 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_interfaces);
50 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_external_ids);
51
52 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
53 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
54 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_ofport);
55 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
56 }
57
58 static struct simap localvif_to_ofport =
59 SIMAP_INITIALIZER(&localvif_to_ofport);
60 static struct hmap tunnels = HMAP_INITIALIZER(&tunnels);
61
62 /* Maps from a chassis to the OpenFlow port number of the tunnel that can be
63 * used to reach that chassis. */
64 struct chassis_tunnel {
65 struct hmap_node hmap_node;
66 const char *chassis_id;
67 ofp_port_t ofport;
68 enum chassis_tunnel_type type;
69 };
70
71 static struct chassis_tunnel *
72 chassis_tunnel_find(const char *chassis_id)
73 {
74 struct chassis_tunnel *tun;
75 HMAP_FOR_EACH_WITH_HASH (tun, hmap_node, hash_string(chassis_id, 0),
76 &tunnels) {
77 if (!strcmp(tun->chassis_id, chassis_id)) {
78 return tun;
79 }
80 }
81 return NULL;
82 }
83
84 static void
85 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
86 struct ofpbuf *ofpacts)
87 {
88 struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
89 mf_from_id(dst), NULL,
90 NULL);
91 ovs_be64 n_value = htonll(value);
92 bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
93 bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
94 }
95
96 static void
97 put_move(enum mf_field_id src, int src_ofs,
98 enum mf_field_id dst, int dst_ofs,
99 int n_bits,
100 struct ofpbuf *ofpacts)
101 {
102 struct ofpact_reg_move *move = ofpact_put_REG_MOVE(ofpacts);
103 move->src.field = mf_from_id(src);
104 move->src.ofs = src_ofs;
105 move->src.n_bits = n_bits;
106 move->dst.field = mf_from_id(dst);
107 move->dst.ofs = dst_ofs;
108 move->dst.n_bits = n_bits;
109 }
110
111 static void
112 put_resubmit(uint8_t table_id, struct ofpbuf *ofpacts)
113 {
114 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(ofpacts);
115 resubmit->in_port = OFPP_IN_PORT;
116 resubmit->table_id = table_id;
117 }
118
119 static void
120 put_encapsulation(enum mf_field_id mff_ovn_geneve,
121 const struct chassis_tunnel *tun,
122 const struct sbrec_datapath_binding *datapath,
123 uint16_t outport, struct ofpbuf *ofpacts)
124 {
125 if (tun->type == GENEVE) {
126 put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
127 put_load(outport, mff_ovn_geneve, 0, 32, ofpacts);
128 put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts);
129 } else if (tun->type == STT) {
130 put_load(datapath->tunnel_key | (outport << 24), MFF_TUN_ID, 0, 64,
131 ofpacts);
132 put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts);
133 } else if (tun->type == VXLAN) {
134 put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
135 } else {
136 OVS_NOT_REACHED();
137 }
138 }
139
140 static void
141 put_stack(enum mf_field_id field, struct ofpact_stack *stack)
142 {
143 stack->subfield.field = mf_from_id(field);
144 stack->subfield.ofs = 0;
145 stack->subfield.n_bits = stack->subfield.field->n_bits;
146 }
147
148 static const struct sbrec_port_binding*
149 get_localnet_port(struct hmap *local_datapaths, int64_t tunnel_key)
150 {
151 struct local_datapath *ld = get_local_datapath(local_datapaths,
152 tunnel_key);
153 return ld ? ld->localnet_port : NULL;
154 }
155
156 static void
157 consider_port_binding(enum mf_field_id mff_ovn_geneve,
158 const struct simap *ct_zones,
159 struct hmap *local_datapaths,
160 struct hmap *patched_datapaths,
161 const struct sbrec_port_binding *binding,
162 struct ofpbuf *ofpacts_p,
163 struct hmap *flow_table)
164 {
165 /* Skip the port binding if the port is on a datapath that is neither
166 * local nor with any logical patch port connected, because local ports
167 * would never need to talk to those ports.
168 *
169 * Even with this approach there could still be unnecessary port
170 * bindings processed. A better approach would be a kind of "flood
171 * fill" algorithm:
172 *
173 * 1. Initialize set S to the logical datapaths that have a port
174 * located on the hypervisor.
175 *
176 * 2. For each patch port P in a logical datapath in S, add the
177 * logical datapath of the remote end of P to S. Iterate
178 * until S reaches a fixed point.
179 *
180 * This can be implemented in northd, which can generate the sets and
181 * save it on each port-binding record in SB, and ovn-controller can
182 * use the information directly. However, there can be update storms
183 * when a pair of patch ports are added/removed to connect/disconnect
184 * large lrouters and lswitches. This need to be studied further.
185 */
186 uint32_t dp_key = binding->datapath->tunnel_key;
187 uint32_t port_key = binding->tunnel_key;
188 if (!get_local_datapath(local_datapaths, dp_key)
189 && !get_patched_datapath(patched_datapaths, dp_key)) {
190 return;
191 }
192
193 /* Find the OpenFlow port for the logical port, as 'ofport'. This is
194 * one of:
195 *
196 * - If the port is a VIF on the chassis we're managing, the
197 * OpenFlow port for the VIF. 'tun' will be NULL.
198 *
199 * The same logic handles logical patch ports, as well as
200 * localnet patch ports.
201 *
202 * For a container nested inside a VM and accessible via a VLAN,
203 * 'tag' is the VLAN ID; otherwise 'tag' is 0.
204 *
205 * For a localnet patch port, if a VLAN ID was configured, 'tag'
206 * is set to that VLAN ID; otherwise 'tag' is 0.
207 *
208 * - If the port is on a remote chassis, the OpenFlow port for a
209 * tunnel to the VIF's remote chassis. 'tun' identifies that
210 * tunnel.
211 */
212
213 int tag = 0;
214 ofp_port_t ofport;
215 bool is_remote = false;
216 if (binding->parent_port && *binding->parent_port) {
217 if (!binding->tag) {
218 return;
219 }
220 ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
221 binding->parent_port));
222 if (ofport) {
223 tag = *binding->tag;
224 }
225 } else {
226 ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
227 binding->logical_port));
228 if ((!strcmp(binding->type, "localnet")
229 || !strcmp(binding->type, "l2gateway"))
230 && ofport && binding->tag) {
231 tag = *binding->tag;
232 }
233 }
234
235 const struct chassis_tunnel *tun = NULL;
236 const struct sbrec_port_binding *localnet_port =
237 get_localnet_port(local_datapaths, dp_key);
238 if (!ofport) {
239 /* It is remote port, may be reached by tunnel or localnet port */
240 is_remote = true;
241 if (!binding->chassis) {
242 return;
243 }
244 if (localnet_port) {
245 ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
246 localnet_port->logical_port));
247 if (!ofport) {
248 return;
249 }
250 } else {
251 tun = chassis_tunnel_find(binding->chassis->name);
252 if (!tun) {
253 return;
254 }
255 ofport = tun->ofport;
256 }
257 }
258
259 struct match match;
260 if (!is_remote) {
261 int zone_id = simap_get(ct_zones, binding->logical_port);
262 /* Packets that arrive from a vif can belong to a VM or
263 * to a container located inside that VM. Packets that
264 * arrive from containers have a tag (vlan) associated with them.
265 */
266
267 /* Table 0, Priority 150 and 100.
268 * ==============================
269 *
270 * Priority 150 is for tagged traffic. This may be containers in a
271 * VM or a VLAN on a local network. For such traffic, match on the
272 * tags and then strip the tag.
273 *
274 * Priority 100 is for traffic belonging to VMs or untagged locally
275 * connected networks.
276 *
277 * For both types of traffic: set MFF_LOG_INPORT to the logical
278 * input port, MFF_LOG_DATAPATH to the logical datapath, and
279 * resubmit into the logical ingress pipeline starting at table
280 * 16. */
281 ofpbuf_clear(ofpacts_p);
282 match_init_catchall(&match);
283 match_set_in_port(&match, ofport);
284
285 /* Match a VLAN tag and strip it, including stripping priority tags
286 * (e.g. VLAN ID 0). In the latter case we'll add a second flow
287 * for frames that lack any 802.1Q header later. */
288 if (tag || !strcmp(binding->type, "localnet")
289 || !strcmp(binding->type, "l2gateway")) {
290 match_set_dl_vlan(&match, htons(tag));
291 ofpact_put_STRIP_VLAN(ofpacts_p);
292 }
293
294 /* Remember the size with just strip vlan added so far,
295 * as we're going to remove this with ofpbuf_pull() later. */
296 uint32_t ofpacts_orig_size = ofpacts_p->size;
297
298 if (zone_id) {
299 put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
300 }
301
302 int zone_id_dnat, zone_id_snat;
303 const struct uuid *key = &binding->datapath->header_.uuid;
304 char *dnat = alloc_nat_zone_key(key, "dnat");
305 char *snat = alloc_nat_zone_key(key, "snat");
306
307 zone_id_dnat = simap_get(ct_zones, dnat);
308 if (zone_id_dnat) {
309 put_load(zone_id_dnat, MFF_LOG_DNAT_ZONE, 0, 32, ofpacts_p);
310 }
311 free(dnat);
312
313 zone_id_snat = simap_get(ct_zones, snat);
314 if (zone_id_snat) {
315 put_load(zone_id_snat, MFF_LOG_SNAT_ZONE, 0, 32, ofpacts_p);
316 }
317 free(snat);
318
319 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
320 put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, ofpacts_p);
321 put_load(port_key, MFF_LOG_INPORT, 0, 32, ofpacts_p);
322
323 /* Resubmit to first logical ingress pipeline table. */
324 put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
325 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
326 tag ? 150 : 100, &match, ofpacts_p);
327
328 if (!tag && (!strcmp(binding->type, "localnet")
329 || !strcmp(binding->type, "l2gateway"))) {
330
331 /* Add a second flow for frames that lack any 802.1Q
332 * header. For these, drop the OFPACT_STRIP_VLAN
333 * action. */
334 ofpbuf_pull(ofpacts_p, ofpacts_orig_size);
335 match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
336 ofctrl_add_flow(flow_table, 0, 100, &match, ofpacts_p);
337 }
338
339 /* Table 33, priority 100.
340 * =======================
341 *
342 * Implements output to local hypervisor. Each flow matches a
343 * logical output port on the local hypervisor, and resubmits to
344 * table 34.
345 */
346
347 match_init_catchall(&match);
348 ofpbuf_clear(ofpacts_p);
349
350 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
351 match_set_metadata(&match, htonll(dp_key));
352 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
353
354 if (zone_id) {
355 put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
356 }
357 if (zone_id_dnat) {
358 put_load(zone_id_dnat, MFF_LOG_DNAT_ZONE, 0, 32, ofpacts_p);
359 }
360 if (zone_id_snat) {
361 put_load(zone_id_snat, MFF_LOG_SNAT_ZONE, 0, 32, ofpacts_p);
362 }
363
364 /* Resubmit to table 34. */
365 put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
366 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
367 &match, ofpacts_p);
368
369 /* Table 34, Priority 100.
370 * =======================
371 *
372 * Drop packets whose logical inport and outport are the same
373 * and the MLF_ALLOW_LOOPBACK flag is not set. */
374 match_init_catchall(&match);
375 ofpbuf_clear(ofpacts_p);
376 match_set_metadata(&match, htonll(dp_key));
377 match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
378 0, MLF_ALLOW_LOOPBACK);
379 match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, port_key);
380 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
381 ofctrl_add_flow(flow_table, OFTABLE_CHECK_LOOPBACK, 100,
382 &match, ofpacts_p);
383
384 /* Table 64, Priority 100.
385 * =======================
386 *
387 * If the packet is supposed to hair-pin because the "loopback"
388 * flag is set, temporarily set the in_port to zero, resubmit to
389 * table 65 for logical-to-physical translation, then restore
390 * the port number. */
391 match_init_catchall(&match);
392 ofpbuf_clear(ofpacts_p);
393 match_set_metadata(&match, htonll(dp_key));
394 match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
395 MLF_ALLOW_LOOPBACK, MLF_ALLOW_LOOPBACK);
396 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
397
398 put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p));
399 put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p);
400 put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p);
401 put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
402 ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100,
403 &match, ofpacts_p);
404
405 /* Table 65, Priority 100.
406 * =======================
407 *
408 * Deliver the packet to the local vif. */
409 match_init_catchall(&match);
410 ofpbuf_clear(ofpacts_p);
411 match_set_metadata(&match, htonll(dp_key));
412 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
413 if (tag) {
414 /* For containers sitting behind a local vif, tag the packets
415 * before delivering them. */
416 struct ofpact_vlan_vid *vlan_vid;
417 vlan_vid = ofpact_put_SET_VLAN_VID(ofpacts_p);
418 vlan_vid->vlan_vid = tag;
419 vlan_vid->push_vlan_if_needed = true;
420
421 /* A packet might need to hair-pin back into its ingress
422 * OpenFlow port (to a different logical port, which we already
423 * checked back in table 34), so set the in_port to zero. */
424 put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p));
425 put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p);
426 }
427 ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
428 if (tag) {
429 /* Revert the tag added to the packets headed to containers
430 * in the previous step. If we don't do this, the packets
431 * that are to be broadcasted to a VM in the same logical
432 * switch will also contain the tag. Also revert the zero'd
433 * in_port. */
434 ofpact_put_STRIP_VLAN(ofpacts_p);
435 put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
436 }
437 ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100,
438 &match, ofpacts_p);
439 } else if (!tun) {
440 /* Remote port connected by localnet port */
441 /* Table 33, priority 100.
442 * =======================
443 *
444 * Implements switching to localnet port. Each flow matches a
445 * logical output port on remote hypervisor, switch the output port
446 * to connected localnet port and resubmits to same table.
447 */
448
449 match_init_catchall(&match);
450 ofpbuf_clear(ofpacts_p);
451
452 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
453 match_set_metadata(&match, htonll(dp_key));
454 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
455
456 put_load(localnet_port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
457
458 /* Resubmit to table 33. */
459 put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
460 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
461 &match, ofpacts_p);
462 } else {
463 /* Remote port connected by tunnel */
464
465 /* Table 32, priority 150 and 100.
466 * ===============================
467 *
468 * Priority 150 is for packets received from a VXLAN tunnel
469 * which get resubmitted to OFTABLE_LOG_INGRESS_PIPELINE due to
470 * lack of needed metadata in VXLAN, explicitly skip sending
471 * back out any tunnels and resubmit to table 33 for local
472 * delivery.
473 *
474 * Priority 100 is for all other traffic which need to be sent
475 * to a remote hypervisor. Each flow matches an output port
476 * that includes a logical port on a remote hypervisor, and
477 * tunnels the packet to that hypervisor.
478 */
479 match_init_catchall(&match);
480 ofpbuf_clear(ofpacts_p);
481 match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
482 MLF_RCV_FROM_VXLAN, MLF_RCV_FROM_VXLAN);
483
484 /* Resubmit to table 33. */
485 put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
486 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, &match,
487 ofpacts_p);
488
489
490 match_init_catchall(&match);
491 ofpbuf_clear(ofpacts_p);
492
493 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
494 match_set_metadata(&match, htonll(dp_key));
495 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
496
497 put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
498 port_key, ofpacts_p);
499
500 /* Output to tunnel. */
501 ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
502 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
503 &match, ofpacts_p);
504 }
505 }
506
507 static void
508 consider_mc_group(enum mf_field_id mff_ovn_geneve,
509 const struct simap *ct_zones,
510 struct hmap *local_datapaths,
511 const struct sbrec_multicast_group *mc,
512 struct ofpbuf *ofpacts_p,
513 struct ofpbuf *remote_ofpacts_p,
514 struct hmap *flow_table)
515 {
516 struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
517 struct match match;
518
519 match_init_catchall(&match);
520 match_set_metadata(&match, htonll(mc->datapath->tunnel_key));
521 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, mc->tunnel_key);
522
523 /* Go through all of the ports in the multicast group:
524 *
525 * - For remote ports, add the chassis to 'remote_chassis'.
526 *
527 * - For local ports (other than logical patch ports), add actions
528 * to 'ofpacts_p' to set the output port and resubmit.
529 *
530 * - For logical patch ports, add actions to 'remote_ofpacts_p'
531 * instead. (If we put them in 'ofpacts', then the output
532 * would happen on every hypervisor in the multicast group,
533 * effectively duplicating the packet.)
534 */
535 ofpbuf_clear(ofpacts_p);
536 ofpbuf_clear(remote_ofpacts_p);
537 for (size_t i = 0; i < mc->n_ports; i++) {
538 struct sbrec_port_binding *port = mc->ports[i];
539
540 if (port->datapath != mc->datapath) {
541 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
542 VLOG_WARN_RL(&rl, UUID_FMT": multicast group contains ports "
543 "in wrong datapath",
544 UUID_ARGS(&mc->header_.uuid));
545 continue;
546 }
547
548 int zone_id = simap_get(ct_zones, port->logical_port);
549 if (zone_id) {
550 put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
551 }
552
553 if (!strcmp(port->type, "patch")) {
554 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
555 remote_ofpacts_p);
556 put_resubmit(OFTABLE_CHECK_LOOPBACK, remote_ofpacts_p);
557 } else if (simap_contains(&localvif_to_ofport,
558 (port->parent_port && *port->parent_port)
559 ? port->parent_port : port->logical_port)) {
560 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
561 put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
562 } else if (port->chassis && !get_localnet_port(local_datapaths,
563 mc->datapath->tunnel_key)) {
564 /* Add remote chassis only when localnet port not exist,
565 * otherwise multicast will reach remote ports through localnet
566 * port. */
567 sset_add(&remote_chassis, port->chassis->name);
568 }
569 }
570
571 /* Table 33, priority 100.
572 * =======================
573 *
574 * Handle output to the local logical ports in the multicast group, if
575 * any. */
576 bool local_ports = ofpacts_p->size > 0;
577 if (local_ports) {
578 /* Following delivery to local logical ports, restore the multicast
579 * group as the logical output port. */
580 put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
581
582 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
583 &match, ofpacts_p);
584 }
585
586 /* Table 32, priority 100.
587 * =======================
588 *
589 * Handle output to the remote chassis in the multicast group, if
590 * any. */
591 if (!sset_is_empty(&remote_chassis) || remote_ofpacts_p->size > 0) {
592 if (remote_ofpacts_p->size > 0) {
593 /* Following delivery to logical patch ports, restore the
594 * multicast group as the logical output port. */
595 put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
596 remote_ofpacts_p);
597 }
598
599 const char *chassis;
600 const struct chassis_tunnel *prev = NULL;
601 SSET_FOR_EACH (chassis, &remote_chassis) {
602 const struct chassis_tunnel *tun
603 = chassis_tunnel_find(chassis);
604 if (!tun) {
605 continue;
606 }
607
608 if (!prev || tun->type != prev->type) {
609 put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
610 mc->tunnel_key, remote_ofpacts_p);
611 prev = tun;
612 }
613 ofpact_put_OUTPUT(remote_ofpacts_p)->port = tun->ofport;
614 }
615
616 if (remote_ofpacts_p->size) {
617 if (local_ports) {
618 put_resubmit(OFTABLE_LOCAL_OUTPUT, remote_ofpacts_p);
619 }
620 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
621 &match, remote_ofpacts_p);
622 }
623 }
624 sset_destroy(&remote_chassis);
625 }
626
627 void
628 physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
629 const struct ovsrec_bridge *br_int, const char *this_chassis_id,
630 const struct simap *ct_zones, struct hmap *flow_table,
631 struct hmap *local_datapaths, struct hmap *patched_datapaths)
632 {
633
634 /* This bool tracks physical mapping changes. */
635 bool physical_map_changed = false;
636
637 struct simap new_localvif_to_ofport =
638 SIMAP_INITIALIZER(&new_localvif_to_ofport);
639 struct simap new_tunnel_to_ofport =
640 SIMAP_INITIALIZER(&new_tunnel_to_ofport);
641 for (int i = 0; i < br_int->n_ports; i++) {
642 const struct ovsrec_port *port_rec = br_int->ports[i];
643 if (!strcmp(port_rec->name, br_int->name)) {
644 continue;
645 }
646
647 const char *chassis_id = smap_get(&port_rec->external_ids,
648 "ovn-chassis-id");
649 if (chassis_id && !strcmp(chassis_id, this_chassis_id)) {
650 continue;
651 }
652
653 const char *localnet = smap_get(&port_rec->external_ids,
654 "ovn-localnet-port");
655 const char *l2gateway = smap_get(&port_rec->external_ids,
656 "ovn-l2gateway-port");
657 const char *l3gateway = smap_get(&port_rec->external_ids,
658 "ovn-l3gateway-port");
659 const char *logpatch = smap_get(&port_rec->external_ids,
660 "ovn-logical-patch-port");
661
662 for (int j = 0; j < port_rec->n_interfaces; j++) {
663 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
664
665 /* Get OpenFlow port number. */
666 if (!iface_rec->n_ofport) {
667 continue;
668 }
669 int64_t ofport = iface_rec->ofport[0];
670 if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
671 continue;
672 }
673
674 /* Record as patch to local net, logical patch port, chassis, or
675 * local logical port. */
676 bool is_patch = !strcmp(iface_rec->type, "patch");
677 if (is_patch && localnet) {
678 /* localnet patch ports can be handled just like VIFs. */
679 simap_put(&new_localvif_to_ofport, localnet, ofport);
680 break;
681 } else if (is_patch && l2gateway) {
682 /* L2 gateway patch ports can be handled just like VIFs. */
683 simap_put(&new_localvif_to_ofport, l2gateway, ofport);
684 break;
685 } else if (is_patch && l3gateway) {
686 /* L3 gateway patch ports can be handled just like VIFs. */
687 simap_put(&new_localvif_to_ofport, l3gateway, ofport);
688 break;
689 } else if (is_patch && logpatch) {
690 /* Logical patch ports can be handled just like VIFs. */
691 simap_put(&new_localvif_to_ofport, logpatch, ofport);
692 break;
693 } else if (chassis_id) {
694 enum chassis_tunnel_type tunnel_type;
695 if (!strcmp(iface_rec->type, "geneve")) {
696 tunnel_type = GENEVE;
697 if (!mff_ovn_geneve) {
698 continue;
699 }
700 } else if (!strcmp(iface_rec->type, "stt")) {
701 tunnel_type = STT;
702 } else if (!strcmp(iface_rec->type, "vxlan")) {
703 tunnel_type = VXLAN;
704 } else {
705 continue;
706 }
707
708 simap_put(&new_tunnel_to_ofport, chassis_id, ofport);
709 struct chassis_tunnel *tun = chassis_tunnel_find(chassis_id);
710 if (tun) {
711 /* If the tunnel's ofport has changed, update. */
712 if (tun->ofport != u16_to_ofp(ofport) ||
713 tun->type != tunnel_type) {
714 tun->ofport = u16_to_ofp(ofport);
715 tun->type = tunnel_type;
716 physical_map_changed = true;
717 }
718 } else {
719 tun = xmalloc(sizeof *tun);
720 hmap_insert(&tunnels, &tun->hmap_node,
721 hash_string(chassis_id, 0));
722 tun->chassis_id = chassis_id;
723 tun->ofport = u16_to_ofp(ofport);
724 tun->type = tunnel_type;
725 physical_map_changed = true;
726 }
727 break;
728 } else {
729 const char *iface_id = smap_get(&iface_rec->external_ids,
730 "iface-id");
731 if (iface_id) {
732 simap_put(&new_localvif_to_ofport, iface_id, ofport);
733 }
734 }
735 }
736 }
737
738 /* Remove tunnels that are no longer here. */
739 struct chassis_tunnel *tun, *tun_next;
740 HMAP_FOR_EACH_SAFE (tun, tun_next, hmap_node, &tunnels) {
741 if (!simap_find(&new_tunnel_to_ofport, tun->chassis_id)) {
742 hmap_remove(&tunnels, &tun->hmap_node);
743 physical_map_changed = true;
744 free(tun);
745 }
746 }
747
748 /* Capture changed or removed openflow ports. */
749 struct simap_node *vif_name, *vif_name_next;
750 SIMAP_FOR_EACH_SAFE (vif_name, vif_name_next, &localvif_to_ofport) {
751 int newport;
752 if ((newport = simap_get(&new_localvif_to_ofport, vif_name->name))) {
753 if (newport != simap_get(&localvif_to_ofport, vif_name->name)) {
754 simap_put(&localvif_to_ofport, vif_name->name, newport);
755 physical_map_changed = true;
756 }
757 } else {
758 simap_find_and_delete(&localvif_to_ofport, vif_name->name);
759 physical_map_changed = true;
760 }
761 }
762 SIMAP_FOR_EACH (vif_name, &new_localvif_to_ofport) {
763 if (!simap_get(&localvif_to_ofport, vif_name->name)) {
764 simap_put(&localvif_to_ofport, vif_name->name,
765 simap_get(&new_localvif_to_ofport, vif_name->name));
766 physical_map_changed = true;
767 }
768 }
769 if (physical_map_changed) {
770 /* Reprocess logical flow table immediately. */
771 poll_immediate_wake();
772 }
773
774 struct ofpbuf ofpacts;
775 ofpbuf_init(&ofpacts, 0);
776
777 /* Set up flows in table 0 for physical-to-logical translation and in table
778 * 64 for logical-to-physical translation. */
779 const struct sbrec_port_binding *binding;
780 SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
781 /* Because it is possible in the above code to enter this
782 * for loop without having cleared the flow table first, we
783 * should clear the old flows to avoid collisions. */
784 consider_port_binding(mff_ovn_geneve, ct_zones, local_datapaths,
785 patched_datapaths, binding, &ofpacts,
786 flow_table);
787 }
788
789 /* Handle output to multicast groups, in tables 32 and 33. */
790 const struct sbrec_multicast_group *mc;
791 struct ofpbuf remote_ofpacts;
792 ofpbuf_init(&remote_ofpacts, 0);
793 SBREC_MULTICAST_GROUP_FOR_EACH (mc, ctx->ovnsb_idl) {
794 /* As multicast groups are always reprocessed each time,
795 * the first step is to clean the old flows for the group
796 * so that we avoid warning messages on collisions. */
797 consider_mc_group(mff_ovn_geneve, ct_zones,
798 local_datapaths, mc, &ofpacts, &remote_ofpacts,
799 flow_table);
800 }
801
802 ofpbuf_uninit(&remote_ofpacts);
803
804 /* Table 0, priority 100.
805 * ======================
806 *
807 * Process packets that arrive from a remote hypervisor (by matching
808 * on tunnel in_port). */
809
810 /* Add flows for Geneve and STT encapsulations. These
811 * encapsulations have metadata about the ingress and egress logical
812 * ports. We set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and
813 * MFF_LOG_OUTPORT from the tunnel key data, then resubmit to table
814 * 33 to handle packets to the local hypervisor. */
815 HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
816 struct match match = MATCH_CATCHALL_INITIALIZER;
817 match_set_in_port(&match, tun->ofport);
818
819 ofpbuf_clear(&ofpacts);
820 if (tun->type == GENEVE) {
821 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
822 put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15,
823 &ofpacts);
824 put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16,
825 &ofpacts);
826 } else if (tun->type == STT) {
827 put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts);
828 put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
829 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
830 } else if (tun->type == VXLAN) {
831 /* We'll handle VXLAN later. */
832 continue;
833 } else {
834 OVS_NOT_REACHED();
835 }
836
837 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
838
839 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, &match, &ofpacts);
840 }
841
842 /* Add flows for VXLAN encapsulations. Due to the limited amount of
843 * metadata, we only support VXLAN for connections to gateways. The
844 * VNI is used to populate MFF_LOG_DATAPATH. The gateway's logical
845 * port is set to MFF_LOG_INPORT. Then the packet is resubmitted to
846 * table 16 to determine the logical egress port. */
847 HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
848 if (tun->type != VXLAN) {
849 continue;
850 }
851
852 SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
853 struct match match = MATCH_CATCHALL_INITIALIZER;
854
855 if (!binding->chassis ||
856 strcmp(tun->chassis_id, binding->chassis->name)) {
857 continue;
858 }
859
860 match_set_in_port(&match, tun->ofport);
861 match_set_tun_id(&match, htonll(binding->datapath->tunnel_key));
862
863 ofpbuf_clear(&ofpacts);
864 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
865 put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 15, &ofpacts);
866 /* For packets received from a vxlan tunnel, set a flag to that
867 * effect. */
868 put_load(1, MFF_LOG_FLAGS, MLF_RCV_FROM_VXLAN_BIT, 1, &ofpacts);
869 put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
870
871 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, &match,
872 &ofpacts);
873 }
874 }
875
876 /* Table 32, Priority 0.
877 * =======================
878 *
879 * Resubmit packets that are not directed at tunnels or part of a
880 * multicast group to the local output table. */
881 struct match match;
882 match_init_catchall(&match);
883 ofpbuf_clear(&ofpacts);
884 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
885 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, &match, &ofpacts);
886
887 /* Table 34, Priority 0.
888 * =======================
889 *
890 * Resubmit packets that don't output to the ingress port (already checked
891 * in table 33) to the logical egress pipeline, clearing the logical
892 * registers (for consistent behavior with packets that get tunneled). */
893 match_init_catchall(&match);
894 ofpbuf_clear(&ofpacts);
895 for (int i = 0; i < MFF_N_LOG_REGS; i++) {
896 put_load(0, MFF_REG0 + i, 0, 32, &ofpacts);
897 }
898 put_resubmit(OFTABLE_LOG_EGRESS_PIPELINE, &ofpacts);
899 ofctrl_add_flow(flow_table, OFTABLE_CHECK_LOOPBACK, 0, &match, &ofpacts);
900
901 /* Table 64, Priority 0.
902 * =======================
903 *
904 * Resubmit packets that do not have the MLF_ALLOW_LOOPBACK flag set
905 * to table 65 for logical-to-physical translation. */
906 match_init_catchall(&match);
907 ofpbuf_clear(&ofpacts);
908 put_resubmit(OFTABLE_LOG_TO_PHY, &ofpacts);
909 ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 0, &match, &ofpacts);
910
911 ofpbuf_uninit(&ofpacts);
912
913 simap_destroy(&new_localvif_to_ofport);
914 simap_destroy(&new_tunnel_to_ofport);
915 }