From: Gurucharan Shetty Date: Mon, 9 May 2016 20:44:34 +0000 (-0700) Subject: ovn: Introduce l3 gateway router. X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=c1645003c8acd3d28e1cef3597a6e931388e3190;p=ovs.git ovn: Introduce l3 gateway router. Currently OVN has distributed switches and routers. When a packet exits a container or a VM, the entire lifecycle of the packet through multiple switches and routers is calculated in the source chassis itself. When the destination endpoint resides on a different chassis, the packet is sent to the other chassis and it only goes through the egress pipeline of that chassis once and eventually to the real destination. When the packet returns back, the same thing happens. The return packet leaves the VM/container on the chassis where it resides. The packet goes through all the switches and routers in the logical pipeline on that chassis and is then sent to the eventual destination over the tunnel. The above makes the logical pipeline very flexible and easy. But it creates a problem for cases where you need to add stateful services (via conntrack) on switches and routers. For l3 gateways, we plan to leverage DNAT and SNAT functionality and we want to apply DNAT and SNAT rules on a router. So we ideally need the packet to go through that router in both directions in the same chassis. To achieve this, this commit introduces a new gateway router which is static and can be connected to your distributed router via a switch. To make minimal changes in OVN's logical pipeline, this commit tries to make the switch port connected to a l3 gateway router look like a container/VM endpoint for every other chassis except the chassis on which the l3 gateway router resides. On the chassis where the gateway router resides, the connection looks just like a patch port. This is achieved by doing the following: Introduces a new type of port_binding record called 'gateway'. 
On the chassis where the gateway router resides, this port behaves just like the port of type 'patch'. The ovn-controller on that chassis populates the "chassis" column for this record as an indication for other ovn-controllers of its physical location. Other ovn-controllers treat this port as they would treat a VM/Container port on a different chassis. Signed-off-by: Gurucharan Shetty Acked-by: Ben Pfaff --- diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c index a0d8b969a..e5e55b1b3 100644 --- a/ovn/controller/binding.c +++ b/ovn/controller/binding.c @@ -200,7 +200,8 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, } sbrec_port_binding_set_chassis(binding_rec, chassis_rec); } - } else if (chassis_rec && binding_rec->chassis == chassis_rec) { + } else if (chassis_rec && binding_rec->chassis == chassis_rec + && strcmp(binding_rec->type, "gateway")) { if (ctx->ovnsb_idl_txn) { VLOG_INFO("Releasing lport %s from this chassis.", binding_rec->logical_port); diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index 511b18469..bc4c24f8f 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -364,8 +364,9 @@ main(int argc, char *argv[]) &local_datapaths); } - if (br_int) { - patch_run(&ctx, br_int, &local_datapaths, &patched_datapaths); + if (br_int && chassis_id) { + patch_run(&ctx, br_int, chassis_id, &local_datapaths, + &patched_datapaths); struct lport_index lports; struct mcgroup_index mcgroups; diff --git a/ovn/controller/patch.c b/ovn/controller/patch.c index 4808146e9..e8abe3002 100644 --- a/ovn/controller/patch.c +++ b/ovn/controller/patch.c @@ -267,12 +267,28 @@ add_patched_datapath(struct hmap *patched_datapaths, static void add_logical_patch_ports(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, + const char *local_chassis_id, struct shash *existing_ports, struct hmap *patched_datapaths) { + const struct sbrec_chassis *chassis_rec; + 
chassis_rec = get_chassis(ctx->ovnsb_idl, local_chassis_id); + if (!chassis_rec) { + return; + } + const struct sbrec_port_binding *binding; SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) { - if (!strcmp(binding->type, "patch")) { + bool local_port = false; + if (!strcmp(binding->type, "gateway")) { + const char *chassis = smap_get(&binding->options, + "gateway-chassis"); + if (!strcmp(local_chassis_id, chassis)) { + local_port = true; + } + } + + if (!strcmp(binding->type, "patch") || local_port) { const char *local = binding->logical_port; const char *peer = smap_get(&binding->options, "peer"); if (!peer) { @@ -287,13 +303,19 @@ add_logical_patch_ports(struct controller_ctx *ctx, free(dst_name); free(src_name); add_patched_datapath(patched_datapaths, binding); + if (local_port) { + if (binding->chassis != chassis_rec && ctx->ovnsb_idl_txn) { + sbrec_port_binding_set_chassis(binding, chassis_rec); + } + } } } } void patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, - struct hmap *local_datapaths, struct hmap *patched_datapaths) + const char *chassis_id, struct hmap *local_datapaths, + struct hmap *patched_datapaths) { if (!ctx->ovs_idl_txn) { return; @@ -313,7 +335,8 @@ patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, * 'existing_ports' any patch ports that do exist in the database and * should be there. */ add_bridge_mappings(ctx, br_int, &existing_ports, local_datapaths); - add_logical_patch_ports(ctx, br_int, &existing_ports, patched_datapaths); + add_logical_patch_ports(ctx, br_int, chassis_id, &existing_ports, + patched_datapaths); /* Now 'existing_ports' only still contains patch ports that exist in the * database but shouldn't. Delete them from the database. 
*/ diff --git a/ovn/controller/patch.h b/ovn/controller/patch.h index d5d842e6e..7920a485b 100644 --- a/ovn/controller/patch.h +++ b/ovn/controller/patch.h @@ -27,6 +27,7 @@ struct hmap; struct ovsrec_bridge; void patch_run(struct controller_ctx *, const struct ovsrec_bridge *br_int, - struct hmap *local_datapaths, struct hmap *patched_datapaths); + const char *chassis_id, struct hmap *local_datapaths, + struct hmap *patched_datapaths); #endif /* ovn/patch.h */ diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index 3932417ba..a8dd2bb0c 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -690,11 +690,24 @@ ovn_port_update_sbrec(const struct ovn_port *op) { sbrec_port_binding_set_datapath(op->sb, op->od->sb); if (op->nbr) { - sbrec_port_binding_set_type(op->sb, "patch"); + /* If the router is for l3 gateway, it resides on a chassis + * and its port type is "gateway". */ + const char *chassis = smap_get(&op->od->nbr->options, "chassis"); + if (chassis) { + sbrec_port_binding_set_type(op->sb, "gateway"); + } else { + sbrec_port_binding_set_type(op->sb, "patch"); + } const char *peer = op->peer ? 
op->peer->key : ""; - const struct smap ids = SMAP_CONST1(&ids, "peer", peer); - sbrec_port_binding_set_options(op->sb, &ids); + struct smap new; + smap_init(&new); + smap_add(&new, "peer", peer); + if (chassis) { + smap_add(&new, "gateway-chassis", chassis); + } + sbrec_port_binding_set_options(op->sb, &new); + smap_destroy(&new); sbrec_port_binding_set_parent_port(op->sb, NULL); sbrec_port_binding_set_tag(op->sb, NULL, 0); @@ -704,15 +717,32 @@ ovn_port_update_sbrec(const struct ovn_port *op) sbrec_port_binding_set_type(op->sb, op->nbs->type); sbrec_port_binding_set_options(op->sb, &op->nbs->options); } else { - sbrec_port_binding_set_type(op->sb, "patch"); + const char *chassis = NULL; + if (op->peer && op->peer->od && op->peer->od->nbr) { + chassis = smap_get(&op->peer->od->nbr->options, "chassis"); + } + + /* A switch port connected to a gateway router is also of + * type "gateway". */ + if (chassis) { + sbrec_port_binding_set_type(op->sb, "gateway"); + } else { + sbrec_port_binding_set_type(op->sb, "patch"); + } const char *router_port = smap_get(&op->nbs->options, "router-port"); if (!router_port) { router_port = ""; } - const struct smap ids = SMAP_CONST1(&ids, "peer", router_port); - sbrec_port_binding_set_options(op->sb, &ids); + struct smap new; + smap_init(&new); + smap_add(&new, "peer", router_port); + if (chassis) { + smap_add(&new, "gateway-chassis", chassis); + } + sbrec_port_binding_set_options(op->sb, &new); + smap_destroy(&new); } sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name); sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag); diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema index 8163f6a47..fa21b3046 100644 --- a/ovn/ovn-nb.ovsschema +++ b/ovn/ovn-nb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Northbound", - "version": "2.1.1", - "cksum": "2615511875 5108", + "version": "2.1.2", + "cksum": "429668869 5325", "tables": { "Logical_Switch": { "columns": { @@ -78,6 +78,11 @@ "max": "unlimited"}}, "default_gw": 
{"type": {"key": "string", "min": 0, "max": 1}}, "enabled": {"type": {"key": "boolean", "min": 0, "max": 1}}, + "options": { + "type": {"key": "string", + "value": "string", + "min": 0, + "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml index d7fd59590..41092f144 100644 --- a/ovn/ovn-nb.xml +++ b/ovn/ovn-nb.xml @@ -630,6 +630,21 @@ column is set to false, the router is disabled. A disabled router has all ingress and egress traffic dropped. + + +

+ Additional options for the logical router. +

+ + + If set, indicates that the logical router in question is + a Gateway router (which is centralized) and resides in the set + chassis. The same value is also used by ovn-controller + to uniquely identify the chassis in the OVN deployment and + comes from external_ids:system-id in the + Open_vSwitch table of Open_vSwitch database. + +
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index efd2f9a00..1231b4ebb 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -1220,7 +1220,12 @@ tcp.flags = RST; which ovn-controller/ovn-controller-vtep in turn finds out by monitoring the local hypervisor's Open_vSwitch database, which identifies logical ports via the conventions described - in IntegrationGuide.md. + in IntegrationGuide.md. (The exceptions are for + Port_Binding records with type of + gateway, whose locations are identified by + ovn-northd via the options:gateway-chassis + column in this table. ovn-controller is still responsible + for populating the chassis column.)

@@ -1298,6 +1303,14 @@ tcp.flags = RST; a logical router to a logical switch or to another logical router. +

gateway
+
+ One of a pair of logical ports that act as if connected by a patch + cable across multiple chassis. Useful for connecting a logical + switch with a Gateway router (which is only resident on a + particular chassis). +
+
localnet
A connection to a locally accessible network from each @@ -1336,6 +1349,26 @@ tcp.flags = RST; + +

+ These options apply to logical ports with type of + gateway.

+ + + The logical_port in the Port_Binding record for the other side of the 'gateway' port. The named logical_port must specify this logical_port in its own peer option. That is, the two 'gateway' logical ports must have reversed logical_port and peer values. + + + The chassis in which the port resides. + +
+

These options apply to logical ports with of diff --git a/tests/ovn.at b/tests/ovn.at index 1990d3744..059c96981 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2870,3 +2870,187 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) OVS_APP_EXIT_AND_WAIT([ovsdb-server]) AT_CLEANUP + + +AT_SETUP([ovn -- 2 HVs, 2 LRs connected via LS, gateway router]) +AT_KEYWORDS([ovngatewayrouter]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) +ovn_start + +# Logical network: +# Two LRs - R1 and R2 that are connected to each other via LS "join" +# in 20.0.0.0/24 network. R1 has switchess foo (192.168.1.0/24) +# connected to it. R2 has alice (172.16.1.0/24) connected to it. +# R2 is a gateway router. + + + +# Create two hypervisor and create OVS ports corresponding to logical ports. +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=foo1 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 + + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +ovs-vsctl -- add-port br-int hv2-vif1 -- \ + set interface hv2-vif1 external-ids:iface-id=alice1 \ + options:tx_pcap=hv2/vif1-tx.pcap \ + options:rxq_pcap=hv2/vif1-rx.pcap \ + ofport-request=1 + +# Pre-populate the hypervisors' ARP tables so that we don't lose any +# packets for ARP resolution (native tunneling doesn't queue packets +# for ARP resolution). 
+ovn_populate_arp + +ovn-nbctl create Logical_Router name=R1 +ovn-nbctl create Logical_Router name=R2 options:chassis="hv2" + +ovn-nbctl lswitch-add foo +ovn-nbctl lswitch-add alice +ovn-nbctl lswitch-add join + +# Connect foo to R1 +ovn-nbctl -- --id=@lrp create Logical_Router_port name=foo \ +network=192.168.1.1/24 mac=\"00:00:01:01:02:03\" -- add Logical_Router R1 \ +ports @lrp -- lport-add foo rp-foo + +ovn-nbctl set Logical_port rp-foo type=router options:router-port=foo \ +addresses=\"00:00:01:01:02:03\" + +# Connect alice to R2 +ovn-nbctl -- --id=@lrp create Logical_Router_port name=alice \ +network=172.16.1.1/24 mac=\"00:00:02:01:02:03\" -- add Logical_Router R2 \ +ports @lrp -- lport-add alice rp-alice + +ovn-nbctl set Logical_port rp-alice type=router options:router-port=alice \ +addresses=\"00:00:02:01:02:03\" + + +# Connect R1 to join +ovn-nbctl -- --id=@lrp create Logical_Router_port name=R1_join \ +network=20.0.0.1/24 mac=\"00:00:04:01:02:03\" -- add Logical_Router R1 \ +ports @lrp -- lport-add join r1-join + +ovn-nbctl set Logical_port r1-join type=router options:router-port=R1_join \ +addresses='"00:00:04:01:02:03"' + +# Connect R2 to join +ovn-nbctl -- --id=@lrp create Logical_Router_port name=R2_join \ +network=20.0.0.2/24 mac=\"00:00:04:01:02:04\" -- add Logical_Router R2 \ +ports @lrp -- lport-add join r2-join + +ovn-nbctl set Logical_port r2-join type=router options:router-port=R2_join \ +addresses='"00:00:04:01:02:04"' + + +#install static routes +ovn-nbctl -- --id=@lrt create Logical_Router_Static_Route \ +ip_prefix=172.16.1.0/24 nexthop=20.0.0.2 -- add Logical_Router \ +R1 static_routes @lrt + +ovn-nbctl -- --id=@lrt create Logical_Router_Static_Route \ +ip_prefix=192.168.1.0/24 nexthop=20.0.0.1 -- add Logical_Router \ +R2 static_routes @lrt + +# Create logical port foo1 in foo +ovn-nbctl lport-add foo foo1 \ +-- lport-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" + +# Create logical port alice1 in alice +ovn-nbctl lport-add alice 
alice1 \ +-- lport-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.2" + + +# Allow some time for ovn-northd and ovn-controller to catch up. +# XXX This should be more systematic. +sleep 2 + +ip_to_hex() { + printf "%02x%02x%02x%02x" "$@" +} +trim_zeros() { + sed 's/\(00\)\{1,\}$//' +} + +# Send ip packets between foo1 and alice1 +src_mac="f00000010203" +dst_mac="000001010203" +src_ip=`ip_to_hex 192 168 1 2` +dst_ip=`ip_to_hex 172 16 1 2` +packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000 + +echo "---------NB dump-----" +ovn-nbctl show +echo "---------------------" +ovn-nbctl list logical_router +echo "---------------------" +ovn-nbctl list logical_router_port +echo "---------------------" + +echo "---------SB dump-----" +ovn-sbctl list datapath_binding +echo "---------------------" +ovn-sbctl list port_binding +echo "---------------------" +ovn-sbctl dump-flows +echo "---------------------" +ovn-sbctl list chassis +ovn-sbctl list encap +echo "---------------------" + +echo "------ hv1 dump ----------" +as hv1 ovs-ofctl show br-int +as hv1 ovs-ofctl dump-flows br-int +echo "------ hv2 dump ----------" +as hv2 ovs-ofctl show br-int +as hv2 ovs-ofctl dump-flows br-int +echo "----------------------------" + +# Packet to Expect at alice1 +src_mac="000002010203" +dst_mac="f00000010204" +src_ip=`ip_to_hex 192 168 1 2` +dst_ip=`ip_to_hex 172 16 1 2` +expected=${dst_mac}${src_mac}08004500001c000000003e110200${src_ip}${dst_ip}0035111100080000 + + +as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet +as hv1 ovs-appctl ofproto/trace br-int in_port=1 $packet + +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv2/vif1-tx.pcap | trim_zeros > received1.packets +echo $expected | trim_zeros > expout +AT_CHECK([cat received1.packets], [0], [expout]) + +for sim in hv1 hv2; do + as $sim + OVS_APP_EXIT_AND_WAIT([ovn-controller]) + OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) + OVS_APP_EXIT_AND_WAIT([ovsdb-server]) +done + +as ovn-sb 
+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as main +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +AT_CLEANUP