]> git.proxmox.com Git - ovs.git/commitdiff
ovn: Introduce l3 gateway router.
authorGurucharan Shetty <guru@ovn.org>
Mon, 9 May 2016 20:44:34 +0000 (13:44 -0700)
committerGurucharan Shetty <guru@ovn.org>
Fri, 3 Jun 2016 02:30:33 +0000 (19:30 -0700)
Currently OVN has distributed switches and routers. When a packet
exits a container or a VM, the entire lifecycle of the packet
through multiple switches and routers are calculated in source
chassis itself. When the destination endpoint resides on a different
chassis, the packet is sent to the other chassis and it only goes
through the egress pipeline of that chassis once and eventually to
the real destination.

When the packet returns back, the same thing happens. The return
packet leaves the VM/container on the chassis where it resides.
The packet goes through all the switches and routers in the logical
pipleline on that chassis and then sent to the eventual destination
over the tunnel.

The above makes the logical pipeline very flexible and easy. But,
creates a problem for cases where you need to add stateful services
(via conntrack) on switches and routers.

For l3 gateways, we plan to leverage DNAT and SNAT functionality
and we want to apply DNAT and SNAT rules on a router. So we ideally need
the packet to go through that router in both directions in the same
chassis. To achieve this, this commit introduces a new gateway router which is
static and can be connected to your distributed router via a switch.

To make minimal changes in OVN's logical pipeline, this commit
tries to make the switch port connected to a l3 gateway router look like
a container/VM endpoint for every other chassis except the chassis
on which the l3 gateway router resides. On the chassis where the
gateway router resides, the connection looks just like a patch port.

This is achieved by the doing the following:
Introduces a new type of port_binding record called 'gateway'.
On the chassis where the gateway router resides, this port behaves just
like the port of type 'patch'. The ovn-controller on that chassis
populates the "chassis" column for this record as an indication for
other ovn-controllers of its physical location. Other ovn-controllers
treat this port as they would treat a VM/Container port on a different
chassis.

Signed-off-by: Gurucharan Shetty <guru@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
ovn/controller/binding.c
ovn/controller/ovn-controller.c
ovn/controller/patch.c
ovn/controller/patch.h
ovn/northd/ovn-northd.c
ovn/ovn-nb.ovsschema
ovn/ovn-nb.xml
ovn/ovn-sb.xml
tests/ovn.at

index a0d8b969a1451e76bd287a64c5ba52a65ac6e490..e5e55b1b305b081229651c835cb687a99e3f8495 100644 (file)
@@ -200,7 +200,8 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
                 }
                 sbrec_port_binding_set_chassis(binding_rec, chassis_rec);
             }
-        } else if (chassis_rec && binding_rec->chassis == chassis_rec) {
+        } else if (chassis_rec && binding_rec->chassis == chassis_rec
+                   && strcmp(binding_rec->type, "gateway")) {
             if (ctx->ovnsb_idl_txn) {
                 VLOG_INFO("Releasing lport %s from this chassis.",
                           binding_rec->logical_port);
index 511b18469f4d203471dd7899a48d53a9824de8c4..bc4c24f8f977e38b825a8ea76b673d81fe953b4e 100644 (file)
@@ -364,8 +364,9 @@ main(int argc, char *argv[])
                     &local_datapaths);
         }
 
-        if (br_int) {
-            patch_run(&ctx, br_int, &local_datapaths, &patched_datapaths);
+        if (br_int && chassis_id) {
+            patch_run(&ctx, br_int, chassis_id, &local_datapaths,
+                      &patched_datapaths);
 
             struct lport_index lports;
             struct mcgroup_index mcgroups;
index 4808146e9e90a7a6c604203ad350263c8ab12e59..e8abe3002a786cfaa52dfcdbbc5f8b1a507d73ab 100644 (file)
@@ -267,12 +267,28 @@ add_patched_datapath(struct hmap *patched_datapaths,
 static void
 add_logical_patch_ports(struct controller_ctx *ctx,
                         const struct ovsrec_bridge *br_int,
+                        const char *local_chassis_id,
                         struct shash *existing_ports,
                         struct hmap *patched_datapaths)
 {
+    const struct sbrec_chassis *chassis_rec;
+    chassis_rec = get_chassis(ctx->ovnsb_idl, local_chassis_id);
+    if (!chassis_rec) {
+        return;
+    }
+
     const struct sbrec_port_binding *binding;
     SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
-        if (!strcmp(binding->type, "patch")) {
+        bool local_port = false;
+        if (!strcmp(binding->type, "gateway")) {
+            const char *chassis = smap_get(&binding->options,
+                                           "gateway-chassis");
+            if (!strcmp(local_chassis_id, chassis)) {
+                local_port = true;
+            }
+        }
+
+        if (!strcmp(binding->type, "patch") || local_port) {
             const char *local = binding->logical_port;
             const char *peer = smap_get(&binding->options, "peer");
             if (!peer) {
@@ -287,13 +303,19 @@ add_logical_patch_ports(struct controller_ctx *ctx,
             free(dst_name);
             free(src_name);
             add_patched_datapath(patched_datapaths, binding);
+            if (local_port) {
+                if (binding->chassis != chassis_rec && ctx->ovnsb_idl_txn) {
+                    sbrec_port_binding_set_chassis(binding, chassis_rec);
+                }
+            }
         }
     }
 }
 
 void
 patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
-          struct hmap *local_datapaths, struct hmap *patched_datapaths)
+          const char *chassis_id, struct hmap *local_datapaths,
+          struct hmap *patched_datapaths)
 {
     if (!ctx->ovs_idl_txn) {
         return;
@@ -313,7 +335,8 @@ patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
      * 'existing_ports' any patch ports that do exist in the database and
      * should be there. */
     add_bridge_mappings(ctx, br_int, &existing_ports, local_datapaths);
-    add_logical_patch_ports(ctx, br_int, &existing_ports, patched_datapaths);
+    add_logical_patch_ports(ctx, br_int, chassis_id, &existing_ports,
+                            patched_datapaths);
 
     /* Now 'existing_ports' only still contains patch ports that exist in the
      * database but shouldn't.  Delete them from the database. */
index d5d842e6eff3929407c4d64ee2deb90d6bc47c9a..7920a485b76d8cb0dade35e9d08e25783955b4e5 100644 (file)
@@ -27,6 +27,7 @@ struct hmap;
 struct ovsrec_bridge;
 
 void patch_run(struct controller_ctx *, const struct ovsrec_bridge *br_int,
-               struct hmap *local_datapaths, struct hmap *patched_datapaths);
+               const char *chassis_id, struct hmap *local_datapaths,
+               struct hmap *patched_datapaths);
 
 #endif /* ovn/patch.h */
index 3932417bac3a5e08ce155764792f15e0b965a6d2..a8dd2bb0caa46d977e43f2ca7c7d4202fd073632 100644 (file)
@@ -690,11 +690,24 @@ ovn_port_update_sbrec(const struct ovn_port *op)
 {
     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
     if (op->nbr) {
-        sbrec_port_binding_set_type(op->sb, "patch");
+        /* If the router is for l3 gateway, it resides on a chassis
+         * and its port type is "gateway". */
+        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
+        if (chassis) {
+            sbrec_port_binding_set_type(op->sb, "gateway");
+        } else {
+            sbrec_port_binding_set_type(op->sb, "patch");
+        }
 
         const char *peer = op->peer ? op->peer->key : "<error>";
-        const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
-        sbrec_port_binding_set_options(op->sb, &ids);
+        struct smap new;
+        smap_init(&new);
+        smap_add(&new, "peer", peer);
+        if (chassis) {
+            smap_add(&new, "gateway-chassis", chassis);
+        }
+        sbrec_port_binding_set_options(op->sb, &new);
+        smap_destroy(&new);
 
         sbrec_port_binding_set_parent_port(op->sb, NULL);
         sbrec_port_binding_set_tag(op->sb, NULL, 0);
@@ -704,15 +717,32 @@ ovn_port_update_sbrec(const struct ovn_port *op)
             sbrec_port_binding_set_type(op->sb, op->nbs->type);
             sbrec_port_binding_set_options(op->sb, &op->nbs->options);
         } else {
-            sbrec_port_binding_set_type(op->sb, "patch");
+            const char *chassis = NULL;
+            if (op->peer && op->peer->od && op->peer->od->nbr) {
+                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
+            }
+
+            /* A switch port connected to a gateway router is also of
+             * type "gateway". */
+            if (chassis) {
+                sbrec_port_binding_set_type(op->sb, "gateway");
+            } else {
+                sbrec_port_binding_set_type(op->sb, "patch");
+            }
 
             const char *router_port = smap_get(&op->nbs->options,
                                                "router-port");
             if (!router_port) {
                 router_port = "<error>";
             }
-            const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
-            sbrec_port_binding_set_options(op->sb, &ids);
+            struct smap new;
+            smap_init(&new);
+            smap_add(&new, "peer", router_port);
+            if (chassis) {
+                smap_add(&new, "gateway-chassis", chassis);
+            }
+            sbrec_port_binding_set_options(op->sb, &new);
+            smap_destroy(&new);
         }
         sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
         sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
index 8163f6a47a56c545da43f43fca6752bc18e048f9..fa21b304632e4d8a4a191002b05fb3e1137381fe 100644 (file)
@@ -1,7 +1,7 @@
 {
     "name": "OVN_Northbound",
-    "version": "2.1.1",
-    "cksum": "2615511875 5108",
+    "version": "2.1.2",
+    "cksum": "429668869 5325",
     "tables": {
         "Logical_Switch": {
             "columns": {
                                    "max": "unlimited"}},
                 "default_gw": {"type": {"key": "string", "min": 0, "max": 1}},
                 "enabled": {"type": {"key": "boolean", "min": 0, "max": 1}},
+                "options": {
+                     "type": {"key": "string",
+                              "value": "string",
+                              "min": 0,
+                              "max": "unlimited"}},
                 "external_ids": {
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}}},
index d7fd59590551d00342c3fa9c42e530f26d04e67c..41092f144cb25608a2a185dbe55d5634a69d902a 100644 (file)
       column is set to <code>false</code>, the router is disabled.  A disabled
       router has all ingress and egress traffic dropped.
     </column>
+
+    <group title="Options">
+      <p>
+        Additional options for the logical router.
+      </p>
+
+      <column name="options" key="chassis">
+        If set, indicates that the logical router in question is
+        a Gateway router (which is centralized) and resides in the set
+        chassis.  The same value is also used by <code>ovn-controller</code>
+        to uniquely identify the chassis in the OVN deployment and
+        comes from <code>external_ids:system-id</code> in the
+        <code>Open_vSwitch</code> table of Open_vSwitch database.
+      </column>
+    </group>
     
     <group title="Common Columns">
       <column name="external_ids">
index efd2f9a00d4b4f0e7ebc7282f21796211682d9dc..1231b4ebb260e6604143587a03842f94a38ba475 100644 (file)
@@ -1220,7 +1220,12 @@ tcp.flags = RST;
       which <code>ovn-controller</code>/<code>ovn-controller-vtep</code> in
       turn finds out by monitoring the local hypervisor's Open_vSwitch
       database, which identifies logical ports via the conventions described
-      in <code>IntegrationGuide.md</code>.
+      in <code>IntegrationGuide.md</code>.  (The exceptions are for
+      <code>Port_Binding</code> records with <code>type</code> of
+      <code>gateway</code>, whose locations are identified by
+      <code>ovn-northd</code> via the <code>options:gateway-chassis</code>
+      column in this table.  <code>ovn-controller</code> is still responsible
+      to populate the <code>chassis</code> column.)
     </p>
 
     <p>
@@ -1298,6 +1303,14 @@ tcp.flags = RST;
             a logical router to a logical switch or to another logical router.
           </dd>
 
+          <dt><code>gateway</code></dt>
+          <dd>
+            One of a pair of logical ports that act as if connected by a patch
+            cable across multiple chassis.  Useful for connecting a logical
+            switch with a Gateway router (which is only resident on a
+            particular chassis).
+          </dd>
+
           <dt><code>localnet</code></dt>
           <dd>
             A connection to a locally accessible network from each
@@ -1336,6 +1349,26 @@ tcp.flags = RST;
       </column>
     </group>
 
+    <group title="Gateway Options">
+      <p>
+        These options apply to logical ports with <ref column="type"/> of
+        <code>gateway</code>.
+      </p>
+
+      <column name="options" key="peer">
+        The <ref column="logical_port"/> in the <ref table="Port_Binding"/>
+        record for the other side of the 'gateway' port.  The named <ref
+        column="logical_port"/> must specify this <ref column="logical_port"/>
+        in its own <code>peer</code> option.  That is, the two 'gateway'
+        logical ports must have reversed <ref column="logical_port"/> and
+        <code>peer</code> values.
+      </column>
+
+      <column name="options" key="gateway-chassis">
+        The <code>chassis</code> in which the port resides.
+      </column>
+    </group>
+
     <group title="Localnet Options">
       <p>
         These options apply to logical ports with <ref column="type"/> of
index 1990d37449d94d0ec38f7eebbd79b817679d679b..059c96981270fce5ddde8be000cf41c7b9d4b914 100644 (file)
@@ -2870,3 +2870,187 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 
 AT_CLEANUP
+
+
+AT_SETUP([ovn -- 2 HVs, 2 LRs connected via LS, gateway router])
+AT_KEYWORDS([ovngatewayrouter])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Logical network:
+# Two LRs - R1 and R2 that are connected to each other via LS "join"
+# in 20.0.0.0/24 network. R1 has switchess foo (192.168.1.0/24)
+# connected to it. R2 has alice (172.16.1.0/24) connected to it.
+# R2 is a gateway router.
+
+
+
+# Create two hypervisor and create OVS ports corresponding to logical ports.
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=foo1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=alice1 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap \
+    ofport-request=1
+
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+ovn_populate_arp
+
+ovn-nbctl create Logical_Router name=R1
+ovn-nbctl create Logical_Router name=R2 options:chassis="hv2"
+
+ovn-nbctl lswitch-add foo
+ovn-nbctl lswitch-add alice
+ovn-nbctl lswitch-add join
+
+# Connect foo to R1
+ovn-nbctl -- --id=@lrp create Logical_Router_port name=foo \
+network=192.168.1.1/24 mac=\"00:00:01:01:02:03\" -- add Logical_Router R1 \
+ports @lrp -- lport-add foo rp-foo
+
+ovn-nbctl set Logical_port rp-foo type=router options:router-port=foo \
+addresses=\"00:00:01:01:02:03\"
+
+# Connect alice to R2
+ovn-nbctl -- --id=@lrp create Logical_Router_port name=alice \
+network=172.16.1.1/24 mac=\"00:00:02:01:02:03\" -- add Logical_Router R2 \
+ports @lrp -- lport-add alice rp-alice
+
+ovn-nbctl set Logical_port rp-alice type=router options:router-port=alice \
+addresses=\"00:00:02:01:02:03\"
+
+
+# Connect R1 to join
+ovn-nbctl -- --id=@lrp create Logical_Router_port name=R1_join \
+network=20.0.0.1/24 mac=\"00:00:04:01:02:03\" -- add Logical_Router R1 \
+ports @lrp -- lport-add join r1-join
+
+ovn-nbctl set Logical_port r1-join type=router options:router-port=R1_join \
+addresses='"00:00:04:01:02:03"'
+
+# Connect R2 to join
+ovn-nbctl -- --id=@lrp create Logical_Router_port name=R2_join \
+network=20.0.0.2/24 mac=\"00:00:04:01:02:04\" -- add Logical_Router R2 \
+ports @lrp -- lport-add join r2-join
+
+ovn-nbctl set Logical_port r2-join type=router options:router-port=R2_join \
+addresses='"00:00:04:01:02:04"'
+
+
+#install static routes
+ovn-nbctl -- --id=@lrt create Logical_Router_Static_Route \
+ip_prefix=172.16.1.0/24 nexthop=20.0.0.2 -- add Logical_Router \
+R1 static_routes @lrt
+
+ovn-nbctl -- --id=@lrt create Logical_Router_Static_Route \
+ip_prefix=192.168.1.0/24 nexthop=20.0.0.1 -- add Logical_Router \
+R2 static_routes @lrt
+
+# Create logical port foo1 in foo
+ovn-nbctl lport-add foo foo1 \
+-- lport-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Create logical port alice1 in alice
+ovn-nbctl lport-add alice alice1 \
+-- lport-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.2"
+
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 2
+
+ip_to_hex() {
+    printf "%02x%02x%02x%02x" "$@"
+}
+trim_zeros() {
+    sed 's/\(00\)\{1,\}$//'
+}
+
+# Send ip packets between foo1 and alice1
+src_mac="f00000010203"
+dst_mac="000001010203"
+src_ip=`ip_to_hex 192 168 1 2`
+dst_ip=`ip_to_hex 172 16 1 2`
+packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
+
+echo "---------NB dump-----"
+ovn-nbctl show
+echo "---------------------"
+ovn-nbctl list logical_router
+echo "---------------------"
+ovn-nbctl list logical_router_port
+echo "---------------------"
+
+echo "---------SB dump-----"
+ovn-sbctl list datapath_binding
+echo "---------------------"
+ovn-sbctl list port_binding
+echo "---------------------"
+ovn-sbctl dump-flows
+echo "---------------------"
+ovn-sbctl list chassis
+ovn-sbctl list encap
+echo "---------------------"
+
+echo "------ hv1 dump ----------"
+as hv1 ovs-ofctl show br-int
+as hv1 ovs-ofctl dump-flows br-int
+echo "------ hv2 dump ----------"
+as hv2 ovs-ofctl show br-int
+as hv2 ovs-ofctl dump-flows br-int
+echo "----------------------------"
+
+# Packet to Expect at alice1
+src_mac="000002010203"
+dst_mac="f00000010204"
+src_ip=`ip_to_hex 192 168 1 2`
+dst_ip=`ip_to_hex 172 16 1 2`
+expected=${dst_mac}${src_mac}08004500001c000000003e110200${src_ip}${dst_ip}0035111100080000
+
+
+as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
+as hv1 ovs-appctl ofproto/trace br-int in_port=1 $packet
+
+$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv2/vif1-tx.pcap | trim_zeros > received1.packets
+echo $expected | trim_zeros > expout
+AT_CHECK([cat received1.packets], [0], [expout])
+
+for sim in hv1 hv2; do
+    as $sim
+    OVS_APP_EXIT_AND_WAIT([ovn-controller])
+    OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+    OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+done
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as main
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+AT_CLEANUP