X-Git-Url: https://git.proxmox.com/?p=pve-docs.git;a=blobdiff_plain;f=vxlan-and-evpn.adoc;h=9cd55fc2d9690d26e7d4426dda486bb9a67b4d25;hp=703cd8b0242774bedac67fa0e8f920c10c24e793;hb=7d6078845fa6a3bd308c7dc843273e56be33f315;hpb=1821b137ad48df7a9cee1d0c65db238606e12876 diff --git a/vxlan-and-evpn.adoc b/vxlan-and-evpn.adoc index 703cd8b..9cd55fc 100644 --- a/vxlan-and-evpn.adoc +++ b/vxlan-and-evpn.adoc @@ -16,6 +16,9 @@ while accommodating a very large number of tenants. It is defined in RFC 7348. Each overlay network is known as a VXLAN Segment and identified by a unique 24-bit segment ID called a VXLAN Network Identifier (VNI). +VXLAN encapsulation adds 50 bytes of overhead, so you need to increase the MTU on your host's +physical interfaces to at least 1550 (or decrease the MTU inside your VMs to 1450). + For BUM traffic (broadcast / unknown unicast traffic, multicast), we have 3 differents vxlan setup modes : multicast, unicast, bgp-evpn @@ -36,6 +39,7 @@ remote VTEPs will get the packet and answer accordingly direct to the originatin ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -47,6 +51,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -58,6 +63,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -74,6 +80,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -85,6 +92,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -97,6 +105,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -113,6 +122,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -124,6 +134,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2
inet manual + vxlan-id 2 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -136,6 +147,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -161,6 +173,7 @@ The VXLAN device will still learn remote addresses automatically using source-ad ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -173,6 +186,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -186,6 +200,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan2 inet manual + vxlan-id 3 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -203,6 +218,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -214,6 +230,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan_remoteip 192.168.0.1 vxlan_remoteip 192.168.0.3 @@ -227,6 +244,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan2 inet manual + vxlan-id 3 vxlan_remoteip 192.168.0.1 vxlan_remoteip 192.168.0.3 @@ -244,6 +262,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -255,6 +274,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -268,6 +288,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan2 inet manual + vxlan-id 3 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -296,6 +317,7 @@ it's possible to use external bgp route reflector servers. 
---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -307,6 +329,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on @@ -323,6 +346,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on @@ -363,6 +387,7 @@ line vty ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -374,6 +399,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -389,6 +415,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -429,6 +456,7 @@ line vty ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -440,6 +468,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -455,6 +484,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -490,240 +520,98 @@ line vty ! 
---- +VXLAN layer3 routing with anycast gateway +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -VXLAN layer2 with vlan aware linux bridges -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We use 1 vmbr bridge, each vxlan is mapped to a vlan - -image::images/vxlan-l2-vlanaware.svg["vxlan l2 bridge vlan aware",align="center"] - -multicast mode -^^^^^^^^^^^^^^ - -* node1 - ----- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 ----- - +With this setup, each vmbr bridge will be the gateway for the VMs. +The same vmbr on different nodes will have the same IP address and the same MAC address, +so that VM live migration works without network disruption. -* node2 +VXLAN layer3 routing only works with FRR and non-vlan-aware bridges. +(vlan aware bridge support is buggy currently). ----- -auto eno1 -iface eno1 inet manual +asymmetric model +^^^^^^^^^^^^^^^^ -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes +This is the simplest mode. To get it to work, all vxlans need to be defined on all nodes. -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 +The asymmetric model allows routing and bridging on the VXLAN tunnel ingress, +but only bridging on the egress. +This results in bi-directional VXLAN traffic traveling on different VNIs +in each direction (always the destination VNI) across the routed infrastructure.
-auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 ----- +image::images/vxlan-l3-asymmetric.svg["vxlan l3 asymmetric",align="center"] -* node3 +sysctl.conf tuning ---- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.3 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 +#enable routing +net.ipv4.ip_forward=1 +net.ipv6.conf.all.forwarding=1 ---- - -unicast mode -^^^^^^^^^^^^ - * node1 ---- auto eno1 iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -* node2 - ----- -auto eno1 -iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + auto vxlan2 iface vxlan2 inet manual - vxlan_remoteip 192.168.0.1 - vxlan_remoteip 192.168.0.3 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan_remoteip 192.168.0.1 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -* node3 + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off ----- -auto eno1 -iface eno1 inet 
manual -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.3 +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 bridge_stp off bridge_fd 0 - bridge_vlan_aware yes -auto vxlan2 -iface vxlan2 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 2 auto vxlan3 iface vxlan3 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -bgp-evpn -^^^^^^^^ - -Note: currently FRR is working only with 1 vlan aware bridge - -* node1 - - ----- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 -auto vxlan3 -iface vxlan3 inet manual - vxlan-local-tunnelip 192.168.0.1 - bridge-learning off - bridge-arp-nd-suppress on - bridge-unicast-flood off - bridge-multicast-flood off - bridge-access 3 +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf ---- router bgp 1234 + bgp router-id 192.168.0.1 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.2 remote-as 1234 @@ -732,7 +620,7 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.2 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni exit-address-family ! 
line vty @@ -745,41 +633,62 @@ line vty ---- auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 + + +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 3 + + +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf ---- router bgp 1234 + bgp router-id 192.168.0.2 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.1 remote-as 1234 @@ -788,7 +697,7 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni exit-address-family ! 
line vty @@ -801,40 +710,62 @@ line vty ---- auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.3 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual - vxlan-local-tunnelip 192.168.0.3 - bridge-learning off - bridge-arp-nd-suppress on - bridge-unicast-flood off + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 + + +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 3 + + +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf + ---- router bgp 1234 + bgp router-id 192.168.0.3 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.1 remote-as 1234 @@ -843,23 +774,29 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.2 activate - advertise-all-vni + advertise-all-vni exit-address-family ! line vty ! ---- -VXLAN layer3 routing with anycast gateway -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -With this need, each vmbr bridge will be the gateway for the vm. -Same vmbr on different node, will have same ip address and same mac address, -to have working vm live migration and no network disruption. 
+symmetric model +^^^^^^^^^^^^^^^ -VXLAN layer3 routing only work with FRR and non-aware bridge. -(vlan aware bridge support is buggy currently). +With this model, you don't need to have all vxlans on all nodes. +This model will also be needed to route traffic to an external router. + +The symmetric model routes and bridges on both the ingress and the egress leafs. +This results in bi-directional traffic being able to travel on the same VNI, hence the symmetric name. +However, a new specialty transit VNI is used for all routed VXLAN traffic, called the L3VNI. +All traffic that needs to be routed will be routed onto the L3VNI, tunneled across the layer 3 Infrastructure, +routed off the L3VNI to the appropriate VLAN and ultimately bridged to the destination. + +A vrf is needed for the L3VNI, so all vmbr bridges need to be in the vrf if they want to be able to reach each other. +image::images/vxlan-l3-symmetric.svg["vxlan l3 symmetric",align="center"] sysctl.conf tuning @@ -867,32 +804,370 @@ sysctl.conf tuning #enable routing net.ipv4.ip_forward=1 net.ipv6.conf.all.forwarding=1 -#disable reverse path filtering -net.ipv4.conf.default.rp_filter=0 -net.ipv4.conf.all.rp_filter=0 -#allow frr to work with vrf -net.ipv4.tcp_l3mdev_accept=1 ---- -asymmetric model -^^^^^^^^^^^^^^^^ - -This is the simplest mode. To get it work, all vxlan need to be defined on all nodes. - -The asymmetric model allows routing and bridging on the VXLAN tunnel ingress, -but only bridging on the egress. -This results in bi-directional VXLAN traffic traveling on different VNIs -in each direction (always the destination VNI) across the routed infrastructure.
+* node1 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 +---- + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.1 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.2 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.2 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.1 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! 
+ address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node2 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:91 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.2 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.2 + ! 
+ address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node3 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.3 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.2 remote-as 1234 + ! 
+ address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.2 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.3 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + +VXLAN layer3 routing with anycast gateway + routing to outside with external router +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Routing to the outside needs the symmetric model. -image::images/vxlan-l3-asymmetric.svg["vxlan l3 asymmetric",align="center"] +1 gateway node +^^^^^^^^^^^^^^ +In this example, we'll use only 1 proxmox node as exit gateway (node1). +This node has a simple default gw in the vrf to the external router (no bgp between router and node1) +and announces this default gw to the other proxmox nodes. -* node1 +* node1 ---- +auto vrf1 +iface vrf1 + vrf-table auto + auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static address 192.168.0.1 @@ -900,49 +1175,80 @@ iface vmbr0 inet static bridge_ports eno1 bridge_stp off bridge_fd 0 - + +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr2 iface vmbr2 inet static - address 10.0.2.254 - netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 bridge_ports vxlan2 bridge_stp off bridge_fd 0 - + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 auto
vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr3 iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 address 10.0.3.254 netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 - bridge_ports vxlan3 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 bridge_stp off bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 ---- frr.conf ---- +vrf vrf1 + vni 4000 +! router bgp 1234 bgp router-id 192.168.0.1 no bgp default ipv4-unicast @@ -953,7 +1259,20 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.2 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 172.16.0.1 + ! + address-family ipv4 unicast + redistribute connected + redistribute kernel !announce your default gw to all nodes + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast exit-address-family ! 
line vty @@ -964,8 +1283,13 @@ line vty * node2 ---- +auto vrf1 +iface vrf1 + vrf-table auto + auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -974,49 +1298,72 @@ iface vmbr0 inet static bridge_ports eno1 bridge_stp off bridge_fd 0 - + auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr2 iface vmbr2 inet static - address 10.0.2.254 - netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 bridge_ports vxlan2 bridge_stp off bridge_fd 0 - + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr3 iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 address 10.0.3.254 netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 - bridge_ports vxlan3 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 bridge_stp off bridge_fd 0 + hwaddress 44:39:39:FF:40:91 #must be different on each node + vrf vrf1 ---- frr.conf ---- +vrf vrf1 + vni 4000 +! router bgp 1234 bgp router-id 192.168.0.2 no bgp default ipv4-unicast @@ -1027,7 +1374,19 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.2 + ! 
+ address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast exit-address-family ! line vty @@ -1038,8 +1397,13 @@ line vty * node3 ---- +auto vrf1 +iface vrf1 + vrf-table auto + auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -1048,49 +1412,72 @@ iface vmbr0 inet static bridge_ports eno1 bridge_stp off bridge_fd 0 - + auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr2 iface vmbr2 inet static - address 10.0.2.254 - netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 bridge_ports vxlan2 bridge_stp off bridge_fd 0 - + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr3 iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 address 10.0.3.254 netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 - bridge_ports vxlan3 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 bridge_stp off bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 ---- frr.conf ---- +vrf vrf1 + vni 4000 +! 
router bgp 1234 bgp router-id 192.168.0.3 no bgp default ipv4-unicast @@ -1101,32 +1488,35 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.2 activate - advertise-all-vni + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.3 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast exit-address-family ! line vty ! ---- +multiple gateway nodes +^^^^^^^^^^^^^^^^^^^^^^ +In this example, all nodes will be used as exit gateways (but you can use only 2 nodes if you want). +All nodes have a simple default gw in the vrf to the external router (no bgp between the router and the nodes) +and announce this default gw. +The external router has ecmp routes to all proxmox nodes (balancing). +If the router sends a packet to the wrong node (the vm is not on this node), this node will route the packet +through vxlan to its final destination. -symmetric model -^^^^^^^^^^^^^^^ - -With this model, you don't need to have all vxlan on all nodes. -This model will also be needed to route traffic to an external router. - -The symmetric model routes and bridges on both the ingress and the egress leafs. -This results in bi-directional traffic being able to travel on the same VNI, hence the symmetric name. -However, a new specialty transit VNI is used for all routed VXLAN traffic, called the L3VNI. -All traffic that needs to be routed will be routed onto the L3VNI, tunneled across the layer 3 Infrastructure, -routed off the L3VNI to the appropriate VLAN and ultimately bridged to the destination. - -A vrf is needed for the L3VNI, so all vmbr bridge need to be in the vrf if they want to be able to reach each others.
- -image::images/vxlan-l3-symmetric.svg["vxlan l3 symmetric",align="center"] - - -* node1 +*node1 ---- auto vrf1 @@ -1135,7 +1525,8 @@ iface vrf1 auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static address 192.168.0.1 @@ -1144,8 +1535,19 @@ iface vmbr0 inet static bridge_stp off bridge_fd 0 +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on @@ -1164,6 +1566,7 @@ iface vmbr2 inet static auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on @@ -1183,13 +1586,13 @@ iface vmbr3 inet static #interconnect vxlan-vfr l3vni auto vxlan4000 iface vxlan4000 inet manual + vxlan-id 4000 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - auto vmbr4000 iface vmbr4000 inet manual bridge_ports vxlan4000 @@ -1199,6 +1602,7 @@ iface vmbr4000 inet manual vrf vrf1 ---- + frr.conf ---- @@ -1219,11 +1623,12 @@ router bgp 1234 exit-address-family ! router bgp 1234 vrf vrf1 -! - bgp router-id 192.168.0.1 - ! +! + bgp router-id 172.16.0.1 + ! address-family ipv4 unicast redistribute connected + redistribute kernel !announce your default gw to all nodes exit-address-family ! 
address-family l2vpn evpn @@ -1244,6 +1649,7 @@ iface vrf1 auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -1253,8 +1659,19 @@ iface vmbr0 inet static bridge_stp off bridge_fd 0 +auto eno2 +iface eno2 + address 172.16.0.2 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -1273,6 +1690,7 @@ iface vmbr2 inet static auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -1292,6 +1710,7 @@ iface vmbr3 inet static #interconnect vxlan-vfr l3vni auto vxlan4000 iface vxlan4000 inet manual + vxlan-id 4000 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -1329,11 +1748,12 @@ router bgp 1234 exit-address-family ! router bgp 1234 vrf vrf1 -! - bgp router-id 192.168.0.2 - ! +! + bgp router-id 172.16.0.2 + ! address-family ipv4 unicast redistribute connected + redistribute kernel !announce your default gw to all nodes exit-address-family !
address-family l2vpn evpn @@ -1354,7 +1774,8 @@ iface vrf1 auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static address 192.168.0.3 @@ -1363,8 +1784,19 @@ iface vmbr0 inet static bridge_stp off bridge_fd 0 +auto eno2 +iface eno2 + address 172.16.0.3 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -1383,6 +1815,7 @@ iface vmbr2 inet static auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -1402,6 +1835,7 @@ iface vmbr3 inet static #interconnect vxlan-vfr l3vni auto vxlan4000 iface vxlan4000 inet manual + vxlan-id 4000 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -1439,11 +1873,12 @@ router bgp 1234 exit-address-family ! router bgp 1234 vrf vrf1 -! - bgp router-id 192.168.0.3 - ! +! + bgp router-id 172.16.0.3 + ! address-family ipv4 unicast redistribute connected + redistribute kernel !announce your default gw to all nodes exit-address-family ! address-family l2vpn evpn @@ -1453,3 +1888,45 @@ router bgp 1234 vrf vrf1 line vty ! ---- + +Note +^^^^ + +If your external router doesn't support ecmp to reach multiple proxmox nodes, +you can set up an HA floating vip on the proxmox nodes with vrrp. + +In this example, we will set up a floating ip 172.16.0.10 on node1 and node2. +Node1 is the primary and fails over to node2 in case of failure.
+ + +* node1 + +---- +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + vrrp-id 1 + vrrp-priority 2 + vrrp-virtual-ip 172.16.0.10 +---- + +* node2 + +---- +auto eno2 +iface eno2 + address 172.16.0.2 + netmask 255.255.255.0 + mtu 1550 + vrf vrf1 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + vrrp-id 1 + vrrp-priority 1 + vrrp-virtual-ip 172.16.0.10 +---- + +