X-Git-Url: https://git.proxmox.com/?p=pve-docs.git;a=blobdiff_plain;f=vxlan-and-evpn.adoc;h=9cd55fc2d9690d26e7d4426dda486bb9a67b4d25;hp=73ae4a6b3c41aec54c924f52fcd7e107b4efc7c5;hb=7d6078845fa6a3bd308c7dc843273e56be33f315;hpb=445822a94613be87eb68f126f21d56249d0e88ca diff --git a/vxlan-and-evpn.adoc b/vxlan-and-evpn.adoc index 73ae4a6..9cd55fc 100644 --- a/vxlan-and-evpn.adoc +++ b/vxlan-and-evpn.adoc @@ -16,6 +16,9 @@ while accommodating a very large number of tenants. It is defined in RFC 7348. Each overlay network is known as a VXLAN Segment and identified by a unique 24-bit segment ID called a VXLAN Network Identifier (VNI). +VXLAN encapsulation add 50bytes overhead, so you need to increase mtu on your host +physical interfaces to 1550 at minimum. (or decrease mtu inside your vms to 1450) + For BUM traffic (broadcast / unknown unicast traffic, multicast), we have 3 differents vxlan setup modes : multicast, unicast, bgp-evpn @@ -36,6 +39,7 @@ remote VTEPs will get the packet and answer accordingly direct to the originatin ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -47,6 +51,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -58,6 +63,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -74,6 +80,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -85,6 +92,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -97,6 +105,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -113,6 +122,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -124,6 +134,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 
inet manual + vxlan-id 2 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -136,6 +147,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-svcnodeip 225.20.1.1 vxlan-physdev eno1 @@ -161,6 +173,7 @@ The VXLAN device will still learn remote addresses automatically using source-ad ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -173,6 +186,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -186,6 +200,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan2 inet manual + vxlan-id 3 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -203,6 +218,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -214,6 +230,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan_remoteip 192.168.0.1 vxlan_remoteip 192.168.0.3 @@ -227,6 +244,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan2 inet manual + vxlan-id 3 vxlan_remoteip 192.168.0.1 vxlan_remoteip 192.168.0.3 @@ -244,6 +262,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -255,6 +274,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -268,6 +288,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan2 inet manual + vxlan-id 3 vxlan_remoteip 192.168.0.2 vxlan_remoteip 192.168.0.3 @@ -296,6 +317,7 @@ it's possible to use external bgp route reflector servers. 
---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -307,6 +329,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on @@ -323,6 +346,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on @@ -363,6 +387,7 @@ line vty ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -374,6 +399,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -389,6 +415,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on @@ -429,6 +456,7 @@ line vty ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -440,6 +468,7 @@ iface vmbr0 inet static auto vxlan2 iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -455,6 +484,7 @@ iface vmbr2 inet manual auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on @@ -490,240 +520,98 @@ line vty ! 
---- +VXLAN layer3 routing with anycast gateway +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -VXLAN layer2 with vlan aware linux bridges -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We use 1 vmbr bridge, each vxlan is mapped to a vlan - -image::images/vxlan-l2-vlanaware.svg["vxlan l2 bridge vlan aware",align="center"] - -multicast mode -^^^^^^^^^^^^^^ - -* node1 - ----- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 ----- - +In this setup, each vmbr bridge is the gateway for the VMs. +The same vmbr on different nodes has the same IP address and the same MAC address, +so that VM live migration works without network disruption. -* node2 +VXLAN layer3 routing only works with FRR and non-vlan-aware bridges. +(vlan-aware bridge support is currently buggy). ----- -auto eno1 -iface eno1 inet manual +asymmetric model +^^^^^^^^^^^^^^^^ -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes +This is the simplest mode. For it to work, all vxlans need to be defined on all nodes. -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 +The asymmetric model allows routing and bridging on the VXLAN tunnel ingress, +but only bridging on the egress. +This results in bi-directional VXLAN traffic traveling on different VNIs +in each direction (always the destination VNI) across the routed infrastructure.
-auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 ----- +image::images/vxlan-l3-asymmetric.svg["vxlan l3 asymmetric",align="center"] -* node3 +sysctl.conf tuning ---- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.3 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 +#enable routing +net.ipv4.ip_forward=1 +net.ipv6.conf.all.forwarding=1 ---- - -unicast mode -^^^^^^^^^^^^ - * node1 ---- auto eno1 iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -* node2 - ----- -auto eno1 -iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + auto vxlan2 iface vxlan2 inet manual - vxlan_remoteip 192.168.0.1 - vxlan_remoteip 192.168.0.3 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan_remoteip 192.168.0.1 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -* node3 + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off ----- -auto eno1 -iface eno1 inet 
manual -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.3 +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 bridge_stp off bridge_fd 0 - bridge_vlan_aware yes -auto vxlan2 -iface vxlan2 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 2 auto vxlan3 iface vxlan3 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -bgp-evpn -^^^^^^^^ - -Note: currently FRR is working only with 1 vlan aware bridge - -* node1 - - ----- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 -auto vxlan3 -iface vxlan3 inet manual - vxlan-local-tunnelip 192.168.0.1 - bridge-learning off - bridge-arp-nd-suppress on - bridge-unicast-flood off - bridge-multicast-flood off - bridge-access 3 +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf ---- router bgp 1234 + bgp router-id 192.168.0.1 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.2 remote-as 1234 @@ -732,7 +620,7 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.2 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni exit-address-family ! 
line vty @@ -745,41 +633,62 @@ line vty ---- auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 + + +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 3 + + +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf ---- router bgp 1234 + bgp router-id 192.168.0.2 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.1 remote-as 1234 @@ -788,7 +697,7 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni exit-address-family ! 
line vty @@ -801,40 +710,62 @@ line vty ---- auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.3 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 + + +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 auto vxlan3 iface vxlan3 inet manual + vxlan-id 3 vxlan-local-tunnelip 192.168.0.3 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 3 + + +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf + ---- router bgp 1234 + bgp router-id 192.168.0.3 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.1 remote-as 1234 @@ -843,9 +774,1159 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.2 activate - advertise-all-vni + advertise-all-vni exit-address-family ! line vty ! ---- + + +symmetric model +^^^^^^^^^^^^^^^ + +With this model, you don't need to have all vxlan on all nodes. +This model will also be needed to route traffic to an external router. + +The symmetric model routes and bridges on both the ingress and the egress leafs. +This results in bi-directional traffic being able to travel on the same VNI, hence the symmetric name. +However, a new specialty transit VNI is used for all routed VXLAN traffic, called the L3VNI. 
+All traffic that needs to be routed will be routed onto the L3VNI, tunneled across the layer 3 infrastructure, +routed off the L3VNI to the appropriate VLAN and ultimately bridged to the destination. + +A vrf is needed for the L3VNI, so all vmbr bridges need to be in the vrf if they are to reach each other. + +image::images/vxlan-l3-symmetric.svg["vxlan l3 symmetric",align="center"] + +sysctl.conf tuning + +---- +#enable routing +net.ipv4.ip_forward=1 +net.ipv6.conf.all.forwarding=1 +---- + +* node1 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 +---- + +frr.conf + +----
+vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.1 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.2 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.2 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.1 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node2 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 
44:39:39:FF:40:91 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.2 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.2 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node3 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet 
manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.3 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.2 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.2 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.3 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + +VXLAN layer3 routing with anycast gateway + routing to outside with external router +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Routing to the outside needs the symmetric model. + +1 gateway node +^^^^^^^^^^^^^^ +In this example, we'll use only 1 proxmox node (node1) as exit gateway. +This node has a simple default gw in the vrf to the external router (no bgp between router and node1) +and announces this default gw to the other proxmox nodes.
+ + +* node1 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +!
+router bgp 1234 + bgp router-id 192.168.0.1 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.2 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.2 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 172.16.0.1 + ! + address-family ipv4 unicast + redistribute connected + redistribute kernel !announce your default gw to all nodes + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node2 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + 
bridge_fd 0 + hwaddress 44:39:39:FF:40:91 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.2 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.2 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node3 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 
+iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.3 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.2 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.2 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.3 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + +multiple gateway nodes +^^^^^^^^^^^^^^^^^^^^^^ +In this example, all nodes will be used as exit gateways. (But you can use only 2 nodes if you want) +All nodes have a simple default gw in the vrf to the external router (no bgp between the router and the nodes) +and announce this default gw. +The external router has ecmp routes to all proxmox nodes (balancing). +If the router sends a packet to the wrong node (the vm is not on this node), this node will route +the packet through vxlan to its final destination.
+ +* node1 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +!
+router bgp 1234 + bgp router-id 192.168.0.1 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.2 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.2 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 172.16.0.1 + ! + address-family ipv4 unicast + redistribute connected + redistribute kernel !announce your default gw to all nodes + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node2 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto eno2 +iface eno2 + address 172.16.0.2 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3
+ vrf vrf1 + +#interconnect vxlan-vrf l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:91 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.2 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 172.16.0.2 + ! + address-family ipv4 unicast + redistribute connected + redistribute kernel !announce your default gw to all nodes + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! 
+---- + + +* node3 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto eno2 +iface eno2 + address 172.16.0.3 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-id 2 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-id 3 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vrf l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-id 4000 + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! 
+router bgp 1234 + bgp router-id 192.168.0.3 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.2 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.2 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 172.16.0.3 + ! + address-family ipv4 unicast + redistribute connected + redistribute kernel !announce your default gw to all nodes + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + +Note +^^^^ + +If your external router doesn't support ECMP to reach multiple Proxmox nodes, +you can set up an HA floating VIP on the Proxmox nodes with VRRP. + +In this example, we will set up a floating IP 172.16.0.10 on node1 and node2. +Node1 is the primary and fails over to node2 in case of failure. + + +* node1 + +---- +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + vrrp-id 1 + vrrp-priority 1 + vrrp-virtual-ip 172.16.0.10 +---- + +* node2 + +---- +auto eno2 +iface eno2 + address 172.16.0.2 + netmask 255.255.255.0 + mtu 1550 + vrf vrf1 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + vrrp-id 1 + vrrp-priority 2 + vrrp-virtual-ip 172.16.0.10 +---- + +