X-Git-Url: https://git.proxmox.com/?p=pve-docs.git;a=blobdiff_plain;f=vxlan-and-evpn.adoc;h=5115d787d66a266ba243af7598e46aa1e8144080;hp=703cd8b0242774bedac67fa0e8f920c10c24e793;hb=4ccb911c76017fed3c649586c39c7b8b79448619;hpb=1821b137ad48df7a9cee1d0c65db238606e12876 diff --git a/vxlan-and-evpn.adoc b/vxlan-and-evpn.adoc index 703cd8b..5115d78 100644 --- a/vxlan-and-evpn.adoc +++ b/vxlan-and-evpn.adoc @@ -16,6 +16,9 @@ while accommodating a very large number of tenants. It is defined in RFC 7348. Each overlay network is known as a VXLAN Segment and identified by a unique 24-bit segment ID called a VXLAN Network Identifier (VNI). +VXLAN encapsulation adds 50 bytes of overhead, so you need to increase the MTU on your host +physical interfaces to at least 1550 (or decrease the MTU inside your VMs to 1450). + For BUM traffic (broadcast / unknown unicast traffic, multicast), we have 3 differents vxlan setup modes : multicast, unicast, bgp-evpn @@ -36,6 +39,7 @@ remote VTEPs will get the packet and answer accordingly direct to the originatin ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -74,6 +78,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -113,6 +118,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -161,6 +167,7 @@ The VXLAN device will still learn remote addresses automatically using source-ad ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -203,6 +210,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -244,6 +252,7 @@ iface vmbr3 inet manual ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -296,6 +305,7 @@ it's possible to use external bgp route reflector servers. 
---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -363,6 +373,7 @@ line vty ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -429,6 +440,7 @@ line vty ---- auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -490,240 +502,96 @@ line vty ! ---- +VXLAN layer3 routing with anycast gateway +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -VXLAN layer2 with vlan aware linux bridges -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We use 1 vmbr bridge, each vxlan is mapped to a vlan - -image::images/vxlan-l2-vlanaware.svg["vxlan l2 bridge vlan aware",align="center"] - -multicast mode -^^^^^^^^^^^^^^ - -* node1 - ----- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 ----- - +With this setup, each vmbr bridge will be the gateway for the VMs. +The same vmbr on different nodes will have the same IP address and the same MAC address, +so that VM live migration works without network disruption. -* node2 +VXLAN layer3 routing only works with FRR and non-vlan-aware bridges. +(vlan aware bridge support is buggy currently). ----- -auto eno1 -iface eno1 inet manual +asymmetric model +^^^^^^^^^^^^^^^^ -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes +This is the simplest mode. For it to work, all vxlans need to be defined on all nodes. 
-auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 +The asymmetric model allows routing and bridging on the VXLAN tunnel ingress, +but only bridging on the egress. +This results in bi-directional VXLAN traffic traveling on different VNIs +in each direction (always the destination VNI) across the routed infrastructure. -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 ----- +image::images/vxlan-l3-asymmetric.svg["vxlan l3 asymmetric",align="center"] -* node3 +sysctl.conf tuning ---- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.3 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan-svcnodeip 225.20.1.1 - vxlan-physdev eno1 - bridge-access 3 +#enable routing +net.ipv4.ip_forward=1 +net.ipv6.conf.all.forwarding=1 ---- - -unicast mode -^^^^^^^^^^^^ - * node1 ---- auto eno1 iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan2 -iface vxlan2 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -* node2 - ----- -auto eno1 -iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + auto vxlan2 iface vxlan2 inet manual - vxlan_remoteip 
192.168.0.1 - vxlan_remoteip 192.168.0.3 - bridge-access 2 - -auto vxlan3 -iface vxlan3 inet manual - vxlan_remoteip 192.168.0.1 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -* node3 + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off ----- -auto eno1 -iface eno1 inet manual -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.3 +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 bridge_stp off bridge_fd 0 - bridge_vlan_aware yes -auto vxlan2 -iface vxlan2 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 2 auto vxlan3 iface vxlan3 inet manual - vxlan_remoteip 192.168.0.2 - vxlan_remoteip 192.168.0.3 - bridge-access 3 ----- - - -bgp-evpn -^^^^^^^^ - -Note: currently FRR is working only with 1 vlan aware bridge - -* node1 - - ----- -auto eno1 -iface eno1 inet manual - -auto vmbr0 -iface vmbr0 inet static - address 192.168.0.1 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual vxlan-local-tunnelip 192.168.0.1 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 -auto vxlan3 -iface vxlan3 inet manual - vxlan-local-tunnelip 192.168.0.1 - bridge-learning off - bridge-arp-nd-suppress on - bridge-unicast-flood off - bridge-multicast-flood off - bridge-access 3 +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf ---- router bgp 1234 + bgp router-id 192.168.0.1 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.2 remote-as 1234 @@ -732,7 +600,7 @@ router bgp 1234 address-family l2vpn evpn neighbor 
192.168.0.2 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni exit-address-family ! line vty @@ -745,24 +613,33 @@ line vty ---- auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.2 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.2 bridge-learning off bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 + + +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 auto vxlan3 @@ -772,14 +649,24 @@ iface vxlan3 inet manual bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 3 + + +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf ---- router bgp 1234 + bgp router-id 192.168.0.2 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.1 remote-as 1234 @@ -788,7 +675,7 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni exit-address-family ! 
line vty @@ -801,24 +688,33 @@ line vty ---- auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static - address 192.168.0.3 - netmask 255.255.255.0 - bridge_ports eno1 vxlan2 vxlan3 - bridge_stp off - bridge_fd 0 - bridge_vlan_aware yes - -auto vxlan0 -iface vxlan0 inet manual - vxlan-local-tunnelip 192.168.0.3 - bridge-learning off - bridge-arp-nd-suppress on - bridge-unicast-flood off + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off bridge-multicast-flood off - bridge-access 2 + + +auto vmbr2 +iface vmbr2 inet static + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 auto vxlan3 @@ -828,13 +724,24 @@ iface vxlan3 inet manual bridge-arp-nd-suppress on bridge-unicast-flood off bridge-multicast-flood off - bridge-access 3 + + +auto vmbr3 +iface vmbr3 inet static + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 ---- -/etc/frr/frr.conf +frr.conf + ---- router bgp 1234 + bgp router-id 192.168.0.3 no bgp default ipv4-unicast coalesce-time 1000 neighbor 192.168.0.1 remote-as 1234 @@ -843,23 +750,29 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.2 activate - advertise-all-vni + advertise-all-vni exit-address-family ! line vty ! ---- -VXLAN layer3 routing with anycast gateway -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -With this need, each vmbr bridge will be the gateway for the vm. -Same vmbr on different node, will have same ip address and same mac address, -to have working vm live migration and no network disruption. +symmetric model +^^^^^^^^^^^^^^^ -VXLAN layer3 routing only work with FRR and non-aware bridge. 
-(vlan aware bridge support is buggy currently). +With this model, you don't need to have all vxlans on all nodes. +This model is also needed to route traffic to an external router. + +The symmetric model routes and bridges on both the ingress and the egress leafs. +This results in bi-directional traffic being able to travel on the same VNI, hence the symmetric name. +However, a new special transit VNI, called the L3VNI, is used for all routed VXLAN traffic. +All traffic that needs to be routed will be routed onto the L3VNI, tunneled across the layer 3 Infrastructure, +routed off the L3VNI to the appropriate VLAN and ultimately bridged to the destination. + +A vrf is needed for the L3VNI, so all vmbr bridges need to be in the vrf if they are to reach each other. +image::images/vxlan-l3-symmetric.svg["vxlan l3 symmetric",align="center"] sysctl.conf tuning @@ -870,29 +783,361 @@ net.ipv6.conf.all.forwarding=1 #disable reverse path filtering net.ipv4.conf.default.rp_filter=0 net.ipv4.conf.all.rp_filter=0 -#allow frr to work with vrf -net.ipv4.tcp_l3mdev_accept=1 ---- -asymmetric model -^^^^^^^^^^^^^^^^ - -This is the simplest mode. To get it work, all vxlan need to be defined on all nodes. - -The asymmetric model allows routing and bridging on the VXLAN tunnel ingress, -but only bridging on the egress. -This results in bi-directional VXLAN traffic traveling on different VNIs -in each direction (always the destination VNI) across the routed infrastructure. 
+* node1 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.1 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 +---- + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.1 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.2 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.2 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.1 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! 
+ address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node2 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.2 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:91 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.2 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.3 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.3 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.2 + ! 
+ address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + + +* node3 + +---- +auto vrf1 +iface vrf1 + vrf-table auto + +auto eno1 +iface eno1 inet manual + mtu 1550 + +auto vmbr0 +iface vmbr0 inet static + address 192.168.0.3 + netmask 255.255.255.0 + bridge_ports eno1 + bridge_stp off + bridge_fd 0 + +auto vxlan2 +iface vxlan2 inet manual + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr2 +iface vmbr2 inet static + bridge_ports vxlan2 + bridge_stp off + bridge_fd 0 + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 + +auto vxlan3 +iface vxlan3 inet manual + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr3 +iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 + address 10.0.3.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 + bridge_stp off + bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 +---- + + +frr.conf + +---- +vrf vrf1 + vni 4000 +! +router bgp 1234 + bgp router-id 192.168.0.3 + no bgp default ipv4-unicast + coalesce-time 1000 + neighbor 192.168.0.1 remote-as 1234 + neighbor 192.168.0.2 remote-as 1234 + ! + address-family l2vpn evpn + neighbor 192.168.0.1 activate + neighbor 192.168.0.2 activate + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! 
+ bgp router-id 192.168.0.3 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +line vty +! +---- + +VXLAN layer3 routing with anycast gateway + routing to outside with external router +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Routing to outside needs the symmetric model. -image::images/vxlan-l3-asymmetric.svg["vxlan l3 asymmetric",align="center"] +1 gateway node +^^^^^^^^^^^^^^ +In this example, we'll use only 1 proxmox node as exit gateway (node1). +This node has a simple default gw in the vrf to the external router (no bgp between router and node1) +and announces this default gw to the other proxmox nodes. -* node1 +* node1 ---- +auto vrf1 +iface vrf1 + vrf-table auto + auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static address 192.168.0.1 @@ -900,7 +1145,16 @@ iface vmbr0 inet static bridge_ports eno1 bridge_stp off bridge_fd 0 - + +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.1 @@ -909,16 +1163,15 @@ iface vxlan2 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr2 iface vmbr2 inet static - address 10.0.2.254 - netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 bridge_ports vxlan2 bridge_stp off bridge_fd 0 - + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 auto vxlan3 iface vxlan3 inet manual @@ -928,21 +1181,41 @@ iface vxlan3 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr3 iface vmbr3 inet static + bridge_ports 
vxlan3 + bridge_stp off + bridge_fd 0 address 10.0.3.254 netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 - bridge_ports vxlan3 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-local-tunnelip 192.168.0.1 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 bridge_stp off bridge_fd 0 + hwaddress 44:39:39:FF:40:90 #must be different on each node + vrf vrf1 ---- frr.conf ---- +vrf vrf1 + vni 4000 +! router bgp 1234 bgp router-id 192.168.0.1 no bgp default ipv4-unicast @@ -953,7 +1226,20 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.2 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 172.16.0.1 + ! + address-family ipv4 unicast + redistribute connected + redistribute kernel !announce your default gw to all nodes + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast exit-address-family ! 
line vty @@ -964,8 +1250,13 @@ line vty * node2 ---- +auto vrf1 +iface vrf1 + vrf-table auto + auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -974,7 +1265,7 @@ iface vmbr0 inet static bridge_ports eno1 bridge_stp off bridge_fd 0 - + auto vxlan2 iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.2 @@ -983,16 +1274,15 @@ iface vxlan2 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr2 iface vmbr2 inet static - address 10.0.2.254 - netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 bridge_ports vxlan2 bridge_stp off bridge_fd 0 - + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 auto vxlan3 iface vxlan3 inet manual @@ -1002,21 +1292,42 @@ iface vxlan3 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr3 iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 address 10.0.3.254 netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 - bridge_ports vxlan3 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-local-tunnelip 192.168.0.2 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 bridge_stp off bridge_fd 0 + hwaddress 44:39:39:FF:40:91 #must be different on each node + vrf vrf1 ---- frr.conf ---- +vrf vrf1 + vni 4000 +! router bgp 1234 bgp router-id 192.168.0.2 no bgp default ipv4-unicast @@ -1027,7 +1338,19 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.3 activate - advertise-all-vni + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.2 + ! + address-family ipv4 unicast + redistribute connected + exit-address-family + ! 
+ address-family l2vpn evpn + advertise ipv4 unicast exit-address-family ! line vty @@ -1038,8 +1361,13 @@ line vty * node3 ---- +auto vrf1 +iface vrf1 + vrf-table auto + auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -1048,7 +1376,7 @@ iface vmbr0 inet static bridge_ports eno1 bridge_stp off bridge_fd 0 - + auto vxlan2 iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.3 @@ -1057,16 +1385,15 @@ iface vxlan2 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr2 iface vmbr2 inet static - address 10.0.2.254 - netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 bridge_ports vxlan2 bridge_stp off bridge_fd 0 - + address 10.0.2.254 + netmask 255.255.255.0 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr2 + vrf vrf1 auto vxlan3 iface vxlan3 inet manual @@ -1076,21 +1403,42 @@ iface vxlan3 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr3 iface vmbr3 inet static + bridge_ports vxlan3 + bridge_stp off + bridge_fd 0 address 10.0.3.254 netmask 255.255.255.0 - hwaddress 44:39:39:FF:40:94 - bridge_ports vxlan3 + hwaddress 44:39:39:FF:40:94 #must be same on each node vmbr3 + vrf vrf1 + +#interconnect vxlan-vfr l3vni +auto vxlan4000 +iface vxlan4000 inet manual + vxlan-local-tunnelip 192.168.0.3 + bridge-learning off + bridge-arp-nd-suppress on + bridge-unicast-flood off + bridge-multicast-flood off + + +auto vmbr4000 +iface vmbr4000 inet manual + bridge_ports vxlan4000 bridge_stp off bridge_fd 0 + hwaddress 44:39:39:FF:40:92 #must be different on each node + vrf vrf1 ---- frr.conf ---- +vrf vrf1 + vni 4000 +! router bgp 1234 bgp router-id 192.168.0.3 no bgp default ipv4-unicast @@ -1101,32 +1449,35 @@ router bgp 1234 address-family l2vpn evpn neighbor 192.168.0.1 activate neighbor 192.168.0.2 activate - advertise-all-vni + advertise-all-vni + exit-address-family +! +router bgp 1234 vrf vrf1 +! + bgp router-id 192.168.0.3 + ! 
+ address-family ipv4 unicast + redistribute connected + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast exit-address-family ! line vty ! ---- +multiple gateway nodes +^^^^^^^^^^^^^^^^^^^^^^ +In this example, all nodes will be used as exit gateways (but you can use only 2 nodes if you want). +All nodes have a simple default gw in the vrf to the external router (no bgp between the router and the nodes) +and announce this default gw. +The external router has ecmp routes to all proxmox nodes (balancing). +If the router sends a packet to the wrong node (the vm is not on this node), this node will route +the packet through vxlan to its final destination. -symmetric model -^^^^^^^^^^^^^^^ - -With this model, you don't need to have all vxlan on all nodes. -This model will also be needed to route traffic to an external router. - -The symmetric model routes and bridges on both the ingress and the egress leafs. -This results in bi-directional traffic being able to travel on the same VNI, hence the symmetric name. -However, a new specialty transit VNI is used for all routed VXLAN traffic, called the L3VNI. -All traffic that needs to be routed will be routed onto the L3VNI, tunneled across the layer 3 Infrastructure, -routed off the L3VNI to the appropriate VLAN and ultimately bridged to the destination. - -A vrf is needed for the L3VNI, so all vmbr bridge need to be in the vrf if they want to be able to reach each others. 
- -image::images/vxlan-l3-symmetric.svg["vxlan l3 symmetric",align="center"] - - -* node1 +* node1 ---- auto vrf1 @@ -1135,7 +1486,8 @@ iface vrf1 auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static address 192.168.0.1 @@ -1144,6 +1496,16 @@ iface vmbr0 inet static bridge_stp off bridge_fd 0 +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.1 @@ -1189,7 +1551,6 @@ iface vxlan4000 inet manual bridge-unicast-flood off bridge-multicast-flood off - auto vmbr4000 iface vmbr4000 inet manual bridge_ports vxlan4000 @@ -1199,6 +1560,7 @@ iface vmbr4000 inet manual vrf vrf1 ---- + frr.conf ---- @@ -1219,11 +1581,12 @@ router bgp 1234 exit-address-family ! router bgp 1234 vrf vrf1 -! - bgp router-id 192.168.0.1 - ! +! + bgp router-id 172.16.0.1 + ! address-family ipv4 unicast redistribute connected + redistribute kernel !announce your default gw to all nodes exit-address-family ! address-family l2vpn evpn @@ -1244,6 +1607,7 @@ iface vrf1 auto eno1 iface eno1 inet manual + mtu 1550 auto vmbr0 iface vmbr0 inet static @@ -1253,6 +1617,16 @@ iface vmbr0 inet static bridge_stp off bridge_fd 0 +auto eno2 +iface eno2 + address 172.16.0.2 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.2 @@ -1329,11 +1703,12 @@ router bgp 1234 exit-address-family ! router bgp 1234 vrf vrf1 -! 
- bgp router-id 192.168.0.2 - ! +! + bgp router-id 172.16.0.2 + ! address-family ipv4 unicast redistribute connected + redistribute kernel !announce your default gw to all nodes exit-address-family ! address-family l2vpn evpn @@ -1354,7 +1729,8 @@ iface vrf1 auto eno1 iface eno1 inet manual - + mtu 1550 + auto vmbr0 iface vmbr0 inet static address 192.168.0.3 @@ -1363,6 +1739,16 @@ iface vmbr0 inet static bridge_stp off bridge_fd 0 +auto eno2 +iface eno2 + address 172.16.0.3 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + #if you have multiple external routers, you can use ecmp balancing + #post-up route add default nexthop via 172.16.0.253 dev eno2 vrf vrf1 nexthop via 172.16.0.254 dev eno2 vrf vrf1 + auto vxlan2 iface vxlan2 inet manual vxlan-local-tunnelip 192.168.0.3 @@ -1439,11 +1825,12 @@ router bgp 1234 exit-address-family ! router bgp 1234 vrf vrf1 -! - bgp router-id 192.168.0.3 - ! +! + bgp router-id 172.16.0.3 + ! address-family ipv4 unicast redistribute connected + redistribute kernel !announce your default gw to all nodes exit-address-family ! address-family l2vpn evpn @@ -1453,3 +1840,45 @@ router bgp 1234 vrf vrf1 line vty ! ---- + +Note +^^^^ + +If your external router doesn't support ecmp to reach multiple proxmox nodes, +you can set up an HA floating vip on the proxmox nodes with vrrp. + +In this example, we will set up a floating ip 172.16.0.10 on node1 and node2. +Node1 is the primary and fails over to node2 in case of failure. 
+ + +* node1 + +---- +auto eno2 +iface eno2 + address 172.16.0.1 + netmask 255.255.255.0 + vrf vrf1 + mtu 1550 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + vrrp-id 1 + vrrp-priority 1 + vrrp-virtual-ip 172.16.0.10 +---- + +* node2 + +---- +auto eno2 +iface eno2 + address 172.16.0.2 + netmask 255.255.255.0 + mtu 1550 + vrf vrf1 + post-up ip route add default via 172.16.0.254 dev eno2 vrf vrf1 + vrrp-id 1 + vrrp-priority 2 + vrrp-virtual-ip 172.16.0.10 +---- + +