]>
Commit | Line | Data |
---|---|---|
b9436cda DM |
1 | package PVE::Network; |
2 | ||
3 | use strict; | |
c36f332e | 4 | use warnings; |
f27d5e6b | 5 | |
b9436cda | 6 | use PVE::INotify; |
f27d5e6b TL |
7 | use PVE::ProcFSTools; |
8 | use PVE::Tools qw(run_command lock_file); | |
9 | ||
b9436cda | 10 | use File::Basename; |
b6bff92e | 11 | use IO::Socket::IP; |
d7cafe51 | 12 | use JSON; |
bf52d27b | 13 | use Net::IP; |
8286ef53 | 14 | use NetAddr::IP qw(:lower); |
f27d5e6b TL |
15 | use POSIX qw(ECONNREFUSED); |
16 | use Socket qw(NI_NUMERICHOST NI_NUMERICSERV); | |
bf52d27b | 17 | |
# host network related utility functions

# Pattern for interface names this module treats as physical NICs: 'eth' followed by digits, or
# 'en' / 'ib' followed by one or more characters that are neither ':' nor '.'.
# The pattern is not anchored; callers have to add anchors where a full match is required.
our $PHYSICAL_NIC_RE = qr{(?:eth\d+|en[^:.]+|ib[^:.]+)};
3dabe28a | 21 | |
# Dotted-quad IPv4 netmasks indexed by prefix length, for prefix lengths 0 through 32;
# e.g. $ipv4_reverse_mask->[24] is '255.255.255.0'.
# /0 is special-cased so that no shift by the full 32 bits is needed.
our $ipv4_reverse_mask = [
    map {
        my $bits = $_ == 0 ? 0 : (0xffffffff << (32 - $_)) & 0xffffffff;
        join('.', unpack('C4', pack('N', $bits)));
    } 0 .. 32
];
57 | ||
# Lookup from dotted-quad IPv4 netmask to prefix length. Only prefix lengths 8 through 32 are
# present, e.g. '255.255.255.0' => 24.
our $ipv4_mask_hash_localnet = {};
for my $prefix_len (8 .. 32) {
    my $bits = (0xffffffff << (32 - $prefix_len)) & 0xffffffff;
    my $netmask = join('.', unpack('C4', pack('N', $bits)));
    $ipv4_mask_hash_localnet->{$netmask} = $prefix_len;
}
85 | ||
# Replace the traffic control setup of an interface with a rate limit.
#
# Any previous class, filter and qdisc configuration is removed first. If $rate is false the
# function stops there, i.e. it only removes an existing limit.
#
# Parameters:
#   $iface - name of the network interface
#   $rate  - rate, handed to tc with the 'bps' suffix appended
#   $burst - burst size, handed to tc with the 'b' suffix appended
#
# Dies if one of the 'tc ... add' commands fails.
sub setup_tc_rate_limit {
    my ($iface, $rate, $burst) = @_;

    my $tc = '/sbin/tc';

    # All commands are passed as argument lists, so no shell is involved and the interface name is
    # always handed to tc as a single argument.
    my $try_quiet = sub {
        my (@args) = @_;
        # output is discarded through no-op callbacks, errors are ignored
        eval { run_command([$tc, @args], outfunc => sub {}, errfunc => sub {}) };
    };

    # these are allowed / expected to fail, e.g. when there is no previous rate limit to remove
    $try_quiet->('class', 'del', 'dev', $iface, 'parent', '1:', 'classid', '1:1');
    $try_quiet->('filter', 'del', 'dev', $iface, 'parent', 'ffff:', 'protocol', 'all', 'pref', '50', 'u32');
    $try_quiet->('qdisc', 'del', 'dev', $iface, 'ingress');
    $try_quiet->('qdisc', 'del', 'dev', $iface, 'root');

    return if !$rate;

    # tbf does not work for unknown reason
    #$TC qdisc add dev $DEV root tbf rate $RATE latency 100ms burst $BURST
    # so we use htb instead
    run_command([$tc, 'qdisc', 'add', 'dev', $iface, 'root', 'handle', '1:', 'htb', 'default', '1']);
    run_command([
        $tc, 'class', 'add', 'dev', $iface, 'parent', '1:', 'classid', '1:1',
        'htb', 'rate', "${rate}bps", 'burst', "${burst}b",
    ]);

    # the other direction is limited with a policing filter on the ingress qdisc
    run_command([$tc, 'qdisc', 'add', 'dev', $iface, 'handle', 'ffff:', 'ingress']);
    run_command([
        $tc, 'filter', 'add', 'dev', $iface, 'parent', 'ffff:',
        'prio', '50', 'basic',
        'police', 'rate', "${rate}bps", 'burst', "${burst}b", 'mtu', '64kb',
        'drop',
    ]);
}
110 | ||
# Apply a rate limit to $iface through setup_tc_rate_limit().
#
# A true $rate is multiplied by 1024*1024 and truncated to an integer; a false $rate is passed on
# unchanged. The burst value is fixed to 1024*1024.
sub tap_rate_limit {
    my ($iface, $rate) = @_;

    my $factor = 1024 * 1024;
    my $limit = $rate ? int($rate * $factor) : $rate;

    setup_tc_rate_limit($iface, $limit, $factor);
}
74d1b045 | 119 | |
# Read the MTU of $bridge from /sys/class/net/$bridge/mtu and return it as an integer.
#
# Dies if nothing could be read (reported as the bridge not existing) or if the value does not
# consist of digits only.
sub read_bridge_mtu {
    my ($bridge) = @_;

    my $raw = PVE::Tools::file_read_firstline("/sys/class/net/$bridge/mtu");
    die "bridge '$bridge' does not exist\n" if !$raw;

    # taking the value from a regex capture avoids an insecure dependency
    my ($digits) = $raw =~ /^(\d+)$/
        or die "unable to parse mtu value";

    return int($digits);
}
131 | ||
# Split a guest interface name of the form 'tap<vmid>i<devid>' or 'veth<vmid>i<devid>' into
# ($vmid, $devid).
#
# For any other name the function dies, unless $noerr is set, in which case undef is returned.
my $parse_tap_device_name = sub {
    my ($iface, $noerr) = @_;

    for my $pattern (qr/^tap(\d+)i(\d+)$/, qr/^veth(\d+)i(\d+)$/) {
        return ($1, $2) if $iface =~ $pattern;
    }

    return undef if $noerr;
    die "can't create firewall bridge for random interface name '$iface'\n";
};
150 | ||
# Derive the firewall helper device names for a guest NIC from its VMID and device index.
#
# Returns the list ($fwbr, $vethfw, $vethfwpeer, $ovsintport).
my $compute_fwbr_names = sub {
    my ($vmid, $devid) = @_;

    # Note: the firewall uses 'fwln+' to filter traffic to VMs
    return (
        "fwbr${vmid}i${devid}", # $fwbr
        "fwln${vmid}i${devid}", # $vethfw
        "fwpr${vmid}p${devid}", # $vethfwpeer
        "fwln${vmid}o${devid}", # $ovsintport
    );
};
162 | ||
# Delete the network interface $iface with 'ip link delete'.
# Dies if the command returns a non-zero value.
sub iface_delete($) {
    my ($iface) = @_;

    my $rc = run_command(['/sbin/ip', 'link', 'delete', 'dev', $iface], noerr => 1);
    die "failed to delete interface '$iface'\n" if $rc != 0;

    return 1;
}
168 | ||
# Create the network interface $iface of the given $type with 'ip link add'; @args are appended
# to the command after the type. Dies if the command returns a non-zero value.
sub iface_create($$@) {
    my ($iface, $type, @args) = @_;

    my $rc = run_command(['/sbin/ip', 'link', 'add', $iface, 'type', $type, @args], noerr => 1);
    die "failed to create interface '$iface'\n" if $rc != 0;

    return 1;
}
174 | ||
# Run 'ip link set $iface @opts'. Dies, naming the options, if the command returns a non-zero
# value.
sub iface_set($@) {
    my ($iface, @opts) = @_;

    my $rc = run_command(['/sbin/ip', 'link', 'set', $iface, @opts], noerr => 1);
    die "failed to set interface options for '$iface' (".join(' ', @opts).")\n" if $rc != 0;

    return 1;
}
180 | ||
# Wrapper around iface_set() that produces nicer error messages.
#
# With a defined $master the interface is attached to it ('master'); with an undefined $master it
# is detached ('nomaster'). Dies with a message matching the attempted operation on failure.
sub iface_set_master($$) {
    my ($iface, $master) = @_;

    my @opts = defined($master) ? ('master', $master) : ('nomaster');

    eval { iface_set($iface, @opts) };
    return if !$@;

    die "can't enslave '$iface' to '$master'\n" if defined($master);
    die "can't unenslave '$iface'\n";
}
192 | ||
# Create $bridge as a Linux bridge and disable IPv6 on it, unless a device of that name already
# shows up below /sys/class/net.
my $cond_create_bridge = sub {
    my ($bridge) = @_;

    return if -d "/sys/class/net/$bridge";

    iface_create($bridge, 'bridge');
    disable_ipv6($bridge);
};
201 | ||
# Disable IPv6 on $iface by writing '1' to /proc/sys/net/ipv6/conf/$iface/disable_ipv6.
#
# Does nothing if /proc/sys/net/ipv6 does not exist. Dies if the proc file cannot be opened or
# the value cannot be written.
sub disable_ipv6 {
    my ($iface) = @_;
    return if !-d '/proc/sys/net/ipv6'; # ipv6 might be completely disabled
    my $file = "/proc/sys/net/ipv6/conf/$iface/disable_ipv6";
    open(my $fh, '>', $file) or die "failed to open $file for writing: $!\n";
    print {$fh} "1\n" or die "failed to disable link-local ipv6 for $iface\n";
    # the handle is buffered, so a failed write may only be reported once close() flushes it
    close($fh) or die "failed to disable link-local ipv6 for $iface: $!\n";
}
210 | ||
# Write "0" to the 'unicast_flood' and 'learning' attributes of the bridge port $iface
# (/sys/class/net/$iface/brport/...), in that order.
my $bridge_disable_interface_learning = sub {
    my ($iface) = @_;

    my $brport_dir = "/sys/class/net/$iface/brport";
    for my $attribute (qw(unicast_flood learning)) {
        PVE::ProcFSTools::write_proc_entry("$brport_dir/$attribute", "0");
    }
};
218 | ||
# Attach $iface to the Linux bridge $bridge.
#
# The interface gets the bridge's MTU (failure to set it is ignored), IPv6 is disabled on it and
# it is made a port of the bridge. If the bridge reports vlan filtering as enabled, the VLAN
# membership of the port is reset and rebuilt:
#   - with $trunks (a ';' separated list), each listed entry is added
#   - without $trunks and without $tag, the range 2-4094 is added
#   - finally $tag, or 1 if there is no tag, is added as untagged pvid
my $bridge_add_interface = sub {
    my ($bridge, $iface, $tag, $trunks) = @_;

    my $bridgemtu = read_bridge_mtu($bridge);
    eval { PVE::Tools::run_command(['/sbin/ip', 'link', 'set', $iface, 'mtu', $bridgemtu]) };

    # drop link local address (it can't be used when on a bridge anyway)
    disable_ipv6($iface);
    iface_set_master($iface, $bridge);

    my $vlan_aware = PVE::Tools::file_read_firstline("/sys/class/net/$bridge/bridge/vlan_filtering");
    return if !$vlan_aware;

    # runs 'bridge vlan $action dev $iface vid @vid_args' and returns the error text, if any
    my $bridge_vlan = sub {
        my ($action, @vid_args) = @_;
        eval { run_command(['/sbin/bridge', 'vlan', $action, 'dev', $iface, 'vid', @vid_args]) };
        return $@;
    };

    if (my $err = $bridge_vlan->('del', '1-4094')) {
        die "failed to remove default vlan tags of $iface - $err\n";
    }

    if ($trunks) {
        for my $trunk (split(/;/, $trunks)) {
            if (my $err = $bridge_vlan->('add', $trunk)) {
                die "unable to add vlan $trunk to interface $iface - $err\n";
            }
        }
    } elsif (!$tag) {
        if (my $err = $bridge_vlan->('add', '2-4094')) {
            die "unable to add default vlan tags to interface $iface - $err\n";
        }
    }

    my $pvid = $tag || 1;
    if (my $err = $bridge_vlan->('add', $pvid, 'pvid', 'untagged')) {
        die "unable to add vlan $pvid to interface $iface - $err\n";
    }
};
254 | ||
# Add $iface as a port to the Open vSwitch bridge $bridge with a single ovs-vsctl invocation.
#
# Parameters:
#   $tag      - optional, passed as 'tag=...'
#   $internal - if true, the interface type is set to 'internal'
#   $trunks   - optional ';' separated list, passed ',' separated as 'trunks=...'; combined with
#               a tag this also sets 'vlan_mode=native-untagged'
#
# The port's mtu_request is set to the MTU read from $bridge. IPv6 is disabled on the interface
# afterwards. Dies if ovs-vsctl fails.
my $ovs_bridge_add_port = sub {
    my ($bridge, $iface, $tag, $internal, $trunks) = @_;

    my $trunk_list = $trunks ? $trunks =~ s/;/,/gr : $trunks;

    my @port_settings;
    push @port_settings, "tag=$tag" if $tag;
    push @port_settings, "trunks=$trunk_list" if $trunk_list;
    push @port_settings, "vlan_mode=native-untagged" if $tag && $trunk_list;

    my $bridgemtu = read_bridge_mtu($bridge);

    # each '--' starts a new sub-command of the same ovs-vsctl call
    my @cmd = ('/usr/bin/ovs-vsctl');
    push @cmd, '--', 'add-port', $bridge, $iface, @port_settings;
    push @cmd, '--', 'set', 'Interface', $iface, "mtu_request=$bridgemtu";
    push @cmd, '--', 'set', 'Interface', $iface, 'type=internal' if $internal;

    eval { run_command(\@cmd) };
    die "can't add ovs port '$iface' - $@\n" if $@;

    disable_ipv6($iface);
};
280 | ||
# Bring $iface up with 'ip link set ... up'; if $mtu is true, the MTU is set in the same command.
# Dies if the command fails.
my $activate_interface = sub {
    my ($iface, $mtu) = @_;

    my @mtu_opts = $mtu ? ('mtu', $mtu) : ();

    eval { run_command(['/sbin/ip', 'link', 'set', $iface, 'up', @mtu_opts]) };
    die "can't activate interface '$iface' - $@\n" if $@;
};
290 | ||
# Append a static forwarding database entry for $mac on the bridge port $iface.
#
# Nothing is done if the port's 'learning' attribute reads as true, or if $iface is not a
# tap/veth guest interface name. If the matching firewall veth peer device exists, the entry is
# appended there as well.
sub add_bridge_fdb {
    my ($iface, $mac) = @_;

    my $learning = PVE::Tools::file_read_firstline("/sys/class/net/$iface/brport/learning");
    return if $learning;

    my ($vmid, $devid) = $parse_tap_device_name->($iface, 1);
    return if !defined($vmid);

    my (undef, undef, $vethfwpeer) = $compute_fwbr_names->($vmid, $devid);

    my $fdb_append = sub {
        my ($dev) = @_;
        run_command(['/sbin/bridge', 'fdb', 'append', $mac, 'dev', $dev, 'master', 'static']);
    };

    $fdb_append->($iface);
    $fdb_append->($vethfwpeer) if -d "/sys/class/net/$vethfwpeer";
}
309 | ||
# Delete the static forwarding database entry for $mac from the bridge port $iface.
#
# Nothing is done if the port's 'learning' attribute reads as true, or if $iface is not a
# tap/veth guest interface name. If the matching firewall veth peer device exists, the entry is
# deleted there as well.
sub del_bridge_fdb {
    my ($iface, $mac) = @_;

    my $learning = PVE::Tools::file_read_firstline("/sys/class/net/$iface/brport/learning");
    return if $learning;

    my ($vmid, $devid) = $parse_tap_device_name->($iface, 1);
    return if !defined($vmid);

    my (undef, undef, $vethfwpeer) = $compute_fwbr_names->($vmid, $devid);

    my $fdb_del = sub {
        my ($dev) = @_;
        run_command(['/sbin/bridge', 'fdb', 'del', $mac, 'dev', $dev, 'master', 'static']);
    };

    $fdb_del->($iface);
    $fdb_del->($vethfwpeer) if -d "/sys/class/net/$vethfwpeer";
}
327 | ||
# Prepare the tap interface $iface for use with $bridge: disable IPv6 on it and bring it up in
# promiscuous mode with the bridge's MTU.
#
# Dies if no bridge is given, if the bridge MTU cannot be read, or if activation fails.
sub tap_create {
    my ($iface, $bridge) = @_;

    die "unable to get bridge setting\n" if !$bridge;

    my $bridgemtu = read_bridge_mtu($bridge);
    my @activate_cmd = ('/sbin/ip', 'link', 'set', $iface, 'up', 'promisc', 'on', 'mtu', $bridgemtu);

    eval {
        disable_ipv6($iface);
        run_command(\@activate_cmd);
    };
    die "interface activation failed\n" if $@;
}
341 | ||
# Create the veth pair $veth / $vethpeer (unless $veth already exists) and bring both ends up.
#
# Both ends get the MTU read from $bridge. If $mac is given it is appended as 'addr' to the
# 'ip link add' command. IPv6 is disabled on both ends before they are activated.
#
# Dies if no bridge is given or if one of the steps fails.
sub veth_create {
    my ($veth, $vethpeer, $bridge, $mac) = @_;

    die "unable to get bridge setting\n" if !$bridge;

    my $bridgemtu = read_bridge_mtu($bridge);

    # create veth pair
    if (! -d "/sys/class/net/$veth") {
        my @cmd = (
            '/sbin/ip', 'link', 'add',
            'name', $veth, 'mtu', $bridgemtu, 'type', 'veth', # veth device + MTU
            'peer', 'name', $vethpeer, 'mtu', $bridgemtu, # peer device + MTU
        );
        push @cmd, 'addr', $mac if $mac;

        eval { run_command(\@cmd) };
        die "can't create interface $veth - $@\n" if $@;
    }

    # up vethpair
    my @pair = ($veth, $vethpeer);
    for my $end (@pair) {
        disable_ipv6($end);
    }
    for my $end (@pair) {
        $activate_interface->($end, $bridgemtu);
    }
}
372 | ||
# Delete the interface $veth if it exists, then run tap_unplug() for it; errors from
# tap_unplug() are ignored.
sub veth_delete {
    my ($veth) = @_;

    iface_delete($veth) if -d "/sys/class/net/$veth";

    eval { tap_unplug($veth) };
}
35efc4eb | 381 | |
# Insert a firewall bridge between the guest interface $iface and the Linux bridge $bridge.
#
# The firewall bridge is created if needed, activated with the MTU of $bridge and gets the
# settings copied over from $bridge. A veth pair connects the two bridges: the peer end is added
# to $bridge (with $tag / $trunks, and with learning disabled if $no_learning is set), the other
# end and $iface are added to the firewall bridge.
my $create_firewall_bridge_linux = sub {
    my ($iface, $bridge, $tag, $trunks, $no_learning) = @_;

    my ($vmid, $devid) = $parse_tap_device_name->($iface);
    my ($fwbr, $vethfw, $vethfwpeer) = $compute_fwbr_names->($vmid, $devid);

    my $bridgemtu = read_bridge_mtu($bridge);

    # firewall bridge itself
    $cond_create_bridge->($fwbr);
    $activate_interface->($fwbr, $bridgemtu);
    copy_bridge_config($bridge, $fwbr);

    # link between the firewall bridge and the real bridge
    veth_create($vethfw, $vethfwpeer, $bridge);
    $bridge_add_interface->($bridge, $vethfwpeer, $tag, $trunks);
    $bridge_disable_interface_learning->($vethfwpeer) if $no_learning;
    $bridge_add_interface->($fwbr, $vethfw);

    # guest interface
    $bridge_add_interface->($fwbr, $iface);
};
402 | ||
# Insert a firewall bridge between the guest interface $iface and the Open vSwitch bridge
# $bridge.
#
# The firewall bridge is created if needed and activated with the MTU of $bridge; $iface is
# added to it. An internal OVS port is then added to $bridge (with $tag / $trunks), activated
# and added to the firewall bridge as well; learning is disabled on it if $no_learning is set.
my $create_firewall_bridge_ovs = sub {
    my ($iface, $bridge, $tag, $trunks, $no_learning) = @_;

    my ($vmid, $devid) = $parse_tap_device_name->($iface);
    my ($fwbr, undef, undef, $ovsintport) = $compute_fwbr_names->($vmid, $devid);

    my $bridgemtu = read_bridge_mtu($bridge);

    # firewall bridge with the guest interface
    $cond_create_bridge->($fwbr);
    $activate_interface->($fwbr, $bridgemtu);
    $bridge_add_interface->($fwbr, $iface);

    # internal OVS port as link to the firewall bridge
    $ovs_bridge_add_port->($bridge, $ovsintport, $tag, 1, $trunks);
    $activate_interface->($ovsintport, $bridgemtu);
    $bridge_add_interface->($fwbr, $ovsintport);
    $bridge_disable_interface_learning->($ovsintport) if $no_learning;
};
422 | ||
# Remove the firewall helper devices belonging to the guest interface $iface: the internal OVS
# port, the veth link and the firewall bridge. Each device is only touched if it exists.
# Returns without doing anything if $iface is not a tap/veth guest interface name.
my $cleanup_firewall_bridge = sub {
    my ($iface) = @_;

    my ($vmid, $devid) = $parse_tap_device_name->($iface, 1);
    return if !defined($vmid);

    my ($fwbr, $vethfw, undef, $ovsintport) = $compute_fwbr_names->($vmid, $devid);

    # cleanup old port config from any openvswitch bridge
    run_command("/usr/bin/ovs-vsctl del-port $ovsintport", outfunc => sub {}, errfunc => sub {})
        if -d "/sys/class/net/$ovsintport";

    # delete old vethfw interface
    veth_delete($vethfw);

    # cleanup fwbr bridge
    iface_delete($fwbr) if -d "/sys/class/net/$fwbr";
};
443 | ||
# Connect the guest interface $iface to $bridge and apply the rate limit.
#
# Parameters:
#   $iface    - guest interface
#   $bridge   - target bridge; handled as Linux bridge if /sys/class/net/$bridge/bridge exists,
#               as Open vSwitch bridge otherwise
#   $tag      - optional VLAN tag
#   $firewall - if true, a firewall bridge is placed between interface and bridge
#   $trunks   - optional trunk list; on a Linux bridge this requires vlan filtering to be enabled
#   $rate     - passed to tap_rate_limit()
#   $opts     - optional hash; a defined but false 'learning' entry disables learning
#
# Stale OVS port configuration and firewall devices of $iface are removed first.
sub tap_plug {
    my ($iface, $bridge, $tag, $firewall, $trunks, $rate, $opts) = @_;

    $opts //= {};

    # default to learning on
    my $no_learning = defined($opts->{learning}) && !$opts->{learning};

    # cleanup old port config from any openvswitch bridge
    eval {
        run_command("/usr/bin/ovs-vsctl del-port $iface", outfunc => sub {}, errfunc => sub {});
    };

    my $is_linux_bridge = -d "/sys/class/net/$bridge/bridge";

    $cleanup_firewall_bridge->($iface); # remove stale devices

    if (!$is_linux_bridge) {
        if ($firewall) {
            $create_firewall_bridge_ovs->($iface, $bridge, $tag, $trunks, $no_learning);
        } else {
            $ovs_bridge_add_port->($bridge, $iface, $tag, undef, $trunks);
        }
    } else {
        my $vlan_aware = PVE::Tools::file_read_firstline("/sys/class/net/$bridge/bridge/vlan_filtering");

        if (!$vlan_aware) {
            die "vlan aware feature need to be enabled to use trunks" if $trunks;

            # without vlan filtering, tagged traffic goes through a dedicated per-tag bridge
            my $newbridge = activate_bridge_vlan($bridge, $tag);
            copy_bridge_config($bridge, $newbridge) if $bridge ne $newbridge;
            ($bridge, $tag) = ($newbridge, undef);
        }

        if ($firewall) {
            $create_firewall_bridge_linux->($iface, $bridge, $tag, $trunks, $no_learning);
        } else {
            $bridge_add_interface->($bridge, $iface, $tag, $trunks);
        }
        $bridge_disable_interface_learning->($iface) if $no_learning;
    }

    tap_rate_limit($iface, $rate);
}
488 | ||
# Disconnect the guest interface $iface.
#
# If the interface is a bridge port (its brport/bridge link exists) it is detached from its
# master. Afterwards the firewall devices belonging to $iface are cleaned up and the interface
# is removed from any Open vSwitch bridge; a failure of that last step is ignored.
sub tap_unplug {
    my ($iface) = @_;

    # the name of the bridge is not needed to detach the port, the existence of the link suffices
    iface_set_master($iface, undef) if -l "/sys/class/net/$iface/brport/bridge";

    $cleanup_firewall_bridge->($iface);

    # cleanup old port config from any openvswitch bridge
    eval { run_command("/usr/bin/ovs-vsctl del-port $iface", outfunc => sub {}, errfunc => sub {}) };
}
505 | ||
# Copy a fixed set of bridge settings from bridge $br0 to bridge $br1.
#
# Each setting is read from /sys/class/net/<bridge>/bridge/<name> for both bridges and only
# written to $br1 if the values differ. Errors are reported as warnings, per setting.
# Does nothing if both names are equal.
sub copy_bridge_config {
    my ($br0, $br1) = @_;

    return if $br0 eq $br1;

    my @br_configs = qw(
        ageing_time stp_state priority forward_delay
        hello_time max_age multicast_snooping multicast_querier
    );

    for my $sysname (@br_configs) {
        my $source = "/sys/class/net/$br0/bridge/$sysname";
        my $target = "/sys/class/net/$br1/bridge/$sysname";

        eval {
            my $wanted = PVE::Tools::file_read_firstline($source);
            my $current = PVE::Tools::file_read_firstline($target);
            PVE::ProcFSTools::write_proc_entry($target, $wanted) if $wanted ne $current;
        };
        warn $@ if $@;
    }
}
527 | ||
# Make sure the VLAN interface "$iface.$tag" exists, is up and is a port of $bridgevlan.
#
# The VLAN interface is created on top of $iface if it does not exist yet (IPv6 gets disabled on
# a newly created one). Dies if it is already a port of a different bridge; returns early if it
# is already a port of $bridgevlan.
sub activate_bridge_vlan_slave {
    my ($bridgevlan, $iface, $tag) = @_;

    my $ifacevlan = "${iface}.$tag";

    # create the vlan interface on $iface if it does not exist already
    if (! -d "/sys/class/net/$ifacevlan") {
        my @cmd = (
            '/sbin/ip', 'link', 'add',
            'link', $iface,
            'name', $ifacevlan,
            'type', 'vlan', 'id', $tag,
        );
        eval { run_command(\@cmd) };
        die "can't add vlan tag $tag to interface $iface - $@\n" if $@;

        # remove ipv6 link-local address before activation
        disable_ipv6($ifacevlan);
    }

    # be sure to have the $ifacevlan up
    $activate_interface->($ifacevlan);

    # test if $ifacevlan is already enslaved in another bridge
    my $path = "/sys/class/net/$ifacevlan/brport/bridge";
    if (-l $path) {
        my $tbridge = basename(readlink($path));
        die "interface $ifacevlan already exist in bridge $tbridge\n" if $tbridge ne $bridgevlan;

        # Port already attached to bridge: do nothing.
        return;
    }

    # add $ifacevlan to the bridge
    $bridge_add_interface->($bridgevlan, $ifacevlan);
}
565 | ||
# Set up the per-VLAN bridge "${bridge}v${tag}" for $bridge and return its name.
#
# Without a tag, $bridge itself is returned. Otherwise the ports of $bridge whose names match
# the physical interface pattern are collected and, while holding the network lock, the VLAN
# bridge is created if missing, a VLAN interface of every collected port is attached to it, and
# the bridge is brought up with IPv6 disabled.
#
# Dies if $bridge does not exist, if the tag is outside 1..4094, or if $bridge has no matching
# port.
sub activate_bridge_vlan {
    my ($bridge, $tag_param) = @_;

    die "bridge '$bridge' is not active\n" if ! -d "/sys/class/net/$bridge";

    return $bridge if !defined($tag_param); # no vlan, simply return

    my $tag = int($tag_param);
    die "got strange vlan tag '$tag_param'\n" if $tag < 1 || $tag > 4094;

    my $bridgevlan = "${bridge}v$tag";

    my @ifaces;
    my $collect_port = sub {
        my ($port) = @_;
        push @ifaces, $port;
    };
    PVE::Tools::dir_glob_foreach(
        "/sys/class/net/$bridge/brif", '(((eth|bond)\d+|en[^.]+)(\.\d+)?)', $collect_port);

    die "no physical interface on bridge '$bridge'\n" if !@ifaces;

    lock_network(sub {
        # add bridgevlan if it doesn't already exist
        iface_create($bridgevlan, 'bridge') if ! -d "/sys/class/net/$bridgevlan";

        # bind each collected interface to the vlan bridge
        activate_bridge_vlan_slave($bridgevlan, $_, $tag) for @ifaces;

        #fixme: set other bridge flags

        # remove ipv6 link-local address before activation
        disable_ipv6($bridgevlan);
        # be sure to have the bridge up
        $activate_interface->($bridgevlan);
    });

    return $bridgevlan;
}
607 | ||
# Try to open a TCP connection to $host:$port within $timeout seconds.
#
# Defaults: a false $timeout becomes 3, a false $port becomes 7.
# Returns 1 if the connection could be established. If the connection was refused, the result
# is 1 when no port was given and 0 when a port was given. In all other cases a false value is
# returned.
sub tcp_ping {
    my ($host, $port, $timeout) = @_;

    $timeout ||= 3; # sane default

    # Net::Ping defaults to the echo port, and Net::Ping's port_number() implies
    # service_check(1): only without an explicit port does a refused connection count as success
    my $refused = $port ? 0 : 1;
    $port ||= 7;

    my ($sock, $result);

    my $try_connect = sub {
        $sock = IO::Socket::IP->new(PeerHost => $host, PeerPort => $port, Type => SOCK_STREAM);
        $result = $refused if $! == ECONNREFUSED;
    };

    # NOTE(review): $result is also assigned from run_with_timeout's return value, which
    # presumably is the value of the last statement of $try_connect - confirm in PVE::Tools
    eval { $result = PVE::Tools::run_with_timeout($timeout, $try_connect) };

    return $result if !$sock;

    $sock->close();
    return 1;
}
635 | ||
# Build a Net::IP object for the network described by $cidr ('<address>/<prefix>').
#
# The address part is masked with the prefix, so the address does not have to be the network's
# base address. $version is passed to Net::IP when parsing the address; the version used for
# masking is taken from the parsed address.
#
# Returns nothing if $cidr has no '<address>/<prefix>' form, or if the address or the mask
# cannot be handled.
sub IP_from_cidr {
    my ($cidr, $version) = @_;

    my ($ip, $prefix) = $cidr =~ m!^(\S+?)/(\S+)$!
        or return;

    my $ipobj = Net::IP->new($ip, $version) or return;
    $version = $ipobj->version();

    my $binmask = Net::IP::ip_get_mask($prefix, $version) or return;

    # both operands are strings of '0' and '1', the string-wise AND keeps them in that form
    my $network_bits = $ipobj->binip() & $binmask;
    my $network_ip = Net::IP::ip_bintoip($network_bits, $version);

    return Net::IP->new("$network_ip/$prefix");
}
654 | ||
# Check whether $ip lies inside the network $cidr.
#
# Returns a true value if Net::IP reports the address as contained in or identical to the
# network, a false value if not. Returns undef if $cidr or $ip cannot be parsed, and nothing if
# the overlap cannot be determined.
sub is_ip_in_cidr {
    my ($ip, $cidr, $version) = @_;

    my $cidr_obj = IP_from_cidr($cidr, $version) or return undef;
    my $ip_obj = Net::IP->new($ip, $version) or return undef;

    my $overlap = $cidr_obj->overlaps($ip_obj);
    return if !defined($overlap);

    my $contained = $overlap == $Net::IP::IP_B_IN_A_OVERLAP;
    my $identical = $overlap == $Net::IP::IP_IDENTICAL;

    return $contained || $identical;
}
670 | ||
# get all currently configured addresses that have a global scope, i.e., are reachable from the
# outside of the host and thus are neither loopback nor link-local ones
# returns an array ref of: { addr => "IP", cidr => "IP/PREFIXLEN", family => "inet|inet6" }
# The result is sorted by family first and by address (as string) second.
sub get_reachable_networks {
    my $json = '';
    run_command([qw(ip -j addr show up scope global)], outfunc => sub { $json .= shift });
    my $links = decode_json($json);

    # filter/transform first so that we can sort correctly more easily below
    my @infos;
    for my $link (@$links) {
        next if !$link->{addr_info};
        next if grep { $_ eq 'LOOPBACK' } @{$link->{flags}};

        # skip empty address entries
        push @infos, grep { scalar(keys %$_) } @{$link->{addr_info}};
    }

    my @sorted = sort { $a->{family} cmp $b->{family} || $a->{local} cmp $b->{local} } @infos;

    return [
        map {
            +{
                addr => $_->{local},
                cidr => "$_->{local}/$_->{prefixlen}",
                family => $_->{family},
            }
        } @sorted
    ];
}
beb9820f | 695 | |
# get one or all local IPs that are not loopback ones, able to pick up the following ones (in order)
# - the one the hostname primarily resolves to, follows gai.conf (admin controlled) and will be
#   prioritised
# - all configured in the interfaces configuration
# - all networks currently known to the kernel in the current (root) namespace
# returns a single address if no parameter is passed, and all found, grouped by type, if `all => 1`
# is passed.
sub get_local_ip {
    my (%param) = @_;

    my $want_all = $param{all};

    my $nodename = PVE::INotify::nodename();
    my $resolved_host = eval { get_ip_from_hostname($nodename) };

    return $resolved_host if defined($resolved_host) && !$want_all;

    # hashes to avoid duplicates and group by type
    my (%v4_seen, %v6_seen);

    my $ifaces = PVE::INotify::read_file('interfaces', 1)->{data}->{ifaces};
    for my $if (values %$ifaces) {
        next if $if->{type} eq 'loopback';

        my ($v4, $v6) = ($if->{address}, $if->{address6});
        next if !defined($v4) && !defined($v6);

        # prefer v4, admin can override $resolved_host via hosts/gai.conf
        return ($v4 // $v6) if !$want_all;

        $v4_seen{$v4} = 1 if defined($v4);
        $v6_seen{$v6} = 1 if defined($v6);
    }

    my $live = eval { get_reachable_networks() } // [];
    for my $info (@$live) {
        my ($addr, $family) = @$info{qw(addr family)};

        return $addr if !$want_all;

        my $bucket = $family eq 'inet' ? \%v4_seen : \%v6_seen;
        $bucket->{$addr} = 1;
    }

    return undef if !$want_all; # getting here means no early return above triggered -> no IPs

    # order gai.conf controlled first, then group v4 and v6, simply lexically sorted
    my @ordered;
    if ($resolved_host) {
        push @ordered, $resolved_host;
        delete $v4_seen{$resolved_host};
        delete $v6_seen{$resolved_host};
    }
    push @ordered, sort keys %v4_seen;
    push @ordered, sort keys %v6_seen;

    return \@ordered;
}
749 | ||
# Return an array ref with the addresses that 'ip address show to $cidr up' lists, without
# duplicates and in the order of their first appearance in the command output.
sub get_local_ip_from_cidr {
    my ($cidr) = @_;

    my @ordered;
    my %seen;

    my $collect = sub {
        my ($line) = @_;
        return if $line !~ m!^\s*inet(?:6)?\s+($PVE::Tools::IPRE)(?:/\d+|\s+peer\s+)!;
        push @ordered, $1 if !$seen{$1}++;
    };
    run_command(['/sbin/ip', 'address', 'show', 'to', $cidr, 'up'], outfunc => $collect);

    return \@ordered;
}
763 | ||
# Convert a packed socket address into its numeric form using getnameinfo().
#
# Returns ($host, $port) in list context and only $host in scalar context.
# Dies if getnameinfo() reports an error.
sub addr_to_ip {
    my ($addr) = @_;

    my ($err, $host, $port) = Socket::getnameinfo($addr, NI_NUMERICHOST | NI_NUMERICSERV);
    die "failed to get numerical host address: $err\n" if $err;

    return wantarray ? ($host, $port) : $host;
}
771 | ||
# Resolve $hostname and return the first resulting address that neither starts with '127.' nor
# equals '::1'.
#
# Returns ($ip, $family) in list context and only $ip in scalar context. If the lookup fails or
# yields no suitable address, the function dies, unless $noerr is set, in which case undef is
# returned.
sub get_ip_from_hostname {
    my ($hostname, $noerr) = @_;

    my @res = eval { PVE::Tools::getaddrinfo_all($hostname) };
    if (my $err = $@) {
        return undef if $noerr;
        die "hostname lookup '$hostname' failed - $err";
    }

    for my $ai (@res) {
        my $ip = addr_to_ip($ai->{addr});
        next if $ip =~ m/^127\.|^::1$/;

        return wantarray ? ($ip, $ai->{family}) : $ip;
    }

    # NOTE: we only get here if no WAN/LAN IP was found, so this is now the error path!
    return undef if $noerr;
    die "address lookup for '$hostname' did not find any IP address\n";
}
791 | ||
# Run $code with @param while holding the lock file '/var/lock/pve-network.lck'.
#
# Returns what lock_file() returns and re-throws any error left in $@ afterwards.
sub lock_network {
    my ($code, @param) = @_;

    my $lockfile = '/var/lock/pve-network.lck';
    # presumably the timeout for acquiring the lock - confirm against PVE::Tools::lock_file
    my $timeout = 10;

    my $res = lock_file($lockfile, $timeout, $code, @param);
    die $@ if $@;

    return $res;
}
798 | ||
# the canonical form of the given IP, i.e. dotted quad for IPv4 and RFC 5952 for IPv6
# Dies if NetAddr::IP cannot parse the given string.
sub canonical_ip {
    my ($ip) = @_;

    my $parsed = NetAddr::IP->new($ip);
    die "invalid IP string '$ip'\n" if !$parsed;

    return $parsed->canon();
}
807 | ||
# List of unique, canonical IPs in the provided list.
# Keeps the original order, filtering later duplicates.
#
# Takes an array ref of IP strings and returns a new array ref; the passed list is left
# untouched. Dies if one of the entries is not a valid IP (see canonical_ip).
sub unique_ips {
    my ($ips) = @_;

    my $res = [];
    my $seen = {};

    for my $ip (@{$ips}) {
        # the loop variable aliases the caller's array element, so the canonical form has to go
        # into a separate variable to not rewrite the input list
        my $canonical = canonical_ip($ip);

        next if $seen->{$canonical};

        $seen->{$canonical} = 1;
        push @{$res}, $canonical;
    }

    return $res;
}
827 | ||
b9436cda | 828 | 1; |