]>
Commit | Line | Data |
---|---|---|
1 | package PVE::Network; | |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | use PVE::Tools qw(run_command); | |
6 | use PVE::ProcFSTools; | |
7 | use PVE::INotify; | |
8 | use File::Basename; | |
9 | use IO::Socket::IP; | |
10 | use POSIX qw(ECONNREFUSED); | |
11 | ||
12 | use Net::IP; | |
13 | ||
14 | # host network related utility functions | |
15 | ||
# Dotted-quad IPv4 netmasks indexed by prefix length: element N holds the
# mask for /N, from '0.0.0.0' (index 0) up to '255.255.255.255' (index 32).
our $ipv4_reverse_mask = [
    map {
        # Set the top $_ bits of a 32-bit word; /0 is special-cased so we
        # never shift by the full word width.
        my $bits = $_ ? (0xffffffff << (32 - $_)) & 0xffffffff : 0;
        join('.', unpack('C4', pack('N', $bits)));
    } 0 .. 32
];
51 | ||
# Reverse lookup from dotted-quad IPv4 netmask to prefix length, covering
# only the /8 .. /32 range.
our $ipv4_mask_hash_localnet = {
    map {
        my $bits = (0xffffffff << (32 - $_)) & 0xffffffff;
        (join('.', unpack('C4', pack('N', $bits))) => $_);
    } 8 .. 32
};
79 | ||
# Replace any existing tc configuration on $iface with a new rate limit.
#
# Parameters:
#   $iface - device name handed to /sbin/tc
#   $rate  - rate, passed to tc as "${rate}bps"; a false value only removes
#            the existing configuration
#   $burst - burst size, passed to tc as "${burst}b"
#   $debug - when true, print the resulting qdisc/class/filter listing
#
# Dies (via run_command) if installing the new settings fails.
sub setup_tc_rate_limit {
    my ($iface, $rate, $burst, $debug) = @_;

    # these are allowed / expected to fail, e.g. when there is no previous
    # rate limit to remove
    my @removals = (
        "class del dev $iface parent 1: classid 1:1",
        "filter del dev $iface parent ffff: protocol all pref 50 u32",
        "qdisc del dev $iface ingress",
        "qdisc del dev $iface root",
    );
    for my $args (@removals) {
        eval { run_command("/sbin/tc $args >/dev/null 2>&1"); };
    }

    return if !$rate;

    my $limits = "rate ${rate}bps burst ${burst}b";

    # tbf does not work for unknown reason
    #$TC qdisc add dev $DEV root tbf rate $RATE latency 100ms burst $BURST
    # so we use htb instead
    run_command("/sbin/tc qdisc add dev $iface root handle 1: htb default 1");
    run_command("/sbin/tc class add dev $iface parent 1: classid 1:1 htb $limits");

    # ingress side: police filter on the ffff: qdisc
    run_command("/sbin/tc qdisc add dev $iface handle ffff: ingress");
    run_command("/sbin/tc filter add dev $iface parent ffff: prio 50 basic police $limits mtu 64kb drop flowid :1");

    if ($debug) {
        print "DEBUG tc settings\n";
        for my $listing (
            "qdisc ls dev $iface",
            "class ls dev $iface",
            "filter ls dev $iface parent ffff:",
        ) {
            system("/sbin/tc $listing");
        }
    }
}
111 | ||
# Apply a rate limit to $iface through setup_tc_rate_limit.
#
#   $iface - device name
#   $rate  - multiplied by 1024*1024 and truncated to an integer before it is
#            passed on; a false value is passed through unchanged
#
# The burst is fixed at 1024*1024 and debug output is disabled.
sub tap_rate_limit {
    my ($iface, $rate) = @_;

    my $scaled_rate = $rate ? int($rate * 1024 * 1024) : $rate;

    setup_tc_rate_limit($iface, $scaled_rate, 1024 * 1024, 0);
}
121 | ||
# Read the MTU of $bridge from /sys/class/net/$bridge/mtu and return it as an
# integer. Dies if nothing can be read or if the value is not purely numeric.
my $read_bridge_mtu = sub {
    my ($bridge) = @_;

    my $raw = PVE::Tools::file_read_firstline("/sys/class/net/$bridge/mtu");
    die "bridge '$bridge' does not exist\n" if !$raw;

    # take the value from a regex capture to avoid an insecure dependency
    if ($raw =~ /^(\d+)$/) {
        return int($1);
    }

    die "unable to parse mtu value";
};
133 | ||
# Split a device name of the form tap<VMID>i<DEVID> or veth<VMID>i<DEVID>
# into ($vmid, $devid).
#
# For any other name: returns undef when $noerr is true, dies otherwise.
my $parse_tap_device_name = sub {
    my ($iface, $noerr) = @_;

    if ($iface =~ m/^(?:tap|veth)(\d+)i(\d+)$/) {
        return ($1, $2);
    }

    return undef if $noerr;
    die "can't create firewall bridge for random interface name '$iface'\n";
};
152 | ||
# Derive the four firewall-related device names for a VM network device.
#
# Returns ($fwbr, $vethfw, $vethfwpeer, $ovsintport).
my $compute_fwbr_names = sub {
    my ($vmid, $devid) = @_;

    # Note: the firewall uses 'fwln+' to filter traffic to VMs
    return (
        "fwbr${vmid}i${devid}",    # $fwbr
        "fwln${vmid}i${devid}",    # $vethfw
        "fwpr${vmid}p${devid}",    # $vethfwpeer
        "fwln${vmid}o${devid}",    # $ovsintport
    );
};
164 | ||
# Create $bridge with brctl unless /sys/class/net/$bridge already exists.
# Dies if brctl fails.
my $cond_create_bridge = sub {
    my ($bridge) = @_;

    return if -d "/sys/class/net/$bridge";

    system("/sbin/brctl addbr $bridge") == 0
        or die "can't add bridge '$bridge'\n";
};
173 | ||
# Attach $iface to the Linux bridge $bridge and, if the bridge has VLAN
# filtering enabled, set up the VLAN membership of the new port.
#
# Parameters:
#   $bridge - bridge device name
#   $iface  - device to add to the bridge
#   $tag    - optional VLAN id; becomes the port's pvid/untagged VLAN
#   $trunks - optional ';'-separated list of additional VLAN ids/ranges
#
# On a VLAN-filtering bridge:
#   - with $tag: all VLANs 1-4094 are removed from the port, then $tag is
#     added as pvid/untagged
#   - without $tag and without $trunks: VLANs 2-4094 are added
#   - every entry of $trunks is added as an extra VLAN
#
# Dies if any of the brctl/bridge commands fails.
my $bridge_add_interface = sub {
    my ($bridge, $iface, $tag, $trunks) = @_;

    # the message used to print the literal text 'iface' instead of the name
    system("/sbin/brctl addif $bridge $iface") == 0 ||
        die "can't add interface '$iface' to bridge '$bridge'\n";

    my $vlan_aware = PVE::Tools::file_read_firstline("/sys/class/net/$bridge/bridge/vlan_filtering");

    return if !$vlan_aware;

    if ($tag) {
        system({'/sbin/bridge'} 'bridge', 'vlan', 'del', 'dev', $iface, 'vid', '1-4094') == 0
            or die "failed to remove default vlan tags of $iface\n";
        system({'/sbin/bridge'} 'bridge', 'vlan', 'add', 'dev', $iface, 'vid', $tag, 'pvid', 'untagged') == 0
            or die "unable to add vlan $tag to interface $iface\n";

        warn "Caution: Setting VLAN ID 1 on a VLAN aware bridge may be dangerous\n" if $tag == 1;
    } elsif (!$trunks) {
        # neither a tag nor an explicit trunk list: allow all VLANs 2-4094
        system("/sbin/bridge vlan add dev $iface vid 2-4094") == 0 ||
            die "unable to add default vlan tags to interface $iface\n";
    }

    if ($trunks) {
        foreach my $trunk (split /;/, $trunks) {
            system("/sbin/bridge vlan add dev $iface vid $trunk") == 0 ||
                die "unable to add vlan $trunk to interface $iface\n";
        }
    }
};
204 | ||
# Add $iface as a port to the Open vSwitch bridge $bridge via ovs-vsctl.
#
# Parameters:
#   $bridge   - OVS bridge name
#   $iface    - port name
#   $tag      - optional VLAN tag (tag=...)
#   $internal - when true, the interface type is set to 'internal'
#   $trunks   - optional ';'-separated VLAN list; passed on comma-separated
#
# With both $tag and $trunks the port gets vlan_mode=native-untagged.
# Dies if ovs-vsctl fails.
my $ovs_bridge_add_port = sub {
    my ($bridge, $iface, $tag, $internal, $trunks) = @_;

    # the trunks= option takes a comma-separated list
    $trunks =~ tr/;/,/ if $trunks;

    my @parts = ("/usr/bin/ovs-vsctl add-port $bridge $iface");
    push @parts, "tag=$tag" if $tag;
    push @parts, "trunks=$trunks" if $trunks;
    push @parts, "vlan_mode=native-untagged" if $tag && $trunks;
    push @parts, "-- set Interface $iface type=internal" if $internal;

    system(join(' ', @parts)) == 0
        or die "can't add ovs port '$iface'\n";
};
219 | ||
# Bring $iface up with "ip link set ... up"; dies if the command fails.
my $activate_interface = sub {
    my ($iface) = @_;

    my $status = system("/sbin/ip link set $iface up");
    die "can't activate interface '$iface'\n" if $status != 0;
};
226 | ||
# Bring up the tap device $iface (address 0.0.0.0, promiscuous) with the MTU
# of $bridge.
#
# Dies if $bridge is not given, if its MTU cannot be read, or if the
# ifconfig call fails.
sub tap_create {
    my ($iface, $bridge) = @_;

    die "unable to get bridge setting\n" if !$bridge;

    my $mtu = $read_bridge_mtu->($bridge);

    # any failure is reported with a single generic message
    eval {
        PVE::Tools::run_command("/sbin/ifconfig $iface 0.0.0.0 promisc up mtu $mtu");
    };
    die "interface activation failed\n" if $@;
}
239 | ||
# Create the veth pair $veth/$vethpeer (unless $veth already exists) using
# the MTU of $bridge, then bring both ends up.
#
# Parameters:
#   $veth, $vethpeer - names of the two ends of the pair
#   $bridge          - bridge whose MTU is used
#   $mac             - optional address, appended to the command as "addr $mac"
#
# Dies if $bridge is missing, its MTU cannot be read, or a command fails.
sub veth_create {
    my ($veth, $vethpeer, $bridge, $mac) = @_;

    die "unable to get bridge setting\n" if !$bridge;

    my $mtu = $read_bridge_mtu->($bridge);

    # create veth pair
    unless (-d "/sys/class/net/$veth") {
        my @parts = ("/sbin/ip link add name $veth type veth peer name $vethpeer mtu $mtu");
        push @parts, "addr $mac" if $mac;
        system(join(' ', @parts)) == 0 || die "can't create interface $veth\n";
    }

    # up vethpair
    for my $end ($veth, $vethpeer) {
        $activate_interface->($end);
    }
}
258 | ||
# Delete the device $veth with "ip link delete" if it exists; command output
# is discarded.
sub veth_delete {
    my ($veth) = @_;

    return if ! -d "/sys/class/net/$veth";

    run_command("/sbin/ip link delete dev $veth", outfunc => sub {}, errfunc => sub {});
}
267 | ||
# Insert a per-device firewall bridge between $iface and the Linux bridge
# $bridge:
#
#   $iface -- fwbr -- vethfw <=> vethfwpeer -- $bridge
#
# $tag and $trunks are applied to the port on $bridge (the veth peer).
my $create_firewall_bridge_linux = sub {
    my ($iface, $bridge, $tag, $trunks) = @_;

    my ($vmid, $devid) = $parse_tap_device_name->($iface);
    my ($fwbr, $vethfw, $vethfwpeer) = $compute_fwbr_names->($vmid, $devid);

    # firewall bridge first, so the settings can be copied onto it
    $cond_create_bridge->($fwbr);
    $activate_interface->($fwbr);

    copy_bridge_config($bridge, $fwbr);
    veth_create($vethfw, $vethfwpeer, $bridge);

    # wire both ends of the veth pair, VLAN settings go on the $bridge side
    $bridge_add_interface->($fwbr, $vethfw);
    $bridge_add_interface->($bridge, $vethfwpeer, $tag, $trunks);

    # finally plug the device itself into the firewall bridge
    $bridge_add_interface->($fwbr, $iface);
};
285 | ||
# Insert a per-device firewall bridge between $iface and the Open vSwitch
# bridge $bridge. The firewall bridge is connected to $bridge through an OVS
# internal port which carries $tag/$trunks and gets the MTU of $bridge.
my $create_firewall_bridge_ovs = sub {
    my ($iface, $bridge, $tag, $trunks) = @_;

    my ($vmid, $devid) = $parse_tap_device_name->($iface);
    my ($fwbr, undef, undef, $ovsintport) = $compute_fwbr_names->($vmid, $devid);

    my $mtu = $read_bridge_mtu->($bridge);

    $cond_create_bridge->($fwbr);
    $activate_interface->($fwbr);

    $bridge_add_interface->($fwbr, $iface);

    # internal OVS port (4th argument) linking $bridge to the firewall bridge
    $ovs_bridge_add_port->($bridge, $ovsintport, $tag, 1, $trunks);
    $activate_interface->($ovsintport);

    # set the same mtu for ovs int port
    PVE::Tools::run_command("/sbin/ifconfig $ovsintport mtu $mtu");

    $bridge_add_interface->($fwbr, $ovsintport);
};
307 | ||
# Remove the firewall devices belonging to $iface: the OVS internal port,
# the veth pair and the firewall bridge. Does nothing when $iface is not a
# tap/veth style name. Command output is discarded.
my $cleanup_firewall_bridge = sub {
    my ($iface) = @_;

    my ($vmid, $devid) = $parse_tap_device_name->($iface, 1);
    return if !defined($vmid);

    my ($fwbr, $vethfw, undef, $ovsintport) = $compute_fwbr_names->($vmid, $devid);

    my @silent = (outfunc => sub {}, errfunc => sub {});

    # cleanup old port config from any openvswitch bridge
    run_command("/usr/bin/ovs-vsctl del-port $ovsintport", @silent)
        if -d "/sys/class/net/$ovsintport";

    # delete old vethfw interface
    veth_delete($vethfw);

    # cleanup fwbr bridge
    if (-d "/sys/class/net/$fwbr") {
        run_command("/sbin/ip link set dev $fwbr down", @silent);
        run_command("/sbin/brctl delbr $fwbr", @silent);
    }
};
329 | ||
# Connect $iface to $bridge and apply the rate limit.
#
# Parameters:
#   $iface    - device to plug
#   $bridge   - target bridge; treated as a Linux bridge when
#               /sys/class/net/$bridge/bridge exists, as an Open vSwitch
#               bridge otherwise
#   $tag      - optional VLAN tag
#   $firewall - when true, a firewall bridge is inserted in between
#   $trunks   - optional VLAN trunk list (needs a VLAN aware Linux bridge,
#               or OVS)
#   $rate     - optional rate limit, handed to tap_rate_limit
#
# On a Linux bridge without VLAN filtering, the bridge returned by
# activate_bridge_vlan is used instead and the tag is dropped.
sub tap_plug {
    my ($iface, $bridge, $tag, $firewall, $trunks, $rate) = @_;

    #cleanup old port config from any openvswitch bridge
    eval {run_command("/usr/bin/ovs-vsctl del-port $iface", outfunc => sub {}, errfunc => sub {}) };

    my $is_linux_bridge = -d "/sys/class/net/$bridge/bridge";

    $cleanup_firewall_bridge->($iface); # remove stale devices

    if ($is_linux_bridge) {
        my $vlan_aware = PVE::Tools::file_read_firstline("/sys/class/net/$bridge/bridge/vlan_filtering");

        if (!$vlan_aware) {
            die "vlan aware feature need to be enabled to use trunks" if $trunks;
            my $newbridge = activate_bridge_vlan($bridge, $tag);
            copy_bridge_config($bridge, $newbridge) if $bridge ne $newbridge;
            ($bridge, $tag) = ($newbridge, undef);
        }

        if ($firewall) {
            $create_firewall_bridge_linux->($iface, $bridge, $tag, $trunks);
        } else {
            $bridge_add_interface->($bridge, $iface, $tag, $trunks);
        }
    } elsif ($firewall) {
        $create_firewall_bridge_ovs->($iface, $bridge, $tag, $trunks);
    } else {
        $ovs_bridge_add_port->($bridge, $iface, $tag, undef, $trunks);
    }

    tap_rate_limit($iface, $rate);
}
367 | ||
# Detach $iface from whatever it is plugged into: remove it from its Linux
# bridge (if it is a bridge port), remove its firewall devices and delete it
# from any Open vSwitch bridge.
#
# Dies if brctl fails to remove the interface from its bridge.
sub tap_unplug {
    my ($iface) = @_;

    my $link = "/sys/class/net/$iface/brport/bridge";
    if (-l $link) {
        # the regex capture avoids an insecure dependency
        my ($bridge) = basename(readlink($link)) =~ /(\S+)/;

        system("/sbin/brctl delif $bridge $iface") == 0
            or die "can't del interface '$iface' from bridge '$bridge'\n";
    }

    $cleanup_firewall_bridge->($iface);

    #cleanup old port config from any openvswitch bridge
    eval {run_command("/usr/bin/ovs-vsctl del-port $iface", outfunc => sub {}, errfunc => sub {}) };
}
386 | ||
# Copy a fixed set of bridge settings from bridge $br0 to bridge $br1 through
# /sys/class/net/<bridge>/bridge/<setting>. A setting is only written when
# the two values differ. Failures are reported with warn, one setting at a
# time, and do not abort the copy. Does nothing if both names are equal.
sub copy_bridge_config {
    my ($br0, $br1) = @_;

    return if $br0 eq $br1;

    my @settings = qw(
        ageing_time stp_state priority forward_delay
        hello_time max_age multicast_snooping multicast_querier
    );

    for my $setting (@settings) {
        my $source = "/sys/class/net/$br0/bridge/$setting";
        my $target = "/sys/class/net/$br1/bridge/$setting";

        eval {
            my $wanted = PVE::Tools::file_read_firstline($source);
            my $current = PVE::Tools::file_read_firstline($target);
            PVE::ProcFSTools::write_proc_entry($target, $wanted) if $wanted ne $current;
        };
        warn $@ if $@;
    }
}
406 | ||
# Make sure the VLAN device "$iface.$tag" exists, is up, and is a port of
# the bridge $bridgevlan.
#
# Dies if the VLAN device cannot be created or activated, or if it is already
# a port of a different bridge.
sub activate_bridge_vlan_slave {
    my ($bridgevlan, $iface, $tag) = @_;
    my $ifacevlan = "${iface}.$tag";

    # create the vlan device on $iface if it does not exist yet
    unless (-d "/sys/class/net/$ifacevlan") {
        system("/sbin/ip link add link $iface name $ifacevlan type vlan id $tag") == 0
            or die "can't add vlan tag $tag to interface $iface\n";
    }

    # be sure to have the $ifacevlan up
    $activate_interface->($ifacevlan);

    # test if $ifacevlan is already enslaved in a bridge
    my $link = "/sys/class/net/$ifacevlan/brport/bridge";
    if (-l $link) {
        my $current_bridge = basename(readlink($link));
        die "interface $ifacevlan already exist in bridge $current_bridge\n"
            if $current_bridge ne $bridgevlan;

        # Port already attached to the right bridge: nothing to do.
        return;
    }

    # add $ifacevlan to the bridge
    $bridge_add_interface->($bridgevlan, $ifacevlan);
}
435 | ||
# Set up (if needed) and return the per-VLAN bridge "${bridge}v<tag>" for
# $bridge.
#
# Parameters:
#   $bridge    - existing bridge device
#   $tag_param - VLAN tag; when undefined, $bridge itself is returned
#
# Every port of $bridge whose name matches the eth*/bond*/en* pattern gets a
# VLAN sub-device which is attached to the VLAN bridge.
#
# Dies if $bridge does not exist, the tag is outside 1..4094, no matching
# port is found, or one of the commands fails.
sub activate_bridge_vlan {
    my ($bridge, $tag_param) = @_;

    die "bridge '$bridge' is not active\n" if ! -d "/sys/class/net/$bridge";

    return $bridge if !defined($tag_param); # no vlan, simply return

    my $tag = int($tag_param);

    die "got strange vlan tag '$tag_param'\n" if $tag < 1 || $tag > 4094;

    my $bridgevlan = "${bridge}v$tag";

    # collect the matching ports of $bridge
    my @ifaces;
    PVE::Tools::dir_glob_foreach(
        "/sys/class/net/$bridge/brif",
        '(((eth|bond)\d+|en[^.]+)(\.\d+)?)',
        sub {
            my ($name) = @_;
            push @ifaces, $name;
        },
    );

    die "no physical interface on bridge '$bridge'\n" if scalar(@ifaces) == 0;

    # add bridgevlan if it doesn't already exist
    unless (-d "/sys/class/net/$bridgevlan") {
        system("/sbin/brctl addbr $bridgevlan") == 0
            or die "can't add bridge $bridgevlan\n";
    }

    # bind each collected interface to the vlan bridge
    for my $iface (@ifaces) {
        activate_bridge_vlan_slave($bridgevlan, $iface, $tag);
    }

    #fixme: set other bridge flags

    # be sure to have the bridge up
    system("/sbin/ip link set $bridgevlan up") == 0
        or die "can't up bridge $bridgevlan\n";

    return $bridgevlan;
}
476 | ||
# Try to open a TCP connection to $host:$port within $timeout seconds.
#
# Parameters:
#   $host    - peer host
#   $port    - peer port; a false value selects port 7 (echo)
#   $timeout - seconds, defaults to 3
#
# Returns 1 if the connection was established. A refused connection counts
# as success (1) only when no port was given; with an explicit port it
# yields 0. Errors and timeouts inside the attempt are swallowed.
sub tcp_ping {
    my ($host, $port, $timeout) = @_;

    $timeout ||= 3; # sane default

    # Net::Ping defaults to the echo port, where "connection refused" still
    # counts as an answer; Net::Ping's port_number() implies service_check(1),
    # so with an explicit port a refusal is a failure.
    my $refused = $port ? 0 : 1;
    $port ||= 7;

    my ($sock, $result);
    eval {
        $result = PVE::Tools::run_with_timeout($timeout, sub {
            $sock = IO::Socket::IP->new(PeerHost => $host, PeerPort => $port, Type => SOCK_STREAM);
            $result = $refused if $! == ECONNREFUSED;
        });
    };

    if ($sock) {
        $sock->close();
        $result = 1;
    }

    return $result;
}
504 | ||
# Build a Net::IP object for the network that "<address>/<prefix>" belongs
# to: the address part is masked with the prefix before the object is
# created, so host bits in the input are allowed.
#
# Parameters:
#   $cidr    - string of the form "<address>/<prefix>"
#   $version - optional IP version handed to Net::IP->new
#
# Returns nothing if the string has no "/", the address is rejected by
# Net::IP, or no mask can be computed for the prefix.
sub IP_from_cidr {
    my ($cidr, $version) = @_;

    return if $cidr !~ m!^(\S+?)/(\S+)$!;
    my ($address, $prefix) = ($1, $2);

    my $ipobj = Net::IP->new($address, $version)
        or return;

    # from here on use the version Net::IP determined for the address
    $version = $ipobj->version();

    my $binmask = Net::IP::ip_get_mask($prefix, $version)
        or return;

    # string-wise AND of the two bit strings clears the host bits
    my $network = Net::IP::ip_bintoip($ipobj->binip() & $binmask, $version);

    return Net::IP->new("$network/$prefix");
}
523 | ||
# Check whether the address $ip lies inside the network $cidr.
#
# Parameters:
#   $ip      - IP address
#   $cidr    - network as "<address>/<prefix>" (host bits are masked off)
#   $version - optional IP version handed to Net::IP
#
# Returns a true value if $ip is inside $cidr, including the case where the
# network consists of exactly that address (e.g. a /32). Returns a false
# value otherwise, and undef if either argument cannot be parsed or the two
# cannot be compared.
sub is_ip_in_cidr {
    my ($ip, $cidr, $version) = @_;

    my $cidr_obj = IP_from_cidr($cidr, $version);
    return undef if !$cidr_obj;

    my $ip_obj = Net::IP->new($ip, $version);
    return undef if !$ip_obj;

    # overlaps() yields undef when the comparison itself fails; do not feed
    # that into a numeric comparison
    my $overlap = $cidr_obj->overlaps($ip_obj);
    return undef if !defined($overlap);

    # an address equal to the whole range is reported as "identical", not as
    # "B in A", and still has to count as contained
    return $overlap == $Net::IP::IP_B_IN_A_OVERLAP
        || $overlap == $Net::IP::IP_IDENTICAL;
}
535 | ||
536 | 1; |