1 package PVE
::Storage
::ZFSPoolPlugin
;
11 use PVE
::RPCEnvironment
;
12 use PVE
::Storage
::Plugin
;
13 use PVE
::Tools
qw(run_command);
15 use base
qw(PVE::Storage::Plugin);
23 content
=> [ {images
=> 1, rootdir
=> 1}, {images
=> 1 , rootdir
=> 1}],
24 format
=> [ { raw
=> 1, subvol
=> 1 } , 'raw' ],
31 description
=> "block size",
35 description
=> "use sparse volumes",
39 description
=> "mount point",
40 type
=> 'string', format
=> 'pve-storage-path',
47 pool
=> { fixed
=> 1 },
48 blocksize
=> { optional
=> 1 },
49 sparse
=> { optional
=> 1 },
50 nodes
=> { optional
=> 1 },
51 disable
=> { optional
=> 1 },
52 content
=> { optional
=> 1 },
53 bwlimit
=> { optional
=> 1 },
54 mountpoint
=> { optional
=> 1 },
58 # static zfs helper methods
60 sub zfs_parse_zvol_list
{
65 return $list if !$text;
67 my @lines = split /\n/, $text;
68 foreach my $line (@lines) {
69 my ($dataset, $size, $origin, $type, $refquota) = split(/\s+/, $line);
70 next if !($type eq 'volume' || $type eq 'filesystem');
73 my @parts = split /\//, $dataset;
74 next if scalar(@parts) < 2; # we need pool/name
75 my $name = pop @parts;
76 my $pool = join('/', @parts);
78 next unless $name =~ m!^(vm|base|subvol|basevol)-(\d+)-(\S+)$!;
81 $zvol->{pool
} = $pool;
82 $zvol->{name
} = $name;
83 if ($type eq 'filesystem') {
84 if ($refquota eq 'none') {
87 $zvol->{size
} = $refquota + 0;
89 $zvol->{format
} = 'subvol';
91 $zvol->{size
} = $size + 0;
92 $zvol->{format
} = 'raw';
94 if ($origin !~ /^-$/) {
95 $zvol->{origin
} = $origin;
104 my ($class, $volname) = @_;
106 if ($volname =~ m/^(((base|basevol)-(\d+)-\S+)\/)?
((base
|basevol
|vm
|subvol
)-(\d
+)-\S
+)$/) {
107 my $format = ($6 eq 'subvol' || $6 eq 'basevol') ?
'subvol' : 'raw';
108 my $isBase = ($6 eq 'base' || $6 eq 'basevol');
109 return ('images', $5, $7, $2, $4, $isBase, $format);
112 die "unable to parse zfs volume name '$volname'\n";
115 # virtual zfs methods (subclass can overwrite them)
118 my ($class, $storeid, $scfg, %param) = @_;
120 my $cfg_mountpoint = $scfg->{mountpoint
};
122 # ignore failure, pool might currently not be imported
125 my $res = $class->zfs_get_properties($scfg, 'mountpoint', $scfg->{pool
}, 1);
126 $mountpoint = PVE
::Storage
::Plugin
::verify_path
($res, 1) if defined($res);
129 if (defined($cfg_mountpoint)) {
130 if (defined($mountpoint) && !($cfg_mountpoint =~ m
|^\Q
$mountpoint\E/?$|)) {
131 warn "warning for $storeid - mountpoint: $cfg_mountpoint " .
132 "does not match current mount point: $mountpoint\n";
135 $scfg->{mountpoint
} = $mountpoint;
142 my ($class, $scfg, $volname, $storeid, $snapname) = @_;
144 my ($vtype, $name, $vmid) = $class->parse_volname($volname);
147 my $mountpoint = $scfg->{mountpoint
} // "/$scfg->{pool}";
149 if ($vtype eq "images") {
150 if ($name =~ m/^subvol-/ || $name =~ m/^basevol-/) {
151 $path = "$mountpoint/$name";
153 $path = "/dev/zvol/$scfg->{pool}/$name";
155 $path .= "\@$snapname" if defined($snapname);
157 die "$vtype is not allowed in ZFSPool!";
160 return ($path, $vmid, $vtype);
164 my ($class, $scfg, $timeout, $method, @params) = @_;
168 if ($method eq 'zpool_list') {
169 push @$cmd, 'zpool', 'list';
170 } elsif ($method eq 'zpool_import') {
171 push @$cmd, 'zpool', 'import';
172 $timeout = 15 if !$timeout || $timeout < 15;
174 push @$cmd, 'zfs', $method;
179 my $output = sub { $msg .= "$_[0]\n" };
181 if (PVE
::RPCEnvironment-
>is_worker()) {
182 $timeout = 60*60 if !$timeout;
183 $timeout = 60*5 if $timeout < 60*5;
185 $timeout = 10 if !$timeout;
188 run_command
($cmd, errmsg
=> "zfs error", outfunc
=> $output, timeout
=> $timeout);
193 sub zfs_wait_for_zvol_link
{
194 my ($class, $scfg, $volname, $timeout) = @_;
196 my $default_timeout = PVE
::RPCEnvironment-
>is_worker() ?
60*5 : 10;
197 $timeout = $default_timeout if !defined($timeout);
199 my ($devname, undef, undef) = $class->path($scfg, $volname);
201 for (my $i = 1; $i <= $timeout; $i++) {
203 die "timeout: no zvol device link for '$volname' found after $timeout sec found.\n"
211 my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
217 die "illegal name '$volname' - should be 'vm-$vmid-*'\n"
218 if $volname && $volname !~ m/^vm-$vmid-/;
219 $volname = $class->find_free_diskname($storeid, $scfg, $vmid, $fmt)
222 $class->zfs_create_zvol($scfg, $volname, $size);
223 $class->zfs_wait_for_zvol_link($scfg, $volname);
225 } elsif ( $fmt eq 'subvol') {
227 die "illegal name '$volname' - should be 'subvol-$vmid-*'\n"
228 if $volname && $volname !~ m/^subvol-$vmid-/;
229 $volname = $class->find_free_diskname($storeid, $scfg, $vmid, $fmt)
232 die "illegal name '$volname' - should be 'subvol-$vmid-*'\n"
233 if $volname !~ m/^subvol-$vmid-/;
235 $class->zfs_create_subvol($scfg, $volname, $size);
238 die "unsupported format '$fmt'";
245 my ($class, $storeid, $scfg, $volname, $isBase) = @_;
247 my (undef, $name, undef) = $class->parse_volname($volname);
249 $class->zfs_delete_zvol($scfg, $name);
255 my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;
257 $cache->{zfs
} = $class->zfs_list_zvol($scfg) if !$cache->{zfs
};
258 my $zfspool = $scfg->{pool
};
261 if (my $dat = $cache->{zfs
}->{$zfspool}) {
263 foreach my $image (keys %$dat) {
265 my $info = $dat->{$image};
267 my $volname = $info->{name
};
268 my $parent = $info->{parent
};
269 my $owner = $info->{vmid
};
271 if ($parent && $parent =~ m/^(\S+)\@__base__$/) {
272 my ($basename) = ($1);
273 $info->{volid
} = "$storeid:$basename/$volname";
275 $info->{volid
} = "$storeid:$volname";
279 my $found = grep { $_ eq $info->{volid
} } @$vollist;
282 next if defined ($vmid) && ($owner ne $vmid);
# Query one or more ZFS properties of a dataset.
#
# Runs `zfs get -o value -Hp <properties> <dataset>` through
# $class->zfs_request() and splits the returned text into lines.
#
# Parameters:
#   $scfg       - storage configuration hash, handed on to zfs_request
#   $properties - property name(s), passed as one argument to `zfs get`
#   $dataset    - dataset to query
#   $timeout    - timeout handed on to zfs_request (may be undef)
#
# Returns every output line in list context and only the first line in
# scalar context.
sub zfs_get_properties {
    my ($class, $scfg, $properties, $dataset, $timeout) = @_;

    my @zfs_args = ('get', '-o', 'value', '-Hp', $properties, $dataset);
    my $output = $class->zfs_request($scfg, $timeout, @zfs_args);

    my @values = split(/\n/, $output);

    return @values if wantarray;
    return $values[0];
}
300 sub zfs_get_pool_stats
{
301 my ($class, $scfg) = @_;
306 my @lines = $class->zfs_get_properties($scfg, 'available,used', $scfg->{pool
});
308 if($lines[0] =~ /^(\d+)$/) {
312 if($lines[1] =~ /^(\d+)$/) {
316 return ($available, $used);
# Create a zvol named $zvol below the configured pool.
#
# Parameters:
#   $scfg - storage configuration hash; reads {pool}, {sparse}, {blocksize}
#   $zvol - name of the volume, appended to "$scfg->{pool}/"
#   $size - requested size; it is passed to `zfs create -V` with a 'k' suffix
#
# Returns whatever $class->zfs_request() returns.
sub zfs_create_zvol {
    my ($class, $scfg, $zvol, $size) = @_;

    # Always round the size up to the next multiple of 1024 (i.e. 1M, as the
    # value carries a 'k' suffix) as a workaround until
    # https://github.com/zfsonlinux/zfs/issues/8541 is solved.
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    my @create_args = ('create');

    # optional flags, only added when set in the storage configuration
    push @create_args, '-s' if $scfg->{sparse};
    push @create_args, '-b', $scfg->{blocksize} if $scfg->{blocksize};

    push @create_args, '-V', "${size}k", "$scfg->{pool}/$zvol";

    $class->zfs_request($scfg, undef, @create_args);
}
# Create a filesystem dataset (subvol) named $volname below the pool.
#
# The dataset is created with acltype=posixacl, xattr=sa and a refquota of
# "${size}k", or "none" when $size is false (0, '' or undef).
#
# Parameters:
#   $scfg    - storage configuration hash; reads {pool}
#   $volname - dataset name, appended to "$scfg->{pool}/"
#   $size    - quota value, passed with a 'k' suffix
#
# Returns whatever $class->zfs_request() returns.
sub zfs_create_subvol {
    my ($class, $scfg, $volname, $size) = @_;

    my $refquota = 'none';
    $refquota = "${size}k" if $size;

    my @create_args = ('create');
    for my $option ('acltype=posixacl', 'xattr=sa', "refquota=${refquota}") {
        push @create_args, '-o', $option;
    }
    push @create_args, "$scfg->{pool}/$volname";

    $class->zfs_request($scfg, undef, @create_args);
}
350 sub zfs_delete_zvol
{
351 my ($class, $scfg, $zvol) = @_;
355 for (my $i = 0; $i < 6; $i++) {
357 eval { $class->zfs_request($scfg, undef, 'destroy', '-r', "$scfg->{pool}/$zvol"); };
359 if ($err =~ m/^zfs error:(.*): dataset is busy.*/) {
361 } elsif ($err =~ m/^zfs error:.*: dataset does not exist.*$/) {
376 my ($class, $scfg) = @_;
378 my $text = $class->zfs_request($scfg, 10, 'list', '-o', 'name,volsize,origin,type,refquota', '-t', 'volume,filesystem', '-Hrp');
379 my $zvols = zfs_parse_zvol_list
($text);
380 return undef if !$zvols;
383 foreach my $zvol (@$zvols) {
384 my $pool = $zvol->{pool
};
385 my $name = $zvol->{name
};
386 my $parent = $zvol->{origin
};
387 if($zvol->{origin
} && $zvol->{origin
} =~ m/^$scfg->{pool}\/(\S
+)$/){
391 $list->{$pool}->{$name} = {
393 size
=> $zvol->{size
},
395 format
=> $zvol->{format
},
396 vmid
=> $zvol->{owner
},
403 sub zfs_get_sorted_snapshot_list
{
404 my ($class, $scfg, $volname, $sort_params) = @_;
406 my @params = ('-H', '-r', '-t', 'snapshot', '-o', 'name', $sort_params->@*);
408 my $vname = ($class->parse_volname($volname))[1];
409 push @params, "$scfg->{pool}\/$vname";
411 my $text = $class->zfs_request($scfg, undef, 'list', @params);
412 my @snapshots = split(/\n/, $text);
415 for my $snapshot (@snapshots) {
416 (my $snap_name = $snapshot) =~ s/^.*@//;
417 push $snap_names->@*, $snap_name;
423 my ($class, $storeid, $scfg, $cache) = @_;
431 ($free, $used) = $class->zfs_get_pool_stats($scfg);
433 $total = $free + $used;
437 return ($total, $free, $used, $active);
440 sub volume_size_info
{
441 my ($class, $scfg, $storeid, $volname, $timeout) = @_;
443 my (undef, $vname, undef, undef, undef, undef, $format) =
444 $class->parse_volname($volname);
446 my $attr = $format eq 'subvol' ?
'refquota' : 'volsize';
447 my $value = $class->zfs_get_properties($scfg, $attr, "$scfg->{pool}/$vname");
448 if ($value =~ /^(\d+)$/) {
452 die "Could not get zfs volume size\n";
# Take a ZFS snapshot of a volume.
#
# Parameters:
#   $scfg    - storage configuration hash; reads {pool}
#   $storeid - storage ID (not used here)
#   $volname - volume name, resolved through $class->parse_volname()
#   $snap    - snapshot name
#
# Issues `zfs snapshot <pool>/<name>@<snap>` via $class->zfs_request() and
# returns its result.
sub volume_snapshot {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    # the second element returned by parse_volname is the dataset name
    my (undef, $dataset_name) = $class->parse_volname($volname);
    my $snapshot = "$scfg->{pool}/$dataset_name\@$snap";

    $class->zfs_request($scfg, undef, 'snapshot', $snapshot);
}
# Delete a ZFS snapshot of a volume.
#
# Parameters:
#   $scfg    - storage configuration hash; reads {pool}
#   $storeid - storage ID, handed on to deactivate_volume
#   $volname - volume name, resolved through $class->parse_volname()
#   $snap    - snapshot name
#   $running - accepted but not used here
#
# Returns the result of the `zfs destroy` request.
sub volume_snapshot_delete {
    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;

    my (undef, $dataset_name) = $class->parse_volname($volname);

    # Deactivate the snapshot first. Note that the parsed dataset name (not
    # the original $volname) and a fresh, empty cache hash are passed.
    $class->deactivate_volume($storeid, $scfg, $dataset_name, $snap, {});

    my $snapshot = "$scfg->{pool}/$dataset_name\@$snap";
    $class->zfs_request($scfg, undef, 'destroy', $snapshot);
}
472 sub volume_snapshot_rollback
{
473 my ($class, $scfg, $storeid, $volname, $snap) = @_;
475 my (undef, $vname, undef, undef, undef, undef, $format) = $class->parse_volname($volname);
477 my $msg = $class->zfs_request($scfg, undef, 'rollback', "$scfg->{pool}/$vname\@$snap");
479 # we have to unmount rollbacked subvols, to invalidate wrong kernel
480 # caches, they get mounted in activate volume again
481 # see zfs bug #10931 https://github.com/openzfs/zfs/issues/10931
482 if ($format eq 'subvol') {
483 eval { $class->zfs_request($scfg, undef, 'unmount', "$scfg->{pool}/$vname"); };
485 die $err if $err !~ m/not currently mounted$/;
492 sub volume_rollback_is_possible
{
493 my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
495 # can't use '-S creation', because zfs list won't reverse the order when the
496 # creation time is the same second, breaking at least our tests.
497 my $snapshots = $class->zfs_get_sorted_snapshot_list($scfg, $volname, ['-s', 'creation']);
500 $blockers //= []; # not guaranteed to be set by caller
501 for my $snapshot ($snapshots->@*) {
502 if ($snapshot eq $snap) {
505 push $blockers->@*, $snapshot;
509 my $volid = "${storeid}:${volname}";
511 die "can't rollback, snapshot '$snap' does not exist on '$volid'\n"
514 die "can't rollback, '$snap' is not most recent snapshot on '$volid'\n"
515 if scalar($blockers->@*) > 0;
520 sub volume_snapshot_info
{
521 my ($class, $scfg, $storeid, $volname) = @_;
523 my @params = ('-Hp', '-r', '-t', 'snapshot', '-o', 'name,guid,creation');
525 my $vname = ($class->parse_volname($volname))[1];
526 push @params, "$scfg->{pool}\/$vname";
528 my $text = $class->zfs_request($scfg, undef, 'list', @params);
529 my @lines = split(/\n/, $text);
532 for my $line (@lines) {
533 my ($snapshot, $guid, $creation) = split(/\s+/, $line);
534 (my $snap_name = $snapshot) =~ s/^.*@//;
536 $info->{$snap_name} = {
538 timestamp
=> $creation,
544 my sub dataset_mounted_heuristic
{
547 my $mounts = PVE
::ProcFSTools
::parse_proc_mounts
();
548 for my $mp (@$mounts) {
549 my ($what, $dir, $fs) = $mp->@*;
550 next if $fs ne 'zfs';
551 # check for root-dataset or any child-dataset (root-dataset could have 'canmount=off')
552 # If any child is mounted heuristically assume that `zfs mount -a` was successful
553 next if $what !~ m!^$dataset(?:/|$)!;
559 sub activate_storage
{
560 my ($class, $storeid, $scfg, $cache) = @_;
562 # Note: $scfg->{pool} can include dataset <pool>/<dataset>
563 my $dataset = $scfg->{pool
};
564 my $pool = ($dataset =~ s!/.*$!!r);
566 return 1 if dataset_mounted_heuristic
($dataset); # early return
568 my $pool_imported = sub {
569 my @param = ('-o', 'name', '-H', $pool);
570 my $res = eval { $class->zfs_request($scfg, undef, 'zpool_list', @param) };
573 return defined($res) && $res =~ m/$pool/;
576 if (!$pool_imported->()) {
577 # import can only be done if not yet imported!
578 my @param = ('-d', '/dev/disk/by-id/', '-o', 'cachefile=none', $pool);
579 eval { $class->zfs_request($scfg, undef, 'zpool_import', @param) };
581 # just could've raced with another import, so recheck if it is imported
582 die "could not activate storage '$storeid', $err\n" if !$pool_imported->();
585 eval { $class->zfs_request($scfg, undef, 'mount', '-a') };
586 die "could not activate storage '$storeid', $@\n" if $@;
590 sub deactivate_storage
{
591 my ($class, $storeid, $scfg, $cache) = @_;
595 sub activate_volume
{
596 my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
598 return 1 if defined($snapname);
600 my (undef, $dataset, undef, undef, undef, undef, $format) = $class->parse_volname($volname);
602 if ($format eq 'raw') {
603 $class->zfs_wait_for_zvol_link($scfg, $volname);
604 } elsif ($format eq 'subvol') {
605 my $mounted = $class->zfs_get_properties($scfg, 'mounted', "$scfg->{pool}/$dataset");
606 if ($mounted !~ m/^yes$/) {
607 $class->zfs_request($scfg, undef, 'mount', "$scfg->{pool}/$dataset");
614 sub deactivate_volume
{
615 my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
620 my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
622 $snap ||= '__base__';
624 my ($vtype, $basename, $basevmid, undef, undef, $isBase, $format) =
625 $class->parse_volname($volname);
627 die "clone_image only works on base images\n" if !$isBase;
629 my $name = $class->find_free_diskname($storeid, $scfg, $vmid, $format);
631 if ($format eq 'subvol') {
632 my $size = $class->zfs_request($scfg, undef, 'list', '-Hp', '-o', 'refquota', "$scfg->{pool}/$basename");
634 $class->zfs_request($scfg, undef, 'clone', "$scfg->{pool}/$basename\@$snap", "$scfg->{pool}/$name", '-o', "refquota=$size");
636 $class->zfs_request($scfg, undef, 'clone', "$scfg->{pool}/$basename\@$snap", "$scfg->{pool}/$name");
639 return "$basename/$name";
643 my ($class, $storeid, $scfg, $volname) = @_;
645 my $snap = '__base__';
647 my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
648 $class->parse_volname($volname);
650 die "create_base not possible with base image\n" if $isBase;
653 if ( $format eq 'subvol' ) {
654 $newname =~ s/^subvol-/basevol-/;
656 $newname =~ s/^vm-/base-/;
658 my $newvolname = $basename ?
"$basename/$newname" : "$newname";
660 $class->zfs_request($scfg, undef, 'rename', "$scfg->{pool}/$name", "$scfg->{pool}/$newname");
662 my $running = undef; #fixme : is create_base always offline ?
664 $class->volume_snapshot($scfg, $storeid, $newname, $snap, $running);
670 my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
672 my $new_size = int($size/1024);
674 my (undef, $vname, undef, undef, undef, undef, $format) =
675 $class->parse_volname($volname);
677 my $attr = $format eq 'subvol' ?
'refquota' : 'volsize';
679 # align size to 1M so we always have a valid multiple of the volume block size
680 if ($format eq 'raw') {
681 my $padding = (1024 - $new_size % 1024) % 1024;
682 $new_size = $new_size + $padding;
685 $class->zfs_request($scfg, undef, 'set', "$attr=${new_size}k", "$scfg->{pool}/$vname");
690 sub storage_can_replicate
{
691 my ($class, $scfg, $storeid, $format) = @_;
693 return 1 if $format eq 'raw' || $format eq 'subvol';
698 sub volume_has_feature
{
699 my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;
702 snapshot
=> { current
=> 1, snap
=> 1},
703 clone
=> { base
=> 1},
704 template
=> { current
=> 1},
705 copy
=> { base
=> 1, current
=> 1},
706 sparseinit
=> { base
=> 1, current
=> 1},
707 replicate
=> { base
=> 1, current
=> 1},
708 rename => {current
=> 1},
711 my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
712 $class->parse_volname($volname);
719 $key = $isBase ?
'base' : 'current';
722 return 1 if $features->{$feature}->{$key};
728 my ($class, $scfg, $storeid, $fh, $volname, $format, $snapshot, $base_snapshot, $with_snapshots) = @_;
730 die "unsupported export stream format for $class: $format\n"
733 die "$class storage can only export snapshots\n"
734 if !defined($snapshot);
736 my $dataset = ($class->parse_volname($volname))[1];
738 my $fd = fileno($fh);
739 die "internal error: invalid file handle for volume_export\n"
743 # For zfs we always create a replication stream (-R) which means the remote
744 # side will always delete non-existing source snapshots. This should work
745 # for all our use cases.
746 my $cmd = ['zfs', 'send', '-Rpv'];
747 if (defined($base_snapshot)) {
748 my $arg = $with_snapshots ?
'-I' : '-i';
749 push @$cmd, $arg, $base_snapshot;
751 push @$cmd, '--', "$scfg->{pool}/$dataset\@$snapshot";
753 run_command
($cmd, output
=> $fd);
758 sub volume_export_formats
{
759 my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;
761 my @formats = ('zfs');
763 # push @formats, 'fies' if $volname !~ /^(?:basevol|subvol)-/;
764 # push @formats, 'raw' if !$base_snapshot && !$with_snapshots;
769 my ($class, $scfg, $storeid, $fh, $volname, $format, $snapshot, $base_snapshot, $with_snapshots, $allow_rename) = @_;
771 die "unsupported import stream format for $class: $format\n"
774 my $fd = fileno($fh);
775 die "internal error: invalid file handle for volume_import\n"
778 my (undef, $dataset, $vmid, undef, undef, undef, $volume_format) =
779 $class->parse_volname($volname);
781 my $zfspath = "$scfg->{pool}/$dataset";
782 my $suffix = defined($base_snapshot) ?
"\@$base_snapshot" : '';
783 my $exists = 0 == run_command
(['zfs', 'get', '-H', 'name', $zfspath.$suffix],
784 noerr
=> 1, quiet
=> 1);
785 if (defined($base_snapshot)) {
786 die "base snapshot '$zfspath\@$base_snapshot' doesn't exist\n" if !$exists;
788 die "volume '$zfspath' already exists\n" if !$allow_rename;
789 warn "volume '$zfspath' already exists - importing with a different name\n";
790 $dataset = $class->find_free_diskname($storeid, $scfg, $vmid, $volume_format);
791 $zfspath = "$scfg->{pool}/$dataset";
794 eval { run_command
(['zfs', 'recv', '-F', '--', $zfspath], input
=> "<&$fd") };
796 if (defined($base_snapshot)) {
797 eval { run_command
(['zfs', 'rollback', '-r', '--', "$zfspath\@$base_snapshot"]) };
799 eval { run_command
(['zfs', 'destroy', '-r', '--', $zfspath]) };
804 return "$storeid:$dataset";
# List the stream formats accepted for importing a volume.
#
# The answer is delegated unchanged to $class->volume_export_formats(), so
# import and export always report the same formats.
#
# Parameters: $scfg, $storeid, $volname, $snapshot, $base_snapshot and
# $with_snapshots are forwarded in this order.
sub volume_import_formats {
    my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;

    my @forwarded = ($scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots);

    return $class->volume_export_formats(@forwarded);
}
814 my ($class, $scfg, $storeid, $source_volname, $target_vmid, $target_volname) = @_;
824 ) = $class->parse_volname($source_volname);
825 $target_volname = $class->find_free_diskname($storeid, $scfg, $target_vmid, $format)
828 my $pool = $scfg->{pool
};
829 my $source_zfspath = "${pool}/${source_image}";
830 my $target_zfspath = "${pool}/${target_volname}";
832 my $exists = 0 == run_command
(['zfs', 'get', '-H', 'name', $target_zfspath],
833 noerr
=> 1, quiet
=> 1);
834 die "target volume '${target_volname}' already exists\n" if $exists;
836 $class->zfs_request($scfg, 5, 'rename', ${source_zfspath
}, ${target_zfspath
});
838 $base_name = $base_name ?
"${base_name}/" : '';
840 return "${storeid}:${base_name}${target_volname}";