]> git.proxmox.com Git - pve-storage.git/blob - PVE/Storage/ZFSPoolPlugin.pm
Diskmanage: allow get_disks to take multiple disks
[pve-storage.git] / PVE / Storage / ZFSPoolPlugin.pm
1 package PVE::Storage::ZFSPoolPlugin;
2
3 use strict;
4 use warnings;
5 use IO::File;
6 use POSIX;
7 use PVE::Tools qw(run_command);
8 use PVE::Storage::Plugin;
9 use PVE::RPCEnvironment;
10 use Net::IP;
11
12 use base qw(PVE::Storage::Plugin);
13
# Returns the storage type identifier of this plugin.
sub type {
    my $plugin_type = 'zfspool';

    return $plugin_type;
}
17
# Returns the static plugin description: a 'content' pair and a 'format'
# pair. NOTE(review): presumably each pair is [ allowed values, default ] as
# interpreted by the base storage plugin - confirm against the consumer.
sub plugindata {
    my %content_first = (images => 1, rootdir => 1);
    my %content_second = (images => 1, rootdir => 1);
    my %formats = (raw => 1, subvol => 1);

    return {
        content => [ \%content_first, \%content_second ],
        format => [ \%formats, 'raw' ],
    };
}
24
# Returns the schema of the configuration properties introduced by this
# plugin: 'blocksize' (string) and 'sparse' (boolean).
sub properties {
    my %blocksize = (
        description => "block size",
        type => 'string',
    );
    my %sparse = (
        description => "use sparse volumes",
        type => 'boolean',
    );

    return {
        blocksize => \%blocksize,
        sparse => \%sparse,
    };
}
37
# Returns the storage configuration options accepted by this plugin.
# 'pool' is flagged as fixed, every other option is flagged as optional.
sub options {
    my @optional = qw(blocksize sparse nodes disable content bwlimit);

    my %opts = (pool => { fixed => 1 });
    foreach my $key (@optional) {
        $opts{$key} = { optional => 1 };
    }

    return \%opts;
}
49
50 # static zfs helper methods
51
# Convert a human readable zfs size ("10", "1.5G", "2T", ...) into bytes.
#
# Accepts an unsigned decimal number optionally followed by one binary unit
# suffix (K, M, G, T, P or E; each step is a factor of 1024). Fractional
# values are rounded up to the next whole byte.
#
# Returns 0 for an empty/false argument. Unparsable input emits a warning
# and also returns 0.
sub zfs_parse_size {
    my ($text) = @_;

    return 0 if !$text;

    if ($text =~ m/^(\d+(\.\d+)?)([EPTGMK])?$/) {
        my ($size, $remainder, $unit) = ($1, $2, $3);

        if ($unit) {
            # power of 1024 represented by each suffix
            my $exponent_of = { K => 1, M => 2, G => 3, T => 4, P => 5, E => 6 };
            $size *= 1024 ** $exponent_of->{$unit};
        }

        # only values written with a fractional part can end up non-integral
        $size = ceil($size) if $remainder;

        return $size;
    }

    warn "unable to parse zfs size '$text'\n";

    return 0;
}
87
# Parse 'zfs list' output whose whitespace separated columns are
# name, volsize, origin, type and refquota (in that order).
#
# Only datasets of type 'volume' or 'filesystem' that live below a pool
# (pool/name) and whose last path component looks like
# (vm|base|subvol|basevol)-<vmid>-<suffix> are kept.
#
# Returns an array reference of hashes with the keys owner, pool, name,
# size, format and (when the dataset has one) origin. Returns undef when
# there is no input or no dataset was accepted.
sub zfs_parse_zvol_list {
    my ($text) = @_;

    # stays undef until the first dataset is accepted
    my $list;

    return $list if !$text;

    foreach my $line (split /\n/, $text) {
        my ($dataset, $size, $origin, $type, $refquota) = split(/\s+/, $line);

        # blank or truncated lines carry no type column; skip them instead
        # of comparing undef and raising 'uninitialized value' warnings
        next if !defined($type);
        next if !($type eq 'volume' || $type eq 'filesystem');

        my @parts = split /\//, $dataset;
        next if scalar(@parts) < 2; # we need pool/name
        my $name = pop @parts;
        my $pool = join('/', @parts);

        next unless $name =~ m!^(vm|base|subvol|basevol)-(\d+)-(\S+)$!;

        my $zvol = {
            owner => $2,
            pool => $pool,
            name => $name,
        };

        if ($type eq 'filesystem') {
            # filesystems are sized by their refquota; no quota means size 0
            if (!defined($refquota) || $refquota eq 'none') {
                $zvol->{size} = 0;
            } else {
                $zvol->{size} = zfs_parse_size($refquota);
            }
            $zvol->{format} = 'subvol';
        } else {
            $zvol->{size} = zfs_parse_size($size);
            $zvol->{format} = 'raw';
        }

        # a lone '-' in the origin column means the dataset has no origin
        if ($origin !~ /^-$/) {
            $zvol->{origin} = $origin;
        }

        push @$list, $zvol;
    }

    return $list;
}
130
# Split a volume name of the form [<base volume>/]<volume> into its parts.
#
# Returns the list
#   ('images', name, vmid, base name, base vmid, is-base flag, format)
# where format is 'subvol' for subvol-/basevol- volumes and 'raw' otherwise,
# and base name / base vmid are undef when no base prefix is present.
# Dies when the name does not match the expected pattern.
sub parse_volname {
    my ($class, $volname) = @_;

    my @captures = $volname =~ m/^(((base|basevol)-(\d+)-\S+)\/)?((base|basevol|vm|subvol)-(\d+)-\S+)$/;
    die "unable to parse zfs volume name '$volname'\n" if !@captures;

    my (undef, $basename, undef, $basevmid, $name, $kind, $vmid) = @captures;

    my $format = 'raw';
    $format = 'subvol' if $kind eq 'subvol' || $kind eq 'basevol';

    my $isBase = ($kind eq 'base' || $kind eq 'basevol');

    return ('images', $name, $vmid, $basename, $basevmid, $isBase, $format);
}
142
143 # virtual zfs methods (subclasses can override them)
144
# Map a volume (and optional snapshot) to a filesystem path.
#
# subvol-/basevol- volumes map to "/<pool>/<name>", everything else to the
# zvol device node "/dev/zvol/<pool>/<name>"; "@<snapname>" is appended when
# a snapshot name is given.
#
# Returns (path, vmid, vtype). Dies for any volume type other than 'images'.
sub path {
    my ($class, $scfg, $volname, $storeid, $snapname) = @_;

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    die "$vtype is not allowed in ZFSPool!" if $vtype ne "images";

    my $path;
    if ($name =~ m/^subvol-/ || $name =~ m/^basevol-/) {
        # fixme: we currently assume standard mount point?!
        $path = "/$scfg->{pool}/$name";
    } else {
        $path = "/dev/zvol/$scfg->{pool}/$name";
    }

    if (defined($snapname)) {
        $path .= "\@$snapname";
    }

    return ($path, $vmid, $vtype);
}
166
# Run a zfs/zpool command and return everything it printed on stdout.
#
# $method selects the command: 'zpool_list' runs "zpool list",
# 'zpool_import' runs "zpool import" (with a timeout of at least 15
# seconds), anything else runs "zfs <method>". @params are appended as-is.
#
# When no timeout is given it defaults to one hour inside a worker and to
# 5 seconds otherwise. Each output line is collected with a trailing
# newline.
sub zfs_request {
    my ($class, $scfg, $timeout, $method, @params) = @_;

    my @command;
    if ($method eq 'zpool_list') {
        @command = ('zpool', 'list');
    } elsif ($method eq 'zpool_import') {
        @command = ('zpool', 'import');
        $timeout = 15 if !$timeout || $timeout < 15;
    } else {
        @command = ('zfs', $method);
    }

    my $cmd = [ @command, @params ];

    my $msg = '';
    my $output = sub { $msg .= "$_[0]\n" };

    if (!$timeout) {
        $timeout = PVE::RPCEnvironment->is_worker() ? 60*60 : 5;
    }

    run_command($cmd, errmsg => "zfs error", outfunc => $output, timeout => $timeout);

    return $msg;
}
191
# Allocate a new volume of $size and return its name.
#
# 'raw' creates a zvol named vm-<vmid>-* and waits for its block device to
# show up under /dev/zvol (up to 5 minutes inside a worker, 10 seconds
# otherwise). 'subvol' creates a filesystem named subvol-<vmid>-*.
# When $name is empty a free name is looked up. Dies on a name that does
# not match the expected prefix, on an unsupported format, or when the zvol
# device does not appear in time.
sub alloc_image {
    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;

    my $volname = $name;

    if ($fmt eq 'subvol') {

        die "illegal name '$volname' - should be 'subvol-$vmid-*'\n"
            if $volname && $volname !~ m/^subvol-$vmid-/;

        if (!$volname) {
            $volname = $class->zfs_find_free_diskname($storeid, $scfg, $vmid, $fmt);
        }

        # also validates a name that was generated just above
        die "illegal name '$volname' - should be 'subvol-$vmid-*'\n"
            if $volname !~ m/^subvol-$vmid-/;

        $class->zfs_create_subvol($scfg, $volname, $size);

    } elsif ($fmt eq 'raw') {

        die "illegal name '$volname' - should be 'vm-$vmid-*'\n"
            if $volname && $volname !~ m/^vm-$vmid-/;

        if (!$volname) {
            $volname = $class->zfs_find_free_diskname($storeid, $scfg, $vmid, $fmt);
        }

        $class->zfs_create_zvol($scfg, $volname, $size);
        my $devname = "/dev/zvol/$scfg->{pool}/$volname";

        # poll once per second until the device node exists
        my $timeout = PVE::RPCEnvironment->is_worker() ? 60*5 : 10;
        my $attempt = 1;
        until (-b $devname) {
            die "Timeout: no zvol after $timeout sec found.\n"
                if $attempt == $timeout;

            sleep(1);
            $attempt++;
        }

    } else {
        die "unsupported format '$fmt'";
    }

    return $volname;
}
233
# Destroy the dataset that backs $volname. Always returns undef.
sub free_image {
    my ($class, $storeid, $scfg, $volname, $isBase) = @_;

    # second element of the parsed volume name is the dataset name
    my $name = ($class->parse_volname($volname))[1];

    $class->zfs_delete_zvol($scfg, $name);

    return undef;
}
243
# List the volumes of this storage's pool.
#
# The dataset listing is taken from $cache->{zfs} and fetched through
# zfs_list_zvol when the cache is empty. Every entry gets a 'volid' of the
# form "<storeid>:[<base>/]<name>", the base part being derived from a
# parent ending in "@__base__". When $vollist is given only volumes whose
# volid is in that list are returned; otherwise, when $vmid is defined,
# only volumes owned by that vmid.
#
# Returns an array reference of the (annotated) cache entries.
sub list_images {
    my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;

    $cache->{zfs} = $class->zfs_list_zvol($scfg) if !$cache->{zfs};

    my $res = [];

    my $dat = $cache->{zfs}->{$scfg->{pool}};
    return $res if !$dat;

    foreach my $info (values %$dat) {
        my $volname = $info->{name};
        my $parent = $info->{parent};

        my $volid = "$storeid:$volname";
        if ($parent && $parent =~ m/^(\S+)\@__base__$/) {
            $volid = "$storeid:$1/$volname";
        }
        $info->{volid} = $volid;

        if ($vollist) {
            my $found = grep { $_ eq $info->{volid} } @$vollist;
            next if !$found;
        } elsif (defined($vmid)) {
            next if $info->{vmid} ne $vmid;
        }

        push @$res, $info;
    }

    return $res;
}
280
# Query the 'available' and 'used' properties of the configured pool.
#
# The first output line is taken as 'available' and the second as 'used';
# a line that is not a plain integer leaves the respective value at 0.
# Returns (available, used).
sub zfs_get_pool_stats {
    my ($class, $scfg) = @_;

    my $text = $class->zfs_request(
        $scfg, undef, 'get', '-o', 'value', '-Hp', 'available,used', $scfg->{pool});

    my @lines = split /\n/, $text;

    my $available = $lines[0] =~ /^(\d+)$/ ? $1 : 0;
    my $used = $lines[1] =~ /^(\d+)$/ ? $1 : 0;

    return ($available, $used);
}
302
# Create the zvol <pool>/$zvol. $size is passed to zfs with a 'k' suffix
# after being rounded up to the next multiple of 1024.
# Adds '-s' when the storage is configured as sparse and '-b <blocksize>'
# when a blocksize is configured.
sub zfs_create_zvol {
    my ($class, $scfg, $zvol, $size) = @_;

    # always align size to 1M as workaround until
    # https://github.com/zfsonlinux/zfs/issues/8541 is solved
    my $overhang = $size % 1024;
    my $padding = $overhang ? 1024 - $overhang : 0;
    $size = $size + $padding;

    my @options;
    push @options, '-s' if $scfg->{sparse};
    push @options, '-b', $scfg->{blocksize} if $scfg->{blocksize};

    $class->zfs_request(
        $scfg, undef, 'create', @options, '-V', "${size}k", "$scfg->{pool}/$zvol");
}
321
# Create the filesystem <pool>/$volname with acltype=posixacl, xattr=sa and
# a refquota of ${size}k.
sub zfs_create_subvol {
    my ($class, $scfg, $volname, $size) = @_;

    my @properties = ('acltype=posixacl', 'xattr=sa', "refquota=${size}k");

    my @cmd = ('create');
    foreach my $property (@properties) {
        push @cmd, '-o', $property;
    }
    push @cmd, "$scfg->{pool}/$volname";

    $class->zfs_request($scfg, undef, @cmd);
}
332
# Recursively destroy <pool>/$zvol.
#
# A "dataset is busy" error is retried after one second, for at most six
# attempts in total; "dataset does not exist" counts as success. Any other
# error, or a busy error on the final attempt, is re-thrown.
sub zfs_delete_zvol {
    my ($class, $scfg, $zvol) = @_;

    my $dataset = "$scfg->{pool}/$zvol";
    my $err;

    foreach my $attempt (1 .. 6) {
        eval { $class->zfs_request($scfg, undef, 'destroy', '-r', $dataset); };
        $err = $@;
        last if !$err;

        if ($err =~ m/^zfs error:(.*): dataset is busy.*/) {
            sleep(1);
            next;
        }

        if ($err =~ m/^zfs error:.*: dataset does not exist.*$/) {
            $err = undef;
            last;
        }

        die $err;
    }

    die $err if $err;
}
357
# List all volumes and filesystems known to zfs, grouped by pool.
#
# Returns a hash reference { <pool> => { <name> => { name, size, parent,
# format, vmid } } } built from zfs_parse_zvol_list, or undef when that
# parser returns nothing. 'parent' is the dataset's origin with the
# configured pool prefix stripped when the origin lives in that pool.
sub zfs_list_zvol {
    my ($class, $scfg) = @_;

    my $text = $class->zfs_request($scfg, 10, 'list', '-o', 'name,volsize,origin,type,refquota', '-t', 'volume,filesystem', '-Hr');
    my $zvols = zfs_parse_zvol_list($text);
    return undef if !$zvols;

    my $list = {};
    foreach my $zvol (@$zvols) {
        my $pool = $zvol->{pool};
        my $name = $zvol->{name};
        my $parent = $zvol->{origin};

        # \Q...\E: the pool name is plain text, not a pattern
        if ($parent && $parent =~ m/^\Q$scfg->{pool}\E\/(\S+)$/) {
            $parent = $1;
        }

        $list->{$pool}->{$name} = {
            name => $name,
            size => $zvol->{size},
            parent => $parent,
            format => $zvol->{format},
            vmid => $zvol->{owner},
        };
    }

    return $list;
}
385
# Pick the next unused disk name for $vmid by handing the names that
# already exist in the configured pool to
# PVE::Storage::Plugin::get_next_vm_diskname.
sub zfs_find_free_diskname {
    my ($class, $storeid, $scfg, $vmid, $format) = @_;

    my $all_volumes = $class->zfs_list_zvol($scfg);
    my $pool_volumes = $all_volumes->{$scfg->{pool}};

    my @existing = keys %$pool_volumes;

    return PVE::Storage::Plugin::get_next_vm_diskname(\@existing, $storeid, $vmid, $format, $scfg);
}
395
# Return the name of the most recently created snapshot of $volname, or
# undef when the volume has no snapshot.
#
# The snapshot listing is sorted by creation time (ascending), so the last
# line that belongs to the volume is the newest snapshot. Lines are matched
# against the exact dataset name "<pool>/<name>@" so that snapshots of
# other datasets sharing the same prefix (e.g. vm-100-disk-10 when asking
# for vm-100-disk-1) or the same path suffix are not picked up.
sub zfs_get_latest_snapshot {
    my ($class, $scfg, $volname) = @_;

    my $vname = ($class->parse_volname($volname))[1];
    my $dataset = "$scfg->{pool}/$vname";

    # abort rollback if snapshot is not the latest
    my @params = ('-t', 'snapshot', '-o', 'name', '-s', 'creation');
    my $text = $class->zfs_request($scfg, undef, 'list', @params);

    my $recentsnap;
    foreach my $line (split(/\n/, $text)) {
        # the header line and foreign datasets simply do not match
        if ($line =~ m/^\Q$dataset\E\@(\S+)\s*$/) {
            $recentsnap = $1;
        }
    }

    return $recentsnap;
}
416
# Report (total, free, used, active) for the storage.
# When the pool statistics cannot be read the error is emitted as a warning
# and all four values are returned as 0.
sub status {
    my ($class, $storeid, $scfg, $cache) = @_;

    my ($total, $free, $used, $active) = (0, 0, 0, 0);

    eval {
        ($free, $used) = $class->zfs_get_pool_stats($scfg);
        $active = 1;
        $total = $free + $used;
    };
    if ($@) {
        warn $@;
    }

    return ($total, $free, $used, $active);
}
434
# Return the size of a volume as reported by 'zfs get -Hp': the refquota
# property for subvols, the volsize property for everything else.
# Dies when the property value cannot be found in the output.
sub volume_size_info {
    my ($class, $scfg, $storeid, $volname, $timeout) = @_;

    my @parsed = $class->parse_volname($volname);
    my ($vname, $format) = @parsed[1, 6];

    my $attr = 'volsize';
    $attr = 'refquota' if $format eq 'subvol';

    my $text = $class->zfs_request($scfg, undef, 'get', '-Hp', $attr, "$scfg->{pool}/$vname");

    return $1 if $text =~ /\s$attr\s(\d+)\s/;

    die "Could not get zfs volume size\n";
}
449
# Create the snapshot <pool>/<volume>@$snap.
sub volume_snapshot {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my (undef, $vname) = $class->parse_volname($volname);
    my $target = "$scfg->{pool}/$vname\@$snap";

    $class->zfs_request($scfg, undef, 'snapshot', $target);
}
457
# Deactivate the snapshot of the volume, then destroy
# <pool>/<volume>@$snap.
sub volume_snapshot_delete {
    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;

    my (undef, $vname) = $class->parse_volname($volname);
    my $target = "$scfg->{pool}/$vname\@$snap";

    $class->deactivate_volume($storeid, $scfg, $vname, $snap, {});
    $class->zfs_request($scfg, undef, 'destroy', $target);
}
466
# Roll the volume back to the snapshot <pool>/<volume>@$snap.
sub volume_snapshot_rollback {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my (undef, $vname) = $class->parse_volname($volname);
    my $target = "$scfg->{pool}/$vname\@$snap";

    $class->zfs_request($scfg, undef, 'rollback', $target);
}
474
# Returns 1 when $snap is the latest snapshot of the volume, as reported by
# zfs_get_latest_snapshot; dies otherwise.
sub volume_rollback_is_possible {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my $latest = $class->zfs_get_latest_snapshot($scfg, $volname);

    return 1 if $snap eq $latest;

    die "can't rollback, more recent snapshots exist\n";
}
485
# Return an array reference with the snapshot names of a volume, in the
# order 'zfs list -S name' prints them.
# Errors of the zfs command are swallowed, so a dataset that does not exist
# yields an empty list.
sub volume_snapshot_list {
    my ($class, $scfg, $storeid, $volname) = @_;

    my $name = ($class->parse_volname($volname))[1];
    my $zpath = "$scfg->{pool}/$name";

    my $snaps = [];

    # keep only lines that are snapshots of exactly this dataset
    my $outfunc = sub {
        my ($line) = @_;

        push @$snaps, $1 if $line =~ m/^\Q$zpath\E@(.*)$/;
    };

    my $cmd = [
        'zfs', 'list', '-r', '-H', '-S', 'name', '-t', 'snap', '-o', 'name', $zpath,
    ];

    # NOTE(review): the command is passed wrapped in another array reference,
    # exactly as before - presumably run_command accepts a list of commands;
    # confirm against PVE::Tools.
    eval { run_command( [$cmd], outfunc => $outfunc , errfunc => sub{}); };

    # return an empty array if dataset does not exist.
    return $snaps;
}
511
# Make sure the zpool backing this storage is imported.
#
# The pool name is the part of $scfg->{pool} before the first '/'. When
# "zpool list" does not report the pool, an import (searching
# /dev/disk/by-id/) is attempted. Should the import fail, the pool is
# checked once more, because a concurrent import may have won the race.
#
# Returns 1 on success; dies with the import error when the pool is still
# not imported afterwards.
sub activate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    # Note: $scfg->{pool} can include dataset <pool>/<dataset>
    my $pool = $scfg->{pool};
    $pool =~ s!/.*$!!;

    my $pool_imported = sub {
        my @param = ('-o', 'name', '-H', "$pool");
        my $res = eval { $class->zfs_request($scfg, undef, 'zpool_list', @param) };
        if ($@) {
            warn "$@\n";
            return undef;
        }
        # the pool name is plain text, so quote it inside the pattern
        return defined($res) && $res =~ m/\Q$pool\E/;
    };

    if (!$pool_imported->()) {
        # import can only be done if not yet imported!
        my @param = ('-d', '/dev/disk/by-id/', '-o', 'cachefile=none', "$pool");
        eval { $class->zfs_request($scfg, undef, 'zpool_import', @param) };
        if (my $err = $@) {
            # just could've raced with another import, so recheck if it is imported
            # report the saved $err: the recheck runs its own eval and
            # overwrites $@ before the message would be built
            die "could not activate storage '$storeid', $err\n" if !$pool_imported->();
        }
    }

    return 1;
}
540
# Nothing is done when the storage is deactivated; always returns 1.
sub deactivate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    my $ok = 1;

    return $ok;
}
545
# Nothing is done when a volume is activated; always returns 1.
sub activate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    my $ok = 1;

    return $ok;
}
550
# Nothing is done when a volume is deactivated; always returns 1.
sub deactivate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    my $ok = 1;

    return $ok;
}
555
# Clone a base volume for $vmid from its snapshot $snap (default
# '__base__') and return the new volume name as "<base name>/<new name>".
# For subvols the refquota of the base dataset is read and set on the
# clone. Dies when $volname is not a base image.
sub clone_image {
    my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;

    $snap ||= '__base__';

    my ($vtype, $basename, $basevmid, undef, undef, $isBase, $format) =
        $class->parse_volname($volname);

    die "clone_image only works on base images\n" if !$isBase;

    my $name = $class->zfs_find_free_diskname($storeid, $scfg, $vmid, $format);

    my $source = "$scfg->{pool}/$basename";
    my @clone_args = ("$source\@$snap", "$scfg->{pool}/$name");

    if ($format eq 'subvol') {
        my $size = $class->zfs_request($scfg, undef, 'list', '-H', '-o', 'refquota', $source);
        chomp($size);
        push @clone_args, '-o', "refquota=$size";
    }

    $class->zfs_request($scfg, undef, 'clone', @clone_args);

    return "$basename/$name";
}
578
# Turn a volume into a base volume: rename it (subvol-* becomes basevol-*,
# vm-* becomes base-*), snapshot the renamed dataset as '__base__' and
# return the new volume name, keeping a leading "<base name>/" if the
# original had one. Dies when the volume already is a base image.
sub create_base {
    my ($class, $storeid, $scfg, $volname) = @_;

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
        $class->parse_volname($volname);

    die "create_base not possible with base image\n" if $isBase;

    my $snap = '__base__';

    my $newname = $name;
    if ($format ne 'subvol') {
        $newname =~ s/^vm-/base-/;
    } else {
        $newname =~ s/^subvol-/basevol-/;
    }

    my $newvolname = "$newname";
    $newvolname = "$basename/$newname" if $basename;

    $class->zfs_request($scfg, undef, 'rename', "$scfg->{pool}/$name", "$scfg->{pool}/$newname");

    my $running = undef; #fixme : is create_base always offline ?

    $class->volume_snapshot($scfg, $storeid, $newname, $snap, $running);

    return $newvolname;
}
605
# Resize a volume by setting its refquota (subvols) or volsize (everything
# else) to $size divided by 1024, truncated to an integer and passed with a
# 'k' suffix. Returns that truncated value.
sub volume_resize {
    my ($class, $scfg, $storeid, $volname, $size, $running) = @_;

    my $new_size = int($size/1024);

    my @parsed = $class->parse_volname($volname);
    my ($vname, $format) = @parsed[1, 6];

    my $attr = 'volsize';
    $attr = 'refquota' if $format eq 'subvol';

    $class->zfs_request($scfg, undef, 'set', "$attr=${new_size}k", "$scfg->{pool}/$vname");

    return $new_size;
}
620
# Returns 1 for the formats 'raw' and 'subvol', 0 for anything else.
sub storage_can_replicate {
    my ($class, $scfg, $storeid, $format) = @_;

    foreach my $supported ('raw', 'subvol') {
        return 1 if $format eq $supported;
    }

    return 0;
}
628
# Tell whether $feature is available for the given volume state.
#
# The state is 'snap' when a snapshot name is given, otherwise 'base' for
# base volumes and 'current' for all others. Returns 1 when the feature
# table lists the feature for that state, undef otherwise.
sub volume_has_feature {
    my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;

    my %states_of = (
        snapshot => { current => 1, snap => 1},
        clone => { base => 1},
        template => { current => 1},
        copy => { base => 1, current => 1},
        sparseinit => { base => 1, current => 1},
        replicate => { base => 1, current => 1},
    );

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
        $class->parse_volname($volname);

    my $key = 'current';
    if ($snapname) {
        $key = 'snap';
    } elsif ($isBase) {
        $key = 'base';
    }

    return 1 if $states_of{$feature}->{$key};

    return undef;
}
656
# Write a 'zfs send' stream of a volume snapshot to the file handle $fh.
#
# Only the stream format 'zfs' is supported and a snapshot must be given.
# With a base snapshot the stream is incremental: '-I' when
# $with_snapshots is set, '-i' otherwise. Dies on an unsupported format, a
# missing snapshot or a file handle without a file descriptor.
sub volume_export {
    my ($class, $scfg, $storeid, $fh, $volname, $format, $snapshot, $base_snapshot, $with_snapshots) = @_;

    die "unsupported export stream format for $class: $format\n"
        if $format ne 'zfs';

    die "$class storage can only export snapshots\n"
        if !defined($snapshot);

    my (undef, $dataset) = $class->parse_volname($volname);

    my $fd = fileno($fh);
    die "internal error: invalid file handle for volume_export\n"
        if !defined($fd);

    my @incremental;
    if (defined($base_snapshot)) {
        @incremental = ($with_snapshots ? '-I' : '-i', $base_snapshot);
    }

    # For zfs we always create a replication stream (-R) which means the remote
    # side will always delete non-existing source snapshots. This should work
    # for all our use cases.
    my $cmd = ['zfs', 'send', '-Rpv', @incremental, '--', "$scfg->{pool}/$dataset\@$snapshot"];

    run_command($cmd, output => ">&$fd");

    return;
}
687
# Return the list of stream formats that can be exported; currently this
# is always just 'zfs'.
sub volume_export_formats {
    my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;

    my @formats;
    push @formats, 'zfs';

    # TODOs:
    # push @formats, 'fies' if $volname !~ /^(?:basevol|subvol)-/;
    # push @formats, 'raw' if !$base_snapshot && !$with_snapshots;

    return @formats;
}
697
# Receive a 'zfs' stream from the file handle $fh into <pool>/<volume>.
#
# With a base snapshot that snapshot must already exist on the target;
# without one the target volume must not exist yet. When receiving fails
# the target is rolled back to the base snapshot (incremental case) or
# destroyed (full case) on a best-effort basis, and the receive error is
# re-thrown.
sub volume_import {
    my ($class, $scfg, $storeid, $fh, $volname, $format, $base_snapshot, $with_snapshots) = @_;

    die "unsupported import stream format for $class: $format\n"
        if $format ne 'zfs';

    my $fd = fileno($fh);
    die "internal error: invalid file handle for volume_import\n"
        if !defined($fd);

    my (undef, $dataset) = $class->parse_volname($volname);
    my $zfspath = "$scfg->{pool}/$dataset";
    my $incremental = defined($base_snapshot);

    my $suffix = $incremental ? "\@$base_snapshot" : '';
    # exit code 0 means the dataset (or base snapshot) is present
    my $exists = 0 == run_command(['zfs', 'get', '-H', 'name', $zfspath.$suffix],
        noerr => 1, errfunc => sub {});

    die "base snapshot '$zfspath\@$base_snapshot' doesn't exist\n"
        if $incremental && !$exists;
    die "volume '$zfspath' already exists\n"
        if !$incremental && $exists;

    eval { run_command(['zfs', 'recv', '-F', '--', $zfspath], input => "<&$fd") };
    if (my $err = $@) {
        my $cleanup = $incremental
            ? ['zfs', 'rollback', '-r', '--', "$zfspath\@$base_snapshot"]
            : ['zfs', 'destroy', '-r', '--', $zfspath];

        # cleanup failures are ignored; the receive error is what matters
        eval { run_command($cleanup) };
        die $err;
    }

    return;
}
731
# The importable stream formats are the same as the exportable ones; the
# query is delegated to volume_export_formats with no snapshot.
sub volume_import_formats {
    my ($class, $scfg, $storeid, $volname, $base_snapshot, $with_snapshots) = @_;

    my @export_args = ($scfg, $storeid, $volname, undef, $base_snapshot, $with_snapshots);

    return $class->volume_export_formats(@export_args);
}
737
738 1;