]> git.proxmox.com Git - pve-storage.git/blob - PVE/Storage/BTRFSPlugin.pm
add BTRFS storage plugin
[pve-storage.git] / PVE / Storage / BTRFSPlugin.pm
1 package PVE::Storage::BTRFSPlugin;
2
3 use strict;
4 use warnings;
5
6 use base qw(PVE::Storage::Plugin);
7
8 use Fcntl qw(S_ISDIR O_WRONLY O_CREAT O_EXCL);
9 use File::Basename qw(dirname);
10 use File::Path qw(mkpath);
11 use IO::Dir;
12
13 use PVE::Tools qw(run_command);
14
15 use PVE::Storage::DirPlugin;
16
# Kernel ABI values used by this plugin.
#
# The two ioctl request numbers are the asm-generic 64-bit encodings of the definitions in
# <linux/fs.h>:
#   FS_IOC_GETFLAGS = _IOR('f', 1, long)  -> read direction bit 0x80000000, nr 1
#   FS_IOC_SETFLAGS = _IOW('f', 2, long)  -> write direction bit 0x40000000, nr 2
# They must not be swapped: `chattr` below first reads the flags and then writes them back.
use constant {
    # Inode number `path_is_subvolume` expects of the top directory of a subvolume.
    BTRFS_FIRST_FREE_OBJECTID => 256,
    # Inode flag that disables copy-on-write for a file (what `chattr +C` sets).
    FS_NOCOW_FL => 0x00800000,
    FS_IOC_GETFLAGS => 0x80086601,
    FS_IOC_SETFLAGS => 0x40086602,
};
23
24 # Configuration (similar to DirPlugin)
25
# Returns the storage type identifier of this plugin.
sub type { return 'btrfs' }
29
# Returns the static plugin description: for both `content` and `format` a two-element list whose
# first element is a set (hash with true values) and whose second element is a subset/single value.
# NOTE(review): presumably [allowed values, default] as in the other storage plugins - confirm.
sub plugindata {
    my %content_set = map { ($_ => 1) } qw(images rootdir vztmpl iso backup snippets none);
    my %content_subset = (images => 1, rootdir => 1);
    my %format_set = map { ($_ => 1) } qw(raw qcow2 vmdk subvol);

    return {
        content => [ \%content_set, \%content_subset ],
        format => [ \%format_set, 'raw' ],
    };
}
47
# Returns the storage configuration options understood by this plugin: `path` is fixed, all other
# listed options are optional.
sub options {
    my %opts = map { ($_ => { optional => 1 }) }
        qw(nodes shared disable maxfiles content format is_mountpoint);
    $opts{path} = { fixed => 1 };
    # TODO: The new variant of mkdir with `populate` vs `create`...

    return \%opts;
}
61
62 # Storage implementation
63 #
# We use the same volume names as directory plugins, but map *raw* disk image file names into a
# subdirectory.
66 #
67 # `vm-VMID-disk-ID.raw`
68 # -> `images/VMID/vm-VMID-disk-ID/disk.raw`
69 # where the `vm-VMID-disk-ID/` subdirectory is a btrfs subvolume
70
# Configuration checking is delegated unchanged to `DirPlugin::check_config`, which simply checks
# for invalid paths. All five arguments are always forwarded, in the order received.
sub check_config {
    my ($self, $section_id, $cfg, $is_create, $skip_schema_check) = @_;

    return PVE::Storage::DirPlugin::check_config(
        $self, $section_id, $cfg, $is_create, $skip_schema_check,
    );
}
76
# Storage activation is delegated unchanged to `DirPlugin::activate_storage`; all four arguments
# are always forwarded, in the order received.
sub activate_storage {
    my ($class, $store_id, $store_cfg, $cache_ref) = @_;

    return PVE::Storage::DirPlugin::activate_storage(
        $class, $store_id, $store_cfg, $cache_ref,
    );
}
81
# Status reporting is delegated unchanged to `DirPlugin::status`; all four arguments are always
# forwarded, in the order received.
sub status {
    my ($class, $store_id, $store_cfg, $cache_ref) = @_;

    return PVE::Storage::DirPlugin::status(
        $class, $store_id, $store_cfg, $cache_ref,
    );
}
86
87 # TODO: sub get_volume_notes {}
88
89 # TODO: sub update_volume_notes {}
90
# Dies with `$msg`, annotated with the file and line of the caller of the function that called
# us (ie. one frame further up than `die` itself would report).
# croak would not include the caller from within this module.
sub __error {
    my ($msg) = @_;

    my @frame = caller(1);
    my ($file, $line) = @frame[1, 2];

    die "$msg at $file: $line\n";
}
97
# Maps a raw disk name (eg. `vm-VMID-disk-ID.raw`) to the name of the subdirectory (subvolume)
# holding it, which is the name with the `.raw` suffix removed.
#
# Dies via `__error` if the name does not end in `.raw`.
sub raw_name_to_dir($) {
    my ($disk_name) = @_;

    # Everything in front of the `.<format>` suffix names the subvolume directory.
    my ($dir) = $disk_name =~ /^(.*)\.raw$/;
    return $dir if defined($dir);

    __error("internal error: bad disk name: $disk_name");
}
110
# Maps the path of a raw image file (`<subvolume>/disk.raw`) to the path of the subvolume
# containing it.
#
# Dies via `__error` if the path does not end in `/disk.raw`.
sub raw_file_to_subvol($) {
    my ($raw_path) = @_;

    my ($subvol) = $raw_path =~ m|^(.*)/disk\.raw$|;
    return "$subvol" if defined($subvol);

    __error("internal error: bad raw path: $raw_path");
}
120
# Computes the file system path of a volume, optionally of one of its snapshots.
#
# Layout below the content type's subdirectory (plus `/VMID` for images):
#   raw:    `<name without .raw>[@<snapname>]/disk.raw`
#   subvol: `<name>[@<snapname>]`
#   other:  `<name>` (the snapshot name is not part of the path)
#
# Returns `($path, $vmid, $vtype)` in list context and just `$path` in scalar context.
sub filesystem_path {
    my ($class, $scfg, $volname, $snapname) = @_;

    my ($vtype, $name, $vmid, $format) = ($class->parse_volname($volname))[0, 1, 2, 6];

    my $base = $class->get_subdir($scfg, $vtype);
    my @parts = ($base);
    push @parts, $vmid if $vtype eq 'images';

    my $snap_suffix = $snapname ? "\@$snapname" : '';

    if ($format eq 'raw') {
        # The raw image is a file inside its own subvolume.
        push @parts, raw_name_to_dir($name) . $snap_suffix, 'disk.raw';
    } elsif ($format eq 'subvol') {
        push @parts, $name . $snap_suffix;
    } else {
        push @parts, $name;
    }

    my $path = join('/', @parts);

    return wantarray ? ($path, $vmid, $vtype) : $path;
}
148
# Runs `btrfs -q @$cmd` through `run_command` and returns the collected output as a string.
#
# Without `$outfunc` each output line is appended followed by a newline. With `$outfunc`, it is
# called for every output line and whatever defined value it returns is appended instead.
sub btrfs_cmd {
    my ($class, $cmd, $outfunc) = @_;

    my $collected = '';

    my $on_line = defined($outfunc)
        ? sub {
            my $chunk = $outfunc->(@_);
            $collected .= $chunk if defined($chunk);
        }
        : sub { $collected .= "$_[0]\n" };

    run_command(['btrfs', '-q', @$cmd], errmsg => 'btrfs error', outfunc => $on_line);

    return $collected;
}
166
# Returns the numeric ID of the subvolume at `$path`, parsed from the `Object ID:` or
# `Subvolume ID:` line of `btrfs subvolume show`. Dies if no such line is found.
sub btrfs_get_subvol_id {
    my ($class, $path) = @_;

    my $info = $class->btrfs_cmd(['subvolume', 'show', '--', $path]);

    my ($id) = $info =~ /^\s*(?:Object|Subvolume) ID:\s*(\d+)$/m;
    die "failed to get btrfs subvolume ID from: $info\n" if !defined($id);

    return $id;
}
175
# Updates the inode flags of the file behind `$fh`: reads the current flags with the
# FS_IOC_GETFLAGS ioctl, computes `(flags & $mask) ^ $xor` and writes the result back with the
# FS_IOC_SETFLAGS ioctl. Dies if either ioctl fails, returns 1 otherwise.
my sub chattr : prototype($$$) {
    my ($fh, $mask, $xor) = @_;

    # The ioctls exchange the flags as a native `long` in a packed buffer.
    my $buf = pack('L!', 0);
    ioctl($fh, FS_IOC_GETFLAGS, $buf) or die "FS_IOC_GETFLAGS failed - $!\n";

    my $current = unpack('L!', $buf);
    my $updated = ($current & $mask) ^ $xor;

    $buf = pack('L!', $updated);
    ioctl($fh, FS_IOC_SETFLAGS, $buf) or die "FS_IOC_SETFLAGS failed - $!\n";

    return 1;
}
185
# Converts a volume into a base volume: the subvolume is renamed from `vm-...` to `base-...` and
# then marked read-only (a failure to set the property only produces a warning).
#
# Formats other than `raw` and `subvol` are handled by `DirPlugin::create_base`.
# Returns the new volume name.
sub create_base {
    my ($class, $storeid, $scfg, $volname) = @_;

    my ($name, $vmid, $basename, $basevmid, $format) =
        ($class->parse_volname($volname))[1, 2, 3, 4, 6];

    # If we're not working with a 'raw' file, which is the only thing that's "different" for
    # btrfs, or a subvolume, we forward to the DirPlugin
    return PVE::Storage::DirPlugin::create_base(@_)
        if $format ne 'raw' && $format ne 'subvol';

    (my $base_name = $name) =~ s/^vm-/base-/;

    my $old_path = $class->filesystem_path($scfg, $volname);
    my $base_volname = $basename ? "$basevmid/$basename/$vmid/$base_name" : "$vmid/$base_name";
    my $new_path = $class->filesystem_path($scfg, $base_volname);

    # For raw images the thing to rename is the subvolume containing `disk.raw`.
    if ($format eq 'raw') {
        $old_path = raw_file_to_subvol($old_path);
        $new_path = raw_file_to_subvol($new_path);
    }

    rename($old_path, $new_path)
        or die "rename '$old_path' to '$new_path' failed - $!\n";

    eval { $class->btrfs_cmd(['property', 'set', $new_path, 'ro', 'true']) };
    warn $@ if $@;

    return $base_volname;
}
219
# Creates a linked clone of `$volname` for guest `$vmid` as a writable btrfs snapshot and returns
# the new volume name (`VMID/NAME`).
#
# If `$snap` is given, the clone is taken from that snapshot of the volume instead of from its
# current state. Formats other than `raw` and `subvol` are handled by `DirPlugin::clone_image`.
# Dies if the btrfs command fails.
sub clone_image {
    my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;

    my $format = ($class->parse_volname($volname))[6];

    # If we're not working with a 'raw' file, which is the only thing that's "different" for
    # btrfs, or a subvolume, we forward to the DirPlugin
    if ($format ne 'raw' && $format ne 'subvol') {
        return PVE::Storage::DirPlugin::clone_image(@_);
    }

    my $imagedir = $class->get_subdir($scfg, 'images');
    $imagedir .= "/$vmid";
    mkpath $imagedir;

    # Pass the snapshot name on, otherwise a clone "from snapshot" would silently be a clone of
    # the volume's current state.
    my $path = $class->filesystem_path($scfg, $volname, $snap);
    my $newname = $class->find_free_diskname($storeid, $scfg, $vmid, $format, 1);

    # For btrfs subvolumes we don't actually need the "link" to the base volume in the name
    # (which would be "$basevmid/$basename/$vmid/$newname").
    my $newvolname = "$vmid/$newname";
    my $newpath = $class->filesystem_path($scfg, $newvolname);

    my $subvol = $path;
    my $newsubvol = $newpath;
    if ($format eq 'raw') {
        # Snapshot the subvolume containing `disk.raw`, not the file itself.
        $subvol = raw_file_to_subvol($subvol);
        $newsubvol = raw_file_to_subvol($newsubvol);
    }

    $class->btrfs_cmd(['subvolume', 'snapshot', '--', $subvol, $newsubvol]);

    return $newvolname;
}
255
# Allocates a new volume for guest `$vmid` and returns its volume name (`VMID/NAME`).
#
# For `raw` a subvolume is created containing a `disk.raw` file of `$size * 1024` bytes with the
# NOCOW flag set; for `subvol` just the subvolume is created (a non-zero `$size` is refused).
# All other formats are handled by `DirPlugin::alloc_image`. If populating the new subvolume
# fails, it is deleted again before the error is re-thrown.
sub alloc_image {
    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;

    return PVE::Storage::DirPlugin::alloc_image(@_)
        if $fmt ne 'raw' && $fmt ne 'subvol';

    # Directory/name handling as in Plugin.pm:
    my $vm_dir = $class->get_subdir($scfg, 'images');
    $vm_dir .= "/$vmid";
    mkpath $vm_dir;

    if (!$name) {
        $name = $class->find_free_diskname($storeid, $scfg, $vmid, $fmt, 1);
    }

    my $name_fmt = (PVE::Storage::Plugin::parse_name_dir($name))[1];
    if ($name_fmt ne $fmt) {
        die "illegal name '$name' - wrong extension for format ('$name_fmt != '$fmt')\n";
    }
    # End of the part taken from Plugin.pm

    # .raw is not part of the directory name
    (my $subvol = "$vm_dir/$name") =~ s/\.raw$//;

    die "disk image '$subvol' already exists\n" if -e $subvol;

    # NOTE: `btrfs send/recv` actually drops quota information so supporting subvolumes with
    # quotas doesn't play nice with send/recv.
    die "btrfs quotas are currently not supported, use an unsized subvolume or a raw file\n"
        if $fmt eq 'subvol' && !!$size;

    $class->btrfs_cmd(['subvolume', 'create', '--', $subvol]);

    eval {
        if ($fmt eq 'raw') {
            my $image = "$subvol/disk.raw";
            sysopen(my $fh, $image, O_WRONLY | O_CREAT | O_EXCL)
                or die "failed to create raw file '$image' - $!\n";
            # Clear nothing, set NOCOW.
            chattr($fh, ~FS_NOCOW_FL, FS_NOCOW_FL);
            truncate($fh, $size * 1024)
                or die "failed to set file size for '$image' - $!\n";
            close($fh);
        } elsif ($fmt eq 'subvol') {
            # Nothing to do for now...
            #
            # With quota support we would use the subvolume's default `0/$id` qgroup here:
            #   my $id = $class->btrfs_get_subvol_id($subvol);
            #   $class->btrfs_cmd(['qgroup', 'limit', "${size}k", "0/$id", $subvol]);
            # Enabling quotas (`btrfs quota enable`) should happen at storage creation instead
            # and therefore be governed by a configuration option.
        } else {
            die "internal format error (format = $fmt)\n";
        }
    };

    if (my $err = $@) {
        # Do not leave a half-initialized subvolume behind.
        eval { $class->btrfs_cmd(['subvolume', 'delete', '--', $subvol]); };
        warn $@ if $@;
        die $err;
    }

    return "$vmid/$name";
}
330
# Tells whether `$path` is a btrfs subvolume, using the same check as btrfsprogs: it must be a
# directory whose inode number is BTRFS_FIRST_FREE_OBJECTID. Dies if `$path` cannot be stat'ed.
my sub path_is_subvolume : prototype($) {
    my ($path) = @_;

    my @st = stat($path);
    die "stat failed on '$path' - $!\n" if !@st;

    my ($inode, $mode) = @st[1, 2];

    return S_ISDIR($mode) && $inode == BTRFS_FIRST_FREE_OBJECTID;
}
339
# Matches directory entries named `<volume>@<snapshot>`, where `<volume>` is one of our volume
# names. The `@<snapshot>` part is mandatory, so plain volumes do not match.
my $BTRFS_VOL_REGEX = qr/((?:vm|base|subvol)-\d+-disk-\d+(?:\.subvol)?)(?:\@(\S+))$/;

# Calls `$code->($volume, $name, $snapshot)` for each subvol in a directory matching our volume
# regex. Entries that match by name but are not btrfs subvolumes are skipped.
my sub foreach_subvol : prototype($$) {
    my ($dir, $code) = @_;

    # `dir_glob_foreach` is not among the symbols this file imports from PVE::Tools, so it has to
    # be called by its fully qualified name.
    PVE::Tools::dir_glob_foreach($dir, $BTRFS_VOL_REGEX, sub {
        # NOTE(review): the pattern above only has two groups of its own; this relies on
        # `dir_glob_foreach` wrapping it in an additional outer group, so that $1 is the whole
        # entry name, $2 the volume name and $3 the snapshot name - confirm against PVE::Tools.
        my ($volume, $name, $snapshot) = ($1, $2, $3);
        return if !path_is_subvolume("$dir/$volume");
        $code->($volume, $name, $snapshot);
    });
}
353
# Deletes a volume together with all of its snapshots.
#
# For `raw` and `subvol` volumes the subvolume and every `<volume>@<snapshot>` subvolume that
# belongs to it are deleted with a single btrfs command; afterwards the containing directory is
# removed if it became empty. All other formats are handled by `DirPlugin::free_image`.
# Dies if the btrfs command fails; returns undef otherwise.
sub free_image {
    my ($class, $storeid, $scfg, $volname, $isBase, $_format) = @_;

    my $format = ($class->parse_volname($volname))[6];

    if ($format ne 'subvol' && $format ne 'raw') {
        return PVE::Storage::DirPlugin::free_image(@_);
    }

    my $path = $class->filesystem_path($scfg, $volname);
    my $subvol = $format eq 'raw' ? raw_file_to_subvol($path) : $path;

    my $dir = dirname($subvol);
    # Only `dirname` is imported from File::Basename, so qualify `basename`.
    my $volume_name = File::Basename::basename($subvol);

    my @snapshot_vols;
    foreach_subvol($dir, sub {
        my ($volume, $name, $snapshot) = @_;
        return if !defined($snapshot);
        # The directory is shared by all disks of the guest: only collect snapshots of the volume
        # being freed, not those of its siblings.
        return if !defined($name) || $name ne $volume_name;
        push @snapshot_vols, "$dir/$volume";
    });

    $class->btrfs_cmd(['subvolume', 'delete', '--', @snapshot_vols, $subvol]);

    # try to cleanup directory to not clutter storage with empty $vmid dirs if
    # all images from a guest got deleted (failure, eg. because it is not empty, is expected)
    rmdir($dir);

    return undef;
}
386
387 # Currently not used because quotas clash with send/recv.
388 # my sub btrfs_subvol_quota {
389 # my ($class, $path) = @_;
390 # my $id = '0/' . $class->btrfs_get_subvol_id($path);
391 # my $search = qr/^\Q$id\E\s+(\d)+\s+\d+\s+(\d+)\s*$/;
392 # my ($used, $size);
393 # $class->btrfs_cmd(['qgroup', 'show', '--raw', '-rf', '--', $path], sub {
394 # return if defined($size);
395 # if ($_[0] =~ $search) {
396 # ($used, $size) = ($1, $2);
397 # }
398 # });
399 # if (!defined($size)) {
400 # # syslog should include more information:
401 # syslog('err', "failed to get subvolume size for: $path (id $id)");
402 # # UI should only see the last path component:
403 # $path =~ s|^.*/||;
404 # die "failed to get subvolume size for $path\n";
405 # }
406 # return wantarray ? ($used, $size) : $size;
407 # }
408
# Returns size information about a volume.
#
# For `subvol` volumes no quota information is read: in list context this returns
# `(0, 'subvol', 0, undef, $ctime)` (size, format, used, parent, ctime of the path) and in scalar
# context 1. For every other format the result of `Plugin::file_size_info` on the volume's path
# is returned.
sub volume_size_info {
    my ($class, $scfg, $storeid, $volname, $timeout) = @_;

    my $path = $class->filesystem_path($scfg, $volname);
    my $format = ($class->parse_volname($volname))[6];

    return PVE::Storage::Plugin::file_size_info($path, $timeout)
        if $format ne 'subvol';

    my $ctime = (stat($path))[10];

    # Quotas are currently not used (they clash with send/recv), so size and usage are 0.
    return 1 if !wantarray;
    return (0, 'subvol', 0, undef, $ctime);
}
425
# Resizes a volume.
#
# For `subvol` volumes the limit of the subvolume's default qgroup (`0/<subvolume id>`) is set
# and undef is returned; all other formats are handled by `Plugin::volume_resize`.
# Dies if the subvolume ID cannot be determined or the btrfs command fails.
sub volume_resize {
    my ($class, $scfg, $storeid, $volname, $size, $running) = @_;

    my $format = ($class->parse_volname($volname))[6];

    return PVE::Storage::Plugin::volume_resize(@_) if $format ne 'subvol';

    my $path = $class->filesystem_path($scfg, $volname);

    # The qgroup ID is `<level>/<subvolume id>`; the level prefix must be added exactly once.
    my $qgroup = '0/' . $class->btrfs_get_subvol_id($path);

    # NOTE(review): `$size` is passed to btrfs with a `k` suffix, ie. interpreted as KiB -
    # confirm that callers do not pass bytes here.
    $class->btrfs_cmd(['qgroup', 'limit', '--', "${size}k", $qgroup, $path]);

    return undef;
}
439
# Takes snapshot `$snap` of a volume.
#
# For `raw` and `subvol` volumes a read-only btrfs snapshot named `<volume>@<snap>` is created
# next to the volume and undef is returned; all other formats are handled by
# `Plugin::volume_snapshot`. Dies if the btrfs command fails.
sub volume_snapshot {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my ($vmid, $format) = ($class->parse_volname($volname))[2, 6];

    return PVE::Storage::Plugin::volume_snapshot(@_)
        unless $format eq 'subvol' || $format eq 'raw';

    my $source = $class->filesystem_path($scfg, $volname);
    my $target = $class->filesystem_path($scfg, $volname, $snap);

    # For raw images the subvolume containing `disk.raw` is what gets snapshotted.
    if ($format eq 'raw') {
        $source = raw_file_to_subvol($source);
        $target = raw_file_to_subvol($target);
    }

    # Make sure the guest's image directory exists before creating the snapshot in it.
    mkpath($class->get_subdir($scfg, 'images') . "/$vmid");

    $class->btrfs_cmd(['subvolume', 'snapshot', '-r', '--', $source, $target]);

    return undef;
}
462
# Rolling back is always reported as possible; none of the arguments
# ($class, $scfg, $storeid, $volname, $snap) are inspected.
sub volume_rollback_is_possible {
    return 1;
}
468
# Rolls a volume back to snapshot `$snap`.
#
# For `raw` and `subvol` volumes a writable snapshot of the snapshot is created under a temporary
# name and then atomically exchanged with the current volume; whatever ends up under the temporary
# name (the old volume on success, the unused new one on failure) is deleted afterwards. All other
# formats are handled by `Plugin::volume_snapshot_rollback`.
# Dies if the temporary snapshot cannot be created or the exchange fails; returns undef otherwise.
sub volume_snapshot_rollback {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my $format = ($class->parse_volname($volname))[6];

    if ($format ne 'subvol' && $format ne 'raw') {
        return PVE::Storage::Plugin::volume_snapshot_rollback(@_);
    }

    my $path = $class->filesystem_path($scfg, $volname);
    my $snap_path = $class->filesystem_path($scfg, $volname, $snap);

    if ($format eq 'raw') {
        $path = raw_file_to_subvol($path);
        $snap_path = raw_file_to_subvol($snap_path);
    }

    # Simple version would be:
    #   rename old to temp
    #   create new
    #   on error rename temp back
    # But for atomicity in case the rename after create-failure *also* fails, we create the new
    # subvol first, then use RENAME_EXCHANGE,
    my $tmp_path = "$path.tmp.$$";
    $class->btrfs_cmd(['subvolume', 'snapshot', '--', $snap_path, $tmp_path]);

    # The paths are absolute, so pass -1 as file descriptors.
    my $ok = PVE::Tools::renameat2(-1, $tmp_path, -1, $path, PVE::Tools::RENAME_EXCHANGE());
    # Remember the reason now: the cleanup below runs further commands which may overwrite `$!`.
    my $rename_err = $ok ? undef : "$!";

    eval { $class->btrfs_cmd(['subvolume', 'delete', '--', $tmp_path]) };
    warn "failed to remove '$tmp_path' subvolume: $@" if $@;

    if (!$ok) {
        die "failed to rotate '$tmp_path' into place at '$path' - $rename_err\n";
    }

    return undef;
}
506
# Deletes snapshot `$snap` of a volume.
#
# For `raw` and `subvol` volumes the snapshot's subvolume is deleted and undef is returned; all
# other formats are handled by `Plugin::volume_snapshot_delete`. Dies if the btrfs command fails.
sub volume_snapshot_delete {
    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;

    my $format = ($class->parse_volname($volname))[6];

    return PVE::Storage::Plugin::volume_snapshot_delete(@_)
        unless $format eq 'subvol' || $format eq 'raw';

    my $snap_path = $class->filesystem_path($scfg, $volname, $snap);
    # For raw images the snapshot is the subvolume containing `disk.raw`.
    $snap_path = raw_file_to_subvol($snap_path) if $format eq 'raw';

    $class->btrfs_cmd(['subvolume', 'delete', '--', $snap_path]);

    return undef;
}
526
# Tells whether `$feature` is available for a volume: returns 1 if it is, undef otherwise.
#
# The answer depends on the volume's format and on its state, which is `snap` if `$snapname` is
# given, else `base` for base volumes and `current` for everything else.
sub volume_has_feature {
    my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;

    # Turns a list of format names into a lookup set.
    my $set = sub { return { map { ($_ => 1) } @_ } };

    # feature => state => set of formats
    my %supported = (
        snapshot => {
            current => $set->(qw(qcow2 raw subvol)),
            snap => $set->(qw(qcow2 raw subvol)),
        },
        clone => {
            base => $set->(qw(qcow2 raw subvol vmdk)),
            current => $set->(qw(raw)),
            snap => $set->(qw(raw)),
        },
        template => {
            current => $set->(qw(qcow2 raw vmdk subvol)),
        },
        copy => {
            base => $set->(qw(qcow2 raw subvol vmdk)),
            current => $set->(qw(qcow2 raw subvol vmdk)),
            snap => $set->(qw(qcow2 raw subvol)),
        },
        sparseinit => {
            base => $set->(qw(qcow2 raw vmdk)),
            current => $set->(qw(qcow2 raw vmdk)),
        },
    );

    my ($is_base, $format) = ($class->parse_volname($volname))[5, 6];

    my $state;
    if ($snapname) {
        $state = 'snap';
    } elsif ($is_base) {
        $state = 'base';
    } else {
        $state = 'current';
    }

    return defined($supported{$feature}->{$state}->{$format}) ? 1 : undef;
}
564
# Lists the images found below the storage's image directory.
#
# Every entry `<imagedir>/<VMID>/<name>` whose name is either extension-less (a subvolume holding
# a raw image, reported as `<name>.raw`) or ends in `.qcow2`, `.vmdk` or `.subvol` is considered.
# If `$vollist` is given only volumes contained in it are returned, otherwise, if `$vmid` is
# defined, only volumes owned by that guest.
#
# Returns an array reference of hashes with the keys `volid`, `format`, `size`, `vmid`, `used`,
# `parent` and, if known, `ctime`.
sub list_images {
    my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;

    my $imagedir = $class->get_subdir($scfg, 'images');

    my @found;

    # Copied from Plugin.pm, with file_size_info calls adapted:
    for my $entry (glob("$imagedir/[0-9][0-9]*/*")) {
        # different to in Plugin.pm the regex below also excludes '@' as valid file name
        next unless $entry =~ m@^(/.+/(\d+)/([^/\@.]+(?:\.(qcow2|vmdk|subvol))?))$@;
        # taking the path from the capture untaints it
        my ($fn, $owner, $name, $ext) = ($1, $2, $3, $4);

        next if !$vollist && defined($vmid) && ($owner ne $vmid);

        my $volid = "$storeid:$owner/$name";
        my ($size, $format, $used, $parent, $ctime);

        if (!$ext) {
            # raw: the image is the `disk.raw` file inside the directory
            $volid .= '.raw';
            ($size, $format, $used, $parent, $ctime) =
                PVE::Storage::Plugin::file_size_info("$fn/disk.raw");
        } elsif ($ext eq 'subvol') {
            # quotas are not used, so there is no size information
            ($size, $format, $used) = (0, 'subvol', 0);
        } else {
            ($size, $format, $used, $parent, $ctime) = PVE::Storage::Plugin::file_size_info($fn);
        }

        next unless $format && defined($size);
        next if $vollist && !(grep { $_ eq $volid } @$vollist);

        my %info = (
            volid => $volid,
            format => $format,
            size => $size,
            vmid => $owner,
            used => $used,
            parent => $parent,
        );
        $info{ctime} = $ctime if $ctime;

        push @found, \%info;
    }

    return \@found;
}
614
615 # For now we don't implement `btrfs send/recv` as it needs some updates to our import/export API
616 # first!
617
# Pure forwarder: hands all arguments, unchanged, to `DirPlugin::volume_export_formats` and
# returns whatever it returns.
sub volume_export_formats {
    return PVE::Storage::DirPlugin::volume_export_formats(@_);
}
621
# Pure forwarder: hands all arguments, unchanged, to `DirPlugin::volume_export` and returns
# whatever it returns.
sub volume_export {
    return PVE::Storage::DirPlugin::volume_export(@_);
}
625
# Pure forwarder: hands all arguments, unchanged, to `DirPlugin::volume_import_formats` and
# returns whatever it returns.
sub volume_import_formats {
    return PVE::Storage::DirPlugin::volume_import_formats(@_);
}
629
# Pure forwarder: hands all arguments, unchanged, to `DirPlugin::volume_import` and returns
# whatever it returns.
sub volume_import {
    return PVE::Storage::DirPlugin::volume_import(@_);
}
633
634 1