1 package PVE
::Storage
::RBDPlugin
;
11 use PVE
::Cluster
qw(cfs_read_file);;
12 use PVE
::JSONSchema
qw(get_standard_option);
15 use PVE
::Storage
::Plugin
;
16 use PVE
::Tools
qw(run_command trim);
18 use base
qw(PVE::Storage::Plugin);
# Build the "<image>@<snapshot>" identifier of a parent image.
#
# $parent - hash reference with the keys 'image' and 'snapshot', or a false
#           value when the image has no parent.
#
# Returns the identifier string, or undef when no parent was given. The
# explicit 'return undef' is deliberate: the closure is called inside a hash
# constructor, where it must always yield exactly one value.
my $get_parent_image_name = sub {
    my ($parent) = @_;

    return undef if !$parent;
    return $parent->{image} . "@" . $parent->{snapshot};
};
# Open a librados connection for a storage.
#
# $scfg, $storeid - storage configuration and storage ID; both are passed to
#                   PVE::CephConfig::ceph_connect_option() to obtain the
#                   connection parameters.
# $options        - accepted for interface compatibility, not used here.
#
# Returns the PVE::RADOS handle; callers invoke mon_command() on it.
my $librados_connect = sub {
    my ($scfg, $storeid, $options) = @_;

    my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);

    return PVE::RADOS->new(%$librados_config);
};
# Compose the "<pool>[/<namespace>][/<volume>]" path of an RBD object.
#
# $scfg   - storage configuration; 'pool' falls back to 'rbd' when unset or
#           false, 'namespace' is appended only when defined.
# $volume - optional image name (may carry an '@snapshot' suffix).
#
# Returns the path string.
my sub get_rbd_path {
    my ($scfg, $volume) = @_;

    my @parts = ($scfg->{pool} ? $scfg->{pool} : 'rbd');
    push @parts, $scfg->{namespace} if defined($scfg->{namespace});
    push @parts, $volume if defined($volume);

    return join('/', @parts);
}
# Determine the block device path under which a mapped RBD volume appears.
#
# $scfg, $storeid - storage configuration and storage ID.
# $volume         - image name, optionally with an '@snapshot' suffix.
#
# The cluster fsid is queried from the monitors when 'monhost' is configured
# and read from ceph.conf otherwise. It must look like a UUID; the captured
# match is used afterwards so that the value is untainted.
#
# Returns "/dev/rbd-pve/<fsid>/<rbd path>", or the legacy "/dev/rbd/<rbd path>"
# when only the legacy node exists. Dies if the fsid is missing or malformed.
my sub get_rbd_dev_path {
    my ($scfg, $storeid, $volume) = @_;

    my $cluster_id;
    if ($scfg->{monhost}) {
        my $rados = $librados_connect->($scfg, $storeid);
        $cluster_id = $rados->mon_command({ prefix => 'fsid', format => 'json' })->{fsid};
    } else {
        $cluster_id = cfs_read_file('ceph.conf')->{global}->{fsid};
    }

    my $uuid_pattern = "([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})";
    # an undefined fsid is rejected here without an "uninitialized" warning
    if (defined($cluster_id) && $cluster_id =~ qr/^${uuid_pattern}$/is) {
        $cluster_id = $1; # use untainted value
    } else {
        die "cluster fsid has invalid format\n";
    }

    my $rbd_path = get_rbd_path($scfg, $volume);
    my $pve_path = "/dev/rbd-pve/${cluster_id}/${rbd_path}";
    my $path = "/dev/rbd/${rbd_path}";

    return $path if !-e $pve_path && -e $path; # mapped before rbd-pve udev rule existed
    return $pve_path;
}
# Assemble the argument list for a ceph command line tool.
#
# $binary          - absolute path of the tool to run.
# $scfg, $storeid  - storage configuration and storage ID, used to look up
#                    the connection options.
# $op              - subcommand; placed after all connection options.
# @options         - further arguments, appended after $op.
#
# Returns an array reference suitable as a command for run_command().
my $build_cmd = sub {
    my ($binary, $scfg, $storeid, $op, @options) = @_;

    my $cmd_option = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
    my $pool = $scfg->{pool} ? $scfg->{pool} : 'rbd';

    my $cmd = [$binary, '-p', $pool];

    if (defined(my $namespace = $scfg->{namespace})) {
        # some subcommands will fail if the --namespace parameter is present
        # NOTE(review): the entries of this table were not visible in the
        # reviewed excerpt; 'unmap' is assumed - confirm against the file.
        my $no_namespace_parameter = {
            unmap => 1,
        };
        push @$cmd, '--namespace', "$namespace" if !$no_namespace_parameter->{$op};
    }
    push @$cmd, '-c', $cmd_option->{ceph_conf} if ($cmd_option->{ceph_conf});
    push @$cmd, '-m', $cmd_option->{mon_host} if ($cmd_option->{mon_host});
    push @$cmd, '--auth_supported', $cmd_option->{auth_supported}
        if ($cmd_option->{auth_supported});
    push @$cmd, '-n', "client.$cmd_option->{userid}" if ($cmd_option->{userid});
    push @$cmd, '--keyring', $cmd_option->{keyring} if ($cmd_option->{keyring});

    push @$cmd, $op;

    push @$cmd, @options if scalar(@options);

    return $cmd;
};
# Build an '/usr/bin/rbd' command line for subcommand $op.
#
# Arguments are ($scfg, $storeid, $op, @options); they are handed to
# $build_cmd unchanged. Returns the command as an array reference.
my $rbd_cmd = sub {
    my ($scfg, $storeid, $op, @options) = @_;

    return $build_cmd->('/usr/bin/rbd', $scfg, $storeid, $op, @options);
};
# Build an '/usr/bin/rados' command line for subcommand $op.
#
# Arguments are ($scfg, $storeid, $op, @options); they are handed to
# $build_cmd unchanged. Returns the command as an array reference.
my $rados_cmd = sub {
    my ($scfg, $storeid, $op, @options) = @_;

    return $build_cmd->('/usr/bin/rados', $scfg, $storeid, $op, @options);
};
# needed for volumes created using ceph jewel (or higher)
#
# Bring the feature set of image $name in line with what the running kernel's
# RBD driver handles: features the kernel cannot use are disabled, supported
# ones that are currently inactive are enabled.
#
# $scfg, $storeid - storage configuration and storage ID.
# $name           - image name (features cannot be changed on snapshots).
#
# Dies if disabling incompatible features fails.
my $krbd_feature_update = sub {
    my ($scfg, $storeid, $name) = @_;

    my (@disable, @enable);
    my ($kmajor, $kminor) = PVE::ProcFSTools::kernel_version();

    if ($kmajor > 5 || $kmajor == 5 && $kminor >= 3) {
        # 'deep-flatten' can only be disabled, not enabled after image creation
        push @enable, 'fast-diff', 'object-map';
    } else {
        push @disable, 'fast-diff', 'object-map', 'deep-flatten';
    }

    # NOTE(review): the condition of this branch was not visible in the
    # reviewed excerpt; a kernel major version of 4 or later is assumed - confirm.
    if ($kmajor >= 4) {
        push @enable, 'exclusive-lock';
    } else {
        push @disable, 'exclusive-lock';
    }

    # element 4 of the returned list is the image's feature list; treat a
    # missing list as "no features active" instead of dying on the dereference
    my $active_features_list = (rbd_volume_info($scfg, $storeid, $name))[4];
    my $active_features = { map { $_ => 1 } @{ $active_features_list // [] } };

    my $to_disable = join(',', grep { $active_features->{$_} } @disable);
    my $to_enable = join(',', grep { !$active_features->{$_} } @enable );

    if ($to_disable) {
        print "disable RBD image features this kernel RBD drivers is not compatible with: $to_disable\n";
        my $cmd = $rbd_cmd->($scfg, $storeid, 'feature', 'disable', $name, $to_disable);
        run_rbd_command(
            $cmd,
            errmsg => "could not disable krbd-incompatible image features '$to_disable' for rbd image: $name",
        );
    }
    if ($to_enable) {
        print "enable RBD image features this kernel RBD drivers supports: $to_enable\n";
        # NOTE(review): enabling is treated as best effort (failure only warns);
        # the wrapping statements were not visible in the excerpt - confirm.
        eval {
            my $cmd = $rbd_cmd->($scfg, $storeid, 'feature', 'enable', $name, $to_enable);
            run_rbd_command(
                $cmd,
                errmsg => "could not enable krbd-compatible image features '$to_enable' for rbd image: $name",
            );
        };
        warn "$@" if $@;
    }
};
# Run a ceph command line tool through run_command() with rbd-specific error
# reporting.
#
# $cmd  - command as array reference.
# %args - options passed on to run_command(). 'errmsg' is used as prefix for
#         the error raised here. Unless the caller supplies 'errfunc', a
#         default one is installed that remembers the last stderr line (with
#         the timestamp/thread prefix of librbd messages stripped) and echoes
#         it to STDERR.
#
# Dies with "<errmsg>: <last stderr line>" when the command fails and an
# error line was captured, otherwise with the error raised by run_command().
# Returns undef on success.
sub run_rbd_command {
    my ($cmd, %args) = @_;

    my $lasterr;
    # '.' binds tighter than '||', so the previous `$args{errmsg} . ": " || ""`
    # could never produce the empty fallback and warned on a missing errmsg
    my $errmsg = defined($args{errmsg}) ? $args{errmsg} . ": " : "";
    if (!exists($args{errfunc})) {
        # ' error: 2014-02-06 11:51:59.839135 7f09f94d0760 -1 librbd: snap_unprotect: can't unprotect;
        # at least 1 child(ren) in pool cephstor1
        $args{errfunc} = sub {
            my $line = shift;
            # NOTE(review): the two assignments to $lasterr were not visible
            # in the reviewed excerpt and are reconstructed - confirm.
            if ($line =~ m/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ [0-9a-f]+ [\-\d]+ librbd: (.*)$/) {
                $lasterr = "$1\n";
            } else {
                $lasterr = $line;
            }
            print STDERR $lasterr;
            *STDERR->flush();
        };
    }

    eval { run_command($cmd, %args); };
    if (my $err = $@) {
        die $errmsg . $lasterr if length($lasterr);
        die $err;
    }

    return undef;
}
# List the VM images of a storage via 'rbd ls -l --format json'.
#
# $scfg, $storeid - storage configuration and storage ID.
#
# Snapshot entries and images whose name does not start with "vm-<id>-" or
# "base-<id>-" are skipped. An error mentioning "doesn't contain rbd images"
# is treated as an empty listing.
#
# Returns a hash reference { "<pool>[/<namespace>]" => { <image> => info } };
# dies on any other command error or on unparsable output.
sub rbd_ls {
    my ($scfg, $storeid) = @_;

    my $pool = $scfg->{pool} ? $scfg->{pool} : 'rbd';
    $pool .= "/$scfg->{namespace}" if defined($scfg->{namespace});

    my $raw = '';
    my $parser = sub { $raw .= shift };

    my $cmd = $rbd_cmd->($scfg, $storeid, 'ls', '-l', '--format', 'json');
    eval {
        run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => $parser);
    };
    my $err = $@;

    die $err if $err && $err !~ m/doesn't contain rbd images/ ;

    my $result;
    if ($raw eq '') {
        $result = [];
    } elsif ($raw =~ m/^(\[.*\])$/s) { # untaint
        $result = JSON::decode_json($1);
    } else {
        die "got unexpected data from rbd ls: '$raw'\n";
    }

    my $list = {};

    foreach my $el (@$result) {
        next if defined($el->{snapshot});

        my $image = $el->{image};

        my ($owner) = $image =~ m/^(?:vm|base)-(\d+)-/;
        next if !defined($owner);

        # NOTE(review): only the 'parent' entry was visible in the reviewed
        # excerpt; 'name' and 'vmid' are read by list_images, 'size' is
        # assumed - confirm.
        $list->{$pool}->{$image} = {
            name => $image,
            size => $el->{size},
            parent => $get_parent_image_name->($el->{parent}),
            vmid => $owner
        };
    }

    return $list;
}
# List the snapshots of image $name via 'rbd snap ls --format json'.
#
# $scfg, $storeid - storage configuration and storage ID.
# $name           - image name.
#
# Returns a hash reference keyed by snapshot name; each entry carries 'id',
# 'size' (0 when absent) and 'protected' (1 or undef). Dies when the command
# fails or its output is not a JSON array.
sub rbd_ls_snap {
    my ($scfg, $storeid, $name) = @_;

    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'ls', $name, '--format', 'json');

    my $raw = '';
    run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => sub { $raw .= shift; });

    my $list;
    if ($raw =~ m/^(\[.*\])$/s) { # untaint
        $list = eval { JSON::decode_json($1) };
        die "invalid JSON output from 'rbd snap ls $name': $@\n" if $@;
    } else {
        die "got unexpected data from 'rbd snap ls $name': '$raw'\n";
    }

    $list = [] if !defined($list);

    my $res = {};
    foreach my $el (@$list) {
        my $snap = $el->{name};
        my $protected = defined($el->{protected}) && $el->{protected} eq "true" ? 1 : undef;
        # NOTE(review): the 'name' entry was not visible in the reviewed
        # excerpt and is assumed - confirm.
        $res->{$snap} = {
            name => $snap,
            id => $el->{id} // undef,
            size => $el->{size} // 0,
            protected => $protected,
        };
    }
    return $res;
}
# Query 'rbd info --format json' for an image or one of its snapshots.
#
# $scfg, $storeid - storage configuration and storage ID.
# $volname        - image name.
# $snap           - optional snapshot name; passed as '--snap' when true.
#
# Returns the list (size, parent, format, protected, features), where parent
# is "<image>@<snapshot>" or undef and protected is 1 or undef. Dies when the
# command fails or prints something that is not a JSON object.
sub rbd_volume_info {
    my ($scfg, $storeid, $volname, $snap) = @_;

    my @options = ('info', $volname, '--format', 'json');
    if ($snap) {
        push @options, '--snap', $snap;
    }

    my $cmd = $rbd_cmd->($scfg, $storeid, @options);

    my $raw = '';
    my $parser = sub { $raw .= shift };

    run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => $parser);

    my $volume;
    if ($raw eq '') {
        $volume = {};
    } elsif ($raw =~ m/^(\{.*\})$/s) { # untaint
        $volume = JSON::decode_json($1);
    } else {
        die "got unexpected data from rbd info: '$raw'\n";
    }

    $volume->{parent} = $get_parent_image_name->($volume->{parent});
    $volume->{protected} = defined($volume->{protected}) && $volume->{protected} eq "true" ? 1 : undef;

    return $volume->@{qw(size parent format protected features)};
}
305 content
=> [ {images
=> 1, rootdir
=> 1}, { images
=> 1 }],
312 description
=> "IP addresses of monitors (for external clusters).",
313 type
=> 'string', format
=> 'pve-storage-portal-dns-list',
316 description
=> "Pool.",
320 description
=> "Data Pool (for erasure coding only)",
324 description
=> "RBD Namespace.",
328 description
=> "RBD Id.",
332 description
=> "Authsupported.",
336 description
=> "Always access rbd through krbd kernel module.",
340 description
=> "Client keyring contents (for external clusters).",
348 nodes
=> { optional
=> 1 },
349 disable
=> { optional
=> 1 },
350 monhost
=> { optional
=> 1},
351 pool
=> { optional
=> 1 },
352 'data-pool' => { optional
=> 1 },
353 namespace
=> { optional
=> 1 },
354 username
=> { optional
=> 1 },
355 content
=> { optional
=> 1 },
356 krbd
=> { optional
=> 1 },
357 keyring
=> { optional
=> 1 },
358 bwlimit
=> { optional
=> 1 },
362 # Storage implementation
# Hook run when a storage of this type is added: stores the keyring.
#
# $class, $storeid, $scfg - plugin class, storage ID, storage configuration.
# %param                  - further parameters; 'keyring' holds the secret
#                           (may be absent).
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin hook naming - confirm.
sub on_add_hook {
    my ($class, $storeid, $scfg, %param) = @_;

    # plain assignment: the former `my $secret = ... if defined ...` declared
    # a lexical conditionally, which Perl documents as undefined behaviour
    my $secret = $param{keyring};
    PVE::CephConfig::ceph_create_keyfile($scfg->{type}, $storeid, $secret);

    return;
}
# Hook run when the storage configuration is updated: keeps the key file in
# sync with the 'keyring' parameter.
#
# The key file is only touched when 'keyring' is present in %param: a defined
# value (re)creates it, an undefined value removes it.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin hook naming - confirm.
sub on_update_hook {
    my ($class, $storeid, $scfg, %param) = @_;

    if (exists($param{keyring})) {
        if (defined($param{keyring})) {
            PVE::CephConfig::ceph_create_keyfile($scfg->{type}, $storeid, $param{keyring});
        } else {
            PVE::CephConfig::ceph_remove_keyfile($scfg->{type}, $storeid);
        }
    }

    return;
}
# Hook run when the storage is removed: deletes its key file.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin hook naming - confirm.
sub on_delete_hook {
    my ($class, $storeid, $scfg) = @_;

    PVE::CephConfig::ceph_remove_keyfile($scfg->{type}, $storeid);

    return;
}
# Split an RBD volume name into its components.
#
# $volname - "<name>" or "<basename>/<name>", where <name> looks like
#            "vm-<vmid>-..." or "base-<vmid>-..." and <basename> like
#            "base-<vmid>-...".
#
# Returns the list
#   ('images', name, vmid, basename, basevmid, isBase, 'raw')
# where basename/basevmid are undef without a "<basename>/" prefix and isBase
# is 'base' for base images, undef otherwise.
# Dies when the name does not match.
sub parse_volname {
    my ($class, $volname) = @_;

    # the pattern must stay on one line: without /x, whitespace inside it
    # would be matched literally
    if ($volname =~ m/^((base-(\d+)-\S+)\/)?((base)?(vm)?-(\d+)-\S+)$/) {
        return ('images', $4, $7, $2, $3, $5, 'raw');
    }

    die "unable to parse rbd volume name '$volname'\n";
}
# Compute the path under which a volume (or one of its snapshots) is accessed.
#
# $scfg, $volname, $storeid - storage configuration, volume name, storage ID.
# $snapname                 - optional snapshot name, appended as "@<snap>".
#
# With 'krbd' enabled this is the mapped block device path; otherwise an
# "rbd:<pool>/..." specification carrying the connection options.
#
# Returns the list (path, vmid, vtype).
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub path {
    my ($class, $scfg, $volname, $storeid, $snapname) = @_;

    my $cmd_option = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
    my ($vtype, $name, $vmid) = $class->parse_volname($volname);
    $name .= '@'.$snapname if $snapname;

    my $rbd_dev_path = get_rbd_dev_path($scfg, $storeid, $name);
    return ($rbd_dev_path, $vmid, $vtype) if $scfg->{krbd};

    my $rbd_path = get_rbd_path($scfg, $name);
    my $path = "rbd:${rbd_path}";

    $path .= ":conf=$cmd_option->{ceph_conf}" if $cmd_option->{ceph_conf};
    if (defined($scfg->{monhost})) {
        my $monhost = PVE::CephConfig::hostlist($scfg->{monhost}, ';');
        # ':' separates the options of the specification, so escape it
        $monhost =~ s/:/\\:/g;
        $path .= ":mon_host=$monhost";
        $path .= ":auth_supported=$cmd_option->{auth_supported}";
    }

    $path .= ":id=$cmd_option->{userid}:keyring=$cmd_option->{keyring}" if ($cmd_option->{keyring});

    return ($path, $vmid, $vtype);
}
# Pick the next unused disk name for a VM on this storage.
#
# $storeid, $scfg, $vmid - storage ID, storage configuration, owning VM.
# $fmt, $add_fmt_suffix  - accepted for interface compatibility, unused here.
#
# Lists the existing images with 'rbd ls' (an error mentioning "doesn't
# contain rbd images" counts as an empty list) and lets
# PVE::Storage::Plugin::get_next_vm_diskname() choose the name.
# Dies on any other listing error.
sub find_free_diskname {
    my ($class, $storeid, $scfg, $vmid, $fmt, $add_fmt_suffix) = @_;

    my $cmd = $rbd_cmd->($scfg, $storeid, 'ls');

    my $disk_list = [];

    my $parser = sub {
        my $line = shift;
        if ($line =~ m/^(.*)$/) { # untaint
            push @$disk_list, $1;
        }
    };

    eval {
        run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => $parser);
    };
    my $err = $@;

    die $err if $err && $err !~ m/doesn't contain rbd images/;

    return PVE::Storage::Plugin::get_next_vm_diskname($disk_list, $storeid, $vmid, undef, $scfg);
}
# Convert a VM image into a base image (template).
#
# The image is renamed from "vm-..." to "base-...", a '__base__' snapshot is
# created and protected (unless it already is).
#
# Returns the new volume name, prefixed with "<basename>/" when the image is
# itself a linked clone. Dies if the volume already is a base image, if its
# info cannot be read, if it is not image format 2, or if the parent recorded
# in $volname does not match the image's actual parent.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name is taken from the error message below - confirm.
sub create_base {
    my ($class, $storeid, $scfg, $volname) = @_;

    my $snap = '__base__';

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
        $class->parse_volname($volname);

    die "create_base not possible with base image\n" if $isBase;

    my ($size, $parent, $format, undef) = rbd_volume_info($scfg, $storeid, $name);
    die "rbd volume info on '$name' failed\n" if !($size);

    die "rbd image must be at format V2" if $format ne "2";

    die "volname '$volname' contains wrong information about parent $parent $basename\n"
        if $basename && (!$parent || $parent ne $basename."@".$snap);

    my $newname = $name;
    $newname =~ s/^vm-/base-/;

    my $newvolname = $basename ? "$basename/$newname" : "$newname";

    my $cmd = $rbd_cmd->(
        $scfg,
        $storeid,
        'rename',
        get_rbd_path($scfg, $name),
        get_rbd_path($scfg, $newname),
    );
    run_rbd_command($cmd, errmsg => "rbd rename '$name' error");

    my $running = undef; #fixme : is create_base always offline ?

    $class->volume_snapshot($scfg, $storeid, $newname, $snap, $running);

    my (undef, undef, undef, $protected) = rbd_volume_info($scfg, $storeid, $newname, $snap);

    if (!$protected) {
        my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'protect', $newname, '--snap', $snap);
        run_rbd_command($cmd, errmsg => "rbd protect $newname snap '$snap' error");
    }

    return $newvolname;
}
# Create a linked clone of a base image or of a snapshot.
#
# $volname  - source volume.
# $vmid     - VM that will own the clone.
# $snapname - optional source snapshot; without it the '__base__' snapshot of
#             a base image is cloned.
#
# A given snapshot is protected first if necessary. Returns the new volume
# name: "<basename>/<name>" for clones of a base image, "<name>" for clones
# of a snapshot. Dies if $volname is not a base image and no snapshot name
# was given.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub clone_image {
    my ($class, $scfg, $storeid, $volname, $vmid, $snapname) = @_;

    my $snap = '__base__';
    $snap = $snapname if length $snapname;

    my ($vtype, $basename, $basevmid, undef, undef, $isBase) =
        $class->parse_volname($volname);

    die "$volname is not a base image and snapname is not provided\n"
        if !$isBase && !length($snapname);

    my $name = $class->find_free_diskname($storeid, $scfg, $vmid);

    warn "clone $volname: $basename snapname $snap to $name\n";

    if (length($snapname)) {
        my (undef, undef, undef, $protected) = rbd_volume_info($scfg, $storeid, $volname, $snapname);

        if (!$protected) {
            my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'protect', $volname, '--snap', $snapname);
            run_rbd_command($cmd, errmsg => "rbd protect $volname snap $snapname error");
        }
    }

    my $newvol = "$basename/$name";
    $newvol = $name if length($snapname);

    my @options = (
        get_rbd_path($scfg, $basename),
        '--snap', $snap,
    );
    push @options, ('--data-pool', $scfg->{'data-pool'}) if $scfg->{'data-pool'};

    my $cmd = $rbd_cmd->($scfg, $storeid, 'clone', @options, get_rbd_path($scfg, $name));
    run_rbd_command($cmd, errmsg => "rbd clone '$basename' error");

    return $newvol;
}
# Create a new image for a VM.
#
# $vmid - owning VM.
# $fmt  - accepted for interface compatibility, unused here.
# $name - optional image name; must start with "vm-<vmid>-". A free name is
#         chosen when omitted.
# $size - requested size; it is divided by 1024 (rounding up) before being
#         passed to 'rbd create --size'.
#
# Returns the image name. Dies on an illegal name or when creation fails.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub alloc_image {
    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;

    die "illegal name '$name' - should be 'vm-$vmid-*'\n"
        if $name && $name !~ m/^vm-$vmid-/;

    $name = $class->find_free_diskname($storeid, $scfg, $vmid) if !$name;

    my @options = (
        '--image-format' , 2,
        '--size', int(($size + 1023) / 1024),
    );
    push @options, ('--data-pool', $scfg->{'data-pool'}) if $scfg->{'data-pool'};

    my $cmd = $rbd_cmd->($scfg, $storeid, 'create', @options, $name);
    run_rbd_command($cmd, errmsg => "rbd create '$name' error");

    return $name;
}
# Delete an image together with all of its snapshots.
#
# Protected snapshots are unprotected first, the volume is deactivated, all
# snapshots are purged and finally the image itself is removed.
#
# $isBase is accepted for interface compatibility and not used here.
# Returns undef; dies when any of the rbd commands fails.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub free_image {
    my ($class, $storeid, $scfg, $volname, $isBase) = @_;

    my ($vtype, $name, $vmid, undef, undef, undef) =
        $class->parse_volname($volname);

    my $snaps = rbd_ls_snap($scfg, $storeid, $name);
    foreach my $snap (keys %$snaps) {
        if ($snaps->{$snap}->{protected}) {
            my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'unprotect', $name, '--snap', $snap);
            run_rbd_command($cmd, errmsg => "rbd unprotect $name snap '$snap' error");
        }
    }

    $class->deactivate_volume($storeid, $scfg, $volname);

    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'purge', $name);
    run_rbd_command($cmd, errmsg => "rbd snap purge '$name' error");

    $cmd = $rbd_cmd->($scfg, $storeid, 'rm', $name);
    run_rbd_command($cmd, errmsg => "rbd rm '$name' error");

    return undef;
}
# List the images of this storage, optionally filtered.
#
# $vmid    - when defined (and no $vollist is given) only images owned by
#            this VM are returned.
# $vollist - optional array reference of volume IDs; when given, only these
#            volumes are returned and $vmid is ignored.
# $cache   - hash reference; the result of rbd_ls() is stored under 'rbd'
#            and reused on later calls.
#
# Returns an array reference of info hashes, sorted by image name, each
# extended with 'volid' and 'format' ('raw').
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub list_images {
    my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;

    $cache->{rbd} = rbd_ls($scfg, $storeid) if !$cache->{rbd};

    my $dat = $cache->{rbd}->{get_rbd_path($scfg)};
    return [] if !$dat; # nothing found

    my $res = [];
    for my $image (sort keys %$dat) {
        my $info = $dat->{$image};
        my ($volname, $parent, $owner) = $info->@{'name', 'parent', 'vmid'};

        # linked clones of a base image carry the base name in their volume ID
        if ($parent && $parent =~ m/^(base-\d+-\S+)\@__base__$/) {
            $info->{volid} = "$storeid:$1/$volname";
        } else {
            $info->{volid} = "$storeid:$volname";
        }

        if ($vollist) {
            my $found = grep { $_ eq $info->{volid} } @$vollist;
            next if !$found;
        } else {
            next if defined ($vmid) && ($owner ne $vmid);
        }

        $info->{format} = 'raw';

        push @$res, $info;
    }

    return $res;
}
# Report the usage of the storage's pool.
#
# Queries the monitors with the 'df' command and picks the entry of the
# configured pool.
#
# Returns the list (total, free, used, active) with active always 1.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub status {
    my ($class, $storeid, $scfg, $cache) = @_;

    my $rados = $librados_connect->($scfg, $storeid);
    my $df = $rados->mon_command({ prefix => 'df', format => 'json' });

    # same default as everywhere else in this file: an unset pool means 'rbd'.
    # Comparing against the raw (possibly undefined) option never matched.
    my $pool = $scfg->{pool} ? $scfg->{pool} : 'rbd';
    my ($d) = grep { $_->{name} eq $pool } @{$df->{pools}};

    # max_avail -> max available space for data w/o replication in the pool
    # bytes_used -> data w/o replication in the pool
    my $free = $d->{stats}->{max_avail};
    my $used = $d->{stats}->{stored} // $d->{stats}->{bytes_used};
    my $total = $used + $free;
    my $active = 1;

    return ($total, $free, $used, $active);
}
# Activate the storage. Nothing needs to be done for RBD.
#
# NOTE(review): the return statement was not visible in the reviewed excerpt;
# a true value is assumed - confirm.
sub activate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    return 1;
}
# Deactivate the storage. Nothing needs to be done for RBD.
#
# NOTE(review): the return statement was not visible in the reviewed excerpt;
# a true value is assumed - confirm.
sub deactivate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    return 1;
}
# Return the kernel block device path of a (possibly not yet mapped) volume.
#
# $name is the image name, optionally with an '@snapshot' suffix. Thin
# wrapper around get_rbd_dev_path().
my sub get_kernel_device_path {
    my ($scfg, $storeid, $name) = @_;

    return get_rbd_dev_path($scfg, $storeid, $name);
}
# Map a volume (or one of its snapshots) through the kernel RBD driver.
#
# Nothing is done when the block device already exists. Otherwise the image
# features are adjusted to the running kernel and 'rbd map' is invoked.
#
# Returns the block device path; dies when mapping fails.
sub map_volume {
    my ($class, $storeid, $scfg, $volname, $snapname) = @_;

    my ($vtype, $img_name, $vmid) = $class->parse_volname($volname);

    my $name = $img_name;
    $name .= '@'.$snapname if $snapname;

    my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);

    return $kerneldev if -b $kerneldev; # already mapped

    # features can only be enabled/disabled for image, not for snapshot!
    $krbd_feature_update->($scfg, $storeid, $img_name);

    my $cmd = $rbd_cmd->($scfg, $storeid, 'map', $name);
    run_rbd_command($cmd, errmsg => "can't map rbd volume $name");

    return $kerneldev;
}
# Unmap a volume (or one of its snapshots) from the kernel RBD driver.
#
# Returns 1; dies when 'rbd unmap' fails.
sub unmap_volume {
    my ($class, $storeid, $scfg, $volname, $snapname) = @_;

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);
    $name .= '@'.$snapname if $snapname;

    my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);

    # NOTE(review): the guard around the unmap call was not visible in the
    # reviewed excerpt; unmapping only an existing block device is assumed.
    if (-b $kerneldev) {
        my $cmd = $rbd_cmd->($scfg, $storeid, 'unmap', $kerneldev);
        run_rbd_command($cmd, errmsg => "can't unmap rbd device $kerneldev");
    }

    return 1;
}
# Make a volume usable: with 'krbd' enabled it is mapped via map_volume(),
# otherwise nothing needs to be done.
#
# $cache is accepted for interface compatibility and not used here.
# Returns 1.
sub activate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    $class->map_volume($storeid, $scfg, $volname, $snapname) if $scfg->{krbd};

    return 1;
}
# Release a volume by unmapping it via unmap_volume().
#
# $cache is accepted for interface compatibility and not used here.
# Returns 1.
sub deactivate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    $class->unmap_volume($storeid, $scfg, $volname, $snapname);

    return 1;
}
# Return the size of a volume as reported by 'rbd info'.
#
# $timeout is accepted for interface compatibility and not used here.
sub volume_size_info {
    my ($class, $scfg, $storeid, $volname, $timeout) = @_;

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);
    my ($size, undef) = rbd_volume_info($scfg, $storeid, $name);

    return $size;
}
# Resize a volume with 'rbd resize'.
#
# $size    - new size; it is divided by 1024*1024 before being passed to
#            '--size'.
# $running - when true and 'krbd' is not enabled, nothing is done here and 1
#            is returned.
#
# Returns undef after a resize; dies when the command fails.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub volume_resize {
    my ($class, $scfg, $storeid, $volname, $size, $running) = @_;

    return 1 if $running && !$scfg->{krbd}; # FIXME???

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    my $cmd = $rbd_cmd->($scfg, $storeid, 'resize', '--allow-shrink', '--size', ($size/1024/1024), $name);
    run_rbd_command($cmd, errmsg => "rbd resize '$volname' error");

    return undef;
}
# Create snapshot $snap of a volume with 'rbd snap create'.
#
# Returns undef; dies when the command fails.
sub volume_snapshot {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'create', '--snap', $snap, $name);
    run_rbd_command($cmd, errmsg => "rbd snapshot '$volname' error");

    return undef;
}
# Roll a volume back to snapshot $snap with 'rbd snap rollback'.
#
# Has no explicit return statement, so it yields whatever run_rbd_command()
# returns; dies when the command fails.
sub volume_snapshot_rollback {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'rollback', '--snap', $snap, $name);
    run_rbd_command($cmd, errmsg => "rbd snapshot $volname to '$snap' error");
}
# Delete snapshot $snap of a volume.
#
# $running - when true and 'krbd' is not enabled, nothing is done here and 1
#            is returned.
#
# Otherwise the snapshot is deactivated, unprotected if necessary and then
# removed with 'rbd snap rm'. Returns undef; dies when a command fails.
sub volume_snapshot_delete {
    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;

    return 1 if $running && !$scfg->{krbd}; # FIXME: ????

    $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    my (undef, undef, undef, $protected) = rbd_volume_info($scfg, $storeid, $name, $snap);
    if ($protected) {
        my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'unprotect', $name, '--snap', $snap);
        run_rbd_command($cmd, errmsg => "rbd unprotect $name snap '$snap' error");
    }

    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'rm', '--snap', $snap, $name);

    run_rbd_command($cmd, errmsg => "rbd snapshot '$volname' error");

    return undef;
}
768 sub volume_snapshot_needs_fsfreeze
{
# Tell whether a storage feature is available for a volume in its current
# state.
#
# $feature  - feature name, one of the keys of the table below.
# $volname  - volume name; decides between the 'base' and 'current' state.
# $snapname - when true, the 'snap' state is checked instead.
#
# Returns 1 if the feature is available, undef otherwise (including unknown
# feature names).
sub volume_has_feature {
    my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;

    # feature => states of a volume in which the feature is available
    my $features = {
        snapshot => { current => 1, snap => 1},
        clone => { base => 1, snap => 1},
        template => { current => 1},
        copy => { base => 1, current => 1, snap => 1},
        sparseinit => { base => 1, current => 1},
        rename => {current => 1},
    };

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) = $class->parse_volname($volname);

    my $key = undef;
    if ($snapname) {
        $key = 'snap';
    } else {
        $key = $isBase ? 'base' : 'current';
    }

    return 1 if $features->{$feature}->{$key};

    return undef;
}
# Rename a volume, optionally assigning it to another VM.
#
# $source_volname - volume to rename.
# $target_vmid    - VM used to pick a free name when no target is given.
# $target_volname - optional new image name.
#
# Returns the new volume ID "<storeid>:[<basename>/]<target name>". Dies if
# the target already exists or the rename fails.
#
# NOTE(review): the sub header was not visible in the reviewed excerpt; the
# name follows the storage plugin API - confirm.
sub rename_volume {
    my ($class, $scfg, $storeid, $source_volname, $target_vmid, $target_volname) = @_;

    # positions follow the list returned by parse_volname()
    my (
        undef,
        $source_image,
        undef,
        $base_name,
        $base_vmid,
        undef,
        $format
    ) = $class->parse_volname($source_volname);
    $target_volname = $class->find_free_diskname($storeid, $scfg, $target_vmid, $format)
        if !$target_volname;

    # 'rbd info' succeeding means the target name is already taken
    eval {
        my $cmd = $rbd_cmd->($scfg, $storeid, 'info', $target_volname);
        run_rbd_command($cmd, errmsg => "exist check", quiet => 1);
    };
    die "target volume '${target_volname}' already exists\n" if !$@;

    my $cmd = $rbd_cmd->($scfg, $storeid, 'rename', $source_image, $target_volname);

    run_rbd_command(
        $cmd,
        errmsg => "could not rename image '${source_image}' to '${target_volname}'",
    );

    $base_name = $base_name ? "${base_name}/" : '';

    return "${storeid}:${base_name}${target_volname}";
}