# git.proxmox.com Git - pve-manager.git/blob - PVE/API2/Ceph/OSD.pm
# api: ceph: improve reporting of ceph OSD memory usage
# [pve-manager.git] / PVE / API2 / Ceph / OSD.pm
1 package PVE::API2::Ceph::OSD;
2
3 use strict;
4 use warnings;
5
6 use Cwd qw(abs_path);
7 use IO::File;
8 use JSON;
9 use UUID;
10
11 use PVE::Ceph::Tools;
12 use PVE::Ceph::Services;
13 use PVE::CephConfig;
14 use PVE::Cluster qw(cfs_read_file cfs_write_file);
15 use PVE::Diskmanage;
16 use PVE::Storage::LVMPlugin;
17 use PVE::Exception qw(raise_param_exc);
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::INotify;
20 use PVE::RADOS;
21 use PVE::RESTHandler;
22 use PVE::RPCEnvironment;
23 use PVE::Tools qw(run_command file_set_contents);
24 use PVE::ProcFSTools;
25 use PVE::Network;
26
27 use base qw(PVE::RESTHandler);
28
# Hostname of the local node, cached at module load time; the OSD API
# endpoints in this module always operate on the local node.
my $nodename = PVE::INotify::nodename();
30
# Fetch 'osd dump' from the monitors and index the OSD entries by their
# numeric id. When an $osdid is given, return just that OSD's status hash
# (dies if the id is unknown). Without an id, return the whole id => status
# map; in list context the cluster flags string is returned as well.
my $get_osd_status = sub {
    my ($rados, $osdid) = @_;

    my $dump = $rados->mon_command({ prefix => 'osd dump' });

    my $flags = $dump->{flags} || undef;

    my $osdstat;
    for my $entry (@{ $dump->{osds} || [] }) {
	next if !defined($entry->{osd});
	$osdstat->{$entry->{osd}} = $entry;
    }

    if (defined($osdid)) {
	die "no such OSD '$osdid'\n" if !$osdstat->{$osdid};
	return $osdstat->{$osdid};
    }

    return wantarray ? ($osdstat, $flags) : $osdstat;
};
51
# Fetch per-OSD usage statistics via 'pg dump osds' and return them as a
# hashref mapping the numeric OSD id to its statistics entry. Always returns
# a hashref (possibly empty), so callers can safely look up $usage->{$id}.
my $get_osd_usage = sub {
    my ($rados) = @_;

    my $osdlist = $rados->mon_command({ prefix => 'pg dump', dumpcontents => [ 'osds' ]});
    if (!($osdlist && ref($osdlist))) {
	warn "got unknown result format for 'pg dump osds' command\n";
	# FIX: return an empty hash ref instead of an array ref - all callers
	# dereference the result as a hash (e.g. $osd_usage->{$id}), which
	# would die with "Not a HASH reference" on an array ref.
	return {};
    }

    if (ref($osdlist) eq "HASH") { # since nautilus the list is wrapped
	$osdlist = $osdlist->{osd_stats};
    }

    my $osdstat = {};
    for my $d (@$osdlist) {
	$osdstat->{$d->{osd}} = $d if defined($d->{osd});
    }

    return $osdstat;
};
72
__PACKAGE__->register_method ({
    name => 'index',
    path => '',
    method => 'GET',
    description => "Get Ceph osd list/tree.",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	},
    },
    # fixme: return a list instead of extjs tree format ?
    returns => {
	type => "object",
	items => {
	    type => "object",
	    properties => {
		flags => { type => "string" },
		root => {
		    type => "object",
		    description => "Tree with OSDs in the CRUSH map structure.",
		},
	    },
	},
    },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $rados = PVE::RADOS->new();
	# 'osd df tree' returns the CRUSH hierarchy as a flat list of nodes
	# that reference their children by id
	my $res = $rados->mon_command({ prefix => 'osd df', output_method => 'tree', });

	die "no tree nodes found\n" if !($res && $res->{nodes});

	my ($osdhash, $flags) = $get_osd_status->($rados);

	my $osd_usage = $get_osd_usage->($rados);

	my $osdmetadata_res = $rados->mon_command({ prefix => 'osd metadata' });
	my $osdmetadata = { map { $_->{id} => $_ } @$osdmetadata_res };

	my $hostversions = PVE::Ceph::Services::get_ceph_versions();

	# first pass: build a copy of every tree node in $newnodes, enriched
	# with the status, usage and metadata of the respective OSD; $nodes
	# keeps the original entries for parent/child lookups below
	my $nodes = {};
	my $newnodes = {};
	foreach my $e (@{$res->{nodes}}) {
	    my ($id, $name) = $e->@{qw(id name)};

	    $nodes->{$id} = $e;

	    my $new = {
		id => $id,
		name => $name,
		type => $e->{type}
	    };

	    foreach my $opt (qw(status crush_weight reweight device_class pgs)) {
		$new->{$opt} = $e->{$opt} if defined($e->{$opt});
	    }

	    if (my $stat = $osdhash->{$id}) {
		$new->{in} = $stat->{in} if defined($stat->{in});
	    }

	    if (my $stat = $osd_usage->{$id}) {
		# '|| 1' guards the division below against a zero total
		$new->{total_space} = ($stat->{kb} || 1) * 1024;
		$new->{bytes_used} = ($stat->{kb_used} || 0) * 1024;
		$new->{percent_used} = ($new->{bytes_used}*100)/$new->{total_space};
		if (my $d = $stat->{perf_stat}) {
		    $new->{commit_latency_ms} = $d->{commit_latency_ms};
		    $new->{apply_latency_ms} = $d->{apply_latency_ms};
		}
	    }

	    my $osdmd = $osdmetadata->{$id};
	    if ($e->{type} eq 'osd' && $osdmd) {
		if ($osdmd->{bluefs}) {
		    $new->{osdtype} = 'bluestore';
		    $new->{blfsdev} = $osdmd->{bluestore_bdev_dev_node};
		    $new->{dbdev} = $osdmd->{bluefs_db_dev_node};
		    $new->{waldev} = $osdmd->{bluefs_wal_dev_node};
		} else {
		    $new->{osdtype} = 'filestore';
		}
		for my $field (qw(ceph_version ceph_version_short)) {
		    $new->{$field} = $osdmd->{$field} if $osdmd->{$field};
		}
	    }

	    $newnodes->{$id} = $new;
	}

	# second pass: link the copied nodes into an actual tree via the
	# children id lists, record each OSD's host name, and annotate host
	# nodes with their installed ceph version
	foreach my $e (@{$res->{nodes}}) {
	    my ($id, $name) = $e->@{qw(id name)};
	    my $new = $newnodes->{$id};

	    if ($e->{children} && scalar(@{$e->{children}})) {
		$new->{children} = [];
		$new->{leaf} = 0;
		foreach my $cid (@{$e->{children}}) {
		    $nodes->{$cid}->{parent} = $id;
		    if ($nodes->{$cid}->{type} eq 'osd' && $e->{type} eq 'host') {
			$newnodes->{$cid}->{host} = $name;
		    }
		    push @{$new->{children}}, $newnodes->{$cid};
		}
	    } else {
		# non-negative ids are OSDs (leaves), negative ids are
		# CRUSH buckets (hosts, roots, ...)
		$new->{leaf} = ($id >= 0) ? 1 : 0;
	    }

	    if ($name && $e->{type} eq 'host') {
		$new->{version} = $hostversions->{$name}->{version}->{str};
	    }
	}

	# nodes that never got a parent assigned are the real CRUSH roots
	my $realroots = [];
	foreach my $e (@{$res->{nodes}}) {
	    my $id = $e->{id};
	    if (!$nodes->{$id}->{parent}) {
		push @$realroots, $newnodes->{$id};
	    }
	}

	die "no root node\n" if scalar(@$realroots) < 1;

	# wrap all real roots in one artificial root node, as the ExtJS
	# tree widget expects a single root
	my $data = {
	    root => {
		leaf => 0,
		children => $realroots
	    },
	};

	$data->{flags} = $flags if $flags; # we want this for the noout flag

	return $data;
    }});
215
__PACKAGE__->register_method ({
    name => 'createosd',
    path => '',
    method => 'POST',
    description => "Create OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    dev => {
		description => "Block device name.",
		type => 'string',
	    },
	    db_dev => {
		description => "Block device name for block.db.",
		optional => 1,
		type => 'string',
	    },
	    db_dev_size => {
		description => "Size in GiB for block.db.",
		verbose_description => "If a block.db is requested but the size is not given, ".
		    "will be automatically selected by: bluestore_block_db_size from the ".
		    "ceph database (osd or global section) or config (osd or global section)".
		    "in that order. If this is not available, it will be sized 10% of the size ".
		    "of the OSD device. Fails if the available size is not enough.",
		optional => 1,
		type => 'number',
		default => 'bluestore_block_db_size or 10% of OSD size',
		requires => 'db_dev',
		minimum => 1.0,
	    },
	    wal_dev => {
		description => "Block device name for block.wal.",
		optional => 1,
		type => 'string',
	    },
	    wal_dev_size => {
		description => "Size in GiB for block.wal.",
		verbose_description => "If a block.wal is requested but the size is not given, ".
		    "will be automatically selected by: bluestore_block_wal_size from the ".
		    "ceph database (osd or global section) or config (osd or global section)".
		    "in that order. If this is not available, it will be sized 1% of the size ".
		    "of the OSD device. Fails if the available size is not enough.",
		optional => 1,
		minimum => 0.5,
		default => 'bluestore_block_wal_size or 1% of OSD size',
		requires => 'wal_dev',
		type => 'number',
	    },
	    encrypted => {
		type => 'boolean',
		optional => 1,
		default => 0,
		description => "Enables encryption of the OSD."
	    },
	    'crush-device-class' => {
		optional => 1,
		type => 'string',
		description => "Set the device class of the OSD in crush."
	    },
	},
    },
    returns => { type => 'string' },
    code => sub {
	my ($param) = @_;

	my $rpcenv = PVE::RPCEnvironment::get();

	my $authuser = $rpcenv->get_user();

	# test basic requirements
	PVE::Ceph::Tools::check_ceph_inited();
	PVE::Ceph::Tools::setup_pve_symlinks();
	PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
	PVE::Ceph::Tools::check_ceph_installed('ceph_volume');

	# extract parameter info and fail if a device is set more than once
	my $devs = {};

	my $ceph_conf = cfs_read_file('ceph.conf');

	# OSD replication traffic uses the cluster network when one is
	# configured, otherwise the public network
	my $osd_network = $ceph_conf->{global}->{cluster_network};
	$osd_network //= $ceph_conf->{global}->{public_network}; # fallback

	if ($osd_network) { # check only if something is configured
	    my $cluster_net_ips = PVE::Network::get_local_ip_from_cidr($osd_network);
	    if (scalar(@$cluster_net_ips) < 1) {
		my $osd_net_obj = PVE::Network::IP_from_cidr($osd_network);
		my $osd_base_cidr = $osd_net_obj->{ip} . "/" . $osd_net_obj->{prefixlen};

		die "No address from ceph cluster network (${osd_base_cidr}) found on node '$nodename'. ".
		    "Check your network config.\n";
	    }
	}

	# normalize and validate the dev/db_dev/wal_dev parameters; the same
	# physical device must not be used for more than one role
	for my $type ( qw(dev db_dev wal_dev) ) {
	    next if !$param->{$type};

	    my $type_dev = PVE::Diskmanage::verify_blockdev_path($param->{$type});
	    (my $type_devname = $type_dev) =~ s|/dev/||;

	    raise_param_exc({ $type => "cannot chose '$type_dev' for more than one type." })
		if grep { $_->{name} eq $type_devname } values %$devs;

	    $devs->{$type} = {
		dev => $type_dev,
		name => $type_devname,
	    };

	    # sizes are passed in GiB via the API but handled in bytes here
	    if (my $size = $param->{"${type}_size"}) {
		$devs->{$type}->{size} = PVE::Tools::convert_size($size, 'gb' => 'b') ;
	    }
	}

	# verify the data device is free and db/wal devices are either
	# unused, GPT-partitioned, or carry LVM
	my $test_disk_requirements = sub {
	    my ($disklist) = @_;

	    my $dev = $devs->{dev}->{dev};
	    my $devname = $devs->{dev}->{name};
	    die "unable to get device info for '$dev'\n" if !$disklist->{$devname};
	    die "device '$dev' is already in use\n" if $disklist->{$devname}->{used};

	    for my $type ( qw(db_dev wal_dev) ) {
		my $d = $devs->{$type};
		next if !$d;
		my $name = $d->{name};
		my $info = $disklist->{$name};
		die "unable to get device info for '$d->{dev}' for type $type\n" if !$disklist->{$name};
		if (my $usage = $info->{used}) {
		    if ($usage eq 'partitions') {
			die "device '$d->{dev}' is not GPT partitioned\n" if !$info->{gpt};
		    } elsif ($usage ne 'LVM') {
			die "device '$d->{dev}' is already in use and has no LVM on it\n";
		    }
		}
	    }
	};


	# test disk requirements early
	my $devlist = [ map { $_->{name} } values %$devs ];
	my $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
	$test_disk_requirements->($disklist);

	# get necessary ceph infos
	my $rados = PVE::RADOS->new();
	my $monstat = $rados->mon_command({ prefix => 'quorum_status' });

	die "unable to get fsid\n" if !$monstat->{monmap} || !$monstat->{monmap}->{fsid};
	my $fsid = $monstat->{monmap}->{fsid};
	$fsid = $1 if $fsid =~ m/^([0-9a-f\-]+)$/;

	my $ceph_bootstrap_osd_keyring = PVE::Ceph::Tools::get_config('ceph_bootstrap_osd_keyring');

	# create the bootstrap-osd keyring on demand when cephx auth is active;
	# ceph-volume needs it to register the new OSD with the monitors
	if (! -f $ceph_bootstrap_osd_keyring && $ceph_conf->{global}->{auth_client_required} eq 'cephx') {
	    my $bindata = $rados->mon_command({
		prefix => 'auth get-or-create',
		entity => 'client.bootstrap-osd',
		caps => [
		    'mon' => 'allow profile bootstrap-osd'
		],
		format => 'plain',
	    });
	    file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
	};

	# See FIXME below
	my @udev_trigger_devs = ();

	# provision a db/wal volume on $dev: a fresh PV/VG/LV on an unused
	# device, an LV in an existing ceph VG, or a new GPT partition;
	# returns "vg/lv" or the partition path
	my $create_part_or_lv = sub {
	    my ($dev, $size, $type) = @_;

	    $size =~ m/^(\d+)$/ or die "invalid size '$size'\n";
	    $size = $1;

	    die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
		if $dev->{size} < $size;

	    # sgdisk and lvcreate can only sizes divisible by 512b
	    # so we round down to the nearest kb
	    $size = PVE::Tools::convert_size($size, 'b' => 'kb', 1);

	    if (!$dev->{used}) {
		# create pv,vg,lv

		my $vg = "ceph-" . UUID::uuid();
		my $lv = $type . "-" . UUID::uuid();

		PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
		PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

		if (PVE::Diskmanage::is_partition($dev->{devpath})) {
		    # 8E00 marks the partition as 'Linux LVM'
		    eval { PVE::Diskmanage::change_parttype($dev->{devpath}, '8E00'); };
		    warn $@ if $@;
		}

		push @udev_trigger_devs, $dev->{devpath};

		return "$vg/$lv";

	    } elsif ($dev->{used} eq 'LVM') {
		# check pv/vg and create lv

		my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
		my $vg;
		for my $vgname ( sort keys %$vgs ) {
		    next if $vgname !~ /^ceph-/;

		    for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
			next if $pv->{name} ne $dev->{devpath};
			$vg = $vgname;
			last;
		    }
		    last if $vg;
		}

		die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
		die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;

		my $lv = $type . "-" . UUID::uuid();

		PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

		return "$vg/$lv";

	    } elsif ($dev->{used} eq 'partitions' && $dev->{gpt}) {
		# create new partition at the end
		# GUIDs for ceph journal/db and wal partitions
		my $parttypes = {
		    'osd-db' => '30CD0809-C2B2-499C-8879-2D6B78529876',
		    'osd-wal' => '5CE17FCE-4087-4169-B7FF-056CC58473F9',
		};

		my $part = PVE::Diskmanage::append_partition($dev->{devpath}, $size * 1024);

		if (my $parttype = $parttypes->{$type}) {
		    eval { PVE::Diskmanage::change_parttype($part, $parttype); };
		    warn $@ if $@;
		}

		push @udev_trigger_devs, $part;
		return $part;
	    }

	    die "cannot use '$dev->{devpath}' for '$type'\n";
	};

	my $worker = sub {
	    my $upid = shift;

	    PVE::Diskmanage::locked_disk_action(sub {
		# update disklist and re-test requirements
		$disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
		$test_disk_requirements->($disklist);

		my $dev_class = $param->{'crush-device-class'};
		my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
		push @$cmd, '--crush-device-class', $dev_class if $dev_class;

		my $devname = $devs->{dev}->{name};
		my $devpath = $disklist->{$devname}->{devpath};
		print "create OSD on $devpath (bluestore)\n";

		push @udev_trigger_devs, $devpath;

		my $osd_size = $disklist->{$devname}->{size};
		# fallback sizes if neither API parameter nor ceph config
		# provide one
		my $size_map = {
		    db => int($osd_size / 10), # 10% of OSD
		    wal => int($osd_size / 100), # 1% of OSD
		};

		my $sizes;
		foreach my $type ( qw(db wal) ) {
		    my $fallback_size = $size_map->{$type};
		    my $d = $devs->{"${type}_dev"};
		    next if !$d;

		    # size was not set via api, getting from config/fallback
		    if (!defined($d->{size})) {
			$sizes = PVE::Ceph::Tools::get_db_wal_sizes() if !$sizes;
			$d->{size} = $sizes->{$type} // $fallback_size;
		    }
		    print "creating block.$type on '$d->{dev}'\n";
		    my $name = $d->{name};
		    my $part_or_lv = $create_part_or_lv->($disklist->{$name}, $d->{size}, "osd-$type");

		    print "using '$part_or_lv' for block.$type\n";
		    push @$cmd, "--block.$type", $part_or_lv;
		}

		push @$cmd, '--data', $devpath;
		push @$cmd, '--dmcrypt' if $param->{encrypted};

		PVE::Diskmanage::wipe_blockdev($devpath);

		if (PVE::Diskmanage::is_partition($devpath)) {
		    eval { PVE::Diskmanage::change_parttype($devpath, '8E00'); };
		    warn $@ if $@;
		}

		run_command($cmd);

		# FIXME: Remove once we depend on systemd >= v249.
		# Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
		# udev database is updated.
		eval { run_command(['udevadm', 'trigger', @udev_trigger_devs]); };
		warn $@ if $@;
	    });
	};

	return $rpcenv->fork_worker('cephcreateosd', $devs->{dev}->{name}, $authuser, $worker);
    }});
529
# Shared return-schema properties for the per-device entries (block/db/wal)
# returned by the 'osddetails' endpoint below.
my $OSD_DEV_RETURN_PROPS = {
    device => {
	type => 'string',
	enum => ['block', 'db', 'wal'],
	description => 'Kind of OSD device',
    },
    dev_node => {
	type => 'string',
	description => 'Device node',
    },
    devices => {
	type => 'string',
	description => 'Physical disks used',
    },
    size => {
	type => 'integer',
	description => 'Size in bytes',
    },
    support_discard => {
	type => 'boolean',
	description => 'Discard support of the physical device',
    },
    type => {
	type => 'string',
	description => 'Type of device. For example, hdd or ssd',
    },
};
557
__PACKAGE__->register_method ({
    name => 'osdindex',
    path => '{osdid}',
    method => 'GET',
    permissions => { user => 'all' },
    description => "OSD index.",
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => {
	type => 'array',
	items => {
	    type => "object",
	    properties => {},
	},
	links => [ { rel => 'child', href => "{name}" } ],
    },
    code => sub {
	my ($param) = @_;

	# static directory listing of the per-OSD sub-endpoints
	return [ map { { name => $_ } } qw(metadata lv-info) ];
    }});
592
__PACKAGE__->register_method ({
    name => 'osddetails',
    path => '{osdid}/metadata',
    method => 'GET',
    description => "Get OSD details",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => {
	type => 'object',
	properties => {
	    osd => {
		type => 'object',
		description => 'General information about the OSD',
		properties => {
		    hostname => {
			type => 'string',
			description => 'Name of the host containing the OSD.',
		    },
		    id => {
			type => 'integer',
			description => 'ID of the OSD.',
		    },
		    mem_usage => {
			type => 'integer',
			description => 'Memory usage of the OSD service.',
		    },
		    osd_data => {
			type => 'string',
			description => "Path to the OSD's data directory.",
		    },
		    osd_objectstore => {
			type => 'string',
			description => 'The type of object store used.',
		    },
		    pid => {
			type => 'integer',
			description => 'OSD process ID.',
		    },
		    version => {
			type => 'string',
			description => 'Ceph version of the OSD service.',
		    },
		    front_addr => {
			type => 'string',
			description => 'Address and port used to talk to clients and monitors.',
		    },
		    back_addr => {
			type => 'string',
			description => 'Address and port used to talk to other OSDs.',
		    },
		    hb_front_addr => {
			type => 'string',
			description => 'Heartbeat address and port for clients and monitors.',
		    },
		    hb_back_addr => {
			type => 'string',
			description => 'Heartbeat address and port for other OSDs.',
		    },
		},
	    },
	    devices => {
		type => 'array',
		description => 'Array containing data about devices',
		items => {
		    type => "object",
		    properties => $OSD_DEV_RETURN_PROPS,
		},
	    }
	}
    },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $osdid = $param->{osdid};
	my $rados = PVE::RADOS->new();
	# 'osd metadata' with an id returns the metadata of that single OSD
	my $metadata = $rados->mon_command({ prefix => 'osd metadata', id => int($osdid) });

	# FIX: error message grammar ("does not exists" -> "does not exist")
	die "OSD '${osdid}' does not exist on host '${nodename}'\n"
	    if $nodename ne $metadata->{hostname};

	# ask systemd for the main PID of the OSD's service unit
	my $pid;
	my $parser = sub {
	    my $line = shift;
	    if ($line =~ m/^MainPID=([0-9]*)$/) {
		$pid = $1;
	    }
	};

	my $cmd = [
	    '/bin/systemctl',
	    'show',
	    "ceph-osd\@${osdid}.service",
	    '--property',
	    'MainPID',
	];
	run_command($cmd, errmsg => 'fetching OSD PID and memory usage failed', outfunc => $parser);

	$pid = defined($pid) ? int($pid) : undef;

	# use the PSS value from smaps_rollup as memory usage; unlike RSS it
	# accounts shared pages proportionally between processes
	my $memory = 0;
	if ($pid && $pid > 0) {
	    open (my $SMAPS, '<', "/proc/$pid/smaps_rollup")
		or die "failed to read PSS memory-stat from process - $!\n";

	    while (my $line = <$SMAPS>) {
		if ($line =~ m/^Pss:\s+([0-9]+) kB$/) {
		    $memory = $1 * 1024;
		    last;
		}
	    }

	    close $SMAPS;
	}

	my $data = {
	    osd => {
		hostname => $metadata->{hostname},
		id => $metadata->{id},
		mem_usage => $memory,
		osd_data => $metadata->{osd_data},
		osd_objectstore => $metadata->{osd_objectstore},
		pid => $pid,
		version => "$metadata->{ceph_version_short} ($metadata->{ceph_release})",
		front_addr => $metadata->{front_addr},
		back_addr => $metadata->{back_addr},
		hb_front_addr => $metadata->{hb_front_addr},
		hb_back_addr => $metadata->{hb_back_addr},
	    },
	};

	$data->{devices} = [];

	# collect one entry per OSD device (block/db/wal) from the metadata
	my $get_data = sub {
	    my ($dev, $prefix, $device) = @_;
	    push (
		@{$data->{devices}},
		{
		    dev_node => $metadata->{"${prefix}_${dev}_dev_node"},
		    # NOTE(review): the key 'physical_device' does not match
		    # the declared return schema property 'devices' above -
		    # confirm which name API consumers actually expect before
		    # changing either side.
		    physical_device => $metadata->{"${prefix}_${dev}_devices"},
		    size => int($metadata->{"${prefix}_${dev}_size"}),
		    support_discard => int($metadata->{"${prefix}_${dev}_support_discard"}),
		    type => $metadata->{"${prefix}_${dev}_type"},
		    device => $device,
		}
	    );
	};

	$get_data->("bdev", "bluestore", "block");
	$get_data->("db", "bluefs", "db") if $metadata->{bluefs_dedicated_db};
	$get_data->("wal", "bluefs", "wal") if $metadata->{bluefs_dedicated_wal};

	return $data;
    }});
762
__PACKAGE__->register_method ({
    name => 'osdvolume',
    path => '{osdid}/lv-info',
    method => 'GET',
    description => "Get OSD volume details",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    type => {
		description => 'OSD device type',
		type => 'string',
		enum => ['block', 'db', 'wal'],
		default => 'block',
		optional => 1,
	    },
	},
    },
    returns => {
	type => 'object',
	properties => {
	    creation_time => {
		type => 'string',
		description => "Creation time as reported by `lvs`.",
	    },
	    lv_name => {
		type => 'string',
		description => 'Name of the logical volume (LV).',
	    },
	    lv_path => {
		type => 'string',
		description => 'Path to the logical volume (LV).',
	    },
	    lv_size => {
		type => 'integer',
		description => 'Size of the logical volume (LV).',
	    },
	    lv_uuid => {
		type => 'string',
		description => 'UUID of the logical volume (LV).',
	    },
	    vg_name => {
		type => 'string',
		description => 'Name of the volume group (VG).',
	    },
	},
    },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $osdid = $param->{osdid};
	my $type = $param->{type} // 'block';

	# collect the JSON output of 'ceph-volume lvm list' for this OSD
	my $raw = '';
	my $parser = sub { $raw .= shift };
	my $cmd = ['/usr/sbin/ceph-volume', 'lvm', 'list', $osdid, '--format', 'json'];
	run_command($cmd, errmsg => 'listing Ceph LVM volumes failed', outfunc => $parser);

	my $result;
	if ($raw =~ m/^(\{.*\})$/s) { #untaint
	    $result = JSON::decode_json($1);
	} else {
	    die "got unexpected data from ceph-volume: '${raw}'\n";
	}
	if (!$result->{$osdid}) {
	    die "OSD '${osdid}' not found in 'ceph-volume lvm list' on node '${nodename}'.\n"
		."Maybe it was created before LVM became the default?\n";
	}

	# index the OSD's LVs by role (block/db/wal) and pick the requested one
	my $lv_data = { map { $_->{type} => $_ } @{$result->{$osdid}} };
	my $volume = $lv_data->{$type} || die "volume type '${type}' not found for OSD ${osdid}\n";

	# query lvs for the creation time of the chosen LV; the same $parser
	# closure appends to $raw, which is reset first
	$raw = '';
	$cmd = ['/sbin/lvs', $volume->{lv_path}, '--reportformat', 'json', '-o', 'lv_time'];
	run_command($cmd, errmsg => 'listing logical volumes failed', outfunc => $parser);

	if ($raw =~ m/(\{.*\})$/s) { #untaint, lvs has whitespace at beginning
	    $result = JSON::decode_json($1);
	} else {
	    die "got unexpected data from lvs: '${raw}'\n";
	}

	my $data = { map { $_ => $volume->{$_} } qw(lv_name lv_path lv_uuid vg_name) };
	$data->{lv_size} = int($volume->{lv_size});

	$data->{creation_time} = @{$result->{report}}[0]->{lv}[0]->{lv_time};

	return $data;
    }});
863
# Check whether the given OSD id is listed as a child of the given host in
# the CRUSH tree.
# $tree ... decoded output of 'ceph osd tree' (passed in to ease testing)
# Returns a true value (the number of matches) if the OSD belongs to the
# host, otherwise 0 / an empty list.
sub osd_belongs_to_node {
    my ($tree, $nodename, $osdid) = @_;

    return 0 if !($tree && $tree->{nodes});

    my %host_entry_of;
    for my $entry (@{ $tree->{nodes} }) {
	next if !defined($entry->{type}) || $entry->{type} ne 'host';

	my $name = $entry->{name};
	die "internal error: duplicate host name found '$name'\n"
	    if $host_entry_of{$name};
	$host_entry_of{$name} = $entry;
    }

    my $children = $host_entry_of{$nodename}->{children};
    return 0 if !$children;

    return grep { $_ == $osdid } @$children;
}
882
__PACKAGE__->register_method ({
    name => 'destroyosd',
    path => '{osdid}',
    method => 'DELETE',
    description => "Destroy OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    cleanup => {
		description => "If set, we remove partition table entries.",
		type => 'boolean',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => { type => 'string' },
    code => sub {
	my ($param) = @_;

	my $rpcenv = PVE::RPCEnvironment::get();

	my $authuser = $rpcenv->get_user();

	PVE::Ceph::Tools::check_ceph_inited();

	my $osdid = $param->{osdid};
	my $cleanup = $param->{cleanup};

	my $rados = PVE::RADOS->new();

	# only allow destroying OSDs that the CRUSH tree places on this node
	my $osd_belongs_to_node = osd_belongs_to_node(
	    $rados->mon_command({ prefix => 'osd tree' }),
	    $param->{node},
	    $osdid,
	);
	die "OSD osd.$osdid does not belong to node $param->{node}!"
	    if !$osd_belongs_to_node;

	# dies if osdid is unknown
	my $osdstat = $get_osd_status->($rados, $osdid);

	# refuse while the OSD still participates in the cluster
	die "osd is in use (in == 1)\n" if $osdstat->{in};
	#&$run_ceph_cmd(['osd', 'out', $osdid]);

	die "osd is still running (up == 1)\n" if $osdstat->{up};

	my $osdsection = "osd.$osdid";

	my $worker = sub {
	    my $upid = shift;

	    # reopen with longer timeout
	    $rados = PVE::RADOS->new(timeout => PVE::Ceph::Tools::get_config('long_rados_timeout'));

	    print "destroy OSD $osdsection\n";

	    # best effort: stop and disable the service unit; failures are
	    # only warned about since the OSD is already down
	    eval {
		PVE::Ceph::Services::ceph_service_cmd('stop', $osdsection);
		PVE::Ceph::Services::ceph_service_cmd('disable', $osdsection);
	    };
	    warn $@ if $@;

	    print "Remove $osdsection from the CRUSH map\n";
	    $rados->mon_command({ prefix => "osd crush remove", name => $osdsection, format => 'plain' });

	    print "Remove the $osdsection authentication key.\n";
	    $rados->mon_command({ prefix => "auth del", entity => $osdsection, format => 'plain' });

	    print "Remove OSD $osdsection\n";
	    $rados->mon_command({ prefix => "osd rm", ids => [ $osdsection ], format => 'plain' });

	    # try to unmount from standard mount point
	    my $mountpoint = "/var/lib/ceph/osd/ceph-$osdid";

	    # See FIXME below
	    my $udev_trigger_devs = {};

	    # wipe a single partition and delete it from the partition table
	    my $remove_partition = sub {
		my ($part) = @_;

		return if !$part || (! -b $part );
		my $partnum = PVE::Diskmanage::get_partnum($part);
		my $devpath = PVE::Diskmanage::get_blockdev($part);

		$udev_trigger_devs->{$devpath} = 1;

		PVE::Diskmanage::wipe_blockdev($part);
		print "remove partition $part (disk '${devpath}', partnum $partnum)\n";
		eval { run_command(['/sbin/sgdisk', '-d', $partnum, "${devpath}"]); };
		warn $@ if $@;
	    };

	    my $osd_list = PVE::Ceph::Tools::ceph_volume_list();

	    if ($osd_list->{$osdid}) { # ceph-volume managed

		eval { PVE::Ceph::Tools::ceph_volume_zap($osdid, $cleanup) };
		warn $@ if $@;

		if ($cleanup) {
		    # try to remove pvs, but do not fail if it does not work
		    for my $osd_part (@{$osd_list->{$osdid}}) {
			for my $dev (@{$osd_part->{devices}}) {
			    ($dev) = ($dev =~ m|^(/dev/[-_.a-zA-Z0-9\/]+)$|); #untaint

			    eval { run_command(['/sbin/pvremove', $dev], errfunc => sub {}) };
			    warn $@ if $@;

			    $udev_trigger_devs->{$dev} = 1;
			}
		    }
		}
	    } else {
		# legacy (not ceph-volume managed) OSD: unmount it and
		# optionally remove its partitions
		my $partitions_to_remove = [];
		if ($cleanup) {
		    # find the device currently mounted on the OSD mount point
		    if (my $mp = PVE::ProcFSTools::parse_proc_mounts()) {
			foreach my $line (@$mp) {
			    my ($dev, $path, $fstype) = @$line;
			    next if !($dev && $path && $fstype);
			    next if $dev !~ m|^/dev/|;

			    if ($path eq $mountpoint) {
				abs_path($dev) =~ m|^(/.+)| or die "invalid dev: $dev\n";
				push @$partitions_to_remove, $1;
				last;
			    }
			}
		    }

		    # journal/db/wal symlinks inside the mount point resolve
		    # to the partitions backing them
		    foreach my $path (qw(journal block block.db block.wal)) {
			abs_path("$mountpoint/$path") =~ m|^(/.+)| or die "invalid path: $path\n";
			push @$partitions_to_remove, $1;
		    }
		}

		print "Unmount OSD $osdsection from  $mountpoint\n";
		eval { run_command(['/bin/umount', $mountpoint]); };
		if (my $err = $@) {
		    warn $err;
		} elsif ($cleanup) {
		    #be aware of the ceph udev rules which can remount.
		    foreach my $part (@$partitions_to_remove) {
			$remove_partition->($part);
		    }
		}
	    }

	    # FIXME: Remove once we depend on systemd >= v249.
	    # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
	    # udev database is updated.
	    if ($cleanup) {
		eval { run_command(['udevadm', 'trigger', keys $udev_trigger_devs->%*]); };
		warn $@ if $@;
	    }
	};

	return $rpcenv->fork_worker('cephdestroyosd', $osdsection, $authuser, $worker);
    }});
1049
__PACKAGE__->register_method ({
    name => 'in',
    path => '{osdid}/in',
    method => 'POST',
    description => "ceph osd in",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};
	my $rados = PVE::RADOS->new();

	# dies if the given OSD id is unknown
	$get_osd_status->($rados, $id);

	# mark the OSD as 'in' so it receives data again
	$rados->mon_command({ prefix => "osd in", ids => [ "osd.$id" ], format => 'plain' });

	return undef;
    }});
1088
__PACKAGE__->register_method ({
    name => 'out',
    path => '{osdid}/out',
    method => 'POST',
    description => "ceph osd out",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};
	my $rados = PVE::RADOS->new();

	# dies if the given OSD id is unknown
	$get_osd_status->($rados, $id);

	# mark the OSD as 'out' so data is migrated away from it
	$rados->mon_command({ prefix => "osd out", ids => [ "osd.$id" ], format => 'plain' });

	return undef;
    }});
1127
__PACKAGE__->register_method ({
    name => 'scrub',
    path => '{osdid}/scrub',
    method => 'POST',
    description => "Instruct the OSD to scrub.",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    deep => {
		description => 'If set, instructs a deep scrub instead of a normal one.',
		type => 'boolean',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};
	my $rados = PVE::RADOS->new();

	# dies if the given OSD id is unknown
	$get_osd_status->($rados, $id);

	# deep scrubs verify object contents, normal scrubs only metadata
	my $cmd_prefix = ($param->{deep} // 0) ? 'osd deep-scrub' : 'osd scrub';
	$rados->mon_command({ prefix => $cmd_prefix, who => $id });

	return undef;
    }});
1172
1173 1;