]> git.proxmox.com Git - pve-manager.git/blame - PVE/API2/Ceph/OSD.pm
api: ceph: improve reporting of ceph OSD memory usage
[pve-manager.git] / PVE / API2 / Ceph / OSD.pm
CommitLineData
79fa41a2
DC
1package PVE::API2::Ceph::OSD;
2
3use strict;
4use warnings;
5
6use Cwd qw(abs_path);
7use IO::File;
e907f822 8use JSON;
7783f755 9use UUID;
79fa41a2
DC
10
11use PVE::Ceph::Tools;
12use PVE::Ceph::Services;
13use PVE::CephConfig;
14use PVE::Cluster qw(cfs_read_file cfs_write_file);
15use PVE::Diskmanage;
7783f755 16use PVE::Storage::LVMPlugin;
79fa41a2
DC
17use PVE::Exception qw(raise_param_exc);
18use PVE::JSONSchema qw(get_standard_option);
a05349ab 19use PVE::INotify;
79fa41a2
DC
20use PVE::RADOS;
21use PVE::RESTHandler;
22use PVE::RPCEnvironment;
23use PVE::Tools qw(run_command file_set_contents);
3c6aa3f4 24use PVE::ProcFSTools;
05bd76ac 25use PVE::Network;
79fa41a2
DC
26
27use base qw(PVE::RESTHandler);
28
a05349ab
TL
29my $nodename = PVE::INotify::nodename();
30
79fa41a2
DC
# Query 'osd dump' from the monitors and index the reported OSD entries by their id.
#
# With $osdid given: returns that OSD's entry, dies if it is not part of the dump.
# Without $osdid: returns the id => entry hash; in list context the cluster flags
# from the dump are returned as second element.
my $get_osd_status = sub {
    my ($rados, $osdid) = @_;

    my $dump = $rados->mon_command({ prefix => 'osd dump' });
    my $flags = $dump->{flags} || undef;

    my $by_id; # intentionally left undef while no usable entry was seen
    for my $entry (@{ $dump->{osds} || [] }) {
        next if !defined($entry->{osd});
        $by_id->{$entry->{osd}} = $entry;
    }

    if (!defined($osdid)) {
        return wantarray ? ($by_id, $flags) : $by_id;
    }

    my $single = $by_id->{$osdid};
    die "no such OSD '$osdid'\n" if !$single;

    return $single;
};
51
# Query 'pg dump osds' from the monitors and return a hash reference mapping each
# OSD id to its usage entry.
#
# Handles both result layouts visible here: a plain list of entries, or a hash that
# carries the list under 'osd_stats' (since nautilus). On an unusable result a warning
# is emitted and an empty hash is returned, so callers can always do hash lookups.
my $get_osd_usage = sub {
    my ($rados) = @_;

    my $osdlist = $rados->mon_command({ prefix => 'pg dump', dumpcontents => [ 'osds' ]});
    if (!($osdlist && ref($osdlist))) {
        warn "got unknown result format for 'pg dump osds' command\n";
        # must be a hash like the regular result, callers index it by OSD id
        return {};
    }

    if (ref($osdlist) eq "HASH") { # since nautilus
        $osdlist = $osdlist->{osd_stats};
    }

    my $osdstat = {};
    for my $d (@{ $osdlist // [] }) {
        $osdstat->{$d->{osd}} = $d if defined($d->{osd});
    }

    return $osdstat;
};
72
# GET handler: builds the OSD tree from 'osd df' (tree output), enriched with the
# in/out state, usage numbers, OSD metadata and per-host Ceph versions.
__PACKAGE__->register_method ({
    name => 'index',
    path => '',
    method => 'GET',
    description => "Get Ceph osd list/tree.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
        },
    },
    # fixme: return a list instead of extjs tree format ?
    returns => {
        type => "object",
        items => {
            type => "object",
            properties => {
                flags => { type => "string" },
                root => {
                    type => "object",
                    description => "Tree with OSDs in the CRUSH map structure.",
                },
            },
        },
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $rados = PVE::RADOS->new();
        my $tree = $rados->mon_command({ prefix => 'osd df', output_method => 'tree', });

        die "no tree nodes found\n" if !($tree && $tree->{nodes});

        my ($osd_status, $flags) = $get_osd_status->($rados);
        my $osd_usage = $get_osd_usage->($rados);

        my $metadata_list = $rados->mon_command({ prefix => 'osd metadata' });
        my $metadata_by_id = { map { $_->{id} => $_ } @$metadata_list };

        my $hostversions = PVE::Ceph::Services::get_ceph_versions();

        # pass 1: remember the raw entries and create one output node per tree entry
        my $raw_nodes = {};
        my $tree_nodes = {};
        for my $entry (@{$tree->{nodes}}) {
            my $id = $entry->{id};

            $raw_nodes->{$id} = $entry;

            my $node = {
                id => $id,
                name => $entry->{name},
                type => $entry->{type},
            };

            for my $key (qw(status crush_weight reweight device_class pgs)) {
                next if !defined($entry->{$key});
                $node->{$key} = $entry->{$key};
            }

            if (my $status = $osd_status->{$id}) {
                $node->{in} = $status->{in} if defined($status->{in});
            }

            if (my $usage = $osd_usage->{$id}) {
                my $total = ($usage->{kb} || 1) * 1024;
                my $used = ($usage->{kb_used} || 0) * 1024;

                $node->{total_space} = $total;
                $node->{bytes_used} = $used;
                $node->{percent_used} = ($used * 100) / $total;

                if (my $perf = $usage->{perf_stat}) {
                    $node->{commit_latency_ms} = $perf->{commit_latency_ms};
                    $node->{apply_latency_ms} = $perf->{apply_latency_ms};
                }
            }

            my $md = $metadata_by_id->{$id};
            if ($entry->{type} eq 'osd' && $md) {
                if ($md->{bluefs}) {
                    $node->{osdtype} = 'bluestore';
                    $node->{blfsdev} = $md->{bluestore_bdev_dev_node};
                    $node->{dbdev} = $md->{bluefs_db_dev_node};
                    $node->{waldev} = $md->{bluefs_wal_dev_node};
                } else {
                    $node->{osdtype} = 'filestore';
                }

                for my $field (qw(ceph_version ceph_version_short)) {
                    next if !$md->{$field};
                    $node->{$field} = $md->{$field};
                }
            }

            $tree_nodes->{$id} = $node;
        }

        # pass 2: link children to their parents and attach host level information
        for my $entry (@{$tree->{nodes}}) {
            my $id = $entry->{id};
            my $name = $entry->{name};
            my $node = $tree_nodes->{$id};

            my $child_ids = $entry->{children};
            if ($child_ids && scalar(@$child_ids)) {
                $node->{children} = [];
                $node->{leaf} = 0;
                for my $cid (@$child_ids) {
                    $raw_nodes->{$cid}->{parent} = $id;
                    if ($raw_nodes->{$cid}->{type} eq 'osd' && $entry->{type} eq 'host') {
                        $tree_nodes->{$cid}->{host} = $name;
                    }
                    push @{$node->{children}}, $tree_nodes->{$cid};
                }
            } else {
                # entries with an id >= 0 and no children are flagged as leaves
                $node->{leaf} = ($id >= 0) ? 1 : 0;
            }

            if ($name && $entry->{type} eq 'host') {
                $node->{version} = $hostversions->{$name}->{version}->{str};
            }
        }

        # pass 3: every entry that never got a parent assigned is a root of the tree
        my @roots =
            map { $tree_nodes->{$_->{id}} }
            grep { !$raw_nodes->{$_->{id}}->{parent} }
            @{$tree->{nodes}};

        die "no root node\n" if scalar(@roots) < 1;

        my $data = {
            root => {
                leaf => 0,
                children => \@roots,
            },
        };

        $data->{flags} = $flags if $flags; # we want this for the noout flag

        return $data;
    }});
215
# POST handler: validates the requested devices, optionally carves out block.db/block.wal
# volumes (LV or GPT partition) and creates a bluestore OSD through 'ceph-volume lvm create'
# inside a forked worker. Returns the worker UPID.
__PACKAGE__->register_method ({
    name => 'createosd',
    path => '',
    method => 'POST',
    description => "Create OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            dev => {
                description => "Block device name.",
                type => 'string',
            },
            db_dev => {
                description => "Block device name for block.db.",
                optional => 1,
                type => 'string',
            },
            db_dev_size => {
                description => "Size in GiB for block.db.",
                verbose_description => "If a block.db is requested but the size is not given, ".
                    "will be automatically selected by: bluestore_block_db_size from the ".
                    "ceph database (osd or global section) or config (osd or global section) ".
                    "in that order. If this is not available, it will be sized 10% of the size ".
                    "of the OSD device. Fails if the available size is not enough.",
                optional => 1,
                type => 'number',
                default => 'bluestore_block_db_size or 10% of OSD size',
                requires => 'db_dev',
                minimum => 1.0,
            },
            wal_dev => {
                description => "Block device name for block.wal.",
                optional => 1,
                type => 'string',
            },
            wal_dev_size => {
                description => "Size in GiB for block.wal.",
                verbose_description => "If a block.wal is requested but the size is not given, ".
                    "will be automatically selected by: bluestore_block_wal_size from the ".
                    "ceph database (osd or global section) or config (osd or global section) ".
                    "in that order. If this is not available, it will be sized 1% of the size ".
                    "of the OSD device. Fails if the available size is not enough.",
                optional => 1,
                minimum => 0.5,
                default => 'bluestore_block_wal_size or 1% of OSD size',
                requires => 'wal_dev',
                type => 'number',
            },
            encrypted => {
                type => 'boolean',
                optional => 1,
                default => 0,
                description => "Enables encryption of the OSD."
            },
            'crush-device-class' => {
                optional => 1,
                type => 'string',
                description => "Set the device class of the OSD in crush."
            },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();

        my $authuser = $rpcenv->get_user();

        # test basic requirements
        PVE::Ceph::Tools::check_ceph_inited();
        PVE::Ceph::Tools::setup_pve_symlinks();
        PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
        PVE::Ceph::Tools::check_ceph_installed('ceph_volume');

        # extract parameter info and fail if a device is set more than once
        my $devs = {};

        my $ceph_conf = cfs_read_file('ceph.conf');

        my $osd_network = $ceph_conf->{global}->{cluster_network};
        $osd_network //= $ceph_conf->{global}->{public_network}; # fallback

        if ($osd_network) { # check only if something is configured
            my $cluster_net_ips = PVE::Network::get_local_ip_from_cidr($osd_network);
            if (scalar(@$cluster_net_ips) < 1) {
                my $osd_net_obj = PVE::Network::IP_from_cidr($osd_network);
                my $osd_base_cidr = $osd_net_obj->{ip} . "/" . $osd_net_obj->{prefixlen};

                die "No address from ceph cluster network (${osd_base_cidr}) found on node '$nodename'. ".
                    "Check your network config.\n";
            }
        }

        for my $type ( qw(dev db_dev wal_dev) ) {
            next if !$param->{$type};

            my $type_dev = PVE::Diskmanage::verify_blockdev_path($param->{$type});
            (my $type_devname = $type_dev) =~ s|/dev/||;

            raise_param_exc({ $type => "cannot chose '$type_dev' for more than one type." })
                if grep { $_->{name} eq $type_devname } values %$devs;

            $devs->{$type} = {
                dev => $type_dev,
                name => $type_devname,
            };

            # explicit sizes are passed in GiB, internally we calculate in bytes
            if (my $size = $param->{"${type}_size"}) {
                $devs->{$type}->{size} = PVE::Tools::convert_size($size, 'gb' => 'b') ;
            }
        }

        # dies if the data device is unknown or in use, or if a db/wal device is in use by
        # anything else than LVM or a GPT partition table
        my $test_disk_requirements = sub {
            my ($disklist) = @_;

            my $dev = $devs->{dev}->{dev};
            my $devname = $devs->{dev}->{name};
            die "unable to get device info for '$dev'\n" if !$disklist->{$devname};
            die "device '$dev' is already in use\n" if $disklist->{$devname}->{used};

            for my $type ( qw(db_dev wal_dev) ) {
                my $d = $devs->{$type};
                next if !$d;
                my $name = $d->{name};
                my $info = $disklist->{$name};
                die "unable to get device info for '$d->{dev}' for type $type\n" if !$disklist->{$name};
                if (my $usage = $info->{used}) {
                    if ($usage eq 'partitions') {
                        die "device '$d->{dev}' is not GPT partitioned\n" if !$info->{gpt};
                    } elsif ($usage ne 'LVM') {
                        die "device '$d->{dev}' is already in use and has no LVM on it\n";
                    }
                }
            }
        };


        # test disk requirements early
        my $devlist = [ map { $_->{name} } values %$devs ];
        my $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
        $test_disk_requirements->($disklist);

        # get necessary ceph infos
        my $rados = PVE::RADOS->new();
        my $monstat = $rados->mon_command({ prefix => 'quorum_status' });

        die "unable to get fsid\n" if !$monstat->{monmap} || !$monstat->{monmap}->{fsid};
        my $fsid = $monstat->{monmap}->{fsid};
        $fsid = $1 if $fsid =~ m/^([0-9a-f\-]+)$/;

        my $ceph_bootstrap_osd_keyring = PVE::Ceph::Tools::get_config('ceph_bootstrap_osd_keyring');

        # the option may be absent from ceph.conf, avoid comparing undef
        my $auth_client_required = $ceph_conf->{global}->{auth_client_required} // '';
        if (! -f $ceph_bootstrap_osd_keyring && $auth_client_required eq 'cephx') {
            my $bindata = $rados->mon_command({
                prefix => 'auth get-or-create',
                entity => 'client.bootstrap-osd',
                caps => [
                    'mon' => 'allow profile bootstrap-osd'
                ],
                format => 'plain',
            });
            file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
        }

        # See FIXME below
        my @udev_trigger_devs = ();

        # Creates a volume of $size bytes for $type ('osd-db' or 'osd-wal') on $dev (an entry
        # of the disk list). Returns "vg/lv" for LVM backed volumes or the partition path.
        my $create_part_or_lv = sub {
            my ($dev, $size, $type) = @_;

            $size =~ m/^(\d+)$/ or die "invalid size '$size'\n";
            $size = $1;

            die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
                if $dev->{size} < $size;

            # sgdisk and lvcreate can only handle sizes divisible by 512b
            # so we round down to the nearest kb
            $size = PVE::Tools::convert_size($size, 'b' => 'kb', 1);

            if (!$dev->{used}) {
                # create pv,vg,lv

                my $vg = "ceph-" . UUID::uuid();
                my $lv = $type . "-" . UUID::uuid();

                PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
                PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

                if (PVE::Diskmanage::is_partition($dev->{devpath})) {
                    eval { PVE::Diskmanage::change_parttype($dev->{devpath}, '8E00'); };
                    warn $@ if $@;
                }

                push @udev_trigger_devs, $dev->{devpath};

                return "$vg/$lv";

            } elsif ($dev->{used} eq 'LVM') {
                # check pv/vg and create lv

                my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
                my $vg;
                for my $vgname ( sort keys %$vgs ) {
                    next if $vgname !~ /^ceph-/;

                    for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
                        next if $pv->{name} ne $dev->{devpath};
                        $vg = $vgname;
                        last;
                    }
                    last if $vg;
                }

                die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
                # $size was converted to kb above, scale it back for the comparison.
                # NOTE(review): assumes lvm_vgs() reports 'free' in bytes - confirm against
                # PVE::Storage::LVMPlugin.
                die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size * 1024;

                my $lv = $type . "-" . UUID::uuid();

                PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

                return "$vg/$lv";

            } elsif ($dev->{used} eq 'partitions' && $dev->{gpt}) {
                # create new partition at the end
                my $parttypes = {
                    'osd-db' => '30CD0809-C2B2-499C-8879-2D6B78529876',
                    'osd-wal' => '5CE17FCE-4087-4169-B7FF-056CC58473F9',
                };

                # append_partition gets the size in bytes again
                my $part = PVE::Diskmanage::append_partition($dev->{devpath}, $size * 1024);

                if (my $parttype = $parttypes->{$type}) {
                    eval { PVE::Diskmanage::change_parttype($part, $parttype); };
                    warn $@ if $@;
                }

                push @udev_trigger_devs, $part;
                return $part;
            }

            die "cannot use '$dev->{devpath}' for '$type'\n";
        };

        my $worker = sub {
            my $upid = shift;

            PVE::Diskmanage::locked_disk_action(sub {
                # update disklist and re-test requirements
                $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
                $test_disk_requirements->($disklist);

                my $dev_class = $param->{'crush-device-class'};
                my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
                push @$cmd, '--crush-device-class', $dev_class if $dev_class;

                my $devname = $devs->{dev}->{name};
                my $devpath = $disklist->{$devname}->{devpath};
                print "create OSD on $devpath (bluestore)\n";

                push @udev_trigger_devs, $devpath;

                my $osd_size = $disklist->{$devname}->{size};
                my $size_map = {
                    db => int($osd_size / 10), # 10% of OSD
                    wal => int($osd_size / 100), # 1% of OSD
                };

                my $sizes;
                foreach my $type ( qw(db wal) ) {
                    my $fallback_size = $size_map->{$type};
                    my $d = $devs->{"${type}_dev"};
                    next if !$d;

                    # size was not set via api, getting from config/fallback
                    if (!defined($d->{size})) {
                        $sizes = PVE::Ceph::Tools::get_db_wal_sizes() if !$sizes;
                        $d->{size} = $sizes->{$type} // $fallback_size;
                    }
                    print "creating block.$type on '$d->{dev}'\n";
                    my $name = $d->{name};
                    my $part_or_lv = $create_part_or_lv->($disklist->{$name}, $d->{size}, "osd-$type");

                    print "using '$part_or_lv' for block.$type\n";
                    push @$cmd, "--block.$type", $part_or_lv;
                }

                push @$cmd, '--data', $devpath;
                push @$cmd, '--dmcrypt' if $param->{encrypted};

                PVE::Diskmanage::wipe_blockdev($devpath);

                if (PVE::Diskmanage::is_partition($devpath)) {
                    eval { PVE::Diskmanage::change_parttype($devpath, '8E00'); };
                    warn $@ if $@;
                }

                run_command($cmd);

                # FIXME: Remove once we depend on systemd >= v249.
                # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
                # udev database is updated.
                eval { run_command(['udevadm', 'trigger', @udev_trigger_devs]); };
                warn $@ if $@;
            });
        };

        return $rpcenv->fork_worker('cephcreateosd', $devs->{dev}->{name}, $authuser, $worker);
    }});
529
e907f822
AL
# Return schema properties describing a single OSD device entry, used for the items of
# the 'devices' array returned by the 'osddetails' API call.
# NOTE(review): the 'osddetails' handler fills a 'physical_device' key while this schema
# declares 'devices' - confirm which name API consumers rely on before aligning them.
my $OSD_DEV_RETURN_PROPS = {
    device => {
        description => 'Kind of OSD device',
        type => 'string',
        enum => ['block', 'db', 'wal'],
    },
    dev_node => { description => 'Device node', type => 'string' },
    devices => { description => 'Physical disks used', type => 'string' },
    size => { description => 'Size in bytes', type => 'integer' },
    support_discard => {
        description => 'Discard support of the physical device',
        type => 'boolean',
    },
    type => {
        description => 'Type of device. For example, hdd or ssd',
        type => 'string',
    },
};
557
# GET handler: directory index listing the sub-paths available below a single OSD.
__PACKAGE__->register_method ({
    name => 'osdindex',
    path => '{osdid}',
    method => 'GET',
    permissions => { user => 'all' },
    description => "OSD index.",
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => {
        type => 'array',
        items => {
            type => "object",
            properties => {},
        },
        links => [ { rel => 'child', href => "{name}" } ],
    },
    code => sub {
        my ($param) = @_;

        # one entry per child path, in the order they are listed to the client
        my @children = map { { name => $_ } } qw(metadata lv-info);

        return \@children;
    }});
592
# GET handler: returns general information about one OSD (taken from 'osd metadata'),
# the PID and PSS memory usage of its service and the block/db/wal devices it uses.
# Dies if the metadata reports a hostname other than the local node.
__PACKAGE__->register_method ({
    name => 'osddetails',
    path => '{osdid}/metadata',
    method => 'GET',
    description => "Get OSD details",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => {
        type => 'object',
        properties => {
            osd => {
                type => 'object',
                description => 'General information about the OSD',
                properties => {
                    hostname => {
                        type => 'string',
                        description => 'Name of the host containing the OSD.',
                    },
                    id => {
                        type => 'integer',
                        description => 'ID of the OSD.',
                    },
                    mem_usage => {
                        type => 'integer',
                        description => 'Memory usage of the OSD service.',
                    },
                    osd_data => {
                        type => 'string',
                        description => "Path to the OSD's data directory.",
                    },
                    osd_objectstore => {
                        type => 'string',
                        description => 'The type of object store used.',
                    },
                    pid => {
                        type => 'integer',
                        description => 'OSD process ID.',
                    },
                    version => {
                        type => 'string',
                        description => 'Ceph version of the OSD service.',
                    },
                    front_addr => {
                        type => 'string',
                        description => 'Address and port used to talk to clients and monitors.',
                    },
                    back_addr => {
                        type => 'string',
                        description => 'Address and port used to talk to other OSDs.',
                    },
                    hb_front_addr => {
                        type => 'string',
                        description => 'Heartbeat address and port for clients and monitors.',
                    },
                    hb_back_addr => {
                        type => 'string',
                        description => 'Heartbeat address and port for other OSDs.',
                    },
                },
            },
            devices => {
                type => 'array',
                description => 'Array containing data about devices',
                items => {
                    type => "object",
                    properties => $OSD_DEV_RETURN_PROPS,
                },
            }
        }
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $rados = PVE::RADOS->new();
        my $metadata = $rados->mon_command({ prefix => 'osd metadata', id => int($osdid) });

        die "OSD '${osdid}' does not exists on host '${nodename}'\n"
            if $nodename ne $metadata->{hostname};

        # ask systemd for the main PID of the OSD service
        my $pid;
        my $parser = sub {
            my $line = shift;
            if ($line =~ m/^MainPID=([0-9]*)$/) {
                $pid = $1;
            }
        };

        my $cmd = [
            '/bin/systemctl',
            'show',
            "ceph-osd\@${osdid}.service",
            '--property',
            'MainPID',
        ];
        run_command($cmd, errmsg => 'fetching OSD PID and memory usage failed', outfunc => $parser);

        $pid = defined($pid) ? int($pid) : undef;

        # memory usage is the PSS value of the process, stays 0 without a usable PID
        my $memory = 0;
        if ($pid && $pid > 0) {
            open (my $SMAPS, '<', "/proc/$pid/smaps_rollup")
                or die "failed to read PSS memory-stat from process - $!\n";

            while (my $line = <$SMAPS>) {
                if ($line =~ m/^Pss:\s+([0-9]+) kB$/) {
                    $memory = $1 * 1024; # reported in kB, returned in bytes
                    last;
                }
            }

            close $SMAPS;
        }

        my $data = {
            osd => {
                hostname => $metadata->{hostname},
                id => $metadata->{id},
                mem_usage => $memory,
                osd_data => $metadata->{osd_data},
                osd_objectstore => $metadata->{osd_objectstore},
                pid => $pid,
                version => "$metadata->{ceph_version_short} ($metadata->{ceph_release})",
                front_addr => $metadata->{front_addr},
                back_addr => $metadata->{back_addr},
                hb_front_addr => $metadata->{hb_front_addr},
                hb_back_addr => $metadata->{hb_back_addr},
            },
        };

        $data->{devices} = [];

        # Appends one device entry built from the "${prefix}_${dev}_*" metadata keys.
        # NOTE(review): the physical disks are returned as 'physical_device' while the
        # return schema names the property 'devices' - confirm which one is intended.
        my $get_data = sub {
            my ($dev, $prefix, $device) = @_;
            push (
                @{$data->{devices}},
                {
                    dev_node => $metadata->{"${prefix}_${dev}_dev_node"},
                    physical_device => $metadata->{"${prefix}_${dev}_devices"},
                    size => int($metadata->{"${prefix}_${dev}_size"}),
                    support_discard => int($metadata->{"${prefix}_${dev}_support_discard"}),
                    type => $metadata->{"${prefix}_${dev}_type"},
                    device => $device,
                }
            );
        };

        $get_data->("bdev", "bluestore", "block");
        $get_data->("db", "bluefs", "db") if $metadata->{bluefs_dedicated_db};
        $get_data->("wal", "bluefs", "wal") if $metadata->{bluefs_dedicated_wal};

        return $data;
    }});
762
# GET handler: looks up the logical volume backing the requested device type of an OSD
# via 'ceph-volume lvm list' and adds the LV creation time as reported by 'lvs'.
__PACKAGE__->register_method ({
    name => 'osdvolume',
    path => '{osdid}/lv-info',
    method => 'GET',
    description => "Get OSD volume details",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
            type => {
                description => 'OSD device type',
                type => 'string',
                enum => ['block', 'db', 'wal'],
                default => 'block',
                optional => 1,
            },
        },
    },
    returns => {
        type => 'object',
        properties => {
            creation_time => {
                type => 'string',
                description => "Creation time as reported by `lvs`.",
            },
            lv_name => {
                type => 'string',
                description => 'Name of the logical volume (LV).',
            },
            lv_path => {
                type => 'string',
                description => 'Path to the logical volume (LV).',
            },
            lv_size => {
                type => 'integer',
                description => 'Size of the logical volume (LV).',
            },
            lv_uuid => {
                type => 'string',
                description => 'UUID of the logical volume (LV).',
            },
            vg_name => {
                type => 'string',
                description => 'Name of the volume group (VG).',
            },
        },
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $type = $param->{type} // 'block';

        # runs a command and hands back everything it wrote to stdout
        my $capture = sub {
            my ($cmd, $errmsg) = @_;
            my $out = '';
            run_command($cmd, errmsg => $errmsg, outfunc => sub { $out .= shift });
            return $out;
        };

        my $cv_raw = $capture->(
            ['/usr/sbin/ceph-volume', 'lvm', 'list', $osdid, '--format', 'json'],
            'listing Ceph LVM volumes failed',
        );

        # the match untaints the output before it gets decoded
        $cv_raw =~ m/^(\{.*\})$/s
            or die "got unexpected data from ceph-volume: '${cv_raw}'\n";
        my $cv_list = JSON::decode_json($1);

        if (!$cv_list->{$osdid}) {
            die "OSD '${osdid}' not found in 'ceph-volume lvm list' on node '${nodename}'.\n"
                ."Maybe it was created before LVM became the default?\n";
        }

        my $by_type = { map { $_->{type} => $_ } @{$cv_list->{$osdid}} };
        my $volume = $by_type->{$type};
        die "volume type '${type}' not found for OSD ${osdid}\n" if !$volume;

        my $lvs_raw = $capture->(
            ['/sbin/lvs', $volume->{lv_path}, '--reportformat', 'json', '-o', 'lv_time'],
            'listing logical volumes failed',
        );

        # untaint, lvs has whitespace at beginning
        $lvs_raw =~ m/(\{.*\})$/s
            or die "got unexpected data from lvs: '${lvs_raw}'\n";
        my $lvs_report = JSON::decode_json($1);

        my $data = {};
        for my $key (qw(lv_name lv_path lv_uuid vg_name)) {
            $data->{$key} = $volume->{$key};
        }
        $data->{lv_size} = int($volume->{lv_size});
        $data->{creation_time} = $lvs_report->{report}->[0]->{lv}->[0]->{lv_time};

        return $data;
    }});
863
220173e9
DJ
# Check if $osdid belongs to $nodename
# $tree ... rados osd tree (passing the tree makes it easy to test)
#
# Returns the number of times $osdid is listed in the children of the 'host' entry named
# $nodename (0 if the tree or the host is missing). The count is returned in scalar AND
# list context, so the result does not depend on how the caller invokes the function.
# Dies if the tree contains two 'host' entries with the same name.
sub osd_belongs_to_node {
    my ($tree, $nodename, $osdid) = @_;
    return 0 if !($tree && $tree->{nodes});

    my $node_map = {};
    for my $el (grep { defined($_->{type}) && $_->{type} eq 'host' } @{$tree->{nodes}}) {
        my $name = $el->{name};
        die "internal error: duplicate host name found '$name'\n" if $node_map->{$name};
        $node_map->{$name} = $el;
    }

    my $osds = $node_map->{$nodename}->{children};
    return 0 if !$osds;

    # force scalar context: a bare grep would return the matching ids in list context,
    # which is a false value (0) for osd.0 and an empty list if nothing matches
    return scalar(grep { $_ == $osdid } @$osds);
}
882
79fa41a2
DC
# DELETE handler: removes an OSD that belongs to this node and is neither 'in' nor 'up'.
# The forked worker stops and disables the service, removes the OSD from the CRUSH map,
# deletes its auth key and the OSD itself, and - if 'cleanup' is set - also wipes the
# backing volumes/partitions. Returns the worker UPID.
__PACKAGE__->register_method ({
    name => 'destroyosd',
    path => '{osdid}',
    method => 'DELETE',
    description => "Destroy OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
            cleanup => {
                description => "If set, we remove partition table entries.",
                type => 'boolean',
                optional => 1,
                default => 0,
            },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();

        my $authuser = $rpcenv->get_user();

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $cleanup = $param->{cleanup};

        my $rados = PVE::RADOS->new();

        my $osd_belongs_to_node = osd_belongs_to_node(
            $rados->mon_command({ prefix => 'osd tree' }),
            $param->{node},
            $osdid,
        );
        # trailing newline keeps the source file/line out of the API error message
        die "OSD osd.$osdid does not belong to node $param->{node}!\n"
            if !$osd_belongs_to_node;

        # dies if osdid is unknown
        my $osdstat = $get_osd_status->($rados, $osdid);

        die "osd is in use (in == 1)\n" if $osdstat->{in};
        #&$run_ceph_cmd(['osd', 'out', $osdid]);

        die "osd is still running (up == 1)\n" if $osdstat->{up};

        my $osdsection = "osd.$osdid";

        my $worker = sub {
            my $upid = shift;

            # reopen with longer timeout
            $rados = PVE::RADOS->new(timeout => PVE::Ceph::Tools::get_config('long_rados_timeout'));

            print "destroy OSD $osdsection\n";

            eval {
                PVE::Ceph::Services::ceph_service_cmd('stop', $osdsection);
                PVE::Ceph::Services::ceph_service_cmd('disable', $osdsection);
            };
            warn $@ if $@;

            print "Remove $osdsection from the CRUSH map\n";
            $rados->mon_command({ prefix => "osd crush remove", name => $osdsection, format => 'plain' });

            print "Remove the $osdsection authentication key.\n";
            $rados->mon_command({ prefix => "auth del", entity => $osdsection, format => 'plain' });

            print "Remove OSD $osdsection\n";
            $rados->mon_command({ prefix => "osd rm", ids => [ $osdsection ], format => 'plain' });

            # try to unmount from standard mount point
            my $mountpoint = "/var/lib/ceph/osd/ceph-$osdid";

            # See FIXME below
            my $udev_trigger_devs = {};

            # wipes the given partition and deletes it from its disk's partition table
            my $remove_partition = sub {
                my ($part) = @_;

                return if !$part || (! -b $part );
                my $partnum = PVE::Diskmanage::get_partnum($part);
                my $devpath = PVE::Diskmanage::get_blockdev($part);

                $udev_trigger_devs->{$devpath} = 1;

                PVE::Diskmanage::wipe_blockdev($part);
                print "remove partition $part (disk '${devpath}', partnum $partnum)\n";
                eval { run_command(['/sbin/sgdisk', '-d', $partnum, "${devpath}"]); };
                warn $@ if $@;
            };

            my $osd_list = PVE::Ceph::Tools::ceph_volume_list();

            if ($osd_list->{$osdid}) { # ceph-volume managed

                eval { PVE::Ceph::Tools::ceph_volume_zap($osdid, $cleanup) };
                warn $@ if $@;

                if ($cleanup) {
                    # try to remove pvs, but do not fail if it does not work
                    for my $osd_part (@{$osd_list->{$osdid}}) {
                        for my $raw_dev (@{$osd_part->{devices}}) {
                            my ($dev) = (($raw_dev // '') =~ m|^(/dev/[-_.a-zA-Z0-9\/]+)$|); #untaint
                            if (!defined($dev)) {
                                # never hand an undefined path to pvremove/udevadm
                                warn "skipping unexpected device path '" . ($raw_dev // '') . "'\n";
                                next;
                            }

                            eval { run_command(['/sbin/pvremove', $dev], errfunc => sub {}) };
                            warn $@ if $@;

                            $udev_trigger_devs->{$dev} = 1;
                        }
                    }
                }
            } else {
                my $partitions_to_remove = [];
                if ($cleanup) {
                    # collect the device mounted at the OSD mount point ...
                    if (my $mp = PVE::ProcFSTools::parse_proc_mounts()) {
                        foreach my $line (@$mp) {
                            my ($dev, $path, $fstype) = @$line;
                            next if !($dev && $path && $fstype);
                            next if $dev !~ m|^/dev/|;

                            if ($path eq $mountpoint) {
                                abs_path($dev) =~ m|^(/.+)| or die "invalid dev: $dev\n";
                                push @$partitions_to_remove, $1;
                                last;
                            }
                        }
                    }

                    # ... and whatever the journal/block links inside it resolve to
                    foreach my $path (qw(journal block block.db block.wal)) {
                        abs_path("$mountpoint/$path") =~ m|^(/.+)| or die "invalid path: $path\n";
                        push @$partitions_to_remove, $1;
                    }
                }

                print "Unmount OSD $osdsection from $mountpoint\n";
                eval { run_command(['/bin/umount', $mountpoint]); };
                if (my $err = $@) {
                    warn $err;
                } elsif ($cleanup) {
                    #be aware of the ceph udev rules which can remount.
                    foreach my $part (@$partitions_to_remove) {
                        $remove_partition->($part);
                    }
                }
            }

            # FIXME: Remove once we depend on systemd >= v249.
            # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
            # udev database is updated.
            if ($cleanup) {
                eval { run_command(['udevadm', 'trigger', keys $udev_trigger_devs->%*]); };
                warn $@ if $@;
            }
        };

        return $rpcenv->fork_worker('cephdestroyosd', $osdsection, $authuser, $worker);
    }});
1049
# POST handler: marks an existing OSD as 'in' through the monitors.
__PACKAGE__->register_method ({
    name => 'in',
    path => '{osdid}/in',
    method => 'POST',
    description => "ceph osd in",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => { type => "null" },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $id = $param->{osdid};
        my $rados = PVE::RADOS->new();

        # the status lookup dies for an unknown OSD, its result is not needed here
        $get_osd_status->($rados, $id);

        $rados->mon_command({
            prefix => "osd in",
            ids => [ "osd.$id" ],
            format => 'plain',
        });

        return undef;
    }});
1088
# POST handler: marks an existing OSD as 'out' through the monitors.
__PACKAGE__->register_method ({
    name => 'out',
    path => '{osdid}/out',
    method => 'POST',
    description => "ceph osd out",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => { type => "null" },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $id = $param->{osdid};
        my $rados = PVE::RADOS->new();

        # the status lookup dies for an unknown OSD, its result is not needed here
        $get_osd_status->($rados, $id);

        $rados->mon_command({
            prefix => "osd out",
            ids => [ "osd.$id" ],
            format => 'plain',
        });

        return undef;
    }});
1127
b7701301
DC
# POST handler: asks the monitors to start a scrub (or a deep scrub if 'deep' is set)
# on an existing OSD.
__PACKAGE__->register_method ({
    name => 'scrub',
    path => '{osdid}/scrub',
    method => 'POST',
    description => "Instruct the OSD to scrub.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
            deep => {
                description => 'If set, instructs a deep scrub instead of a normal one.',
                type => 'boolean',
                optional => 1,
                default => 0,
            },
        },
    },
    returns => { type => "null" },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $id = $param->{osdid};
        my $rados = PVE::RADOS->new();

        # the status lookup dies for an unknown OSD, its result is not needed here
        $get_osd_status->($rados, $id);

        # a missing 'deep' parameter selects the normal scrub
        my $command = 'osd scrub';
        $command = 'osd deep-scrub' if $param->{deep};

        $rados->mon_command({ prefix => $command, who => $id });

        return undef;
    }});
1172
79fa41a2 11731;