]>
Commit | Line | Data |
---|---|---|
1 | package PVE::API2::Ceph::OSD; | |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
6 | use Cwd qw(abs_path); | |
7 | use IO::File; | |
8 | use JSON; | |
9 | use UUID; | |
10 | ||
11 | use PVE::Ceph::Tools; | |
12 | use PVE::Ceph::Services; | |
13 | use PVE::CephConfig; | |
14 | use PVE::Cluster qw(cfs_read_file cfs_write_file); | |
15 | use PVE::Diskmanage; | |
16 | use PVE::Storage::LVMPlugin; | |
17 | use PVE::Exception qw(raise_param_exc); | |
18 | use PVE::JSONSchema qw(get_standard_option); | |
19 | use PVE::INotify; | |
20 | use PVE::RADOS; | |
21 | use PVE::RESTHandler; | |
22 | use PVE::RPCEnvironment; | |
23 | use PVE::Tools qw(run_command file_set_contents); | |
24 | use PVE::ProcFSTools; | |
25 | use PVE::Network; | |
26 | ||
27 | use base qw(PVE::RESTHandler); | |
28 | ||
29 | my $nodename = PVE::INotify::nodename(); | |
30 | ||
# Fetch 'osd dump' over the given RADOS connection and index the OSD entries
# by their numeric id. With a defined $osdid, return only that OSD's stat hash
# (dies "no such OSD" when absent/false); without one, return the whole map —
# and, in list context, also the cluster flags string from the dump.
my $get_osd_status = sub {
    my ($rados, $osdid) = @_;

    my $dump = $rados->mon_command({ prefix => 'osd dump' });

    my $flags = $dump->{flags} || undef;

    my $stat_of;
    for my $entry (@{ $dump->{osds} || [] }) {
        next if !defined($entry->{osd});
        $stat_of->{$entry->{osd}} = $entry;
    }

    if (defined($osdid)) {
        die "no such OSD '$osdid'\n" if !$stat_of->{$osdid};
        return $stat_of->{$osdid};
    }

    return wantarray ? ($stat_of, $flags) : $stat_of;
};
51 | ||
# Collect per-OSD usage statistics via 'pg dump' (contents restricted to
# 'osds') and return them keyed by OSD id. On an unrecognized reply format,
# warn and return an empty array ref (historic behavior, kept as-is).
my $get_osd_usage = sub {
    my ($rados) = @_;

    my $reply = $rados->mon_command({ prefix => 'pg dump', dumpcontents => [ 'osds' ]});
    if (!($reply && ref($reply))) {
        warn "got unknown result format for 'pg dump osds' command\n";
        return [];
    }

    # Nautilus and newer wrap the entry list in a hash under 'osd_stats'.
    my $entries = ref($reply) eq "HASH" ? $reply->{osd_stats} : $reply;

    my $usage_of = {};
    foreach my $entry (@$entries) {
        next if !defined($entry->{osd});
        $usage_of->{$entry->{osd}} = $entry;
    }

    return $usage_of;
};
72 | ||
# GET /nodes/{node}/ceph/osd
# Returns the CRUSH hierarchy as an ExtJS-style tree ('root' with nested
# 'children'), decorated per OSD with status, usage and metadata details.
__PACKAGE__->register_method ({
    name => 'index',
    path => '',
    method => 'GET',
    description => "Get Ceph osd list/tree.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
        },
    },
    # fixme: return a list instead of extjs tree format ?
    returns => {
        type => "object",
        items => {
            type => "object",
            properties => {
                flags => { type => "string" },
                root => {
                    type => "object",
                    description => "Tree with OSDs in the CRUSH map structure.",
                },
            },
        },
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        # 'osd df tree' delivers the CRUSH hierarchy plus df data in one call
        my $rados = PVE::RADOS->new();
        my $res = $rados->mon_command({ prefix => 'osd df', output_method => 'tree', });

        die "no tree nodes found\n" if !($res && $res->{nodes});

        # id => stat map from 'osd dump' and the cluster flags string
        my ($osdhash, $flags) = $get_osd_status->($rados);

        # id => usage stats from 'pg dump osds'
        my $osd_usage = $get_osd_usage->($rados);

        my $osdmetadata_res = $rados->mon_command({ prefix => 'osd metadata' });
        my $osdmetadata = { map { $_->{id} => $_ } @$osdmetadata_res };

        my $hostversions = PVE::Ceph::Services::get_ceph_versions();

        # First pass: build one decorated node ($newnodes) per tree entry,
        # keeping the raw entries ($nodes) around for the linking pass below.
        my $nodes = {};
        my $newnodes = {};
        foreach my $e (@{$res->{nodes}}) {
            my ($id, $name) = $e->@{qw(id name)};

            $nodes->{$id} = $e;

            my $new = {
                id => $id,
                name => $name,
                type => $e->{type}
            };

            # copy optional CRUSH/df attributes only when present
            foreach my $opt (qw(status crush_weight reweight device_class pgs)) {
                $new->{$opt} = $e->{$opt} if defined($e->{$opt});
            }

            if (my $stat = $osdhash->{$id}) {
                $new->{in} = $stat->{in} if defined($stat->{in});
            }

            if (my $stat = $osd_usage->{$id}) {
                # '|| 1' guards the division below against a zero/missing size
                $new->{total_space} = ($stat->{kb} || 1) * 1024;
                $new->{bytes_used} = ($stat->{kb_used} || 0) * 1024;
                $new->{percent_used} = ($new->{bytes_used}*100)/$new->{total_space};
                if (my $d = $stat->{perf_stat}) {
                    $new->{commit_latency_ms} = $d->{commit_latency_ms};
                    $new->{apply_latency_ms} = $d->{apply_latency_ms};
                }
            }

            # attach object-store type, backing devices and version for OSD leaves
            my $osdmd = $osdmetadata->{$id};
            if ($e->{type} eq 'osd' && $osdmd) {
                if ($osdmd->{bluefs}) {
                    $new->{osdtype} = 'bluestore';
                    $new->{blfsdev} = $osdmd->{bluestore_bdev_dev_node};
                    $new->{dbdev} = $osdmd->{bluefs_db_dev_node};
                    $new->{waldev} = $osdmd->{bluefs_wal_dev_node};
                } else {
                    $new->{osdtype} = 'filestore';
                }
                for my $field (qw(ceph_version ceph_version_short)) {
                    $new->{$field} = $osdmd->{$field} if $osdmd->{$field};
                }
            }

            $newnodes->{$id} = $new;
        }

        # Second pass: wire up parent/child links, mark leaves, record which
        # host an OSD sits on, and attach the per-host ceph version.
        foreach my $e (@{$res->{nodes}}) {
            my ($id, $name) = $e->@{qw(id name)};
            my $new = $newnodes->{$id};

            if ($e->{children} && scalar(@{$e->{children}})) {
                $new->{children} = [];
                $new->{leaf} = 0;
                foreach my $cid (@{$e->{children}}) {
                    $nodes->{$cid}->{parent} = $id;
                    if ($nodes->{$cid}->{type} eq 'osd' && $e->{type} eq 'host') {
                        $newnodes->{$cid}->{host} = $name;
                    }
                    push @{$new->{children}}, $newnodes->{$cid};
                }
            } else {
                # CRUSH buckets have negative ids; only real OSDs (id >= 0) are leaves
                $new->{leaf} = ($id >= 0) ? 1 : 0;
            }

            if ($name && $e->{type} eq 'host') {
                $new->{version} = $hostversions->{$name}->{version}->{str};
            }
        }

        # entries that never got a parent assigned are the CRUSH root(s)
        my $realroots = [];
        foreach my $e (@{$res->{nodes}}) {
            my $id = $e->{id};
            if (!$nodes->{$id}->{parent}) {
                push @$realroots, $newnodes->{$id};
            }
        }

        die "no root node\n" if scalar(@$realroots) < 1;

        my $data = {
            root => {
                leaf => 0,
                children => $realroots
            },
        };

        $data->{flags} = $flags if $flags; # we want this for the noout flag

        return $data;
    }});
215 | ||
# POST /nodes/{node}/ceph/osd
# Creates a new bluestore OSD via 'ceph-volume lvm create', optionally with
# separate block.db/block.wal devices (partition or LV is allocated on them
# as needed) and optional dm-crypt encryption. Runs as a forked worker task
# and returns its UPID.
__PACKAGE__->register_method ({
    name => 'createosd',
    path => '',
    method => 'POST',
    description => "Create OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            dev => {
                description => "Block device name.",
                type => 'string',
            },
            db_dev => {
                description => "Block device name for block.db.",
                optional => 1,
                type => 'string',
            },
            db_dev_size => {
                description => "Size in GiB for block.db.",
                verbose_description => "If a block.db is requested but the size is not given, ".
                    "will be automatically selected by: bluestore_block_db_size from the ".
                    "ceph database (osd or global section) or config (osd or global section)".
                    "in that order. If this is not available, it will be sized 10% of the size ".
                    "of the OSD device. Fails if the available size is not enough.",
                optional => 1,
                type => 'number',
                default => 'bluestore_block_db_size or 10% of OSD size',
                requires => 'db_dev',
                minimum => 1.0,
            },
            wal_dev => {
                description => "Block device name for block.wal.",
                optional => 1,
                type => 'string',
            },
            wal_dev_size => {
                description => "Size in GiB for block.wal.",
                verbose_description => "If a block.wal is requested but the size is not given, ".
                    "will be automatically selected by: bluestore_block_wal_size from the ".
                    "ceph database (osd or global section) or config (osd or global section)".
                    "in that order. If this is not available, it will be sized 1% of the size ".
                    "of the OSD device. Fails if the available size is not enough.",
                optional => 1,
                minimum => 0.5,
                default => 'bluestore_block_wal_size or 1% of OSD size',
                requires => 'wal_dev',
                type => 'number',
            },
            encrypted => {
                type => 'boolean',
                optional => 1,
                default => 0,
                description => "Enables encryption of the OSD."
            },
            'crush-device-class' => {
                optional => 1,
                type => 'string',
                description => "Set the device class of the OSD in crush."
            },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();

        my $authuser = $rpcenv->get_user();

        # test basic requirements
        PVE::Ceph::Tools::check_ceph_inited();
        PVE::Ceph::Tools::setup_pve_symlinks();
        PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
        PVE::Ceph::Tools::check_ceph_installed('ceph_volume');

        # extract parameter info and fail if a device is set more than once
        my $devs = {};

        my $ceph_conf = cfs_read_file('ceph.conf');

        # OSD traffic uses the cluster network when set, else the public one
        my $osd_network = $ceph_conf->{global}->{cluster_network};
        $osd_network //= $ceph_conf->{global}->{public_network}; # fallback

        if ($osd_network) { # check only if something is configured
            my $cluster_net_ips = PVE::Network::get_local_ip_from_cidr($osd_network);
            if (scalar(@$cluster_net_ips) < 1) {
                my $osd_net_obj = PVE::Network::IP_from_cidr($osd_network);
                my $osd_base_cidr = $osd_net_obj->{ip} . "/" . $osd_net_obj->{prefixlen};

                die "No address from ceph cluster network (${osd_base_cidr}) found on node '$nodename'. ".
                    "Check your network config.\n";
            }
        }

        # normalize each given device into { dev => path, name => basename[, size] }
        for my $type ( qw(dev db_dev wal_dev) ) {
            next if !$param->{$type};

            my $type_dev = PVE::Diskmanage::verify_blockdev_path($param->{$type});
            (my $type_devname = $type_dev) =~ s|/dev/||;

            # FIX: error message said "chose" instead of "choose"
            raise_param_exc({ $type => "cannot choose '$type_dev' for more than one type." })
                if grep { $_->{name} eq $type_devname } values %$devs;

            $devs->{$type} = {
                dev => $type_dev,
                name => $type_devname,
            };

            if (my $size = $param->{"${type}_size"}) {
                $devs->{$type}->{size} = PVE::Tools::convert_size($size, 'gb' => 'b') ;
            }
        }

        # Dies unless the data device is completely unused and any db/wal
        # device is either unused, GPT-partitioned, or already LVM-backed.
        my $test_disk_requirements = sub {
            my ($disklist) = @_;

            my $dev = $devs->{dev}->{dev};
            my $devname = $devs->{dev}->{name};
            die "unable to get device info for '$dev'\n" if !$disklist->{$devname};
            die "device '$dev' is already in use\n" if $disklist->{$devname}->{used};

            for my $type ( qw(db_dev wal_dev) ) {
                my $d = $devs->{$type};
                next if !$d;
                my $name = $d->{name};
                my $info = $disklist->{$name};
                die "unable to get device info for '$d->{dev}' for type $type\n" if !$disklist->{$name};
                if (my $usage = $info->{used}) {
                    if ($usage eq 'partitions') {
                        die "device '$d->{dev}' is not GPT partitioned\n" if !$info->{gpt};
                    } elsif ($usage ne 'LVM') {
                        die "device '$d->{dev}' is already in use and has no LVM on it\n";
                    }
                }
            }
        };


        # test disk requirements early
        my $devlist = [ map { $_->{name} } values %$devs ];
        my $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
        $test_disk_requirements->($disklist);

        # get necessary ceph infos
        my $rados = PVE::RADOS->new();
        my $monstat = $rados->mon_command({ prefix => 'quorum_status' });

        die "unable to get fsid\n" if !$monstat->{monmap} || !$monstat->{monmap}->{fsid};
        my $fsid = $monstat->{monmap}->{fsid};
        $fsid = $1 if $fsid =~ m/^([0-9a-f\-]+)$/; # untaint

        my $ceph_bootstrap_osd_keyring = PVE::Ceph::Tools::get_config('ceph_bootstrap_osd_keyring');

        # Fetch the bootstrap-osd keyring when cephx is required and the file
        # is missing. FIX: default to '' so an unset 'auth_client_required'
        # does not trigger an uninitialized-value warning (undef still
        # compares false, as before).
        if (! -f $ceph_bootstrap_osd_keyring
            && ($ceph_conf->{global}->{auth_client_required} // '') eq 'cephx') {
            my $bindata = $rados->mon_command({
                prefix => 'auth get-or-create',
                entity => 'client.bootstrap-osd',
                caps => [
                    'mon' => 'allow profile bootstrap-osd'
                ],
                format => 'plain',
            });
            file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
        }

        # See FIXME below
        my @udev_trigger_devs = ();

        # Allocate space for block.db/block.wal on $dev: create a fresh
        # ceph- VG+LV on an unused device, an LV in an existing ceph- VG,
        # or append a GPT partition. Returns "vg/lv" or the partition path.
        my $create_part_or_lv = sub {
            my ($dev, $size, $type) = @_;

            $size =~ m/^(\d+)$/ or die "invalid size '$size'\n";
            $size = $1; # untaint

            die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
                if $dev->{size} < $size;

            # sgdisk and lvcreate can only handle sizes divisible by 512b,
            # so we round down to the nearest kb
            $size = PVE::Tools::convert_size($size, 'b' => 'kb', 1);

            if (!$dev->{used}) {
                # create pv,vg,lv

                my $vg = "ceph-" . UUID::uuid();
                my $lv = $type . "-" . UUID::uuid();

                PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
                PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

                if (PVE::Diskmanage::is_partition($dev->{devpath})) {
                    eval { PVE::Diskmanage::change_parttype($dev->{devpath}, '8E00'); };
                    warn $@ if $@; # best effort, not fatal
                }

                push @udev_trigger_devs, $dev->{devpath};

                return "$vg/$lv";

            } elsif ($dev->{used} eq 'LVM') {
                # check pv/vg and create lv

                my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
                my $vg;
                for my $vgname ( sort keys %$vgs ) {
                    next if $vgname !~ /^ceph-/;

                    for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
                        next if $pv->{name} ne $dev->{devpath};
                        $vg = $vgname;
                        last;
                    }
                    last if $vg;
                }

                die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
                die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;

                my $lv = $type . "-" . UUID::uuid();

                PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

                return "$vg/$lv";

            } elsif ($dev->{used} eq 'partitions' && $dev->{gpt}) {
                # create new partition at the end
                my $parttypes = {
                    'osd-db' => '30CD0809-C2B2-499C-8879-2D6B78529876',
                    'osd-wal' => '5CE17FCE-4087-4169-B7FF-056CC58473F9',
                };

                my $part = PVE::Diskmanage::append_partition($dev->{devpath}, $size * 1024);

                if (my $parttype = $parttypes->{$type}) {
                    eval { PVE::Diskmanage::change_parttype($part, $parttype); };
                    warn $@ if $@; # best effort, not fatal
                }

                push @udev_trigger_devs, $part;
                return $part;
            }

            die "cannot use '$dev->{devpath}' for '$type'\n";
        };

        my $worker = sub {
            my $upid = shift;

            PVE::Diskmanage::locked_disk_action(sub {
                # update disklist and re-test requirements
                $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
                $test_disk_requirements->($disklist);

                my $dev_class = $param->{'crush-device-class'};
                my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
                push @$cmd, '--crush-device-class', $dev_class if $dev_class;

                my $devname = $devs->{dev}->{name};
                my $devpath = $disklist->{$devname}->{devpath};
                print "create OSD on $devpath (bluestore)\n";

                push @udev_trigger_devs, $devpath;

                # fallback sizes used when neither the API nor the ceph
                # config/database provides one
                my $osd_size = $disklist->{$devname}->{size};
                my $size_map = {
                    db => int($osd_size / 10), # 10% of OSD
                    wal => int($osd_size / 100), # 1% of OSD
                };

                my $sizes;
                foreach my $type ( qw(db wal) ) {
                    my $fallback_size = $size_map->{$type};
                    my $d = $devs->{"${type}_dev"};
                    next if !$d;

                    # size was not set via api, getting from config/fallback
                    if (!defined($d->{size})) {
                        $sizes = PVE::Ceph::Tools::get_db_wal_sizes() if !$sizes;
                        $d->{size} = $sizes->{$type} // $fallback_size;
                    }
                    print "creating block.$type on '$d->{dev}'\n";
                    my $name = $d->{name};
                    my $part_or_lv = $create_part_or_lv->($disklist->{$name}, $d->{size}, "osd-$type");

                    print "using '$part_or_lv' for block.$type\n";
                    push @$cmd, "--block.$type", $part_or_lv;
                }

                push @$cmd, '--data', $devpath;
                push @$cmd, '--dmcrypt' if $param->{encrypted};

                PVE::Diskmanage::wipe_blockdev($devpath);

                if (PVE::Diskmanage::is_partition($devpath)) {
                    eval { PVE::Diskmanage::change_parttype($devpath, '8E00'); };
                    warn $@ if $@; # best effort, not fatal
                }

                run_command($cmd);

                # FIXME: Remove once we depend on systemd >= v249.
                # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
                # udev database is updated.
                eval { run_command(['udevadm', 'trigger', @udev_trigger_devs]); };
                warn $@ if $@;
            });
        };

        return $rpcenv->fork_worker('cephcreateosd', $devs->{dev}->{name}, $authuser, $worker);
    }});
529 | ||
# Shared JSONSchema return properties describing one OSD device entry;
# referenced by the 'devices' array in the osddetails endpoint's return schema.
my $OSD_DEV_RETURN_PROPS = {
    device => {
        type => 'string',
        enum => ['block', 'db', 'wal'],
        description => 'Kind of OSD device',
    },
    dev_node => {
        type => 'string',
        description => 'Device node',
    },
    devices => {
        type => 'string',
        description => 'Physical disks used',
    },
    size => {
        type => 'integer',
        description => 'Size in bytes',
    },
    support_discard => {
        type => 'boolean',
        description => 'Discard support of the physical device',
    },
    type => {
        type => 'string',
        description => 'Type of device. For example, hdd or ssd',
    },
};
557 | ||
# GET /nodes/{node}/ceph/osd/{osdid}
# Directory index for a single OSD: lists the available sub-endpoints
# ('metadata' and 'lv-info') as child links.
__PACKAGE__->register_method ({
    name => 'osdindex',
    path => '{osdid}',
    method => 'GET',
    permissions => { user => 'all' },
    description => "OSD index.",
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => {
        type => 'array',
        items => {
            type => "object",
            properties => {},
        },
        links => [ { rel => 'child', href => "{name}" } ],
    },
    code => sub {
        my ($param) = @_;

        # static directory listing, one entry per sub-endpoint
        return [ map { { name => $_ } } qw(metadata lv-info) ];
    }});
592 | ||
# GET /nodes/{node}/ceph/osd/{osdid}/metadata
# Gathers details for one local OSD: ceph 'osd metadata' output, the service
# PID via systemctl, PSS memory usage from /proc, and per-device information.
__PACKAGE__->register_method ({
    name => 'osddetails',
    path => '{osdid}/metadata',
    method => 'GET',
    description => "Get OSD details",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => {
        type => 'object',
        properties => {
            osd => {
                type => 'object',
                description => 'General information about the OSD',
                properties => {
                    hostname => {
                        type => 'string',
                        description => 'Name of the host containing the OSD.',
                    },
                    id => {
                        type => 'integer',
                        description => 'ID of the OSD.',
                    },
                    mem_usage => {
                        type => 'integer',
                        description => 'Memory usage of the OSD service.',
                    },
                    osd_data => {
                        type => 'string',
                        description => "Path to the OSD's data directory.",
                    },
                    osd_objectstore => {
                        type => 'string',
                        description => 'The type of object store used.',
                    },
                    pid => {
                        type => 'integer',
                        description => 'OSD process ID.',
                    },
                    version => {
                        type => 'string',
                        description => 'Ceph version of the OSD service.',
                    },
                    front_addr => {
                        type => 'string',
                        description => 'Address and port used to talk to clients and monitors.',
                    },
                    back_addr => {
                        type => 'string',
                        description => 'Address and port used to talk to other OSDs.',
                    },
                    hb_front_addr => {
                        type => 'string',
                        description => 'Heartbeat address and port for clients and monitors.',
                    },
                    hb_back_addr => {
                        type => 'string',
                        description => 'Heartbeat address and port for other OSDs.',
                    },
                },
            },
            devices => {
                type => 'array',
                description => 'Array containing data about devices',
                items => {
                    type => "object",
                    properties => $OSD_DEV_RETURN_PROPS,
                },
            }
        }
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $rados = PVE::RADOS->new();
        my $metadata = $rados->mon_command({ prefix => 'osd metadata', id => int($osdid) });

        # only serve OSDs that actually live on this node
        die "OSD '${osdid}' does not exists on host '${nodename}'\n"
            if $nodename ne $metadata->{hostname};

        # FIX: removed unused 'my $raw' local; the parser only captures the PID
        my $pid;
        my $parser = sub {
            my $line = shift;
            if ($line =~ m/^MainPID=([0-9]*)$/) {
                $pid = $1;
            }
        };

        my $cmd = [
            '/bin/systemctl',
            'show',
            "ceph-osd\@${osdid}.service",
            '--property',
            'MainPID',
        ];
        run_command($cmd, errmsg => 'fetching OSD PID and memory usage failed', outfunc => $parser);

        $pid = defined($pid) ? int($pid) : undef;

        # read the PSS value (shared memory attributed proportionally) for the
        # OSD process; leave 0 if the service is not running
        my $memory = 0;
        if ($pid && $pid > 0) {
            open (my $SMAPS, '<', "/proc/$pid/smaps_rollup")
                or die "failed to read PSS memory-stat from process - $!\n";

            while (my $line = <$SMAPS>) {
                if ($line =~ m/^Pss:\s+([0-9]+) kB$/) {
                    $memory = $1 * 1024;
                    last;
                }
            }

            close $SMAPS;
        }

        my $data = {
            osd => {
                hostname => $metadata->{hostname},
                id => $metadata->{id},
                mem_usage => $memory,
                osd_data => $metadata->{osd_data},
                osd_objectstore => $metadata->{osd_objectstore},
                pid => $pid,
                version => "$metadata->{ceph_version_short} ($metadata->{ceph_release})",
                front_addr => $metadata->{front_addr},
                back_addr => $metadata->{back_addr},
                hb_front_addr => $metadata->{hb_front_addr},
                hb_back_addr => $metadata->{hb_back_addr},
            },
        };

        $data->{devices} = [];

        # collect one device entry from the metadata fields "<prefix>_<dev>_*"
        my $get_data = sub {
            my ($dev, $prefix, $device) = @_;
            push (
                @{$data->{devices}},
                {
                    dev_node => $metadata->{"${prefix}_${dev}_dev_node"},
                    physical_device => $metadata->{"${prefix}_${dev}_devices"},
                    size => int($metadata->{"${prefix}_${dev}_size"}),
                    support_discard => int($metadata->{"${prefix}_${dev}_support_discard"}),
                    type => $metadata->{"${prefix}_${dev}_type"},
                    device => $device,
                }
            );
        };

        # db/wal entries only exist when they live on dedicated devices
        $get_data->("bdev", "bluestore", "block");
        $get_data->("db", "bluefs", "db") if $metadata->{bluefs_dedicated_db};
        $get_data->("wal", "bluefs", "wal") if $metadata->{bluefs_dedicated_wal};

        return $data;
    }});
762 | ||
# GET /nodes/{node}/ceph/osd/{osdid}/lv-info
# Looks up the logical volume backing the requested OSD device type via
# 'ceph-volume lvm list' and augments it with the LV creation time from 'lvs'.
__PACKAGE__->register_method ({
    name => 'osdvolume',
    path => '{osdid}/lv-info',
    method => 'GET',
    description => "Get OSD volume details",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
            type => {
                description => 'OSD device type',
                type => 'string',
                enum => ['block', 'db', 'wal'],
                default => 'block',
                optional => 1,
            },
        },
    },
    returns => {
        type => 'object',
        properties => {
            creation_time => {
                type => 'string',
                description => "Creation time as reported by `lvs`.",
            },
            lv_name => {
                type => 'string',
                description => 'Name of the logical volume (LV).',
            },
            lv_path => {
                type => 'string',
                description => 'Path to the logical volume (LV).',
            },
            lv_size => {
                type => 'integer',
                description => 'Size of the logical volume (LV).',
            },
            lv_uuid => {
                type => 'string',
                description => 'UUID of the logical volume (LV).',
            },
            vg_name => {
                type => 'string',
                description => 'Name of the volume group (VG).',
            },
        },
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $type = $param->{type} // 'block';

        # $parser accumulates all command output into $raw for JSON decoding
        my $raw = '';
        my $parser = sub { $raw .= shift };
        my $cmd = ['/usr/sbin/ceph-volume', 'lvm', 'list', $osdid, '--format', 'json'];
        run_command($cmd, errmsg => 'listing Ceph LVM volumes failed', outfunc => $parser);

        my $result;
        if ($raw =~ m/^(\{.*\})$/s) { #untaint
            $result = JSON::decode_json($1);
        } else {
            die "got unexpected data from ceph-volume: '${raw}'\n";
        }
        if (!$result->{$osdid}) {
            die "OSD '${osdid}' not found in 'ceph-volume lvm list' on node '${nodename}'.\n"
                ."Maybe it was created before LVM became the default?\n";
        }

        # one entry per device type (block/db/wal) for this OSD
        my $lv_data = { map { $_->{type} => $_ } @{$result->{$osdid}} };
        my $volume = $lv_data->{$type} || die "volume type '${type}' not found for OSD ${osdid}\n";

        # reuse $raw/$parser to capture the creation time via lvs
        $raw = '';
        $cmd = ['/sbin/lvs', $volume->{lv_path}, '--reportformat', 'json', '-o', 'lv_time'];
        run_command($cmd, errmsg => 'listing logical volumes failed', outfunc => $parser);

        if ($raw =~ m/(\{.*\})$/s) { #untaint, lvs has whitespace at beginning
            $result = JSON::decode_json($1);
        } else {
            die "got unexpected data from lvs: '${raw}'\n";
        }

        my $data = { map { $_ => $volume->{$_} } qw(lv_name lv_path lv_uuid vg_name) };
        $data->{lv_size} = int($volume->{lv_size});

        # FIX: plain element access instead of the one-element slice
        # '@{$result->{report}}[0]' — same value, clearer intent
        $data->{creation_time} = $result->{report}->[0]->{lv}->[0]->{lv_time};

        return $data;
    }});
863 | ||
# Check if $osdid belongs to $nodename
# $tree ... rados osd tree (passing the tree makes it easy to test)
# Returns a true value iff $osdid is listed among the children of the host
# entry named $nodename; 0/false for a missing tree, unknown host, or
# a host without children.
sub osd_belongs_to_node {
    my ($tree, $nodename, $osdid) = @_;

    return 0 if !($tree && $tree->{nodes});

    # index the host-type entries by name, rejecting duplicates
    my %host_entry_of;
    for my $entry (@{ $tree->{nodes} }) {
        next if !defined($entry->{type}) || $entry->{type} ne 'host';
        my $hostname = $entry->{name};
        die "internal error: duplicate host name found '$hostname'\n"
            if $host_entry_of{$hostname};
        $host_entry_of{$hostname} = $entry;
    }

    my $children = $host_entry_of{$nodename}->{children};
    return 0 if !$children;

    return grep { $_ == $osdid } @$children;
}
882 | ||
__PACKAGE__->register_method ({
    name => 'destroyosd',
    path => '{osdid}',
    method => 'DELETE',
    description => "Destroy OSD",
    proxyto => 'node',
    protected => 1,
    # NOTE(review): unlike the in/out/scrub siblings there is no 'permissions'
    # block here, so the REST handler default applies — confirm that is intended.
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    cleanup => {
		description => "If set, we remove partition table entries.",
		type => 'boolean',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => { type => 'string' },
    code => sub {
	my ($param) = @_;

	my $rpcenv = PVE::RPCEnvironment::get();

	my $authuser = $rpcenv->get_user();

	PVE::Ceph::Tools::check_ceph_inited();

	my $osdid = $param->{osdid};
	my $cleanup = $param->{cleanup};

	my $rados = PVE::RADOS->new();

	# Refuse to act on an OSD that the CRUSH tree maps to a different node.
	my $osd_belongs_to_node = osd_belongs_to_node(
	    $rados->mon_command({ prefix => 'osd tree' }),
	    $param->{node},
	    $osdid,
	);
	# FIX: end the message with "\n" so die() does not append
	# " at FILE line N." to the user-visible API error (matches the
	# other die messages in this method).
	die "OSD osd.$osdid does not belong to node $param->{node}!\n"
	    if !$osd_belongs_to_node;

	# dies if osdid is unknown
	my $osdstat = $get_osd_status->($rados, $osdid);

	# The OSD must already be marked out and stopped before destruction.
	die "osd is in use (in == 1)\n" if $osdstat->{in};
	#&$run_ceph_cmd(['osd', 'out', $osdid]);

	die "osd is still running (up == 1)\n" if $osdstat->{up};

	my $osdsection = "osd.$osdid";

	my $worker = sub {
	    my $upid = shift;

	    # reopen with longer timeout
	    $rados = PVE::RADOS->new(timeout => PVE::Ceph::Tools::get_config('long_rados_timeout'));

	    print "destroy OSD $osdsection\n";

	    # Stop and disable the OSD service unit; best effort only.
	    eval {
		PVE::Ceph::Services::ceph_service_cmd('stop', $osdsection);
		PVE::Ceph::Services::ceph_service_cmd('disable', $osdsection);
	    };
	    warn $@ if $@;

	    # Standard manual OSD removal sequence:
	    # crush remove -> auth del -> osd rm.
	    print "Remove $osdsection from the CRUSH map\n";
	    $rados->mon_command({ prefix => "osd crush remove", name => $osdsection, format => 'plain' });

	    print "Remove the $osdsection authentication key.\n";
	    $rados->mon_command({ prefix => "auth del", entity => $osdsection, format => 'plain' });

	    print "Remove OSD $osdsection\n";
	    $rados->mon_command({ prefix => "osd rm", ids => [ $osdsection ], format => 'plain' });

	    # try to unmount from standard mount point
	    my $mountpoint = "/var/lib/ceph/osd/ceph-$osdid";

	    # See FIXME below
	    my $udev_trigger_devs = {};

	    # Wipe a partition and delete it from its disk's partition table.
	    my $remove_partition = sub {
		my ($part) = @_;

		return if !$part || (! -b $part );
		my $partnum = PVE::Diskmanage::get_partnum($part);
		my $devpath = PVE::Diskmanage::get_blockdev($part);

		$udev_trigger_devs->{$devpath} = 1;

		PVE::Diskmanage::wipe_blockdev($part);
		print "remove partition $part (disk '${devpath}', partnum $partnum)\n";
		eval { run_command(['/sbin/sgdisk', '-d', $partnum, "${devpath}"]); };
		warn $@ if $@;
	    };

	    my $osd_list = PVE::Ceph::Tools::ceph_volume_list();

	    if ($osd_list->{$osdid}) { # ceph-volume managed

		eval { PVE::Ceph::Tools::ceph_volume_zap($osdid, $cleanup) };
		warn $@ if $@;

		if ($cleanup) {
		    # try to remove pvs, but do not fail if it does not work
		    for my $osd_part (@{$osd_list->{$osdid}}) {
			for my $dev (@{$osd_part->{devices}}) {
			    ($dev) = ($dev =~ m|^(/dev/[-_.a-zA-Z0-9\/]+)$|); #untaint

			    eval { run_command(['/sbin/pvremove', $dev], errfunc => sub {}) };
			    warn $@ if $@;

			    $udev_trigger_devs->{$dev} = 1;
			}
		    }
		}
	    } else {
		# Legacy (ceph-disk style) OSD: collect partitions to remove,
		# then unmount and wipe them.
		my $partitions_to_remove = [];
		if ($cleanup) {
		    # The data partition is whatever is mounted at the
		    # standard mount point ...
		    if (my $mp = PVE::ProcFSTools::parse_proc_mounts()) {
			foreach my $line (@$mp) {
			    my ($dev, $path, $fstype) = @$line;
			    next if !($dev && $path && $fstype);
			    next if $dev !~ m|^/dev/|;

			    if ($path eq $mountpoint) {
				abs_path($dev) =~ m|^(/.+)| or die "invalid dev: $dev\n";
				push @$partitions_to_remove, $1;
				last;
			    }
			}
		    }

		    # ... plus the journal/DB/WAL devices symlinked below it.
		    foreach my $path (qw(journal block block.db block.wal)) {
			abs_path("$mountpoint/$path") =~ m|^(/.+)| or die "invalid path: $path\n";
			push @$partitions_to_remove, $1;
		    }
		}

		print "Unmount OSD $osdsection from  $mountpoint\n";
		eval { run_command(['/bin/umount', $mountpoint]); };
		if (my $err = $@) {
		    warn $err;
		} elsif ($cleanup) {
		    #be aware of the ceph udev rules which can remount.
		    foreach my $part (@$partitions_to_remove) {
			$remove_partition->($part);
		    }
		}
	    }

	    # FIXME: Remove once we depend on systemd >= v249.
	    # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
	    # udev database is updated.
	    if ($cleanup) {
		eval { run_command(['udevadm', 'trigger', keys $udev_trigger_devs->%*]); };
		warn $@ if $@;
	    }
	};

	# Long-running removal happens in a forked task worker; returns the UPID.
	return $rpcenv->fork_worker('cephdestroyosd', $osdsection, $authuser, $worker);
    }});
1049 | ||
__PACKAGE__->register_method ({
    name => 'in',
    path => '{osdid}/in',
    method => 'POST',
    description => "ceph osd in",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};

	my $rados = PVE::RADOS->new();
	$get_osd_status->($rados, $id); # dies if the OSD does not exist

	# Mark the OSD back in, so it takes data again.
	$rados->mon_command({
	    prefix => "osd in",
	    ids => [ "osd.$id" ],
	    format => 'plain',
	});

	return undef;
    }});
1088 | ||
__PACKAGE__->register_method ({
    name => 'out',
    path => '{osdid}/out',
    method => 'POST',
    description => "ceph osd out",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};

	my $rados = PVE::RADOS->new();
	$get_osd_status->($rados, $id); # dies if the OSD does not exist

	# Mark the OSD out, so data gets migrated away from it.
	$rados->mon_command({
	    prefix => "osd out",
	    ids => [ "osd.$id" ],
	    format => 'plain',
	});

	return undef;
    }});
1127 | ||
__PACKAGE__->register_method ({
    name => 'scrub',
    path => '{osdid}/scrub',
    method => 'POST',
    description => "Instruct the OSD to scrub.",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    deep => {
		description => 'If set, instructs a deep scrub instead of a normal one.',
		type => 'boolean',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};
	my $deep_scrub = $param->{deep} // 0;

	my $rados = PVE::RADOS->new();
	$get_osd_status->($rados, $id); # dies if the OSD does not exist

	# Choose between a normal and a deep scrub, then tell the OSD.
	my $cmd = $deep_scrub ? 'osd deep-scrub' : 'osd scrub';
	$rados->mon_command({ prefix => $cmd, who => $id });

	return undef;
    }});
1172 | ||
1; # a Perl module must end with a true value