]>
Commit | Line | Data |
---|---|---|
79fa41a2 DC |
1 | package PVE::API2::Ceph::OSD; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
6 | use Cwd qw(abs_path); | |
7 | use IO::File; | |
7783f755 | 8 | use UUID; |
79fa41a2 DC |
9 | |
10 | use PVE::Ceph::Tools; | |
11 | use PVE::Ceph::Services; | |
12 | use PVE::CephConfig; | |
13 | use PVE::Cluster qw(cfs_read_file cfs_write_file); | |
14 | use PVE::Diskmanage; | |
7783f755 | 15 | use PVE::Storage::LVMPlugin; |
79fa41a2 DC |
16 | use PVE::Exception qw(raise_param_exc); |
17 | use PVE::JSONSchema qw(get_standard_option); | |
a05349ab | 18 | use PVE::INotify; |
79fa41a2 DC |
19 | use PVE::RADOS; |
20 | use PVE::RESTHandler; | |
21 | use PVE::RPCEnvironment; | |
22 | use PVE::Tools qw(run_command file_set_contents); | |
3c6aa3f4 | 23 | use PVE::ProcFSTools; |
05bd76ac | 24 | use PVE::Network; |
79fa41a2 DC |
25 | |
26 | use base qw(PVE::RESTHandler); | |
27 | ||
# Hostname of the local node, resolved once at module load; used below for
# error messages and the OSD cluster-network reachability check.
my $nodename = PVE::INotify::nodename();
79fa41a2 DC |
# Fetch 'osd dump' from the monitors and index the OSD entries by their
# numeric id. With a defined $osdid, return only that OSD's stat hash and
# die if the id is unknown. Without one, return the whole id => stat map;
# in list context the osdmap flags string is returned as second element.
my $get_osd_status = sub {
    my ($rados, $osdid) = @_;

    my $dump = $rados->mon_command({ prefix => 'osd dump' });

    my $entries = $dump->{osds} || [];
    my $flags = $dump->{flags} || undef;

    my $stat_of;
    for my $entry (@$entries) {
	next if !defined($entry->{osd});
	$stat_of->{$entry->{osd}} = $entry;
    }

    if (defined($osdid)) {
	die "no such OSD '$osdid'\n" if !$stat_of->{$osdid};
	return $stat_of->{$osdid};
    }

    return wantarray ? ($stat_of, $flags) : $stat_of;
};
50 | ||
# Collect per-OSD usage statistics via 'pg dump osds' and return them as a
# hash keyed by OSD id. On an unexpected result format a warning is printed
# and an empty array ref is returned (kept for backward compatibility).
my $get_osd_usage = sub {
    my ($rados) = @_;

    my $result = $rados->mon_command({ prefix => 'pg dump', dumpcontents => [ 'osds' ]});

    if (!($result && ref($result))) {
	warn "got unknown result format for 'pg dump osds' command\n";
	return [];
    }

    # since nautilus the stats are wrapped in an 'osd_stats' hash member
    $result = $result->{osd_stats} if ref($result) eq "HASH";

    my $usage_of = {};
    foreach my $entry (@$result) {
	$usage_of->{$entry->{osd}} = $entry if defined($entry->{osd});
    }

    return $usage_of;
};
71 | ||
# GET /nodes/{node}/ceph/osd
# Build the Ceph OSD tree in the ExtJS tree format the web UI consumes:
# a synthetic root whose children are the real CRUSH roots, each node
# annotated with status, usage and version information.
__PACKAGE__->register_method ({
    name => 'index',
    path => '',
    method => 'GET',
    description => "Get Ceph osd list/tree.",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	},
    },
    # fixme: return a list instead of extjs tree format ?
    returns => {
	type => "object",
    },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $rados = PVE::RADOS->new();
	my $res = $rados->mon_command({ prefix => 'osd tree' });

	die "no tree nodes found\n" if !($res && $res->{nodes});

	# id => stat map plus osdmap flags (e.g. 'noout'), see $get_osd_status
	my ($osdhash, $flags) = $get_osd_status->($rados);

	# id => usage stats map from 'pg dump osds'
	my $osd_usage = $get_osd_usage->($rados);

	# 'osd metadata' returns a list; re-key it by OSD id for lookups below
	my $osdmetadata_res = $rados->mon_command({ prefix => 'osd metadata' });
	my $osdmetadata = { map { $_->{id} => $_ } @$osdmetadata_res };

	# per-host ceph version strings, distributed via the pmxcfs key-value store
	my $hostversions = PVE::Cluster::get_node_kv("ceph-version");

	# First pass: create one enriched node entry per CRUSH tree node.
	# $nodes keeps the raw entries (needed for parent/child wiring),
	# $newnodes the entries returned to the caller.
	my $nodes = {};
	my $newnodes = {};
	foreach my $e (@{$res->{nodes}}) {
	    my ($id, $name) = $e->@{qw(id name)};

	    $nodes->{$id} = $e;

	    my $new = {
		id => $id,
		name => $name,
		type => $e->{type}
	    };

	    foreach my $opt (qw(status crush_weight reweight device_class)) {
		$new->{$opt} = $e->{$opt} if defined($e->{$opt});
	    }

	    if (my $stat = $osdhash->{$id}) {
		$new->{in} = $stat->{in} if defined($stat->{in});
	    }

	    if (my $stat = $osd_usage->{$id}) {
		# 'pg dump' reports sizes in KiB; || 1 avoids division by zero below
		$new->{total_space} = ($stat->{kb} || 1) * 1024;
		$new->{bytes_used} = ($stat->{kb_used} || 0) * 1024;
		$new->{percent_used} = ($new->{bytes_used}*100)/$new->{total_space};
		if (my $d = $stat->{perf_stat}) {
		    $new->{commit_latency_ms} = $d->{commit_latency_ms};
		    $new->{apply_latency_ms} = $d->{apply_latency_ms};
		}
	    }

	    my $osdmd = $osdmetadata->{$id};
	    if ($e->{type} eq 'osd' && $osdmd) {
		# a bluefs entry in the metadata marks a bluestore OSD
		if ($osdmd->{bluefs}) {
		    $new->{osdtype} = 'bluestore';
		    $new->{blfsdev} = $osdmd->{bluestore_bdev_dev_node};
		    $new->{dbdev} = $osdmd->{bluefs_db_dev_node};
		    $new->{waldev} = $osdmd->{bluefs_wal_dev_node};
		} else {
		    $new->{osdtype} = 'filestore';
		}
		for my $field (qw(ceph_version ceph_version_short)) {
		    $new->{$field} = $osdmd->{$field} if $osdmd->{$field};
		}
	    }

	    $newnodes->{$id} = $new;
	}

	# Second pass: wire up parent/child relations and leaf flags now that
	# every node entry exists.
	foreach my $e (@{$res->{nodes}}) {
	    my ($id, $name) = $e->@{qw(id name)};
	    my $new = $newnodes->{$id};

	    if ($e->{children} && scalar(@{$e->{children}})) {
		$new->{children} = [];
		$new->{leaf} = 0;
		foreach my $cid (@{$e->{children}}) {
		    $nodes->{$cid}->{parent} = $id;
		    if ($nodes->{$cid}->{type} eq 'osd' && $e->{type} eq 'host') {
			$newnodes->{$cid}->{host} = $name;
		    }
		    push @{$new->{children}}, $newnodes->{$cid};
		}
	    } else {
		# OSDs have non-negative CRUSH ids; buckets (host/root/...) are negative
		$new->{leaf} = ($id >= 0) ? 1 : 0;
	    }

	    if ($name && $e->{type} eq 'host') {
		$new->{version} = $hostversions->{$name};
	    }
	}

	# Nodes without a parent after the wiring pass are the real CRUSH roots.
	my $realroots = [];
	foreach my $e (@{$res->{nodes}}) {
	    my $id = $e->{id};
	    if (!$nodes->{$id}->{parent}) {
		push @$realroots, $newnodes->{$id};
	    }
	}

	die "no root node\n" if scalar(@$realroots) < 1;

	my $data = {
	    root => {
		leaf => 0,
		children => $realroots
	    },
	    versions => $hostversions, # for compatibility
	};

	$data->{flags} = $flags if $flags; # we want this for the noout flag

	return $data;
    }});
205 | ||
# POST /nodes/{node}/ceph/osd
# Create a new (bluestore) OSD via 'ceph-volume lvm create'. Optional
# separate block.db/block.wal devices are prepared as LVs or GPT
# partitions first. Returns a worker task UPID.
__PACKAGE__->register_method ({
    name => 'createosd',
    path => '',
    method => 'POST',
    description => "Create OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    dev => {
		description => "Block device name.",
		type => 'string',
	    },
	    db_dev => {
		description => "Block device name for block.db.",
		optional => 1,
		type => 'string',
	    },
	    db_dev_size => {
		description => "Size in GiB for block.db.",
		verbose_description => "If a block.db is requested but the size is not given, ".
		    "will be automatically selected by: bluestore_block_db_size from the ".
		    "ceph database (osd or global section) or config (osd or global section)".
		    "in that order. If this is not available, it will be sized 10% of the size ".
		    "of the OSD device. Fails if the available size is not enough.",
		optional => 1,
		type => 'number',
		default => 'bluestore_block_db_size or 10% of OSD size',
		requires => 'db_dev',
		minimum => 1.0,
	    },
	    wal_dev => {
		description => "Block device name for block.wal.",
		optional => 1,
		type => 'string',
	    },
	    wal_dev_size => {
		description => "Size in GiB for block.wal.",
		verbose_description => "If a block.wal is requested but the size is not given, ".
		    "will be automatically selected by: bluestore_block_wal_size from the ".
		    "ceph database (osd or global section) or config (osd or global section)".
		    "in that order. If this is not available, it will be sized 1% of the size ".
		    "of the OSD device. Fails if the available size is not enough.",
		optional => 1,
		minimum => 0.5,
		default => 'bluestore_block_wal_size or 1% of OSD size',
		requires => 'wal_dev',
		type => 'number',
	    },
	    encrypted => {
		type => 'boolean',
		optional => 1,
		default => 0,
		description => "Enables encryption of the OSD."
	    },
	    'crush-device-class' => {
		optional => 1,
		type => 'string',
		description => "Set the device class of the OSD in crush."
	    },
	},
    },
    returns => { type => 'string' },
    code => sub {
	my ($param) = @_;

	my $rpcenv = PVE::RPCEnvironment::get();

	my $authuser = $rpcenv->get_user();

	# test basic requirements
	PVE::Ceph::Tools::check_ceph_inited();
	PVE::Ceph::Tools::setup_pve_symlinks();
	PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
	PVE::Ceph::Tools::check_ceph_installed('ceph_volume');

	# extract parameter info and fail if a device is set more than once
	my $devs = {};

	my $ceph_conf = cfs_read_file('ceph.conf');

	# the OSD traffic runs on the cluster network if configured,
	# otherwise on the public network
	my $osd_network = $ceph_conf->{global}->{cluster_network};
	$osd_network //= $ceph_conf->{global}->{public_network}; # fallback

	if ($osd_network) { # check only if something is configured
	    my $cluster_net_ips = PVE::Network::get_local_ip_from_cidr($osd_network);
	    if (scalar(@$cluster_net_ips) < 1) {
		my $osd_net_obj = PVE::Network::IP_from_cidr($osd_network);
		my $osd_base_cidr = $osd_net_obj->{ip} . "/" . $osd_net_obj->{prefixlen};

		die "No address from ceph cluster network (${osd_base_cidr}) found on node '$nodename'. ".
		    "Check your network config.\n";
	    }
	}

	# normalize dev/db_dev/wal_dev into $devs = { type => { dev, name, size? } }
	for my $type ( qw(dev db_dev wal_dev) ) {
	    next if !$param->{$type};

	    my $type_dev = PVE::Diskmanage::verify_blockdev_path($param->{$type});
	    (my $type_devname = $type_dev) =~ s|/dev/||;

	    raise_param_exc({ $type => "cannot chose '$type_dev' for more than one type." })
		if grep { $_->{name} eq $type_devname } values %$devs;

	    $devs->{$type} = {
		dev => $type_dev,
		name => $type_devname,
	    };

	    # API sizes are GiB; convert to bytes once here
	    if (my $size = $param->{"${type}_size"}) {
		$devs->{$type}->{size} = PVE::Tools::convert_size($size, 'gb' => 'b') ;
	    }
	}

	# Verify the chosen devices are usable: the data device must be
	# completely unused; db/wal devices may carry LVM or a GPT table.
	my $test_disk_requirements = sub {
	    my ($disklist) = @_;

	    my $dev = $devs->{dev}->{dev};
	    my $devname = $devs->{dev}->{name};
	    die "unable to get device info for '$dev'\n" if !$disklist->{$devname};
	    die "device '$dev' is already in use\n" if $disklist->{$devname}->{used};

	    for my $type ( qw(db_dev wal_dev) ) {
		my $d = $devs->{$type};
		next if !$d;
		my $name = $d->{name};
		my $info = $disklist->{$name};
		die "unable to get device info for '$d->{dev}' for type $type\n" if !$disklist->{$name};
		if (my $usage = $info->{used}) {
		    if ($usage eq 'partitions') {
			die "device '$d->{dev}' is not GPT partitioned\n" if !$info->{gpt};
		    } elsif ($usage ne 'LVM') {
			die "device '$d->{dev}' is already in use and has no LVM on it\n";
		    }
		}
	    }
	};


	# test disk requirements early
	my $devlist = [ map { $_->{name} } values %$devs ];
	my $disklist = PVE::Diskmanage::get_disks($devlist, 1);
	$test_disk_requirements->($disklist);

	# get necessary ceph infos
	my $rados = PVE::RADOS->new();
	my $monstat = $rados->mon_command({ prefix => 'quorum_status' });

	die "unable to get fsid\n" if !$monstat->{monmap} || !$monstat->{monmap}->{fsid};
	my $fsid = $monstat->{monmap}->{fsid};
	$fsid = $1 if $fsid =~ m/^([0-9a-f\-]+)$/;

	my $ceph_bootstrap_osd_keyring = PVE::Ceph::Tools::get_config('ceph_bootstrap_osd_keyring');

	# fetch the bootstrap-osd keyring from the monitors if it is missing
	# and cephx authentication is in use
	if (! -f $ceph_bootstrap_osd_keyring && $ceph_conf->{global}->{auth_client_required} eq 'cephx') {
	    my $bindata = $rados->mon_command({
		prefix => 'auth get-or-create',
		entity => 'client.bootstrap-osd',
		caps => [
		    'mon' => 'allow profile bootstrap-osd'
		],
		format => 'plain',
	    });
	    file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
	};

	# Allocate $size bytes on $dev for a block.db/block.wal volume and
	# return the result as "vg/lv" (or a partition path). Depending on
	# the device's current usage this creates a fresh PV+VG+LV, an LV in
	# an existing ceph- VG, or appends a GPT partition. Dies otherwise.
	my $create_part_or_lv = sub {
	    my ($dev, $size, $type) = @_;

	    # untaint: $size comes from user input / config
	    $size =~ m/^(\d+)$/ or die "invalid size '$size'\n";
	    $size = $1;

	    die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
		if $dev->{size} < $size;

	    # sgdisk and lvcreate can only sizes divisible by 512b
	    # so we round down to the nearest kb
	    $size = PVE::Tools::convert_size($size, 'b' => 'kb', 1);

	    if (!$dev->{used}) {
		# create pv,vg,lv

		my $vg = "ceph-" . UUID::uuid();
		my $lv = $type . "-" . UUID::uuid();

		PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
		PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

		return "$vg/$lv";

	    } elsif ($dev->{used} eq 'LVM') {
		# check pv/vg and create lv

		my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
		my $vg;
		for my $vgname ( sort keys %$vgs ) {
		    next if $vgname !~ /^ceph-/;

		    for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
			next if $pv->{name} ne $dev->{devpath};
			$vg = $vgname;
			last;
		    }
		    last if $vg;
		}

		die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
		die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;

		my $lv = $type . "-" . UUID::uuid();

		PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

		return "$vg/$lv";

	    } elsif ($dev->{used} eq 'partitions' && $dev->{gpt}) {
		# create new partition at the end

		# $size is in KiB here, append_partition expects bytes
		return PVE::Diskmanage::append_partition($dev->{devpath}, $size * 1024);
	    }

	    die "cannot use '$dev->{devpath}' for '$type'\n";
	};

	my $worker = sub {
	    my $upid = shift;

	    PVE::Diskmanage::locked_disk_action(sub {
		# update disklist and re-test requirements
		$disklist = PVE::Diskmanage::get_disks($devlist, 1);
		$test_disk_requirements->($disklist);

		my $dev_class = $param->{'crush-device-class'};
		my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
		push @$cmd, '--crush-device-class', $dev_class if $dev_class;

		my $devname = $devs->{dev}->{name};
		my $devpath = $disklist->{$devname}->{devpath};
		print "create OSD on $devpath (bluestore)\n";

		my $osd_size = $disklist->{$devname}->{size};
		my $size_map = {
		    db => int($osd_size / 10), # 10% of OSD
		    wal => int($osd_size / 100), # 1% of OSD
		};

		my $sizes;
		foreach my $type ( qw(db wal) ) {
		    my $fallback_size = $size_map->{$type};
		    my $d = $devs->{"${type}_dev"};
		    next if !$d;

		    # size was not set via api, getting from config/fallback
		    if (!defined($d->{size})) {
			$sizes = PVE::Ceph::Tools::get_db_wal_sizes() if !$sizes;
			$d->{size} = $sizes->{$type} // $fallback_size;
		    }
		    print "creating block.$type on '$d->{dev}'\n";
		    my $name = $d->{name};
		    my $part_or_lv = $create_part_or_lv->($disklist->{$name}, $d->{size}, "osd-$type");

		    print "using '$part_or_lv' for block.$type\n";
		    push @$cmd, "--block.$type", $part_or_lv;
		}

		push @$cmd, '--data', $devpath;
		push @$cmd, '--dmcrypt' if $param->{encrypted};

		PVE::Ceph::Tools::wipe_disks($devpath);

		run_command($cmd);
	    });
	};

	return $rpcenv->fork_worker('cephcreateosd', $devs->{dev}->{name}, $authuser, $worker);
    }});
484 | ||
220173e9 DJ |
# Check whether $osdid is listed under host $nodename in a rados 'osd tree'
# result. The tree is passed in explicitly so the function stays a pure,
# easily testable predicate. Dies on duplicate host names (internal error);
# returns a false value (0 or empty) when the tree, host or OSD is absent.
sub osd_belongs_to_node {
    my ($tree, $nodename, $osdid) = @_;

    return 0 if !($tree && $tree->{nodes});

    # index the 'host' entries by name, rejecting duplicates
    my $host_by_name = {};
    for my $entry (@{ $tree->{nodes} }) {
	next if !defined($entry->{type}) || $entry->{type} ne 'host';

	my $host = $entry->{name};
	die "internal error: duplicate host name found '$host'\n"
	    if $host_by_name->{$host};
	$host_by_name->{$host} = $entry;
    }

    my $children = $host_by_name->{$nodename}->{children};
    return 0 if !$children;

    return grep($_ == $osdid, @$children);
}
503 | ||
79fa41a2 DC |
# DELETE /nodes/{node}/ceph/osd/{osdid}
# Destroy an OSD that is already 'out' and stopped: remove it from the
# CRUSH map, delete its auth key and rm the OSD id; with 'cleanup' also
# wipe/remove the backing volumes. Returns a worker task UPID.
__PACKAGE__->register_method ({
    name => 'destroyosd',
    path => '{osdid}',
    method => 'DELETE',
    description => "Destroy OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    cleanup => {
		description => "If set, we remove partition table entries.",
		type => 'boolean',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => { type => 'string' },
    code => sub {
	my ($param) = @_;

	my $rpcenv = PVE::RPCEnvironment::get();

	my $authuser = $rpcenv->get_user();

	PVE::Ceph::Tools::check_ceph_inited();

	my $osdid = $param->{osdid};
	my $cleanup = $param->{cleanup};

	my $rados = PVE::RADOS->new();

	# refuse to destroy an OSD that the CRUSH map places on another node
	my $osd_belongs_to_node = osd_belongs_to_node(
	    $rados->mon_command({ prefix => 'osd tree' }),
	    $param->{node},
	    $osdid,
	);
	die "OSD osd.$osdid does not belong to node $param->{node}!"
	    if !$osd_belongs_to_node;

	# dies if osdid is unknown
	my $osdstat = $get_osd_status->($rados, $osdid);

	die "osd is in use (in == 1)\n" if $osdstat->{in};
	#&$run_ceph_cmd(['osd', 'out', $osdid]);

	die "osd is still running (up == 1)\n" if $osdstat->{up};

	my $osdsection = "osd.$osdid";

	my $worker = sub {
	    my $upid = shift;

	    # reopen with longer timeout
	    $rados = PVE::RADOS->new(timeout => PVE::Ceph::Tools::get_config('long_rados_timeout'));

	    print "destroy OSD $osdsection\n";

	    # best effort: stopping/disabling the service may fail if it is
	    # already gone
	    eval {
		PVE::Ceph::Services::ceph_service_cmd('stop', $osdsection);
		PVE::Ceph::Services::ceph_service_cmd('disable', $osdsection);
	    };
	    warn $@ if $@;

	    print "Remove $osdsection from the CRUSH map\n";
	    $rados->mon_command({ prefix => "osd crush remove", name => $osdsection, format => 'plain' });

	    print "Remove the $osdsection authentication key.\n";
	    $rados->mon_command({ prefix => "auth del", entity => $osdsection, format => 'plain' });

	    print "Remove OSD $osdsection\n";
	    $rados->mon_command({ prefix => "osd rm", ids => [ $osdsection ], format => 'plain' });

	    # try to unmount from standard mount point
	    my $mountpoint = "/var/lib/ceph/osd/ceph-$osdid";

	    # wipe and delete a single partition via sgdisk (best effort)
	    my $remove_partition = sub {
		my ($part) = @_;

		return if !$part || (! -b $part );
		my $partnum = PVE::Diskmanage::get_partnum($part);
		my $devpath = PVE::Diskmanage::get_blockdev($part);

		PVE::Ceph::Tools::wipe_disks($part);
		print "remove partition $part (disk '${devpath}', partnum $partnum)\n";
		eval { run_command(['/sbin/sgdisk', '-d', $partnum, "${devpath}"]); };
		warn $@ if $@;
	    };

	    my $osd_list = PVE::Ceph::Tools::ceph_volume_list();

	    if ($osd_list->{$osdid}) { # ceph-volume managed

		eval { PVE::Ceph::Tools::ceph_volume_zap($osdid, $cleanup) };
		warn $@ if $@;

		if ($cleanup) {
		    # try to remove pvs, but do not fail if it does not work
		    for my $osd_part (@{$osd_list->{$osdid}}) {
			for my $dev (@{$osd_part->{devices}}) {
			    ($dev) = ($dev =~ m|^(/dev/[-_.a-zA-Z0-9\/]+)$|); #untaint

			    eval { run_command(['/sbin/pvremove', $dev], errfunc => sub {}) };
			    warn $@ if $@;
			}
		    }
		}
	    } else {
		# legacy (ceph-disk style) OSD: collect its partitions before
		# unmounting so they can still be resolved via the mountpoint
		my $partitions_to_remove = [];
		if ($cleanup) {
		    if (my $mp = PVE::ProcFSTools::parse_proc_mounts()) {
			foreach my $line (@$mp) {
			    my ($dev, $path, $fstype) = @$line;
			    next if !($dev && $path && $fstype);
			    next if $dev !~ m|^/dev/|;

			    if ($path eq $mountpoint) {
				abs_path($dev) =~ m|^(/.+)| or die "invalid dev: $dev\n";
				push @$partitions_to_remove, $1;
				last;
			    }
			}
		    }

		    # journal/db/wal symlinks inside the mountpoint resolve to
		    # the backing partitions
		    foreach my $path (qw(journal block block.db block.wal)) {
			abs_path("$mountpoint/$path") =~ m|^(/.+)| or die "invalid path: $path\n";
			push @$partitions_to_remove, $1;
		    }
		}

		print "Unmount OSD $osdsection from $mountpoint\n";
		eval { run_command(['/bin/umount', $mountpoint]); };
		if (my $err = $@) {
		    warn $err;
		} elsif ($cleanup) {
		    #be aware of the ceph udev rules which can remount.
		    foreach my $part (@$partitions_to_remove) {
			$remove_partition->($part);
		    }
		}
	    }
	};

	return $rpcenv->fork_worker('cephdestroyosd', $osdsection, $authuser, $worker);
    }});
655 | ||
# POST /nodes/{node}/ceph/osd/{osdid}/in
# Mark an OSD as 'in' so the cluster places data on it again.
__PACKAGE__->register_method ({
    name => 'in',
    path => '{osdid}/in',
    method => 'POST',
    description => "ceph osd in",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};

	my $rados = PVE::RADOS->new();

	# dies if there is no OSD with this id
	$get_osd_status->($rados, $id);

	my $osd_name = "osd.$id";

	$rados->mon_command({ prefix => "osd in", ids => [ $osd_name ], format => 'plain' });

	return undef;
    }});
694 | ||
# POST /nodes/{node}/ceph/osd/{osdid}/out
# Mark an OSD as 'out' so the cluster starts draining data off it.
__PACKAGE__->register_method ({
    name => 'out',
    path => '{osdid}/out',
    method => 'POST',
    description => "ceph osd out",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};

	my $rados = PVE::RADOS->new();

	# dies if there is no OSD with this id
	$get_osd_status->($rados, $id);

	my $osd_name = "osd.$id";

	$rados->mon_command({ prefix => "osd out", ids => [ $osd_name ], format => 'plain' });

	return undef;
    }});
733 | ||
b7701301 DC |
# POST /nodes/{node}/ceph/osd/{osdid}/scrub
# Ask the monitors to schedule a (deep) scrub on the given OSD.
__PACKAGE__->register_method ({
    name => 'scrub',
    path => '{osdid}/scrub',
    method => 'POST',
    description => "Instruct the OSD to scrub.",
    proxyto => 'node',
    protected => 1,
    permissions => {
	check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    node => get_standard_option('pve-node'),
	    osdid => {
		description => 'OSD ID',
		type => 'integer',
	    },
	    deep => {
		description => 'If set, instructs a deep scrub instead of a normal one.',
		type => 'boolean',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => { type => "null" },
    code => sub {
	my ($param) = @_;

	PVE::Ceph::Tools::check_ceph_inited();

	my $id = $param->{osdid};
	my $deep = $param->{deep} // 0;

	my $rados = PVE::RADOS->new();

	# dies if there is no OSD with this id
	$get_osd_status->($rados, $id);

	my $prefix = 'osd scrub';
	$prefix = 'osd deep-scrub' if $deep;

	$rados->mon_command({ prefix => $prefix, who => $id });

	return undef;
    }});
778 | ||
79fa41a2 | 779 | 1; |