]>
Commit | Line | Data |
---|---|---|
79fa41a2 DC |
1 | package PVE::API2::Ceph::OSD; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
6 | use Cwd qw(abs_path); | |
7 | use IO::File; | |
7783f755 | 8 | use UUID; |
79fa41a2 DC |
9 | |
10 | use PVE::Ceph::Tools; | |
11 | use PVE::Ceph::Services; | |
12 | use PVE::CephConfig; | |
13 | use PVE::Cluster qw(cfs_read_file cfs_write_file); | |
14 | use PVE::Diskmanage; | |
7783f755 | 15 | use PVE::Storage::LVMPlugin; |
79fa41a2 DC |
16 | use PVE::Exception qw(raise_param_exc); |
17 | use PVE::JSONSchema qw(get_standard_option); | |
a05349ab | 18 | use PVE::INotify; |
79fa41a2 DC |
19 | use PVE::RADOS; |
20 | use PVE::RESTHandler; | |
21 | use PVE::RPCEnvironment; | |
22 | use PVE::Tools qw(run_command file_set_contents); | |
3c6aa3f4 | 23 | use PVE::ProcFSTools; |
05bd76ac | 24 | use PVE::Network; |
79fa41a2 DC |
25 | |
26 | use base qw(PVE::RESTHandler); | |
27 | ||
a05349ab TL |
28 | my $nodename = PVE::INotify::nodename(); |
29 | ||
79fa41a2 DC |
# Query 'osd dump' via RADOS and index the OSD entries by id.
#
# With $osdid given: returns that single OSD's status entry, dying with
# "no such OSD" if it is unknown.
# Without $osdid: returns the id => entry map; in list context it
# additionally returns the cluster flags string (used for 'noout').
my $get_osd_status = sub {
    my ($rados, $osdid) = @_;

    my $dump = $rados->mon_command({ prefix => 'osd dump' });

    my $flags = $dump->{flags} || undef;

    my $stat_of;
    for my $entry (@{ $dump->{osds} || [] }) {
        next if !defined($entry->{osd});
        $stat_of->{$entry->{osd}} = $entry;
    }

    if (defined($osdid)) {
        die "no such OSD '$osdid'\n" if !$stat_of->{$osdid};
        return $stat_of->{$osdid};
    }

    return wantarray ? ($stat_of, $flags) : $stat_of;
};
50 | ||
# Collect per-OSD usage statistics via 'pg dump osds'.
#
# Returns a hashref mapping OSD id => stats entry. On an unparsable
# monitor reply it warns and returns an empty arrayref (kept as-is for
# backward compatibility with existing callers).
my $get_osd_usage = sub {
    my ($rados) = @_;

    my $dump = $rados->mon_command({ prefix => 'pg dump', dumpcontents => [ 'osds' ]});

    if (!($dump && ref($dump))) {
        warn "got unknown result format for 'pg dump osds' command\n";
        return [];
    }

    # nautilus and newer wrap the list in a hash under 'osd_stats'
    $dump = $dump->{osd_stats} if ref($dump) eq "HASH";

    my $usage_of = {};
    for my $entry (@$dump) {
        $usage_of->{$entry->{osd}} = $entry if defined($entry->{osd});
    }

    return $usage_of;
};
71 | ||
__PACKAGE__->register_method ({
    name => 'index',
    path => '',
    method => 'GET',
    description => "Get Ceph osd list/tree.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
        },
    },
    # fixme: return a list instead of extjs tree format ?
    returns => {
        type => "object",
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $rados = PVE::RADOS->new();
        my $res = $rados->mon_command({ prefix => 'osd tree' });

        die "no tree nodes found\n" if !($res && $res->{nodes});

        # gather per-OSD state/usage/metadata and the per-host ceph versions
        my ($status_of, $flags) = $get_osd_status->($rados);
        my $usage_of = $get_osd_usage->($rados);

        my $metadata_list = $rados->mon_command({ prefix => 'osd metadata' });
        my $metadata_of = { map { $_->{id} => $_ } @$metadata_list };

        my $hostversions = PVE::Cluster::get_node_kv("ceph-version");

        # first pass: build an enriched record for every CRUSH tree entry
        my $orig_of = {};    # id => raw crush tree entry
        my $info_of = {};    # id => enriched entry for the result tree
        for my $entry (@{$res->{nodes}}) {
            my ($id, $name) = $entry->@{qw(id name)};

            $orig_of->{$id} = $entry;

            my $info = {
                id => $id,
                name => $name,
                type => $entry->{type}
            };

            for my $opt (qw(status crush_weight reweight device_class)) {
                $info->{$opt} = $entry->{$opt} if defined($entry->{$opt});
            }

            if (my $stat = $status_of->{$id}) {
                $info->{in} = $stat->{in} if defined($stat->{in});
            }

            if (my $stat = $usage_of->{$id}) {
                $info->{total_space} = ($stat->{kb} || 1) * 1024;
                $info->{bytes_used} = ($stat->{kb_used} || 0) * 1024;
                $info->{percent_used} = ($info->{bytes_used}*100)/$info->{total_space};
                if (my $perf = $stat->{perf_stat}) {
                    $info->{commit_latency_ms} = $perf->{commit_latency_ms};
                    $info->{apply_latency_ms} = $perf->{apply_latency_ms};
                }
            }

            my $md = $metadata_of->{$id};
            if ($entry->{type} eq 'osd' && $md) {
                if ($md->{bluefs}) {
                    $info->{osdtype} = 'bluestore';
                    $info->{blfsdev} = $md->{bluestore_bdev_dev_node};
                    $info->{dbdev} = $md->{bluefs_db_dev_node};
                    $info->{waldev} = $md->{bluefs_wal_dev_node};
                } else {
                    $info->{osdtype} = 'filestore';
                }
                for my $field (qw(ceph_version ceph_version_short)) {
                    $info->{$field} = $md->{$field} if $md->{$field};
                }
            }

            $info_of->{$id} = $info;
        }

        # second pass: link children to parents and annotate host entries
        for my $entry (@{$res->{nodes}}) {
            my ($id, $name) = $entry->@{qw(id name)};
            my $info = $info_of->{$id};

            if ($entry->{children} && scalar(@{$entry->{children}})) {
                $info->{children} = [];
                $info->{leaf} = 0;
                for my $cid (@{$entry->{children}}) {
                    $orig_of->{$cid}->{parent} = $id;
                    if ($orig_of->{$cid}->{type} eq 'osd' && $entry->{type} eq 'host') {
                        $info_of->{$cid}->{host} = $name;
                    }
                    push @{$info->{children}}, $info_of->{$cid};
                }
            } else {
                # only OSDs (id >= 0) count as leaves; CRUSH buckets have negative ids
                $info->{leaf} = ($id >= 0) ? 1 : 0;
            }

            if ($name && $entry->{type} eq 'host') {
                $info->{version} = $hostversions->{$name};
            }
        }

        # entries that never got a parent assigned are the CRUSH root buckets
        my $realroots = [];
        for my $entry (@{$res->{nodes}}) {
            my $id = $entry->{id};
            push @$realroots, $info_of->{$id} if !$orig_of->{$id}->{parent};
        }

        die "no root node\n" if scalar(@$realroots) < 1;

        my $data = {
            root => {
                leaf => 0,
                children => $realroots
            },
            versions => $hostversions, # for compatibility
        };

        $data->{flags} = $flags if $flags; # we want this for the noout flag

        return $data;
    }});
205 | ||
__PACKAGE__->register_method ({
    name => 'createosd',
    path => '',
    method => 'POST',
    description => "Create OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            dev => {
                description => "Block device name.",
                type => 'string',
            },
            db_dev => {
                description => "Block device name for block.db.",
                optional => 1,
                type => 'string',
            },
            db_size => {
                description => "Size in GiB for block.db.",
                verbose_description => "If a block.db is requested but the size is not given, ".
                    "will be automatically selected by: bluestore_block_db_size from the ".
                    "ceph database (osd or global section) or config (osd or global section)".
                    "in that order. If this is not available, it will be sized 10% of the size ".
                    "of the OSD device. Fails if the available size is not enough.",
                optional => 1,
                type => 'number',
                default => 'bluestore_block_db_size or 10% of OSD size',
                requires => 'db_dev',
                minimum => 1.0,
            },
            wal_dev => {
                description => "Block device name for block.wal.",
                optional => 1,
                type => 'string',
            },
            wal_size => {
                description => "Size in GiB for block.wal.",
                verbose_description => "If a block.wal is requested but the size is not given, ".
                    "will be automatically selected by: bluestore_block_wal_size from the ".
                    "ceph database (osd or global section) or config (osd or global section)".
                    "in that order. If this is not available, it will be sized 1% of the size ".
                    "of the OSD device. Fails if the available size is not enough.",
                optional => 1,
                minimum => 0.5,
                default => 'bluestore_block_wal_size or 1% of OSD size',
                requires => 'wal_dev',
                type => 'number',
            },
            encrypted => {
                type => 'boolean',
                optional => 1,
                default => 0,
                description => "Enables encryption of the OSD."
            },
            'crush-device-class' => {
                optional => 1,
                type => 'string',
                description => "Set the device class of the OSD in crush."
            },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();

        my $authuser = $rpcenv->get_user();

        # test basic requirements
        PVE::Ceph::Tools::check_ceph_inited();
        PVE::Ceph::Tools::setup_pve_symlinks();
        PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
        PVE::Ceph::Tools::check_ceph_installed('ceph_volume');

        # extract parameter info and fail if a device is set more than once
        my $devs = {};

        my $ceph_conf = cfs_read_file('ceph.conf');

        # OSD traffic uses the cluster (back) network if set, else the public one
        my $osd_network = $ceph_conf->{global}->{cluster_network};
        $osd_network //= $ceph_conf->{global}->{public_network}; # fallback

        if ($osd_network) { # check only if something is configured
            my $cluster_net_ips = PVE::Network::get_local_ip_from_cidr($osd_network);
            if (scalar(@$cluster_net_ips) < 1) {
                my $osd_net_obj = PVE::Network::IP_from_cidr($osd_network);
                my $osd_base_cidr = $osd_net_obj->{ip} . "/" . $osd_net_obj->{prefixlen};

                die "No address from ceph cluster network (${osd_base_cidr}) found on node '$nodename'. ".
                    "Check your network config.\n";
            }
        }

        # FIXME: rename params on next API compatibility change (7.0)
        $param->{wal_dev_size} = delete $param->{wal_size};
        $param->{db_dev_size} = delete $param->{db_size};

        for my $type ( qw(dev db_dev wal_dev) ) {
            next if !$param->{$type};

            my $type_dev = PVE::Diskmanage::verify_blockdev_path($param->{$type});
            (my $type_devname = $type_dev) =~ s|/dev/||;

            # each physical device may only be used for one role (data/db/wal)
            raise_param_exc({ $type => "cannot choose '$type_dev' for more than one type." })
                if grep { $_->{name} eq $type_devname } values %$devs;

            $devs->{$type} = {
                dev => $type_dev,
                name => $type_devname,
            };

            if (my $size = $param->{"${type}_size"}) {
                $devs->{$type}->{size} = PVE::Tools::convert_size($size, 'gb' => 'b');
            }
        }

        # test osd requirements early
        my $devlist = [ map { $_->{name} } values %$devs ];
        my $disklist = PVE::Diskmanage::get_disks($devlist, 1);
        my $dev = $devs->{dev}->{dev};
        my $devname = $devs->{dev}->{name};
        die "unable to get device info for '$dev'\n" if !$disklist->{$devname};
        die "device '$dev' is already in use\n" if $disklist->{$devname}->{used};

        # test db/wal requirements early; they may be shared (LVM) or
        # partitioned (GPT) devices, but must not be otherwise occupied
        for my $type ( qw(db_dev wal_dev) ) {
            my $d = $devs->{$type};
            next if !$d;
            my $name = $d->{name};
            my $info = $disklist->{$name};
            die "unable to get device info for '$d->{dev}' for type $type\n" if !$disklist->{$name};
            if (my $usage = $info->{used}) {
                if ($usage eq 'partitions') {
                    die "device '$d->{dev}' is not GPT partitioned\n" if !$info->{gpt};
                } elsif ($usage ne 'LVM') {
                    die "device '$d->{dev}' is already in use and has no LVM on it\n";
                }
            }
        }

        # get necessary ceph infos
        my $rados = PVE::RADOS->new();
        my $monstat = $rados->mon_command({ prefix => 'quorum_status' });

        die "unable to get fsid\n" if !$monstat->{monmap} || !$monstat->{monmap}->{fsid};
        my $fsid = $monstat->{monmap}->{fsid};
        $fsid = $1 if $fsid =~ m/^([0-9a-f\-]+)$/;

        my $ceph_bootstrap_osd_keyring = PVE::Ceph::Tools::get_config('ceph_bootstrap_osd_keyring');

        # Ceph defaults auth_client_required to 'cephx' when it is not set in
        # ceph.conf; comparing the possibly-undefined value directly emitted an
        # 'uninitialized value' warning and skipped bootstrapping the keyring.
        my $auth_client = $ceph_conf->{global}->{auth_client_required} // 'cephx';

        if (! -f $ceph_bootstrap_osd_keyring && $auth_client eq 'cephx') {
            my $bindata = $rados->mon_command({
                prefix => 'auth get-or-create',
                entity => 'client.bootstrap-osd',
                caps => [
                    'mon' => 'allow profile bootstrap-osd'
                ],
                format => 'plain',
            });
            file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
        }

        # Allocate space for a block.db/block.wal on $dev: use the whole disk
        # (fresh VG+LV), carve an LV out of an existing ceph- VG, or append a
        # GPT partition. Returns "vg/lv" or the partition path.
        my $create_part_or_lv = sub {
            my ($dev, $size, $type) = @_;

            $size =~ m/^(\d+)$/ or die "invalid size '$size'\n";
            $size = $1; # untaint

            die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
                if $dev->{size} < $size;

            # sgdisk and lvcreate can only sizes divisible by 512b
            # so we round down to the nearest kb
            $size = PVE::Tools::convert_size($size, 'b' => 'kb', 1);

            if (!$dev->{used}) {
                # create pv,vg,lv

                my $vg = "ceph-" . UUID::uuid();
                my $lv = $type . "-" . UUID::uuid();

                PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
                PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

                return "$vg/$lv";

            } elsif ($dev->{used} eq 'LVM') {
                # check pv/vg and create lv

                my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
                my $vg;
                for my $vgname ( sort keys %$vgs ) {
                    next if $vgname !~ /^ceph-/;

                    for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
                        next if $pv->{name} ne $dev->{devpath};
                        $vg = $vgname;
                        last;
                    }
                    last if $vg;
                }

                die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
                die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;

                my $lv = $type . "-" . UUID::uuid();

                PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");

                return "$vg/$lv";

            } elsif ($dev->{used} eq 'partitions' && $dev->{gpt}) {
                # create new partition at the end

                return PVE::Diskmanage::append_partition($dev->{devpath}, $size * 1024);
            }

            die "cannot use '$dev->{devpath}' for '$type'\n";
        };

        my $worker = sub {
            my $upid = shift;

            PVE::Diskmanage::locked_disk_action(sub {
                # update disklist
                $disklist = PVE::Diskmanage::get_disks($devlist, 1);

                my $dev_class = $param->{'crush-device-class'};
                my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
                push @$cmd, '--crush-device-class', $dev_class if $dev_class;

                my $devpath = $disklist->{$devname}->{devpath};
                print "create OSD on $devpath (bluestore)\n";

                # fallback sizes when neither the API nor the ceph config set one
                my $osd_size = $disklist->{$devname}->{size};
                my $size_map = {
                    db => int($osd_size / 10), # 10% of OSD
                    wal => int($osd_size / 100), # 1% of OSD
                };

                my $sizes;
                foreach my $type ( qw(db wal) ) {
                    my $fallback_size = $size_map->{$type};
                    my $d = $devs->{"${type}_dev"};
                    next if !$d;

                    # size was not set via api, getting from config/fallback
                    if (!defined($d->{size})) {
                        $sizes = PVE::Ceph::Tools::get_db_wal_sizes() if !$sizes;
                        $d->{size} = $sizes->{$type} // $fallback_size;
                    }
                    print "creating block.$type on '$d->{dev}'\n";
                    my $name = $d->{name};
                    my $part_or_lv = $create_part_or_lv->($disklist->{$name}, $d->{size}, "osd-$type");

                    print "using '$part_or_lv' for block.$type\n";
                    push @$cmd, "--block.$type", $part_or_lv;
                }

                push @$cmd, '--data', $devpath;
                push @$cmd, '--dmcrypt' if $param->{encrypted};

                PVE::Ceph::Tools::wipe_disks($devpath);

                run_command($cmd);
            });
        };

        return $rpcenv->fork_worker('cephcreateosd', $devname, $authuser, $worker);
    }});
480 | ||
220173e9 DJ |
# Check if $osdid belongs to $nodename.
# $tree ... rados osd tree (passing the tree makes it easy to test)
#
# Returns true iff $osdid is listed among the children of the host entry
# named $nodename. Returns false (instead of dying with an unhelpful
# "undefined value as an ARRAY reference") when $nodename is not present
# in the tree at all, or when the host entry has no children.
# Dies if the tree is empty or contains duplicate host entries.
sub osd_belongs_to_node {
    my ($tree, $nodename, $osdid) = @_;

    die "No tree nodes found\n" if !($tree && $tree->{nodes});
    my $allNodes = $tree->{nodes};

    # some tree entries (e.g. OSDs vs. buckets) may lack a name; guard against
    # undef warnings while matching
    my @match = grep(($_->{name} // '') eq $nodename, @$allNodes);
    my $node = shift @match; # contains rados information about $nodename
    die "There must not be more than one such node in the list\n" if @match;

    # unknown node -> the OSD cannot belong to it
    return 0 if !$node;

    my $osds = $node->{children} // [];
    return grep($_ == $osdid, @$osds);
}
496 | ||
79fa41a2 DC |
__PACKAGE__->register_method ({
    name => 'destroyosd',
    path => '{osdid}',
    method => 'DELETE',
    description => "Destroy OSD",
    proxyto => 'node',
    protected => 1,
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
            cleanup => {
                description => "If set, we remove partition table entries.",
                type => 'boolean',
                optional => 1,
                default => 0,
            },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();

        my $authuser = $rpcenv->get_user();

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $cleanup = $param->{cleanup};

        my $rados = PVE::RADOS->new();

        # refuse to destroy an OSD that lives on a different cluster node
        my $osd_belongs_to_node = osd_belongs_to_node(
            $rados->mon_command({ prefix => 'osd tree' }),
            $param->{node},
            $osdid,
        );
        die "OSD osd.$osdid does not belong to node $param->{node}!"
            if !$osd_belongs_to_node;

        # dies if osdid is unknown
        my $osdstat = $get_osd_status->($rados, $osdid);

        # only allow destruction of OSDs that are already marked 'out' and stopped
        die "osd is in use (in == 1)\n" if $osdstat->{in};
        #&$run_ceph_cmd(['osd', 'out', $osdid]);

        die "osd is still running (up == 1)\n" if $osdstat->{up};

        my $osdsection = "osd.$osdid";

        my $worker = sub {
            my $upid = shift;

            # reopen with longer timeout
            $rados = PVE::RADOS->new(timeout => PVE::Ceph::Tools::get_config('long_rados_timeout'));

            print "destroy OSD $osdsection\n";

            # best effort: stop and disable the OSD's systemd service first
            eval {
                PVE::Ceph::Services::ceph_service_cmd('stop', $osdsection);
                PVE::Ceph::Services::ceph_service_cmd('disable', $osdsection);
            };
            warn $@ if $@;

            # remove the OSD from the cluster: CRUSH map, auth key, OSD map --
            # the order matters, the OSD map entry must go last
            print "Remove $osdsection from the CRUSH map\n";
            $rados->mon_command({ prefix => "osd crush remove", name => $osdsection, format => 'plain' });

            print "Remove the $osdsection authentication key.\n";
            $rados->mon_command({ prefix => "auth del", entity => $osdsection, format => 'plain' });

            print "Remove OSD $osdsection\n";
            $rados->mon_command({ prefix => "osd rm", ids => [ $osdsection ], format => 'plain' });

            # try to unmount from standard mount point
            my $mountpoint = "/var/lib/ceph/osd/ceph-$osdid";

            # helper: wipe a partition's signatures and delete it from the GPT
            # (best effort -- failures are only warned about)
            my $remove_partition = sub {
                my ($part) = @_;

                return if !$part || (! -b $part );
                my $partnum = PVE::Diskmanage::get_partnum($part);
                my $devpath = PVE::Diskmanage::get_blockdev($part);

                PVE::Ceph::Tools::wipe_disks($part);
                print "remove partition $part (disk '${devpath}', partnum $partnum)\n";
                eval { run_command(['/sbin/sgdisk', '-d', $partnum, "${devpath}"]); };
                warn $@ if $@;
            };

            my $osd_list = PVE::Ceph::Tools::ceph_volume_list();

            if ($osd_list->{$osdid}) { # ceph-volume managed

                # let ceph-volume zap the LVs/devices backing this OSD
                eval { PVE::Ceph::Tools::ceph_volume_zap($osdid, $cleanup) };
                warn $@ if $@;

                if ($cleanup) {
                    # try to remove pvs, but do not fail if it does not work
                    for my $osd_part (@{$osd_list->{$osdid}}) {
                        for my $dev (@{$osd_part->{devices}}) {
                            ($dev) = ($dev =~ m|^(/dev/[-_.a-zA-Z0-9\/]+)$|); #untaint

                            eval { run_command(['/sbin/pvremove', $dev], errfunc => sub {}) };
                            warn $@ if $@;
                        }
                    }
                }
            } else {
                # legacy (ceph-disk style) OSD: collect partitions, then unmount
                my $partitions_to_remove = [];
                if ($cleanup) {
                    # the device mounted at the OSD mount point itself
                    if (my $mp = PVE::ProcFSTools::parse_proc_mounts()) {
                        foreach my $line (@$mp) {
                            my ($dev, $path, $fstype) = @$line;
                            next if !($dev && $path && $fstype);
                            next if $dev !~ m|^/dev/|;

                            if ($path eq $mountpoint) {
                                abs_path($dev) =~ m|^(/.+)| or die "invalid dev: $dev\n";
                                push @$partitions_to_remove, $1;
                                last;
                            }
                        }
                    }

                    # plus whatever the journal/block symlinks inside it resolve to
                    foreach my $path (qw(journal block block.db block.wal)) {
                        abs_path("$mountpoint/$path") =~ m|^(/.+)| or die "invalid path: $path\n";
                        push @$partitions_to_remove, $1;
                    }
                }

                print "Unmount OSD $osdsection from $mountpoint\n";
                eval { run_command(['/bin/umount', $mountpoint]); };
                if (my $err = $@) {
                    warn $err;
                } elsif ($cleanup) {
                    #be aware of the ceph udev rules which can remount.
                    foreach my $part (@$partitions_to_remove) {
                        $remove_partition->($part);
                    }
                }
            }
        };

        return $rpcenv->fork_worker('cephdestroyosd', $osdsection, $authuser, $worker);
    }});
648 | ||
__PACKAGE__->register_method ({
    name => 'in',
    path => '{osdid}/in',
    method => 'POST',
    description => "ceph osd in",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => { type => "null" },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $rados = PVE::RADOS->new();

        # dies if the OSD does not exist
        $get_osd_status->($rados, $osdid);

        # mark the OSD as 'in' so it receives data again
        $rados->mon_command({ prefix => "osd in", ids => [ "osd.$osdid" ], format => 'plain' });

        return undef;
    }});
687 | ||
__PACKAGE__->register_method ({
    name => 'out',
    path => '{osdid}/out',
    method => 'POST',
    description => "ceph osd out",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
        },
    },
    returns => { type => "null" },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $rados = PVE::RADOS->new();

        # dies if the OSD does not exist
        $get_osd_status->($rados, $osdid);

        # mark the OSD as 'out' so data is rebalanced away from it
        $rados->mon_command({ prefix => "osd out", ids => [ "osd.$osdid" ], format => 'plain' });

        return undef;
    }});
726 | ||
b7701301 DC |
__PACKAGE__->register_method ({
    name => 'scrub',
    path => '{osdid}/scrub',
    method => 'POST',
    description => "Instruct the OSD to scrub.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            osdid => {
                description => 'OSD ID',
                type => 'integer',
            },
            deep => {
                description => 'If set, instructs a deep scrub instead of a normal one.',
                type => 'boolean',
                optional => 1,
                default => 0,
            },
        },
    },
    returns => { type => "null" },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $osdid = $param->{osdid};
        my $deep = $param->{deep} // 0;

        my $rados = PVE::RADOS->new();

        # dies if the OSD does not exist
        $get_osd_status->($rados, $osdid);

        # pick the scrub variant and dispatch it to the monitor
        $rados->mon_command({
            prefix => $deep ? 'osd deep-scrub' : 'osd scrub',
            who => $osdid,
        });

        return undef;
    }});
771 | ||
79fa41a2 | 772 | 1; |