git.proxmox.com Git - pve-manager.git/blob - PVE/API2/Ceph/OSD.pm
1 package PVE::API2::Ceph::OSD;
12 use PVE::Ceph::Services;
14 use PVE::Cluster qw(cfs_read_file cfs_write_file);
16 use PVE::Storage::LVMPlugin;
17 use PVE::Exception qw(raise_param_exc);
18 use PVE::JSONSchema qw(get_standard_option);
22 use PVE::RPCEnvironment;
23 use PVE::Tools qw(run_command file_set_contents);
27 use base qw(PVE::RESTHandler);
29 my $nodename = PVE::INotify::nodename();
31 my $get_osd_status = sub {
32 my ($rados, $osdid) = @_;
34 my $stat = $rados->mon_command({ prefix => 'osd dump' });
36 my $osdlist = $stat->{osds} || [];
38 my $flags = $stat->{flags} || undef;
41 foreach my $d (@$osdlist) {
42 $osdstat->{$d->{osd}} = $d if defined($d->{osd});
44 if (defined($osdid)) {
45 die "no such OSD '$osdid'\n" if !$osdstat->{$osdid};
46 return $osdstat->{$osdid};
49 return wantarray ? ($osdstat, $flags) : $osdstat;
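Editorial note (not part of the upstream file): the closure above returns the per-OSD status hash in scalar context and additionally the cluster flags in list context. A minimal usage sketch, assuming a connected PVE::RADOS handle in $rados and an OSD id in $osdid:

    # hedged sketch, names taken from the surrounding code
    my ($osdstat, $flags) = $get_osd_status->($rados);   # all OSDs plus flags
    my $single = $get_osd_status->($rados, $osdid);      # dies if $osdid is unknown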
52 my $get_osd_usage = sub {
55 my $osdlist = $rados->mon_command({ prefix => 'pg dump', dumpcontents => [ 'osds' ]});
56 if (!($osdlist && ref($osdlist))) {
57 warn "got unknown result format for 'pg dump osds' command\n";
61 if (ref($osdlist) eq "HASH") { # since nautilus
62 $osdlist = $osdlist->{osd_stats};
66 for my $d (@$osdlist) {
67 $osdstat->{$d->{osd}} = $d if defined($d->{osd});
73 my sub get_proc_pss_from_pid {
75 return if !defined($pid) || $pid <= 1;
77 open (my $SMAPS_FH, '<', "/proc/$pid/smaps_rollup")
78 or die "failed to open PSS memory-stat from process - $!\n";
80 while (my $line = <$SMAPS_FH>) {
81 if ($line =~ m/^Pss:\s+([0-9]+) kB$/) { # using PSS avoids bias with many OSDs
83 return int($1) * 1024;
87 die "internal error: failed to find PSS memory-stat in procfs for PID $pid\n";
91 __PACKAGE__->register_method ({
95 description => "Get Ceph osd list/tree.",
99 check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
102 additionalProperties => 0,
104 node => get_standard_option('pve-node'),
107 # fixme: return a list instead of extjs tree format ?
113 flags => { type => "string" },
116 description => "Tree with OSDs in the CRUSH map structure.",
124 PVE::Ceph::Tools::check_ceph_inited();
126 my $rados = PVE::RADOS->new();
127 my $res = $rados->mon_command({ prefix => 'osd df', output_method => 'tree', });
129 die "no tree nodes found\n" if !($res && $res->{nodes});
131 my ($osdhash, $flags) = $get_osd_status->($rados);
133 my $osd_usage = $get_osd_usage->($rados);
135 my $osdmetadata_res = $rados->mon_command({ prefix => 'osd metadata' });
136 my $osdmetadata = { map { $_->{id} => $_ } @$osdmetadata_res };
138 my $hostversions = PVE::Ceph::Services::get_ceph_versions();
142 foreach my $e (@{$res->{nodes}}) {
143 my ($id, $name) = $e->@{qw(id name)};
153 foreach my $opt (qw(status crush_weight reweight device_class pgs)) {
154 $new->{$opt} = $e->{$opt} if defined($e->{$opt});
157 if (my $stat = $osdhash->{$id}) {
158 $new->{in} = $stat->{in} if defined($stat->{in});
161 if (my $stat = $osd_usage->{$id}) {
162 $new->{total_space} = ($stat->{kb} || 1) * 1024;
163 $new->{bytes_used} = ($stat->{kb_used} || 0) * 1024;
164 $new->{percent_used} = ($new->{bytes_used}*100)/$new->{total_space};
165 if (my $d = $stat->{perf_stat}) {
166 $new->{commit_latency_ms} = $d->{commit_latency_ms};
167 $new->{apply_latency_ms} = $d->{apply_latency_ms};
171 my $osdmd = $osdmetadata->{$id};
172 if ($e->{type} eq 'osd' && $osdmd) {
173 if ($osdmd->{bluefs}) {
174 $new->{osdtype} = 'bluestore';
175 $new->{blfsdev} = $osdmd->{bluestore_bdev_dev_node};
176 $new->{dbdev} = $osdmd->{bluefs_db_dev_node};
177 $new->{waldev} = $osdmd->{bluefs_wal_dev_node};
179 $new->{osdtype} = 'filestore';
181 for my $field (qw(ceph_version ceph_version_short)) {
182 $new->{$field} = $osdmd->{$field} if $osdmd->{$field};
186 $newnodes->{$id} = $new;
189 foreach my $e (@{$res->{nodes}}) {
190 my ($id, $name) = $e->@{qw(id name)};
191 my $new = $newnodes->{$id};
193 if ($e->{children} && scalar(@{$e->{children}})) {
194 $new->{children} = [];
196 foreach my $cid (@{$e->{children}}) {
197 $nodes->{$cid}->{parent} = $id;
198 if ($nodes->{$cid}->{type} eq 'osd' && $e->{type} eq 'host') {
199 $newnodes->{$cid}->{host} = $name;
201 push @{$new->{children}}, $newnodes->{$cid};
204 $new->{leaf} = ($id >= 0) ? 1 : 0;
207 if ($name && $e->{type} eq 'host') {
208 $new->{version} = $hostversions->{$name}->{version}->{str};
213 foreach my $e (@{$res->{nodes}}) {
215 if (!$nodes->{$id}->{parent}) {
216 push @$realroots, $newnodes->{$id};
220 die "no root node\n" if scalar(@$realroots) < 1;
225 children => $realroots
229 $data->{flags} = $flags if $flags; # we want this for the noout flag
234 __PACKAGE__->register_method ({
238 description => "Create OSD",
242 additionalProperties => 0,
244 node => get_standard_option('pve-node'),
246 description => "Block device name.",
250 description => "Block device name for block.db.",
255 description => "Size in GiB for block.db.",
256 verbose_description => "If a block.db is requested but the size is not given, ".
257 "will be automatically selected by: bluestore_block_db_size from the ".
258 "ceph database (osd or global section) or config (osd or global section), ".
259 "in that order. If this is not available, it will be sized 10% of the size ".
260 "of the OSD device. Fails if the available size is not enough.",
263 default => 'bluestore_block_db_size or 10% of OSD size',
264 requires => 'db_dev',
268 description => "Block device name for block.wal.",
273 description => "Size in GiB for block.wal.",
274 verbose_description => "If a block.wal is requested but the size is not given, ".
275 "will be automatically selected by: bluestore_block_wal_size from the ".
276 "ceph database (osd or global section) or config (osd or global section)".
277 "in that order. If this is not available, it will be sized 1% of the size ".
278 "of the OSD device. Fails if the available size is not enough.",
281 default => 'bluestore_block_wal_size or 1% of OSD size',
282 requires => 'wal_dev',
289 description => "Enables encryption of the OSD."
291 'crush-device-class' => {
294 description => "Set the device class of the OSD in crush."
296 'osds-per-device' => {
300 description => "OSD services per physical device. Only useful for fast ".
301 "NVME devices to utilize their performance better.",
305 returns => { type => 'string' },
309 my $rpcenv = PVE::RPCEnvironment::get();
311 my $authuser = $rpcenv->get_user();
313 # test basic requirements
314 PVE::Ceph::Tools::check_ceph_inited();
315 PVE::Ceph::Tools::setup_pve_symlinks();
316 PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
317 PVE::Ceph::Tools::check_ceph_installed('ceph_volume');
319 # extract parameter info and fail if a device is set more than once
322 # allow 'osds-per-device' only without dedicated db and/or wal devs. We cannot specify them with
323 # 'ceph-volume lvm batch' and they don't make a lot of sense on fast NVMEs anyway.
324 if ($param->{'osds-per-device'}) {
325 for my $type ( qw(db_dev wal_dev) ) {
326 raise_param_exc({ $type => "cannot use 'osds-per-device' parameter with '${type}'" })
331 my $ceph_conf = cfs_read_file('ceph.conf');
333 my $osd_network = $ceph_conf->{global}->{cluster_network};
334 $osd_network //= $ceph_conf->{global}->{public_network}; # fallback
336 if ($osd_network) { # check only if something is configured
337 my $cluster_net_ips = PVE::Network::get_local_ip_from_cidr($osd_network);
338 if (scalar(@$cluster_net_ips) < 1) {
339 my $osd_net_obj = PVE::Network::IP_from_cidr($osd_network);
340 my $osd_base_cidr = $osd_net_obj->{ip} . "/" . $osd_net_obj->{prefixlen};
342 die "No address from ceph cluster network (${osd_base_cidr}) found on node '$nodename'. ".
343 "Check your network config.\n";
347 for my $type ( qw(dev db_dev wal_dev) ) {
348 next if !$param->{$type};
350 my $type_dev = PVE::Diskmanage::verify_blockdev_path($param->{$type});
351 (my $type_devname = $type_dev) =~ s|/dev/||;
353 raise_param_exc({ $type => "cannot choose '$type_dev' for more than one type." })
354 if grep { $_->{name} eq $type_devname } values %$devs;
358 name => $type_devname,
361 if (my $size = $param->{"${type}_size"}) {
362 $devs->{$type}->{size} = PVE::Tools::convert_size($size, 'gb' => 'b');
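Worked example (editorial, not in the file): the API sizes are given in GiB and converted to bytes here, so a hypothetical db_size of 4 ends up as 4 * 1024**3 bytes.

    # e.g. db_size = 4 (GiB)  ->  4 * 1024**3 = 4294967296 bytes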
366 my $test_disk_requirements = sub {
369 my $dev = $devs->{dev}->{dev};
370 my $devname = $devs->{dev}->{name};
371 die "unable to get device info for '$dev'\n" if !$disklist->{$devname};
372 die "device '$dev' is already in use\n" if $disklist->{$devname}->{used};
374 for my $type ( qw(db_dev wal_dev) ) {
375 my $d = $devs->{$type};
377 my $name = $d->{name};
378 my $info = $disklist->{$name};
379 die "unable to get device info for '$d->{dev}' for type $type\n" if !$disklist->{$name};
380 if (my $usage = $info->{used}) {
381 if ($usage eq 'partitions') {
382 die "device '$d->{dev}' is not GPT partitioned\n" if !$info->{gpt};
383 } elsif ($usage ne 'LVM') {
384 die "device '$d->{dev}' is already in use and has no LVM on it\n";
391 # test disk requirements early
392 my $devlist = [ map { $_->{name} } values %$devs ];
393 my $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
394 $test_disk_requirements->($disklist);
396 # get necessary ceph infos
397 my $rados = PVE::RADOS->new();
398 my $monstat = $rados->mon_command({ prefix => 'quorum_status' });
400 my $ceph_bootstrap_osd_keyring = PVE::Ceph::Tools::get_config('ceph_bootstrap_osd_keyring');
402 if (! -f $ceph_bootstrap_osd_keyring && $ceph_conf->{global}->{auth_client_required} eq 'cephx') {
403 my $bindata = $rados->mon_command({
404 prefix => 'auth get-or-create',
405 entity => 'client.bootstrap-osd',
407 'mon' => 'allow profile bootstrap-osd'
411 file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
415 my @udev_trigger_devs = ();
417 my $create_part_or_lv = sub {
418 my ($dev, $size, $type) = @_;
420 $size =~ m/^(\d+)$/ or die "invalid size '$size'\n";
423 die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
424 if $dev->{size} < $size;
426 # sgdisk and lvcreate can only handle sizes divisible by 512b
427 # so we round down to the nearest kb
428 $size = PVE::Tools::convert_size($size, 'b' => 'kb', 1);
433 my $vg = "ceph-" . UUID::uuid();
434 my $lv = $type . "-" . UUID::uuid();
436 PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
437 PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");
439 if (PVE::Diskmanage::is_partition($dev->{devpath})) {
440 eval { PVE::Diskmanage::change_parttype($dev->{devpath}, '8E00'); };
444 push @udev_trigger_devs, $dev->{devpath};
448 } elsif ($dev->{used} eq 'LVM') {
449 # check pv/vg and create lv
451 my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
453 for my $vgname ( sort keys %$vgs ) {
454 next if $vgname !~ /^ceph-/;
456 for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
457 next if $pv->{name} ne $dev->{devpath};
464 die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
465 die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;
467 my $lv = $type . "-" . UUID::uuid();
469 PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}k");
473 } elsif ($dev->{used} eq 'partitions' && $dev->{gpt}) {
474 # create new partition at the end
476 'osd-db' => '30CD0809-C2B2-499C-8879-2D6B78529876',
477 'osd-wal' => '5CE17FCE-4087-4169-B7FF-056CC58473F9',
480 my $part = PVE::Diskmanage::append_partition($dev->{devpath}, $size * 1024);
482 if (my $parttype = $parttypes->{$type}) {
483 eval { PVE::Diskmanage::change_parttype($part, $parttype); };
487 push @udev_trigger_devs, $part;
491 die "cannot use '$dev->{devpath}' for '$type'\n";
497 PVE::Diskmanage::locked_disk_action(sub {
498 # update disklist and re-test requirements
499 $disklist = PVE::Diskmanage::get_disks($devlist, 1, 1);
500 $test_disk_requirements->($disklist);
502 my $dev_class = $param->{'crush-device-class'};
503 # create allows for detailed configuration of DB and WAL devices
504 # batch for easy creation of multiple OSDs (per device)
505 my $create_mode = $param->{'osds-per-device'} ? 'batch' : 'create';
506 my $cmd = ['ceph-volume', 'lvm', $create_mode ];
507 push @$cmd, '--crush-device-class', $dev_class if $dev_class;
509 my $devname = $devs->{dev}->{name};
510 my $devpath = $disklist->{$devname}->{devpath};
511 print "create OSD on $devpath (bluestore)\n";
513 push @udev_trigger_devs, $devpath;
515 my $osd_size = $disklist->{$devname}->{size};
517 db => int($osd_size / 10), # 10% of OSD
518 wal => int($osd_size / 100), # 1% of OSD
522 foreach my $type ( qw(db wal) ) {
523 my $fallback_size = $size_map->{$type};
524 my $d = $devs->{"${type}_dev"};
527 # size was not set via api, getting from config/fallback
528 if (!defined($d->{size})) {
529 $sizes = PVE::Ceph::Tools::get_db_wal_sizes() if !$sizes;
530 $d->{size} = $sizes->{$type} // $fallback_size;
532 print "creating block.$type on '$d->{dev}'\n";
533 my $name = $d->{name};
534 my $part_or_lv = $create_part_or_lv->($disklist->{$name}, $d->{size}, "osd-$type");
536 print "using '$part_or_lv' for block.$type\n";
537 push @$cmd, "--block.$type", $part_or_lv;
540 push @$cmd, '--data', $devpath if $create_mode eq 'create';
541 push @$cmd, '--dmcrypt' if $param->{encrypted};
543 if ($create_mode eq 'batch') {
545 '--osds-per-device', $param->{'osds-per-device'},
551 PVE::Diskmanage::wipe_blockdev($devpath);
553 if (PVE::Diskmanage::is_partition($devpath)) {
554 eval { PVE::Diskmanage::change_parttype($devpath, '8E00'); };
560 # FIXME: Remove once we depend on systemd >= v249.
561 # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
562 # udev database is updated.
563 eval { run_command(['udevadm', 'trigger', @udev_trigger_devs]); };
568 return $rpcenv->fork_worker('cephcreateosd', $devs->{dev}->{name}, $authuser, $worker);
571 my $OSD_DEV_RETURN_PROPS = {
574 enum => ['block', 'db', 'wal'],
575 description => 'Kind of OSD device',
579 description => 'Device node',
583 description => 'Physical disks used',
587 description => 'Size in bytes',
591 description => 'Discard support of the physical device',
595 description => 'Type of device. For example, hdd or ssd',
599 __PACKAGE__->register_method ({
603 permissions => { user => 'all' },
604 description => "OSD index.",
606 additionalProperties => 0,
608 node => get_standard_option('pve-node'),
610 description => 'OSD ID',
621 links => [ { rel => 'child', href => "{name}" } ],
627 { name => 'metadata' },
628 { name => 'lv-info' },
634 __PACKAGE__->register_method ({
635 name => 'osddetails',
636 path => '{osdid}/metadata',
638 description => "Get OSD details",
642 check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
645 additionalProperties => 0,
647 node => get_standard_option('pve-node'),
649 description => 'OSD ID',
659 description => 'General information about the OSD',
663 description => 'Name of the host containing the OSD.',
667 description => 'ID of the OSD.',
671 description => 'Memory usage of the OSD service.',
675 description => "Path to the OSD's data directory.",
679 description => 'The type of object store used.',
683 description => 'OSD process ID.',
687 description => 'Ceph version of the OSD service.',
691 description => 'Address and port used to talk to clients and monitors.',
695 description => 'Address and port used to talk to other OSDs.',
699 description => 'Heartbeat address and port for clients and monitors.',
703 description => 'Heartbeat address and port for other OSDs.',
709 description => 'Array containing data about devices',
712 properties => $OSD_DEV_RETURN_PROPS,
720 PVE::Ceph::Tools::check_ceph_inited();
722 my $osdid = $param->{osdid};
723 my $rados = PVE::RADOS->new();
724 my $metadata = $rados->mon_command({ prefix => 'osd metadata', id => int($osdid) });
726 die "OSD '${osdid}' does not exist on host '${nodename}'\n"
727 if $nodename ne $metadata->{hostname};
732 if ($line =~ m/^MainPID=([0-9]*)$/) {
740 "ceph-osd\@${osdid}.service",
744 run_command($cmd, errmsg => 'fetching OSD PID and memory usage failed', outfunc => $parser);
746 my $osd_pss_memory = eval { get_proc_pss_from_pid($pid) } // 0;
751 hostname => $metadata->{hostname},
752 id => $metadata->{id},
753 mem_usage => $osd_pss_memory,
754 osd_data => $metadata->{osd_data},
755 osd_objectstore => $metadata->{osd_objectstore},
757 version => "$metadata->{ceph_version_short} ($metadata->{ceph_release})",
758 front_addr => $metadata->{front_addr},
759 back_addr => $metadata->{back_addr},
760 hb_front_addr => $metadata->{hb_front_addr},
761 hb_back_addr => $metadata->{hb_back_addr},
765 $data->{devices} = [];
768 my ($dev, $prefix, $device) = @_;
772 dev_node => $metadata->{"${prefix}_${dev}_dev_node"},
773 physical_device => $metadata->{"${prefix}_${dev}_devices"},
774 size => int($metadata->{"${prefix}_${dev}_size"}),
775 support_discard => int($metadata->{"${prefix}_${dev}_support_discard"}),
776 type => $metadata->{"${prefix}_${dev}_type"},
782 $get_data->("bdev", "bluestore", "block");
783 $get_data->("db", "bluefs", "db") if $metadata->{bluefs_dedicated_db};
784 $get_data->("wal", "bluefs", "wal") if $metadata->{bluefs_dedicated_wal};
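Illustrative expansion (editorial note, derived from the key pattern above): with the arguments used in these calls, $get_data->("db", "bluefs", "db") ends up reading keys such as the following from the 'osd metadata' output.

    # bluefs_db_dev_node, bluefs_db_devices, bluefs_db_size,
    # bluefs_db_support_discard, bluefs_db_type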
789 __PACKAGE__->register_method ({
791 path => '{osdid}/lv-info',
793 description => "Get OSD volume details",
797 check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
800 additionalProperties => 0,
802 node => get_standard_option('pve-node'),
804 description => 'OSD ID',
808 description => 'OSD device type',
810 enum => ['block', 'db', 'wal'],
821 description => "Creation time as reported by `lvs`.",
825 description => 'Name of the logical volume (LV).',
829 description => 'Path to the logical volume (LV).',
833 description => 'Size of the logical volume (LV).',
837 description => 'UUID of the logical volume (LV).',
841 description => 'Name of the volume group (VG).',
848 PVE::Ceph::Tools::check_ceph_inited();
850 my $osdid = $param->{osdid};
851 my $type = $param->{type} // 'block';
854 my $parser = sub { $raw .= shift };
855 my $cmd = ['/usr/sbin/ceph-volume', 'lvm', 'list', $osdid, '--format', 'json'];
856 run_command($cmd, errmsg => 'listing Ceph LVM volumes failed', outfunc => $parser);
859 if ($raw =~ m/^(\{.*\})$/s) { #untaint
860 $result = JSON::decode_json($1);
862 die "got unexpected data from ceph-volume: '${raw}'\n";
864 if (!$result->{$osdid}) {
865 die "OSD '${osdid}' not found in 'ceph-volume lvm list' on node '${nodename}'.\n"
866 ."Maybe it was created before LVM became the default?\n";
869 my $lv_data = { map { $_->{type} => $_ } @{$result->{$osdid}} };
870 my $volume = $lv_data->{$type} || die "volume type '${type}' not found for OSD ${osdid}\n";
873 $cmd = ['/sbin/lvs', $volume->{lv_path}, '--reportformat', 'json', '-o', 'lv_time'];
874 run_command($cmd, errmsg => 'listing logical volumes failed', outfunc => $parser);
876 if ($raw =~ m/(\{.*\})$/s) { #untaint, lvs has whitespace at beginning
877 $result = JSON::decode_json($1);
879 die "got unexpected data from lvs: '${raw}'\n";
882 my $data = { map { $_ => $volume->{$_} } qw(lv_name lv_path lv_uuid vg_name) };
883 $data->{lv_size} = int($volume->{lv_size});
885 $data->{creation_time} = @{$result->{report}}[0]->{lv}[0]->{lv_time};
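The access path in the line above assumes the `lvs --reportformat json` output has roughly the shape sketched here (derived from the code; the value is hypothetical):

    # { "report": [ { "lv": [ { "lv_time": "2023-09-01 12:00:00 +0200" } ] } ] }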
890 # Check if $osdid belongs to $nodename
891 # $tree ... rados osd tree (passing the tree makes it easy to test)
892 sub osd_belongs_to_node {
893 my ($tree, $nodename, $osdid) = @_;
894 return 0 if !($tree && $tree->{nodes});
897 for my $el (grep { defined($_->{type}) && $_->{type} eq 'host' } @{$tree->{nodes}}) {
898 my $name = $el->{name};
899 die "internal error: duplicate host name found '$name'\n" if $node_map->{$name};
900 $node_map->{$name} = $el;
903 my $osds = $node_map->{$nodename}->{children};
906 return grep($_ == $osdid, @$osds);
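Because the helper takes the already-decoded tree (see the comment above about making it easy to test), a minimal, hypothetical check could look like this; the tree literal only mimics the shape of the 'osd tree' output consumed here:

    my $tree = { nodes => [
        { type => 'host', name => 'pve1', children => [ 0, 1 ] },
        { type => 'osd',  name => 'osd.0', id => 0 },
    ] };
    print "osd.0 is on pve1\n" if osd_belongs_to_node($tree, 'pve1', 0);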
909 __PACKAGE__->register_method ({
910 name => 'destroyosd',
913 description => "Destroy OSD",
917 additionalProperties => 0,
919 node => get_standard_option('pve-node'),
921 description => 'OSD ID',
925 description => "If set, we remove partition table entries.",
932 returns => { type => 'string' },
936 my $rpcenv = PVE::RPCEnvironment::get();
938 my $authuser = $rpcenv->get_user();
940 PVE::Ceph::Tools::check_ceph_inited();
942 my $osdid = $param->{osdid};
943 my $cleanup = $param->{cleanup};
945 my $rados = PVE::RADOS->new();
947 my $osd_belongs_to_node = osd_belongs_to_node(
948 $rados->mon_command({ prefix => 'osd tree' }),
952 die "OSD osd.$osdid does not belong to node $param->{node}!"
953 if !$osd_belongs_to_node;
955 # dies if osdid is unknown
956 my $osdstat = $get_osd_status->($rados, $osdid);
958 die "osd is in use (in == 1)\n" if $osdstat->{in};
959 #&$run_ceph_cmd(['osd', 'out', $osdid]);
961 die "osd is still running (up == 1)\n" if $osdstat->{up
};
963 my $osdsection = "osd.$osdid";
968 # reopen with longer timeout
969 $rados = PVE::RADOS->new(timeout => PVE::Ceph::Tools::get_config('long_rados_timeout'));
971 print "destroy OSD $osdsection\n";
974 PVE::Ceph::Services::ceph_service_cmd('stop', $osdsection);
975 PVE::Ceph::Services::ceph_service_cmd('disable', $osdsection);
979 print "Remove $osdsection from the CRUSH map\n";
980 $rados->mon_command({ prefix => "osd crush remove", name => $osdsection, format => 'plain' });
982 print "Remove the $osdsection authentication key.\n";
983 $rados->mon_command({ prefix => "auth del", entity => $osdsection, format => 'plain' });
985 print "Remove OSD $osdsection\n";
986 $rados->mon_command({ prefix => "osd rm", ids => [ $osdsection ], format => 'plain' });
988 # try to unmount from standard mount point
989 my $mountpoint = "/var/lib/ceph/osd/ceph-$osdid";
992 my $udev_trigger_devs = {};
994 my $remove_partition = sub {
997 return if !$part || (! -b $part );
998 my $partnum = PVE::Diskmanage::get_partnum($part);
999 my $devpath = PVE::Diskmanage::get_blockdev($part);
1001 $udev_trigger_devs->{$devpath} = 1;
1003 PVE::Diskmanage::wipe_blockdev($part);
1004 print "remove partition $part (disk '${devpath}', partnum $partnum)\n";
1005 eval { run_command(['/sbin/sgdisk', '-d', $partnum, "${devpath}"]); };
1009 my $osd_list = PVE::Ceph::Tools::ceph_volume_list();
1011 if ($osd_list->{$osdid}) { # ceph-volume managed
1013 eval { PVE::Ceph::Tools::ceph_volume_zap($osdid, $cleanup) };
1017 # try to remove pvs, but do not fail if it does not work
1018 for my $osd_part (@{$osd_list->{$osdid}}) {
1019 for my $dev (@{$osd_part->{devices}}) {
1020 ($dev) = ($dev =~ m|^(/dev/[-_.a-zA-Z0-9\/]+)$|); #untaint
1022 eval { run_command(['/sbin/pvremove', $dev], errfunc => sub {}) };
1025 $udev_trigger_devs->{$dev} = 1;
1030 my $partitions_to_remove = [];
1032 if (my $mp = PVE::ProcFSTools::parse_proc_mounts()) {
1033 foreach my $line (@$mp) {
1034 my ($dev, $path, $fstype) = @$line;
1035 next if !($dev && $path && $fstype);
1036 next if $dev !~ m|^/dev/|;
1038 if ($path eq $mountpoint) {
1039 abs_path($dev) =~ m|^(/.+)| or die "invalid dev: $dev\n";
1040 push @$partitions_to_remove, $1;
1046 foreach my $path (qw(journal block block.db block.wal)) {
1047 abs_path("$mountpoint/$path") =~ m|^(/.+)| or die "invalid path: $path\n";
1048 push @$partitions_to_remove, $1;
1052 print "Unmount OSD $osdsection from $mountpoint\n";
1053 eval { run_command(['/bin/umount', $mountpoint]); };
1056 } elsif ($cleanup) {
1057 #be aware of the ceph udev rules which can remount.
1058 foreach my $part (@$partitions_to_remove) {
1059 $remove_partition->($part);
1064 # FIXME: Remove once we depend on systemd >= v249.
1065 # Work around udev bug https://github.com/systemd/systemd/issues/18525 to ensure the
1066 # udev database is updated.
1068 eval { run_command(['udevadm', 'trigger', keys $udev_trigger_devs->%*]); };
1073 return $rpcenv->fork_worker('cephdestroyosd', $osdsection, $authuser, $worker);
1076 __PACKAGE__->register_method ({
1078 path => '{osdid}/in',
1080 description => "ceph osd in",
1084 check => ['perm', '/', [ 'Sys.Modify' ]],
1087 additionalProperties => 0,
1089 node => get_standard_option('pve-node'),
1091 description => 'OSD ID',
1096 returns => { type => "null" },
1100 PVE::Ceph::Tools::check_ceph_inited();
1102 my $osdid = $param->{osdid};
1104 my $rados = PVE::RADOS->new();
1106 $get_osd_status->($rados, $osdid); # osd exists?
1108 my $osdsection = "osd.$osdid";
1110 $rados->mon_command({ prefix => "osd in", ids => [ $osdsection ], format => 'plain' });
1115 __PACKAGE__->register_method ({
1117 path => '{osdid}/out',
1119 description => "ceph osd out",
1123 check => ['perm', '/', [ 'Sys.Modify' ]],
1126 additionalProperties => 0,
1128 node => get_standard_option('pve-node'),
1130 description => 'OSD ID',
1135 returns => { type => "null" },
1139 PVE::Ceph::Tools::check_ceph_inited();
1141 my $osdid = $param->{osdid};
1143 my $rados = PVE::RADOS->new();
1145 $get_osd_status->($rados, $osdid); # osd exists?
1147 my $osdsection = "osd.$osdid";
1149 $rados->mon_command({ prefix => "osd out", ids => [ $osdsection ], format => 'plain' });
1154 __PACKAGE__->register_method ({
1156 path => '{osdid}/scrub',
1158 description => "Instruct the OSD to scrub.",
1162 check => ['perm', '/', [ 'Sys.Modify' ]],
1165 additionalProperties => 0,
1167 node => get_standard_option('pve-node'),
1169 description => 'OSD ID',
1173 description => 'If set, instructs a deep scrub instead of a normal one.',
1180 returns => { type => "null" },
1184 PVE::Ceph::Tools::check_ceph_inited();
1186 my $osdid = $param->{osdid};
1187 my $deep = $param->{deep} // 0;
1189 my $rados = PVE::RADOS->new();
1191 $get_osd_status->($rados, $osdid); # osd exists?
1193 my $prefix = $deep ? 'osd deep-scrub' : 'osd scrub';
1194 $rados->mon_command({ prefix => $prefix, who => $osdid });