]> git.proxmox.com Git - pve-manager.git/blob - PVE/Ceph/Tools.pm
ceph tools: add erasure code management functions
[pve-manager.git] / PVE / Ceph / Tools.pm
1 package PVE::Ceph::Tools;
2
3 use strict;
4 use warnings;
5
6 use File::Path;
7 use File::Basename;
8 use IO::File;
9 use JSON;
10
11 use PVE::Tools qw(run_command dir_glob_foreach);
12 use PVE::Cluster qw(cfs_read_file);
13 use PVE::RADOS;
14 use PVE::Ceph::Services;
15 use PVE::CephConfig;
16
17 my $ccname = 'ceph'; # ceph cluster name
18 my $ceph_cfgdir = "/etc/ceph";
19 my $pve_ceph_cfgpath = "/etc/pve/$ccname.conf";
20 my $ceph_cfgpath = "$ceph_cfgdir/$ccname.conf";
21
22 my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
23 my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
24 my $ckeyring_path = "/etc/ceph/ceph.client.admin.keyring";
25 my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
26 my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
27 my $ceph_mds_data_dir = '/var/lib/ceph/mds';
28
29 my $ceph_service = {
30 ceph_bin => "/usr/bin/ceph",
31 ceph_mon => "/usr/bin/ceph-mon",
32 ceph_mgr => "/usr/bin/ceph-mgr",
33 ceph_osd => "/usr/bin/ceph-osd",
34 ceph_mds => "/usr/bin/ceph-mds",
35 ceph_volume => '/usr/sbin/ceph-volume',
36 };
37
38 my $config_hash = {
39 ccname => $ccname,
40 pve_ceph_cfgpath => $pve_ceph_cfgpath,
41 pve_mon_key_path => $pve_mon_key_path,
42 pve_ckeyring_path => $pve_ckeyring_path,
43 ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
44 ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
45 ceph_mds_data_dir => $ceph_mds_data_dir,
46 long_rados_timeout => 60,
47 ceph_cfgpath => $ceph_cfgpath,
48 };
49
sub get_local_version {
    my ($noerr) = @_;

    # Parse `ceph --version` output.
    # List context: ($version, $buildcommit, [major, minor, ...]);
    # scalar context: the major version number. Undef if ceph is not
    # installed or the output could not be parsed.
    return undef if !check_ceph_installed('ceph_bin', $noerr);

    my $version_line;
    run_command(
	[ $ceph_service->{ceph_bin}, '--version' ],
	noerr => $noerr,
	outfunc => sub { $version_line //= shift }, # only the first line matters
    );
    return undef if !defined($version_line);

    return undef
	if $version_line !~ /^ceph.*\sv?(\d+(?:\.\d+)+(?:-pve\d+)?)\s+(?:\(([a-zA-Z0-9]+)\))?/;

    my ($version, $buildcommit) = ($1, $2);
    my $subversions = [ split(/\.|-/, $version) ];

    return wantarray
	? ($version, $buildcommit, $subversions)
	: $subversions->[0];
}
75
# Query the versions of all running cluster daemons via the monitors.
# With a $service name given, restrict the query to that daemon type.
sub get_cluster_versions {
    my ($service, $noerr) = @_;

    my $prefix = $service ? "$service versions" : 'versions';
    return PVE::RADOS->new()->mon_command({ prefix => $prefix });
}
83
# Look up a value from the module's static ceph config hash (paths,
# cluster name, timeouts). Dies on an unknown key.
sub get_config {
    my ($key) = @_;

    # check key presence instead of value truthiness, so a legitimate
    # falsy config value (0, '') would not be reported as missing
    die "no such ceph config '$key'\n" if !exists $config_hash->{$key};

    return $config_hash->{$key};
}
93
# Remove the data/log directories of all local ceph services and, if no
# foreign MON is referenced by the config, the config and keyring files.
# $services: hash of service-type => { name => { direxists, addr, ... } }
sub purge_all_ceph_files {
    my ($services) = @_;
    my $is_local_mon;
    my $monlist = [ split(',', PVE::CephConfig::get_monaddr_list($pve_ceph_cfgpath)) ];

    foreach my $service (keys %$services) {
	my $type = $services->{$service};
	next if (!%$type);

	foreach my $name (keys %$type) {
	    my $dir_exists = $type->{$name}->{direxists};

	    # Compare this MON's address against each configured address.
	    # The previous grep(EXPR, LIST) form never referenced $_, so it
	    # matched whenever the addr was merely truthy - use a BLOCK grep
	    # with an explicit equality test instead.
	    if ($service eq 'mon') {
		my $addr = $type->{$name}->{addr} // '';
		$is_local_mon = grep { $_ eq $addr } @$monlist;
	    }

	    my $path = "/var/lib/ceph/$service";
	    $path = '/var/log/ceph' if $service eq 'logs';
	    if ($dir_exists) {
		my $err;
		File::Path::remove_tree($path, {
		    keep_root => 1,
		    error => \$err,
		});
		# guard the deref - $err is only an arrayref after remove_tree ran
		warn "Error removing path, '$path'\n" if $err && @$err;
	    }
	}
    }

    if (scalar @$monlist > 0 && !$is_local_mon) {
	warn "Foreign MON address in ceph.conf. Keeping config & keyrings\n"
    } else {
	print "Removing config & keyring files\n";
	# only the *values* of $config_hash are file paths; iterating the
	# whole hash in list context also yielded the key names
	foreach my $file (values %$config_hash) {
	    unlink $file if (-e $file);
	}
    }
}
131
# Disable and stop every existing systemd unit of every service instance
# listed in $services (hash of service-type => { name => { service, ... } }).
sub purge_all_ceph_services {
    my ($services) = @_;

    for my $stype (keys %$services) {
	my $instances = $services->{$stype};
	next if !%$instances;

	for my $id (keys %$instances) {
	    next if !$instances->{$id}->{service};

	    # best effort: a failure to disable/stop is only warned about
	    for my $action (qw(disable stop)) {
		eval { PVE::Ceph::Services::ceph_service_cmd($action, "$stype.$id") };
		warn "Could not $action ceph-$stype\@$id, error: $@\n" if $@;
	    }
	}
    }
}
152
153 sub ceph_install_flag_file { return '/run/pve-ceph-install-flag' };
154
# Check that the binary for the given service key (default: 'ceph_bin')
# is installed and no installation is currently in progress.
# Dies unless $noerr is set, in which case undef is returned.
sub check_ceph_installed {
    my ($service, $noerr) = @_;

    $service //= 'ceph_bin';
    my $binary = $ceph_service->{$service};

    # NOTE: the flag file is checked as on a new installation, the binary gets
    # extracted by dpkg before the installation is finished
    if (! -x $binary || -f ceph_install_flag_file()) {
	die "binary not installed: $binary\n" if !$noerr;
	return undef;
    }

    return 1;
}
169
170
# A fully configured setup is initialized *and* has the admin keyring
# stored on the cluster filesystem. Dies otherwise.
sub check_ceph_configured {
    check_ceph_inited();

    if (! -f $pve_ckeyring_path) {
	die "ceph not fully configured - missing '$pve_ckeyring_path'\n";
    }

    return 1;
}
180
# Check that ceph-mon is installed and the pmxcfs ceph config exists.
# Dies unless $noerr is set, in which case undef is returned.
sub check_ceph_inited {
    my ($noerr) = @_;

    return undef if !check_ceph_installed('ceph_mon', $noerr);

    return 1 if -f $pve_ceph_cfgpath;

    die "pveceph configuration not initialized\n" if !$noerr;
    return undef;
}
193
# Check that ceph is initialized and the local /etc/ceph config (symlink)
# is present. Dies unless $noerr is set, in which case undef is returned.
sub check_ceph_enabled {
    my ($noerr) = @_;

    return undef if !check_ceph_inited($noerr);

    return 1 if -f $ceph_cfgpath;

    die "pveceph configuration not enabled\n" if !$noerr;
    return undef;
}
206
# Apply a single setting to a pool. 'application' needs its own mon
# command, everything else goes through the generic 'osd pool set'.
# Returns the error message on failure, undef on success.
my $set_pool_setting = sub {
    my ($pool, $setting, $value) = @_;

    my $command = $setting eq 'application'
	? {
	    prefix => "osd pool application enable",
	    pool => "$pool",
	    app => "$value",
	}
	: {
	    prefix => "osd pool set",
	    pool => "$pool",
	    var => "$setting",
	    val => "$value",
	    format => 'plain',
	};

    eval { PVE::RADOS->new()->mon_command($command); };
    return $@ ? $@ : undef;
};
231
# Apply all settings in $param to the given pool; successfully applied
# settings are removed from $param. Dies listing any that failed.
sub set_pool {
    my ($pool, $param) = @_;

    # by default, pool size always resets min_size, so set it as first item
    # https://tracker.ceph.com/issues/44862
    my @settings = sort grep { $_ ne 'size' } keys %$param;
    unshift(@settings, 'size') if exists $param->{size};

    for my $setting (@settings) {
	my $value = $param->{$setting};
	print "pool $pool: applying $setting = $value\n";

	my $err = $set_pool_setting->($pool, $setting, $value);
	if ($err) {
	    print "$err";
	} else {
	    delete $param->{$setting}; # mark as successfully applied
	}
    }

    if (scalar(keys %$param) > 0) {
	die "Could not set: " . join(', ', sort keys %$param) . "\n";
    }
}
257
# Fetch all properties of the given pool in a single mon command.
sub get_pool_properties {
    my ($pool) = @_;

    my $rados = PVE::RADOS->new();
    return $rados->mon_command({
	prefix => "osd pool get",
	pool => "$pool",
	var => "all",
	format => 'json',
    });
}
270
# Create a pool and apply all further settings from $param to it.
# An existing $rados connection can be reused.
sub create_pool {
    my ($pool, $param, $rados) = @_;

    $rados //= PVE::RADOS->new();

    my $pg_num = $param->{pg_num} || 128; # fall back to a sane default

    $rados->mon_command({
	prefix => "osd pool create",
	pool => $pool,
	pg_num => int($pg_num),
	format => 'plain',
    });

    set_pool($pool, $param);
}
287
# List all pools of the cluster. An existing $rados connection can be
# reused; the $pool parameter is currently unused (kept for API stability).
sub ls_pools {
    my ($pool, $rados) = @_;

    $rados //= PVE::RADOS->new();
    return $rados->mon_command({ prefix => "osd lspools" });
}
296
# Delete the given pool. An existing $rados connection can be reused.
sub destroy_pool {
    my ($pool, $rados) = @_;

    $rados //= PVE::RADOS->new();

    # fixme: '--yes-i-really-really-mean-it'
    $rados->mon_command({
	prefix => "osd pool delete",
	pool => $pool,
	pool2 => $pool, # ceph requires the name twice as a safety measure
	'yes_i_really_really_mean_it' => JSON::true,
	format => 'plain',
    });
}
310
# List all cephfs instances; each element looks like:
# {
#     'metadata_pool_id' => 2,
#     'data_pool_ids' => [ 1 ],
#     'metadata_pool' => 'cephfs_metadata',
#     'data_pools' => [ 'cephfs_data' ],
#     'name' => 'cephfs',
# }
sub ls_fs {
    my ($rados) = @_;

    $rados //= PVE::RADOS->new();
    return $rados->mon_command({ prefix => "fs ls" });
}
327
# Create a cephfs named $fs from the metadata/data pools given in
# $param->{pool_metadata} / $param->{pool_data}.
sub create_fs {
    my ($fs, $param, $rados) = @_;

    $rados //= PVE::RADOS->new();

    $rados->mon_command({
	prefix => "fs new",
	fs_name => $fs,
	metadata => $param->{pool_metadata},
	data => $param->{pool_data},
	format => 'plain',
    });
}
343
# Remove the cephfs $fs (its pools are left untouched).
sub destroy_fs {
    my ($fs, $rados) = @_;

    $rados //= PVE::RADOS->new();

    $rados->mon_command({
	prefix => "fs rm",
	fs_name => $fs,
	'yes_i_really_mean_it' => JSON::true,
	format => 'plain',
    });
}
355
# Ensure /etc/ceph/ceph.conf is a symlink to the pmxcfs-managed config.
# Dies if a real file or a symlink to a different target is in the way.
sub setup_pve_symlinks {
    # also test -l: a dangling symlink (target not created yet) fails -f,
    # and symlink() would then die with a confusing EEXIST error
    if (-l $ceph_cfgpath || -f $ceph_cfgpath) {
	my $lnk = readlink($ceph_cfgpath);
	die "file '$ceph_cfgpath' already exists and is not a symlink to $pve_ceph_cfgpath\n"
	    if !$lnk || $lnk ne $pve_ceph_cfgpath;
    } else {
	mkdir $ceph_cfgdir;
	symlink($pve_ceph_cfgpath, $ceph_cfgpath) ||
	    die "unable to create symlink '$ceph_cfgpath' - $!\n";
    }
    my $ceph_uid = getpwnam('ceph');
    my $ceph_gid = getgrnam('ceph');
    # only chown when the ceph user/group actually exist on this system
    chown $ceph_uid, $ceph_gid, $ceph_cfgdir
	if defined($ceph_uid) && defined($ceph_gid);
}
371
# Create the client.admin keyring on pmxcfs if it does not exist yet and
# mirror it to /etc/ceph. Returns the pmxcfs keyring path.
sub get_or_create_admin_keyring {
    if (! -f $pve_ckeyring_path) {
	# list form bypasses the shell, avoiding quoting/injection issues
	run_command([
	    'ceph-authtool', '--create-keyring', $pve_ckeyring_path,
	    '--gen-key', '-n', 'client.admin',
	    '--cap', 'mon', 'allow *',
	    '--cap', 'osd', 'allow *',
	    '--cap', 'mds', 'allow *',
	    '--cap', 'mgr', 'allow *',
	]);
	# we do not want to overwrite it
	if (! -f $ckeyring_path) {
	    run_command(['cp', $pve_ckeyring_path, $ckeyring_path]);
	    run_command(['chown', 'ceph:ceph', $ckeyring_path]);
	}
    }
    return $pve_ckeyring_path;
}
388
# Get the ceph-volume managed OSDs as a hash (decoded JSON output of
# `ceph-volume lvm list`). Returns an empty hash if ceph-volume is not
# installed, undef if its output could not be decoded.
sub ceph_volume_list {
    return {} if !check_ceph_installed('ceph_volume', 1);

    my $json = '';
    run_command(
	[ $ceph_service->{ceph_volume}, 'lvm', 'list', '--format', 'json' ],
	outfunc => sub { $json .= shift },
    );

    my $result = eval { decode_json($json) };
    warn $@ if $@;
    return $result;
}
405
# Zap (wipe) the LVM volumes of the given OSD; with $destroy set, the
# underlying volumes/partitions are removed as well.
sub ceph_volume_zap {
    my ($osdid, $destroy) = @_;

    die "no osdid given\n" if !defined($osdid);

    my @cmd = ($ceph_service->{ceph_volume}, 'lvm', 'zap', '--osd-id', $osdid);
    push @cmd, '--destroy' if $destroy;

    run_command(\@cmd);
}
416
# Determine the configured bluestore DB/WAL sizes. The monitor config DB
# takes precedence over ceph.conf; in both, the 'osd' section wins over
# 'global'. Returns { db => ..., wal => ... } (values may be undef).
sub get_db_wal_sizes {
    my $rados = PVE::RADOS->new();
    my $dump = $rados->mon_command({ prefix => 'config-key dump', key => 'config/' });

    my $res = {};
    for my $kind (qw(db wal)) {
	my $opt = "bluestore_block_${kind}_size";
	$res->{$kind} = $dump->{"config/osd/$opt"} // $dump->{"config/global/$opt"};
    }

    # fall back to ceph.conf for anything the config DB did not provide
    if (!$res->{db} || !$res->{wal}) {
	my $cfg = cfs_read_file('ceph.conf');
	for my $kind (qw(db wal)) {
	    next if $res->{$kind};
	    my $opt = "bluestore_block_${kind}_size";
	    $res->{$kind} = $cfg->{osd}->{$opt} // $cfg->{global}->{$opt};
	}
    }

    return $res;
}
# JSON schema of all settable OSD flags. Every flag shares the same shape
# (optional boolean), only the description differs.
sub get_possible_osd_flags {
    my $description_of = {
	pause => 'Pauses read and writes.',
	noup => 'OSDs are not allowed to start.',
	nodown => 'OSD failure reports are being ignored, such that the monitors will not mark OSDs down.',
	noout => 'OSDs will not automatically be marked out after the configured interval.',
	noin => 'OSDs that were previously marked out will not be marked back in when they start.',
	nobackfill => 'Backfilling of PGs is suspended.',
	norebalance => 'Rebalancing of PGs is suspended.',
	norecover => 'Recovery of PGs is suspended.',
	noscrub => 'Scrubbing is disabled.',
	'nodeep-scrub' => 'Deep Scrubbing is disabled.',
	notieragent => 'Cache tiering activity is suspended.',
    };

    my $possible_flags = {};
    for my $flag (keys %$description_of) {
	$possible_flags->{$flag} = {
	    description => $description_of->{$flag},
	    type => 'boolean',
	    optional => 1,
	};
    }
    return $possible_flags;
}
504
# Map a user-facing OSD flag name to the name ceph actually reports.
sub get_real_flag_name {
    my ($flag) = @_;

    # the 'pause' flag gets always set to both 'pauserd' and 'pausewr'
    # so decide that the 'pause' flag is set if we detect 'pauserd'
    return 'pauserd' if $flag eq 'pause';

    return $flag;
}
516
# Assemble the full cluster status including detailed health info.
# An existing $rados connection can be reused.
sub ceph_cluster_status {
    my ($rados) = @_;

    $rados = PVE::RADOS->new() if !$rados;

    my $status = $rados->mon_command({ prefix => 'status' });
    $status->{health} = $rados->mon_command({ prefix => 'health', detail => 'detail' });

    # octopus moved most mon/mgr map info out of 'status' - re-add it
    if (!exists($status->{monmap}->{mons})) {
	$status->{monmap} = $rados->mon_command({ prefix => 'mon dump' });
	$status->{mgrmap} = $rados->mon_command({ prefix => 'mgr dump' });
    }

    return $status;
}
531
# Check whether an erasure code profile with the given name exists.
sub ecprofile_exists {
    my ($name) = @_;

    my $rados = PVE::RADOS->new();
    my $profiles = $rados->mon_command({ prefix => 'osd erasure-code-profile ls' });

    for my $profile (@$profiles) {
	return 1 if $profile eq $name;
    }
    return undef;
}
541
# Create an erasure code profile with the given k/m values. The failure
# domain defaults to 'host'; a crush device class is optional.
sub create_ecprofile {
    my ($name, $k, $m, $failure_domain, $device_class) = @_;

    $failure_domain ||= 'host';

    my @settings = (
	"crush-failure-domain=${failure_domain}",
	"k=${k}",
	"m=${m}",
    );
    push @settings, "crush-device-class=${device_class}" if $device_class;

    PVE::RADOS->new()->mon_command({
	prefix => 'osd erasure-code-profile set',
	name => $name,
	profile => [@settings],
    });
}
562
# Remove the erasure code profile with the given name.
sub destroy_ecprofile {
    my ($profile) = @_;

    my $rados = PVE::RADOS->new();
    return $rados->mon_command({
	prefix => 'osd erasure-code-profile rm',
	name => $profile,
	format => 'plain',
    });
}
574
# Name of the PVE-managed erasure code profile for the given id.
sub get_ecprofile_name {
    my ($name) = @_;

    return 'pve_ec_' . $name;
}
579
# Remove the crush rule with the given name.
sub destroy_crush_rule {
    my ($rule) = @_;

    my $rados = PVE::RADOS->new();
    return $rados->mon_command({
	prefix => 'osd crush rule rm',
	name => $rule,
	format => 'plain',
    });
}
590
591 1;