]> git.proxmox.com Git - pve-manager.git/blob - PVE/API2/Ceph/Pools.pm
ceph pools: allow to create erasure code pools
[pve-manager.git] / PVE / API2 / Ceph / Pools.pm
1 package PVE::API2::Ceph::Pools;
2
3 use strict;
4 use warnings;
5
6 use PVE::Ceph::Tools;
7 use PVE::Ceph::Services;
8 use PVE::JSONSchema qw(get_standard_option);
9 use PVE::RADOS;
10 use PVE::RESTHandler;
11 use PVE::RPCEnvironment;
12 use PVE::Storage;
13 use PVE::Tools qw(extract_param);
14
15 use PVE::API2::Storage::Config;
16
17 use base qw(PVE::RESTHandler);
18
# Ask the monitors for the PG autoscaler status and index it by pool name.
#
# $rados - optional handle providing mon_command(); when it is not passed, a
#          new PVE::RADOS connection is opened.
#
# Returns a hash reference mapping every reported pool_name to its status
# entry. If the monitor reports no entries the result stays undef.
my $get_autoscale_status = sub {
    my ($rados) = @_;

    $rados //= PVE::RADOS->new();

    my $entries = $rados->mon_command({ prefix => 'osd pool autoscale-status' });

    # deliberately left undefined until the first entry is stored
    my $by_pool;
    $by_pool->{ $_->{pool_name} } = $_ for @$entries;

    return $by_pool;
};
34
35
# GET <base path>: list all Ceph pools.
#
# Combines the pool entries of 'osd dump' with usage numbers from 'df', the
# names of the crush rules and, when it can be queried, the PG autoscaler
# status. Needs Sys.Audit or Datastore.Audit on '/'.
__PACKAGE__->register_method ({
    name => 'lspools',
    path => '',
    method => 'GET',
    description => "List all pools.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
        },
    },
    returns => {
        type => 'array',
        items => {
            type => "object",
            properties => {
                pool => { type => 'integer', title => 'ID' },
                pool_name => { type => 'string', title => 'Name' },
                size => { type => 'integer', title => 'Size' },
                min_size => { type => 'integer', title => 'Min Size' },
                pg_num => { type => 'integer', title => 'PG Num' },
                pg_num_min => { type => 'integer', title => 'min. PG Num', optional => 1 },
                pg_num_final => { type => 'integer', title => 'Optimal PG Num', optional => 1 },
                pg_autoscale_mode => {
                    type => 'string',
                    title => 'PG Autoscale Mode',
                    optional => 1,
                },
                crush_rule => { type => 'integer', title => 'Crush Rule' },
                crush_rule_name => { type => 'string', title => 'Crush Rule Name' },
                percent_used => { type => 'number', title => '%-Used' },
                bytes_used => { type => 'integer', title => 'Used' },
                target_size => {
                    type => 'integer',
                    title => 'PG Autoscale Target Size',
                    optional => 1,
                },
                target_size_ratio => {
                    type => 'number',
                    title => 'PG Autoscale Target Ratio',
                    optional => 1,
                },
                autoscale_status => {
                    type => 'object',
                    title => 'Autoscale Status',
                    optional => 1,
                },
            },
        },
        links => [ { rel => 'child', href => "{pool_name}" } ],
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $rados = PVE::RADOS->new();

        # usage statistics from 'df', keyed by the numeric pool id
        my $df = $rados->mon_command({ prefix => 'df' });
        my $stats_by_id = {};
        for my $entry (@{ $df->{pools} }) {
            next unless $entry->{stats} && defined($entry->{id});
            $stats_by_id->{ $entry->{id} } = $entry->{stats};
        }

        my $osd_dump = $rados->mon_command({ prefix => 'osd dump' });
        my $rule_dump = $rados->mon_command({ prefix => 'osd crush rule dump' });

        # crush rule id => crush rule name
        my $rule_name_of = {};
        for my $rule (@$rule_dump) {
            $rule_name_of->{ $rule->{rule_id} } = $rule->{rule_name};
        }

        # attributes copied verbatim from an 'osd dump' pool entry when defined
        my @copied_attrs = qw(
            pool
            pool_name
            size
            min_size
            pg_num
            crush_rule
            pg_autoscale_mode
        );

        # pg_autoscaler module is not enabled in Nautilus
        my $autoscale = eval { $get_autoscale_status->($rados) };

        my $pools = [];
        for my $pool_entry (@{ $osd_dump->{pools} }) {
            my $row = {
                map { $_ => $pool_entry->{$_} }
                grep { defined($pool_entry->{$_}) } @copied_attrs
            };

            if ($autoscale) {
                $row->{autoscale_status} = $autoscale->{ $row->{pool_name} };
                $row->{pg_num_final} = $row->{autoscale_status}->{pg_num_final};
                # some info is nested under options instead
                my $options = $pool_entry->{options};
                $row->{pg_num_min} = $options->{pg_num_min};
                $row->{target_size} = $options->{target_size_bytes};
                $row->{target_size_ratio} = $options->{target_size_ratio};
            }

            my $rule_id = $row->{crush_rule};
            if (defined($rule_id) && defined($rule_name_of->{$rule_id})) {
                $row->{crush_rule_name} = $rule_name_of->{$rule_id};
            }

            my $usage = $stats_by_id->{ $row->{pool} };
            if ($usage) {
                $row->{bytes_used} = $usage->{bytes_used};
                $row->{percent_used} = $usage->{percent_used};
            }

            push @$pools, $row;
        }

        return $pools;
    },
});
194
195
# Build the schema properties that the pool create, update and read API
# calls have in common.
#
# $nodefault - when true, the 'default' entry is stripped from every property
#              (the pool update call requests this).
#
# Returns a freshly built hash reference: property name => property schema.
my $ceph_pool_common_options = sub {
    my ($nodefault) = @_;

    my $options = {
        name => {
            title => 'Name',
            description => "The name of the pool. It must be unique.",
            type => 'string',
        },
        size => {
            title => 'Size',
            description => 'Number of replicas per object',
            type => 'integer',
            default => 3,
            optional => 1,
            minimum => 1,
            maximum => 7,
        },
        min_size => {
            title => 'Min Size',
            description => 'Minimum number of replicas per object',
            type => 'integer',
            default => 2,
            optional => 1,
            minimum => 1,
            maximum => 7,
        },
        pg_num => {
            title => 'PG Num',
            description => "Number of placement groups.",
            type => 'integer',
            default => 128,
            optional => 1,
            minimum => 1,
            maximum => 32768,
        },
        pg_num_min => {
            title => 'min. PG Num',
            description => "Minimal number of placement groups.",
            type => 'integer',
            optional => 1,
            maximum => 32768,
        },
        crush_rule => {
            title => 'Crush Rule Name',
            description => "The rule to use for mapping object placement in the cluster.",
            type => 'string',
            optional => 1,
        },
        application => {
            title => 'Application',
            description => "The application of the pool.",
            default => 'rbd',
            type => 'string',
            enum => ['rbd', 'cephfs', 'rgw'],
            optional => 1,
        },
        pg_autoscale_mode => {
            title => 'PG Autoscale Mode',
            description => "The automatic PG scaling mode of the pool.",
            type => 'string',
            enum => ['on', 'off', 'warn'],
            default => 'warn',
            optional => 1,
        },
        target_size => {
            description => "The estimated target size of the pool for the PG autoscaler.",
            title => 'PG Autoscale Target Size',
            type => 'string',
            pattern => '^(\d+(\.\d+)?)([KMGT])?$',
            optional => 1,
        },
        target_size_ratio => {
            description => "The estimated target ratio of the pool for the PG autoscaler.",
            title => 'PG Autoscale Target Ratio',
            type => 'number',
            optional => 1,
        },
    };

    if ($nodefault) {
        for my $schema (values %$options) {
            delete $schema->{default};
        }
    }

    return $options;
};
280
281
# Register an RBD storage entry for a pool through the storage config API.
#
# $pool      - value for the storage's 'pool' property
# $storeid   - ID of the storage to create
# $data_pool - optional; when true it is stored as the 'data-pool' property
#
# The entry is created with krbd disabled and content 'rootdir,images'.
my $add_storage = sub {
    my ($pool, $storeid, $data_pool) = @_;

    my %storage_params = (
        type => 'rbd',
        pool => $pool,
        storage => $storeid,
        krbd => 0,
        content => 'rootdir,images',
    );
    $storage_params{'data-pool'} = $data_pool if $data_pool;

    PVE::API2::Storage::Config->create(\%storage_params);
};
297
# Collect the RBD storage definitions that reference a given Ceph pool.
#
# $pool - name of the Ceph pool to look for
#
# Returns a hash reference (storage ID => storage config entry) containing
# every storage of type 'rbd' whose 'pool' or 'data-pool' equals $pool.
my $get_storages = sub {
    my ($pool) = @_;

    my $cfg = PVE::Storage::config();
    my $storages = $cfg->{ids};

    my $res = {};
    foreach my $storeid (keys %$storages) {
        my $curr = $storages->{$storeid};
        next if $curr->{type} ne 'rbd';

        # The 'pool' property of an RBD storage is optional and falls back to
        # 'rbd' (per the storage documentation). Comparing against the raw,
        # possibly undefined value would warn and never match such a storage.
        # The config entry itself is left untouched.
        my $storage_pool = $curr->{pool} // 'rbd';
        my $data_pool = $curr->{'data-pool'};

        if ($pool eq $storage_pool || (defined($data_pool) && $pool eq $data_pool)) {
            $res->{$storeid} = $curr;
        }
    }

    return $res;
};
318
319
# POST <base path>: create a Ceph pool.
#
# Without erasure coding parameters a single pool named 'name' is created.
# If 'k'/'m' (optionally with 'failure-domain'/'device-class') or 'ecprofile'
# is given, two pools are created instead: '<name>-metadata' and an erasure
# coded '<name>-data'. A PVE storage is always added in that case.
#
# Needs Sys.Modify on '/', plus Datastore.Allocate on '/storage' whenever a
# storage gets added. Returns the value of fork_worker() for the background
# worker that does the actual creation.
__PACKAGE__->register_method ({
    name => 'createpool',
    path => '',
    method => 'POST',
    description => "Create POOL",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            add_storages => {
                description => "Configure VM and CT storage using the new pool. ".
                    "Always enabled for erasure coded pools.",
                type => 'boolean',
                optional => 1,
            },
            k => {
                type => 'integer',
                description => "Number of data chunks. Will create an erasure coded pool plus a ".
                    "replicated pool for metadata.",
                optional => 1,
            },
            m => {
                type => 'integer',
                description => "Number of coding chunks. Will create an erasure coded pool plus a ".
                    "replicated pool for metadata.",
                optional => 1,
            },
            'failure-domain' => {
                type => 'string',
                description => "CRUSH failure domain. Default is 'host'. Will create an erasure ".
                    "coded pool plus a replicated pool for metadata.",
                optional => 1,
            },
            'device-class' => {
                type => 'string',
                description => "CRUSH device class. Will create an erasure coded pool plus a ".
                    "replicated pool for metadata.",
                optional => 1,
            },
            ecprofile => {
                description => "Override the erasure code (EC) profile to use. Will create an ".
                    "erasure coded pool plus a replicated pool for metadata.",
                type => 'string',
                optional => 1,
            },
            %{ $ceph_pool_common_options->() },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        PVE::Cluster::check_cfs_quorum();
        PVE::Ceph::Tools::check_ceph_configured();

        my $pool = my $name = extract_param($param, 'name');
        # 'node' must not stay in $param: the remaining keys are handed to
        # create_pool() further down
        extract_param($param, 'node');
        my $add_storages = extract_param($param, 'add_storages');

        my $ec_k = extract_param($param, 'k');
        my $ec_m = extract_param($param, 'm');
        my $ec_failure_domain = extract_param($param, 'failure-domain');
        my $ec_device_class = extract_param($param, 'device-class');

        my $is_ec = 0;

        my $ecprofile = extract_param($param, 'ecprofile');
        die "Erasure code profile '$ecprofile' does not exist.\n"
            if $ecprofile && !PVE::Ceph::Tools::ecprofile_exists($ecprofile);

        # any EC related parameter requests an erasure coded pool
        if ($ec_k || $ec_m || $ec_failure_domain || $ec_device_class) {
            die "'k' and 'm' parameters are needed for an erasure coded pool\n"
                if !$ec_k || !$ec_m;

            $is_ec = 1;
        }

        $is_ec = 1 if $ecprofile;
        $add_storages = 1 if $is_ec;

        my $rpcenv = PVE::RPCEnvironment::get();
        my $user = $rpcenv->get_user();

        # Ceph uses target_size_bytes
        if (defined($param->{'target_size'})) {
            my $target_sizestr = extract_param($param, 'target_size');
            $param->{target_size_bytes} = PVE::JSONSchema::parse_size($target_sizestr);
        }

        if ($add_storages) {
            $rpcenv->check($user, '/storage', ['Datastore.Allocate']);
            die "pool name contains characters which are illegal for storage naming\n"
                if !PVE::JSONSchema::parse_storage_id($pool);
        }

        # pool defaults
        $param->{pg_num} //= 128;
        $param->{size} //= 3;
        $param->{min_size} //= 2;
        $param->{application} //= 'rbd';
        $param->{pg_autoscale_mode} //= 'warn';

        my $data_param = {};
        my $data_pool = '';

        if ($is_ec) {
            # Only an erasure coded pool needs an EC profile. Create one from
            # k/m unless an existing profile was selected. Replicated pools
            # must never reach this, as k and m are undefined for them.
            if (!$ecprofile) {
                $ecprofile = PVE::Ceph::Tools::get_ecprofile_name($name);
                eval {
                    PVE::Ceph::Tools::create_ecprofile(
                        $ecprofile,
                        $ec_k,
                        $ec_m,
                        $ec_failure_domain,
                        $ec_device_class,
                    );
                };
                die "could not create erasure code profile '$ecprofile': $@\n" if $@;
            }

            # copy all params, should be a flat hash
            $data_param = { map { $_ => $param->{$_} } keys %$param };

            $data_param->{pool_type} = 'erasure';
            $data_param->{allow_ec_overwrites} = 'true';
            $data_param->{erasure_code_profile} = $ecprofile;
            delete $data_param->{size};
            delete $data_param->{min_size};

            # metadata pool should be ok with 32 PGs
            $param->{pg_num} = 32;

            $pool = "${name}-metadata";
            $data_pool = "${name}-data";
        }

        my $worker = sub {
            PVE::Ceph::Tools::create_pool($pool, $param);

            PVE::Ceph::Tools::create_pool($data_pool, $data_param) if $is_ec;

            if ($add_storages) {
                # the storage keeps the plain name, even for the EC pool pair
                eval { $add_storage->($pool, "${name}", $data_pool) };
                die "adding PVE storage for ceph pool '$name' failed: $@\n" if $@;
            }
        };

        return $rpcenv->fork_worker('cephcreatepool', $pool, $user, $worker);
    },
});
473
474
# DELETE <base path>/{name}: destroy a Ceph pool.
#
# Unless 'force' is set, the pool is only destroyed when none of the RBD
# storages referencing it still lists any disk. If the pool's properties name
# an erasure code profile, its crush rule and (by default) the profile are
# removed as well. With 'remove_storages' the referencing storages that have
# no 'monhost' set are deleted from the storage configuration.
#
# Needs Sys.Modify on '/', plus Datastore.Allocate on '/storage' when
# 'remove_storages' is set. Returns the value of fork_worker().
__PACKAGE__->register_method ({
    name => 'destroypool',
    path => '{name}',
    method => 'DELETE',
    description => "Destroy pool",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            name => {
                description => "The name of the pool. It must be unique.",
                type => 'string',
            },
            force => {
                description => "If true, destroys pool even if in use",
                type => 'boolean',
                optional => 1,
                default => 0,
            },
            remove_storages => {
                description => "Remove all pveceph-managed storages configured for this pool",
                type => 'boolean',
                optional => 1,
                default => 0,
            },
            remove_ecprofile => {
                description => "Remove the erasure code profile. Used for erasure code pools. Default is true",
                type => 'boolean',
                optional => 1,
                default => 1,
            },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $rpcenv = PVE::RPCEnvironment::get();
        my $user = $rpcenv->get_user();
        if ($param->{remove_storages}) {
            $rpcenv->check($user, '/storage', ['Datastore.Allocate']);
        }

        my $pool = $param->{name};
        my $remove_ecprofile = $param->{remove_ecprofile} // 1;

        my $worker = sub {
            my $pool_storages = $get_storages->($pool);

            # if not forced, destroy ceph pool only when no
            # vm disks are on it anymore
            unless ($param->{force}) {
                my $storagecfg = PVE::Storage::config();
                for my $storeid (keys %$pool_storages) {
                    # check if any vm disks are on the pool
                    print "checking storage '$storeid' for RBD images..\n";
                    my $disks = PVE::Storage::vdisk_list($storagecfg, $storeid);
                    if (@{$disks->{$storeid}} != 0) {
                        die "ceph pool '$pool' still in use by storage '$storeid'\n";
                    }
                }
            }

            # read the properties first, they are gone once the pool is destroyed
            my $pool_properties = PVE::Ceph::Tools::get_pool_properties($pool);

            PVE::Ceph::Tools::destroy_pool($pool);

            my $ecprofile = $pool_properties->{erasure_code_profile};
            if ($ecprofile) {
                # cleanup is best effort: failures are only reported as warnings
                my $crush_rule = $pool_properties->{crush_rule};
                eval { PVE::Ceph::Tools::destroy_crush_rule($crush_rule); };
                warn "removing crush rule '${crush_rule}' failed: $@\n" if $@;

                if ($remove_ecprofile) {
                    eval { PVE::Ceph::Tools::destroy_ecprofile($ecprofile) };
                    warn "removing EC profile '${ecprofile}' failed: $@\n" if $@;
                }
            }

            return if !$param->{remove_storages};

            my $failed = 0;
            for my $storeid (keys %$pool_storages) {
                # skip external clusters, not managed by pveceph
                next if $pool_storages->{$storeid}->{monhost};
                eval { PVE::API2::Storage::Config->delete({storage => $storeid}) };
                if ($@) {
                    warn "failed to remove storage '$storeid': $@\n";
                    $failed = 1;
                }
            }
            die "failed to remove (some) storages - check log and remove manually!\n"
                if $failed;
        };

        return $rpcenv->fork_worker('cephdestroypool', $pool, $user, $worker);
    },
});
577
578
# PUT <base path>/{name}: change the settings of an existing pool.
#
# Accepts the common pool options without schema defaults. 'target_size' is
# converted to 'target_size_bytes' before the remaining parameters are passed
# to set_pool() inside a background worker. Needs Sys.Modify on '/'.
# Returns the value of fork_worker().
__PACKAGE__->register_method ({
    name => 'setpool',
    path => '{name}',
    method => 'PUT',
    description => "Change POOL settings",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Modify' ]],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            %{ $ceph_pool_common_options->('nodefault') },
        },
    },
    returns => { type => 'string' },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_configured();

        my $rpcenv = PVE::RPCEnvironment::get();
        my $user = $rpcenv->get_user();

        my $pool = extract_param($param, 'name');
        # remove 'node' as well, so that only pool settings stay in $param
        extract_param($param, 'node');

        # Ceph uses target_size_bytes
        if (defined($param->{target_size})) {
            my $size_string = extract_param($param, 'target_size');
            $param->{target_size_bytes} = PVE::JSONSchema::parse_size($size_string);
        }

        return $rpcenv->fork_worker(
            'cephsetpool',
            $pool,
            $user,
            sub {
                PVE::Ceph::Tools::set_pool($pool, $param);
            },
        );
    },
});
620
621
# GET <base path>/{name}: show the settings of a single pool.
#
# Reads all pool variables via 'osd pool get ... all'. With 'verbose' set, the
# result additionally carries the pool's autoscaler status, its usage
# statistics from 'df' and the list of applications enabled on it.
# Needs Sys.Audit or Datastore.Audit on '/'.
__PACKAGE__->register_method ({
    name => 'getpool',
    path => '{name}',
    method => 'GET',
    description => "List pool settings.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            name => {
                description => "The name of the pool. It must be unique.",
                type => 'string',
            },
            verbose => {
                type => 'boolean',
                default => 0,
                optional => 1,
                description => "If enabled, will display additional data".
                    "(eg. statistics).",
            },
        },
    },
    returns => {
        type => "object",
        properties => {
            id => { type => 'integer', title => 'ID' },
            pgp_num => { type => 'integer', title => 'PGP num' },
            noscrub => { type => 'boolean', title => 'noscrub' },
            'nodeep-scrub' => { type => 'boolean', title => 'nodeep-scrub' },
            nodelete => { type => 'boolean', title => 'nodelete' },
            nopgchange => { type => 'boolean', title => 'nopgchange' },
            nosizechange => { type => 'boolean', title => 'nosizechange' },
            write_fadvise_dontneed => { type => 'boolean', title => 'write_fadvise_dontneed' },
            hashpspool => { type => 'boolean', title => 'hashpspool' },
            use_gmt_hitset => { type => 'boolean', title => 'use_gmt_hitset' },
            fast_read => { type => 'boolean', title => 'Fast Read' },
            application_list => { type => 'array', title => 'Application', optional => 1 },
            statistics => { type => 'object', title => 'Statistics', optional => 1 },
            autoscale_status => { type => 'object', title => 'Autoscale Status', optional => 1 },
            %{ $ceph_pool_common_options->() },
        },
    },
    code => sub {
        my ($param) = @_;

        PVE::Ceph::Tools::check_ceph_inited();

        my $verbose = $param->{verbose};
        my $pool = $param->{name};

        my $rados = PVE::RADOS->new();
        my $res = $rados->mon_command({
            prefix => 'osd pool get',
            pool => "$pool",
            var => 'all',
        });

        # the flag values are passed on in stringified form
        my $data = {
            id => $res->{pool_id},
            name => $pool,
            size => $res->{size},
            min_size => $res->{min_size},
            pg_num => $res->{pg_num},
            pg_num_min => $res->{pg_num_min},
            pgp_num => $res->{pgp_num},
            crush_rule => $res->{crush_rule},
            pg_autoscale_mode => $res->{pg_autoscale_mode},
            noscrub => "$res->{noscrub}",
            'nodeep-scrub' => "$res->{'nodeep-scrub'}",
            nodelete => "$res->{nodelete}",
            nopgchange => "$res->{nopgchange}",
            nosizechange => "$res->{nosizechange}",
            write_fadvise_dontneed => "$res->{write_fadvise_dontneed}",
            hashpspool => "$res->{hashpspool}",
            use_gmt_hitset => "$res->{use_gmt_hitset}",
            fast_read => "$res->{fast_read}",
            target_size => $res->{target_size_bytes},
            target_size_ratio => $res->{target_size_ratio},
        };

        if ($verbose) {
            my $df = $rados->mon_command({ prefix => 'df' });

            # pg_autoscaler module is not enabled in Nautilus
            # avoid partial read further down, use new rados instance
            my $autoscale_status = eval { $get_autoscale_status->() };
            $data->{autoscale_status} = $autoscale_status->{$pool};

            foreach my $d (@{$df->{pools}}) {
                next if !$d->{stats};
                # 'df' lists every pool: only the entry of the requested pool
                # may provide the statistics
                next if !defined($d->{name}) || $d->{name} ne "$pool";
                $data->{statistics} = $d->{stats};
            }

            my $apps = $rados->mon_command({ prefix => "osd pool application get", pool => "$pool", });
            $data->{application_list} = [ keys %$apps ];
        }

        return $data;
    },
});
728
729
730 1;