]> git.proxmox.com Git - pve-manager.git/blame - PVE/CephTools.pm
Raise the default pg_num to 128
[pve-manager.git] / PVE / CephTools.pm
CommitLineData
a34866f0
DM
1package PVE::CephTools;
2
3use strict;
4use warnings;
f8346b52 5
a34866f0 6use File::Path;
f8346b52 7use IO::File;
a34866f0 8
f8346b52 9use PVE::Tools qw(run_command dir_glob_foreach);
f96d7012 10use PVE::RADOS;
a34866f0
DM
11
# Name of the ceph cluster; it is part of every config and keyring path below.
my $ccname = 'ceph';

# Location of the cluster configuration: the copy below /etc/pve and the path
# below ceph's own configuration directory.
my $ceph_cfgdir = "/etc/ceph";
my $pve_ceph_cfgpath = "/etc/pve/$ccname.conf";
my $ceph_cfgpath = "$ceph_cfgdir/$ccname.conf";

# Keyrings kept below /etc/pve/priv
my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";

# Bootstrap keyrings and the MDS data directory below /var/lib/ceph
my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
my $ceph_mds_data_dir = '/var/lib/ceph/mds';

# Maps a service key (as accepted by check_ceph_installed) to its binary.
my $ceph_service = {
    ceph_bin => "/usr/bin/ceph",
    ceph_mon => "/usr/bin/ceph-mon",
    ceph_mgr => "/usr/bin/ceph-mgr",
    ceph_osd => "/usr/bin/ceph-osd",
    ceph_mds => "/usr/bin/ceph-mds",
};

# Values that get_config() hands out by key.
my $config_hash = {
    ccname                     => $ccname,
    pve_ceph_cfgpath           => $pve_ceph_cfgpath,
    pve_mon_key_path           => $pve_mon_key_path,
    pve_ckeyring_path          => $pve_ckeyring_path,
    ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
    ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
    ceph_mds_data_dir          => $ceph_mds_data_dir,
    long_rados_timeout         => 60,
};
41
c64c04dd
AA
# Query the locally installed ceph binary for its version.
#
# $noerr is passed on to check_ceph_installed() and to run_command().
#
# Returns undef when the ceph binary is not installed or when the output of
# 'ceph --version' does not contain an x.y.z version. Otherwise returns
# (version, major, minor, patch) in list context and only major in scalar
# context.
sub get_local_version {
    my ($noerr) = @_;

    return undef if !PVE::CephTools::check_ceph_installed('ceph_bin', $noerr);

    # outfunc is invoked with each output line; the last one wins
    my $version_line;
    my $remember_line = sub { $version_line = shift; };

    run_command(
        [$ceph_service->{ceph_bin}, '--version'],
        noerr => $noerr,
        outfunc => $remember_line,
    );

    if ($version_line && $version_line =~ /^ceph.*\s((\d+)\.(\d+)\.(\d+))/) {
        my ($version, $major, $minor, $patch) = ($1, $2, $3, $4);
        return wantarray ? ($version, $major, $minor, $patch) : $major;
    }

    return undef;
}
58
a34866f0
DM
# Look up one of the module's configuration values (see $config_hash).
#
# $key - name of the setting, e.g. 'pve_ceph_cfgpath'
#
# Returns the stored value. Dies with "no such ceph config '<key>'" when the
# key is undefined or unknown.
sub get_config {
    my $key = shift;

    my $value = defined($key) ? $config_hash->{$key} : undef;

    # Test for definedness, not truth: a setting whose value is 0 or '' is
    # still a valid setting and must not be reported as missing.
    if (!defined($value)) {
        my $shown = $key // '';
        die "no such ceph config '$shown'";
    }

    return $value;
}
68
a34866f0
DM
# Delete the ceph configuration files, the keyrings and all monitor data
# directories matching /var/lib/ceph/mon/ceph-*. Failures of the individual
# unlink calls are ignored.
sub purge_all_ceph_files {
    # fixme: this is very dangerous - should we really support this function?

    my @obsolete_files = (
        $ceph_cfgpath,
        $pve_ceph_cfgpath,
        $pve_ckeyring_path,
        $pve_mon_key_path,
        $ceph_bootstrap_osd_keyring,
        $ceph_bootstrap_mds_keyring,
    );

    foreach my $path (@obsolete_files) {
        unlink $path;
    }

    # goes through the shell on purpose, the glob has to be expanded
    system("rm -rf /var/lib/ceph/mon/ceph-*");

    # remove osd?
}
85
# Check that the binary belonging to a ceph service is installed (executable).
#
# $service - key into $ceph_service, defaults to 'ceph_bin'
# $noerr   - when true, return undef instead of dying if the binary is missing
#
# Returns 1 when the binary is executable.
sub check_ceph_installed {
    my ($service, $noerr) = @_;

    $service //= 'ceph_bin';

    return 1 if -x $ceph_service->{$service};

    die "binary not installed: $ceph_service->{$service}\n" if !$noerr;

    return undef;
}
98
# Check that ceph is installed and that the pveceph configuration file exists.
#
# $noerr - when true, return undef instead of dying on a failed check
#
# Returns 1 when both checks pass.
sub check_ceph_inited {
    my ($noerr) = @_;

    check_ceph_installed('ceph_bin', $noerr) or return undef;

    return 1 if -f $pve_ceph_cfgpath;

    die "pveceph configuration not initialized\n" if !$noerr;

    return undef;
}
111
# Check that ceph is initialized (see check_ceph_inited) and that the
# configuration is also reachable through ceph's own config path.
#
# $noerr - when true, return undef instead of dying on a failed check
#
# Returns 1 when all checks pass.
sub check_ceph_enabled {
    my ($noerr) = @_;

    check_ceph_inited($noerr) or return undef;

    return 1 if -f $ceph_cfgpath;

    die "pveceph configuration not enabled\n" if !$noerr;

    return undef;
}
124
# Parse an ini-style ceph configuration file.
#
# $filename - file to read, defaults to the pveceph configuration path
#
# Returns a hash reference of the form { section => { key => value } }. A
# missing file yields an empty hash. Dies if the file exists but cannot be
# opened. Lines in front of the first section header are skipped with a
# warning.
sub parse_ceph_config {
    my ($filename) = @_;

    $filename ||= $pve_ceph_cfgpath;

    my $cfg = {};

    -f $filename or return $cfg;

    my $fh = IO::File->new($filename, "r") ||
        die "unable to open '$filename' - $!\n";

    my $section;

    while (defined(my $line = <$fh>)) {
        # drop comments (';' or '#' up to end of line) and outer whitespace
        $line =~ s/[;#].*$//;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if !$line;

        if ($line =~ m/^\[(\S+)\]$/) {
            $section = $1;
        }

        if (!$section) {
            warn "no section - skip: $line\n";
            next;
        }

        # a section header line never matches here, so no extra 'next' needed
        if (my ($key, $value) = $line =~ m/^(.*?\S)\s*=\s*(\S.*)$/) {
            $cfg->{$section}->{$key} = $value;
        }
    }

    return $cfg;
}
159
# Serialize a parsed ceph configuration and store it as the pveceph
# configuration file.
#
# $cfg - hash reference of the form { section => { key => value } }, as
#        returned by parse_ceph_config()
#
# Sections are written in a fixed group order (see @section_patterns below).
# Sections that match none of these patterns are not written at all.
sub write_ceph_config {
    my ($cfg) = @_;

    my $out = '';

    my $cond_write_sec = sub {
        my $re = shift;

        # Sort the section names: perl's hash order is not stable, and without
        # sorting the order of e.g. [mon.a] / [mon.b] would change from one
        # write to the next although the content did not change.
        foreach my $section (sort keys %$cfg) {
            next if $section !~ m/^$re$/;
            $out .= "[$section]\n";
            foreach my $key (sort keys %{$cfg->{$section}}) {
                $out .= "\t $key = $cfg->{$section}->{$key}\n";
            }
            $out .= "\n";
        }
    };

    # group order of the generated file
    my @section_patterns = (
        'global',
        'client',
        'mds',
        'mds\..*',
        'mon',
        'osd',
        'mon\..*',
        'osd\..*',
    );

    foreach my $pattern (@section_patterns) {
        $cond_write_sec->($pattern);
    }

    PVE::Tools::file_set_contents($pve_ceph_cfgpath, $out);
}
189
f96d7012
TL
# Create a ceph pool and apply its basic settings through monitor commands.
#
# $pool  - name of the new pool
# $param - hash reference with the optional keys pg_num (default 64),
#          size (default 3), min_size (default 2), application (default
#          'rbd') and crush_rule (only set when defined)
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
#
# Returns the result of the final 'osd pool application enable' command.
sub create_pool {
    my ($pool, $param, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    my $pg_num = $param->{pg_num} || 64;
    my $size = $param->{size} || 3;
    my $min_size = $param->{min_size} || 2;
    my $application = $param->{application} // 'rbd';

    # all pool properties are set with the same command, only var/val differ
    my $set_pool_var = sub {
        my ($var, $val) = @_;

        $rados->mon_command({
            prefix => "osd pool set",
            pool => $pool,
            var => $var,
            val => $val,
            format => 'plain',
        });
    };

    $rados->mon_command({
        prefix => "osd pool create",
        pool => $pool,
        pg_num => int($pg_num),
        format => 'plain',
    });

    $set_pool_var->('min_size', $min_size);
    $set_pool_var->('size', $size);

    if (defined($param->{crush_rule})) {
        $set_pool_var->('crush_rule', $param->{crush_rule});
    }

    $rados->mon_command({
        prefix => "osd pool application enable",
        pool => $pool,
        app => $application,
    });
}
242
7e1a9d25
TL
# Return the result of the 'osd lspools' monitor command.
#
# $pool  - accepted but not used by this function
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
sub ls_pools {
    my ($pool, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    my $pools = $rados->mon_command({ prefix => "osd lspools" });

    return $pools;
}
254
f96d7012
TL
# Delete a ceph pool through the 'osd pool delete' monitor command.
#
# $pool  - name of the pool to delete
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
#
# Returns the result of the monitor command.
sub destroy_pool {
    my ($pool, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    # fixme: '--yes-i-really-really-mean-it'
    # the pool name is sent twice (pool and pool2) together with the flag
    my $delete_cmd = {
        prefix => "osd pool delete",
        pool => $pool,
        pool2 => $pool,
        sure => '--yes-i-really-really-mean-it',
        format => 'plain',
    };

    $rados->mon_command($delete_cmd);
}
271
a34866f0
DM
# Make sure ceph's config path is a symlink to the pveceph configuration.
#
# Nothing is changed when the link already exists and points to the pveceph
# configuration. Dies when the path is occupied by anything else (a real
# file, a directory or a link with another target) or when the link cannot
# be created.
sub setup_pve_symlinks {
    # Test with -l first: -f follows symlinks, so a correct link whose target
    # does not exist yet would look like "nothing there" and the symlink()
    # call below would then fail because the link already exists.
    if (-l $ceph_cfgpath) {
        my $lnk = readlink($ceph_cfgpath);
        die "file '$ceph_cfgpath' already exists\n"
            if !defined($lnk) || $lnk ne $pve_ceph_cfgpath;
    } elsif (-e $ceph_cfgpath) {
        # fail if we find a real file instead of a link
        die "file '$ceph_cfgpath' already exists\n";
    } else {
        symlink($pve_ceph_cfgpath, $ceph_cfgpath) ||
            die "unable to create symlink '$ceph_cfgpath' - $!\n";
    }
}
283
# Run an action (e.g. 'start', 'stop', 'enable') for a ceph service.
#
# $action  - action passed to systemctl respectively to the ceph init script
# $service - service in the form '<type>' or '<type>.<id>', where type is one
#            of mon, osd, mds, mgr or radosgw
#
# With systemd, '<type>.<id>' is mapped to the unit 'ceph-<type>@<id>' and a
# plain '<type>' to 'ceph-<type>.target'. Anything else, including an
# undefined $service, is mapped to 'ceph.target'.
sub ceph_service_cmd {
    my ($action, $service) = @_;

    if (!systemd_managed()) {
        # ceph daemons does not call 'setsid', so we do that ourself
        # (fork_worker send KILL to whole process group)
        return PVE::Tools::run_command(['setsid', 'service', 'ceph', '-c', $pve_ceph_cfgpath, $action, $service]);
    }

    my $unit = "ceph.target";

    if ($service && $service =~ m/^(mon|osd|mds|mgr|radosgw)(\.([A-Za-z0-9\-]{1,32}))?$/) {
        my ($type, $id) = ($1, $3);
        $unit = defined($id) ? "ceph-$type\@$id" : "ceph-$type.target";
    }

    return PVE::Tools::run_command(['/bin/systemctl', $action, $unit]);
}
303
1aecf972
WL
# Ceph versions newer than Hammer use 'ceph' as user and group instead of
# 'root', and are managed by systemd.
#
# Returns 1 when the ceph-osd systemd unit template is installed, else 0.
sub systemd_managed {
    my $osd_unit_template = "/lib/systemd/system/ceph-osd\@.service";

    return (-f $osd_unit_template) ? 1 : 0;
}
314
b82649cc
TL
# Collect the IDs of all MDS instances that have a '<cluster name>-<id>'
# entry in the MDS data directory.
#
# Returns an array reference with the IDs.
sub list_local_mds_ids {
    my @local_ids;

    # the callback receives the entry name first, followed by the captured ID
    my $collect_id = sub {
        my (undef, $mds_id) = @_;
        push @local_ids, $mds_id;
    };

    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, $collect_id);

    return \@local_ids;
}
325
# Collect the state of the MDS daemons reported by 'mds stat'.
#
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
#
# Returns a hash reference keyed by MDS name; each value is a hash with the
# keys addr, rank, standby_replay (normalized to 1 or 0) and state. Standby
# daemons are added first, then the daemons listed in the mdsmap of the first
# filesystem.
sub get_cluster_mds_state {
    my ($rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    my $mds_state = {};

    my $record_mds = sub {
        my ($mds) = @_;

        $mds_state->{$mds->{name}} = {
            addr => $mds->{addr},
            rank => $mds->{rank},
            standby_replay => $mds->{standby_replay} ? 1 : 0,
            state => $mds->{state},
        };
    };

    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
    my $fsmap = $mds_dump->{fsmap};

    foreach my $standby (@{$fsmap->{standbys}}) {
        $record_mds->($standby);
    }

    my $first_fs = $fsmap->{filesystems}->[0];
    my $active_mds = $first_fs->{mdsmap}->{info};

    # normally there's only one active MDS, but we can have multiple active for
    # different ranks (e.g., different cephs path hierarchy). So just add all.
    foreach my $active (values %$active_mds) {
        $record_mds->($active);
    }

    return $mds_state;
}
366
a62d7bd9
TL
# Check whether the first filesystem reported by 'mds stat' has an MDS in
# state 'up:active'.
#
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
#
# Returns undef when no filesystem is reported, 1 when an active MDS was
# found and 0 otherwise.
sub is_any_mds_active {
    my ($rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
    my $fs = $mds_dump->{fsmap}->{filesystems};

    return undef if !$fs || scalar(@$fs) <= 0;

    my $mds_info = $fs->[0]->{mdsmap}->{info};

    foreach my $mds (values %$mds_info) {
        next if $mds->{state} ne 'up:active';
        return 1;
    }

    return 0;
}
388
b82649cc
TL
# Set up and start a new MDS daemon on the local node.
#
# $id    - ID of the new MDS; purely numeric IDs are rejected
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
#
# Creates the service directory below the MDS data directory, requests the
# keys for 'mds.<id>', stores them in the 'keyring' file of that directory,
# hands the directory to ceph:ceph and finally enables and starts the
# service. Dies when the ID is numeric only, when the service directory
# already exists or cannot be created, or when one of the called helpers dies.
#
# Returns undef.
sub create_mds {
    my ($id, $rados) = @_;

    # `ceph fs status` fails with numeric only ID.
    die "ID: $id, numeric only IDs are not supported\n"
        if $id =~ /^\d+$/;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    # use the shared data dir setting, so that this function looks at the same
    # place as list_local_mds_ids()
    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
    my $service_keyring = "$service_dir/keyring";
    my $service_name = "mds.$id";

    die "ceph MDS directory '$service_dir' already exists\n"
        if -d $service_dir;

    print "creating MDS directory '$service_dir'\n";
    eval { File::Path::mkpath($service_dir) };
    if (my $err = $@) {
        # pass the reason on, the plain "failed" message cannot be diagnosed
        chomp $err;
        die "creation MDS directory '$service_dir' failed - $err\n";
    }

    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
    my $priv = [
        mon => 'allow profile mds',
        osd => 'allow rwx',
        mds => 'allow *',
    ];

    print "creating keys for '$service_name'\n";
    my $output = $rados->mon_command({
        prefix => 'auth get-or-create',
        entity => $service_name,
        caps => $priv,
        format => 'plain',
    });

    PVE::Tools::file_set_contents($service_keyring, $output);

    print "setting ceph as owner for service directory\n";
    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);

    print "enabling service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('enable', $service_name);
    print "starting service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('start', $service_name);

    return undef;
}
439
# Stop and remove an MDS daemon from the local node.
#
# $id    - ID of the MDS to remove
# $rados - optional RADOS connection, a new PVE::RADOS object is created
#          when it is not given
#
# Disables and stops the service, removes its service directory (only a
# warning is printed when the directory does not exist) and deletes the auth
# entry of 'mds.<id>'.
#
# Returns undef.
sub destroy_mds {
    my ($id, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    my $service_name = "mds.$id";
    my $service_dir = "/var/lib/ceph/mds/$ccname-$id";

    print "disabling service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('disable', $service_name);
    print "stopping service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('stop', $service_name);

    if (!-d $service_dir) {
        warn "cannot cleanup MDS $id directory, '$service_dir' not found\n";
    } else {
        print "removing ceph-mds directory '$service_dir'\n";
        File::Path::remove_tree($service_dir);
    }

    print "removing ceph auth for '$service_name'\n";
    my $auth_del_cmd = {
        prefix => 'auth del',
        entity => $service_name,
        format => 'plain',
    };
    $rados->mon_command($auth_del_cmd);

    return undef;
}
471
456a7f4d
AA
# Wipe the first 200 MB of each given device to clear off leftovers from
# previous use, otherwise creating an OSD fails.
#
# @devs - paths of the devices to wipe
#
# A failing dd call only triggers a warning, the remaining devices are still
# processed.
sub wipe_disks {
    my (@devs) = @_;

    my @wipe_cmd = qw(/bin/dd if=/dev/zero bs=1M count=200 conv=fdatasync);

    for my $devpath (@devs) {
        print "wipe disk: $devpath\n";

        my $cmd = [@wipe_cmd, "of=${devpath}"];
        eval { run_command($cmd) };
        if (my $err = $@) {
            warn $err;
        }
    }
}
484
a34866f0 4851;