]>
Commit | Line | Data |
---|---|---|
a34866f0 DM |
1 | package PVE::CephTools; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
f8346b52 | 5 | |
a34866f0 | 6 | use File::Path; |
f8346b52 | 7 | use IO::File; |
a34866f0 | 8 | |
f8346b52 | 9 | use PVE::Tools qw(run_command dir_glob_foreach); |
f96d7012 | 10 | use PVE::RADOS; |
a34866f0 DM |
11 | |
# Name of the ceph cluster; every path below is derived from it.
my $ccname = 'ceph';

# Ceph's own configuration directory and the config file inside it.
my $ceph_cfgdir = "/etc/ceph";
my $ceph_cfgpath = "${ceph_cfgdir}/${ccname}.conf";

# Configuration file and keyrings kept below /etc/pve.
my $pve_ceph_cfgpath = "/etc/pve/${ccname}.conf";
my $pve_mon_key_path = "/etc/pve/priv/${ccname}.mon.keyring";
my $pve_ckeyring_path = "/etc/pve/priv/${ccname}.client.admin.keyring";

# Bootstrap keyrings and MDS data directory below /var/lib/ceph.
my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/${ccname}.keyring";
my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/${ccname}.keyring";
my $ceph_mds_data_dir = '/var/lib/ceph/mds';

# Maps a service key (as accepted by check_ceph_installed) to its binary.
my $ceph_service = {
    ceph_bin => "/usr/bin/ceph",
    (map { ("ceph_$_" => "/usr/bin/ceph-$_") } qw(mon mgr osd mds)),
};

# Values handed out by get_config().
my $config_hash = {
    ccname => $ccname,
    pve_ceph_cfgpath => $pve_ceph_cfgpath,
    pve_mon_key_path => $pve_mon_key_path,
    pve_ckeyring_path => $pve_ckeyring_path,
    ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
    ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
    ceph_mds_data_dir => $ceph_mds_data_dir,
    long_rados_timeout => 60,
};
41 | ||
c64c04dd AA |
# Query the version of the locally installed ceph binary.
#
# $noerr is passed on to check_ceph_installed() and run_command().
# In list context returns (version, major, minor, patch), in scalar context
# only the major number. Returns undef when the binary check fails (with
# $noerr set) or when no version could be parsed from the output.
sub get_local_version {
    my ($noerr) = @_;

    return undef if !PVE::CephTools::check_ceph_installed('ceph_bin', $noerr);

    my $version_line;
    run_command(
        [$ceph_service->{ceph_bin}, '--version'],
        noerr => $noerr,
        outfunc => sub { $version_line = shift; },
    );

    if ($version_line) {
        # captures: full version, major, minor, patch
        my @parts = $version_line =~ /^ceph.*\s((\d+)\.(\d+)\.(\d+))/;
        if (@parts) {
            return wantarray ? @parts : $parts[1];
        }
    }

    return undef;
}
58 | ||
a34866f0 DM |
# Look up one entry of the module's static configuration hash.
#
# $key - name of the entry (e.g. 'ccname', 'pve_ceph_cfgpath').
# Returns the stored value; dies if there is no such entry.
sub get_config {
    my $key = shift;

    # Test for definedness rather than truth, so that an entry holding a
    # false value (0 or '') is not misreported as missing.
    my $value = defined($key) ? $config_hash->{$key} : undef;

    die "no such ceph config '" . ($key // '') . "'" if !defined($value);

    return $value;
}
68 | ||
a34866f0 DM |
# Delete the ceph configuration, the keyrings and all monitor data
# directories. Failures of the individual unlink calls are ignored.
sub purge_all_ceph_files {
    # fixme: this is very dangerous - should we really support this function?

    my @files = (
        $ceph_cfgpath,
        $pve_ceph_cfgpath,
        $pve_ckeyring_path,
        $pve_mon_key_path,
        $ceph_bootstrap_osd_keyring,
        $ceph_bootstrap_mds_keyring,
    );

    for my $file (@files) {
        unlink $file;
    }

    system("rm -rf /var/lib/ceph/mon/ceph-*");

    # remove osd?
}
85 | ||
# Check that the binary belonging to a ceph service is installed.
#
# $service - key into the service table (defaults to 'ceph_bin').
# $noerr   - if true, return undef instead of dying on failure.
# Returns 1 if the binary exists and is executable.
sub check_ceph_installed {
    my ($service, $noerr) = @_;

    $service = 'ceph_bin' if !defined($service);

    my $binary = $ceph_service->{$service};

    # An unknown service key used to end up in `-x undef` and an error
    # message with an empty binary name; report it explicitly instead.
    if (!defined($binary)) {
        die "unknown ceph service '$service'\n" if !$noerr;
        return undef;
    }

    if (! -x $binary) {
        die "binary not installed: $binary\n" if !$noerr;
        return undef;
    }

    return 1;
}
98 | ||
# Check that ceph is installed and the pve ceph configuration file exists.
#
# $noerr - if true, return undef instead of dying on failure.
# Returns 1 on success.
sub check_ceph_inited {
    my ($noerr) = @_;

    check_ceph_installed('ceph_bin', $noerr) or return undef;

    return 1 if -f $pve_ceph_cfgpath;

    die "pveceph configuration not initialized\n" if !$noerr;

    return undef;
}
111 | ||
# Check that ceph is initialized (see check_ceph_inited) and that the
# configuration file inside the ceph config directory exists as well.
#
# $noerr - if true, return undef instead of dying on failure.
# Returns 1 on success.
sub check_ceph_enabled {
    my ($noerr) = @_;

    check_ceph_inited($noerr) or return undef;

    return 1 if -f $ceph_cfgpath;

    die "pveceph configuration not enabled\n" if !$noerr;

    return undef;
}
124 | ||
# Parse an INI-style ceph configuration file.
#
# $filename - file to read; defaults to the pve ceph configuration path.
# Returns a hash reference { section => { key => value } }. A missing file
# yields an empty hash; an unreadable file makes the function die.
sub parse_ceph_config {
    my ($filename) = @_;

    $filename ||= $pve_ceph_cfgpath;

    my $cfg = {};

    return $cfg if ! -f $filename;

    my $fh = IO::File->new($filename, "r")
        or die "unable to open '$filename' - $!\n";

    my $section;

    while (defined(my $line = <$fh>)) {
        # drop comments, then surrounding whitespace
        for ($line) {
            s/[;#].*$//;
            s/^\s+//;
            s/\s+$//;
        }
        next if !$line;

        # a "[name]" header switches the current section
        if ($line =~ m/^\[(\S+)\]$/) {
            $section = $1;
        }

        if (!$section) {
            warn "no section - skip: $line\n";
            next;
        }

        if (my ($key, $value) = $line =~ m/^(.*?\S)\s*=\s*(\S.*)$/) {
            $cfg->{$section}->{$key} = $value;
        }
    }

    return $cfg;
}
159 | ||
# Serialize a configuration hash ({ section => { key => value } }) and
# write it to the pve ceph configuration file.
#
# Well-known sections are written first in a fixed order; every other
# section follows afterwards, so that sections this module does not know
# about are preserved instead of being silently dropped. Sections and
# keys are sorted to keep the output stable between runs.
sub write_ceph_config {
    my ($cfg) = @_;

    my $out = '';
    my $written = {};

    my $write_section = sub {
        my ($section) = @_;

        return if $written->{$section};
        $written->{$section} = 1;

        $out .= "[$section]\n";
        foreach my $key (sort keys %{$cfg->{$section}}) {
            $out .= "\t $key = $cfg->{$section}->{$key}\n";
        }
        $out .= "\n";
    };

    my @sections = sort keys %$cfg;

    # preferred order of the known section names / patterns
    my @ordered_patterns = (
        'global',
        'client',
        'mds',
        'mds\..*',
        'mon',
        'osd',
        'mon\..*',
        'osd\..*',
    );

    foreach my $re (@ordered_patterns) {
        foreach my $section (@sections) {
            $write_section->($section) if $section =~ m/^$re$/;
        }
    }

    # everything that matched none of the patterns above
    foreach my $section (@sections) {
        $write_section->($section);
    }

    PVE::Tools::file_set_contents($pve_ceph_cfgpath, $out);
}
189 | ||
f96d7012 TL |
# Create a pool and apply its basic settings through monitor commands.
#
# $pool  - name of the pool.
# $param - hash reference with optional pg_num (default 64), size
#          (default 3), min_size (default 2), application (default 'rbd')
#          and crush_rule (only set when defined).
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
sub create_pool {
    my ($pool, $param, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    my $pg_num = $param->{pg_num} || 64;
    my $size = $param->{size} || 3;
    my $min_size = $param->{min_size} || 2;
    my $application = $param->{application} // 'rbd';

    # issue a single "osd pool set" for this pool
    my $set_pool_var = sub {
        my ($var, $val) = @_;

        $rados->mon_command({
            prefix => "osd pool set",
            pool => $pool,
            var => $var,
            val => $val,
            format => 'plain',
        });
    };

    $rados->mon_command({
        prefix => "osd pool create",
        pool => $pool,
        pg_num => int($pg_num),
        format => 'plain',
    });

    $set_pool_var->('min_size', $min_size);
    $set_pool_var->('size', $size);
    $set_pool_var->('crush_rule', $param->{crush_rule})
        if defined($param->{crush_rule});

    return $rados->mon_command({
        prefix => "osd pool application enable",
        pool => $pool,
        app => $application,
    });
}
242 | ||
7e1a9d25 TL |
# Return the result of the "osd lspools" monitor command.
#
# $pool  - accepted for call compatibility, not used.
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
sub ls_pools {
    my ($pool, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    # the command is always evaluated in scalar context
    return scalar($rados->mon_command({ prefix => "osd lspools" }));
}
254 | ||
f96d7012 TL |
# Delete a pool via the "osd pool delete" monitor command.
#
# $pool  - name of the pool; it is sent twice (pool and pool2) together
#          with the confirmation flag.
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
sub destroy_pool {
    my ($pool, $rados) = @_;

    $rados = PVE::RADOS->new() if !defined($rados);

    # fixme: '--yes-i-really-really-mean-it'
    my $cmd = {
        prefix => "osd pool delete",
        pool => $pool,
        pool2 => $pool,
        sure => '--yes-i-really-really-mean-it',
        format => 'plain',
    };

    return $rados->mon_command($cmd);
}
271 | ||
a34866f0 DM |
# Make sure the ceph configuration path is a symlink to the pve ceph
# configuration file, creating the link if nothing exists there yet.
#
# Dies if the path is occupied by anything other than the expected link,
# or if the link cannot be created.
sub setup_pve_symlinks {
    if (-l $ceph_cfgpath) {
        # Inspect the link itself instead of its target, so an already
        # correct link is accepted even while its target is missing.
        my $lnk = readlink($ceph_cfgpath);
        die "file '$ceph_cfgpath' already exists\n"
            if !defined($lnk) || $lnk ne $pve_ceph_cfgpath;
    } elsif (-e $ceph_cfgpath) {
        # fail if we find a real file instead of a link
        die "file '$ceph_cfgpath' already exists\n";
    } else {
        symlink($pve_ceph_cfgpath, $ceph_cfgpath) ||
            die "unable to create symlink '$ceph_cfgpath' - $!\n";
    }
}
283 | ||
# Run a service action (e.g. 'start', 'stop', 'enable') for ceph.
#
# $action  - action passed on to systemctl or the 'service' command.
# $service - optional "<type>" or "<type>.<id>" with type being one of
#            mon, osd, mds, mgr or radosgw. On systemd-managed setups
#            anything else selects the whole "ceph.target".
sub ceph_service_cmd {
    my ($action, $service) = @_;

    if (!systemd_managed()) {
        # ceph daemons does not call 'setsid', so we do that ourself
        # (fork_worker send KILL to whole process group)
        return PVE::Tools::run_command(['setsid', 'service', 'ceph', '-c', $pve_ceph_cfgpath, $action, $service]);
    }

    my $unit = "ceph.target";

    if ($service && $service =~ m/^(mon|osd|mds|mgr|radosgw)(\.([A-Za-z0-9\-]{1,32}))?$/) {
        my ($type, $instance) = ($1, $3);
        # with an ID address the single instance, otherwise the type's target
        $unit = defined($instance) ? "ceph-$type\@$instance" : "ceph-$type.target";
    }

    return PVE::Tools::run_command(['/bin/systemctl', $action, $unit]);
}
303 | ||
1aecf972 WL |
# Ceph versions newer than Hammer use 'ceph' as user and group instead
# of 'root', and use systemd.
#
# Returns 1 if the ceph-osd@ systemd unit file exists, 0 otherwise.
sub systemd_managed {
    return (-f "/lib/systemd/system/ceph-osd\@.service") ? 1 : 0;
}
314 | ||
b82649cc TL |
# Collect the IDs of all MDS instances that have an entry named
# "<cluster name>-<id>" inside the MDS data directory.
#
# Returns an array reference of IDs.
sub list_local_mds_ids {
    my @ids;

    my $collect = sub {
        # the ID is the second argument handed to the callback
        push @ids, $_[1];
    };

    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, $collect);

    return \@ids;
}
325 | ||
# Build a map of all MDS daemons known to the cluster, as reported by the
# 'mds stat' monitor command.
#
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
# Returns a hash reference keyed by MDS name; each value holds addr, rank,
# standby_replay (normalized to 0/1) and state. Standby daemons and the
# daemons listed for every filesystem of the fsmap are included.
sub get_cluster_mds_state {
    my ($rados) = @_;

    my $mds_state = {};

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $add_state = sub {
        my ($mds) = @_;

        $mds_state->{$mds->{name}} = {
            addr => $mds->{addr},
            rank => $mds->{rank},
            standby_replay => $mds->{standby_replay} ? 1 : 0,
            state => $mds->{state},
        };
    };

    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
    my $fsmap = $mds_dump->{fsmap} // {};

    foreach my $mds (@{$fsmap->{standbys} // []}) {
        $add_state->($mds);
    }

    # Walk every filesystem, not just the first one, so the daemons of
    # additional filesystems are reported as well.
    foreach my $fs_info (@{$fsmap->{filesystems} // []}) {
        my $active_mds = $fs_info->{mdsmap}->{info} // {};

        # normally there's only one active MDS, but we can have multiple
        # active for different ranks (e.g., different cephfs path
        # hierarchy). So just add all.
        foreach my $mds (values %$active_mds) {
            $add_state->($mds);
        }
    }

    return $mds_state;
}
366 | ||
a62d7bd9 TL |
# Check whether at least one MDS daemon is in state 'up:active'.
#
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
# Returns undef if the 'mds stat' result lists no filesystem, 1 if any
# filesystem has an active MDS, 0 otherwise.
sub is_any_mds_active {
    my ($rados) = @_;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
    my $filesystems = $mds_dump->{fsmap}->{filesystems};

    if (!($filesystems && scalar(@$filesystems) > 0)) {
        return undef;
    }

    # Look at every filesystem, not only the first one.
    for my $fs (@$filesystems) {
        my $mds_info = $fs->{mdsmap}->{info} // {};

        for my $mds (values %$mds_info) {
            return 1 if defined($mds->{state}) && $mds->{state} eq 'up:active';
        }
    }

    return 0;
}
388 | ||
b82649cc TL |
# Create and start a new MDS instance on the local node.
#
# $id    - ID of the MDS. Must not be numeric-only and must consist of 1 to
#          32 characters out of [A-Za-z0-9-], the same set ceph_service_cmd
#          accepts for a service instance.
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
#
# Creates the service directory, fetches the keyring with 'auth
# get-or-create', hands the directory over to ceph:ceph and enables and
# starts the service. Dies on invalid IDs, if the directory already exists,
# or if any step fails. Returns undef.
sub create_mds {
    my ($id, $rados) = @_;

    die "missing MDS ID\n" if !defined($id);

    # `ceph fs status` fails with numeric only ID.
    die "ID: $id, numeric only IDs are not supported\n"
        if $id =~ /^\d+$/;

    # An ID that ceph_service_cmd's service pattern rejects would make the
    # enable/start calls below act on the whole 'ceph.target' instead of
    # this single instance, so refuse it before touching anything.
    die "ID: $id, only 1 to 32 characters of [A-Za-z0-9-] are supported\n"
        if $id !~ m/\A[A-Za-z0-9\-]{1,32}\z/;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
    my $service_keyring = "$service_dir/keyring";
    my $service_name = "mds.$id";

    die "ceph MDS directory '$service_dir' already exists\n"
        if -d $service_dir;

    print "creating MDS directory '$service_dir'\n";
    eval { File::Path::mkpath($service_dir) };
    if (my $err = $@) {
        # keep the underlying reason instead of swallowing it
        die "creation MDS directory '$service_dir' failed: $err";
    }

    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
    my $priv = [
        mon => 'allow profile mds',
        osd => 'allow rwx',
        mds => 'allow *',
    ];

    eval {
        print "creating keys for '$service_name'\n";
        my $output = $rados->mon_command({
            prefix => 'auth get-or-create',
            entity => $service_name,
            caps => $priv,
            format => 'plain',
        });

        PVE::Tools::file_set_contents($service_keyring, $output);

        print "setting ceph as owner for service directory\n";
        run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
    };
    if (my $err = $@) {
        # Remove the directory created above again; otherwise a retry
        # would be rejected with "already exists".
        File::Path::remove_tree($service_dir);
        die $err;
    }

    print "enabling service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('enable', $service_name);
    print "starting service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('start', $service_name);

    return undef;
}
439 | ||
# Stop and remove a local MDS instance.
#
# $id    - ID of the MDS; must consist of 1 to 32 characters out of
#          [A-Za-z0-9-], the same set ceph_service_cmd accepts for a
#          service instance.
# $rados - object providing mon_command(); a new PVE::RADOS connection is
#          opened when omitted.
#
# Disables and stops the service, removes its directory (warning if it
# does not exist) and deletes its auth entry. Returns undef.
sub destroy_mds {
    my ($id, $rados) = @_;

    # An ID that ceph_service_cmd's service pattern rejects would make the
    # disable/stop calls below act on the whole 'ceph.target', i.e. on
    # every ceph service of this node, so refuse it up front.
    die "invalid MDS ID '" . ($id // '') . "'\n"
        if !defined($id) || $id !~ m/\A[A-Za-z0-9\-]{1,32}\z/;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $service_name = "mds.$id";
    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";

    print "disabling service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('disable', $service_name);
    print "stopping service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('stop', $service_name);

    if (-d $service_dir) {
        print "removing ceph-mds directory '$service_dir'\n";
        File::Path::remove_tree($service_dir);
    } else {
        warn "cannot cleanup MDS $id directory, '$service_dir' not found\n";
    }

    print "removing ceph auth for '$service_name'\n";
    $rados->mon_command({
        prefix => 'auth del',
        entity => $service_name,
        format => 'plain',
    });

    return undef;
}
471 | ||
456a7f4d AA |
# Overwrite the first 200 MB of each given device with zeros to clear off
# leftovers from previous use, otherwise creating an OSD fails.
#
# @devs - device paths to wipe. A failing wipe only produces a warning, the
#         remaining devices are still processed.
sub wipe_disks {
    my (@devs) = @_;

    my @dd_base = ('/bin/dd', 'if=/dev/zero', 'bs=1M', 'count=200', 'conv=fdatasync');

    for my $dev (@devs) {
        print "wipe disk: $dev\n";

        my $cmd = [@dd_base, "of=${dev}"];
        eval { run_command($cmd) };
        warn $@ if $@;
    }
}
484 | ||
a34866f0 | 485 | 1; |