package PVE::Ceph::Tools;

use strict;
use warnings;

use File::Path;
use IO::File;

use PVE::Tools qw(run_command dir_glob_foreach);
use PVE::RADOS;

my $ccname = 'ceph'; # ceph cluster name
my $ceph_cfgdir = "/etc/ceph";
my $pve_ceph_cfgpath = "/etc/pve/$ccname.conf";
my $ceph_cfgpath = "$ceph_cfgdir/$ccname.conf";

my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
my $ceph_mds_data_dir = '/var/lib/ceph/mds';

my $ceph_service = {
    ceph_bin => "/usr/bin/ceph",
    ceph_mon => "/usr/bin/ceph-mon",
    ceph_mgr => "/usr/bin/ceph-mgr",
    ceph_osd => "/usr/bin/ceph-osd",
    ceph_mds => "/usr/bin/ceph-mds",
};

my $config_hash = {
    ccname => $ccname,
    pve_ceph_cfgpath => $pve_ceph_cfgpath,
    pve_mon_key_path => $pve_mon_key_path,
    pve_ckeyring_path => $pve_ckeyring_path,
    ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
    ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
    ceph_mds_data_dir => $ceph_mds_data_dir,
    long_rados_timeout => 60,
};

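# Query the local ceph binary for its version. In list context this
# returns (version, major, minor, patch), in scalar context just the
# major version. Usage sketch (illustrative):
#   my ($version, $major, $minor, $patch) = PVE::Ceph::Tools::get_local_version(1);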
sub get_local_version {
    my ($noerr) = @_;

    if (check_ceph_installed('ceph_bin', $noerr)) {
        my $ceph_version;
        run_command([$ceph_service->{ceph_bin}, '--version'],
            noerr => $noerr,
            outfunc => sub { $ceph_version = shift; });
        if ($ceph_version && $ceph_version =~ /^ceph.*\s((\d+)\.(\d+)\.(\d+))/) {
            # return (version, major, minor, patch) : major;
            return wantarray ? ($1, $2, $3, $4) : $2;
        }
    }

    return undef;
}

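# Look up a value in the static config hash above; dies on unknown keys.
# Usage sketch (illustrative):
#   my $timeout = PVE::Ceph::Tools::get_config('long_rados_timeout');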
sub get_config {
    my $key = shift;

    my $value = $config_hash->{$key};

    die "no such ceph config '$key'" if !$value;

    return $value;
}

sub purge_all_ceph_files {
    # fixme: this is very dangerous - should we really support this function?

    unlink $ceph_cfgpath;

    unlink $pve_ceph_cfgpath;
    unlink $pve_ckeyring_path;
    unlink $pve_mon_key_path;

    unlink $ceph_bootstrap_osd_keyring;
    unlink $ceph_bootstrap_mds_keyring;

    system("rm -rf /var/lib/ceph/mon/ceph-*");

    # remove osd?
}

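# Check that the binary of the given service (defaulting to 'ceph_bin')
# is installed and executable; dies unless $noerr is set.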
sub check_ceph_installed {
    my ($service, $noerr) = @_;

    $service = 'ceph_bin' if !defined($service);

    if (! -x $ceph_service->{$service}) {
        die "binary not installed: $ceph_service->{$service}\n" if !$noerr;
        return undef;
    }

    return 1;
}

sub check_ceph_inited {
    my ($noerr) = @_;

    return undef if !check_ceph_installed('ceph_bin', $noerr);

    if (! -f $pve_ceph_cfgpath) {
        die "pveceph configuration not initialized\n" if !$noerr;
        return undef;
    }

    return 1;
}

sub check_ceph_enabled {
    my ($noerr) = @_;

    return undef if !check_ceph_inited($noerr);

    if (! -f $ceph_cfgpath) {
        die "pveceph configuration not enabled\n" if !$noerr;
        return undef;
    }

    return 1;
}

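# Create a new pool; $param may override the defaults pg_num=128, size=3,
# min_size=2 and application 'rbd', and may optionally set a crush_rule.
# Usage sketch (pool name and settings made up):
#   PVE::Ceph::Tools::create_pool('vm-storage', { pg_num => 64, size => 3 });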
sub create_pool {
    my ($pool, $param, $rados) = @_;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $pg_num = $param->{pg_num} || 128;
    my $size = $param->{size} || 3;
    my $min_size = $param->{min_size} || 2;
    my $application = $param->{application} // 'rbd';

    $rados->mon_command({
        prefix => "osd pool create",
        pool => $pool,
        pg_num => int($pg_num),
        format => 'plain',
    });

    $rados->mon_command({
        prefix => "osd pool set",
        pool => $pool,
        var => 'min_size',
        val => $min_size,
        format => 'plain',
    });

    $rados->mon_command({
        prefix => "osd pool set",
        pool => $pool,
        var => 'size',
        val => $size,
        format => 'plain',
    });

    if (defined($param->{crush_rule})) {
        $rados->mon_command({
            prefix => "osd pool set",
            pool => $pool,
            var => 'crush_rule',
            val => $param->{crush_rule},
            format => 'plain',
        });
    }

    $rados->mon_command({
        prefix => "osd pool application enable",
        pool => $pool,
        app => $application,
    });
}

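# Return the list of existing pools as reported by 'osd lspools'.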
sub ls_pools {
    my ($pool, $rados) = @_;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $res = $rados->mon_command({ prefix => "osd lspools" });

    return $res;
}

sub destroy_pool {
    my ($pool, $rados) = @_;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    # fixme: '--yes-i-really-really-mean-it'
    $rados->mon_command({
        prefix => "osd pool delete",
        pool => $pool,
        pool2 => $pool,
        sure => '--yes-i-really-really-mean-it',
        format => 'plain',
    });
}

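# Link /etc/ceph/ceph.conf to the cluster-wide config file on /etc/pve,
# so every node uses the same ceph.conf.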
sub setup_pve_symlinks {
    # fail if we find a real file instead of a link
    if (-f $ceph_cfgpath) {
        my $lnk = readlink($ceph_cfgpath);
        die "file '$ceph_cfgpath' already exists\n"
            if !$lnk || $lnk ne $pve_ceph_cfgpath;
    } else {
        symlink($pve_ceph_cfgpath, $ceph_cfgpath) ||
            die "unable to create symlink '$ceph_cfgpath' - $!\n";
    }
}

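# Run an action on a ceph service: 'mon.foo' maps to the systemd unit
# 'ceph-mon@foo', a bare type like 'mon' to 'ceph-mon.target', and a
# missing or unknown service to 'ceph.target'.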
sub ceph_service_cmd {
    my ($action, $service) = @_;

    if (systemd_managed()) {

        if ($service && $service =~ m/^(mon|osd|mds|mgr|radosgw)(\.([A-Za-z0-9\-]{1,32}))?$/) {
            $service = defined($3) ? "ceph-$1\@$3" : "ceph-$1.target";
        } else {
            $service = "ceph.target";
        }

        PVE::Tools::run_command(['/bin/systemctl', $action, $service]);

    } else {
        # ceph daemons do not call 'setsid', so we do that ourselves
        # (fork_worker sends KILL to the whole process group)
        PVE::Tools::run_command(['setsid', 'service', 'ceph', '-c', $pve_ceph_cfgpath, $action, $service]);
    }
}

# Ceph versions newer than Hammer use 'ceph' as user and group instead
# of 'root', and use systemd.
sub systemd_managed {

    if (-f "/lib/systemd/system/ceph-osd\@.service") {
        return 1;
    } else {
        return 0;
    }
}

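# Return the IDs of all MDS instances with a data directory on this node
# (/var/lib/ceph/mds/ceph-<id>).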
sub list_local_mds_ids {
    my $mds_list = [];

    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, sub {
        my (undef, $mds_id) = @_;
        push @$mds_list, $mds_id;
    });

    return $mds_list;
}

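# Query 'mds stat' and return a hash mapping each MDS name (standby and
# active) to its addr, rank, standby_replay flag and state.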
sub get_cluster_mds_state {
    my ($rados) = @_;

    my $mds_state = {};

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $add_state = sub {
        my ($mds) = @_;

        my $state = {};
        $state->{addr} = $mds->{addr};
        $state->{rank} = $mds->{rank};
        $state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
        $state->{state} = $mds->{state};

        $mds_state->{$mds->{name}} = $state;
    };

    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
    my $fsmap = $mds_dump->{fsmap};

    foreach my $mds (@{$fsmap->{standbys}}) {
        $add_state->($mds);
    }

    my $fs_info = $fsmap->{filesystems}->[0];
    my $active_mds = $fs_info->{mdsmap}->{info};

    # normally there's only one active MDS, but we can have multiple active
    # MDS for different ranks (e.g., different CephFS path hierarchies).
    # So just add them all.
    foreach my $mds (values %$active_mds) {
        $add_state->($mds);
    }

    return $mds_state;
}

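# Return 1 if any MDS of the first filesystem is 'up:active', 0 if none
# is, and undef if there is no filesystem at all.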
sub is_any_mds_active {
    my ($rados) = @_;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
    my $fs = $mds_dump->{fsmap}->{filesystems};

    if (!($fs && scalar(@$fs) > 0)) {
        return undef;
    }
    my $active_mds = $fs->[0]->{mdsmap}->{info};

    for my $mds (values %$active_mds) {
        return 1 if $mds->{state} eq 'up:active';
    }

    return 0;
}

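# Create the data directory, keyring and systemd service for a new MDS
# instance and start it.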
sub create_mds {
    my ($id, $rados) = @_;

    # `ceph fs status` fails with numeric-only IDs.
    die "ID: $id, numeric only IDs are not supported\n"
        if $id =~ /^\d+$/;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $service_dir = "/var/lib/ceph/mds/$ccname-$id";
    my $service_keyring = "$service_dir/keyring";
    my $service_name = "mds.$id";

    die "ceph MDS directory '$service_dir' already exists\n"
        if -d $service_dir;

    print "creating MDS directory '$service_dir'\n";
    eval { File::Path::mkpath($service_dir) };
    my $err = $@;
    die "creating MDS directory '$service_dir' failed\n" if $err;

    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
    my $priv = [
        mon => 'allow profile mds',
        osd => 'allow rwx',
        mds => 'allow *',
    ];

    print "creating keys for '$service_name'\n";
    my $output = $rados->mon_command({
        prefix => 'auth get-or-create',
        entity => $service_name,
        caps => $priv,
        format => 'plain',
    });

    PVE::Tools::file_set_contents($service_keyring, $output);

    print "setting ceph as owner for service directory\n";
    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);

    print "enabling service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('enable', $service_name);
    print "starting service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('start', $service_name);

    return undef;
}

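# Stop and disable the MDS service, remove its data directory and delete
# its ceph auth entry.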
sub destroy_mds {
    my ($id, $rados) = @_;

    if (!defined($rados)) {
        $rados = PVE::RADOS->new();
    }

    my $service_name = "mds.$id";
    my $service_dir = "/var/lib/ceph/mds/$ccname-$id";

    print "disabling service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('disable', $service_name);
    print "stopping service 'ceph-mds\@$id.service'\n";
    ceph_service_cmd('stop', $service_name);

    if (-d $service_dir) {
        print "removing ceph-mds directory '$service_dir'\n";
        File::Path::remove_tree($service_dir);
    } else {
        warn "cannot clean up MDS $id directory, '$service_dir' not found\n";
    }

    print "removing ceph auth for '$service_name'\n";
    $rados->mon_command({
        prefix => 'auth del',
        entity => $service_name,
        format => 'plain',
    });

    return undef;
}

# wipe the first 200 MB to clear off leftovers from a previous use,
# otherwise creating an OSD fails.
sub wipe_disks {
    my (@devs) = @_;

    my @wipe_cmd = qw(/bin/dd if=/dev/zero bs=1M count=200 conv=fdatasync);
    foreach my $devpath (@devs) {
        print "wipe disk: $devpath\n";
        eval { run_command([@wipe_cmd, "of=${devpath}"]) };
        warn $@ if $@;
    }
}

1;