]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-disk/ceph_disk/main.py
414a63301d3b2ba96ea7b1cc9ad41d75d028537c
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2015, 2016 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
7 #
8 # Author: Loic Dachary <loic@dachary.org>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
13 # any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
19 #
20
21 from __future__ import print_function
22
23 import argparse
24 import base64
25 import errno
26 import fcntl
27 import json
28 import logging
29 import os
30 import platform
31 import re
32 import subprocess
33 import stat
34 import sys
35 import tempfile
36 import uuid
37 import time
38 import shlex
39 import pwd
40 import grp
41 import textwrap
42 import glob
43
44 CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
45 CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'
46
47 KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'
48
49 PTYPE = {
50 'regular': {
51 'journal': {
52 # identical because creating a journal is atomic
53 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
54 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
55 },
56 'block': {
57 # identical because creating a block is atomic
58 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
59 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
60 },
61 'block.db': {
62 # identical because creating a block is atomic
63 'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
64 'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
65 },
66 'block.wal': {
67 # identical because creating a block is atomic
68 'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
69 'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
70 },
71 'osd': {
72 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
73 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
74 },
75 'lockbox': {
76 'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
77 'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
78 },
79 },
80 'luks': {
81 'journal': {
82 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
83 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
84 },
85 'block': {
86 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
87 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
88 },
89 'block.db': {
90 'ready': '166418da-c469-4022-adf4-b30afd37f176',
91 'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
92 },
93 'block.wal': {
94 'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
95 'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
96 },
97 'osd': {
98 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
99 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
100 },
101 },
102 'plain': {
103 'journal': {
104 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
105 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
106 },
107 'block': {
108 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
109 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
110 },
111 'block.db': {
112 'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
113 'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
114 },
115 'block.wal': {
116 'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
117 'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
118 },
119 'osd': {
120 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
121 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
122 },
123 },
124 'mpath': {
125 'journal': {
126 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
127 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
128 },
129 'block': {
130 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
131 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
132 },
133 'block.db': {
134 'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
135 'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
136 },
137 'block.wal': {
138 'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
139 'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
140 },
141 'osd': {
142 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
143 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
144 },
145 'lockbox': {
146 'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
147 'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
148 },
149 },
150 }
151
152
153 class Ptype(object):
154
155 @staticmethod
156 def get_ready_by_type(what):
157 return [x['ready'] for x in PTYPE[what].values()]
158
159 @staticmethod
160 def get_ready_by_name(name):
161 return [x[name]['ready'] for x in PTYPE.values() if name in x]
162
163 @staticmethod
164 def is_regular_space(ptype):
165 return Ptype.is_what_space('regular', ptype)
166
167 @staticmethod
168 def is_mpath_space(ptype):
169 return Ptype.is_what_space('mpath', ptype)
170
171 @staticmethod
172 def is_plain_space(ptype):
173 return Ptype.is_what_space('plain', ptype)
174
175 @staticmethod
176 def is_luks_space(ptype):
177 return Ptype.is_what_space('luks', ptype)
178
179 @staticmethod
180 def is_what_space(what, ptype):
181 for name in Space.NAMES:
182 if ptype == PTYPE[what][name]['ready']:
183 return True
184 return False
185
186 @staticmethod
187 def space_ptype_to_name(ptype):
188 for what in PTYPE.values():
189 for name in Space.NAMES:
190 if ptype == what[name]['ready']:
191 return name
192 raise ValueError('ptype ' + ptype + ' not found')
193
194 @staticmethod
195 def is_dmcrypt_space(ptype):
196 for name in Space.NAMES:
197 if Ptype.is_dmcrypt(ptype, name):
198 return True
199 return False
200
201 @staticmethod
202 def is_dmcrypt(ptype, name):
203 for what in ('plain', 'luks'):
204 if ptype == PTYPE[what][name]['ready']:
205 return True
206 return False
207
208
209 SYSFS = '/sys'
210
211 if platform.system() == 'FreeBSD':
212 FREEBSD = True
213 DEFAULT_FS_TYPE = 'zfs'
214 PROCDIR = '/compat/linux/proc'
215 # FreeBSD does not have blockdevices any more
216 BLOCKDIR = '/dev'
217 else:
218 FREEBSD = False
219 DEFAULT_FS_TYPE = 'xfs'
220 PROCDIR = '/proc'
221 BLOCKDIR = '/sys/block'
222
223 """
224 OSD STATUS Definition
225 """
226 OSD_STATUS_OUT_DOWN = 0
227 OSD_STATUS_OUT_UP = 1
228 OSD_STATUS_IN_DOWN = 2
229 OSD_STATUS_IN_UP = 3
230
231 MOUNT_OPTIONS = dict(
232 btrfs='noatime,user_subvol_rm_allowed',
233 # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
234 # delay a moment before removing it fully because we did have some
235 # issues with ext4 before the xatts-in-leveldb work, and it seemed
236 # that user_xattr helped
237 ext4='noatime,user_xattr',
238 xfs='noatime,inode64',
239 )
240
241 MKFS_ARGS = dict(
242 btrfs=[
243 # btrfs requires -f, for the same reason as xfs (see comment below)
244 '-f',
245 '-m', 'single',
246 '-l', '32768',
247 '-n', '32768',
248 ],
249 xfs=[
250 # xfs insists on not overwriting previous fs; even if we wipe
251 # partition table, we often recreate it exactly the same way,
252 # so we'll see ghosts of filesystems past
253 '-f',
254 '-i', 'size=2048',
255 ],
256 zfs=[
257 '-o', 'atime=off'
258 ],
259 )
260
261 INIT_SYSTEMS = [
262 'upstart',
263 'sysvinit',
264 'systemd',
265 'openrc',
266 'bsdrc',
267 'auto',
268 'none',
269 ]
270
271 STATEDIR = '/var/lib/ceph'
272
273 SYSCONFDIR = '/etc/ceph'
274
275 prepare_lock = None
276 activate_lock = None
277 SUPPRESS_PREFIX = None
278
279 # only warn once about some things
280 warned_about = {}
281
282 # Nuke the TERM variable to avoid confusing any subprocesses we call.
283 # For example, libreadline will print weird control sequences for some
284 # TERM values.
285 if 'TERM' in os.environ:
286 del os.environ['TERM']
287
288 LOG_NAME = __name__
289 if LOG_NAME == '__main__':
290 LOG_NAME = os.path.basename(sys.argv[0])
291 LOG = logging.getLogger(LOG_NAME)
292
293 # Allow user-preferred values for subprocess user and group
294 CEPH_PREF_USER = None
295 CEPH_PREF_GROUP = None
296
297
298 class FileLock(object):
299 def __init__(self, fn):
300 self.fn = fn
301 self.fd = None
302
303 def __enter__(self):
304 assert not self.fd
305 self.fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
306 fcntl.lockf(self.fd, fcntl.LOCK_EX)
307
308 def __exit__(self, exc_type, exc_val, exc_tb):
309 assert self.fd
310 fcntl.lockf(self.fd, fcntl.LOCK_UN)
311 os.close(self.fd)
312 self.fd = None
313
314
315 class Error(Exception):
316 """
317 Error
318 """
319
320 def __str__(self):
321 doc = _bytes2str(self.__doc__.strip())
322 try:
323 str_type = basestring
324 except NameError:
325 str_type = str
326 args = [a if isinstance(a, str_type) else str(a) for a in self.args]
327 return ': '.join([doc] + [_bytes2str(a) for a in args])
328
329
330 class MountError(Error):
331 """
332 Mounting filesystem failed
333 """
334
335
336 class UnmountError(Error):
337 """
338 Unmounting filesystem failed
339 """
340
341
342 class BadMagicError(Error):
343 """
344 Does not look like a Ceph OSD, or incompatible version
345 """
346
347
348 class TruncatedLineError(Error):
349 """
350 Line is truncated
351 """
352
353
354 class TooManyLinesError(Error):
355 """
356 Too many lines
357 """
358
359
360 class FilesystemTypeError(Error):
361 """
362 Cannot discover filesystem type
363 """
364
365
366 class CephDiskException(Exception):
367 """
368 A base exception for ceph-disk to provide custom (ad-hoc) messages that
369 will be caught and dealt with when main() is executed
370 """
371 pass
372
373
374 class ExecutableNotFound(CephDiskException):
375 """
376 Exception to report on executables not available in PATH
377 """
378 pass
379
380
381 def is_systemd():
382 """
383 Detect whether systemd is running
384 """
385 with open(PROCDIR + '/1/comm', 'r') as f:
386 return 'systemd' in f.read()
387
388
389 def is_upstart():
390 """
391 Detect whether upstart is running
392 """
393 (out, err, _) = command(['init', '--version'])
394 return 'upstart' in out
395
396
397 def maybe_mkdir(*a, **kw):
398 """
399 Creates a new directory if it doesn't exist, removes
400 existing symlink before creating the directory.
401 """
402 # remove any symlink, if it is there..
403 if os.path.exists(*a) and stat.S_ISLNK(os.lstat(*a).st_mode):
404 LOG.debug('Removing old symlink at %s', *a)
405 os.unlink(*a)
406 try:
407 os.mkdir(*a, **kw)
408 except OSError as e:
409 if e.errno == errno.EEXIST:
410 pass
411 else:
412 raise
413
414
415 def which(executable):
416 """find the location of an executable"""
417 envpath = os.environ.get('PATH') or os.defpath
418 PATH = envpath.split(os.pathsep)
419
420 locations = PATH + [
421 '/usr/local/bin',
422 '/bin',
423 '/usr/bin',
424 '/usr/local/sbin',
425 '/usr/sbin',
426 '/sbin',
427 ]
428
429 for location in locations:
430 executable_path = os.path.join(location, executable)
431 if (os.path.isfile(executable_path) and
432 os.access(executable_path, os.X_OK)):
433 return executable_path
434
435
436 def _get_command_executable(arguments):
437 """
438 Return the full path for an executable, raise if the executable is not
439 found. If the executable has already a full path do not perform any checks.
440 """
441 if os.path.isabs(arguments[0]): # an absolute path
442 return arguments
443 executable = which(arguments[0])
444 if not executable:
445 command_msg = 'Could not run command: %s' % ' '.join(arguments)
446 executable_msg = '%s not in path.' % arguments[0]
447 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
448
449 # swap the old executable for the new one
450 arguments[0] = executable
451 return arguments
452
453
454 def command(arguments, **kwargs):
455 """
456 Safely execute a ``subprocess.Popen`` call making sure that the
457 executable exists and raising a helpful error message
458 if it does not.
459
460 .. note:: This should be the preferred way of calling ``subprocess.Popen``
461 since it provides the caller with the safety net of making sure that
462 executables *will* be found and will error nicely otherwise.
463
464 This returns the output of the command and the return code of the
465 process in a tuple: (stdout, stderr, returncode).
466 """
467
468 arguments = list(map(_bytes2str, _get_command_executable(arguments)))
469
470 LOG.info('Running command: %s' % ' '.join(arguments))
471 process = subprocess.Popen(
472 arguments,
473 stdout=subprocess.PIPE,
474 stderr=subprocess.PIPE,
475 **kwargs)
476 out, err = process.communicate()
477
478 return _bytes2str(out), _bytes2str(err), process.returncode
479
480
481 def _bytes2str(string):
482 return string.decode('utf-8') if isinstance(string, bytes) else string
483
484
485 def command_init(arguments, **kwargs):
486 """
487 Safely execute a non-blocking ``subprocess.Popen`` call
488 making sure that the executable exists and raising a helpful
489 error message if it does not.
490
491 .. note:: This should be the preferred way of calling ``subprocess.Popen``
492 since it provides the caller with the safety net of making sure that
493 executables *will* be found and will error nicely otherwise.
494
495 This returns the process.
496 """
497
498 arguments = list(map(_bytes2str, _get_command_executable(arguments)))
499
500 LOG.info('Running command: %s' % ' '.join(arguments))
501 process = subprocess.Popen(
502 arguments,
503 stdout=subprocess.PIPE,
504 stderr=subprocess.PIPE,
505 **kwargs)
506 return process
507
508
509 def command_wait(process):
510 """
511 Wait for the process finish and parse its output.
512 """
513
514 out, err = process.communicate()
515
516 return _bytes2str(out), _bytes2str(err), process.returncode
517
518
519 def command_check_call(arguments, exit=False):
520 """
521 Safely execute a ``subprocess.check_call`` call making sure that the
522 executable exists and raising a helpful error message if it does not.
523
524 When ``exit`` is set to ``True`` this helper will do a clean (sans
525 traceback) system exit.
526 .. note:: This should be the preferred way of calling
527 ``subprocess.check_call`` since it provides the caller with the safety net
528 of making sure that executables *will* be found and will error nicely
529 otherwise.
530 """
531 arguments = _get_command_executable(arguments)
532 command = ' '.join(arguments)
533 LOG.info('Running command: %s', command)
534 try:
535 return subprocess.check_call(arguments)
536 except subprocess.CalledProcessError as error:
537 if exit:
538 if error.output:
539 LOG.error(error.output)
540 raise SystemExit(
541 "'{cmd}' failed with status code {returncode}".format(
542 cmd=command,
543 returncode=error.returncode,
544 )
545 )
546 raise
547
548
549 #
550 # An alternative block_path implementation would be
551 #
552 # name = basename(dev)
553 # return /sys/devices/virtual/block/$name
554 #
555 # It is however more fragile because it relies on the fact
556 # that the basename of the device the user will use always
557 # matches the one the driver will use. On Ubuntu 14.04, for
558 # instance, when multipath creates a partition table on
559 #
560 # /dev/mapper/353333330000007d0 -> ../dm-0
561 #
562 # it will create partition devices named
563 #
564 # /dev/mapper/353333330000007d0-part1
565 #
566 # which is the same device as /dev/dm-1 but not a symbolic
567 # link to it:
568 #
569 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
570 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
571 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
572 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
573 #
574 # Using the basename in this case fails.
575 #
576
577
578 def block_path(dev):
579 if FREEBSD:
580 return dev
581 path = os.path.realpath(dev)
582 rdev = os.stat(path).st_rdev
583 (M, m) = (os.major(rdev), os.minor(rdev))
584 return "{sysfs}/dev/block/{M}:{m}".format(sysfs=SYSFS, M=M, m=m)
585
586
587 def get_dm_uuid(dev):
588 uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
589 LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
590 if not os.path.exists(uuid_path):
591 return False
592 uuid = open(uuid_path, 'r').read()
593 LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
594 return uuid
595
596
597 def is_mpath(dev):
598 """
599 True if the path is managed by multipath
600 """
601 if FREEBSD:
602 return False
603 uuid = get_dm_uuid(dev)
604 return (uuid and
605 (re.match('part\d+-mpath-', uuid) or
606 re.match('mpath-', uuid)))
607
608
609 def get_dev_name(path):
610 """
611 get device name from path. e.g.::
612
613 /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1
614
615 a device "name" is something like::
616
617 sdb
618 cciss!c0d1
619
620 """
621 assert path.startswith('/dev/')
622 base = path[5:]
623 return base.replace('/', '!')
624
625
626 def get_dev_path(name):
627 """
628 get a path (/dev/...) from a name (cciss!c0d1)
629 a device "path" is something like::
630
631 /dev/sdb
632 /dev/cciss/c0d1
633
634 """
635 return '/dev/' + name.replace('!', '/')
636
637
638 def get_dev_relpath(name):
639 """
640 get a relative path to /dev from a name (cciss!c0d1)
641 """
642 return name.replace('!', '/')
643
644
645 def get_dev_size(dev, size='megabytes'):
646 """
647 Attempt to get the size of a device so that we can prevent errors
648 from actions to devices that are smaller, and improve error reporting.
649
650 Because we want to avoid breakage in case this approach is not robust, we
651 will issue a warning if we failed to get the size.
652
653 :param size: bytes or megabytes
654 :param dev: the device to calculate the size
655 """
656 fd = os.open(dev, os.O_RDONLY)
657 dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
658 try:
659 device_size = os.lseek(fd, 0, os.SEEK_END)
660 divider = dividers.get(size, 1024 * 1024) # default to megabytes
661 return device_size // divider
662 except Exception as error:
663 LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
664 finally:
665 os.close(fd)
666
667
668 def get_partition_mpath(dev, pnum):
669 part_re = "part{pnum}-mpath-".format(pnum=pnum)
670 partitions = list_partitions_mpath(dev, part_re)
671 if partitions:
672 return partitions[0]
673 else:
674 return None
675
676
677 def get_partition_dev(dev, pnum):
678 """
679 get the device name for a partition
680
681 assume that partitions are named like the base dev,
682 with a number, and optionally
683 some intervening characters (like 'p'). e.g.,
684
685 sda 1 -> sda1
686 cciss/c0d1 1 -> cciss!c0d1p1
687 """
688 max_retry = 10
689 for retry in range(0, max_retry + 1):
690 partname = None
691 error_msg = ""
692 if is_mpath(dev):
693 partname = get_partition_mpath(dev, pnum)
694 else:
695 name = get_dev_name(os.path.realpath(dev))
696 sys_entry = os.path.join(BLOCKDIR, name)
697 error_msg = " in %s" % sys_entry
698 for f in os.listdir(sys_entry):
699 if f.startswith(name) and f.endswith(str(pnum)):
700 # we want the shortest name that starts with the base name
701 # and ends with the partition number
702 if not partname or len(f) < len(partname):
703 partname = f
704 if partname:
705 if retry:
706 LOG.info('Found partition %d for %s after %d tries' %
707 (pnum, dev, retry))
708 return get_dev_path(partname)
709 else:
710 if retry < max_retry:
711 LOG.info('Try %d/%d : partition %d for %s does not exist%s' %
712 (retry + 1, max_retry, pnum, dev, error_msg))
713 time.sleep(.2)
714 continue
715 else:
716 raise Error('partition %d for %s does not appear to exist%s' %
717 (pnum, dev, error_msg))
718
719
720 def list_all_partitions():
721 """
722 Return a list of devices and partitions
723 """
724 if not FREEBSD:
725 names = os.listdir(BLOCKDIR)
726 dev_part_list = {}
727 for name in names:
728 # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
729 if re.match(r'^fd\d$', name):
730 continue
731 dev_part_list[name] = list_partitions(get_dev_path(name))
732 else:
733 with open(os.path.join(PROCDIR, "partitions")) as partitions:
734 for line in partitions:
735 columns = line.split()
736 if len(columns) >= 4:
737 name = columns[3]
738 dev_part_list[name] = list_partitions(get_dev_path(name))
739 return dev_part_list
740
741
742 def list_partitions(dev):
743 dev = os.path.realpath(dev)
744 if is_mpath(dev):
745 return list_partitions_mpath(dev)
746 else:
747 return list_partitions_device(dev)
748
749
750 def list_partitions_mpath(dev, part_re="part\d+-mpath-"):
751 p = block_path(dev)
752 partitions = []
753 holders = os.path.join(p, 'holders')
754 for holder in os.listdir(holders):
755 uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
756 uuid = open(uuid_path, 'r').read()
757 LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
758 if re.match(part_re, uuid):
759 partitions.append(holder)
760 return partitions
761
762
763 def list_partitions_device(dev):
764 """
765 Return a list of partitions on the given device name
766 """
767 partitions = []
768 basename = get_dev_name(dev)
769 for name in os.listdir(block_path(dev)):
770 if name.startswith(basename):
771 partitions.append(name)
772 return partitions
773
774
775 def get_partition_base(dev):
776 """
777 Get the base device for a partition
778 """
779 dev = os.path.realpath(dev)
780 if not stat.S_ISBLK(os.lstat(dev).st_mode):
781 raise Error('not a block device', dev)
782
783 name = get_dev_name(dev)
784 if os.path.exists(os.path.join('/sys/block', name)):
785 raise Error('not a partition', dev)
786
787 # find the base
788 for basename in os.listdir('/sys/block'):
789 if os.path.exists(os.path.join('/sys/block', basename, name)):
790 return get_dev_path(basename)
791 raise Error('no parent device for partition', dev)
792
793
794 def is_partition_mpath(dev):
795 uuid = get_dm_uuid(dev)
796 return bool(re.match('part\d+-mpath-', uuid))
797
798
799 def partnum_mpath(dev):
800 uuid = get_dm_uuid(dev)
801 return re.findall('part(\d+)-mpath-', uuid)[0]
802
803
804 def get_partition_base_mpath(dev):
805 slave_path = os.path.join(block_path(dev), 'slaves')
806 slaves = os.listdir(slave_path)
807 assert slaves
808 name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
809 name = open(name_path, 'r').read().strip()
810 return os.path.join('/dev/mapper', name)
811
812
813 def is_partition(dev):
814 """
815 Check whether a given device path is a partition or a full disk.
816 """
817 if is_mpath(dev):
818 return is_partition_mpath(dev)
819
820 dev = os.path.realpath(dev)
821 st = os.lstat(dev)
822 if not stat.S_ISBLK(st.st_mode):
823 raise Error('not a block device', dev)
824
825 name = get_dev_name(dev)
826 if os.path.exists(os.path.join(BLOCKDIR, name)):
827 return False
828
829 # make sure it is a partition of something else
830 major = os.major(st.st_rdev)
831 minor = os.minor(st.st_rdev)
832 if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)):
833 return True
834
835 raise Error('not a disk or partition', dev)
836
837
838 def is_mounted(dev):
839 """
840 Check if the given device is mounted.
841 """
842 dev = os.path.realpath(dev)
843 with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
844 for line in proc_mounts:
845 fields = line.split()
846 if len(fields) < 3:
847 continue
848 mounts_dev = fields[0]
849 path = fields[1]
850 if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
851 mounts_dev = os.path.realpath(mounts_dev)
852 if mounts_dev == dev:
853 return _bytes2str(path)
854 return None
855
856
857 def is_held(dev):
858 """
859 Check if a device is held by another device (e.g., a dm-crypt mapping)
860 """
861 assert os.path.exists(dev)
862 if is_mpath(dev):
863 return []
864
865 dev = os.path.realpath(dev)
866 base = get_dev_name(dev)
867
868 # full disk?
869 directory = '/sys/block/{base}/holders'.format(base=base)
870 if os.path.exists(directory):
871 return os.listdir(directory)
872
873 # partition?
874 part = base
875 while len(base):
876 directory = '/sys/block/{base}/{part}/holders'.format(
877 part=part, base=base)
878 if os.path.exists(directory):
879 return os.listdir(directory)
880 base = base[:-1]
881 return []
882
883
884 def verify_not_in_use(dev, check_partitions=False):
885 """
886 Verify if a given device (path) is in use (e.g. mounted or
887 in use by device-mapper).
888
889 :raises: Error if device is in use.
890 """
891 assert os.path.exists(dev)
892 if is_mounted(dev):
893 raise Error('Device is mounted', dev)
894 holders = is_held(dev)
895 if holders:
896 raise Error('Device %s is in use by a device-mapper '
897 'mapping (dm-crypt?)' % dev, ','.join(holders))
898
899 if check_partitions and not is_partition(dev):
900 for partname in list_partitions(dev):
901 partition = get_dev_path(partname)
902 if is_mounted(partition):
903 raise Error('Device is mounted', partition)
904 holders = is_held(partition)
905 if holders:
906 raise Error('Device %s is in use by a device-mapper '
907 'mapping (dm-crypt?)'
908 % partition, ','.join(holders))
909
910
911 def must_be_one_line(line):
912 """
913 Checks if given line is really one single line.
914
915 :raises: TruncatedLineError or TooManyLinesError
916 :return: Content of the line, or None if line isn't valid.
917 """
918 line = _bytes2str(line)
919
920 if line[-1:] != '\n':
921 raise TruncatedLineError(line)
922 line = line[:-1]
923 if '\n' in line:
924 raise TooManyLinesError(line)
925 return line
926
927
928 def read_one_line(parent, name):
929 """
930 Read a file whose sole contents are a single line.
931
932 Strips the newline.
933
934 :return: Contents of the line, or None if file did not exist.
935 """
936 path = os.path.join(parent, name)
937 try:
938 line = open(path, 'rb').read()
939 except IOError as e:
940 if e.errno == errno.ENOENT:
941 return None
942 else:
943 raise
944
945 try:
946 line = must_be_one_line(line)
947 except (TruncatedLineError, TooManyLinesError) as e:
948 raise Error(
949 'File is corrupt: {path}: {msg}'.format(
950 path=path,
951 msg=e,
952 )
953 )
954 return line
955
956
957 def write_one_line(parent, name, text):
958 """
959 Write a file whose sole contents are a single line.
960
961 Adds a newline.
962 """
963 path = os.path.join(parent, name)
964 tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
965 with open(tmp, 'wb') as tmp_file:
966 tmp_file.write(text.encode('utf-8') + b'\n')
967 os.fsync(tmp_file.fileno())
968 path_set_context(tmp)
969 os.rename(tmp, path)
970
971
972 def init_get():
973 """
974 Get a init system using 'ceph-detect-init'
975 """
976 init = _check_output(
977 args=[
978 'ceph-detect-init',
979 '--default', 'sysvinit',
980 ],
981 )
982 init = must_be_one_line(init)
983 return init
984
985
986 def check_osd_magic(path):
987 """
988 Check that this path has the Ceph OSD magic.
989
990 :raises: BadMagicError if this does not look like a Ceph OSD data
991 dir.
992 """
993 magic = read_one_line(path, 'magic')
994 if magic is None:
995 # probably not mkfs'ed yet
996 raise BadMagicError(path)
997 if magic != CEPH_OSD_ONDISK_MAGIC:
998 raise BadMagicError(path)
999
1000
1001 def check_osd_id(osd_id):
1002 """
1003 Ensures osd id is numeric.
1004 """
1005 if not re.match(r'^[0-9]+$', osd_id):
1006 raise Error('osd id is not numeric', osd_id)
1007
1008
1009 def allocate_osd_id(
1010 cluster,
1011 fsid,
1012 keyring,
1013 ):
1014 """
1015 Accocates an OSD id on the given cluster.
1016
1017 :raises: Error if the call to allocate the OSD id fails.
1018 :return: The allocated OSD id.
1019 """
1020
1021 LOG.debug('Allocating OSD id...')
1022 try:
1023 osd_id = _check_output(
1024 args=[
1025 'ceph',
1026 '--cluster', cluster,
1027 '--name', 'client.bootstrap-osd',
1028 '--keyring', keyring,
1029 'osd', 'create', '--concise',
1030 fsid,
1031 ],
1032 )
1033 except subprocess.CalledProcessError as e:
1034 raise Error('ceph osd create failed', e, e.output)
1035 osd_id = must_be_one_line(osd_id)
1036 check_osd_id(osd_id)
1037 return osd_id
1038
1039
1040 def get_osd_id(path):
1041 """
1042 Gets the OSD id of the OSD at the given path.
1043 """
1044 osd_id = read_one_line(path, 'whoami')
1045 if osd_id is not None:
1046 check_osd_id(osd_id)
1047 return osd_id
1048
1049
1050 def get_ceph_user():
1051 global CEPH_PREF_USER
1052
1053 if CEPH_PREF_USER is not None:
1054 try:
1055 pwd.getpwnam(CEPH_PREF_USER)
1056 return CEPH_PREF_USER
1057 except KeyError:
1058 print("No such user:", CEPH_PREF_USER)
1059 sys.exit(2)
1060 else:
1061 try:
1062 pwd.getpwnam('ceph')
1063 return 'ceph'
1064 except KeyError:
1065 return 'root'
1066
1067
1068 def get_ceph_group():
1069 global CEPH_PREF_GROUP
1070
1071 if CEPH_PREF_GROUP is not None:
1072 try:
1073 grp.getgrnam(CEPH_PREF_GROUP)
1074 return CEPH_PREF_GROUP
1075 except KeyError:
1076 print("No such group:", CEPH_PREF_GROUP)
1077 sys.exit(2)
1078 else:
1079 try:
1080 grp.getgrnam('ceph')
1081 return 'ceph'
1082 except KeyError:
1083 return 'root'
1084
1085
1086 def path_set_context(path):
1087 # restore selinux context to default policy values
1088 if which('restorecon'):
1089 command(['restorecon', '-R', path])
1090
1091 # if ceph user exists, set owner to ceph
1092 if get_ceph_user() == 'ceph':
1093 command(['chown', '-R', 'ceph:ceph', path])
1094
1095
1096 def _check_output(args=None, **kwargs):
1097 out, err, ret = command(args, **kwargs)
1098 if ret:
1099 cmd = args[0]
1100 error = subprocess.CalledProcessError(ret, cmd)
1101 error.output = out + err
1102 raise error
1103 return _bytes2str(out)
1104
1105
1106 def get_conf(cluster, variable):
1107 """
1108 Get the value of the given configuration variable from the
1109 cluster.
1110
1111 :raises: Error if call to ceph-conf fails.
1112 :return: The variable value or None.
1113 """
1114 try:
1115 out, err, ret = command(
1116 [
1117 'ceph-conf',
1118 '--cluster={cluster}'.format(
1119 cluster=cluster,
1120 ),
1121 '--name=osd.',
1122 '--lookup',
1123 variable,
1124 ],
1125 close_fds=True,
1126 )
1127 except OSError as e:
1128 raise Error('error executing ceph-conf', e, err)
1129 if ret == 1:
1130 # config entry not found
1131 return None
1132 elif ret != 0:
1133 raise Error('getting variable from configuration failed')
1134 value = out.split('\n', 1)[0]
1135 # don't differentiate between "var=" and no var set
1136 if not value:
1137 return None
1138 return value
1139
1140
1141 def get_conf_with_default(cluster, variable):
1142 """
1143 Get a config value that is known to the C++ code.
1144
1145 This will fail if called on variables that are not defined in
1146 common config options.
1147 """
1148 try:
1149 out = _check_output(
1150 args=[
1151 'ceph-osd',
1152 '--cluster={cluster}'.format(
1153 cluster=cluster,
1154 ),
1155 '--show-config-value={variable}'.format(
1156 variable=variable,
1157 ),
1158 ],
1159 close_fds=True,
1160 )
1161 except subprocess.CalledProcessError as e:
1162 raise Error(
1163 'getting variable from configuration failed',
1164 e,
1165 )
1166
1167 value = str(out).split('\n', 1)[0]
1168 return value
1169
1170
1171 def get_fsid(cluster):
1172 """
1173 Get the fsid of the cluster.
1174
1175 :return: The fsid or raises Error.
1176 """
1177 fsid = get_conf_with_default(cluster=cluster, variable='fsid')
1178 if fsid is None:
1179 raise Error('getting cluster uuid from configuration failed')
1180 return fsid.lower()
1181
1182
1183 def get_dmcrypt_key_path(
1184 _uuid,
1185 key_dir,
1186 luks
1187 ):
1188 """
1189 Get path to dmcrypt key file.
1190
1191 :return: Path to the dmcrypt key file, callers should check for existence.
1192 """
1193 if luks:
1194 path = os.path.join(key_dir, _uuid + ".luks.key")
1195 else:
1196 path = os.path.join(key_dir, _uuid)
1197
1198 return path
1199
1200
1201 def get_dmcrypt_key(
1202 _uuid,
1203 key_dir,
1204 luks
1205 ):
1206 legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
1207 if os.path.exists(legacy_path):
1208 return (legacy_path,)
1209 path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
1210 if os.path.exists(path):
1211 mode = get_oneliner(path, 'key-management-mode')
1212 osd_uuid = get_oneliner(path, 'osd-uuid')
1213 ceph_fsid = read_one_line(path, 'ceph_fsid')
1214 if ceph_fsid is None:
1215 raise Error('No cluster uuid assigned.')
1216 cluster = find_cluster_by_uuid(ceph_fsid)
1217 if cluster is None:
1218 raise Error('No cluster conf found in ' + SYSCONFDIR +
1219 ' with fsid %s' % ceph_fsid)
1220
1221 if mode == KEY_MANAGEMENT_MODE_V1:
1222 key, stderr, ret = command(
1223 [
1224 'ceph',
1225 '--cluster', cluster,
1226 '--name',
1227 'client.osd-lockbox.' + osd_uuid,
1228 '--keyring',
1229 os.path.join(path, 'keyring'),
1230 'config-key',
1231 'get',
1232 'dm-crypt/osd/' + osd_uuid + '/luks',
1233 ],
1234 )
1235 LOG.debug("stderr " + stderr)
1236 assert ret == 0
1237 return base64.b64decode(key)
1238 else:
1239 raise Error('unknown key-management-mode ' + str(mode))
1240 raise Error('unable to read dm-crypt key', path, legacy_path)
1241
1242
1243 def _dmcrypt_map(
1244 rawdev,
1245 key,
1246 _uuid,
1247 cryptsetup_parameters,
1248 luks,
1249 format_dev=False,
1250 ):
1251 dev = dmcrypt_is_mapped(_uuid)
1252 if dev:
1253 return dev
1254
1255 if isinstance(key, tuple):
1256 # legacy, before lockbox
1257 assert os.path.exists(key[0])
1258 keypath = key[0]
1259 key = None
1260 else:
1261 keypath = '-'
1262 dev = '/dev/mapper/' + _uuid
1263 luksFormat_args = [
1264 'cryptsetup',
1265 '--batch-mode',
1266 '--key-file',
1267 keypath,
1268 'luksFormat',
1269 rawdev,
1270 ] + cryptsetup_parameters
1271
1272 luksOpen_args = [
1273 'cryptsetup',
1274 '--key-file',
1275 keypath,
1276 'luksOpen',
1277 rawdev,
1278 _uuid,
1279 ]
1280
1281 create_args = [
1282 'cryptsetup',
1283 '--key-file',
1284 keypath,
1285 'create',
1286 _uuid,
1287 rawdev,
1288 ] + cryptsetup_parameters
1289
1290 def run(args, stdin):
1291 LOG.info(" ".join(args))
1292 process = subprocess.Popen(
1293 args,
1294 stdin=subprocess.PIPE,
1295 stdout=subprocess.PIPE,
1296 stderr=subprocess.PIPE)
1297 out, err = process.communicate(stdin)
1298 LOG.debug(out)
1299 LOG.error(err)
1300 assert process.returncode == 0
1301
1302 try:
1303 if luks:
1304 if format_dev:
1305 run(luksFormat_args, key)
1306 run(luksOpen_args, key)
1307 else:
1308 # Plain mode has no format function, nor any validation
1309 # that the key is correct.
1310 run(create_args, key)
1311 # set proper ownership of mapped device
1312 command_check_call(['chown', 'ceph:ceph', dev])
1313 return dev
1314
1315 except subprocess.CalledProcessError as e:
1316 raise Error('unable to map device', rawdev, e)
1317
1318
1319 def dmcrypt_unmap(
1320 _uuid
1321 ):
1322 if not os.path.exists('/dev/mapper/' + _uuid):
1323 return
1324 retries = 0
1325 while True:
1326 try:
1327 command_check_call(['cryptsetup', 'remove', _uuid])
1328 break
1329 except subprocess.CalledProcessError as e:
1330 if retries == 10:
1331 raise Error('unable to unmap device', _uuid, e)
1332 else:
1333 time.sleep(0.5 + retries * 1.0)
1334 retries += 1
1335
1336
1337 def mount(
1338 dev,
1339 fstype,
1340 options,
1341 ):
1342 """
1343 Mounts a device with given filessystem type and
1344 mount options to a tempfile path under /var/lib/ceph/tmp.
1345 """
1346 # sanity check: none of the arguments are None
1347 if dev is None:
1348 raise ValueError('dev may not be None')
1349 if fstype is None:
1350 raise ValueError('fstype may not be None')
1351
1352 # pick best-of-breed mount options based on fs type
1353 if options is None:
1354 options = MOUNT_OPTIONS.get(fstype, '')
1355
1356 myTemp = STATEDIR + '/tmp'
1357 # mkdtemp expect 'dir' to be existing on the system
1358 # Let's be sure it's always the case
1359 if not os.path.exists(myTemp):
1360 os.makedirs(myTemp)
1361
1362 # mount
1363 path = tempfile.mkdtemp(
1364 prefix='mnt.',
1365 dir=myTemp,
1366 )
1367 try:
1368 LOG.debug('Mounting %s on %s with options %s', dev, path, options)
1369 command_check_call(
1370 [
1371 'mount',
1372 '-t', fstype,
1373 '-o', options,
1374 '--',
1375 dev,
1376 path,
1377 ],
1378 )
1379 if which('restorecon'):
1380 command(
1381 [
1382 'restorecon',
1383 path,
1384 ],
1385 )
1386 except subprocess.CalledProcessError as e:
1387 try:
1388 os.rmdir(path)
1389 except (OSError, IOError):
1390 pass
1391 raise MountError(e)
1392
1393 return path
1394
1395
1396 def unmount(
1397 path,
1398 ):
1399 """
1400 Unmount and removes the given mount point.
1401 """
1402 retries = 0
1403 while True:
1404 try:
1405 LOG.debug('Unmounting %s', path)
1406 command_check_call(
1407 [
1408 '/bin/umount',
1409 '--',
1410 path,
1411 ],
1412 )
1413 break
1414 except subprocess.CalledProcessError as e:
1415 # on failure, retry 3 times with incremental backoff
1416 if retries == 3:
1417 raise UnmountError(e)
1418 else:
1419 time.sleep(0.5 + retries * 1.0)
1420 retries += 1
1421
1422 os.rmdir(path)
1423
1424
1425 ###########################################
1426
1427 def extract_parted_partition_numbers(partitions):
1428 numbers_as_strings = re.findall('^\d+', partitions, re.MULTILINE)
1429 return map(int, numbers_as_strings)
1430
1431
1432 def get_free_partition_index(dev):
1433 """
1434 Get the next free partition index on a given device.
1435
1436 :return: Index number (> 1 if there is already a partition on the device)
1437 or 1 if there is no partition table.
1438 """
1439 try:
1440 lines = _check_output(
1441 args=[
1442 'parted',
1443 '--machine',
1444 '--',
1445 dev,
1446 'print',
1447 ],
1448 )
1449 except subprocess.CalledProcessError as e:
1450 LOG.info('cannot read partition index; assume it '
1451 'isn\'t present\n (Error: %s)' % e)
1452 return 1
1453
1454 if not lines:
1455 raise Error('parted failed to output anything')
1456 LOG.debug('get_free_partition_index: analyzing ' + lines)
1457 if ('CHS;' not in lines and
1458 'CYL;' not in lines and
1459 'BYT;' not in lines):
1460 raise Error('parted output expected to contain one of ' +
1461 'CHH; CYL; or BYT; : ' + lines)
1462 if os.path.realpath(dev) not in lines:
1463 raise Error('parted output expected to contain ' + dev + ': ' + lines)
1464 _, partitions = lines.split(os.path.realpath(dev))
1465 partition_numbers = extract_parted_partition_numbers(partitions)
1466 if partition_numbers:
1467 return max(partition_numbers) + 1
1468 else:
1469 return 1
1470
1471
1472 def check_journal_reqs(args):
1473 _, _, allows_journal = command([
1474 'ceph-osd', '--check-allows-journal',
1475 '-i', '0',
1476 '--log-file', '$run_dir/$cluster-osd-check.log',
1477 '--cluster', args.cluster,
1478 '--setuser', get_ceph_user(),
1479 '--setgroup', get_ceph_group(),
1480 ])
1481 _, _, wants_journal = command([
1482 'ceph-osd', '--check-wants-journal',
1483 '-i', '0',
1484 '--log-file', '$run_dir/$cluster-osd-check.log',
1485 '--cluster', args.cluster,
1486 '--setuser', get_ceph_user(),
1487 '--setgroup', get_ceph_group(),
1488 ])
1489 _, _, needs_journal = command([
1490 'ceph-osd', '--check-needs-journal',
1491 '-i', '0',
1492 '--log-file', '$run_dir/$cluster-osd-check.log',
1493 '--cluster', args.cluster,
1494 '--setuser', get_ceph_user(),
1495 '--setgroup', get_ceph_group(),
1496 ])
1497 return (not allows_journal, not wants_journal, not needs_journal)
1498
1499
1500 def update_partition(dev, description):
1501 """
1502 Must be called after modifying a partition table so the kernel
1503 know about the change and fire udev events accordingly. A side
1504 effect of partprobe is to remove partitions and add them again.
1505 The first udevadm settle waits for ongoing udev events to
1506 complete, just in case one of them rely on an existing partition
1507 on dev. The second udevadm settle guarantees to the caller that
1508 all udev events related to the partition table change have been
1509 processed, i.e. the 95-ceph-osd.rules actions and mode changes,
1510 group changes etc. are complete.
1511 """
1512 LOG.debug('Calling partprobe on %s device %s', description, dev)
1513 partprobe_ok = False
1514 error = 'unknown error'
1515 partprobe = _get_command_executable(['partprobe'])[0]
1516 for i in range(5):
1517 command_check_call(['udevadm', 'settle', '--timeout=600'])
1518 try:
1519 _check_output(['flock', '-s', dev, partprobe, dev])
1520 partprobe_ok = True
1521 break
1522 except subprocess.CalledProcessError as e:
1523 error = e.output
1524 if ('unable to inform the kernel' not in error and
1525 'Device or resource busy' not in error):
1526 raise
1527 LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
1528 % (dev, error))
1529 time.sleep(60)
1530 if not partprobe_ok:
1531 raise Error('partprobe %s failed : %s' % (dev, error))
1532 command_check_call(['udevadm', 'settle', '--timeout=600'])
1533
1534
1535 def zap(dev):
1536 """
1537 Destroy the partition table and content of a given disk.
1538 """
1539 dev = os.path.realpath(dev)
1540 dmode = os.stat(dev).st_mode
1541 if not stat.S_ISBLK(dmode) or is_partition(dev):
1542 raise Error('not full block device; cannot zap', dev)
1543 try:
1544 # Thoroughly wipe all partitions of any traces of
1545 # Filesystems or OSD Journals
1546 #
1547 # In addition we need to write 10M of data to each partition
1548 # to make sure that after re-creating the same partition
1549 # there is no trace left of any previous Filesystem or OSD
1550 # Journal
1551
1552 LOG.debug('Writing zeros to existing partitions on %s', dev)
1553
1554 for partname in list_partitions(dev):
1555 partition = get_dev_path(partname)
1556 command_check_call(
1557 [
1558 'wipefs',
1559 '--all',
1560 partition,
1561 ],
1562 )
1563
1564 command_check_call(
1565 [
1566 'dd',
1567 'if=/dev/zero',
1568 'of={path}'.format(path=partition),
1569 'bs=1M',
1570 'count=10',
1571 ],
1572 )
1573
1574 LOG.debug('Zapping partition table on %s', dev)
1575
1576 # try to wipe out any GPT partition table backups. sgdisk
1577 # isn't too thorough.
1578 lba_size = 4096
1579 size = 33 * lba_size
1580 with open(dev, 'wb') as dev_file:
1581 dev_file.seek(-size, os.SEEK_END)
1582 dev_file.write(size * b'\0')
1583
1584 command_check_call(
1585 [
1586 'sgdisk',
1587 '--zap-all',
1588 '--',
1589 dev,
1590 ],
1591 )
1592 command_check_call(
1593 [
1594 'sgdisk',
1595 '--clear',
1596 '--mbrtogpt',
1597 '--',
1598 dev,
1599 ],
1600 )
1601
1602 update_partition(dev, 'zapped')
1603
1604 except subprocess.CalledProcessError as e:
1605 raise Error(e)
1606
1607
1608 def adjust_symlink(target, path):
1609 create = True
1610 if os.path.lexists(path):
1611 try:
1612 mode = os.lstat(path).st_mode
1613 if stat.S_ISREG(mode):
1614 LOG.debug('Removing old file %s', path)
1615 os.unlink(path)
1616 elif stat.S_ISLNK(mode):
1617 old = os.readlink(path)
1618 if old != target:
1619 LOG.debug('Removing old symlink %s -> %s', path, old)
1620 os.unlink(path)
1621 else:
1622 create = False
1623 except:
1624 raise Error('unable to remove (or adjust) old file (symlink)',
1625 path)
1626 if create:
1627 LOG.debug('Creating symlink %s -> %s', path, target)
1628 try:
1629 os.symlink(target, path)
1630 except:
1631 raise Error('unable to create symlink %s -> %s' % (path, target))
1632
1633
1634 def get_mount_options(cluster, fs_type):
1635 mount_options = get_conf(
1636 cluster,
1637 variable='osd_mount_options_{fstype}'.format(
1638 fstype=fs_type,
1639 ),
1640 )
1641 if mount_options is None:
1642 mount_options = get_conf(
1643 cluster,
1644 variable='osd_fs_mount_options_{fstype}'.format(
1645 fstype=fs_type,
1646 ),
1647 )
1648 else:
1649 # remove whitespaces
1650 mount_options = "".join(mount_options.split())
1651 return mount_options
1652
1653
1654 class Device(object):
1655
1656 def __init__(self, path, args):
1657 self.args = args
1658 self.path = path
1659 self.dev_size = None
1660 self.partitions = {}
1661 self.ptype_map = None
1662 assert not is_partition(self.path)
1663
1664 def create_partition(self, uuid, name, size=0, num=0):
1665 ptype = self.ptype_tobe_for_name(name)
1666 if num == 0:
1667 num = get_free_partition_index(dev=self.path)
1668 if size > 0:
1669 new = '--new={num}:0:+{size}M'.format(num=num, size=size)
1670 if size > self.get_dev_size():
1671 LOG.error('refusing to create %s on %s' % (name, self.path))
1672 LOG.error('%s size (%sM) is bigger than device (%sM)'
1673 % (name, size, self.get_dev_size()))
1674 raise Error('%s device size (%sM) is not big enough for %s'
1675 % (self.path, self.get_dev_size(), name))
1676 else:
1677 new = '--largest-new={num}'.format(num=num)
1678
1679 LOG.debug('Creating %s partition num %d size %d on %s',
1680 name, num, size, self.path)
1681 command_check_call(
1682 [
1683 'sgdisk',
1684 new,
1685 '--change-name={num}:ceph {name}'.format(num=num, name=name),
1686 '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
1687 '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
1688 '--mbrtogpt',
1689 '--',
1690 self.path,
1691 ],
1692 exit=True
1693 )
1694 update_partition(self.path, 'created')
1695 return num
1696
1697 def ptype_tobe_for_name(self, name):
1698 LOG.debug("name = " + name)
1699 if name == 'data':
1700 name = 'osd'
1701 if name == 'lockbox':
1702 if is_mpath(self.path):
1703 return PTYPE['mpath']['lockbox']['tobe']
1704 else:
1705 return PTYPE['regular']['lockbox']['tobe']
1706 if self.ptype_map is None:
1707 partition = DevicePartition.factory(
1708 path=self.path, dev=None, args=self.args)
1709 self.ptype_map = partition.ptype_map
1710 return self.ptype_map[name]['tobe']
1711
1712 def get_partition(self, num):
1713 if num not in self.partitions:
1714 dev = get_partition_dev(self.path, num)
1715 partition = DevicePartition.factory(
1716 path=self.path, dev=dev, args=self.args)
1717 partition.set_partition_number(num)
1718 self.partitions[num] = partition
1719 return self.partitions[num]
1720
1721 def get_dev_size(self):
1722 if self.dev_size is None:
1723 self.dev_size = get_dev_size(self.path)
1724 return self.dev_size
1725
1726 @staticmethod
1727 def factory(path, args):
1728 return Device(path, args)
1729
1730
1731 class DevicePartition(object):
1732
1733 def __init__(self, args):
1734 self.args = args
1735 self.num = None
1736 self.rawdev = None
1737 self.dev = None
1738 self.uuid = None
1739 self.ptype_map = None
1740 self.ptype = None
1741 self.set_variables_ptype()
1742
1743 def get_uuid(self):
1744 if self.uuid is None:
1745 self.uuid = get_partition_uuid(self.rawdev)
1746 return self.uuid
1747
1748 def get_ptype(self):
1749 if self.ptype is None:
1750 self.ptype = get_partition_type(self.rawdev)
1751 return self.ptype
1752
1753 def set_partition_number(self, num):
1754 self.num = num
1755
1756 def get_partition_number(self):
1757 return self.num
1758
1759 def set_dev(self, dev):
1760 self.dev = dev
1761 self.rawdev = dev
1762
1763 def get_dev(self):
1764 return self.dev
1765
1766 def get_rawdev(self):
1767 return self.rawdev
1768
1769 def set_variables_ptype(self):
1770 self.ptype_map = PTYPE['regular']
1771
1772 def ptype_for_name(self, name):
1773 return self.ptype_map[name]['ready']
1774
1775 @staticmethod
1776 def factory(path, dev, args):
1777 dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
1778 if ((path is not None and is_mpath(path)) or
1779 (dev is not None and is_mpath(dev))):
1780 partition = DevicePartitionMultipath(args)
1781 elif dmcrypt_type == 'luks':
1782 partition = DevicePartitionCryptLuks(args)
1783 elif dmcrypt_type == 'plain':
1784 partition = DevicePartitionCryptPlain(args)
1785 else:
1786 partition = DevicePartition(args)
1787 partition.set_dev(dev)
1788 return partition
1789
1790
1791 class DevicePartitionMultipath(DevicePartition):
1792
1793 def set_variables_ptype(self):
1794 self.ptype_map = PTYPE['mpath']
1795
1796
1797 class DevicePartitionCrypt(DevicePartition):
1798
1799 def __init__(self, args):
1800 super(DevicePartitionCrypt, self).__init__(args)
1801 self.osd_dm_key = None
1802 self.cryptsetup_parameters = CryptHelpers.get_cryptsetup_parameters(
1803 self.args)
1804 self.dmcrypt_type = CryptHelpers.get_dmcrypt_type(self.args)
1805 self.dmcrypt_keysize = CryptHelpers.get_dmcrypt_keysize(self.args)
1806
1807 def setup_crypt(self):
1808 pass
1809
1810 def map(self):
1811 self.setup_crypt()
1812 self.dev = _dmcrypt_map(
1813 rawdev=self.rawdev,
1814 key=self.osd_dm_key,
1815 _uuid=self.get_uuid(),
1816 cryptsetup_parameters=self.cryptsetup_parameters,
1817 luks=self.luks(),
1818 format_dev=True,
1819 )
1820
1821 def unmap(self):
1822 self.setup_crypt()
1823 dmcrypt_unmap(self.get_uuid())
1824 self.dev = self.rawdev
1825
1826 def format(self):
1827 self.setup_crypt()
1828 self.map()
1829
1830
1831 class DevicePartitionCryptPlain(DevicePartitionCrypt):
1832
1833 def luks(self):
1834 return False
1835
1836 def setup_crypt(self):
1837 if self.osd_dm_key is not None:
1838 return
1839
1840 self.cryptsetup_parameters += ['--key-size', str(self.dmcrypt_keysize)]
1841
1842 self.osd_dm_key = get_dmcrypt_key(
1843 self.get_uuid(), self.args.dmcrypt_key_dir,
1844 False)
1845
1846 def set_variables_ptype(self):
1847 self.ptype_map = PTYPE['plain']
1848
1849
1850 class DevicePartitionCryptLuks(DevicePartitionCrypt):
1851
1852 def luks(self):
1853 return True
1854
1855 def setup_crypt(self):
1856 if self.osd_dm_key is not None:
1857 return
1858
1859 if self.dmcrypt_keysize == 1024:
1860 # We don't force this into the cryptsetup_parameters,
1861 # as we want the cryptsetup defaults
1862 # to prevail for the actual LUKS key lengths.
1863 pass
1864 else:
1865 self.cryptsetup_parameters += ['--key-size',
1866 str(self.dmcrypt_keysize)]
1867
1868 self.osd_dm_key = get_dmcrypt_key(
1869 self.get_uuid(), self.args.dmcrypt_key_dir,
1870 True)
1871
1872 def set_variables_ptype(self):
1873 self.ptype_map = PTYPE['luks']
1874
1875
1876 class Prepare(object):
1877
1878 def __init__(self, args):
1879 self.args = args
1880
1881 @staticmethod
1882 def parser():
1883 parser = argparse.ArgumentParser(add_help=False)
1884 parser.add_argument(
1885 '--cluster',
1886 metavar='NAME',
1887 default='ceph',
1888 help='cluster name to assign this disk to',
1889 )
1890 parser.add_argument(
1891 '--cluster-uuid',
1892 metavar='UUID',
1893 help='cluster uuid to assign this disk to',
1894 )
1895 parser.add_argument(
1896 '--osd-uuid',
1897 metavar='UUID',
1898 help='unique OSD uuid to assign this disk to',
1899 )
1900 parser.add_argument(
1901 '--crush-device-class',
1902 help='crush device class to assign this disk to',
1903 )
1904 parser.add_argument(
1905 '--dmcrypt',
1906 action='store_true', default=None,
1907 help='encrypt DATA and/or JOURNAL devices with dm-crypt',
1908 )
1909 parser.add_argument(
1910 '--dmcrypt-key-dir',
1911 metavar='KEYDIR',
1912 default='/etc/ceph/dmcrypt-keys',
1913 help='directory where dm-crypt keys are stored',
1914 )
1915 parser.add_argument(
1916 '--prepare-key',
1917 metavar='PATH',
1918 help='bootstrap-osd keyring path template (%(default)s)',
1919 default='{statedir}/bootstrap-osd/{cluster}.keyring',
1920 dest='prepare_key_template',
1921 )
1922 parser.add_argument(
1923 '--no-locking',
1924 action='store_true', default=None,
1925 help='let many prepare\'s run in parallel',
1926 )
1927 return parser
1928
1929 @staticmethod
1930 def set_subparser(subparsers):
1931 parents = [
1932 Prepare.parser(),
1933 PrepareData.parser(),
1934 Lockbox.parser(),
1935 ]
1936 parents.extend(PrepareFilestore.parent_parsers())
1937 parents.extend(PrepareBluestore.parent_parsers())
1938 parser = subparsers.add_parser(
1939 'prepare',
1940 parents=parents,
1941 formatter_class=argparse.RawDescriptionHelpFormatter,
1942 description=textwrap.fill(textwrap.dedent("""\
1943 If the --bluestore argument is given, a bluestore objectstore
1944 will be created. If --filestore is provided, a legacy FileStore
1945 objectstore will be created. If neither is specified, we default
1946 to BlueStore.
1947
1948 When an entire device is prepared for bluestore, two
1949 partitions are created. The first partition is for metadata,
1950 the second partition is for blocks that contain data.
1951
1952 Unless explicitly specified with --block.db or
1953 --block.wal, the bluestore DB and WAL data is stored on
1954 the main block device. For instance:
1955
1956 ceph-disk prepare --bluestore /dev/sdc
1957
1958 Will create
1959
1960 /dev/sdc1 for osd metadata
1961 /dev/sdc2 for block, db, and wal data (the rest of the disk)
1962
1963
1964 If either --block.db or --block.wal are specified to be
1965 the same whole device, they will be created as partition
1966 three and four respectively. For instance:
1967
1968 ceph-disk prepare --bluestore \\
1969 --block.db /dev/sdc \\
1970 --block.wal /dev/sdc \\
1971 /dev/sdc
1972
1973 Will create
1974
1975 /dev/sdc1 for osd metadata
1976 /dev/sdc2 for block (the rest of the disk)
1977 /dev/sdc3 for db
1978 /dev/sdc4 for wal
1979
1980 """)),
1981 help='Prepare a directory or disk for a Ceph OSD',
1982 )
1983 parser.set_defaults(
1984 func=Prepare.main,
1985 )
1986 return parser
1987
1988 def prepare(self):
1989 if self.args.no_locking:
1990 self._prepare()
1991 else:
1992 with prepare_lock:
1993 self._prepare()
1994
1995 @staticmethod
1996 def factory(args):
1997 if args.bluestore:
1998 return PrepareBluestore(args)
1999 else:
2000 return PrepareFilestore(args)
2001
2002 @staticmethod
2003 def main(args):
2004 Prepare.factory(args).prepare()
2005
2006
2007 class PrepareFilestore(Prepare):
2008
2009 def __init__(self, args):
2010 super(PrepareFilestore, self).__init__(args)
2011 if args.dmcrypt:
2012 self.lockbox = Lockbox(args)
2013 self.data = PrepareFilestoreData(args)
2014 self.journal = PrepareJournal(args)
2015
2016 @staticmethod
2017 def parent_parsers():
2018 return [
2019 PrepareJournal.parser(),
2020 ]
2021
2022 def _prepare(self):
2023 if self.data.args.dmcrypt:
2024 self.lockbox.prepare()
2025 self.data.prepare(self.journal)
2026
2027
2028 class PrepareBluestore(Prepare):
2029
2030 def __init__(self, args):
2031 super(PrepareBluestore, self).__init__(args)
2032 if args.dmcrypt:
2033 self.lockbox = Lockbox(args)
2034 self.data = PrepareBluestoreData(args)
2035 self.block = PrepareBluestoreBlock(args)
2036 self.blockdb = PrepareBluestoreBlockDB(args)
2037 self.blockwal = PrepareBluestoreBlockWAL(args)
2038
2039 @staticmethod
2040 def parser():
2041 parser = argparse.ArgumentParser(add_help=False)
2042 parser.add_argument(
2043 '--bluestore',
2044 dest='bluestore',
2045 action='store_true', default=True,
2046 help='bluestore objectstore',
2047 )
2048 parser.add_argument(
2049 '--filestore',
2050 dest='bluestore',
2051 action='store_false',
2052 help='filestore objectstore',
2053 )
2054 return parser
2055
2056 @staticmethod
2057 def parent_parsers():
2058 return [
2059 PrepareBluestore.parser(),
2060 PrepareBluestoreBlock.parser(),
2061 PrepareBluestoreBlockDB.parser(),
2062 PrepareBluestoreBlockWAL.parser(),
2063 ]
2064
2065 def _prepare(self):
2066 if self.data.args.dmcrypt:
2067 self.lockbox.prepare()
2068 to_prepare_list = []
2069 if getattr(self.data.args, 'block.db'):
2070 to_prepare_list.append(self.blockdb)
2071 if getattr(self.data.args, 'block.wal'):
2072 to_prepare_list.append(self.blockwal)
2073 to_prepare_list.append(self.block)
2074 self.data.prepare(*to_prepare_list)
2075
2076
2077 class Space(object):
2078
2079 NAMES = ('block', 'journal', 'block.db', 'block.wal')
2080
2081
2082 class PrepareSpace(object):
2083
2084 NONE = 0
2085 FILE = 1
2086 DEVICE = 2
2087
2088 def __init__(self, args):
2089 self.args = args
2090 self.set_type()
2091 self.space_size = self.get_space_size()
2092 if getattr(self.args, self.name + '_uuid') is None:
2093 setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
2094 self.space_symlink = None
2095 self.space_dmcrypt = None
2096
2097 def set_type(self):
2098 name = self.name
2099 args = self.args
2100 dmode = os.stat(args.data).st_mode
2101 if (self.wants_space() and
2102 stat.S_ISBLK(dmode) and
2103 not is_partition(args.data) and
2104 getattr(args, name) is None and
2105 getattr(args, name + '_file') is None):
2106 LOG.info('Will colocate %s with data on %s',
2107 name, args.data)
2108 setattr(args, name, args.data)
2109
2110 if getattr(args, name) is None:
2111 if getattr(args, name + '_dev'):
2112 raise Error('%s is unspecified; not a block device' %
2113 name.capitalize(), getattr(args, name))
2114 self.type = self.NONE
2115 return
2116
2117 if not os.path.exists(getattr(args, name)):
2118 if getattr(args, name + '_dev'):
2119 raise Error('%s does not exist; not a block device' %
2120 name.capitalize(), getattr(args, name))
2121 self.type = self.FILE
2122 return
2123
2124 mode = os.stat(getattr(args, name)).st_mode
2125 if stat.S_ISBLK(mode):
2126 if getattr(args, name + '_file'):
2127 raise Error('%s is not a regular file' % name.capitalize,
2128 getattr(args, name))
2129 self.type = self.DEVICE
2130 return
2131
2132 if stat.S_ISREG(mode):
2133 if getattr(args, name + '_dev'):
2134 raise Error('%s is not a block device' % name.capitalize,
2135 getattr(args, name))
2136 self.type = self.FILE
2137 return
2138
2139 raise Error('%s %s is neither a block device nor regular file' %
2140 (name.capitalize, getattr(args, name)))
2141
2142 def is_none(self):
2143 return self.type == self.NONE
2144
2145 def is_file(self):
2146 return self.type == self.FILE
2147
2148 def is_device(self):
2149 return self.type == self.DEVICE
2150
2151 @staticmethod
2152 def parser(name, positional=True):
2153 parser = argparse.ArgumentParser(add_help=False)
2154 parser.add_argument(
2155 '--%s-uuid' % name,
2156 metavar='UUID',
2157 help='unique uuid to assign to the %s' % name,
2158 )
2159 parser.add_argument(
2160 '--%s-file' % name,
2161 action='store_true', default=None,
2162 help='verify that %s is a file' % name.upper(),
2163 )
2164 parser.add_argument(
2165 '--%s-dev' % name,
2166 action='store_true', default=None,
2167 help='verify that %s is a block device' % name.upper(),
2168 )
2169
2170 if positional:
2171 parser.add_argument(
2172 name,
2173 metavar=name.upper(),
2174 nargs='?',
2175 help=('path to OSD %s disk block device;' % name +
2176 ' leave out to store %s in file' % name),
2177 )
2178 return parser
2179
2180 def wants_space(self):
2181 return True
2182
2183 def populate_data_path(self, path):
2184 if self.type == self.DEVICE:
2185 self.populate_data_path_device(path)
2186 elif self.type == self.FILE:
2187 self.populate_data_path_file(path)
2188 elif self.type == self.NONE:
2189 pass
2190 else:
2191 raise Error('unexpected type ', self.type)
2192
2193 def populate_data_path_file(self, path):
2194 space_uuid = self.name + '_uuid'
2195 if getattr(self.args, space_uuid) is not None:
2196 write_one_line(path, space_uuid,
2197 getattr(self.args, space_uuid))
2198 if self.space_symlink is not None:
2199 adjust_symlink(self.space_symlink,
2200 os.path.join(path, self.name))
2201
2202 def populate_data_path_device(self, path):
2203 self.populate_data_path_file(path)
2204
2205 if self.space_dmcrypt is not None:
2206 adjust_symlink(self.space_dmcrypt,
2207 os.path.join(path, self.name + '_dmcrypt'))
2208 else:
2209 try:
2210 os.unlink(os.path.join(path, self.name + '_dmcrypt'))
2211 except OSError:
2212 pass
2213
2214 def prepare(self):
2215 if self.type == self.DEVICE:
2216 self.prepare_device()
2217 elif self.type == self.FILE:
2218 self.prepare_file()
2219 elif self.type == self.NONE:
2220 pass
2221 else:
2222 raise Error('unexpected type ', self.type)
2223
2224 def prepare_file(self):
2225 space_filename = getattr(self.args, self.name)
2226 if not os.path.exists(space_filename):
2227 LOG.debug('Creating %s file %s with size 0'
2228 ' (ceph-osd will resize and allocate)',
2229 self.name,
2230 space_filename)
2231 space_file = open(space_filename, 'wb')
2232 space_file.close()
2233 path_set_context(space_filename)
2234
2235 LOG.debug('%s is file %s',
2236 self.name.capitalize(),
2237 space_filename)
2238 LOG.warning('OSD will not be hot-swappable if %s is '
2239 'not the same device as the osd data' %
2240 self.name)
2241 self.space_symlink = space_filename
2242
2243 def prepare_device(self):
2244 reusing_partition = False
2245
2246 if is_partition(getattr(self.args, self.name)):
2247 LOG.debug('%s %s is a partition',
2248 self.name.capitalize(), getattr(self.args, self.name))
2249 partition = DevicePartition.factory(
2250 path=None, dev=getattr(self.args, self.name), args=self.args)
2251 if isinstance(partition, DevicePartitionCrypt):
2252 raise Error(getattr(self.args, self.name) +
2253 ' partition already exists'
2254 ' and --dmcrypt specified')
2255 LOG.warning('OSD will not be hot-swappable' +
2256 ' if ' + self.name + ' is not' +
2257 ' the same device as the osd data')
2258 if partition.get_ptype() == partition.ptype_for_name(self.name):
2259 LOG.debug('%s %s was previously prepared with '
2260 'ceph-disk. Reusing it.',
2261 self.name.capitalize(),
2262 getattr(self.args, self.name))
2263 reusing_partition = True
2264 # Read and reuse the partition uuid from this journal's
2265 # previous life. We reuse the uuid instead of changing it
2266 # because udev does not reliably notice changes to an
2267 # existing partition's GUID. See
2268 # http://tracker.ceph.com/issues/10146
2269 setattr(self.args, self.name + '_uuid', partition.get_uuid())
2270 LOG.debug('Reusing %s with uuid %s',
2271 self.name,
2272 getattr(self.args, self.name + '_uuid'))
2273 else:
2274 LOG.warning('%s %s was not prepared with '
2275 'ceph-disk. Symlinking directly.',
2276 self.name.capitalize(),
2277 getattr(self.args, self.name))
2278 self.space_symlink = getattr(self.args, self.name)
2279 return
2280
2281 self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
2282 uuid=getattr(self.args, self.name + '_uuid'))
2283
2284 if self.args.dmcrypt:
2285 self.space_dmcrypt = self.space_symlink
2286 self.space_symlink = '/dev/mapper/{uuid}'.format(
2287 uuid=getattr(self.args, self.name + '_uuid'))
2288
2289 if reusing_partition:
2290 # confirm that the space_symlink exists. It should since
2291 # this was an active space
2292 # in the past. Continuing otherwise would be futile.
2293 assert os.path.exists(self.space_symlink)
2294 return
2295
2296 num = self.desired_partition_number()
2297
2298 if num == 0:
2299 LOG.warning('OSD will not be hot-swappable if %s '
2300 'is not the same device as the osd data',
2301 self.name)
2302
2303 device = Device.factory(getattr(self.args, self.name), self.args)
2304 num = device.create_partition(
2305 uuid=getattr(self.args, self.name + '_uuid'),
2306 name=self.name,
2307 size=self.space_size,
2308 num=num)
2309
2310 partition = device.get_partition(num)
2311
2312 LOG.debug('%s is GPT partition %s',
2313 self.name.capitalize(),
2314 self.space_symlink)
2315
2316 if isinstance(partition, DevicePartitionCrypt):
2317 partition.format()
2318 partition.map()
2319
2320 command_check_call(
2321 [
2322 'sgdisk',
2323 '--typecode={num}:{uuid}'.format(
2324 num=num,
2325 uuid=partition.ptype_for_name(self.name),
2326 ),
2327 '--',
2328 getattr(self.args, self.name),
2329 ],
2330 )
2331 update_partition(getattr(self.args, self.name), 'prepared')
2332
2333 LOG.debug('%s is GPT partition %s',
2334 self.name.capitalize(),
2335 self.space_symlink)
2336
2337
2338 class PrepareJournal(PrepareSpace):
2339
2340 def __init__(self, args):
2341 self.name = 'journal'
2342 (self.allows_journal,
2343 self.wants_journal,
2344 self.needs_journal) = check_journal_reqs(args)
2345
2346 if args.journal and not self.allows_journal:
2347 raise Error('journal specified but not allowed by osd backend')
2348
2349 super(PrepareJournal, self).__init__(args)
2350
2351 def wants_space(self):
2352 return self.wants_journal
2353
2354 def get_space_size(self):
2355 return int(get_conf_with_default(
2356 cluster=self.args.cluster,
2357 variable='osd_journal_size',
2358 ))
2359
2360 def desired_partition_number(self):
2361 if self.args.journal == self.args.data:
2362 # we're sharing the disk between osd data and journal;
2363 # make journal be partition number 2
2364 num = 2
2365 else:
2366 num = 0
2367 return num
2368
2369 @staticmethod
2370 def parser():
2371 return PrepareSpace.parser('journal')
2372
2373
2374 class PrepareBluestoreBlock(PrepareSpace):
2375
2376 def __init__(self, args):
2377 self.name = 'block'
2378 super(PrepareBluestoreBlock, self).__init__(args)
2379
2380 def get_space_size(self):
2381 block_size = get_conf(
2382 cluster=self.args.cluster,
2383 variable='bluestore_block_size',
2384 )
2385
2386 if block_size is None:
2387 return 0 # get as much space as possible
2388 else:
2389 return int(block_size) / 1048576 # MB
2390
2391 def desired_partition_number(self):
2392 if self.args.block == self.args.data:
2393 num = 2
2394 else:
2395 num = 0
2396 return num
2397
2398 @staticmethod
2399 def parser():
2400 return PrepareSpace.parser('block')
2401
2402
2403 class PrepareBluestoreBlockDB(PrepareSpace):
2404
2405 def __init__(self, args):
2406 self.name = 'block.db'
2407 super(PrepareBluestoreBlockDB, self).__init__(args)
2408
2409 def get_space_size(self):
2410 block_db_size = get_conf(
2411 cluster=self.args.cluster,
2412 variable='bluestore_block_db_size',
2413 )
2414
2415 if block_db_size is None or int(block_db_size) == 0:
2416 block_size = get_conf(
2417 cluster=self.args.cluster,
2418 variable='bluestore_block_size',
2419 )
2420 if block_size is None:
2421 return 1024 # MB
2422 size = int(block_size) / 100 / 1048576
2423 return max(size, 1024) # MB
2424 else:
2425 return int(block_db_size) / 1048576 # MB
2426
2427 def desired_partition_number(self):
2428 if getattr(self.args, 'block.db') == self.args.data:
2429 num = 3
2430 else:
2431 num = 0
2432 return num
2433
2434 def wants_space(self):
2435 return False
2436
2437 @staticmethod
2438 def parser():
2439 parser = PrepareSpace.parser('block.db', positional=False)
2440 parser.add_argument(
2441 '--block.db',
2442 metavar='BLOCKDB',
2443 help='path to the device or file for bluestore block.db',
2444 )
2445 return parser
2446
2447
2448 class PrepareBluestoreBlockWAL(PrepareSpace):
2449
2450 def __init__(self, args):
2451 self.name = 'block.wal'
2452 super(PrepareBluestoreBlockWAL, self).__init__(args)
2453
2454 def get_space_size(self):
2455 block_size = get_conf(
2456 cluster=self.args.cluster,
2457 variable='bluestore_block_wal_size',
2458 )
2459
2460 if block_size is None:
2461 return 576 # MB, default value
2462 else:
2463 return int(block_size) / 1048576 # MB
2464
2465 def desired_partition_number(self):
2466 if getattr(self.args, 'block.wal') == self.args.data:
2467 num = 4
2468 else:
2469 num = 0
2470 return num
2471
2472 def wants_space(self):
2473 return False
2474
2475 @staticmethod
2476 def parser():
2477 parser = PrepareSpace.parser('block.wal', positional=False)
2478 parser.add_argument(
2479 '--block.wal',
2480 metavar='BLOCKWAL',
2481 help='path to the device or file for bluestore block.wal',
2482 )
2483 return parser
2484
2485
2486 class CryptHelpers(object):
2487
2488 @staticmethod
2489 def get_cryptsetup_parameters(args):
2490 cryptsetup_parameters_str = get_conf(
2491 cluster=args.cluster,
2492 variable='osd_cryptsetup_parameters',
2493 )
2494 if cryptsetup_parameters_str is None:
2495 return []
2496 else:
2497 return shlex.split(cryptsetup_parameters_str)
2498
2499 @staticmethod
2500 def get_dmcrypt_keysize(args):
2501 dmcrypt_keysize_str = get_conf(
2502 cluster=args.cluster,
2503 variable='osd_dmcrypt_key_size',
2504 )
2505 dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
2506 if dmcrypt_type == 'luks':
2507 if dmcrypt_keysize_str is None:
2508 # As LUKS will hash the 'passphrase' in .luks.key
2509 # into a key, set a large default
2510 # so if not updated for some time, it is still a
2511 # reasonable value.
2512 #
2513 return 1024
2514 else:
2515 return int(dmcrypt_keysize_str)
2516 elif dmcrypt_type == 'plain':
2517 if dmcrypt_keysize_str is None:
2518 # This value is hard-coded in the udev script
2519 return 256
2520 else:
2521 LOG.warning('ensure the 95-ceph-osd.rules file has '
2522 'been copied to /etc/udev/rules.d '
2523 'and modified to call cryptsetup '
2524 'with --key-size=%s' % dmcrypt_keysize_str)
2525 return int(dmcrypt_keysize_str)
2526 else:
2527 return 0
2528
2529 @staticmethod
2530 def get_dmcrypt_type(args):
2531 if hasattr(args, 'dmcrypt') and args.dmcrypt:
2532 dmcrypt_type = get_conf(
2533 cluster=args.cluster,
2534 variable='osd_dmcrypt_type',
2535 )
2536
2537 if dmcrypt_type is None or dmcrypt_type == 'luks':
2538 return 'luks'
2539 elif dmcrypt_type == 'plain':
2540 return 'plain'
2541 else:
2542 raise Error('invalid osd_dmcrypt_type parameter '
2543 '(must be luks or plain): ', dmcrypt_type)
2544 else:
2545 return None
2546
2547
2548 class Lockbox(object):
2549
2550 def __init__(self, args):
2551 self.args = args
2552 self.partition = None
2553 self.device = None
2554
2555 if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
2556 self.args.lockbox = self.args.data
2557
2558 def set_partition(self, partition):
2559 self.partition = partition
2560
2561 @staticmethod
2562 def parser():
2563 parser = argparse.ArgumentParser(add_help=False)
2564 parser.add_argument(
2565 '--lockbox',
2566 help='path to the device to store the lockbox',
2567 )
2568 parser.add_argument(
2569 '--lockbox-uuid',
2570 metavar='UUID',
2571 help='unique lockbox uuid',
2572 )
2573 return parser
2574
2575 def create_partition(self):
2576 self.device = Device.factory(self.args.lockbox, argparse.Namespace())
2577 partition_number = 3
2578 self.device.create_partition(uuid=self.args.lockbox_uuid,
2579 name='lockbox',
2580 num=partition_number,
2581 size=10) # MB
2582 return self.device.get_partition(partition_number)
2583
2584 def set_or_create_partition(self):
2585 if is_partition(self.args.lockbox):
2586 LOG.debug('OSD lockbox device %s is a partition',
2587 self.args.lockbox)
2588 self.partition = DevicePartition.factory(
2589 path=None, dev=self.args.lockbox, args=self.args)
2590 ptype = self.partition.get_ptype()
2591 ready = Ptype.get_ready_by_name('lockbox')
2592 if ptype not in ready:
2593 LOG.warning('incorrect partition UUID: %s, expected %s'
2594 % (ptype, str(ready)))
2595 else:
2596 LOG.debug('Creating osd partition on %s',
2597 self.args.lockbox)
2598 self.partition = self.create_partition()
2599
2600 def create_key(self):
2601 key_size = CryptHelpers.get_dmcrypt_keysize(self.args)
2602 key = open('/dev/urandom', 'rb').read(key_size / 8)
2603 base64_key = base64.b64encode(key)
2604 cluster = self.args.cluster
2605 bootstrap = self.args.prepare_key_template.format(cluster=cluster,
2606 statedir=STATEDIR)
2607 command_check_call(
2608 [
2609 'ceph',
2610 '--cluster', cluster,
2611 '--name', 'client.bootstrap-osd',
2612 '--keyring', bootstrap,
2613 'config-key',
2614 'put',
2615 'dm-crypt/osd/' + self.args.osd_uuid + '/luks',
2616 base64_key,
2617 ],
2618 )
2619 keyring, stderr, ret = command(
2620 [
2621 'ceph',
2622 '--cluster', cluster,
2623 '--name', 'client.bootstrap-osd',
2624 '--keyring', bootstrap,
2625 'auth',
2626 'get-or-create',
2627 'client.osd-lockbox.' + self.args.osd_uuid,
2628 'mon',
2629 ('allow command "config-key get" with key="dm-crypt/osd/' +
2630 self.args.osd_uuid + '/luks"'),
2631 ],
2632 )
2633 LOG.debug("stderr " + stderr)
2634 assert ret == 0
2635 path = self.get_mount_point()
2636 open(os.path.join(path, 'keyring'), 'w').write(keyring)
2637 write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)
2638
2639 def symlink_spaces(self, path):
2640 target = self.get_mount_point()
2641 for name in Space.NAMES:
2642 if (hasattr(self.args, name + '_uuid') and
2643 getattr(self.args, name + '_uuid')):
2644 uuid = getattr(self.args, name + '_uuid')
2645 symlink = os.path.join(STATEDIR, 'osd-lockbox', uuid)
2646 adjust_symlink(target, symlink)
2647 write_one_line(path, name + '-uuid', uuid)
2648
2649 def populate(self):
2650 maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
2651 args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
2652 LOG.debug('Creating lockbox fs on %s: ' + str(" ".join(args)))
2653 command_check_call(args)
2654 path = self.get_mount_point()
2655 maybe_mkdir(path)
2656 args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
2657 LOG.debug('Mounting lockbox ' + str(" ".join(args)))
2658 command_check_call(args)
2659 write_one_line(path, 'osd-uuid', self.args.osd_uuid)
2660 if self.args.cluster_uuid is None:
2661 self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
2662 write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
2663 self.create_key()
2664 self.symlink_spaces(path)
2665 write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
2666 if self.device is not None:
2667 command_check_call(
2668 [
2669 'sgdisk',
2670 '--typecode={num}:{uuid}'.format(
2671 num=self.partition.get_partition_number(),
2672 uuid=self.partition.ptype_for_name('lockbox'),
2673 ),
2674 '--',
2675 get_partition_base(self.partition.get_dev()),
2676 ],
2677 )
2678
2679 def get_mount_point(self):
2680 return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)
2681
2682 def get_osd_uuid(self):
2683 return self.args.osd_uuid
2684
2685 def activate(self):
2686 path = is_mounted(self.partition.get_dev())
2687 if path:
2688 LOG.info("Lockbox already mounted at " + path)
2689 return
2690
2691 path = tempfile.mkdtemp(
2692 prefix='mnt.',
2693 dir=STATEDIR + '/tmp',
2694 )
2695 args = ['mount', '-t', 'ext4', '-o', 'ro',
2696 self.partition.get_dev(),
2697 path]
2698 LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
2699 command_check_call(args)
2700 self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
2701 command_check_call(['umount', path])
2702 LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
2703 args = ['mount', '-t', 'ext4', '-o', 'ro',
2704 self.partition.get_dev(),
2705 self.get_mount_point()]
2706 command_check_call(args)
2707 for name in Space.NAMES + ('osd',):
2708 uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
2709 if os.path.exists(uuid_path):
2710 uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
2711 dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
2712 args = ['ceph-disk', 'trigger', dev]
2713 command_check_call(args)
2714
2715 def prepare(self):
2716 verify_not_in_use(self.args.lockbox, check_partitions=True)
2717 self.set_or_create_partition()
2718 self.populate()
2719
2720
2721 class PrepareData(object):
2722
2723 FILE = 1
2724 DEVICE = 2
2725
2726 def __init__(self, args):
2727
2728 self.args = args
2729 self.partition = None
2730 self.set_type()
2731 if self.args.cluster_uuid is None:
2732 self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
2733
2734 if self.args.osd_uuid is None:
2735 self.args.osd_uuid = str(uuid.uuid4())
2736
2737 def set_type(self):
2738 dmode = os.stat(self.args.data).st_mode
2739
2740 if stat.S_ISDIR(dmode):
2741 self.type = self.FILE
2742 elif stat.S_ISBLK(dmode):
2743 self.type = self.DEVICE
2744 else:
2745 raise Error('not a dir or block device', self.args.data)
2746
2747 def is_file(self):
2748 return self.type == self.FILE
2749
2750 def is_device(self):
2751 return self.type == self.DEVICE
2752
2753 @staticmethod
2754 def parser():
2755 parser = argparse.ArgumentParser(add_help=False)
2756 parser.add_argument(
2757 '--fs-type',
2758 help='file system type to use (e.g. "ext4")',
2759 )
2760 parser.add_argument(
2761 '--zap-disk',
2762 action='store_true', default=None,
2763 help='destroy the partition table (and content) of a disk',
2764 )
2765 parser.add_argument(
2766 '--data-dir',
2767 action='store_true', default=None,
2768 help='verify that DATA is a dir',
2769 )
2770 parser.add_argument(
2771 '--data-dev',
2772 action='store_true', default=None,
2773 help='verify that DATA is a block device',
2774 )
2775 parser.add_argument(
2776 'data',
2777 metavar='DATA',
2778 help='path to OSD data (a disk block device or directory)',
2779 )
2780 return parser
2781
2782 def populate_data_path_file(self, path, *to_prepare_list):
2783 self.populate_data_path(path, *to_prepare_list)
2784
2785 def populate_data_path(self, path, *to_prepare_list):
2786 if os.path.exists(os.path.join(path, 'magic')):
2787 LOG.debug('Data dir %s already exists', path)
2788 return
2789 else:
2790 LOG.debug('Preparing osd data dir %s', path)
2791
2792 if self.args.osd_uuid is None:
2793 self.args.osd_uuid = str(uuid.uuid4())
2794
2795 write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
2796 write_one_line(path, 'fsid', self.args.osd_uuid)
2797 if self.args.crush_device_class:
2798 write_one_line(path, 'crush_device_class',
2799 self.args.crush_device_class)
2800 write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
2801
2802 for to_prepare in to_prepare_list:
2803 to_prepare.populate_data_path(path)
2804
2805 def prepare(self, *to_prepare_list):
2806 if self.type == self.DEVICE:
2807 self.prepare_device(*to_prepare_list)
2808 elif self.type == self.FILE:
2809 self.prepare_file(*to_prepare_list)
2810 else:
2811 raise Error('unexpected type ', self.type)
2812
2813 def prepare_file(self, *to_prepare_list):
2814
2815 if not os.path.exists(self.args.data):
2816 raise Error('data path for directory does not exist',
2817 self.args.data)
2818
2819 if self.args.data_dev:
2820 raise Error('data path is not a block device', self.args.data)
2821
2822 for to_prepare in to_prepare_list:
2823 to_prepare.prepare()
2824
2825 self.populate_data_path_file(self.args.data, *to_prepare_list)
2826
2827 def sanity_checks(self):
2828 if not os.path.exists(self.args.data):
2829 raise Error('data path for device does not exist',
2830 self.args.data)
2831 verify_not_in_use(self.args.data,
2832 check_partitions=not self.args.dmcrypt)
2833
2834 def set_variables(self):
2835 if self.args.fs_type is None:
2836 self.args.fs_type = get_conf(
2837 cluster=self.args.cluster,
2838 variable='osd_mkfs_type',
2839 )
2840 if self.args.fs_type is None:
2841 self.args.fs_type = get_conf(
2842 cluster=self.args.cluster,
2843 variable='osd_fs_type',
2844 )
2845 if self.args.fs_type is None:
2846 self.args.fs_type = DEFAULT_FS_TYPE
2847
2848 self.mkfs_args = get_conf(
2849 cluster=self.args.cluster,
2850 variable='osd_mkfs_options_{fstype}'.format(
2851 fstype=self.args.fs_type,
2852 ),
2853 )
2854 if self.mkfs_args is None:
2855 self.mkfs_args = get_conf(
2856 cluster=self.args.cluster,
2857 variable='osd_fs_mkfs_options_{fstype}'.format(
2858 fstype=self.args.fs_type,
2859 ),
2860 )
2861
2862 self.mount_options = get_mount_options(cluster=self.args.cluster,
2863 fs_type=self.args.fs_type)
2864
2865 if self.args.osd_uuid is None:
2866 self.args.osd_uuid = str(uuid.uuid4())
2867
2868 def prepare_device(self, *to_prepare_list):
2869 self.sanity_checks()
2870 self.set_variables()
2871 if self.args.zap_disk is not None:
2872 zap(self.args.data)
2873
2874 def create_data_partition(self):
2875 device = Device.factory(self.args.data, self.args)
2876 partition_number = 1
2877 device.create_partition(uuid=self.args.osd_uuid,
2878 name='data',
2879 num=partition_number,
2880 size=self.get_space_size())
2881 return device.get_partition(partition_number)
2882
2883 def set_data_partition(self):
2884 if is_partition(self.args.data):
2885 LOG.debug('OSD data device %s is a partition',
2886 self.args.data)
2887 self.partition = DevicePartition.factory(
2888 path=None, dev=self.args.data, args=self.args)
2889 ptype = self.partition.get_ptype()
2890 ready = Ptype.get_ready_by_name('osd')
2891 if ptype not in ready:
2892 LOG.warning('incorrect partition UUID: %s, expected %s'
2893 % (ptype, str(ready)))
2894 else:
2895 LOG.debug('Creating osd partition on %s',
2896 self.args.data)
2897 self.partition = self.create_data_partition()
2898
2899 def populate_data_path_device(self, *to_prepare_list):
2900 partition = self.partition
2901
2902 if isinstance(partition, DevicePartitionCrypt):
2903 partition.map()
2904
2905 try:
2906 args = [
2907 'mkfs',
2908 '-t',
2909 self.args.fs_type,
2910 ]
2911 if self.mkfs_args is not None:
2912 args.extend(self.mkfs_args.split())
2913 if self.args.fs_type == 'xfs':
2914 args.extend(['-f']) # always force
2915 else:
2916 args.extend(MKFS_ARGS.get(self.args.fs_type, []))
2917 args.extend([
2918 '--',
2919 partition.get_dev(),
2920 ])
2921 LOG.debug('Creating %s fs on %s',
2922 self.args.fs_type, partition.get_dev())
2923 command_check_call(args, exit=True)
2924
2925 path = mount(dev=partition.get_dev(),
2926 fstype=self.args.fs_type,
2927 options=self.mount_options)
2928
2929 try:
2930 self.populate_data_path(path, *to_prepare_list)
2931 finally:
2932 path_set_context(path)
2933 unmount(path)
2934 finally:
2935 if isinstance(partition, DevicePartitionCrypt):
2936 partition.unmap()
2937
2938 if not is_partition(self.args.data):
2939 command_check_call(
2940 [
2941 'sgdisk',
2942 '--typecode=%d:%s' % (partition.get_partition_number(),
2943 partition.ptype_for_name('osd')),
2944 '--',
2945 self.args.data,
2946 ],
2947 exit=True,
2948 )
2949 update_partition(self.args.data, 'prepared')
2950 command_check_call(['udevadm', 'trigger',
2951 '--action=add',
2952 '--sysname-match',
2953 os.path.basename(partition.rawdev)])
2954
2955
2956 class PrepareFilestoreData(PrepareData):
2957
2958 def get_space_size(self):
2959 return 0 # get as much space as possible
2960
2961 def prepare_device(self, *to_prepare_list):
2962 super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
2963 for to_prepare in to_prepare_list:
2964 to_prepare.prepare()
2965 self.set_data_partition()
2966 self.populate_data_path_device(*to_prepare_list)
2967
2968 def populate_data_path(self, path, *to_prepare_list):
2969 super(PrepareFilestoreData, self).populate_data_path(path,
2970 *to_prepare_list)
2971 write_one_line(path, 'type', 'filestore')
2972
2973
2974 class PrepareBluestoreData(PrepareData):
2975
2976 def get_space_size(self):
2977 return 100 # MB
2978
2979 def prepare_device(self, *to_prepare_list):
2980 super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
2981 self.set_data_partition()
2982 for to_prepare in to_prepare_list:
2983 to_prepare.prepare()
2984 self.populate_data_path_device(*to_prepare_list)
2985
2986 def populate_data_path(self, path, *to_prepare_list):
2987 super(PrepareBluestoreData, self).populate_data_path(path,
2988 *to_prepare_list)
2989 write_one_line(path, 'type', 'bluestore')
2990
2991
2992 #
2993 # Temporary workaround: if ceph-osd --mkfs does not
2994 # complete within 5 minutes, assume it is blocked
2995 # because of http://tracker.ceph.com/issues/13522
2996 # and retry a few times.
2997 #
2998 # Remove this function calls with command_check_call
2999 # when http://tracker.ceph.com/issues/13522 is fixed
3000 #
3001 def ceph_osd_mkfs(arguments):
3002 timeout = _get_command_executable(['timeout'])
3003 mkfs_ok = False
3004 error = 'unknown error'
3005 for delay in os.environ.get('CEPH_OSD_MKFS_DELAYS',
3006 '300 300 300 300 300').split():
3007 try:
3008 _check_output(timeout + [delay] + arguments)
3009 mkfs_ok = True
3010 break
3011 except subprocess.CalledProcessError as e:
3012 error = e.output
3013 if e.returncode == 124: # timeout fired, retry
3014 LOG.debug('%s timed out : %s (retry)'
3015 % (str(arguments), error))
3016 else:
3017 break
3018 if not mkfs_ok:
3019 raise Error('%s failed : %s' % (str(arguments), error))
3020
3021
3022 def mkfs(
3023 path,
3024 cluster,
3025 osd_id,
3026 fsid,
3027 keyring,
3028 ):
3029 monmap = os.path.join(path, 'activate.monmap')
3030 command_check_call(
3031 [
3032 'ceph',
3033 '--cluster', cluster,
3034 '--name', 'client.bootstrap-osd',
3035 '--keyring', keyring,
3036 'mon', 'getmap', '-o', monmap,
3037 ],
3038 )
3039
3040 osd_type = read_one_line(path, 'type')
3041
3042 if osd_type == 'bluestore':
3043 ceph_osd_mkfs(
3044 [
3045 'ceph-osd',
3046 '--cluster', cluster,
3047 '--mkfs',
3048 '--mkkey',
3049 '-i', osd_id,
3050 '--monmap', monmap,
3051 '--osd-data', path,
3052 '--osd-uuid', fsid,
3053 '--keyring', os.path.join(path, 'keyring'),
3054 '--setuser', get_ceph_user(),
3055 '--setgroup', get_ceph_group(),
3056 ],
3057 )
3058 elif osd_type == 'filestore':
3059 ceph_osd_mkfs(
3060 [
3061 'ceph-osd',
3062 '--cluster', cluster,
3063 '--mkfs',
3064 '--mkkey',
3065 '-i', osd_id,
3066 '--monmap', monmap,
3067 '--osd-data', path,
3068 '--osd-journal', os.path.join(path, 'journal'),
3069 '--osd-uuid', fsid,
3070 '--keyring', os.path.join(path, 'keyring'),
3071 '--setuser', get_ceph_user(),
3072 '--setgroup', get_ceph_group(),
3073 ],
3074 )
3075 else:
3076 raise Error('unrecognized objectstore type %s' % osd_type)
3077
3078
3079 def auth_key(
3080 path,
3081 cluster,
3082 osd_id,
3083 keyring,
3084 ):
3085 try:
3086 # try dumpling+ cap scheme
3087 command_check_call(
3088 [
3089 'ceph',
3090 '--cluster', cluster,
3091 '--name', 'client.bootstrap-osd',
3092 '--keyring', keyring,
3093 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
3094 '-i', os.path.join(path, 'keyring'),
3095 'osd', 'allow *',
3096 'mon', 'allow profile osd',
3097 ],
3098 )
3099 except subprocess.CalledProcessError as err:
3100 if err.returncode == errno.EINVAL:
3101 # try old cap scheme
3102 command_check_call(
3103 [
3104 'ceph',
3105 '--cluster', cluster,
3106 '--name', 'client.bootstrap-osd',
3107 '--keyring', keyring,
3108 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
3109 '-i', os.path.join(path, 'keyring'),
3110 'osd', 'allow *',
3111 'mon', 'allow rwx',
3112 ],
3113 )
3114 else:
3115 raise
3116
3117
3118 def get_mount_point(cluster, osd_id):
3119 parent = STATEDIR + '/osd'
3120 return os.path.join(
3121 parent,
3122 '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id),
3123 )
3124
3125
3126 def move_mount(
3127 dev,
3128 path,
3129 cluster,
3130 osd_id,
3131 fstype,
3132 mount_options,
3133 ):
3134 LOG.debug('Moving mount to final location...')
3135 osd_data = get_mount_point(cluster, osd_id)
3136 maybe_mkdir(osd_data)
3137
3138 # pick best-of-breed mount options based on fs type
3139 if mount_options is None:
3140 mount_options = MOUNT_OPTIONS.get(fstype, '')
3141
3142 # we really want to mount --move, but that is not supported when
3143 # the parent mount is shared, as it is by default on RH, Fedora,
3144 # and probably others. Also, --bind doesn't properly manipulate
3145 # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
3146 # this being 2013. Instead, mount the original device at the final
3147 # location.
3148 command_check_call(
3149 [
3150 '/bin/mount',
3151 '-o',
3152 mount_options,
3153 '--',
3154 dev,
3155 osd_data,
3156 ],
3157 )
3158 command_check_call(
3159 [
3160 '/bin/umount',
3161 '-l', # lazy, in case someone else is peeking at the
3162 # wrong moment
3163 '--',
3164 path,
3165 ],
3166 )
3167
3168
3169 #
3170 # For upgrade purposes, to make sure there are no competing units,
3171 # both --runtime unit and the default should be disabled. There can be
3172 # two units at the same time: one with --runtime and another without
3173 # it. If, for any reason (manual or ceph-disk) the two units co-exist
3174 # they will compete with each other.
3175 #
3176 def systemd_disable(
3177 path,
3178 osd_id,
3179 ):
3180 # ensure there is no duplicate ceph-osd@.service
3181 for style in ([], ['--runtime']):
3182 command_check_call(
3183 [
3184 'systemctl',
3185 'disable',
3186 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3187 ] + style,
3188 )
3189
3190
3191 def systemd_start(
3192 path,
3193 osd_id,
3194 ):
3195 systemd_disable(path, osd_id)
3196 if is_mounted(path):
3197 style = ['--runtime']
3198 else:
3199 style = []
3200 command_check_call(
3201 [
3202 'systemctl',
3203 'enable',
3204 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3205 ] + style,
3206 )
3207 command_check_call(
3208 [
3209 'systemctl',
3210 'start',
3211 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3212 ],
3213 )
3214
3215
3216 def systemd_stop(
3217 path,
3218 osd_id,
3219 ):
3220 systemd_disable(path, osd_id)
3221 command_check_call(
3222 [
3223 'systemctl',
3224 'stop',
3225 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3226 ],
3227 )
3228
3229
3230 def start_daemon(
3231 cluster,
3232 osd_id,
3233 ):
3234 LOG.debug('Starting %s osd.%s...', cluster, osd_id)
3235
3236 path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3237 cluster=cluster, osd_id=osd_id)
3238
3239 try:
3240 if os.path.exists(os.path.join(path, 'upstart')):
3241 command_check_call(
3242 [
3243 '/sbin/initctl',
3244 # use emit, not start, because start would fail if the
3245 # instance was already running
3246 'emit',
3247 # since the daemon starting doesn't guarantee much about
3248 # the service being operational anyway, don't bother
3249 # waiting for it
3250 '--no-wait',
3251 '--',
3252 'ceph-osd',
3253 'cluster={cluster}'.format(cluster=cluster),
3254 'id={osd_id}'.format(osd_id=osd_id),
3255 ],
3256 )
3257 elif os.path.exists(os.path.join(path, 'sysvinit')):
3258 if os.path.exists('/usr/sbin/service'):
3259 svc = '/usr/sbin/service'
3260 else:
3261 svc = '/sbin/service'
3262 command_check_call(
3263 [
3264 svc,
3265 'ceph',
3266 '--cluster',
3267 '{cluster}'.format(cluster=cluster),
3268 'start',
3269 'osd.{osd_id}'.format(osd_id=osd_id),
3270 ],
3271 )
3272 elif os.path.exists(os.path.join(path, 'systemd')):
3273 systemd_start(path, osd_id)
3274 elif os.path.exists(os.path.join(path, 'openrc')):
3275 base_script = '/etc/init.d/ceph-osd'
3276 osd_script = '{base}.{osd_id}'.format(
3277 base=base_script,
3278 osd_id=osd_id
3279 )
3280 if not os.path.exists(osd_script):
3281 os.symlink(base_script, osd_script)
3282 command_check_call(
3283 [
3284 osd_script,
3285 'start',
3286 ],
3287 )
3288 elif os.path.exists(os.path.join(path, 'bsdrc')):
3289 command_check_call(
3290 [
3291 '/usr/sbin/service', 'ceph', 'start',
3292 'osd.{osd_id}'.format(osd_id=osd_id),
3293 ],
3294 )
3295 else:
3296 raise Error('{cluster} osd.{osd_id} '
3297 'is not tagged with an init system'
3298 .format(
3299 cluster=cluster,
3300 osd_id=osd_id,
3301 ))
3302 except subprocess.CalledProcessError as e:
3303 raise Error('ceph osd start failed', e)
3304
3305
3306 def stop_daemon(
3307 cluster,
3308 osd_id,
3309 ):
3310 LOG.debug('Stoping %s osd.%s...', cluster, osd_id)
3311
3312 path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3313 cluster=cluster, osd_id=osd_id)
3314
3315 try:
3316 if os.path.exists(os.path.join(path, 'upstart')):
3317 command_check_call(
3318 [
3319 '/sbin/initctl',
3320 'stop',
3321 'ceph-osd',
3322 'cluster={cluster}'.format(cluster=cluster),
3323 'id={osd_id}'.format(osd_id=osd_id),
3324 ],
3325 )
3326 elif os.path.exists(os.path.join(path, 'sysvinit')):
3327 svc = which('service')
3328 command_check_call(
3329 [
3330 svc,
3331 'ceph',
3332 '--cluster',
3333 '{cluster}'.format(cluster=cluster),
3334 'stop',
3335 'osd.{osd_id}'.format(osd_id=osd_id),
3336 ],
3337 )
3338 elif os.path.exists(os.path.join(path, 'systemd')):
3339 systemd_stop(path, osd_id)
3340 elif os.path.exists(os.path.join(path, 'openrc')):
3341 command_check_call(
3342 [
3343 '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
3344 'stop',
3345 ],
3346 )
3347 elif os.path.exists(os.path.join(path, 'bsdrc')):
3348 command_check_call(
3349 [
3350 '/usr/local/etc/rc.d/ceph stop osd.{osd_id}'
3351 .format(osd_id=osd_id),
3352 ],
3353 )
3354 else:
3355 raise Error('{cluster} osd.{osd_id} '
3356 'is not tagged with an init system'
3357 .format(cluster=cluster, osd_id=osd_id))
3358 except subprocess.CalledProcessError as e:
3359 raise Error('ceph osd stop failed', e)
3360
3361
3362 def detect_fstype(dev):
3363 if FREEBSD:
3364 fstype = _check_output(
3365 args=[
3366 'fstyp',
3367 '-u',
3368 dev,
3369 ],
3370 )
3371 else:
3372 fstype = _check_output(
3373 args=[
3374 '/sbin/blkid',
3375 # we don't want stale cached results
3376 '-p',
3377 '-s', 'TYPE',
3378 '-o', 'value',
3379 '--',
3380 dev,
3381 ],
3382 )
3383 fstype = must_be_one_line(fstype)
3384 return fstype
3385
3386
3387 def dmcrypt_is_mapped(uuid):
3388 path = os.path.join('/dev/mapper', uuid)
3389 if os.path.exists(path):
3390 return path
3391 else:
3392 return None
3393
3394
3395 def dmcrypt_map(dev, dmcrypt_key_dir):
3396 ptype = get_partition_type(dev)
3397 if ptype in Ptype.get_ready_by_type('plain'):
3398 luks = False
3399 cryptsetup_parameters = ['--key-size', '256']
3400 elif ptype in Ptype.get_ready_by_type('luks'):
3401 luks = True
3402 cryptsetup_parameters = []
3403 else:
3404 raise Error('--dmcrypt called for dev %s with invalid ptype %s'
3405 % (dev, ptype))
3406 part_uuid = get_partition_uuid(dev)
3407 dmcrypt_key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
3408 return _dmcrypt_map(
3409 rawdev=dev,
3410 key=dmcrypt_key,
3411 _uuid=part_uuid,
3412 cryptsetup_parameters=cryptsetup_parameters,
3413 luks=luks,
3414 format_dev=False,
3415 )
3416
3417
3418 def mount_activate(
3419 dev,
3420 activate_key_template,
3421 init,
3422 dmcrypt,
3423 dmcrypt_key_dir,
3424 reactivate=False,
3425 ):
3426
3427 if dmcrypt:
3428 part_uuid = get_partition_uuid(dev)
3429 dev = dmcrypt_map(dev, dmcrypt_key_dir)
3430 try:
3431 fstype = detect_fstype(dev=dev)
3432 except (subprocess.CalledProcessError,
3433 TruncatedLineError,
3434 TooManyLinesError) as e:
3435 raise FilesystemTypeError(
3436 'device {dev}'.format(dev=dev),
3437 e,
3438 )
3439
3440 # TODO always using mount options from cluster=ceph for
3441 # now; see http://tracker.newdream.net/issues/3253
3442 mount_options = get_mount_options(cluster='ceph', fs_type=fstype)
3443
3444 path = mount(dev=dev, fstype=fstype, options=mount_options)
3445
3446 # check if the disk is deactive, change the journal owner, group
3447 # mode for correct user and group.
3448 if os.path.exists(os.path.join(path, 'deactive')):
3449 # logging to syslog will help us easy to know udev triggered failure
3450 if not reactivate:
3451 unmount(path)
3452 # we need to unmap again because dmcrypt map will create again
3453 # on bootup stage (due to deactivate)
3454 if '/dev/mapper/' in dev:
3455 part_uuid = dev.replace('/dev/mapper/', '')
3456 dmcrypt_unmap(part_uuid)
3457 LOG.info('OSD deactivated! reactivate with: --reactivate')
3458 raise Error('OSD deactivated! reactivate with: --reactivate')
3459 # flag to activate a deactive osd.
3460 deactive = True
3461 else:
3462 deactive = False
3463
3464 osd_id = None
3465 cluster = None
3466 try:
3467 (osd_id, cluster) = activate(path, activate_key_template, init)
3468
3469 # Now active successfully
3470 # If we got reactivate and deactive, remove the deactive file
3471 if deactive and reactivate:
3472 os.remove(os.path.join(path, 'deactive'))
3473 LOG.info('Remove `deactive` file.')
3474
3475 # check if the disk is already active, or if something else is already
3476 # mounted there
3477 active = False
3478 other = False
3479 src_dev = os.stat(path).st_dev
3480 try:
3481 dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
3482 cluster=cluster,
3483 osd_id=osd_id)).st_dev
3484 if src_dev == dst_dev:
3485 active = True
3486 else:
3487 parent_dev = os.stat(STATEDIR + '/osd').st_dev
3488 if dst_dev != parent_dev:
3489 other = True
3490 elif os.listdir(get_mount_point(cluster, osd_id)):
3491 LOG.info(get_mount_point(cluster, osd_id) +
3492 " is not empty, won't override")
3493 other = True
3494
3495 except OSError:
3496 pass
3497
3498 if active:
3499 LOG.info('%s osd.%s already mounted in position; unmounting ours.'
3500 % (cluster, osd_id))
3501 unmount(path)
3502 elif other:
3503 raise Error('another %s osd.%s already mounted in position '
3504 '(old/different cluster instance?); unmounting ours.'
3505 % (cluster, osd_id))
3506 else:
3507 move_mount(
3508 dev=dev,
3509 path=path,
3510 cluster=cluster,
3511 osd_id=osd_id,
3512 fstype=fstype,
3513 mount_options=mount_options,
3514 )
3515 return cluster, osd_id
3516
3517 except:
3518 LOG.error('Failed to activate')
3519 unmount(path)
3520 raise
3521 finally:
3522 # remove our temp dir
3523 if os.path.exists(path):
3524 os.rmdir(path)
3525
3526
3527 def activate_dir(
3528 path,
3529 activate_key_template,
3530 init,
3531 ):
3532
3533 if not os.path.exists(path):
3534 raise Error(
3535 'directory %s does not exist' % path
3536 )
3537
3538 (osd_id, cluster) = activate(path, activate_key_template, init)
3539
3540 if init not in (None, 'none'):
3541 canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3542 cluster=cluster,
3543 osd_id=osd_id)
3544 if path != canonical:
3545 # symlink it from the proper location
3546 create = True
3547 if os.path.lexists(canonical):
3548 old = os.readlink(canonical)
3549 if old != path:
3550 LOG.debug('Removing old symlink %s -> %s', canonical, old)
3551 try:
3552 os.unlink(canonical)
3553 except:
3554 raise Error('unable to remove old symlink', canonical)
3555 else:
3556 create = False
3557 if create:
3558 LOG.debug('Creating symlink %s -> %s', canonical, path)
3559 try:
3560 os.symlink(path, canonical)
3561 except:
3562 raise Error('unable to create symlink %s -> %s'
3563 % (canonical, path))
3564
3565 return cluster, osd_id
3566
3567
3568 def find_cluster_by_uuid(_uuid):
3569 """
3570 Find a cluster name by searching /etc/ceph/*.conf for a conf file
3571 with the right uuid.
3572 """
3573 _uuid = _uuid.lower()
3574 no_fsid = []
3575 if not os.path.exists(SYSCONFDIR):
3576 return None
3577 for conf_file in os.listdir(SYSCONFDIR):
3578 if not conf_file.endswith('.conf'):
3579 continue
3580 cluster = conf_file[:-5]
3581 try:
3582 fsid = get_fsid(cluster)
3583 except Error as e:
3584 if 'getting cluster uuid from configuration failed' not in str(e):
3585 raise e
3586 no_fsid.append(cluster)
3587 else:
3588 if fsid == _uuid:
3589 return cluster
3590 # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
3591 if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
3592 LOG.warning('No fsid defined in ' + SYSCONFDIR +
3593 '/ceph.conf; using anyway')
3594 return 'ceph'
3595 return None
3596
3597
3598 def activate(
3599 path,
3600 activate_key_template,
3601 init,
3602 ):
3603
3604 check_osd_magic(path)
3605
3606 ceph_fsid = read_one_line(path, 'ceph_fsid')
3607 if ceph_fsid is None:
3608 raise Error('No cluster uuid assigned.')
3609 LOG.debug('Cluster uuid is %s', ceph_fsid)
3610
3611 cluster = find_cluster_by_uuid(ceph_fsid)
3612 if cluster is None:
3613 raise Error('No cluster conf found in ' + SYSCONFDIR +
3614 ' with fsid %s' % ceph_fsid)
3615 LOG.debug('Cluster name is %s', cluster)
3616
3617 fsid = read_one_line(path, 'fsid')
3618 if fsid is None:
3619 raise Error('No OSD uuid assigned.')
3620 LOG.debug('OSD uuid is %s', fsid)
3621
3622 keyring = activate_key_template.format(cluster=cluster,
3623 statedir=STATEDIR)
3624
3625 osd_id = get_osd_id(path)
3626 if osd_id is None:
3627 osd_id = allocate_osd_id(
3628 cluster=cluster,
3629 fsid=fsid,
3630 keyring=keyring,
3631 )
3632 write_one_line(path, 'whoami', osd_id)
3633 LOG.debug('OSD id is %s', osd_id)
3634
3635 if not os.path.exists(os.path.join(path, 'ready')):
3636 LOG.debug('Initializing OSD...')
3637 # re-running mkfs is safe, so just run until it completes
3638 mkfs(
3639 path=path,
3640 cluster=cluster,
3641 osd_id=osd_id,
3642 fsid=fsid,
3643 keyring=keyring,
3644 )
3645
3646 if init not in (None, 'none'):
3647 if init == 'auto':
3648 conf_val = get_conf(
3649 cluster=cluster,
3650 variable='init'
3651 )
3652 if conf_val is not None:
3653 init = conf_val
3654 else:
3655 init = init_get()
3656
3657 LOG.debug('Marking with init system %s', init)
3658 init_path = os.path.join(path, init)
3659 with open(init_path, 'w'):
3660 path_set_context(init_path)
3661
3662 # remove markers for others, just in case.
3663 for other in INIT_SYSTEMS:
3664 if other != init:
3665 try:
3666 os.unlink(os.path.join(path, other))
3667 except OSError:
3668 pass
3669
3670 if not os.path.exists(os.path.join(path, 'active')):
3671 LOG.debug('Authorizing OSD key...')
3672 auth_key(
3673 path=path,
3674 cluster=cluster,
3675 osd_id=osd_id,
3676 keyring=keyring,
3677 )
3678 write_one_line(path, 'active', 'ok')
3679 LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
3680 return (osd_id, cluster)
3681
3682
3683 def main_activate(args):
3684 cluster = None
3685 osd_id = None
3686
3687 LOG.info('path = ' + str(args.path))
3688 if not os.path.exists(args.path):
3689 raise Error('%s does not exist' % args.path)
3690
3691 if is_suppressed(args.path):
3692 LOG.info('suppressed activate request on %s', args.path)
3693 return
3694
3695 with activate_lock:
3696 mode = os.stat(args.path).st_mode
3697 if stat.S_ISBLK(mode):
3698 if (is_partition(args.path) and
3699 (get_partition_type(args.path) ==
3700 PTYPE['mpath']['osd']['ready']) and
3701 not is_mpath(args.path)):
3702 raise Error('%s is not a multipath block device' %
3703 args.path)
3704 (cluster, osd_id) = mount_activate(
3705 dev=args.path,
3706 activate_key_template=args.activate_key_template,
3707 init=args.mark_init,
3708 dmcrypt=args.dmcrypt,
3709 dmcrypt_key_dir=args.dmcrypt_key_dir,
3710 reactivate=args.reactivate,
3711 )
3712 osd_data = get_mount_point(cluster, osd_id)
3713
3714 elif stat.S_ISDIR(mode):
3715 (cluster, osd_id) = activate_dir(
3716 path=args.path,
3717 activate_key_template=args.activate_key_template,
3718 init=args.mark_init,
3719 )
3720 osd_data = args.path
3721
3722 else:
3723 raise Error('%s is not a directory or block device' % args.path)
3724
3725 # exit with 0 if the journal device is not up, yet
3726 # journal device will do the activation
3727 osd_journal = '{path}/journal'.format(path=osd_data)
3728 if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
3729 LOG.info("activate: Journal not present, not starting, yet")
3730 return
3731
3732 if (not args.no_start_daemon and args.mark_init == 'none'):
3733 command_check_call(
3734 [
3735 'ceph-osd',
3736 '--cluster={cluster}'.format(cluster=cluster),
3737 '--id={osd_id}'.format(osd_id=osd_id),
3738 '--osd-data={path}'.format(path=osd_data),
3739 '--osd-journal={journal}'.format(journal=osd_journal),
3740 ],
3741 )
3742
3743 if (not args.no_start_daemon and
3744 args.mark_init not in (None, 'none')):
3745
3746 start_daemon(
3747 cluster=cluster,
3748 osd_id=osd_id,
3749 )
3750
3751
3752 def main_activate_lockbox(args):
3753 with activate_lock:
3754 main_activate_lockbox_protected(args)
3755
3756
3757 def main_activate_lockbox_protected(args):
3758 partition = DevicePartition.factory(
3759 path=None, dev=args.path, args=args)
3760
3761 lockbox = Lockbox(args)
3762 lockbox.set_partition(partition)
3763 lockbox.activate()
3764
3765
3766 ###########################
3767
3768 def _mark_osd_out(cluster, osd_id):
3769 LOG.info('Prepare to mark osd.%d out...', osd_id)
3770 command([
3771 'ceph',
3772 'osd',
3773 'out',
3774 'osd.%d' % osd_id,
3775 ])
3776
3777
3778 def _check_osd_status(cluster, osd_id):
3779 """
3780 report the osd status:
3781 00(0) : means OSD OUT AND DOWN
3782 01(1) : means OSD OUT AND UP
3783 10(2) : means OSD IN AND DOWN
3784 11(3) : means OSD IN AND UP
3785 """
3786 LOG.info("Checking osd id: %s ..." % osd_id)
3787 found = False
3788 status_code = 0
3789 out, err, ret = command([
3790 'ceph',
3791 'osd',
3792 'dump',
3793 '--cluster={cluster}'.format(
3794 cluster=cluster,
3795 ),
3796 '--format',
3797 'json',
3798 ])
3799 out_json = json.loads(out)
3800 for item in out_json[u'osds']:
3801 if item.get(u'osd') == int(osd_id):
3802 found = True
3803 if item.get(u'in') is 1:
3804 status_code += 2
3805 if item.get(u'up') is 1:
3806 status_code += 1
3807 if not found:
3808 raise Error('Could not osd.%s in osd tree!' % osd_id)
3809 return status_code
3810
3811
3812 def _remove_osd_directory_files(mounted_path, cluster):
3813 """
3814 To remove the 'ready', 'active', INIT-specific files.
3815 """
3816 if os.path.exists(os.path.join(mounted_path, 'ready')):
3817 os.remove(os.path.join(mounted_path, 'ready'))
3818 LOG.info('Remove `ready` file.')
3819 else:
3820 LOG.info('`ready` file is already removed.')
3821
3822 if os.path.exists(os.path.join(mounted_path, 'active')):
3823 os.remove(os.path.join(mounted_path, 'active'))
3824 LOG.info('Remove `active` file.')
3825 else:
3826 LOG.info('`active` file is already removed.')
3827
3828 # Just check `upstart` and `sysvinit` directly if filename is init-spec.
3829 conf_val = get_conf(
3830 cluster=cluster,
3831 variable='init'
3832 )
3833 if conf_val is not None:
3834 init = conf_val
3835 else:
3836 init = init_get()
3837 os.remove(os.path.join(mounted_path, init))
3838 LOG.info('Remove `%s` file.', init)
3839 return
3840
3841
3842 def main_deactivate(args):
3843 with activate_lock:
3844 main_deactivate_locked(args)
3845
3846
3847 def main_deactivate_locked(args):
3848 osd_id = args.deactivate_by_id
3849 path = args.path
3850 target_dev = None
3851 dmcrypt = False
3852 devices = list_devices()
3853
3854 # list all devices and found we need
3855 for device in devices:
3856 if 'partitions' in device:
3857 for dev_part in device.get('partitions'):
3858 if (osd_id and
3859 'whoami' in dev_part and
3860 dev_part['whoami'] == osd_id):
3861 target_dev = dev_part
3862 elif (path and
3863 'path' in dev_part and
3864 dev_part['path'] == path):
3865 target_dev = dev_part
3866 if not target_dev:
3867 raise Error('Cannot find any match device!!')
3868
3869 # set up all we need variable
3870 osd_id = target_dev['whoami']
3871 part_type = target_dev['ptype']
3872 mounted_path = target_dev['mount']
3873 if Ptype.is_dmcrypt(part_type, 'osd'):
3874 dmcrypt = True
3875
3876 # Do not do anything if osd is already down.
3877 status_code = _check_osd_status(args.cluster, osd_id)
3878 if status_code == OSD_STATUS_IN_UP:
3879 if args.mark_out is True:
3880 _mark_osd_out(args.cluster, int(osd_id))
3881 stop_daemon(args.cluster, osd_id)
3882 elif status_code == OSD_STATUS_IN_DOWN:
3883 if args.mark_out is True:
3884 _mark_osd_out(args.cluster, int(osd_id))
3885 LOG.info("OSD already out/down. Do not do anything now.")
3886 return
3887 elif status_code == OSD_STATUS_OUT_UP:
3888 stop_daemon(args.cluster, osd_id)
3889 elif status_code == OSD_STATUS_OUT_DOWN:
3890 LOG.info("OSD already out/down. Do not do anything now.")
3891 return
3892
3893 if not args.once:
3894 # remove 'ready', 'active', and INIT-specific files.
3895 _remove_osd_directory_files(mounted_path, args.cluster)
3896
3897 # Write deactivate to osd directory!
3898 with open(os.path.join(mounted_path, 'deactive'), 'w'):
3899 path_set_context(os.path.join(mounted_path, 'deactive'))
3900
3901 unmount(mounted_path)
3902 LOG.info("Umount `%s` successfully.", mounted_path)
3903
3904 if dmcrypt:
3905 lockbox = os.path.join(STATEDIR, 'osd-lockbox')
3906 command(['umount', os.path.join(lockbox, target_dev['uuid'])])
3907
3908 dmcrypt_unmap(target_dev['uuid'])
3909 for name in Space.NAMES:
3910 if name + '_uuid' in target_dev:
3911 dmcrypt_unmap(target_dev[name + '_uuid'])
3912
3913 ###########################
3914
3915
3916 def _remove_from_crush_map(cluster, osd_id):
3917 LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
3918 command([
3919 'ceph',
3920 'osd',
3921 'crush',
3922 'remove',
3923 'osd.%s' % osd_id,
3924 ])
3925
3926
3927 def _delete_osd_auth_key(cluster, osd_id):
3928 LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
3929 command([
3930 'ceph',
3931 'auth',
3932 'del',
3933 'osd.%s' % osd_id,
3934 ])
3935
3936
3937 def _deallocate_osd_id(cluster, osd_id):
3938 LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
3939 command([
3940 'ceph',
3941 'osd',
3942 'rm',
3943 '%s' % osd_id,
3944 ])
3945
3946
3947 def _remove_lockbox(uuid, cluster):
3948 command([
3949 'ceph',
3950 '--cluster', cluster,
3951 'auth',
3952 'del',
3953 'client.osd-lockbox.' + uuid,
3954 ])
3955 command([
3956 'ceph',
3957 '--cluster', cluster,
3958 'config-key',
3959 'del',
3960 'dm-crypt/osd/' + uuid + '/luks',
3961 ])
3962 lockbox = os.path.join(STATEDIR, 'osd-lockbox')
3963 if not os.path.exists(lockbox):
3964 return
3965 canonical = os.path.join(lockbox, uuid)
3966 command(['umount', canonical])
3967 for name in os.listdir(lockbox):
3968 path = os.path.join(lockbox, name)
3969 if os.path.islink(path) and os.readlink(path) == canonical:
3970 os.unlink(path)
3971
3972
3973 def destroy_lookup_device(args, predicate, description):
3974 devices = list_devices()
3975 for device in devices:
3976 for partition in device.get('partitions', []):
3977 if partition['type'] == 'lockbox':
3978 if not is_mounted(partition['path']):
3979 main_activate_lockbox_protected(
3980 argparse.Namespace(verbose=args.verbose,
3981 path=partition['path']))
3982 for device in devices:
3983 for partition in device.get('partitions', []):
3984 if partition['dmcrypt']:
3985 dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
3986 if dmcrypt_path:
3987 unmap = False
3988 else:
3989 dmcrypt_path = dmcrypt_map(partition['path'],
3990 args.dmcrypt_key_dir)
3991 unmap = True
3992 list_dev_osd(dmcrypt_path, {}, partition)
3993 if unmap:
3994 dmcrypt_unmap(partition['uuid'])
3995 dmcrypt = True
3996 else:
3997 dmcrypt = False
3998 if predicate(partition):
3999 return dmcrypt, partition
4000 raise Error('found no device matching ', description)
4001
4002
4003 def main_destroy(args):
4004 with activate_lock:
4005 main_destroy_locked(args)
4006
4007
4008 def main_destroy_locked(args):
4009 osd_id = args.destroy_by_id
4010 path = args.path
4011 target_dev = None
4012
4013 if path:
4014 if not is_partition(path):
4015 raise Error(path + " must be a partition device")
4016 path = os.path.realpath(path)
4017
4018 if path:
4019 (dmcrypt, target_dev) = destroy_lookup_device(
4020 args, lambda x: x.get('path') == path,
4021 path)
4022 elif osd_id:
4023 (dmcrypt, target_dev) = destroy_lookup_device(
4024 args, lambda x: x.get('whoami') == osd_id,
4025 'osd id ' + str(osd_id))
4026
4027 osd_id = target_dev['whoami']
4028 dev_path = target_dev['path']
4029 if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
4030 base_dev = get_partition_base_mpath(dev_path)
4031 else:
4032 base_dev = get_partition_base(dev_path)
4033
4034 # Before osd deactivate, we cannot destroy it
4035 status_code = _check_osd_status(args.cluster, osd_id)
4036 if status_code != OSD_STATUS_OUT_DOWN and \
4037 status_code != OSD_STATUS_IN_DOWN:
4038 raise Error("Could not destroy the active osd. (osd-id: %s)" %
4039 osd_id)
4040
4041 # Remove OSD from crush map
4042 _remove_from_crush_map(args.cluster, osd_id)
4043
4044 # Remove OSD cephx key
4045 _delete_osd_auth_key(args.cluster, osd_id)
4046
4047 # Deallocate OSD ID
4048 _deallocate_osd_id(args.cluster, osd_id)
4049
4050 # we remove the crypt map and device mapper (if dmcrypt is True)
4051 if dmcrypt:
4052 for name in Space.NAMES:
4053 if target_dev.get(name + '_uuid'):
4054 dmcrypt_unmap(target_dev[name + '_uuid'])
4055 _remove_lockbox(target_dev['uuid'], args.cluster)
4056
4057 # Check zap flag. If we found zap flag, we need to find device for
4058 # destroy this osd data.
4059 if args.zap is True:
4060 # erase the osd data
4061 LOG.info("Prepare to zap the device %s" % base_dev)
4062 zap(base_dev)
4063
4064
4065 def get_space_osd_uuid(name, path):
4066 if not os.path.exists(path):
4067 raise Error('%s does not exist' % path)
4068
4069 mode = os.stat(path).st_mode
4070 if not stat.S_ISBLK(mode):
4071 raise Error('%s is not a block device' % path)
4072
4073 if (is_partition(path) and
4074 get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
4075 PTYPE['mpath']['block']['ready']) and
4076 not is_mpath(path)):
4077 raise Error('%s is not a multipath block device' %
4078 path)
4079
4080 try:
4081 out = _check_output(
4082 args=[
4083 'ceph-osd',
4084 '--get-device-fsid',
4085 path,
4086 ],
4087 close_fds=True,
4088 )
4089 except subprocess.CalledProcessError as e:
4090 raise Error(
4091 'failed to get osd uuid/fsid from %s' % name,
4092 e,
4093 )
4094 value = str(out).split('\n', 1)[0]
4095 LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
4096 return value
4097
4098
4099 def main_activate_space(name, args):
4100 if not os.path.exists(args.dev):
4101 raise Error('%s does not exist' % args.dev)
4102
4103 cluster = None
4104 osd_id = None
4105 osd_uuid = None
4106 dev = None
4107 with activate_lock:
4108 if args.dmcrypt:
4109 dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
4110 else:
4111 dev = args.dev
4112 # FIXME: For an encrypted journal dev, does this return the
4113 # cyphertext or plaintext dev uuid!? Also, if the journal is
4114 # encrypted, is the data partition also always encrypted, or
4115 # are mixed pairs supported!?
4116 osd_uuid = get_space_osd_uuid(name, dev)
4117 path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())
4118
4119 if is_suppressed(path):
4120 LOG.info('suppressed activate request on %s', path)
4121 return
4122
4123 # warn and exit with 0 if the data device is not up, yet
4124 # data device will do the activation
4125 if not os.access(path, os.F_OK):
4126 LOG.info("activate: OSD device not present, not starting, yet")
4127 return
4128
4129 (cluster, osd_id) = mount_activate(
4130 dev=path,
4131 activate_key_template=args.activate_key_template,
4132 init=args.mark_init,
4133 dmcrypt=args.dmcrypt,
4134 dmcrypt_key_dir=args.dmcrypt_key_dir,
4135 reactivate=args.reactivate,
4136 )
4137
4138 start_daemon(
4139 cluster=cluster,
4140 osd_id=osd_id,
4141 )
4142
4143
4144 ###########################
4145
4146
4147 def main_activate_all(args):
4148 dir = '/dev/disk/by-parttypeuuid'
4149 LOG.debug('Scanning %s', dir)
4150 if not os.path.exists(dir):
4151 return
4152 err = False
4153 for name in os.listdir(dir):
4154 if name.find('.') < 0:
4155 continue
4156 (tag, uuid) = name.split('.')
4157
4158 if tag in Ptype.get_ready_by_name('osd'):
4159
4160 if Ptype.is_dmcrypt(tag, 'osd'):
4161 path = os.path.join('/dev/mapper', uuid)
4162 else:
4163 path = os.path.join(dir, name)
4164
4165 if is_suppressed(path):
4166 LOG.info('suppressed activate request on %s', path)
4167 continue
4168
4169 LOG.info('Activating %s', path)
4170 with activate_lock:
4171 try:
4172 # never map dmcrypt cyphertext devices
4173 (cluster, osd_id) = mount_activate(
4174 dev=path,
4175 activate_key_template=args.activate_key_template,
4176 init=args.mark_init,
4177 dmcrypt=False,
4178 dmcrypt_key_dir='',
4179 )
4180 start_daemon(
4181 cluster=cluster,
4182 osd_id=osd_id,
4183 )
4184
4185 except Exception as e:
4186 print(
4187 '{prog}: {msg}'.format(prog=args.prog, msg=e),
4188 file=sys.stderr
4189 )
4190
4191 err = True
4192
4193 if err:
4194 raise Error('One or more partitions failed to activate')
4195
4196
4197 ###########################
4198
4199 def is_swap(dev):
4200 dev = os.path.realpath(dev)
4201 with open(PROCDIR + '/swaps', 'rb') as proc_swaps:
4202 for line in proc_swaps.readlines()[1:]:
4203 fields = line.split()
4204 if len(fields) < 3:
4205 continue
4206 swaps_dev = fields[0]
4207 if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
4208 swaps_dev = os.path.realpath(swaps_dev)
4209 if swaps_dev == dev:
4210 return True
4211 return False
4212
4213
4214 def get_oneliner(base, name):
4215 path = os.path.join(base, name)
4216 if os.path.isfile(path):
4217 with open(path, 'rb') as _file:
4218 return _bytes2str(_file.readline().rstrip())
4219 return None
4220
4221
4222 def get_dev_fs(dev):
4223 if FREEBSD:
4224 fstype, _, ret = command(
4225 [
4226 'fstyp',
4227 '-u',
4228 dev,
4229 ],
4230 )
4231 if ret == 0:
4232 return fstype
4233 else:
4234 fscheck, _, _ = command(
4235 [
4236 'blkid',
4237 '-s',
4238 'TYPE',
4239 dev,
4240 ],
4241 )
4242 if 'TYPE' in fscheck:
4243 fstype = fscheck.split()[1].split('"')[1]
4244 return fstype
4245 return None
4246
4247
4248 def split_dev_base_partnum(dev):
4249 if is_mpath(dev):
4250 partnum = partnum_mpath(dev)
4251 base = get_partition_base_mpath(dev)
4252 else:
4253 b = block_path(dev)
4254 partnum = open(os.path.join(b, 'partition')).read().strip()
4255 base = get_partition_base(dev)
4256 return base, partnum
4257
4258
4259 def get_partition_type(part):
4260 return get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
4261
4262
4263 def get_partition_uuid(part):
4264 return get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
4265
4266
4267 def get_blkid_partition_info(dev, what=None):
4268 out, _, _ = command(
4269 [
4270 'blkid',
4271 '-o',
4272 'udev',
4273 '-p',
4274 dev,
4275 ]
4276 )
4277 p = {}
4278 for line in out.splitlines():
4279 (key, value) = line.split('=')
4280 p[key] = value
4281 if what:
4282 return p.get(what)
4283 else:
4284 return p
4285
4286
4287 def more_osd_info(path, uuid_map, desc):
4288 desc['ceph_fsid'] = get_oneliner(path, 'ceph_fsid')
4289 if desc['ceph_fsid']:
4290 desc['cluster'] = find_cluster_by_uuid(desc['ceph_fsid'])
4291 desc['whoami'] = get_oneliner(path, 'whoami')
4292 for name in Space.NAMES:
4293 uuid = get_oneliner(path, name + '_uuid')
4294 if uuid:
4295 desc[name + '_uuid'] = uuid.lower()
4296 if desc[name + '_uuid'] in uuid_map:
4297 desc[name + '_dev'] = uuid_map[desc[name + '_uuid']]
4298
4299
4300 def list_dev_osd(dev, uuid_map, desc):
4301 desc['mount'] = is_mounted(dev)
4302 desc['fs_type'] = get_dev_fs(dev)
4303 desc['state'] = 'unprepared'
4304 if desc['mount']:
4305 desc['state'] = 'active'
4306 more_osd_info(desc['mount'], uuid_map, desc)
4307 elif desc['fs_type']:
4308 try:
4309 tpath = mount(dev=dev, fstype=desc['fs_type'], options='')
4310 if tpath:
4311 try:
4312 magic = get_oneliner(tpath, 'magic')
4313 if magic is not None:
4314 desc['magic'] = magic
4315 desc['state'] = 'prepared'
4316 more_osd_info(tpath, uuid_map, desc)
4317 finally:
4318 unmount(tpath)
4319 except MountError:
4320 pass
4321
4322
4323 def list_dev_lockbox(dev, uuid_map, desc):
4324 desc['mount'] = is_mounted(dev)
4325 desc['fs_type'] = get_dev_fs(dev)
4326 desc['state'] = 'unprepared'
4327 if desc['mount']:
4328 desc['state'] = 'active'
4329 desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
4330 elif desc['fs_type']:
4331 try:
4332 tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
4333 args = ['mount', '-t', 'ext4', dev, tpath]
4334 LOG.debug('Mounting lockbox ' + str(" ".join(args)))
4335 command_check_call(args)
4336 magic = get_oneliner(tpath, 'magic')
4337 if magic is not None:
4338 desc['magic'] = magic
4339 desc['state'] = 'prepared'
4340 desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
4341 unmount(tpath)
4342 except subprocess.CalledProcessError:
4343 pass
4344 if desc.get('osd_uuid') in uuid_map:
4345 desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4346
4347
4348 def list_format_lockbox_plain(dev):
4349 desc = []
4350 if dev.get('lockbox_for'):
4351 desc.append('for ' + dev['lockbox_for'])
4352 elif dev.get('osd_uuid'):
4353 desc.append('for osd ' + dev['osd_uuid'])
4354 return desc
4355
4356
4357 def list_format_more_osd_info_plain(dev):
4358 desc = []
4359 if dev.get('ceph_fsid'):
4360 if dev.get('cluster'):
4361 desc.append('cluster ' + dev['cluster'])
4362 else:
4363 desc.append('unknown cluster ' + dev['ceph_fsid'])
4364 if dev.get('whoami'):
4365 desc.append('osd.%s' % dev['whoami'])
4366 for name in Space.NAMES:
4367 if dev.get(name + '_dev'):
4368 desc.append(name + ' %s' % dev[name + '_dev'])
4369 return desc
4370
4371
4372 def list_format_dev_plain(dev, prefix=''):
4373 desc = []
4374 if dev['ptype'] == PTYPE['regular']['osd']['ready']:
4375 desc = (['ceph data', dev['state']] +
4376 list_format_more_osd_info_plain(dev))
4377 elif dev['ptype'] in (PTYPE['regular']['lockbox']['ready'],
4378 PTYPE['mpath']['lockbox']['ready']):
4379 desc = (['ceph lockbox', dev['state']] +
4380 list_format_lockbox_plain(dev))
4381 elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
4382 dmcrypt = dev['dmcrypt']
4383 if not dmcrypt['holders']:
4384 desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
4385 'not currently mapped']
4386 elif len(dmcrypt['holders']) == 1:
4387 holder = get_dev_path(dmcrypt['holders'][0])
4388 desc = ['ceph data (dmcrypt %s %s)' %
4389 (dmcrypt['type'], holder)]
4390 desc += list_format_more_osd_info_plain(dev)
4391 else:
4392 desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
4393 'holders: ' + ','.join(dmcrypt['holders'])]
4394 elif Ptype.is_regular_space(dev['ptype']):
4395 name = Ptype.space_ptype_to_name(dev['ptype'])
4396 desc.append('ceph ' + name)
4397 if dev.get(name + '_for'):
4398 desc.append('for %s' % dev[name + '_for'])
4399 elif Ptype.is_dmcrypt_space(dev['ptype']):
4400 name = Ptype.space_ptype_to_name(dev['ptype'])
4401 dmcrypt = dev['dmcrypt']
4402 if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
4403 holder = get_dev_path(dmcrypt['holders'][0])
4404 desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
4405 (dmcrypt['type'], holder)]
4406 else:
4407 desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
4408 if dev.get(name + '_for'):
4409 desc.append('for %s' % dev[name + '_for'])
4410 else:
4411 desc.append(dev['type'])
4412 if dev.get('fs_type'):
4413 desc.append(dev['fs_type'])
4414 elif dev.get('ptype'):
4415 desc.append(dev['ptype'])
4416 if dev.get('mount'):
4417 desc.append('mounted on %s' % dev['mount'])
4418 return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4419
4420
4421 def list_format_plain(devices):
4422 lines = []
4423 for device in devices:
4424 if device.get('partitions'):
4425 lines.append('%s :' % device['path'])
4426 for p in sorted(device['partitions'], key=lambda x: x['path']):
4427 lines.append(list_format_dev_plain(dev=p,
4428 prefix=' '))
4429 else:
4430 lines.append(list_format_dev_plain(dev=device,
4431 prefix=''))
4432 return "\n".join(lines)
4433
4434
4435 def list_dev(dev, uuid_map, space_map):
4436 info = {
4437 'path': dev,
4438 'dmcrypt': {},
4439 }
4440
4441 info['is_partition'] = is_partition(dev)
4442 if info['is_partition']:
4443 ptype = get_partition_type(dev)
4444 info['uuid'] = get_partition_uuid(dev)
4445 else:
4446 ptype = 'unknown'
4447 info['ptype'] = ptype
4448 LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
4449 if ptype in (PTYPE['regular']['osd']['ready'],
4450 PTYPE['mpath']['osd']['ready']):
4451 info['type'] = 'data'
4452 if ptype == PTYPE['mpath']['osd']['ready']:
4453 info['multipath'] = True
4454 list_dev_osd(dev, uuid_map, info)
4455 elif ptype in (PTYPE['regular']['lockbox']['ready'],
4456 PTYPE['mpath']['lockbox']['ready']):
4457 info['type'] = 'lockbox'
4458 if ptype == PTYPE['mpath']['osd']['ready']:
4459 info['multipath'] = True
4460 list_dev_lockbox(dev, uuid_map, info)
4461 elif ptype == PTYPE['plain']['osd']['ready']:
4462 holders = is_held(dev)
4463 info['type'] = 'data'
4464 info['dmcrypt']['holders'] = holders
4465 info['dmcrypt']['type'] = 'plain'
4466 if len(holders) == 1:
4467 list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
4468 elif ptype == PTYPE['luks']['osd']['ready']:
4469 holders = is_held(dev)
4470 info['type'] = 'data'
4471 info['dmcrypt']['holders'] = holders
4472 info['dmcrypt']['type'] = 'LUKS'
4473 if len(holders) == 1:
4474 list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
4475 elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
4476 name = Ptype.space_ptype_to_name(ptype)
4477 info['type'] = name
4478 if ptype == PTYPE['mpath'][name]['ready']:
4479 info['multipath'] = True
4480 if info.get('uuid') in space_map:
4481 info[name + '_for'] = space_map[info['uuid']]
4482 elif Ptype.is_plain_space(ptype):
4483 name = Ptype.space_ptype_to_name(ptype)
4484 holders = is_held(dev)
4485 info['type'] = name
4486 info['dmcrypt']['type'] = 'plain'
4487 info['dmcrypt']['holders'] = holders
4488 if info.get('uuid') in space_map:
4489 info[name + '_for'] = space_map[info['uuid']]
4490 elif Ptype.is_luks_space(ptype):
4491 name = Ptype.space_ptype_to_name(ptype)
4492 holders = is_held(dev)
4493 info['type'] = name
4494 info['dmcrypt']['type'] = 'LUKS'
4495 info['dmcrypt']['holders'] = holders
4496 if info.get('uuid') in space_map:
4497 info[name + '_for'] = space_map[info['uuid']]
4498 else:
4499 path = is_mounted(dev)
4500 fs_type = get_dev_fs(dev)
4501 if is_swap(dev):
4502 info['type'] = 'swap'
4503 else:
4504 info['type'] = 'other'
4505 if fs_type:
4506 info['fs_type'] = fs_type
4507 if path:
4508 info['mount'] = path
4509
4510 return info
4511
4512
4513 def list_devices():
4514 partmap = list_all_partitions()
4515
4516 uuid_map = {}
4517 space_map = {}
4518 for base, parts in sorted(partmap.items()):
4519 for p in parts:
4520 dev = get_dev_path(p)
4521 part_uuid = get_partition_uuid(dev)
4522 if part_uuid:
4523 uuid_map[part_uuid] = dev
4524 ptype = get_partition_type(dev)
4525 LOG.debug("main_list: " + dev +
4526 " ptype = " + str(ptype) +
4527 " uuid = " + str(part_uuid))
4528 if ptype in Ptype.get_ready_by_name('osd'):
4529 if Ptype.is_dmcrypt(ptype, 'osd'):
4530 holders = is_held(dev)
4531 if len(holders) != 1:
4532 continue
4533 dev_to_mount = get_dev_path(holders[0])
4534 else:
4535 dev_to_mount = dev
4536
4537 fs_type = get_dev_fs(dev_to_mount)
4538 if fs_type is not None:
4539 mount_options = get_mount_options(cluster='ceph',
4540 fs_type=fs_type)
4541 try:
4542 tpath = mount(dev=dev_to_mount,
4543 fstype=fs_type, options=mount_options)
4544 try:
4545 for name in Space.NAMES:
4546 space_uuid = get_oneliner(tpath,
4547 name + '_uuid')
4548 if space_uuid:
4549 space_map[space_uuid.lower()] = dev
4550 finally:
4551 unmount(tpath)
4552 except MountError:
4553 pass
4554
4555 LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
4556 str(uuid_map) + ", space_map = " + str(space_map))
4557
4558 devices = []
4559 for base, parts in sorted(partmap.items()):
4560 if parts:
4561 disk = {'path': get_dev_path(base)}
4562 partitions = []
4563 for p in sorted(parts):
4564 partitions.append(list_dev(get_dev_path(p),
4565 uuid_map,
4566 space_map))
4567 disk['partitions'] = partitions
4568 devices.append(disk)
4569 else:
4570 device = list_dev(get_dev_path(base), uuid_map, space_map)
4571 device['path'] = get_dev_path(base)
4572 devices.append(device)
4573 LOG.debug("list_devices: " + str(devices))
4574 return devices
4575
4576
4577 def list_zfs():
4578 try:
4579 out, err, ret = command(
4580 [
4581 'zfs',
4582 'list',
4583 '-o', 'name,mountpoint'
4584 ]
4585 )
4586 except subprocess.CalledProcessError as e:
4587 LOG.info('zfs list -o name,mountpoint '
4588 'fails.\n (Error: %s)' % e)
4589 raise
4590 lines = out.splitlines()
4591 for line in lines[1:]:
4592 vdevline = line.split()
4593 if os.path.exists(os.path.join(vdevline[1], 'active')):
4594 elems = os.path.split(vdevline[1])
4595 print(vdevline[0], "ceph data, active, cluster ceph,", elems[1],
4596 "mounted on:", vdevline[1])
4597 else:
4598 print(vdevline[0] + " other, zfs, mounted on: " + vdevline[1])
4599
4600
4601 def main_list(args):
4602 with activate_lock:
4603 if FREEBSD:
4604 main_list_freebsd(args)
4605 else:
4606 main_list_protected(args)
4607
4608
4609 def main_list_protected(args):
4610 devices = list_devices()
4611 if args.path:
4612 paths = []
4613 for path in args.path:
4614 if os.path.exists(path):
4615 paths.append(os.path.realpath(path))
4616 else:
4617 paths.append(path)
4618 selected_devices = []
4619 for device in devices:
4620 for path in paths:
4621 if re.search(path + '$', device['path']):
4622 selected_devices.append(device)
4623 else:
4624 selected_devices = devices
4625 if args.format == 'json':
4626 print(json.dumps(selected_devices))
4627 else:
4628 output = list_format_plain(selected_devices)
4629 if output:
4630 print(output)
4631
4632
4633 def main_list_freebsd(args):
4634 # Currently accomodate only ZFS Filestore partitions
4635 # return a list of VDEVs and mountpoints
4636 # > zfs list
4637 # NAME USED AVAIL REFER MOUNTPOINT
4638 # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
4639 # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
4640 list_zfs()
4641
4642
4643 ###########################
4644 #
4645 # Mark devices that we want to suppress activates on with a
4646 # file like
4647 #
4648 # /var/lib/ceph/tmp/suppress-activate.sdb
4649 #
4650 # where the last bit is the sanitized device name (/dev/X without the
4651 # /dev/ prefix) and the is_suppress() check matches a prefix. That
4652 # means suppressing sdb will stop activate on sdb1, sdb2, etc.
4653 #
4654
4655 def is_suppressed(path):
4656 disk = os.path.realpath(path)
4657 try:
4658 if (not disk.startswith('/dev/') or
4659 not stat.S_ISBLK(os.lstat(disk).st_mode)):
4660 return False
4661 base = get_dev_name(disk)
4662 while len(base):
4663 if os.path.exists(SUPPRESS_PREFIX + base): # noqa
4664 return True
4665 base = base[:-1]
4666 except:
4667 return False
4668
4669
4670 def set_suppress(path):
4671 disk = os.path.realpath(path)
4672 if not os.path.exists(disk):
4673 raise Error('does not exist', path)
4674 if not stat.S_ISBLK(os.lstat(path).st_mode):
4675 raise Error('not a block device', path)
4676 base = get_dev_name(disk)
4677
4678 with open(SUPPRESS_PREFIX + base, 'w') as f: # noqa
4679 pass
4680 LOG.info('set suppress flag on %s', base)
4681
4682
4683 def unset_suppress(path):
4684 disk = os.path.realpath(path)
4685 if not os.path.exists(disk):
4686 raise Error('does not exist', path)
4687 if not stat.S_ISBLK(os.lstat(path).st_mode):
4688 raise Error('not a block device', path)
4689 assert disk.startswith('/dev/')
4690 base = get_dev_name(disk)
4691
4692 fn = SUPPRESS_PREFIX + base # noqa
4693 if not os.path.exists(fn):
4694 raise Error('not marked as suppressed', path)
4695
4696 try:
4697 os.unlink(fn)
4698 LOG.info('unset suppress flag on %s', base)
4699 except OSError as e:
4700 raise Error('failed to unsuppress', e)
4701
4702
4703 def main_suppress(args):
4704 set_suppress(args.path)
4705
4706
4707 def main_unsuppress(args):
4708 unset_suppress(args.path)
4709
4710
4711 def main_zap(args):
4712 for dev in args.dev:
4713 zap(dev)
4714
4715
4716 def main_trigger(args):
4717 LOG.debug("main_trigger: " + str(args))
4718 if is_systemd() and not args.sync:
4719 # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
4720 escaped_dev = args.dev[1:].replace('-', '\\x2d')
4721 service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
4722 LOG.info('systemd detected, triggering %s' % service)
4723 command(
4724 [
4725 'systemctl',
4726 '--no-block',
4727 'restart',
4728 service,
4729 ]
4730 )
4731 return
4732 if is_upstart() and not args.sync:
4733 LOG.info('upstart detected, triggering ceph-disk task')
4734 command(
4735 [
4736 'initctl',
4737 'emit',
4738 'ceph-disk',
4739 'dev={dev}'.format(dev=args.dev),
4740 'pid={pid}'.format(pid=os.getpid()),
4741 ]
4742 )
4743 return
4744
4745 if get_ceph_user() == 'ceph':
4746 command_check_call(['chown', 'ceph:ceph', args.dev])
4747 parttype = get_partition_type(args.dev)
4748 partid = get_partition_uuid(args.dev)
4749
4750 LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
4751 dev=args.dev,
4752 parttype=parttype,
4753 partid=partid,
4754 ))
4755
4756 ceph_disk = ['ceph-disk']
4757 if args.verbose:
4758 ceph_disk.append('--verbose')
4759
4760 if parttype in (PTYPE['regular']['osd']['ready'],
4761 PTYPE['mpath']['osd']['ready']):
4762 out, err, ret = command(
4763 ceph_disk +
4764 [
4765 'activate',
4766 args.dev,
4767 ]
4768 )
4769
4770 elif parttype in (PTYPE['plain']['osd']['ready'],
4771 PTYPE['luks']['osd']['ready']):
4772 out, err, ret = command(
4773 ceph_disk +
4774 [
4775 'activate',
4776 '--dmcrypt',
4777 args.dev,
4778 ]
4779 )
4780
4781 elif parttype in (PTYPE['regular']['journal']['ready'],
4782 PTYPE['mpath']['journal']['ready']):
4783 out, err, ret = command(
4784 ceph_disk +
4785 [
4786 'activate-journal',
4787 args.dev,
4788 ]
4789 )
4790
4791 elif parttype in (PTYPE['plain']['journal']['ready'],
4792 PTYPE['luks']['journal']['ready']):
4793 out, err, ret = command(
4794 ceph_disk +
4795 [
4796 'activate-journal',
4797 '--dmcrypt',
4798 args.dev,
4799 ]
4800 )
4801
4802 elif parttype in (PTYPE['regular']['block']['ready'],
4803 PTYPE['regular']['block.db']['ready'],
4804 PTYPE['regular']['block.wal']['ready'],
4805 PTYPE['mpath']['block']['ready'],
4806 PTYPE['mpath']['block.db']['ready'],
4807 PTYPE['mpath']['block.wal']['ready']):
4808 out, err, ret = command(
4809 ceph_disk +
4810 [
4811 'activate-block',
4812 args.dev,
4813 ]
4814 )
4815
4816 elif parttype in (PTYPE['plain']['block']['ready'],
4817 PTYPE['plain']['block.db']['ready'],
4818 PTYPE['plain']['block.wal']['ready'],
4819 PTYPE['luks']['block']['ready'],
4820 PTYPE['luks']['block.db']['ready'],
4821 PTYPE['luks']['block.wal']['ready']):
4822 out, err, ret = command(
4823 ceph_disk +
4824 [
4825 'activate-block',
4826 '--dmcrypt',
4827 args.dev,
4828 ]
4829 )
4830
4831 elif parttype in (PTYPE['regular']['lockbox']['ready'],
4832 PTYPE['mpath']['lockbox']['ready']):
4833 out, err, ret = command(
4834 ceph_disk +
4835 [
4836 'activate-lockbox',
4837 args.dev,
4838 ]
4839 )
4840
4841 else:
4842 raise Error('unrecognized partition type %s' % parttype)
4843
4844 if ret != 0:
4845 LOG.info(out)
4846 LOG.error(err)
4847 raise Error('return code ' + str(ret))
4848 else:
4849 LOG.debug(out)
4850 LOG.debug(err)
4851
4852
4853 def main_fix(args):
4854 # A hash table containing 'path': ('uid', 'gid', blocking, recursive)
4855 fix_table = [
4856 ('/usr/bin/ceph-mon', 'root', 'root', True, False),
4857 ('/usr/bin/ceph-mds', 'root', 'root', True, False),
4858 ('/usr/bin/ceph-osd', 'root', 'root', True, False),
4859 ('/usr/bin/radosgw', 'root', 'root', True, False),
4860 ('/etc/ceph', 'root', 'root', True, True),
4861 ('/var/run/ceph', 'ceph', 'ceph', True, True),
4862 ('/var/log/ceph', 'ceph', 'ceph', True, True),
4863 ('/var/log/radosgw', 'ceph', 'ceph', True, True),
4864 ('/var/lib/ceph', 'ceph', 'ceph', True, False),
4865 ]
4866
4867 # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
4868 for directory in glob.glob('/var/lib/ceph/*'):
4869 if directory == '/var/lib/ceph/osd':
4870 fix_table.append((directory, 'ceph', 'ceph', True, False))
4871 else:
4872 fix_table.append((directory, 'ceph', 'ceph', True, True))
4873
4874 # Relabel/chown the osds recursively and in parallel
4875 for directory in glob.glob('/var/lib/ceph/osd/*'):
4876 fix_table.append((directory, 'ceph', 'ceph', False, True))
4877
4878 LOG.debug("fix_table: " + str(fix_table))
4879
4880 # The lists of background processes
4881 all_processes = []
4882 permissions_processes = []
4883 selinux_processes = []
4884
4885 # Preliminary checks
4886 if args.selinux or args.all:
4887 out, err, ret = command(['selinuxenabled'])
4888 if ret:
4889 LOG.error('SELinux is not enabled, please enable it, first.')
4890 raise Error('no SELinux')
4891
4892 for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
4893 out, err, ret = command(['pgrep', daemon])
4894 if ret == 0:
4895 LOG.error(daemon + ' is running, please stop it, first')
4896 raise Error(daemon + ' running')
4897
4898 # Relabel the basic system data without the ceph files
4899 if args.system or args.all:
4900 c = ['restorecon', '-R', '/']
4901 for directory, _, _, _, _ in fix_table:
4902 # Skip /var/lib/ceph subdirectories
4903 if directory.startswith('/var/lib/ceph/'):
4904 continue
4905 c.append('-e')
4906 c.append(directory)
4907
4908 out, err, ret = command(c)
4909
4910 if ret:
4911 LOG.error("Failed to restore labels of the underlying system")
4912 LOG.error(err)
4913 raise Error("basic restore failed")
4914
4915 # Use find to relabel + chown ~simultaenously
4916 if args.all:
4917 for directory, uid, gid, blocking, recursive in fix_table:
4918 # Skip directories/files that are not installed
4919 if not os.access(directory, os.F_OK):
4920 continue
4921
4922 c = [
4923 'find',
4924 directory,
4925 '-exec',
4926 'chown',
4927 ':'.join((uid, gid)),
4928 '{}',
4929 '+',
4930 '-exec',
4931 'restorecon',
4932 '{}',
4933 '+',
4934 ]
4935
4936 # Just pass -maxdepth 0 for non-recursive calls
4937 if not recursive:
4938 c += ['-maxdepth', '0']
4939
4940 if blocking:
4941 out, err, ret = command(c)
4942
4943 if ret:
4944 LOG.error("Failed to fix " + directory)
4945 LOG.error(err)
4946 raise Error(directory + " fix failed")
4947 else:
4948 all_processes.append(command_init(c))
4949
4950 LOG.debug("all_processes: " + str(all_processes))
4951 for process in all_processes:
4952 out, err, ret = command_wait(process)
4953 if ret:
4954 LOG.error("A background find process failed")
4955 LOG.error(err)
4956 raise Error("background failed")
4957
4958 # Fix permissions
4959 if args.permissions:
4960 for directory, uid, gid, blocking, recursive in fix_table:
4961 # Skip directories/files that are not installed
4962 if not os.access(directory, os.F_OK):
4963 continue
4964
4965 if recursive:
4966 c = [
4967 'chown',
4968 '-R',
4969 ':'.join((uid, gid)),
4970 directory
4971 ]
4972 else:
4973 c = [
4974 'chown',
4975 ':'.join((uid, gid)),
4976 directory
4977 ]
4978
4979 if blocking:
4980 out, err, ret = command(c)
4981
4982 if ret:
4983 LOG.error("Failed to chown " + directory)
4984 LOG.error(err)
4985 raise Error(directory + " chown failed")
4986 else:
4987 permissions_processes.append(command_init(c))
4988
4989 LOG.debug("permissions_processes: " + str(permissions_processes))
4990 for process in permissions_processes:
4991 out, err, ret = command_wait(process)
4992 if ret:
4993 LOG.error("A background permissions process failed")
4994 LOG.error(err)
4995 raise Error("background failed")
4996
4997 # Fix SELinux labels
4998 if args.selinux:
4999 for directory, uid, gid, blocking, recursive in fix_table:
5000 # Skip directories/files that are not installed
5001 if not os.access(directory, os.F_OK):
5002 continue
5003
5004 if recursive:
5005 c = [
5006 'restorecon',
5007 '-R',
5008 directory
5009 ]
5010 else:
5011 c = [
5012 'restorecon',
5013 directory
5014 ]
5015
5016 if blocking:
5017 out, err, ret = command(c)
5018
5019 if ret:
5020 LOG.error("Failed to restore labels for " + directory)
5021 LOG.error(err)
5022 raise Error(directory + " relabel failed")
5023 else:
5024 selinux_processes.append(command_init(c))
5025
5026 LOG.debug("selinux_processes: " + str(selinux_processes))
5027 for process in selinux_processes:
5028 out, err, ret = command_wait(process)
5029 if ret:
5030 LOG.error("A background selinux process failed")
5031 LOG.error(err)
5032 raise Error("background failed")
5033
5034 LOG.info(
5035 "The ceph files has been fixed, please reboot "
5036 "the system for the changes to take effect."
5037 )
5038
5039
5040 def setup_statedir(dir):
5041 # XXX The following use of globals makes linting
5042 # really hard. Global state in Python is iffy and
5043 # should be avoided.
5044 global STATEDIR
5045 STATEDIR = dir
5046
5047 if not os.path.exists(STATEDIR):
5048 os.mkdir(STATEDIR)
5049 if not os.path.exists(STATEDIR + "/tmp"):
5050 os.mkdir(STATEDIR + "/tmp")
5051
5052 global prepare_lock
5053 prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
5054
5055 global activate_lock
5056 activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
5057
5058 global SUPPRESS_PREFIX
5059 SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5060
5061
5062 def setup_sysconfdir(dir):
5063 global SYSCONFDIR
5064 SYSCONFDIR = dir
5065
5066
5067 def parse_args(argv):
5068 parser = argparse.ArgumentParser(
5069 'ceph-disk',
5070 )
5071 parser.add_argument(
5072 '-v', '--verbose',
5073 action='store_true', default=None,
5074 help='be more verbose',
5075 )
5076 parser.add_argument(
5077 '--log-stdout',
5078 action='store_true', default=None,
5079 help='log to stdout',
5080 )
5081 parser.add_argument(
5082 '--prepend-to-path',
5083 metavar='PATH',
5084 default='/usr/bin',
5085 help=('prepend PATH to $PATH for backward compatibility '
5086 '(default /usr/bin)'),
5087 )
5088 parser.add_argument(
5089 '--statedir',
5090 metavar='PATH',
5091 default='/var/lib/ceph',
5092 help=('directory in which ceph state is preserved '
5093 '(default /var/lib/ceph)'),
5094 )
5095 parser.add_argument(
5096 '--sysconfdir',
5097 metavar='PATH',
5098 default='/etc/ceph',
5099 help=('directory in which ceph configuration files are found '
5100 '(default /etc/ceph)'),
5101 )
5102 parser.add_argument(
5103 '--setuser',
5104 metavar='USER',
5105 default=None,
5106 help='use the given user for subprocesses, rather than ceph or root'
5107 )
5108 parser.add_argument(
5109 '--setgroup',
5110 metavar='GROUP',
5111 default=None,
5112 help='use the given group for subprocesses, rather than ceph or root'
5113 )
5114 parser.set_defaults(
5115 # we want to hold on to this, for later
5116 prog=parser.prog,
5117 )
5118
5119 subparsers = parser.add_subparsers(
5120 title='subcommands',
5121 description='valid subcommands',
5122 help='sub-command help',
5123 )
5124
5125 Prepare.set_subparser(subparsers)
5126 make_activate_parser(subparsers)
5127 make_activate_lockbox_parser(subparsers)
5128 make_activate_block_parser(subparsers)
5129 make_activate_journal_parser(subparsers)
5130 make_activate_all_parser(subparsers)
5131 make_list_parser(subparsers)
5132 make_suppress_parser(subparsers)
5133 make_deactivate_parser(subparsers)
5134 make_destroy_parser(subparsers)
5135 make_zap_parser(subparsers)
5136 make_trigger_parser(subparsers)
5137 make_fix_parser(subparsers)
5138
5139 args = parser.parse_args(argv)
5140 return args
5141
5142
5143 def make_fix_parser(subparsers):
5144 fix_parser = subparsers.add_parser(
5145 'fix',
5146 formatter_class=argparse.RawDescriptionHelpFormatter,
5147 description=textwrap.fill(textwrap.dedent("""\
5148 """)),
5149 help='fix SELinux labels and/or file permissions')
5150
5151 fix_parser.add_argument(
5152 '--system',
5153 action='store_true',
5154 default=False,
5155 help='fix SELinux labels for the non-ceph system data'
5156 )
5157 fix_parser.add_argument(
5158 '--selinux',
5159 action='store_true',
5160 default=False,
5161 help='fix SELinux labels for ceph data'
5162 )
5163 fix_parser.add_argument(
5164 '--permissions',
5165 action='store_true',
5166 default=False,
5167 help='fix file permissions for ceph data'
5168 )
5169 fix_parser.add_argument(
5170 '--all',
5171 action='store_true',
5172 default=False,
5173 help='perform all the fix-related operations'
5174 )
5175 fix_parser.set_defaults(
5176 func=main_fix,
5177 )
5178 return fix_parser
5179
5180
5181 def make_trigger_parser(subparsers):
5182 trigger_parser = subparsers.add_parser(
5183 'trigger',
5184 formatter_class=argparse.RawDescriptionHelpFormatter,
5185 description=textwrap.fill(textwrap.dedent("""\
5186 The partition given in argument is activated. The type of the
5187 partition (data, lockbox, journal etc.) is detected by its
5188 type. If the init system is upstart or systemd, the activation is
5189 delegated to it and runs asynchronously, which
5190 helps reduce the execution time of udev actions.
5191 """)),
5192 help='activate any device (called by udev)')
5193 trigger_parser.add_argument(
5194 'dev',
5195 help=('device'),
5196 )
5197 trigger_parser.add_argument(
5198 '--cluster',
5199 metavar='NAME',
5200 default='ceph',
5201 help='cluster name to assign this disk to',
5202 )
5203 trigger_parser.add_argument(
5204 '--dmcrypt',
5205 action='store_true', default=None,
5206 help='map devices with dm-crypt',
5207 )
5208 trigger_parser.add_argument(
5209 '--dmcrypt-key-dir',
5210 metavar='KEYDIR',
5211 default='/etc/ceph/dmcrypt-keys',
5212 help='directory where dm-crypt keys are stored',
5213 )
5214 trigger_parser.add_argument(
5215 '--sync',
5216 action='store_true', default=None,
5217 help='do operation synchronously; do not trigger systemd',
5218 )
5219 trigger_parser.set_defaults(
5220 func=main_trigger,
5221 )
5222 return trigger_parser
5223
5224
5225 def make_activate_parser(subparsers):
5226 activate_parser = subparsers.add_parser(
5227 'activate',
5228 formatter_class=argparse.RawDescriptionHelpFormatter,
5229 description=textwrap.fill(textwrap.dedent("""\
5230 Activate the OSD found at PATH (can be a directory
5231 or a device partition, possibly encrypted). When
5232 activated for the first time, a unique OSD id is obtained
5233 from the cluster. If PATH is a directory, a symbolic
5234 link is added in {statedir}/osd/ceph-$id. If PATH is
5235 a partition, it is mounted on {statedir}/osd/ceph-$id.
5236 Finally, the OSD daemon is run.
5237
5238 If the OSD depends on auxiliary partitions (journal, block, ...)
5239 they need to be available otherwise activation will fail. It
5240 may happen if a journal is encrypted and cryptsetup was not
5241 run yet.
5242 """.format(statedir=STATEDIR))),
5243 help='Activate a Ceph OSD')
5244 activate_parser.add_argument(
5245 '--mount',
5246 action='store_true', default=None,
5247 help='mount a block device [deprecated, ignored]',
5248 )
5249 activate_parser.add_argument(
5250 '--activate-key',
5251 metavar='PATH',
5252 help='bootstrap-osd keyring path template (%(default)s)',
5253 dest='activate_key_template',
5254 )
5255 activate_parser.add_argument(
5256 '--mark-init',
5257 metavar='INITSYSTEM',
5258 help='init system to manage this dir',
5259 default='auto',
5260 choices=INIT_SYSTEMS,
5261 )
5262 activate_parser.add_argument(
5263 '--no-start-daemon',
5264 action='store_true', default=None,
5265 help='do not start the daemon',
5266 )
5267 activate_parser.add_argument(
5268 'path',
5269 metavar='PATH',
5270 help='path to block device or directory',
5271 )
5272 activate_parser.add_argument(
5273 '--dmcrypt',
5274 action='store_true', default=None,
5275 help='map DATA and/or JOURNAL devices with dm-crypt',
5276 )
5277 activate_parser.add_argument(
5278 '--dmcrypt-key-dir',
5279 metavar='KEYDIR',
5280 default='/etc/ceph/dmcrypt-keys',
5281 help='directory where dm-crypt keys are stored',
5282 )
5283 activate_parser.add_argument(
5284 '--reactivate',
5285 action='store_true', default=False,
5286 help='activate the deactived OSD',
5287 )
5288 activate_parser.set_defaults(
5289 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5290 func=main_activate,
5291 )
5292 return activate_parser
5293
5294
5295 def make_activate_lockbox_parser(subparsers):
5296 parser = subparsers.add_parser(
5297 'activate-lockbox',
5298 formatter_class=argparse.RawDescriptionHelpFormatter,
5299 description=textwrap.fill(textwrap.dedent("""\
5300 Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
5301 where $uuid uniquely identifies the OSD that needs this lockbox
5302 to retrieve keys from the monitor and unlock its partitions.
5303
5304 If the OSD has one or more auxiliary devices (journal, block, ...)
5305 symbolic links are created at {statedir}/osd-lockbox/$other_uuid
5306 and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
5307 allow a journal encrypted in a partition identified by $other_uuid to
5308 fetch the keys it needs from the monitor.
5309
5310 Finally the OSD is activated, as it would be with ceph-disk activate.
5311 """.format(statedir=STATEDIR))),
5312 help='Activate a Ceph lockbox')
5313 parser.add_argument(
5314 '--activate-key',
5315 help='bootstrap-osd keyring path template (%(default)s)',
5316 dest='activate_key_template',
5317 )
5318 parser.add_argument(
5319 '--dmcrypt-key-dir',
5320 metavar='KEYDIR',
5321 default='/etc/ceph/dmcrypt-keys',
5322 help='directory where dm-crypt keys are stored',
5323 )
5324 parser.add_argument(
5325 'path',
5326 metavar='PATH',
5327 help='path to block device',
5328 )
5329 parser.set_defaults(
5330 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5331 func=main_activate_lockbox,
5332 )
5333 return parser
5334
5335
5336 def make_activate_block_parser(subparsers):
5337 return make_activate_space_parser('block', subparsers)
5338
5339
5340 def make_activate_journal_parser(subparsers):
5341 return make_activate_space_parser('journal', subparsers)
5342
5343
5344 def make_activate_space_parser(name, subparsers):
5345 activate_space_parser = subparsers.add_parser(
5346 'activate-%s' % name,
5347 formatter_class=argparse.RawDescriptionHelpFormatter,
5348 description=textwrap.fill(textwrap.dedent("""\
5349 Activating a {name} partition is only meaningfull
5350 if it is encrypted and it will map it using
5351 cryptsetup.
5352
5353 Finally the corresponding OSD is activated,
5354 as it would be with ceph-disk activate.
5355 """.format(name=name))),
5356 help='Activate an OSD via its %s device' % name)
5357 activate_space_parser.add_argument(
5358 'dev',
5359 metavar='DEV',
5360 help='path to %s block device' % name,
5361 )
5362 activate_space_parser.add_argument(
5363 '--activate-key',
5364 metavar='PATH',
5365 help='bootstrap-osd keyring path template (%(default)s)',
5366 dest='activate_key_template',
5367 )
5368 activate_space_parser.add_argument(
5369 '--mark-init',
5370 metavar='INITSYSTEM',
5371 help='init system to manage this dir',
5372 default='auto',
5373 choices=INIT_SYSTEMS,
5374 )
5375 activate_space_parser.add_argument(
5376 '--dmcrypt',
5377 action='store_true', default=None,
5378 help=('map data and/or auxiliariy (journal, etc.) '
5379 'devices with dm-crypt'),
5380 )
5381 activate_space_parser.add_argument(
5382 '--dmcrypt-key-dir',
5383 metavar='KEYDIR',
5384 default='/etc/ceph/dmcrypt-keys',
5385 help='directory where dm-crypt keys are stored',
5386 )
5387 activate_space_parser.add_argument(
5388 '--reactivate',
5389 action='store_true', default=False,
5390 help='activate the deactived OSD',
5391 )
5392 activate_space_parser.set_defaults(
5393 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5394 func=lambda args: main_activate_space(name, args),
5395 )
5396 return activate_space_parser
5397
5398
5399 def make_activate_all_parser(subparsers):
5400 activate_all_parser = subparsers.add_parser(
5401 'activate-all',
5402 formatter_class=argparse.RawDescriptionHelpFormatter,
5403 description=textwrap.fill(textwrap.dedent("""\
5404 Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
5405 The partitions containing auxiliary devices (journal, block, ...)
5406 are not activated.
5407 """)),
5408 help='Activate all tagged OSD partitions')
5409 activate_all_parser.add_argument(
5410 '--activate-key',
5411 metavar='PATH',
5412 help='bootstrap-osd keyring path template (%(default)s)',
5413 dest='activate_key_template',
5414 )
5415 activate_all_parser.add_argument(
5416 '--mark-init',
5417 metavar='INITSYSTEM',
5418 help='init system to manage this dir',
5419 default='auto',
5420 choices=INIT_SYSTEMS,
5421 )
5422 activate_all_parser.set_defaults(
5423 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5424 func=main_activate_all,
5425 )
5426 return activate_all_parser
5427
5428
5429 def make_list_parser(subparsers):
5430 list_parser = subparsers.add_parser(
5431 'list',
5432 formatter_class=argparse.RawDescriptionHelpFormatter,
5433 description=textwrap.fill(textwrap.dedent("""\
5434 Display all partitions on the system and their
5435 associated Ceph information, if any.
5436 """)),
5437 help='List disks, partitions, and Ceph OSDs')
5438 list_parser.add_argument(
5439 '--format',
5440 help='output format',
5441 default='plain',
5442 choices=['json', 'plain'],
5443 )
5444 list_parser.add_argument(
5445 'path',
5446 metavar='PATH',
5447 nargs='*',
5448 help='path to block devices, relative to /sys/block',
5449 )
5450 list_parser.set_defaults(
5451 func=main_list,
5452 )
5453 return list_parser
5454
5455
5456 def make_suppress_parser(subparsers):
5457 suppress_parser = subparsers.add_parser(
5458 'suppress-activate',
5459 formatter_class=argparse.RawDescriptionHelpFormatter,
5460 description=textwrap.fill(textwrap.dedent("""\
5461 Add a prefix to the list of suppressed device names
5462 so that they are ignored by all activate* subcommands.
5463 """)),
5464 help='Suppress activate on a device (prefix)')
5465 suppress_parser.add_argument(
5466 'path',
5467 metavar='PATH',
5468 help='path to block device or directory',
5469 )
5470 suppress_parser.set_defaults(
5471 func=main_suppress,
5472 )
5473
5474 unsuppress_parser = subparsers.add_parser(
5475 'unsuppress-activate',
5476 formatter_class=argparse.RawDescriptionHelpFormatter,
5477 description=textwrap.fill(textwrap.dedent("""\
5478 Remove a prefix from the list of suppressed device names
5479 so that they are no longer ignored by all
5480 activate* subcommands.
5481 """)),
5482 help='Stop suppressing activate on a device (prefix)')
5483 unsuppress_parser.add_argument(
5484 'path',
5485 metavar='PATH',
5486 help='path to block device or directory',
5487 )
5488 unsuppress_parser.set_defaults(
5489 func=main_unsuppress,
5490 )
5491 return suppress_parser
5492
5493
5494 def make_deactivate_parser(subparsers):
5495 deactivate_parser = subparsers.add_parser(
5496 'deactivate',
5497 formatter_class=argparse.RawDescriptionHelpFormatter,
5498 description=textwrap.fill(textwrap.dedent("""\
5499 Deactivate the OSD located at PATH. It stops the OSD daemon
5500 and optionally marks it out (with --mark-out). The content of
5501 the OSD is left untouched.
5502
5503 By default, the, ready, active, INIT-specific files are
5504 removed (so that it is not automatically re-activated by the
5505 udev rules or ceph-disk trigger) and the file deactive is
5506 created to remember the OSD is deactivated.
5507
5508 If the --once option is given, the ready, active, INIT-specific
5509 files are not removed and the OSD will reactivate whenever
5510 ceph-disk trigger is run on one of the devices (journal, data,
5511 block, lockbox, ...).
5512
5513 If the OSD is dmcrypt, remove the data dmcrypt map. When
5514 deactivate finishes, the OSD is down.
5515 """)),
5516 help='Deactivate a Ceph OSD')
5517 deactivate_parser.add_argument(
5518 '--cluster',
5519 metavar='NAME',
5520 default='ceph',
5521 help='cluster name to assign this disk to',
5522 )
5523 deactivate_parser.add_argument(
5524 'path',
5525 metavar='PATH',
5526 nargs='?',
5527 help='path to block device or directory',
5528 )
5529 deactivate_parser.add_argument(
5530 '--deactivate-by-id',
5531 metavar='<id>',
5532 help='ID of OSD to deactive'
5533 )
5534 deactivate_parser.add_argument(
5535 '--mark-out',
5536 action='store_true', default=False,
5537 help='option to mark the osd out',
5538 )
5539 deactivate_parser.add_argument(
5540 '--once',
5541 action='store_true', default=False,
5542 help='does not need --reactivate to activate again',
5543 )
5544 deactivate_parser.set_defaults(
5545 func=main_deactivate,
5546 )
5547
5548
5549 def make_destroy_parser(subparsers):
5550 destroy_parser = subparsers.add_parser(
5551 'destroy',
5552 formatter_class=argparse.RawDescriptionHelpFormatter,
5553 description=textwrap.fill(textwrap.dedent("""\
5554 Destroy the OSD located at PATH.
5555 It removes the OSD from the cluster, the crushmap and
5556 deallocates the OSD id. An OSD must be down before it
5557 can be destroyed.
5558 """)),
5559 help='Destroy a Ceph OSD')
5560 destroy_parser.add_argument(
5561 '--cluster',
5562 metavar='NAME',
5563 default='ceph',
5564 help='cluster name to assign this disk to',
5565 )
5566 destroy_parser.add_argument(
5567 'path',
5568 metavar='PATH',
5569 nargs='?',
5570 help='path to block device or directory',
5571 )
5572 destroy_parser.add_argument(
5573 '--destroy-by-id',
5574 metavar='<id>',
5575 help='ID of OSD to destroy'
5576 )
5577 destroy_parser.add_argument(
5578 '--dmcrypt-key-dir',
5579 metavar='KEYDIR',
5580 default='/etc/ceph/dmcrypt-keys',
5581 help=('directory where dm-crypt keys are stored '
5582 '(If you don\'t know how it work, '
5583 'dont use it. we have default value)'),
5584 )
5585 destroy_parser.add_argument(
5586 '--zap',
5587 action='store_true', default=False,
5588 help='option to erase data and partition',
5589 )
5590 destroy_parser.set_defaults(
5591 func=main_destroy,
5592 )
5593
5594
5595 def make_zap_parser(subparsers):
5596 zap_parser = subparsers.add_parser(
5597 'zap',
5598 formatter_class=argparse.RawDescriptionHelpFormatter,
5599 description=textwrap.fill(textwrap.dedent("""\
5600 Zap/erase/destroy a device's partition table and contents. It
5601 actually uses sgdisk and it's option --zap-all to
5602 destroy both GPT and MBR data structures so that the disk
5603 becomes suitable for repartitioning.
5604 """)),
5605 help='Zap/erase/destroy a device\'s partition table (and contents)')
5606 zap_parser.add_argument(
5607 'dev',
5608 metavar='DEV',
5609 nargs='+',
5610 help='path to block device',
5611 )
5612 zap_parser.set_defaults(
5613 func=main_zap,
5614 )
5615 return zap_parser
5616
5617
5618 def main(argv):
5619 args = parse_args(argv)
5620
5621 setup_logging(args.verbose, args.log_stdout)
5622
5623 if args.prepend_to_path != '':
5624 path = os.environ.get('PATH', os.defpath)
5625 os.environ['PATH'] = args.prepend_to_path + ":" + path
5626
5627 if args.func.__name__ != 'main_trigger':
5628 # trigger may run when statedir is unavailable and does not use it
5629 setup_statedir(args.statedir)
5630 setup_sysconfdir(args.sysconfdir)
5631
5632 global CEPH_PREF_USER
5633 CEPH_PREF_USER = args.setuser
5634 global CEPH_PREF_GROUP
5635 CEPH_PREF_GROUP = args.setgroup
5636
5637 if args.verbose:
5638 args.func(args)
5639 else:
5640 main_catch(args.func, args)
5641
5642
5643 def setup_logging(verbose, log_stdout):
5644 loglevel = logging.WARNING
5645 if verbose:
5646 loglevel = logging.DEBUG
5647
5648 if log_stdout:
5649 ch = logging.StreamHandler(stream=sys.stdout)
5650 ch.setLevel(loglevel)
5651 formatter = logging.Formatter('%(funcName)s: %(message)s')
5652 ch.setFormatter(formatter)
5653 LOG.addHandler(ch)
5654 LOG.setLevel(loglevel)
5655 else:
5656 logging.basicConfig(
5657 level=loglevel,
5658 format='%(funcName)s: %(message)s',
5659 )
5660
5661
5662 def main_catch(func, args):
5663
5664 try:
5665 func(args)
5666
5667 except Error as e:
5668 raise SystemExit(
5669 '{prog}: {msg}'.format(
5670 prog=args.prog,
5671 msg=e,
5672 )
5673 )
5674
5675 except CephDiskException as error:
5676 exc_name = error.__class__.__name__
5677 raise SystemExit(
5678 '{prog} {exc_name}: {msg}'.format(
5679 prog=args.prog,
5680 exc_name=exc_name,
5681 msg=error,
5682 )
5683 )
5684
5685
5686 def run():
5687 main(sys.argv[1:])
5688
5689
5690 if __name__ == '__main__':
5691 main(sys.argv[1:])
5692 warned_about = {}