]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-disk/ceph_disk/main.py
update sources to v12.2.3
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
7 #
8 # Author: Loic Dachary <loic@dachary.org>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
13 # any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
19 #
20
21 from __future__ import print_function
22
23 import argparse
24 import base64
25 import errno
26 import fcntl
27 import functools
28 import json
29 import logging
30 import os
31 import platform
32 import re
33 import subprocess
34 import stat
35 import sys
36 import tempfile
37 import uuid
38 import time
39 import shlex
40 import shutil
41 import pwd
42 import grp
43 import textwrap
44 import glob
45
46 CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
47 CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'
48
49 KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'
50
51 PTYPE = {
52 'regular': {
53 'journal': {
54 # identical because creating a journal is atomic
55 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
56 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
57 },
58 'block': {
59 # identical because creating a block is atomic
60 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
61 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
62 },
63 'block.db': {
64 # identical because creating a block is atomic
65 'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
66 'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
67 },
68 'block.wal': {
69 # identical because creating a block is atomic
70 'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
71 'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
72 },
73 'osd': {
74 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
75 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
76 },
77 'lockbox': {
78 'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
79 'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
80 },
81 },
82 'luks': {
83 'journal': {
84 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
85 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
86 },
87 'block': {
88 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
89 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
90 },
91 'block.db': {
92 'ready': '166418da-c469-4022-adf4-b30afd37f176',
93 'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
94 },
95 'block.wal': {
96 'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
97 'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
98 },
99 'osd': {
100 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
101 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
102 },
103 },
104 'plain': {
105 'journal': {
106 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
107 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
108 },
109 'block': {
110 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
111 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
112 },
113 'block.db': {
114 'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
115 'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
116 },
117 'block.wal': {
118 'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
119 'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
120 },
121 'osd': {
122 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
123 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
124 },
125 },
126 'mpath': {
127 'journal': {
128 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
129 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
130 },
131 'block': {
132 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
133 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
134 },
135 'block.db': {
136 'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
137 'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
138 },
139 'block.wal': {
140 'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
141 'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
142 },
143 'osd': {
144 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
145 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
146 },
147 'lockbox': {
148 'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
149 'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
150 },
151 },
152 }
153
154 try:
155 # see https://bugs.python.org/issue23098
156 os.major(0x80002b00)
157 except OverflowError:
158 os.major = lambda devid: ((devid >> 8) & 0xfff) | ((devid >> 32) & ~0xfff)
159 os.minor = lambda devid: (devid & 0xff) | ((devid >> 12) & ~0xff)
160
161
162 class Ptype(object):
163
164 @staticmethod
165 def get_ready_by_type(what):
166 return [x['ready'] for x in PTYPE[what].values()]
167
168 @staticmethod
169 def get_ready_by_name(name):
170 return [x[name]['ready'] for x in PTYPE.values() if name in x]
171
172 @staticmethod
173 def is_regular_space(ptype):
174 return Ptype.is_what_space('regular', ptype)
175
176 @staticmethod
177 def is_mpath_space(ptype):
178 return Ptype.is_what_space('mpath', ptype)
179
180 @staticmethod
181 def is_plain_space(ptype):
182 return Ptype.is_what_space('plain', ptype)
183
184 @staticmethod
185 def is_luks_space(ptype):
186 return Ptype.is_what_space('luks', ptype)
187
188 @staticmethod
189 def is_what_space(what, ptype):
190 for name in Space.NAMES:
191 if ptype == PTYPE[what][name]['ready']:
192 return True
193 return False
194
195 @staticmethod
196 def space_ptype_to_name(ptype):
197 for what in PTYPE.values():
198 for name in Space.NAMES:
199 if ptype == what[name]['ready']:
200 return name
201 raise ValueError('ptype ' + ptype + ' not found')
202
203 @staticmethod
204 def is_dmcrypt_space(ptype):
205 for name in Space.NAMES:
206 if Ptype.is_dmcrypt(ptype, name):
207 return True
208 return False
209
210 @staticmethod
211 def is_dmcrypt(ptype, name):
212 for what in ('plain', 'luks'):
213 if ptype == PTYPE[what][name]['ready']:
214 return True
215 return False
216
217
218 SYSFS = '/sys'
219
220 if platform.system() == 'FreeBSD':
221 FREEBSD = True
222 DEFAULT_FS_TYPE = 'zfs'
223 PROCDIR = '/compat/linux/proc'
224 # FreeBSD does not have blockdevices any more
225 BLOCKDIR = '/dev'
226 ROOTGROUP = 'wheel'
227 else:
228 FREEBSD = False
229 DEFAULT_FS_TYPE = 'xfs'
230 PROCDIR = '/proc'
231 BLOCKDIR = '/sys/block'
232 ROOTGROUP = 'root'
233
234 """
235 OSD STATUS Definition
236 """
237 OSD_STATUS_OUT_DOWN = 0
238 OSD_STATUS_OUT_UP = 1
239 OSD_STATUS_IN_DOWN = 2
240 OSD_STATUS_IN_UP = 3
241
242 MOUNT_OPTIONS = dict(
243 btrfs='noatime,user_subvol_rm_allowed',
244 # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
245 # delay a moment before removing it fully because we did have some
246 # issues with ext4 before the xatts-in-leveldb work, and it seemed
247 # that user_xattr helped
248 ext4='noatime,user_xattr',
249 xfs='noatime,inode64',
250 )
251
252 MKFS_ARGS = dict(
253 btrfs=[
254 # btrfs requires -f, for the same reason as xfs (see comment below)
255 '-f',
256 '-m', 'single',
257 '-l', '32768',
258 '-n', '32768',
259 ],
260 xfs=[
261 # xfs insists on not overwriting previous fs; even if we wipe
262 # partition table, we often recreate it exactly the same way,
263 # so we'll see ghosts of filesystems past
264 '-f',
265 '-i', 'size=2048',
266 ],
267 zfs=[
268 '-o', 'atime=off'
269 ],
270 )
271
272 INIT_SYSTEMS = [
273 'upstart',
274 'sysvinit',
275 'systemd',
276 'openrc',
277 'bsdrc',
278 'auto',
279 'none',
280 ]
281
282 STATEDIR = '/var/lib/ceph'
283
284 SYSCONFDIR = '/etc/ceph'
285
286 prepare_lock = None
287 activate_lock = None
288 SUPPRESS_PREFIX = None
289
290 # only warn once about some things
291 warned_about = {}
292
293 # Nuke the TERM variable to avoid confusing any subprocesses we call.
294 # For example, libreadline will print weird control sequences for some
295 # TERM values.
296 if 'TERM' in os.environ:
297 del os.environ['TERM']
298
299 LOG_NAME = __name__
300 if LOG_NAME == '__main__':
301 LOG_NAME = os.path.basename(sys.argv[0])
302 LOG = logging.getLogger(LOG_NAME)
303
304 # Allow user-preferred values for subprocess user and group
305 CEPH_PREF_USER = None
306 CEPH_PREF_GROUP = None
307
308
309 class FileLock(object):
310 def __init__(self, fn):
311 self.fn = fn
312 self.fd = None
313
314 def __enter__(self):
315 assert not self.fd
316 self.fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
317 fcntl.lockf(self.fd, fcntl.LOCK_EX)
318
319 def __exit__(self, exc_type, exc_val, exc_tb):
320 assert self.fd
321 fcntl.lockf(self.fd, fcntl.LOCK_UN)
322 os.close(self.fd)
323 self.fd = None
324
325
326 class Error(Exception):
327 """
328 Error
329 """
330
331 def __str__(self):
332 doc = _bytes2str(self.__doc__.strip())
333 try:
334 str_type = basestring
335 except NameError:
336 str_type = str
337 args = [a if isinstance(a, str_type) else str(a) for a in self.args]
338 return ': '.join([doc] + [_bytes2str(a) for a in args])
339
340
341 class MountError(Error):
342 """
343 Mounting filesystem failed
344 """
345
346
347 class UnmountError(Error):
348 """
349 Unmounting filesystem failed
350 """
351
352
353 class BadMagicError(Error):
354 """
355 Does not look like a Ceph OSD, or incompatible version
356 """
357
358
359 class TruncatedLineError(Error):
360 """
361 Line is truncated
362 """
363
364
365 class TooManyLinesError(Error):
366 """
367 Too many lines
368 """
369
370
371 class FilesystemTypeError(Error):
372 """
373 Cannot discover filesystem type
374 """
375
376
377 class CephDiskException(Exception):
378 """
379 A base exception for ceph-disk to provide custom (ad-hoc) messages that
380 will be caught and dealt with when main() is executed
381 """
382 pass
383
384
385 class ExecutableNotFound(CephDiskException):
386 """
387 Exception to report on executables not available in PATH
388 """
389 pass
390
391
392 def is_systemd():
393 """
394 Detect whether systemd is running
395 """
396 with open(PROCDIR + '/1/comm', 'r') as f:
397 return 'systemd' in f.read()
398
399
400 def is_upstart():
401 """
402 Detect whether upstart is running
403 """
404 (out, err, _) = command(['init', '--version'])
405 return 'upstart' in out
406
407
408 def maybe_mkdir(*a, **kw):
409 """
410 Creates a new directory if it doesn't exist, removes
411 existing symlink before creating the directory.
412 """
413 # remove any symlink, if it is there..
414 if os.path.exists(*a) and stat.S_ISLNK(os.lstat(*a).st_mode):
415 LOG.debug('Removing old symlink at %s', *a)
416 os.unlink(*a)
417 try:
418 os.mkdir(*a, **kw)
419 except OSError as e:
420 if e.errno == errno.EEXIST:
421 pass
422 else:
423 raise
424
425
426 def which(executable):
427 """find the location of an executable"""
428 envpath = os.environ.get('PATH') or os.defpath
429 PATH = envpath.split(os.pathsep)
430
431 locations = PATH + [
432 '/usr/local/bin',
433 '/bin',
434 '/usr/bin',
435 '/usr/local/sbin',
436 '/usr/sbin',
437 '/sbin',
438 ]
439
440 for location in locations:
441 executable_path = os.path.join(location, executable)
442 if (os.path.isfile(executable_path) and
443 os.access(executable_path, os.X_OK)):
444 return executable_path
445
446
447 def _get_command_executable(arguments):
448 """
449 Return the full path for an executable, raise if the executable is not
450 found. If the executable has already a full path do not perform any checks.
451 """
452 if os.path.isabs(arguments[0]): # an absolute path
453 return arguments
454 executable = which(arguments[0])
455 if not executable:
456 command_msg = 'Could not run command: %s' % ' '.join(arguments)
457 executable_msg = '%s not in path.' % arguments[0]
458 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
459
460 # swap the old executable for the new one
461 arguments[0] = executable
462 return arguments
463
464
465 def command(arguments, **kwargs):
466 """
467 Safely execute a ``subprocess.Popen`` call making sure that the
468 executable exists and raising a helpful error message
469 if it does not.
470
471 .. note:: This should be the preferred way of calling ``subprocess.Popen``
472 since it provides the caller with the safety net of making sure that
473 executables *will* be found and will error nicely otherwise.
474
475 This returns the output of the command and the return code of the
476 process in a tuple: (stdout, stderr, returncode).
477 """
478
479 arguments = list(map(_bytes2str, _get_command_executable(arguments)))
480
481 LOG.info('Running command: %s' % ' '.join(arguments))
482 process = subprocess.Popen(
483 arguments,
484 stdout=subprocess.PIPE,
485 stderr=subprocess.PIPE,
486 **kwargs)
487 out, err = process.communicate()
488
489 return _bytes2str(out), _bytes2str(err), process.returncode
490
491
492 def command_with_stdin(arguments, stdin):
493 LOG.info("Running command with stdin: " + " ".join(arguments))
494 process = subprocess.Popen(
495 arguments,
496 stdin=subprocess.PIPE,
497 stdout=subprocess.PIPE,
498 stderr=subprocess.PIPE)
499 out, err = process.communicate(stdin)
500 LOG.debug(out)
501 if process.returncode != 0:
502 LOG.error(err)
503 raise SystemExit(
504 "'{cmd}' failed with status code {returncode}".format(
505 cmd=arguments,
506 returncode=process.returncode,
507 )
508 )
509 return out
510
511
512 def _bytes2str(string):
513 return string.decode('utf-8') if isinstance(string, bytes) else string
514
515
516 def command_init(arguments, **kwargs):
517 """
518 Safely execute a non-blocking ``subprocess.Popen`` call
519 making sure that the executable exists and raising a helpful
520 error message if it does not.
521
522 .. note:: This should be the preferred way of calling ``subprocess.Popen``
523 since it provides the caller with the safety net of making sure that
524 executables *will* be found and will error nicely otherwise.
525
526 This returns the process.
527 """
528
529 arguments = list(map(_bytes2str, _get_command_executable(arguments)))
530
531 LOG.info('Running command: %s' % ' '.join(arguments))
532 process = subprocess.Popen(
533 arguments,
534 stdout=subprocess.PIPE,
535 stderr=subprocess.PIPE,
536 **kwargs)
537 return process
538
539
540 def command_wait(process):
541 """
542 Wait for the process finish and parse its output.
543 """
544
545 out, err = process.communicate()
546
547 return _bytes2str(out), _bytes2str(err), process.returncode
548
549
550 def command_check_call(arguments, exit=False):
551 """
552 Safely execute a ``subprocess.check_call`` call making sure that the
553 executable exists and raising a helpful error message if it does not.
554
555 When ``exit`` is set to ``True`` this helper will do a clean (sans
556 traceback) system exit.
557 .. note:: This should be the preferred way of calling
558 ``subprocess.check_call`` since it provides the caller with the safety net
559 of making sure that executables *will* be found and will error nicely
560 otherwise.
561 """
562 arguments = _get_command_executable(arguments)
563 command = ' '.join(arguments)
564 LOG.info('Running command: %s', command)
565 try:
566 return subprocess.check_call(arguments)
567 except subprocess.CalledProcessError as error:
568 if exit:
569 if error.output:
570 LOG.error(error.output)
571 raise SystemExit(
572 "'{cmd}' failed with status code {returncode}".format(
573 cmd=command,
574 returncode=error.returncode,
575 )
576 )
577 raise
578
579
580 #
581 # An alternative block_path implementation would be
582 #
583 # name = basename(dev)
584 # return /sys/devices/virtual/block/$name
585 #
586 # It is however more fragile because it relies on the fact
587 # that the basename of the device the user will use always
588 # matches the one the driver will use. On Ubuntu 14.04, for
589 # instance, when multipath creates a partition table on
590 #
591 # /dev/mapper/353333330000007d0 -> ../dm-0
592 #
593 # it will create partition devices named
594 #
595 # /dev/mapper/353333330000007d0-part1
596 #
597 # which is the same device as /dev/dm-1 but not a symbolic
598 # link to it:
599 #
600 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
601 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
602 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
603 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
604 #
605 # Using the basename in this case fails.
606 #
607
608
609 def block_path(dev):
610 if FREEBSD:
611 return dev
612 path = os.path.realpath(dev)
613 rdev = os.stat(path).st_rdev
614 (M, m) = (os.major(rdev), os.minor(rdev))
615 return "{sysfs}/dev/block/{M}:{m}".format(sysfs=SYSFS, M=M, m=m)
616
617
618 def get_dm_uuid(dev):
619 uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
620 LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
621 if not os.path.exists(uuid_path):
622 return False
623 uuid = open(uuid_path, 'r').read()
624 LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
625 return uuid
626
627
628 def is_mpath(dev):
629 """
630 True if the path is managed by multipath
631 """
632 if FREEBSD:
633 return False
634 uuid = get_dm_uuid(dev)
635 return (uuid and
636 (re.match('part\d+-mpath-', uuid) or
637 re.match('mpath-', uuid)))
638
639
640 def get_dev_name(path):
641 """
642 get device name from path. e.g.::
643
644 /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1
645
646 a device "name" is something like::
647
648 sdb
649 cciss!c0d1
650
651 """
652 assert path.startswith('/dev/')
653 base = path[5:]
654 return base.replace('/', '!')
655
656
657 def get_dev_path(name):
658 """
659 get a path (/dev/...) from a name (cciss!c0d1)
660 a device "path" is something like::
661
662 /dev/sdb
663 /dev/cciss/c0d1
664
665 """
666 return '/dev/' + name.replace('!', '/')
667
668
669 def get_dev_relpath(name):
670 """
671 get a relative path to /dev from a name (cciss!c0d1)
672 """
673 return name.replace('!', '/')
674
675
676 def get_dev_size(dev, size='megabytes'):
677 """
678 Attempt to get the size of a device so that we can prevent errors
679 from actions to devices that are smaller, and improve error reporting.
680
681 Because we want to avoid breakage in case this approach is not robust, we
682 will issue a warning if we failed to get the size.
683
684 :param size: bytes or megabytes
685 :param dev: the device to calculate the size
686 """
687 fd = os.open(dev, os.O_RDONLY)
688 dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
689 try:
690 device_size = os.lseek(fd, 0, os.SEEK_END)
691 divider = dividers.get(size, 1024 * 1024) # default to megabytes
692 return device_size // divider
693 except Exception as error:
694 LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
695 finally:
696 os.close(fd)
697
698
699 def stmode_is_diskdevice(dmode):
700 if stat.S_ISBLK(dmode):
701 return True
702 else:
703 # FreeBSD does not have block devices
704 # All disks are character devices
705 return FREEBSD and stat.S_ISCHR(dmode)
706
707
708 def dev_is_diskdevice(dev):
709 dmode = os.stat(dev).st_mode
710 return stmode_is_diskdevice(dmode)
711
712
713 def ldev_is_diskdevice(dev):
714 dmode = os.lstat(dev).st_mode
715 return stmode_is_diskdevice(dmode)
716
717
718 def path_is_diskdevice(path):
719 dev = os.path.realpath(path)
720 return dev_is_diskdevice(dev)
721
722
723 def get_partition_mpath(dev, pnum):
724 part_re = "part{pnum}-mpath-".format(pnum=pnum)
725 partitions = list_partitions_mpath(dev, part_re)
726 if partitions:
727 return partitions[0]
728 else:
729 return None
730
731
732 def retry(on_error=Exception, max_tries=10, wait=0.2, backoff=0):
733 def wrapper(func):
734 @functools.wraps(func)
735 def repeat(*args, **kwargs):
736 for tries in range(max_tries - 1):
737 try:
738 return func(*args, **kwargs)
739 except on_error:
740 time.sleep(wait + backoff * tries)
741 return func(*args, **kwargs)
742 return repeat
743 return wrapper
744
745
746 @retry(Error)
747 def get_partition_dev(dev, pnum):
748 """
749 get the device name for a partition
750
751 assume that partitions are named like the base dev,
752 with a number, and optionally
753 some intervening characters (like 'p'). e.g.,
754
755 sda 1 -> sda1
756 cciss/c0d1 1 -> cciss!c0d1p1
757 """
758 partname = None
759 error_msg = ""
760 if is_mpath(dev):
761 partname = get_partition_mpath(dev, pnum)
762 else:
763 name = get_dev_name(os.path.realpath(dev))
764 sys_entry = os.path.join(BLOCKDIR, name)
765 error_msg = " in %s" % sys_entry
766 for f in os.listdir(sys_entry):
767 if f.startswith(name) and f.endswith(str(pnum)):
768 # we want the shortest name that starts with the base name
769 # and ends with the partition number
770 if not partname or len(f) < len(partname):
771 partname = f
772 if partname:
773 return get_dev_path(partname)
774 else:
775 raise Error('partition %d for %s does not appear to exist%s' %
776 (pnum, dev, error_msg))
777
778
779 def list_all_partitions():
780 """
781 Return a list of devices and partitions
782 """
783 if not FREEBSD:
784 names = os.listdir(BLOCKDIR)
785 dev_part_list = {}
786 for name in names:
787 # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
788 if re.match(r'^fd\d$', name):
789 continue
790 dev_part_list[name] = list_partitions(get_dev_path(name))
791 else:
792 with open(os.path.join(PROCDIR, "partitions")) as partitions:
793 for line in partitions:
794 columns = line.split()
795 if len(columns) >= 4:
796 name = columns[3]
797 dev_part_list[name] = list_partitions(get_dev_path(name))
798 return dev_part_list
799
800
801 def list_partitions(dev):
802 dev = os.path.realpath(dev)
803 if is_mpath(dev):
804 return list_partitions_mpath(dev)
805 else:
806 return list_partitions_device(dev)
807
808
809 def list_partitions_mpath(dev, part_re="part\d+-mpath-"):
810 p = block_path(dev)
811 partitions = []
812 holders = os.path.join(p, 'holders')
813 for holder in os.listdir(holders):
814 uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
815 uuid = open(uuid_path, 'r').read()
816 LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
817 if re.match(part_re, uuid):
818 partitions.append(holder)
819 return partitions
820
821
822 def list_partitions_device(dev):
823 """
824 Return a list of partitions on the given device name
825 """
826 partitions = []
827 basename = get_dev_name(dev)
828 for name in os.listdir(block_path(dev)):
829 if name.startswith(basename):
830 partitions.append(name)
831 return partitions
832
833
834 def get_partition_base(dev):
835 """
836 Get the base device for a partition
837 """
838 dev = os.path.realpath(dev)
839 if not ldev_is_diskdevice(dev):
840 raise Error('not a block device', dev)
841
842 name = get_dev_name(dev)
843 if os.path.exists(os.path.join('/sys/block', name)):
844 raise Error('not a partition', dev)
845
846 # find the base
847 for basename in os.listdir('/sys/block'):
848 if os.path.exists(os.path.join('/sys/block', basename, name)):
849 return get_dev_path(basename)
850 raise Error('no parent device for partition', dev)
851
852
853 def is_partition_mpath(dev):
854 uuid = get_dm_uuid(dev)
855 return bool(re.match('part\d+-mpath-', uuid))
856
857
858 def partnum_mpath(dev):
859 uuid = get_dm_uuid(dev)
860 return re.findall('part(\d+)-mpath-', uuid)[0]
861
862
863 def get_partition_base_mpath(dev):
864 slave_path = os.path.join(block_path(dev), 'slaves')
865 slaves = os.listdir(slave_path)
866 assert slaves
867 name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
868 name = open(name_path, 'r').read().strip()
869 return os.path.join('/dev/mapper', name)
870
871
872 def is_partition(dev):
873 """
874 Check whether a given device path is a partition or a full disk.
875 """
876 if is_mpath(dev):
877 return is_partition_mpath(dev)
878
879 dev = os.path.realpath(dev)
880 st = os.lstat(dev)
881 if not stmode_is_diskdevice(st.st_mode):
882 raise Error('not a block device', dev)
883
884 name = get_dev_name(dev)
885 if os.path.exists(os.path.join(BLOCKDIR, name)):
886 return False
887
888 # make sure it is a partition of something else
889 major = os.major(st.st_rdev)
890 minor = os.minor(st.st_rdev)
891 if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)):
892 return True
893
894 raise Error('not a disk or partition', dev)
895
896
897 def is_mounted(dev):
898 """
899 Check if the given device is mounted.
900 """
901 dev = os.path.realpath(dev)
902 with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
903 for line in proc_mounts:
904 fields = line.split()
905 if len(fields) < 3:
906 continue
907 mounts_dev = fields[0]
908 path = fields[1]
909 if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
910 mounts_dev = os.path.realpath(mounts_dev)
911 if mounts_dev == dev:
912 return _bytes2str(path)
913 return None
914
915
916 def is_held(dev):
917 """
918 Check if a device is held by another device (e.g., a dm-crypt mapping)
919 """
920 assert os.path.exists(dev)
921 if is_mpath(dev):
922 return []
923
924 dev = os.path.realpath(dev)
925 base = get_dev_name(dev)
926
927 # full disk?
928 directory = '/sys/block/{base}/holders'.format(base=base)
929 if os.path.exists(directory):
930 return os.listdir(directory)
931
932 # partition?
933 part = base
934 while len(base):
935 directory = '/sys/block/{base}/{part}/holders'.format(
936 part=part, base=base)
937 if os.path.exists(directory):
938 return os.listdir(directory)
939 base = base[:-1]
940 return []
941
942
943 def verify_not_in_use(dev, check_partitions=False):
944 """
945 Verify if a given device (path) is in use (e.g. mounted or
946 in use by device-mapper).
947
948 :raises: Error if device is in use.
949 """
950 assert os.path.exists(dev)
951 if is_mounted(dev):
952 raise Error('Device is mounted', dev)
953 holders = is_held(dev)
954 if holders:
955 raise Error('Device %s is in use by a device-mapper '
956 'mapping (dm-crypt?)' % dev, ','.join(holders))
957
958 if check_partitions and not is_partition(dev):
959 for partname in list_partitions(dev):
960 partition = get_dev_path(partname)
961 if is_mounted(partition):
962 raise Error('Device is mounted', partition)
963 holders = is_held(partition)
964 if holders:
965 raise Error('Device %s is in use by a device-mapper '
966 'mapping (dm-crypt?)'
967 % partition, ','.join(holders))
968
969
970 def must_be_one_line(line):
971 """
972 Checks if given line is really one single line.
973
974 :raises: TruncatedLineError or TooManyLinesError
975 :return: Content of the line, or None if line isn't valid.
976 """
977 line = _bytes2str(line)
978
979 if line[-1:] != '\n':
980 raise TruncatedLineError(line)
981 line = line[:-1]
982 if '\n' in line:
983 raise TooManyLinesError(line)
984 return line
985
986
987 def read_one_line(parent, name):
988 """
989 Read a file whose sole contents are a single line.
990
991 Strips the newline.
992
993 :return: Contents of the line, or None if file did not exist.
994 """
995 path = os.path.join(parent, name)
996 try:
997 line = open(path, 'rb').read()
998 except IOError as e:
999 if e.errno == errno.ENOENT:
1000 return None
1001 else:
1002 raise
1003
1004 try:
1005 line = must_be_one_line(line)
1006 except (TruncatedLineError, TooManyLinesError) as e:
1007 raise Error(
1008 'File is corrupt: {path}: {msg}'.format(
1009 path=path,
1010 msg=e,
1011 )
1012 )
1013 return line
1014
1015
1016 def write_one_line(parent, name, text):
1017 """
1018 Write a file whose sole contents are a single line.
1019
1020 Adds a newline.
1021 """
1022 path = os.path.join(parent, name)
1023 tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
1024 with open(tmp, 'wb') as tmp_file:
1025 tmp_file.write(text.encode('utf-8') + b'\n')
1026 os.fsync(tmp_file.fileno())
1027 path_set_context(tmp)
1028 os.rename(tmp, path)
1029
1030
1031 def init_get():
1032 """
1033 Get a init system using 'ceph-detect-init'
1034 """
1035 init = _check_output(
1036 args=[
1037 'ceph-detect-init',
1038 '--default', 'sysvinit',
1039 ],
1040 )
1041 init = must_be_one_line(init)
1042 return init
1043
1044
1045 def check_osd_magic(path):
1046 """
1047 Check that this path has the Ceph OSD magic.
1048
1049 :raises: BadMagicError if this does not look like a Ceph OSD data
1050 dir.
1051 """
1052 magic = read_one_line(path, 'magic')
1053 if magic is None:
1054 # probably not mkfs'ed yet
1055 raise BadMagicError(path)
1056 if magic != CEPH_OSD_ONDISK_MAGIC:
1057 raise BadMagicError(path)
1058
1059
1060 def check_osd_id(osd_id):
1061 """
1062 Ensures osd id is numeric.
1063 """
1064 if not re.match(r'^[0-9]+$', osd_id):
1065 raise Error('osd id is not numeric', osd_id)
1066
1067
1068 def allocate_osd_id(
1069 cluster,
1070 fsid,
1071 keyring,
1072 path,
1073 ):
1074 """
1075 Allocates an OSD id on the given cluster.
1076
1077 :raises: Error if the call to allocate the OSD id fails.
1078 :return: The allocated OSD id.
1079 """
1080 lockbox_path = os.path.join(STATEDIR, 'osd-lockbox', fsid)
1081 lockbox_osd_id = read_one_line(lockbox_path, 'whoami')
1082 osd_keyring = os.path.join(path, 'keyring')
1083 if lockbox_osd_id:
1084 LOG.debug('Getting OSD id from Lockbox...')
1085 osd_id = lockbox_osd_id
1086 shutil.move(os.path.join(lockbox_path, 'osd_keyring'),
1087 osd_keyring)
1088 path_set_context(osd_keyring)
1089 os.unlink(os.path.join(lockbox_path, 'whoami'))
1090 return osd_id
1091
1092 LOG.debug('Allocating OSD id...')
1093 secrets = Secrets()
1094 try:
1095 wanttobe = read_one_line(path, 'wanttobe')
1096 if os.path.exists(os.path.join(path, 'wanttobe')):
1097 os.unlink(os.path.join(path, 'wanttobe'))
1098 id_arg = wanttobe and [wanttobe] or []
1099 osd_id = command_with_stdin(
1100 [
1101 'ceph',
1102 '--cluster', cluster,
1103 '--name', 'client.bootstrap-osd',
1104 '--keyring', keyring,
1105 '-i', '-',
1106 'osd', 'new',
1107 fsid,
1108 ] + id_arg,
1109 secrets.get_json()
1110 )
1111 except subprocess.CalledProcessError as e:
1112 raise Error('ceph osd create failed', e, e.output)
1113 osd_id = must_be_one_line(osd_id)
1114 check_osd_id(osd_id)
1115 secrets.write_osd_keyring(osd_keyring, osd_id)
1116 return osd_id
1117
1118
1119 def get_osd_id(path):
1120 """
1121 Gets the OSD id of the OSD at the given path.
1122 """
1123 osd_id = read_one_line(path, 'whoami')
1124 if osd_id is not None:
1125 check_osd_id(osd_id)
1126 return osd_id
1127
1128
1129 def get_ceph_user():
1130 global CEPH_PREF_USER
1131
1132 if CEPH_PREF_USER is not None:
1133 try:
1134 pwd.getpwnam(CEPH_PREF_USER)
1135 return CEPH_PREF_USER
1136 except KeyError:
1137 print("No such user:", CEPH_PREF_USER)
1138 sys.exit(2)
1139 else:
1140 try:
1141 pwd.getpwnam('ceph')
1142 return 'ceph'
1143 except KeyError:
1144 return 'root'
1145
1146
1147 def get_ceph_group():
1148 global CEPH_PREF_GROUP
1149
1150 if CEPH_PREF_GROUP is not None:
1151 try:
1152 grp.getgrnam(CEPH_PREF_GROUP)
1153 return CEPH_PREF_GROUP
1154 except KeyError:
1155 print("No such group:", CEPH_PREF_GROUP)
1156 sys.exit(2)
1157 else:
1158 try:
1159 grp.getgrnam('ceph')
1160 return 'ceph'
1161 except KeyError:
1162 return 'root'
1163
1164
1165 def path_set_context(path):
1166 # restore selinux context to default policy values
1167 if which('restorecon'):
1168 command(['restorecon', '-R', path])
1169
1170 # if ceph user exists, set owner to ceph
1171 if get_ceph_user() == 'ceph':
1172 command(['chown', '-R', 'ceph:ceph', path])
1173
1174
1175 def _check_output(args=None, **kwargs):
1176 out, err, ret = command(args, **kwargs)
1177 if ret:
1178 cmd = args[0]
1179 error = subprocess.CalledProcessError(ret, cmd)
1180 error.output = out + err
1181 raise error
1182 return _bytes2str(out)
1183
1184
1185 def get_conf(cluster, variable):
1186 """
1187 Get the value of the given configuration variable from the
1188 cluster.
1189
1190 :raises: Error if call to ceph-conf fails.
1191 :return: The variable value or None.
1192 """
1193 try:
1194 out, err, ret = command(
1195 [
1196 'ceph-conf',
1197 '--cluster={cluster}'.format(
1198 cluster=cluster,
1199 ),
1200 '--name=osd.',
1201 '--lookup',
1202 variable,
1203 ],
1204 close_fds=True,
1205 )
1206 except OSError as e:
1207 raise Error('error executing ceph-conf', e, err)
1208 if ret == 1:
1209 # config entry not found
1210 return None
1211 elif ret != 0:
1212 raise Error('getting variable from configuration failed')
1213 value = out.split('\n', 1)[0]
1214 # don't differentiate between "var=" and no var set
1215 if not value:
1216 return None
1217 return value
1218
1219
1220 def get_conf_with_default(cluster, variable):
1221 """
1222 Get a config value that is known to the C++ code.
1223
1224 This will fail if called on variables that are not defined in
1225 common config options.
1226 """
1227 try:
1228 out = _check_output(
1229 args=[
1230 'ceph-osd',
1231 '--cluster={cluster}'.format(
1232 cluster=cluster,
1233 ),
1234 '--show-config-value={variable}'.format(
1235 variable=variable,
1236 ),
1237 ],
1238 close_fds=True,
1239 )
1240 except subprocess.CalledProcessError as e:
1241 raise Error(
1242 'getting variable from configuration failed',
1243 e,
1244 )
1245
1246 value = str(out).split('\n', 1)[0]
1247 return value
1248
1249
1250 def get_fsid(cluster):
1251 """
1252 Get the fsid of the cluster.
1253
1254 :return: The fsid or raises Error.
1255 """
1256 fsid = get_conf_with_default(cluster=cluster, variable='fsid')
1257 # uuids from boost always default to 'the empty uuid'
1258 if fsid == '00000000-0000-0000-0000-000000000000':
1259 raise Error('getting cluster uuid from configuration failed')
1260 return fsid.lower()
1261
1262
1263 def get_dmcrypt_key_path(
1264 _uuid,
1265 key_dir,
1266 luks
1267 ):
1268 """
1269 Get path to dmcrypt key file.
1270
1271 :return: Path to the dmcrypt key file, callers should check for existence.
1272 """
1273 if luks:
1274 path = os.path.join(key_dir, _uuid + ".luks.key")
1275 else:
1276 path = os.path.join(key_dir, _uuid)
1277
1278 return path
1279
1280
1281 def get_dmcrypt_key(
1282 _uuid,
1283 key_dir,
1284 luks
1285 ):
1286 legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
1287 if os.path.exists(legacy_path):
1288 return (legacy_path,)
1289 path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
1290 if os.path.exists(path):
1291 mode = get_oneliner(path, 'key-management-mode')
1292 osd_uuid = get_oneliner(path, 'osd-uuid')
1293 ceph_fsid = read_one_line(path, 'ceph_fsid')
1294 if ceph_fsid is None:
1295 LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
1296 "for cluster name")
1297 cluster = 'ceph'
1298 else:
1299 cluster = find_cluster_by_uuid(ceph_fsid)
1300 if cluster is None:
1301 raise Error('No cluster conf found in ' + SYSCONFDIR +
1302 ' with fsid %s' % ceph_fsid)
1303
1304 if mode == KEY_MANAGEMENT_MODE_V1:
1305 key, stderr, ret = command(
1306 [
1307 'ceph',
1308 '--cluster', cluster,
1309 '--name',
1310 'client.osd-lockbox.' + osd_uuid,
1311 '--keyring',
1312 os.path.join(path, 'keyring'),
1313 'config-key',
1314 'get',
1315 'dm-crypt/osd/' + osd_uuid + '/luks',
1316 ],
1317 )
1318 LOG.debug("stderr " + stderr)
1319 assert ret == 0
1320 return base64.b64decode(key)
1321 else:
1322 raise Error('unknown key-management-mode ' + str(mode))
1323 raise Error('unable to read dm-crypt key', path, legacy_path)
1324
1325
1326 def _dmcrypt_map(
1327 rawdev,
1328 key,
1329 _uuid,
1330 cryptsetup_parameters,
1331 luks,
1332 format_dev=False,
1333 ):
1334 dev = dmcrypt_is_mapped(_uuid)
1335 if dev:
1336 return dev
1337
1338 if isinstance(key, tuple):
1339 # legacy, before lockbox
1340 assert os.path.exists(key[0])
1341 keypath = key[0]
1342 key = None
1343 else:
1344 keypath = '-'
1345 dev = '/dev/mapper/' + _uuid
1346 luksFormat_args = [
1347 'cryptsetup',
1348 '--batch-mode',
1349 '--key-file',
1350 keypath,
1351 'luksFormat',
1352 rawdev,
1353 ] + cryptsetup_parameters
1354
1355 luksOpen_args = [
1356 'cryptsetup',
1357 '--key-file',
1358 keypath,
1359 'luksOpen',
1360 rawdev,
1361 _uuid,
1362 ]
1363
1364 create_args = [
1365 'cryptsetup',
1366 '--key-file',
1367 keypath,
1368 'create',
1369 _uuid,
1370 rawdev,
1371 ] + cryptsetup_parameters
1372
1373 try:
1374 if luks:
1375 if format_dev:
1376 command_with_stdin(luksFormat_args, key)
1377 command_with_stdin(luksOpen_args, key)
1378 else:
1379 # Plain mode has no format function, nor any validation
1380 # that the key is correct.
1381 command_with_stdin(create_args, key)
1382 # set proper ownership of mapped device
1383 command_check_call(['chown', 'ceph:ceph', dev])
1384 return dev
1385
1386 except subprocess.CalledProcessError as e:
1387 raise Error('unable to map device', rawdev, e)
1388
1389
1390 @retry(Error, max_tries=10, wait=0.5, backoff=1.0)
1391 def dmcrypt_unmap(_uuid):
1392 if not os.path.exists('/dev/mapper/' + _uuid):
1393 return
1394 try:
1395 command_check_call(['cryptsetup', 'remove', _uuid])
1396 except subprocess.CalledProcessError as e:
1397 raise Error('unable to unmap device', _uuid, e)
1398
1399
1400 def mount(
1401 dev,
1402 fstype,
1403 options,
1404 ):
1405 """
1406 Mounts a device with given filessystem type and
1407 mount options to a tempfile path under /var/lib/ceph/tmp.
1408 """
1409 # sanity check: none of the arguments are None
1410 if dev is None:
1411 raise ValueError('dev may not be None')
1412 if fstype is None:
1413 raise ValueError('fstype may not be None')
1414
1415 # pick best-of-breed mount options based on fs type
1416 if options is None:
1417 options = MOUNT_OPTIONS.get(fstype, '')
1418
1419 myTemp = STATEDIR + '/tmp'
1420 # mkdtemp expect 'dir' to be existing on the system
1421 # Let's be sure it's always the case
1422 if not os.path.exists(myTemp):
1423 os.makedirs(myTemp)
1424
1425 # mount
1426 path = tempfile.mkdtemp(
1427 prefix='mnt.',
1428 dir=myTemp,
1429 )
1430 try:
1431 LOG.debug('Mounting %s on %s with options %s', dev, path, options)
1432 command_check_call(
1433 [
1434 'mount',
1435 '-t', fstype,
1436 '-o', options,
1437 '--',
1438 dev,
1439 path,
1440 ],
1441 )
1442 if which('restorecon'):
1443 command(
1444 [
1445 'restorecon',
1446 path,
1447 ],
1448 )
1449 except subprocess.CalledProcessError as e:
1450 try:
1451 os.rmdir(path)
1452 except (OSError, IOError):
1453 pass
1454 raise MountError(e)
1455
1456 return path
1457
1458
1459 @retry(UnmountError, max_tries=3, wait=0.5, backoff=1.0)
1460 def unmount(
1461 path,
1462 do_rm=True,
1463 ):
1464 """
1465 Unmount and removes the given mount point.
1466 """
1467 try:
1468 LOG.debug('Unmounting %s', path)
1469 command_check_call(
1470 [
1471 '/bin/umount',
1472 '--',
1473 path,
1474 ],
1475 )
1476 except subprocess.CalledProcessError as e:
1477 raise UnmountError(e)
1478 if not do_rm:
1479 return
1480 os.rmdir(path)
1481
1482
1483 ###########################################
1484
1485 def extract_parted_partition_numbers(partitions):
1486 numbers_as_strings = re.findall('^\d+', partitions, re.MULTILINE)
1487 return map(int, numbers_as_strings)
1488
1489
1490 def get_free_partition_index(dev):
1491 """
1492 Get the next free partition index on a given device.
1493
1494 :return: Index number (> 1 if there is already a partition on the device)
1495 or 1 if there is no partition table.
1496 """
1497 try:
1498 lines = _check_output(
1499 args=[
1500 'parted',
1501 '--machine',
1502 '--',
1503 dev,
1504 'print',
1505 ],
1506 )
1507 except subprocess.CalledProcessError as e:
1508 LOG.info('cannot read partition index; assume it '
1509 'isn\'t present\n (Error: %s)' % e)
1510 return 1
1511
1512 if not lines:
1513 raise Error('parted failed to output anything')
1514 LOG.debug('get_free_partition_index: analyzing ' + lines)
1515 if ('CHS;' not in lines and
1516 'CYL;' not in lines and
1517 'BYT;' not in lines):
1518 raise Error('parted output expected to contain one of ' +
1519 'CHH; CYL; or BYT; : ' + lines)
1520 if os.path.realpath(dev) not in lines:
1521 raise Error('parted output expected to contain ' + dev + ': ' + lines)
1522 _, partitions = lines.split(os.path.realpath(dev))
1523 partition_numbers = extract_parted_partition_numbers(partitions)
1524 if partition_numbers:
1525 return max(partition_numbers) + 1
1526 else:
1527 return 1
1528
1529
1530 def check_journal_reqs(args):
1531 _, _, allows_journal = command([
1532 'ceph-osd', '--check-allows-journal',
1533 '-i', '0',
1534 '--log-file', '$run_dir/$cluster-osd-check.log',
1535 '--cluster', args.cluster,
1536 '--setuser', get_ceph_user(),
1537 '--setgroup', get_ceph_group(),
1538 ])
1539 _, _, wants_journal = command([
1540 'ceph-osd', '--check-wants-journal',
1541 '-i', '0',
1542 '--log-file', '$run_dir/$cluster-osd-check.log',
1543 '--cluster', args.cluster,
1544 '--setuser', get_ceph_user(),
1545 '--setgroup', get_ceph_group(),
1546 ])
1547 _, _, needs_journal = command([
1548 'ceph-osd', '--check-needs-journal',
1549 '-i', '0',
1550 '--log-file', '$run_dir/$cluster-osd-check.log',
1551 '--cluster', args.cluster,
1552 '--setuser', get_ceph_user(),
1553 '--setgroup', get_ceph_group(),
1554 ])
1555 return (not allows_journal, not wants_journal, not needs_journal)
1556
1557
1558 def update_partition(dev, description):
1559 """
1560 Must be called after modifying a partition table so the kernel
1561 know about the change and fire udev events accordingly. A side
1562 effect of partprobe is to remove partitions and add them again.
1563 The first udevadm settle waits for ongoing udev events to
1564 complete, just in case one of them rely on an existing partition
1565 on dev. The second udevadm settle guarantees to the caller that
1566 all udev events related to the partition table change have been
1567 processed, i.e. the 95-ceph-osd.rules actions and mode changes,
1568 group changes etc. are complete.
1569 """
1570 LOG.debug('Calling partprobe on %s device %s', description, dev)
1571 partprobe_ok = False
1572 error = 'unknown error'
1573 partprobe = _get_command_executable(['partprobe'])[0]
1574 for i in range(5):
1575 command_check_call(['udevadm', 'settle', '--timeout=600'])
1576 try:
1577 _check_output(['flock', '-s', dev, partprobe, dev])
1578 partprobe_ok = True
1579 break
1580 except subprocess.CalledProcessError as e:
1581 error = e.output
1582 if ('unable to inform the kernel' not in error and
1583 'Device or resource busy' not in error):
1584 raise
1585 LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
1586 % (dev, error))
1587 time.sleep(60)
1588 if not partprobe_ok:
1589 raise Error('partprobe %s failed : %s' % (dev, error))
1590 command_check_call(['udevadm', 'settle', '--timeout=600'])
1591
1592
1593 def zap_linux(dev):
1594 try:
1595 # Thoroughly wipe all partitions of any traces of
1596 # Filesystems or OSD Journals
1597 #
1598 # In addition we need to write 10M of data to each partition
1599 # to make sure that after re-creating the same partition
1600 # there is no trace left of any previous Filesystem or OSD
1601 # Journal
1602
1603 LOG.debug('Writing zeros to existing partitions on %s', dev)
1604
1605 for partname in list_partitions(dev):
1606 partition = get_dev_path(partname)
1607 command_check_call(
1608 [
1609 'wipefs',
1610 '--all',
1611 partition,
1612 ],
1613 )
1614
1615 command_check_call(
1616 [
1617 'dd',
1618 'if=/dev/zero',
1619 'of={path}'.format(path=partition),
1620 'bs=1M',
1621 'count=10',
1622 ],
1623 )
1624
1625 LOG.debug('Zapping partition table on %s', dev)
1626
1627 # try to wipe out any GPT partition table backups. sgdisk
1628 # isn't too thorough.
1629 lba_size = 4096
1630 size = 33 * lba_size
1631 with open(dev, 'wb') as dev_file:
1632 dev_file.seek(-size, os.SEEK_END)
1633 dev_file.write(size * b'\0')
1634
1635 command_check_call(
1636 [
1637 'sgdisk',
1638 '--zap-all',
1639 '--',
1640 dev,
1641 ],
1642 )
1643 command_check_call(
1644 [
1645 'sgdisk',
1646 '--clear',
1647 '--mbrtogpt',
1648 '--',
1649 dev,
1650 ],
1651 )
1652 update_partition(dev, 'zapped')
1653
1654 except subprocess.CalledProcessError as e:
1655 raise Error(e)
1656
1657
1658 def zap_freebsd(dev):
1659 try:
1660 # For FreeBSD we just need to zap the partition.
1661 command_check_call(
1662 [
1663 'gpart',
1664 'destroy',
1665 '-F',
1666 dev,
1667 ],
1668 )
1669
1670 except subprocess.CalledProcessError as e:
1671 raise Error(e)
1672
1673
1674 def zap(dev):
1675 """
1676 Destroy the partition table and content of a given disk.
1677 """
1678 dev = os.path.realpath(dev)
1679 dmode = os.stat(dev).st_mode
1680 if not stat.S_ISBLK(dmode) or is_partition(dev):
1681 raise Error('not full block device; cannot zap', dev)
1682 if FREEBSD:
1683 zap_freebsd(dev)
1684 else:
1685 zap_linux(dev)
1686
1687
1688 def adjust_symlink(target, path):
1689 create = True
1690 if os.path.lexists(path):
1691 try:
1692 mode = os.lstat(path).st_mode
1693 if stat.S_ISREG(mode):
1694 LOG.debug('Removing old file %s', path)
1695 os.unlink(path)
1696 elif stat.S_ISLNK(mode):
1697 old = os.readlink(path)
1698 if old != target:
1699 LOG.debug('Removing old symlink %s -> %s', path, old)
1700 os.unlink(path)
1701 else:
1702 create = False
1703 except:
1704 raise Error('unable to remove (or adjust) old file (symlink)',
1705 path)
1706 if create:
1707 LOG.debug('Creating symlink %s -> %s', path, target)
1708 try:
1709 os.symlink(target, path)
1710 except:
1711 raise Error('unable to create symlink %s -> %s' % (path, target))
1712
1713
1714 def get_mount_options(cluster, fs_type):
1715 mount_options = get_conf(
1716 cluster,
1717 variable='osd_mount_options_{fstype}'.format(
1718 fstype=fs_type,
1719 ),
1720 )
1721 if mount_options is None:
1722 mount_options = get_conf(
1723 cluster,
1724 variable='osd_fs_mount_options_{fstype}'.format(
1725 fstype=fs_type,
1726 ),
1727 )
1728 else:
1729 # remove whitespaces
1730 mount_options = "".join(mount_options.split())
1731 return mount_options
1732
1733
1734 class Device(object):
1735
1736 def __init__(self, path, args):
1737 self.args = args
1738 self.path = path
1739 self.dev_size = None
1740 self.partitions = {}
1741 self.ptype_map = None
1742 assert not is_partition(self.path)
1743
1744 def create_partition(self, uuid, name, size=0, num=0):
1745 ptype = self.ptype_tobe_for_name(name)
1746 if num == 0:
1747 num = get_free_partition_index(dev=self.path)
1748 if size > 0:
1749 new = '--new={num}:0:+{size}M'.format(num=num, size=size)
1750 if size > self.get_dev_size():
1751 LOG.error('refusing to create %s on %s' % (name, self.path))
1752 LOG.error('%s size (%sM) is bigger than device (%sM)'
1753 % (name, size, self.get_dev_size()))
1754 raise Error('%s device size (%sM) is not big enough for %s'
1755 % (self.path, self.get_dev_size(), name))
1756 else:
1757 new = '--largest-new={num}'.format(num=num)
1758
1759 LOG.debug('Creating %s partition num %d size %d on %s',
1760 name, num, size, self.path)
1761 command_check_call(
1762 [
1763 'sgdisk',
1764 new,
1765 '--change-name={num}:ceph {name}'.format(num=num, name=name),
1766 '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
1767 '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
1768 '--mbrtogpt',
1769 '--',
1770 self.path,
1771 ],
1772 exit=True
1773 )
1774 update_partition(self.path, 'created')
1775 return num
1776
1777 def ptype_tobe_for_name(self, name):
1778 LOG.debug("name = " + name)
1779 if name == 'data':
1780 name = 'osd'
1781 if name == 'lockbox':
1782 if is_mpath(self.path):
1783 return PTYPE['mpath']['lockbox']['tobe']
1784 else:
1785 return PTYPE['regular']['lockbox']['tobe']
1786 if self.ptype_map is None:
1787 partition = DevicePartition.factory(
1788 path=self.path, dev=None, args=self.args)
1789 self.ptype_map = partition.ptype_map
1790 return self.ptype_map[name]['tobe']
1791
1792 def get_partition(self, num):
1793 if num not in self.partitions:
1794 dev = get_partition_dev(self.path, num)
1795 partition = DevicePartition.factory(
1796 path=self.path, dev=dev, args=self.args)
1797 partition.set_partition_number(num)
1798 self.partitions[num] = partition
1799 return self.partitions[num]
1800
1801 def get_dev_size(self):
1802 if self.dev_size is None:
1803 self.dev_size = get_dev_size(self.path)
1804 return self.dev_size
1805
1806 @staticmethod
1807 def factory(path, args):
1808 return Device(path, args)
1809
1810
1811 class DevicePartition(object):
1812
1813 def __init__(self, args):
1814 self.args = args
1815 self.num = None
1816 self.rawdev = None
1817 self.dev = None
1818 self.uuid = None
1819 self.ptype_map = None
1820 self.ptype = None
1821 self.set_variables_ptype()
1822
1823 def get_uuid(self):
1824 if self.uuid is None:
1825 self.uuid = get_partition_uuid(self.rawdev)
1826 return self.uuid
1827
1828 def get_ptype(self):
1829 if self.ptype is None:
1830 self.ptype = get_partition_type(self.rawdev)
1831 return self.ptype
1832
1833 def set_partition_number(self, num):
1834 self.num = num
1835
1836 def get_partition_number(self):
1837 return self.num
1838
1839 def set_dev(self, dev):
1840 self.dev = dev
1841 self.rawdev = dev
1842
1843 def get_dev(self):
1844 return self.dev
1845
1846 def get_rawdev(self):
1847 return self.rawdev
1848
1849 def set_variables_ptype(self):
1850 self.ptype_map = PTYPE['regular']
1851
1852 def ptype_for_name(self, name):
1853 return self.ptype_map[name]['ready']
1854
1855 @staticmethod
1856 @retry(OSError)
1857 def factory(path, dev, args):
1858 dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
1859 if ((path is not None and is_mpath(path)) or
1860 (dev is not None and is_mpath(dev))):
1861 partition = DevicePartitionMultipath(args)
1862 elif dmcrypt_type == 'luks':
1863 partition = DevicePartitionCryptLuks(args)
1864 elif dmcrypt_type == 'plain':
1865 partition = DevicePartitionCryptPlain(args)
1866 else:
1867 partition = DevicePartition(args)
1868 partition.set_dev(dev)
1869 return partition
1870
1871
1872 class DevicePartitionMultipath(DevicePartition):
1873
1874 def set_variables_ptype(self):
1875 self.ptype_map = PTYPE['mpath']
1876
1877
1878 class DevicePartitionCrypt(DevicePartition):
1879
1880 def __init__(self, args):
1881 super(DevicePartitionCrypt, self).__init__(args)
1882 self.osd_dm_key = None
1883 self.cryptsetup_parameters = CryptHelpers.get_cryptsetup_parameters(
1884 self.args)
1885 self.dmcrypt_type = CryptHelpers.get_dmcrypt_type(self.args)
1886 self.dmcrypt_keysize = CryptHelpers.get_dmcrypt_keysize(self.args)
1887
1888 def setup_crypt(self):
1889 pass
1890
1891 def map(self):
1892 self.setup_crypt()
1893 self.dev = _dmcrypt_map(
1894 rawdev=self.rawdev,
1895 key=self.osd_dm_key,
1896 _uuid=self.get_uuid(),
1897 cryptsetup_parameters=self.cryptsetup_parameters,
1898 luks=self.luks(),
1899 format_dev=True,
1900 )
1901
1902 def unmap(self):
1903 self.setup_crypt()
1904 dmcrypt_unmap(self.get_uuid())
1905 self.dev = self.rawdev
1906
1907 def format(self):
1908 self.setup_crypt()
1909 self.map()
1910
1911
1912 class DevicePartitionCryptPlain(DevicePartitionCrypt):
1913
1914 def luks(self):
1915 return False
1916
1917 def setup_crypt(self):
1918 if self.osd_dm_key is not None:
1919 return
1920
1921 self.cryptsetup_parameters += ['--key-size', str(self.dmcrypt_keysize)]
1922
1923 self.osd_dm_key = get_dmcrypt_key(
1924 self.get_uuid(), self.args.dmcrypt_key_dir,
1925 False)
1926
1927 def set_variables_ptype(self):
1928 self.ptype_map = PTYPE['plain']
1929
1930
1931 class DevicePartitionCryptLuks(DevicePartitionCrypt):
1932
1933 def luks(self):
1934 return True
1935
1936 def setup_crypt(self):
1937 if self.osd_dm_key is not None:
1938 return
1939
1940 if self.dmcrypt_keysize == 1024:
1941 # We don't force this into the cryptsetup_parameters,
1942 # as we want the cryptsetup defaults
1943 # to prevail for the actual LUKS key lengths.
1944 pass
1945 else:
1946 self.cryptsetup_parameters += ['--key-size',
1947 str(self.dmcrypt_keysize)]
1948
1949 self.osd_dm_key = get_dmcrypt_key(
1950 self.get_uuid(), self.args.dmcrypt_key_dir,
1951 True)
1952
1953 def set_variables_ptype(self):
1954 self.ptype_map = PTYPE['luks']
1955
1956
1957 class Prepare(object):
1958
1959 def __init__(self, args):
1960 self.args = args
1961
1962 @staticmethod
1963 def parser():
1964 parser = argparse.ArgumentParser(add_help=False)
1965 parser.add_argument(
1966 '--cluster',
1967 metavar='NAME',
1968 default='ceph',
1969 help='cluster name to assign this disk to',
1970 )
1971 parser.add_argument(
1972 '--cluster-uuid',
1973 metavar='UUID',
1974 help='cluster uuid to assign this disk to',
1975 )
1976 parser.add_argument(
1977 '--osd-uuid',
1978 metavar='UUID',
1979 help='unique OSD uuid to assign this disk to',
1980 )
1981 parser.add_argument(
1982 '--osd-id',
1983 metavar='ID',
1984 help='unique OSD id to assign this disk to',
1985 )
1986 parser.add_argument(
1987 '--crush-device-class',
1988 help='crush device class to assign this disk to',
1989 )
1990 parser.add_argument(
1991 '--dmcrypt',
1992 action='store_true', default=None,
1993 help='encrypt DATA and/or JOURNAL devices with dm-crypt',
1994 )
1995 parser.add_argument(
1996 '--dmcrypt-key-dir',
1997 metavar='KEYDIR',
1998 default='/etc/ceph/dmcrypt-keys',
1999 help='directory where dm-crypt keys are stored',
2000 )
2001 parser.add_argument(
2002 '--prepare-key',
2003 metavar='PATH',
2004 help='bootstrap-osd keyring path template (%(default)s)',
2005 default='{statedir}/bootstrap-osd/{cluster}.keyring',
2006 dest='prepare_key_template',
2007 )
2008 parser.add_argument(
2009 '--no-locking',
2010 action='store_true', default=None,
2011 help='let many prepare\'s run in parallel',
2012 )
2013 return parser
2014
2015 @staticmethod
2016 def set_subparser(subparsers):
2017 parents = [
2018 Prepare.parser(),
2019 PrepareData.parser(),
2020 Lockbox.parser(),
2021 ]
2022 parents.extend(PrepareFilestore.parent_parsers())
2023 parents.extend(PrepareBluestore.parent_parsers())
2024 parser = subparsers.add_parser(
2025 'prepare',
2026 parents=parents,
2027 formatter_class=argparse.RawDescriptionHelpFormatter,
2028 description=textwrap.fill(textwrap.dedent("""\
2029 If the --bluestore argument is given, a bluestore objectstore
2030 will be created. If --filestore is provided, a legacy FileStore
2031 objectstore will be created. If neither is specified, we default
2032 to BlueStore.
2033
2034 When an entire device is prepared for bluestore, two
2035 partitions are created. The first partition is for metadata,
2036 the second partition is for blocks that contain data.
2037
2038 Unless explicitly specified with --block.db or
2039 --block.wal, the bluestore DB and WAL data is stored on
2040 the main block device. For instance:
2041
2042 ceph-disk prepare --bluestore /dev/sdc
2043
2044 Will create
2045
2046 /dev/sdc1 for osd metadata
2047 /dev/sdc2 for block, db, and wal data (the rest of the disk)
2048
2049
2050 If either --block.db or --block.wal are specified to be
2051 the same whole device, they will be created as partition
2052 three and four respectively. For instance:
2053
2054 ceph-disk prepare --bluestore \\
2055 --block.db /dev/sdc \\
2056 --block.wal /dev/sdc \\
2057 /dev/sdc
2058
2059 Will create
2060
2061 /dev/sdc1 for osd metadata
2062 /dev/sdc2 for block (the rest of the disk)
2063 /dev/sdc3 for db
2064 /dev/sdc4 for wal
2065
2066 """)),
2067 help='Prepare a directory or disk for a Ceph OSD',
2068 )
2069 parser.set_defaults(
2070 func=Prepare.main,
2071 )
2072 return parser
2073
2074 def prepare(self):
2075 if self.args.no_locking:
2076 self._prepare()
2077 else:
2078 with prepare_lock:
2079 self._prepare()
2080
2081 @staticmethod
2082 def factory(args):
2083 if args.bluestore:
2084 return PrepareBluestore(args)
2085 else:
2086 return PrepareFilestore(args)
2087
2088 @staticmethod
2089 def main(args):
2090 Prepare.factory(args).prepare()
2091
2092
2093 class PrepareFilestore(Prepare):
2094
2095 def __init__(self, args):
2096 super(PrepareFilestore, self).__init__(args)
2097 if args.dmcrypt:
2098 self.lockbox = Lockbox(args)
2099 self.data = PrepareFilestoreData(args)
2100 self.journal = PrepareJournal(args)
2101
2102 @staticmethod
2103 def parent_parsers():
2104 return [
2105 PrepareJournal.parser(),
2106 ]
2107
2108 def _prepare(self):
2109 if self.data.args.dmcrypt:
2110 self.lockbox.prepare()
2111 self.data.prepare(self.journal)
2112
2113
2114 class PrepareBluestore(Prepare):
2115
2116 def __init__(self, args):
2117 super(PrepareBluestore, self).__init__(args)
2118 if args.dmcrypt:
2119 self.lockbox = Lockbox(args)
2120 self.data = PrepareBluestoreData(args)
2121 self.block = PrepareBluestoreBlock(args)
2122 self.blockdb = PrepareBluestoreBlockDB(args)
2123 self.blockwal = PrepareBluestoreBlockWAL(args)
2124
2125 @staticmethod
2126 def parser():
2127 parser = argparse.ArgumentParser(add_help=False)
2128 parser.add_argument(
2129 '--bluestore',
2130 dest='bluestore',
2131 action='store_true', default=True,
2132 help='bluestore objectstore',
2133 )
2134 parser.add_argument(
2135 '--filestore',
2136 dest='bluestore',
2137 action='store_false',
2138 help='filestore objectstore',
2139 )
2140 return parser
2141
2142 @staticmethod
2143 def parent_parsers():
2144 return [
2145 PrepareBluestore.parser(),
2146 PrepareBluestoreBlock.parser(),
2147 PrepareBluestoreBlockDB.parser(),
2148 PrepareBluestoreBlockWAL.parser(),
2149 ]
2150
2151 def _prepare(self):
2152 if self.data.args.dmcrypt:
2153 self.lockbox.prepare()
2154 to_prepare_list = []
2155 if getattr(self.data.args, 'block.db'):
2156 to_prepare_list.append(self.blockdb)
2157 if getattr(self.data.args, 'block.wal'):
2158 to_prepare_list.append(self.blockwal)
2159 to_prepare_list.append(self.block)
2160 self.data.prepare(*to_prepare_list)
2161
2162
2163 class Space(object):
2164
2165 NAMES = ('block', 'journal', 'block.db', 'block.wal')
2166
2167
2168 class PrepareSpace(object):
2169
2170 NONE = 0
2171 FILE = 1
2172 DEVICE = 2
2173
2174 def __init__(self, args):
2175 self.args = args
2176 self.set_type()
2177 self.space_size = self.get_space_size()
2178 if getattr(self.args, self.name + '_uuid') is None:
2179 setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
2180 self.space_symlink = None
2181 self.space_dmcrypt = None
2182
2183 def set_type(self):
2184 name = self.name
2185 args = self.args
2186 if (self.wants_space() and
2187 dev_is_diskdevice(args.data) and
2188 not is_partition(args.data) and
2189 getattr(args, name) is None and
2190 getattr(args, name + '_file') is None):
2191 LOG.info('Will colocate %s with data on %s',
2192 name, args.data)
2193 setattr(args, name, args.data)
2194
2195 if getattr(args, name) is None:
2196 if getattr(args, name + '_dev'):
2197 raise Error('%s is unspecified; not a block device' %
2198 name.capitalize(), getattr(args, name))
2199 self.type = self.NONE
2200 return
2201
2202 if not os.path.exists(getattr(args, name)):
2203 if getattr(args, name + '_dev'):
2204 raise Error('%s does not exist; not a block device' %
2205 name.capitalize(), getattr(args, name))
2206 self.type = self.FILE
2207 return
2208
2209 mode = os.stat(getattr(args, name)).st_mode
2210 if stmode_is_diskdevice(mode):
2211 if getattr(args, name + '_file'):
2212 raise Error('%s is not a regular file' % name.capitalize,
2213 getattr(args, name))
2214 self.type = self.DEVICE
2215 return
2216
2217 if stat.S_ISREG(mode):
2218 if getattr(args, name + '_dev'):
2219 raise Error('%s is not a block device' % name.capitalize,
2220 getattr(args, name))
2221 self.type = self.FILE
2222 return
2223
2224 raise Error('%s %s is neither a block device nor regular file' %
2225 (name.capitalize, getattr(args, name)))
2226
2227 def is_none(self):
2228 return self.type == self.NONE
2229
2230 def is_file(self):
2231 return self.type == self.FILE
2232
2233 def is_device(self):
2234 return self.type == self.DEVICE
2235
2236 @staticmethod
2237 def parser(name, positional=True):
2238 parser = argparse.ArgumentParser(add_help=False)
2239 parser.add_argument(
2240 '--%s-uuid' % name,
2241 metavar='UUID',
2242 help='unique uuid to assign to the %s' % name,
2243 )
2244 parser.add_argument(
2245 '--%s-file' % name,
2246 action='store_true', default=None,
2247 help='verify that %s is a file' % name.upper(),
2248 )
2249 parser.add_argument(
2250 '--%s-dev' % name,
2251 action='store_true', default=None,
2252 help='verify that %s is a block device' % name.upper(),
2253 )
2254
2255 if positional:
2256 parser.add_argument(
2257 name,
2258 metavar=name.upper(),
2259 nargs='?',
2260 help=('path to OSD %s disk block device;' % name +
2261 ' leave out to store %s in file' % name),
2262 )
2263 return parser
2264
2265 def wants_space(self):
2266 return True
2267
2268 def populate_data_path(self, path):
2269 if self.type == self.DEVICE:
2270 self.populate_data_path_device(path)
2271 elif self.type == self.FILE:
2272 self.populate_data_path_file(path)
2273 elif self.type == self.NONE:
2274 pass
2275 else:
2276 raise Error('unexpected type ', self.type)
2277
2278 def populate_data_path_file(self, path):
2279 space_uuid = self.name + '_uuid'
2280 if getattr(self.args, space_uuid) is not None:
2281 write_one_line(path, space_uuid,
2282 getattr(self.args, space_uuid))
2283 if self.space_symlink is not None:
2284 adjust_symlink(self.space_symlink,
2285 os.path.join(path, self.name))
2286
2287 def populate_data_path_device(self, path):
2288 self.populate_data_path_file(path)
2289
2290 if self.space_dmcrypt is not None:
2291 adjust_symlink(self.space_dmcrypt,
2292 os.path.join(path, self.name + '_dmcrypt'))
2293 else:
2294 try:
2295 os.unlink(os.path.join(path, self.name + '_dmcrypt'))
2296 except OSError:
2297 pass
2298
2299 def prepare(self):
2300 if self.type == self.DEVICE:
2301 self.prepare_device()
2302 elif self.type == self.FILE:
2303 self.prepare_file()
2304 elif self.type == self.NONE:
2305 pass
2306 else:
2307 raise Error('unexpected type ', self.type)
2308
2309 def prepare_file(self):
2310 space_filename = getattr(self.args, self.name)
2311 if not os.path.exists(space_filename):
2312 LOG.debug('Creating %s file %s with size 0'
2313 ' (ceph-osd will resize and allocate)',
2314 self.name,
2315 space_filename)
2316 space_file = open(space_filename, 'wb')
2317 space_file.close()
2318 path_set_context(space_filename)
2319
2320 LOG.debug('%s is file %s',
2321 self.name.capitalize(),
2322 space_filename)
2323 LOG.warning('OSD will not be hot-swappable if %s is '
2324 'not the same device as the osd data' %
2325 self.name)
2326 self.space_symlink = space_filename
2327
2328 def prepare_device(self):
2329 reusing_partition = False
2330
2331 if is_partition(getattr(self.args, self.name)):
2332 LOG.debug('%s %s is a partition',
2333 self.name.capitalize(), getattr(self.args, self.name))
2334 partition = DevicePartition.factory(
2335 path=None, dev=getattr(self.args, self.name), args=self.args)
2336 if isinstance(partition, DevicePartitionCrypt):
2337 raise Error(getattr(self.args, self.name) +
2338 ' partition already exists'
2339 ' and --dmcrypt specified')
2340 LOG.warning('OSD will not be hot-swappable' +
2341 ' if ' + self.name + ' is not' +
2342 ' the same device as the osd data')
2343 if partition.get_ptype() == partition.ptype_for_name(self.name):
2344 LOG.debug('%s %s was previously prepared with '
2345 'ceph-disk. Reusing it.',
2346 self.name.capitalize(),
2347 getattr(self.args, self.name))
2348 reusing_partition = True
2349 # Read and reuse the partition uuid from this journal's
2350 # previous life. We reuse the uuid instead of changing it
2351 # because udev does not reliably notice changes to an
2352 # existing partition's GUID. See
2353 # http://tracker.ceph.com/issues/10146
2354 setattr(self.args, self.name + '_uuid', partition.get_uuid())
2355 LOG.debug('Reusing %s with uuid %s',
2356 self.name,
2357 getattr(self.args, self.name + '_uuid'))
2358 else:
2359 LOG.warning('%s %s was not prepared with '
2360 'ceph-disk. Symlinking directly.',
2361 self.name.capitalize(),
2362 getattr(self.args, self.name))
2363 self.space_symlink = getattr(self.args, self.name)
2364 return
2365
2366 self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
2367 uuid=getattr(self.args, self.name + '_uuid'))
2368
2369 if self.args.dmcrypt:
2370 self.space_dmcrypt = self.space_symlink
2371 self.space_symlink = '/dev/mapper/{uuid}'.format(
2372 uuid=getattr(self.args, self.name + '_uuid'))
2373
2374 if reusing_partition:
2375 # confirm that the space_symlink exists. It should since
2376 # this was an active space
2377 # in the past. Continuing otherwise would be futile.
2378 assert os.path.exists(self.space_symlink)
2379 return
2380
2381 num = self.desired_partition_number()
2382
2383 if num == 0:
2384 LOG.warning('OSD will not be hot-swappable if %s '
2385 'is not the same device as the osd data',
2386 self.name)
2387
2388 device = Device.factory(getattr(self.args, self.name), self.args)
2389 num = device.create_partition(
2390 uuid=getattr(self.args, self.name + '_uuid'),
2391 name=self.name,
2392 size=self.space_size,
2393 num=num)
2394
2395 partition = device.get_partition(num)
2396
2397 LOG.debug('%s is GPT partition %s',
2398 self.name.capitalize(),
2399 self.space_symlink)
2400
2401 if isinstance(partition, DevicePartitionCrypt):
2402 partition.format()
2403 partition.map()
2404
2405 command_check_call(
2406 [
2407 'sgdisk',
2408 '--typecode={num}:{uuid}'.format(
2409 num=num,
2410 uuid=partition.ptype_for_name(self.name),
2411 ),
2412 '--',
2413 getattr(self.args, self.name),
2414 ],
2415 )
2416 update_partition(getattr(self.args, self.name), 'prepared')
2417
2418 LOG.debug('%s is GPT partition %s',
2419 self.name.capitalize(),
2420 self.space_symlink)
2421
2422
2423 class PrepareJournal(PrepareSpace):
2424
2425 def __init__(self, args):
2426 self.name = 'journal'
2427 (self.allows_journal,
2428 self.wants_journal,
2429 self.needs_journal) = check_journal_reqs(args)
2430
2431 if args.journal and not self.allows_journal:
2432 raise Error('journal specified but not allowed by osd backend')
2433
2434 super(PrepareJournal, self).__init__(args)
2435
2436 def wants_space(self):
2437 return self.wants_journal
2438
2439 def get_space_size(self):
2440 return int(get_conf_with_default(
2441 cluster=self.args.cluster,
2442 variable='osd_journal_size',
2443 ))
2444
2445 def desired_partition_number(self):
2446 if self.args.journal == self.args.data:
2447 # we're sharing the disk between osd data and journal;
2448 # make journal be partition number 2
2449 num = 2
2450 else:
2451 num = 0
2452 return num
2453
2454 @staticmethod
2455 def parser():
2456 return PrepareSpace.parser('journal')
2457
2458
2459 class PrepareBluestoreBlock(PrepareSpace):
2460
2461 def __init__(self, args):
2462 self.name = 'block'
2463 super(PrepareBluestoreBlock, self).__init__(args)
2464
2465 def get_space_size(self):
2466 block_size = get_conf(
2467 cluster=self.args.cluster,
2468 variable='bluestore_block_size',
2469 )
2470
2471 if block_size is None:
2472 return 0 # get as much space as possible
2473 else:
2474 return int(block_size) / 1048576 # MB
2475
2476 def desired_partition_number(self):
2477 if self.args.block == self.args.data:
2478 num = 2
2479 else:
2480 num = 0
2481 return num
2482
2483 @staticmethod
2484 def parser():
2485 return PrepareSpace.parser('block')
2486
2487
2488 class PrepareBluestoreBlockDB(PrepareSpace):
2489
2490 def __init__(self, args):
2491 self.name = 'block.db'
2492 super(PrepareBluestoreBlockDB, self).__init__(args)
2493
2494 def get_space_size(self):
2495 block_db_size = get_conf(
2496 cluster=self.args.cluster,
2497 variable='bluestore_block_db_size',
2498 )
2499
2500 if block_db_size is None or int(block_db_size) == 0:
2501 block_size = get_conf(
2502 cluster=self.args.cluster,
2503 variable='bluestore_block_size',
2504 )
2505 if block_size is None:
2506 return 1024 # MB
2507 size = int(block_size) / 100 / 1048576
2508 return max(size, 1024) # MB
2509 else:
2510 return int(block_db_size) / 1048576 # MB
2511
2512 def desired_partition_number(self):
2513 if getattr(self.args, 'block.db') == self.args.data:
2514 num = 3
2515 else:
2516 num = 0
2517 return num
2518
2519 def wants_space(self):
2520 return False
2521
2522 @staticmethod
2523 def parser():
2524 parser = PrepareSpace.parser('block.db', positional=False)
2525 parser.add_argument(
2526 '--block.db',
2527 metavar='BLOCKDB',
2528 help='path to the device or file for bluestore block.db',
2529 )
2530 return parser
2531
2532
2533 class PrepareBluestoreBlockWAL(PrepareSpace):
2534
2535 def __init__(self, args):
2536 self.name = 'block.wal'
2537 super(PrepareBluestoreBlockWAL, self).__init__(args)
2538
2539 def get_space_size(self):
2540 block_size = get_conf(
2541 cluster=self.args.cluster,
2542 variable='bluestore_block_wal_size',
2543 )
2544
2545 if block_size is None:
2546 return 576 # MB, default value
2547 else:
2548 return int(block_size) / 1048576 # MB
2549
2550 def desired_partition_number(self):
2551 if getattr(self.args, 'block.wal') == self.args.data:
2552 num = 4
2553 else:
2554 num = 0
2555 return num
2556
2557 def wants_space(self):
2558 return False
2559
2560 @staticmethod
2561 def parser():
2562 parser = PrepareSpace.parser('block.wal', positional=False)
2563 parser.add_argument(
2564 '--block.wal',
2565 metavar='BLOCKWAL',
2566 help='path to the device or file for bluestore block.wal',
2567 )
2568 return parser
2569
2570
2571 class CryptHelpers(object):
2572
2573 @staticmethod
2574 def get_cryptsetup_parameters(args):
2575 cryptsetup_parameters_str = get_conf(
2576 cluster=args.cluster,
2577 variable='osd_cryptsetup_parameters',
2578 )
2579 if cryptsetup_parameters_str is None:
2580 return []
2581 else:
2582 return shlex.split(cryptsetup_parameters_str)
2583
2584 @staticmethod
2585 def get_dmcrypt_keysize(args):
2586 dmcrypt_keysize_str = get_conf(
2587 cluster=args.cluster,
2588 variable='osd_dmcrypt_key_size',
2589 )
2590 dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
2591 if dmcrypt_type == 'luks':
2592 if dmcrypt_keysize_str is None:
2593 # As LUKS will hash the 'passphrase' in .luks.key
2594 # into a key, set a large default
2595 # so if not updated for some time, it is still a
2596 # reasonable value.
2597 #
2598 return 1024
2599 else:
2600 return int(dmcrypt_keysize_str)
2601 elif dmcrypt_type == 'plain':
2602 if dmcrypt_keysize_str is None:
2603 # This value is hard-coded in the udev script
2604 return 256
2605 else:
2606 LOG.warning('ensure the 95-ceph-osd.rules file has '
2607 'been copied to /etc/udev/rules.d '
2608 'and modified to call cryptsetup '
2609 'with --key-size=%s' % dmcrypt_keysize_str)
2610 return int(dmcrypt_keysize_str)
2611 else:
2612 return 0
2613
2614 @staticmethod
2615 def get_dmcrypt_type(args):
2616 if hasattr(args, 'dmcrypt') and args.dmcrypt:
2617 dmcrypt_type = get_conf(
2618 cluster=args.cluster,
2619 variable='osd_dmcrypt_type',
2620 )
2621
2622 if dmcrypt_type is None or dmcrypt_type == 'luks':
2623 return 'luks'
2624 elif dmcrypt_type == 'plain':
2625 return 'plain'
2626 else:
2627 raise Error('invalid osd_dmcrypt_type parameter '
2628 '(must be luks or plain): ', dmcrypt_type)
2629 else:
2630 return None
2631
2632
2633 class Secrets(object):
2634
2635 def __init__(self):
2636 secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
2637 LOG.debug("stderr " + stderr)
2638 assert ret == 0
2639 self.keys = {
2640 'cephx_secret': secret.strip(),
2641 }
2642
2643 def write_osd_keyring(self, keyring, osd_id):
2644 command_check_call(
2645 [
2646 'ceph-authtool', keyring,
2647 '--create-keyring',
2648 '--name', 'osd.' + str(osd_id),
2649 '--add-key', self.keys['cephx_secret'],
2650 ])
2651 path_set_context(keyring)
2652
2653 def get_json(self):
2654 return bytearray(json.dumps(self.keys), 'ascii')
2655
2656
2657 class LockboxSecrets(Secrets):
2658
2659 def __init__(self, args):
2660 super(LockboxSecrets, self).__init__()
2661
2662 key_size = CryptHelpers.get_dmcrypt_keysize(args)
2663 key = open('/dev/urandom', 'rb').read(key_size / 8)
2664 base64_key = base64.b64encode(key).decode('ascii')
2665
2666 secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
2667 LOG.debug("stderr " + stderr)
2668 assert ret == 0
2669
2670 self.keys.update({
2671 'dmcrypt_key': base64_key,
2672 'cephx_lockbox_secret': secret.strip(),
2673 })
2674
2675 def write_lockbox_keyring(self, path, osd_uuid):
2676 keyring = os.path.join(path, 'keyring')
2677 command_check_call(
2678 [
2679 'ceph-authtool', keyring,
2680 '--create-keyring',
2681 '--name', 'client.osd-lockbox.' + osd_uuid,
2682 '--add-key', self.keys['cephx_lockbox_secret'],
2683 ])
2684 path_set_context(keyring)
2685
2686
2687 class Lockbox(object):
2688
2689 def __init__(self, args):
2690 self.args = args
2691 self.partition = None
2692 self.device = None
2693
2694 if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
2695 self.args.lockbox = self.args.data
2696
2697 def set_partition(self, partition):
2698 self.partition = partition
2699
2700 @staticmethod
2701 def parser():
2702 parser = argparse.ArgumentParser(add_help=False)
2703 parser.add_argument(
2704 '--lockbox',
2705 help='path to the device to store the lockbox',
2706 )
2707 parser.add_argument(
2708 '--lockbox-uuid',
2709 metavar='UUID',
2710 help='unique lockbox uuid',
2711 )
2712 return parser
2713
2714 def create_partition(self):
2715 self.device = Device.factory(self.args.lockbox, argparse.Namespace())
2716 partition_number = 5
2717 self.device.create_partition(uuid=self.args.lockbox_uuid,
2718 name='lockbox',
2719 num=partition_number,
2720 size=10) # MB
2721 return self.device.get_partition(partition_number)
2722
2723 def set_or_create_partition(self):
2724 if is_partition(self.args.lockbox):
2725 LOG.debug('OSD lockbox device %s is a partition',
2726 self.args.lockbox)
2727 self.partition = DevicePartition.factory(
2728 path=None, dev=self.args.lockbox, args=self.args)
2729 ptype = self.partition.get_ptype()
2730 ready = Ptype.get_ready_by_name('lockbox')
2731 if ptype not in ready:
2732 LOG.warning('incorrect partition UUID: %s, expected %s'
2733 % (ptype, str(ready)))
2734 else:
2735 LOG.debug('Creating osd partition on %s',
2736 self.args.lockbox)
2737 self.partition = self.create_partition()
2738
2739 def create_key(self):
2740 cluster = self.args.cluster
2741 bootstrap = self.args.prepare_key_template.format(cluster=cluster,
2742 statedir=STATEDIR)
2743 path = self.get_mount_point()
2744 secrets = LockboxSecrets(self.args)
2745 id_arg = self.args.osd_id and [self.args.osd_id] or []
2746 osd_id = command_with_stdin(
2747 [
2748 'ceph',
2749 '--cluster', cluster,
2750 '--name', 'client.bootstrap-osd',
2751 '--keyring', bootstrap,
2752 '-i', '-',
2753 'osd', 'new', self.args.osd_uuid,
2754 ] + id_arg,
2755 secrets.get_json()
2756 )
2757 secrets.write_lockbox_keyring(path, self.args.osd_uuid)
2758 osd_id = must_be_one_line(osd_id)
2759 check_osd_id(osd_id)
2760 write_one_line(path, 'whoami', osd_id)
2761 secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id)
2762 write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)
2763
2764 def symlink_spaces(self, path):
2765 target = self.get_mount_point()
2766 for name in Space.NAMES:
2767 if (hasattr(self.args, name + '_uuid') and
2768 getattr(self.args, name + '_uuid')):
2769 uuid = getattr(self.args, name + '_uuid')
2770 symlink = os.path.join(STATEDIR, 'osd-lockbox', uuid)
2771 adjust_symlink(target, symlink)
2772 write_one_line(path, name + '-uuid', uuid)
2773
2774 def populate(self):
2775 maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
2776 args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
2777 LOG.debug('Creating lockbox fs on %s: ' + str(" ".join(args)))
2778 command_check_call(args)
2779 path = self.get_mount_point()
2780 maybe_mkdir(path)
2781 args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
2782 LOG.debug('Mounting lockbox ' + str(" ".join(args)))
2783 command_check_call(args)
2784 write_one_line(path, 'osd-uuid', self.args.osd_uuid)
2785 if self.args.cluster_uuid is None:
2786 self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
2787 write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
2788 self.create_key()
2789 self.symlink_spaces(path)
2790 write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
2791 if self.device is not None:
2792 command_check_call(
2793 [
2794 'sgdisk',
2795 '--typecode={num}:{uuid}'.format(
2796 num=self.partition.get_partition_number(),
2797 uuid=self.partition.ptype_for_name('lockbox'),
2798 ),
2799 '--',
2800 get_partition_base(self.partition.get_dev()),
2801 ],
2802 )
2803
2804 def get_mount_point(self):
2805 return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)
2806
2807 def get_osd_uuid(self):
2808 return self.args.osd_uuid
2809
2810 def activate(self):
2811 path = is_mounted(self.partition.get_dev())
2812 if path:
2813 LOG.info("Lockbox already mounted at " + path)
2814 return
2815
2816 path = tempfile.mkdtemp(
2817 prefix='mnt.',
2818 dir=STATEDIR + '/tmp',
2819 )
2820 args = ['mount', '-t', 'ext4', '-o', 'ro',
2821 self.partition.get_dev(),
2822 path]
2823 LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
2824 command_check_call(args)
2825 self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
2826 command_check_call(['umount', path])
2827 LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
2828 args = ['mount', '-t', 'ext4', '-o', 'ro',
2829 self.partition.get_dev(),
2830 self.get_mount_point()]
2831 command_check_call(args)
2832 for name in Space.NAMES + ('osd',):
2833 uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
2834 if os.path.exists(uuid_path):
2835 uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
2836 dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
2837 args = ['ceph-disk', 'trigger', dev]
2838 command_check_call(args)
2839
2840 def prepare(self):
2841 verify_not_in_use(self.args.lockbox, check_partitions=True)
2842 self.set_or_create_partition()
2843 self.populate()
2844
2845
2846 class PrepareData(object):
2847
2848 FILE = 1
2849 DEVICE = 2
2850
2851 def __init__(self, args):
2852
2853 self.args = args
2854 self.partition = None
2855 self.set_type()
2856 if self.args.cluster_uuid is None:
2857 self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
2858
2859 if self.args.osd_uuid is None:
2860 self.args.osd_uuid = str(uuid.uuid4())
2861
2862 def set_type(self):
2863 dmode = os.stat(self.args.data).st_mode
2864
2865 if stat.S_ISDIR(dmode):
2866 self.type = self.FILE
2867 elif stmode_is_diskdevice(dmode):
2868 self.type = self.DEVICE
2869 else:
2870 raise Error('not a dir or block device', self.args.data)
2871
2872 def is_file(self):
2873 return self.type == self.FILE
2874
2875 def is_device(self):
2876 return self.type == self.DEVICE
2877
2878 @staticmethod
2879 def parser():
2880 parser = argparse.ArgumentParser(add_help=False)
2881 parser.add_argument(
2882 '--fs-type',
2883 help='file system type to use (e.g. "ext4")',
2884 )
2885 parser.add_argument(
2886 '--zap-disk',
2887 action='store_true', default=None,
2888 help='destroy the partition table (and content) of a disk',
2889 )
2890 parser.add_argument(
2891 '--data-dir',
2892 action='store_true', default=None,
2893 help='verify that DATA is a dir',
2894 )
2895 parser.add_argument(
2896 '--data-dev',
2897 action='store_true', default=None,
2898 help='verify that DATA is a block device',
2899 )
2900 parser.add_argument(
2901 'data',
2902 metavar='DATA',
2903 help='path to OSD data (a disk block device or directory)',
2904 )
2905 return parser
2906
2907 def populate_data_path_file(self, path, *to_prepare_list):
2908 self.populate_data_path(path, *to_prepare_list)
2909
2910 def populate_data_path(self, path, *to_prepare_list):
2911 if os.path.exists(os.path.join(path, 'magic')):
2912 LOG.debug('Data dir %s already exists', path)
2913 return
2914 else:
2915 LOG.debug('Preparing osd data dir %s', path)
2916
2917 if self.args.osd_uuid is None:
2918 self.args.osd_uuid = str(uuid.uuid4())
2919
2920 write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
2921 write_one_line(path, 'fsid', self.args.osd_uuid)
2922 if self.args.osd_id:
2923 write_one_line(path, 'wanttobe', self.args.osd_id)
2924 if self.args.crush_device_class:
2925 write_one_line(path, 'crush_device_class',
2926 self.args.crush_device_class)
2927 write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
2928
2929 for to_prepare in to_prepare_list:
2930 to_prepare.populate_data_path(path)
2931
2932 def prepare(self, *to_prepare_list):
2933 if self.type == self.DEVICE:
2934 self.prepare_device(*to_prepare_list)
2935 elif self.type == self.FILE:
2936 self.prepare_file(*to_prepare_list)
2937 else:
2938 raise Error('unexpected type ', self.type)
2939
2940 def prepare_file(self, *to_prepare_list):
2941
2942 if not os.path.exists(self.args.data):
2943 raise Error('data path for directory does not exist',
2944 self.args.data)
2945
2946 if self.args.data_dev:
2947 raise Error('data path is not a block device', self.args.data)
2948
2949 for to_prepare in to_prepare_list:
2950 to_prepare.prepare()
2951
2952 self.populate_data_path_file(self.args.data, *to_prepare_list)
2953
2954 def sanity_checks(self):
2955 if not os.path.exists(self.args.data):
2956 raise Error('data path for device does not exist',
2957 self.args.data)
2958 verify_not_in_use(self.args.data,
2959 check_partitions=not self.args.dmcrypt)
2960
2961 def set_variables(self):
2962 if self.args.fs_type is None:
2963 self.args.fs_type = get_conf(
2964 cluster=self.args.cluster,
2965 variable='osd_mkfs_type',
2966 )
2967 if self.args.fs_type is None:
2968 self.args.fs_type = get_conf(
2969 cluster=self.args.cluster,
2970 variable='osd_fs_type',
2971 )
2972 if self.args.fs_type is None:
2973 self.args.fs_type = DEFAULT_FS_TYPE
2974
2975 self.mkfs_args = get_conf(
2976 cluster=self.args.cluster,
2977 variable='osd_mkfs_options_{fstype}'.format(
2978 fstype=self.args.fs_type,
2979 ),
2980 )
2981 if self.mkfs_args is None:
2982 self.mkfs_args = get_conf(
2983 cluster=self.args.cluster,
2984 variable='osd_fs_mkfs_options_{fstype}'.format(
2985 fstype=self.args.fs_type,
2986 ),
2987 )
2988
2989 self.mount_options = get_mount_options(cluster=self.args.cluster,
2990 fs_type=self.args.fs_type)
2991
2992 if self.args.osd_uuid is None:
2993 self.args.osd_uuid = str(uuid.uuid4())
2994
2995 def prepare_device(self, *to_prepare_list):
2996 self.sanity_checks()
2997 self.set_variables()
2998 if self.args.zap_disk is not None:
2999 zap(self.args.data)
3000
3001 def create_data_partition(self):
3002 device = Device.factory(self.args.data, self.args)
3003 partition_number = 1
3004 device.create_partition(uuid=self.args.osd_uuid,
3005 name='data',
3006 num=partition_number,
3007 size=self.get_space_size())
3008 return device.get_partition(partition_number)
3009
3010 def set_data_partition(self):
3011 if is_partition(self.args.data):
3012 LOG.debug('OSD data device %s is a partition',
3013 self.args.data)
3014 self.partition = DevicePartition.factory(
3015 path=None, dev=self.args.data, args=self.args)
3016 ptype = self.partition.get_ptype()
3017 ready = Ptype.get_ready_by_name('osd')
3018 if ptype not in ready:
3019 LOG.warning('incorrect partition UUID: %s, expected %s'
3020 % (ptype, str(ready)))
3021 else:
3022 LOG.debug('Creating osd partition on %s',
3023 self.args.data)
3024 self.partition = self.create_data_partition()
3025
3026 def populate_data_path_device(self, *to_prepare_list):
3027 partition = self.partition
3028
3029 if isinstance(partition, DevicePartitionCrypt):
3030 partition.map()
3031
3032 try:
3033 args = [
3034 'mkfs',
3035 '-t',
3036 self.args.fs_type,
3037 ]
3038 if self.mkfs_args is not None:
3039 args.extend(self.mkfs_args.split())
3040 if self.args.fs_type == 'xfs':
3041 args.extend(['-f']) # always force
3042 else:
3043 args.extend(MKFS_ARGS.get(self.args.fs_type, []))
3044 args.extend([
3045 '--',
3046 partition.get_dev(),
3047 ])
3048 LOG.debug('Creating %s fs on %s',
3049 self.args.fs_type, partition.get_dev())
3050 command_check_call(args, exit=True)
3051
3052 path = mount(dev=partition.get_dev(),
3053 fstype=self.args.fs_type,
3054 options=self.mount_options)
3055
3056 try:
3057 self.populate_data_path(path, *to_prepare_list)
3058 finally:
3059 path_set_context(path)
3060 unmount(path)
3061 finally:
3062 if isinstance(partition, DevicePartitionCrypt):
3063 partition.unmap()
3064
3065 if not is_partition(self.args.data):
3066 command_check_call(
3067 [
3068 'sgdisk',
3069 '--typecode=%d:%s' % (partition.get_partition_number(),
3070 partition.ptype_for_name('osd')),
3071 '--',
3072 self.args.data,
3073 ],
3074 exit=True,
3075 )
3076 update_partition(self.args.data, 'prepared')
3077 command_check_call(['udevadm', 'trigger',
3078 '--action=add',
3079 '--sysname-match',
3080 os.path.basename(partition.rawdev)])
3081
3082
3083 class PrepareFilestoreData(PrepareData):
3084
3085 def get_space_size(self):
3086 return 0 # get as much space as possible
3087
3088 def prepare_device(self, *to_prepare_list):
3089 super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
3090 for to_prepare in to_prepare_list:
3091 to_prepare.prepare()
3092 self.set_data_partition()
3093 self.populate_data_path_device(*to_prepare_list)
3094
3095 def populate_data_path(self, path, *to_prepare_list):
3096 super(PrepareFilestoreData, self).populate_data_path(path,
3097 *to_prepare_list)
3098 write_one_line(path, 'type', 'filestore')
3099
3100
3101 class PrepareBluestoreData(PrepareData):
3102
3103 def get_space_size(self):
3104 return 100 # MB
3105
3106 def prepare_device(self, *to_prepare_list):
3107 super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
3108 self.set_data_partition()
3109 for to_prepare in to_prepare_list:
3110 to_prepare.prepare()
3111 self.populate_data_path_device(*to_prepare_list)
3112
3113 def populate_data_path(self, path, *to_prepare_list):
3114 super(PrepareBluestoreData, self).populate_data_path(path,
3115 *to_prepare_list)
3116 write_one_line(path, 'type', 'bluestore')
3117
3118
3119 def mkfs(
3120 path,
3121 cluster,
3122 osd_id,
3123 fsid,
3124 keyring,
3125 ):
3126 monmap = os.path.join(path, 'activate.monmap')
3127 command_check_call(
3128 [
3129 'ceph',
3130 '--cluster', cluster,
3131 '--name', 'client.bootstrap-osd',
3132 '--keyring', keyring,
3133 'mon', 'getmap', '-o', monmap,
3134 ],
3135 )
3136
3137 osd_type = read_one_line(path, 'type')
3138
3139 if osd_type == 'bluestore':
3140 command_check_call(
3141 [
3142 'ceph-osd',
3143 '--cluster', cluster,
3144 '--mkfs',
3145 '-i', osd_id,
3146 '--monmap', monmap,
3147 '--osd-data', path,
3148 '--osd-uuid', fsid,
3149 '--setuser', get_ceph_user(),
3150 '--setgroup', get_ceph_group(),
3151 ],
3152 )
3153 elif osd_type == 'filestore':
3154 command_check_call(
3155 [
3156 'ceph-osd',
3157 '--cluster', cluster,
3158 '--mkfs',
3159 '-i', osd_id,
3160 '--monmap', monmap,
3161 '--osd-data', path,
3162 '--osd-journal', os.path.join(path, 'journal'),
3163 '--osd-uuid', fsid,
3164 '--setuser', get_ceph_user(),
3165 '--setgroup', get_ceph_group(),
3166 ],
3167 )
3168 else:
3169 raise Error('unrecognized objectstore type %s' % osd_type)
3170
3171
3172 def get_mount_point(cluster, osd_id):
3173 parent = STATEDIR + '/osd'
3174 return os.path.join(
3175 parent,
3176 '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id),
3177 )
3178
3179
3180 def move_mount(
3181 dev,
3182 path,
3183 cluster,
3184 osd_id,
3185 fstype,
3186 mount_options,
3187 ):
3188 LOG.debug('Moving mount to final location...')
3189 osd_data = get_mount_point(cluster, osd_id)
3190 maybe_mkdir(osd_data)
3191
3192 # pick best-of-breed mount options based on fs type
3193 if mount_options is None:
3194 mount_options = MOUNT_OPTIONS.get(fstype, '')
3195
3196 # we really want to mount --move, but that is not supported when
3197 # the parent mount is shared, as it is by default on RH, Fedora,
3198 # and probably others. Also, --bind doesn't properly manipulate
3199 # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
3200 # this being 2013. Instead, mount the original device at the final
3201 # location.
3202 command_check_call(
3203 [
3204 '/bin/mount',
3205 '-o',
3206 mount_options,
3207 '--',
3208 dev,
3209 osd_data,
3210 ],
3211 )
3212 command_check_call(
3213 [
3214 '/bin/umount',
3215 '-l', # lazy, in case someone else is peeking at the
3216 # wrong moment
3217 '--',
3218 path,
3219 ],
3220 )
3221
3222
3223 #
3224 # For upgrade purposes, to make sure there are no competing units,
3225 # both --runtime unit and the default should be disabled. There can be
3226 # two units at the same time: one with --runtime and another without
3227 # it. If, for any reason (manual or ceph-disk) the two units co-exist
3228 # they will compete with each other.
3229 #
3230 def systemd_disable(
3231 path,
3232 osd_id,
3233 ):
3234 # ensure there is no duplicate ceph-osd@.service
3235 for style in ([], ['--runtime']):
3236 command_check_call(
3237 [
3238 'systemctl',
3239 'disable',
3240 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3241 ] + style,
3242 )
3243
3244
3245 def systemd_start(
3246 path,
3247 osd_id,
3248 ):
3249 systemd_disable(path, osd_id)
3250 if os.path.ismount(path):
3251 style = ['--runtime']
3252 else:
3253 style = []
3254 command_check_call(
3255 [
3256 'systemctl',
3257 'enable',
3258 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3259 ] + style,
3260 )
3261 command_check_call(
3262 [
3263 'systemctl',
3264 'start',
3265 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3266 ],
3267 )
3268
3269
3270 def systemd_stop(
3271 path,
3272 osd_id,
3273 ):
3274 systemd_disable(path, osd_id)
3275 command_check_call(
3276 [
3277 'systemctl',
3278 'stop',
3279 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3280 ],
3281 )
3282
3283
3284 def start_daemon(
3285 cluster,
3286 osd_id,
3287 ):
3288 LOG.debug('Starting %s osd.%s...', cluster, osd_id)
3289
3290 path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3291 cluster=cluster, osd_id=osd_id)
3292
3293 try:
3294 if os.path.exists(os.path.join(path, 'upstart')):
3295 command_check_call(
3296 [
3297 '/sbin/initctl',
3298 # use emit, not start, because start would fail if the
3299 # instance was already running
3300 'emit',
3301 # since the daemon starting doesn't guarantee much about
3302 # the service being operational anyway, don't bother
3303 # waiting for it
3304 '--no-wait',
3305 '--',
3306 'ceph-osd',
3307 'cluster={cluster}'.format(cluster=cluster),
3308 'id={osd_id}'.format(osd_id=osd_id),
3309 ],
3310 )
3311 elif os.path.exists(os.path.join(path, 'sysvinit')):
3312 if os.path.exists('/usr/sbin/service'):
3313 svc = '/usr/sbin/service'
3314 else:
3315 svc = '/sbin/service'
3316 command_check_call(
3317 [
3318 svc,
3319 'ceph',
3320 '--cluster',
3321 '{cluster}'.format(cluster=cluster),
3322 'start',
3323 'osd.{osd_id}'.format(osd_id=osd_id),
3324 ],
3325 )
3326 elif os.path.exists(os.path.join(path, 'systemd')):
3327 systemd_start(path, osd_id)
3328 elif os.path.exists(os.path.join(path, 'openrc')):
3329 base_script = '/etc/init.d/ceph-osd'
3330 osd_script = '{base}.{osd_id}'.format(
3331 base=base_script,
3332 osd_id=osd_id
3333 )
3334 if not os.path.exists(osd_script):
3335 os.symlink(base_script, osd_script)
3336 command_check_call(
3337 [
3338 osd_script,
3339 'start',
3340 ],
3341 )
3342 elif os.path.exists(os.path.join(path, 'bsdrc')):
3343 command_check_call(
3344 [
3345 '/usr/sbin/service', 'ceph', 'start',
3346 'osd.{osd_id}'.format(osd_id=osd_id),
3347 ],
3348 )
3349 else:
3350 raise Error('{cluster} osd.{osd_id} '
3351 'is not tagged with an init system'
3352 .format(
3353 cluster=cluster,
3354 osd_id=osd_id,
3355 ))
3356 except subprocess.CalledProcessError as e:
3357 raise Error('ceph osd start failed', e)
3358
3359
3360 def stop_daemon(
3361 cluster,
3362 osd_id,
3363 ):
3364 LOG.debug('Stoping %s osd.%s...', cluster, osd_id)
3365
3366 path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3367 cluster=cluster, osd_id=osd_id)
3368
3369 try:
3370 if os.path.exists(os.path.join(path, 'upstart')):
3371 command_check_call(
3372 [
3373 '/sbin/initctl',
3374 'stop',
3375 'ceph-osd',
3376 'cluster={cluster}'.format(cluster=cluster),
3377 'id={osd_id}'.format(osd_id=osd_id),
3378 ],
3379 )
3380 elif os.path.exists(os.path.join(path, 'sysvinit')):
3381 svc = which('service')
3382 command_check_call(
3383 [
3384 svc,
3385 'ceph',
3386 '--cluster',
3387 '{cluster}'.format(cluster=cluster),
3388 'stop',
3389 'osd.{osd_id}'.format(osd_id=osd_id),
3390 ],
3391 )
3392 elif os.path.exists(os.path.join(path, 'systemd')):
3393 systemd_stop(path, osd_id)
3394 elif os.path.exists(os.path.join(path, 'openrc')):
3395 command_check_call(
3396 [
3397 '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
3398 'stop',
3399 ],
3400 )
3401 elif os.path.exists(os.path.join(path, 'bsdrc')):
3402 command_check_call(
3403 [
3404 '/usr/local/etc/rc.d/ceph stop osd.{osd_id}'
3405 .format(osd_id=osd_id),
3406 ],
3407 )
3408 else:
3409 raise Error('{cluster} osd.{osd_id} '
3410 'is not tagged with an init system'
3411 .format(cluster=cluster, osd_id=osd_id))
3412 except subprocess.CalledProcessError as e:
3413 raise Error('ceph osd stop failed', e)
3414
3415
3416 def detect_fstype(dev):
3417 if FREEBSD:
3418 fstype = _check_output(
3419 args=[
3420 'fstyp',
3421 '-u',
3422 dev,
3423 ],
3424 )
3425 else:
3426 fstype = _check_output(
3427 args=[
3428 '/sbin/blkid',
3429 # we don't want stale cached results
3430 '-p',
3431 '-s', 'TYPE',
3432 '-o', 'value',
3433 '--',
3434 dev,
3435 ],
3436 )
3437 fstype = must_be_one_line(fstype)
3438 return fstype
3439
3440
3441 def dmcrypt_is_mapped(uuid):
3442 path = os.path.join('/dev/mapper', uuid)
3443 if os.path.exists(path):
3444 return path
3445 else:
3446 return None
3447
3448
3449 def dmcrypt_map(dev, dmcrypt_key_dir):
3450 ptype = get_partition_type(dev)
3451 if ptype in Ptype.get_ready_by_type('plain'):
3452 luks = False
3453 cryptsetup_parameters = ['--key-size', '256']
3454 elif ptype in Ptype.get_ready_by_type('luks'):
3455 luks = True
3456 cryptsetup_parameters = []
3457 else:
3458 raise Error('--dmcrypt called for dev %s with invalid ptype %s'
3459 % (dev, ptype))
3460 part_uuid = get_partition_uuid(dev)
3461 dmcrypt_key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
3462 return _dmcrypt_map(
3463 rawdev=dev,
3464 key=dmcrypt_key,
3465 _uuid=part_uuid,
3466 cryptsetup_parameters=cryptsetup_parameters,
3467 luks=luks,
3468 format_dev=False,
3469 )
3470
3471
3472 def mount_activate(
3473 dev,
3474 activate_key_template,
3475 init,
3476 dmcrypt,
3477 dmcrypt_key_dir,
3478 reactivate=False,
3479 ):
3480
3481 if dmcrypt:
3482 part_uuid = get_partition_uuid(dev)
3483 dev = dmcrypt_map(dev, dmcrypt_key_dir)
3484 try:
3485 fstype = detect_fstype(dev=dev)
3486 except (subprocess.CalledProcessError,
3487 TruncatedLineError,
3488 TooManyLinesError) as e:
3489 raise FilesystemTypeError(
3490 'device {dev}'.format(dev=dev),
3491 e,
3492 )
3493
3494 # TODO always using mount options from cluster=ceph for
3495 # now; see http://tracker.newdream.net/issues/3253
3496 mount_options = get_mount_options(cluster='ceph', fs_type=fstype)
3497
3498 path = mount(dev=dev, fstype=fstype, options=mount_options)
3499
3500 # check if the disk is deactive, change the journal owner, group
3501 # mode for correct user and group.
3502 if os.path.exists(os.path.join(path, 'deactive')):
3503 # logging to syslog will help us easy to know udev triggered failure
3504 if not reactivate:
3505 unmount(path)
3506 # we need to unmap again because dmcrypt map will create again
3507 # on bootup stage (due to deactivate)
3508 if '/dev/mapper/' in dev:
3509 part_uuid = dev.replace('/dev/mapper/', '')
3510 dmcrypt_unmap(part_uuid)
3511 LOG.info('OSD deactivated! reactivate with: --reactivate')
3512 raise Error('OSD deactivated! reactivate with: --reactivate')
3513 # flag to activate a deactive osd.
3514 deactive = True
3515 else:
3516 deactive = False
3517
3518 osd_id = None
3519 cluster = None
3520 try:
3521 (osd_id, cluster) = activate(path, activate_key_template, init)
3522
3523 # Now active successfully
3524 # If we got reactivate and deactive, remove the deactive file
3525 if deactive and reactivate:
3526 os.remove(os.path.join(path, 'deactive'))
3527 LOG.info('Remove `deactive` file.')
3528
3529 # check if the disk is already active, or if something else is already
3530 # mounted there
3531 active = False
3532 other = False
3533 src_dev = os.stat(path).st_dev
3534 try:
3535 dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
3536 cluster=cluster,
3537 osd_id=osd_id)).st_dev
3538 if src_dev == dst_dev:
3539 active = True
3540 else:
3541 parent_dev = os.stat(STATEDIR + '/osd').st_dev
3542 if dst_dev != parent_dev:
3543 other = True
3544 elif os.listdir(get_mount_point(cluster, osd_id)):
3545 LOG.info(get_mount_point(cluster, osd_id) +
3546 " is not empty, won't override")
3547 other = True
3548
3549 except OSError:
3550 pass
3551
3552 if active:
3553 LOG.info('%s osd.%s already mounted in position; unmounting ours.'
3554 % (cluster, osd_id))
3555 unmount(path)
3556 elif other:
3557 raise Error('another %s osd.%s already mounted in position '
3558 '(old/different cluster instance?); unmounting ours.'
3559 % (cluster, osd_id))
3560 else:
3561 move_mount(
3562 dev=dev,
3563 path=path,
3564 cluster=cluster,
3565 osd_id=osd_id,
3566 fstype=fstype,
3567 mount_options=mount_options,
3568 )
3569 return cluster, osd_id
3570
3571 except:
3572 LOG.error('Failed to activate')
3573 unmount(path)
3574 raise
3575 finally:
3576 # remove our temp dir
3577 if os.path.exists(path):
3578 os.rmdir(path)
3579
3580
3581 def activate_dir(
3582 path,
3583 activate_key_template,
3584 init,
3585 ):
3586
3587 if not os.path.exists(path):
3588 raise Error(
3589 'directory %s does not exist' % path
3590 )
3591
3592 (osd_id, cluster) = activate(path, activate_key_template, init)
3593
3594 if init not in (None, 'none'):
3595 canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3596 cluster=cluster,
3597 osd_id=osd_id)
3598 if path != canonical:
3599 # symlink it from the proper location
3600 create = True
3601 if os.path.lexists(canonical):
3602 old = os.readlink(canonical)
3603 if old != path:
3604 LOG.debug('Removing old symlink %s -> %s', canonical, old)
3605 try:
3606 os.unlink(canonical)
3607 except:
3608 raise Error('unable to remove old symlink', canonical)
3609 else:
3610 create = False
3611 if create:
3612 LOG.debug('Creating symlink %s -> %s', canonical, path)
3613 try:
3614 os.symlink(path, canonical)
3615 except:
3616 raise Error('unable to create symlink %s -> %s'
3617 % (canonical, path))
3618
3619 return cluster, osd_id
3620
3621
3622 def find_cluster_by_uuid(_uuid):
3623 """
3624 Find a cluster name by searching /etc/ceph/*.conf for a conf file
3625 with the right uuid.
3626 """
3627 _uuid = _uuid.lower()
3628 no_fsid = []
3629 if not os.path.exists(SYSCONFDIR):
3630 return None
3631 for conf_file in os.listdir(SYSCONFDIR):
3632 if not conf_file.endswith('.conf'):
3633 continue
3634 cluster = conf_file[:-5]
3635 try:
3636 fsid = get_fsid(cluster)
3637 except Error as e:
3638 if 'getting cluster uuid from configuration failed' not in str(e):
3639 raise e
3640 no_fsid.append(cluster)
3641 else:
3642 if fsid == _uuid:
3643 return cluster
3644 # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
3645 if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
3646 LOG.warning('No fsid defined in ' + SYSCONFDIR +
3647 '/ceph.conf; using anyway')
3648 return 'ceph'
3649 return None
3650
3651
3652 def activate(
3653 path,
3654 activate_key_template,
3655 init,
3656 ):
3657
3658 check_osd_magic(path)
3659
3660 ceph_fsid = read_one_line(path, 'ceph_fsid')
3661 if ceph_fsid is None:
3662 raise Error('No cluster uuid assigned.')
3663 LOG.debug('Cluster uuid is %s', ceph_fsid)
3664
3665 cluster = find_cluster_by_uuid(ceph_fsid)
3666 if cluster is None:
3667 raise Error('No cluster conf found in ' + SYSCONFDIR +
3668 ' with fsid %s' % ceph_fsid)
3669 LOG.debug('Cluster name is %s', cluster)
3670
3671 fsid = read_one_line(path, 'fsid')
3672 if fsid is None:
3673 raise Error('No OSD uuid assigned.')
3674 LOG.debug('OSD uuid is %s', fsid)
3675
3676 keyring = activate_key_template.format(cluster=cluster,
3677 statedir=STATEDIR)
3678
3679 osd_id = get_osd_id(path)
3680 if osd_id is None:
3681 osd_id = allocate_osd_id(
3682 cluster=cluster,
3683 fsid=fsid,
3684 keyring=keyring,
3685 path=path,
3686 )
3687 write_one_line(path, 'whoami', osd_id)
3688 LOG.debug('OSD id is %s', osd_id)
3689
3690 if not os.path.exists(os.path.join(path, 'ready')):
3691 LOG.debug('Initializing OSD...')
3692 # re-running mkfs is safe, so just run until it completes
3693 mkfs(
3694 path=path,
3695 cluster=cluster,
3696 osd_id=osd_id,
3697 fsid=fsid,
3698 keyring=keyring,
3699 )
3700
3701 if init not in (None, 'none'):
3702 if init == 'auto':
3703 conf_val = get_conf(
3704 cluster=cluster,
3705 variable='init'
3706 )
3707 if conf_val is not None:
3708 init = conf_val
3709 else:
3710 init = init_get()
3711
3712 LOG.debug('Marking with init system %s', init)
3713 init_path = os.path.join(path, init)
3714 with open(init_path, 'w'):
3715 path_set_context(init_path)
3716
3717 # remove markers for others, just in case.
3718 for other in INIT_SYSTEMS:
3719 if other != init:
3720 try:
3721 os.unlink(os.path.join(path, other))
3722 except OSError:
3723 pass
3724
3725 if not os.path.exists(os.path.join(path, 'active')):
3726 write_one_line(path, 'active', 'ok')
3727 LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
3728 return (osd_id, cluster)
3729
3730
3731 def main_activate(args):
3732 cluster = None
3733 osd_id = None
3734
3735 LOG.info('path = ' + str(args.path))
3736 if not os.path.exists(args.path):
3737 raise Error('%s does not exist' % args.path)
3738
3739 if is_suppressed(args.path):
3740 LOG.info('suppressed activate request on %s', args.path)
3741 return
3742
3743 with activate_lock:
3744 mode = os.stat(args.path).st_mode
3745 if stmode_is_diskdevice(mode):
3746 if (is_partition(args.path) and
3747 (get_partition_type(args.path) ==
3748 PTYPE['mpath']['osd']['ready']) and
3749 not is_mpath(args.path)):
3750 raise Error('%s is not a multipath block device' %
3751 args.path)
3752 (cluster, osd_id) = mount_activate(
3753 dev=args.path,
3754 activate_key_template=args.activate_key_template,
3755 init=args.mark_init,
3756 dmcrypt=args.dmcrypt,
3757 dmcrypt_key_dir=args.dmcrypt_key_dir,
3758 reactivate=args.reactivate,
3759 )
3760 osd_data = get_mount_point(cluster, osd_id)
3761
3762 args.cluster = cluster
3763 if args.dmcrypt:
3764 for name in Space.NAMES:
3765 # Check if encrypted device in journal
3766 dev_path = os.path.join(osd_data, name + '_dmcrypt')
3767 if not os.path.exists(dev_path):
3768 continue
3769 partition = DevicePartition.factory(
3770 path=None,
3771 dev=dev_path,
3772 args=args)
3773 partition.rawdev = args.path
3774 partition.map()
3775
3776 elif stat.S_ISDIR(mode):
3777 (cluster, osd_id) = activate_dir(
3778 path=args.path,
3779 activate_key_template=args.activate_key_template,
3780 init=args.mark_init,
3781 )
3782 osd_data = args.path
3783
3784 else:
3785 raise Error('%s is not a directory or block device' % args.path)
3786
3787 # exit with 0 if the journal device is not up, yet
3788 # journal device will do the activation
3789 osd_journal = '{path}/journal'.format(path=osd_data)
3790 if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
3791 LOG.info("activate: Journal not present, not starting, yet")
3792 return
3793
3794 if (not args.no_start_daemon and args.mark_init == 'none'):
3795 command_check_call(
3796 [
3797 'ceph-osd',
3798 '--cluster={cluster}'.format(cluster=cluster),
3799 '--id={osd_id}'.format(osd_id=osd_id),
3800 '--osd-data={path}'.format(path=osd_data),
3801 '--osd-journal={journal}'.format(journal=osd_journal),
3802 ],
3803 )
3804
3805 if (not args.no_start_daemon and
3806 args.mark_init not in (None, 'none')):
3807
3808 start_daemon(
3809 cluster=cluster,
3810 osd_id=osd_id,
3811 )
3812
3813
3814 def main_activate_lockbox(args):
3815 with activate_lock:
3816 main_activate_lockbox_protected(args)
3817
3818
3819 def main_activate_lockbox_protected(args):
3820 partition = DevicePartition.factory(
3821 path=None, dev=args.path, args=args)
3822
3823 lockbox = Lockbox(args)
3824 lockbox.set_partition(partition)
3825 lockbox.activate()
3826
3827
3828 ###########################
3829
3830 def _mark_osd_out(cluster, osd_id):
3831 LOG.info('Prepare to mark osd.%d out...', osd_id)
3832 command([
3833 'ceph',
3834 'osd',
3835 'out',
3836 'osd.%d' % osd_id,
3837 ])
3838
3839
3840 def _check_osd_status(cluster, osd_id):
3841 """
3842 report the osd status:
3843 00(0) : means OSD OUT AND DOWN
3844 01(1) : means OSD OUT AND UP
3845 10(2) : means OSD IN AND DOWN
3846 11(3) : means OSD IN AND UP
3847 """
3848 LOG.info("Checking osd id: %s ..." % osd_id)
3849 found = False
3850 status_code = 0
3851 out, err, ret = command([
3852 'ceph',
3853 'osd',
3854 'dump',
3855 '--cluster={cluster}'.format(
3856 cluster=cluster,
3857 ),
3858 '--format',
3859 'json',
3860 ])
3861 out_json = json.loads(out)
3862 for item in out_json[u'osds']:
3863 if item.get(u'osd') == int(osd_id):
3864 found = True
3865 if item.get(u'in') is 1:
3866 status_code += 2
3867 if item.get(u'up') is 1:
3868 status_code += 1
3869 if not found:
3870 raise Error('Could not osd.%s in osd tree!' % osd_id)
3871 return status_code
3872
3873
3874 def _remove_osd_directory_files(mounted_path, cluster):
3875 """
3876 To remove the 'ready', 'active', INIT-specific files.
3877 """
3878 if os.path.exists(os.path.join(mounted_path, 'ready')):
3879 os.remove(os.path.join(mounted_path, 'ready'))
3880 LOG.info('Remove `ready` file.')
3881 else:
3882 LOG.info('`ready` file is already removed.')
3883
3884 if os.path.exists(os.path.join(mounted_path, 'active')):
3885 os.remove(os.path.join(mounted_path, 'active'))
3886 LOG.info('Remove `active` file.')
3887 else:
3888 LOG.info('`active` file is already removed.')
3889
3890 # Just check `upstart` and `sysvinit` directly if filename is init-spec.
3891 conf_val = get_conf(
3892 cluster=cluster,
3893 variable='init'
3894 )
3895 if conf_val is not None:
3896 init = conf_val
3897 else:
3898 init = init_get()
3899 os.remove(os.path.join(mounted_path, init))
3900 LOG.info('Remove `%s` file.', init)
3901 return
3902
3903
3904 def main_deactivate(args):
3905 with activate_lock:
3906 main_deactivate_locked(args)
3907
3908
3909 def main_deactivate_locked(args):
3910 osd_id = args.deactivate_by_id
3911 path = args.path
3912 target_dev = None
3913 dmcrypt = False
3914 devices = list_devices()
3915
3916 # list all devices and found we need
3917 for device in devices:
3918 if 'partitions' in device:
3919 for dev_part in device.get('partitions'):
3920 if (osd_id and
3921 'whoami' in dev_part and
3922 dev_part['whoami'] == osd_id):
3923 target_dev = dev_part
3924 elif (path and
3925 'path' in dev_part and
3926 dev_part['path'] == path):
3927 target_dev = dev_part
3928 if not target_dev:
3929 raise Error('Cannot find any match device!!')
3930
3931 # set up all we need variable
3932 osd_id = target_dev['whoami']
3933 part_type = target_dev['ptype']
3934 mounted_path = target_dev['mount']
3935 if Ptype.is_dmcrypt(part_type, 'osd'):
3936 dmcrypt = True
3937
3938 # Do not do anything if osd is already down.
3939 status_code = _check_osd_status(args.cluster, osd_id)
3940 if status_code == OSD_STATUS_IN_UP:
3941 if args.mark_out is True:
3942 _mark_osd_out(args.cluster, int(osd_id))
3943 stop_daemon(args.cluster, osd_id)
3944 elif status_code == OSD_STATUS_IN_DOWN:
3945 if args.mark_out is True:
3946 _mark_osd_out(args.cluster, int(osd_id))
3947 LOG.info("OSD already out/down. Do not do anything now.")
3948 return
3949 elif status_code == OSD_STATUS_OUT_UP:
3950 stop_daemon(args.cluster, osd_id)
3951 elif status_code == OSD_STATUS_OUT_DOWN:
3952 LOG.info("OSD already out/down. Do not do anything now.")
3953 return
3954
3955 if not args.once:
3956 # remove 'ready', 'active', and INIT-specific files.
3957 _remove_osd_directory_files(mounted_path, args.cluster)
3958
3959 # Write deactivate to osd directory!
3960 with open(os.path.join(mounted_path, 'deactive'), 'w'):
3961 path_set_context(os.path.join(mounted_path, 'deactive'))
3962
3963 unmount(mounted_path, do_rm=not args.once)
3964 LOG.info("Umount `%s` successfully.", mounted_path)
3965
3966 if dmcrypt:
3967 lockbox = os.path.join(STATEDIR, 'osd-lockbox')
3968 command(['umount', os.path.join(lockbox, target_dev['uuid'])])
3969
3970 dmcrypt_unmap(target_dev['uuid'])
3971 for name in Space.NAMES:
3972 if name + '_uuid' in target_dev:
3973 dmcrypt_unmap(target_dev[name + '_uuid'])
3974
3975 ###########################
3976
3977
3978 def _remove_lockbox(uuid, cluster):
3979 lockbox = os.path.join(STATEDIR, 'osd-lockbox')
3980 if not os.path.exists(lockbox):
3981 return
3982 canonical = os.path.join(lockbox, uuid)
3983 command(['umount', canonical])
3984 for name in os.listdir(lockbox):
3985 path = os.path.join(lockbox, name)
3986 if os.path.islink(path) and os.readlink(path) == canonical:
3987 os.unlink(path)
3988
3989
3990 def destroy_lookup_device(args, predicate, description):
3991 devices = list_devices()
3992 for device in devices:
3993 for partition in device.get('partitions', []):
3994 if partition['type'] == 'lockbox':
3995 if not is_mounted(partition['path']):
3996 main_activate_lockbox_protected(
3997 argparse.Namespace(verbose=args.verbose,
3998 path=partition['path']))
3999 for device in devices:
4000 for partition in device.get('partitions', []):
4001 if partition['dmcrypt']:
4002 dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
4003 if dmcrypt_path:
4004 unmap = False
4005 else:
4006 dmcrypt_path = dmcrypt_map(partition['path'],
4007 args.dmcrypt_key_dir)
4008 unmap = True
4009 list_dev_osd(dmcrypt_path, {}, partition)
4010 if unmap:
4011 dmcrypt_unmap(partition['uuid'])
4012 dmcrypt = True
4013 else:
4014 dmcrypt = False
4015 if predicate(partition):
4016 return dmcrypt, partition
4017 raise Error('found no device matching ', description)
4018
4019
4020 def main_destroy(args):
4021 with activate_lock:
4022 main_destroy_locked(args)
4023
4024
4025 def main_destroy_locked(args):
4026 osd_id = args.destroy_by_id
4027 path = args.path
4028 target_dev = None
4029
4030 if path:
4031 if not is_partition(path):
4032 raise Error(path + " must be a partition device")
4033 path = os.path.realpath(path)
4034
4035 if path:
4036 (dmcrypt, target_dev) = destroy_lookup_device(
4037 args, lambda x: x.get('path') == path,
4038 path)
4039 elif osd_id:
4040 (dmcrypt, target_dev) = destroy_lookup_device(
4041 args, lambda x: x.get('whoami') == osd_id,
4042 'osd id ' + str(osd_id))
4043
4044 osd_id = target_dev['whoami']
4045 dev_path = target_dev['path']
4046 if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
4047 base_dev = get_partition_base_mpath(dev_path)
4048 else:
4049 base_dev = get_partition_base(dev_path)
4050
4051 # Before osd deactivate, we cannot destroy it
4052 status_code = _check_osd_status(args.cluster, osd_id)
4053 if status_code != OSD_STATUS_OUT_DOWN and \
4054 status_code != OSD_STATUS_IN_DOWN:
4055 raise Error("Could not destroy the active osd. (osd-id: %s)" %
4056 osd_id)
4057
4058 if args.purge:
4059 action = 'purge'
4060 else:
4061 action = 'destroy'
4062 LOG.info("Prepare to %s osd.%s" % (action, osd_id))
4063 command([
4064 'ceph',
4065 'osd',
4066 action,
4067 'osd.%s' % osd_id,
4068 '--yes-i-really-mean-it',
4069 ])
4070
4071 # we remove the crypt map and device mapper (if dmcrypt is True)
4072 if dmcrypt:
4073 for name in Space.NAMES:
4074 if target_dev.get(name + '_uuid'):
4075 dmcrypt_unmap(target_dev[name + '_uuid'])
4076 _remove_lockbox(target_dev['uuid'], args.cluster)
4077
4078 # Check zap flag. If we found zap flag, we need to find device for
4079 # destroy this osd data.
4080 if args.zap is True:
4081 # erase the osd data
4082 LOG.info("Prepare to zap the device %s" % base_dev)
4083 zap(base_dev)
4084
4085
4086 def get_space_osd_uuid(name, path):
4087 if not os.path.exists(path):
4088 raise Error('%s does not exist' % path)
4089
4090 if not path_is_diskdevice(path):
4091 raise Error('%s is not a block device' % path)
4092
4093 if (is_partition(path) and
4094 get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
4095 PTYPE['mpath']['block']['ready']) and
4096 not is_mpath(path)):
4097 raise Error('%s is not a multipath block device' %
4098 path)
4099
4100 try:
4101 out = _check_output(
4102 args=[
4103 'ceph-osd',
4104 '--get-device-fsid',
4105 path,
4106 ],
4107 close_fds=True,
4108 )
4109 except subprocess.CalledProcessError as e:
4110 raise Error(
4111 'failed to get osd uuid/fsid from %s' % name,
4112 e,
4113 )
4114 value = str(out).split('\n', 1)[0]
4115 LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
4116 return value
4117
4118
4119 def main_activate_space(name, args):
4120 if not os.path.exists(args.dev):
4121 raise Error('%s does not exist' % args.dev)
4122
4123 if is_suppressed(args.dev):
4124 LOG.info('suppressed activate request on space %s', args.dev)
4125 return
4126
4127 cluster = None
4128 osd_id = None
4129 osd_uuid = None
4130 dev = None
4131 with activate_lock:
4132 if args.dmcrypt:
4133 dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
4134 else:
4135 dev = args.dev
4136 # FIXME: For an encrypted journal dev, does this return the
4137 # cyphertext or plaintext dev uuid!? Also, if the journal is
4138 # encrypted, is the data partition also always encrypted, or
4139 # are mixed pairs supported!?
4140 osd_uuid = get_space_osd_uuid(name, dev)
4141 path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())
4142
4143 if is_suppressed(path):
4144 LOG.info('suppressed activate request on %s', path)
4145 return
4146
4147 # warn and exit with 0 if the data device is not up, yet
4148 # data device will do the activation
4149 if not os.access(path, os.F_OK):
4150 LOG.info("activate: OSD device not present, not starting, yet")
4151 return
4152
4153 (cluster, osd_id) = mount_activate(
4154 dev=path,
4155 activate_key_template=args.activate_key_template,
4156 init=args.mark_init,
4157 dmcrypt=args.dmcrypt,
4158 dmcrypt_key_dir=args.dmcrypt_key_dir,
4159 reactivate=args.reactivate,
4160 )
4161
4162 start_daemon(
4163 cluster=cluster,
4164 osd_id=osd_id,
4165 )
4166
4167
4168 ###########################
4169
4170
4171 def main_activate_all(args):
4172 dir = '/dev/disk/by-parttypeuuid'
4173 LOG.debug('Scanning %s', dir)
4174 if not os.path.exists(dir):
4175 return
4176 err = False
4177 for name in os.listdir(dir):
4178 if name.find('.') < 0:
4179 continue
4180 (tag, uuid) = name.split('.')
4181
4182 if tag in Ptype.get_ready_by_name('osd'):
4183
4184 if Ptype.is_dmcrypt(tag, 'osd'):
4185 path = os.path.join('/dev/mapper', uuid)
4186 else:
4187 path = os.path.join(dir, name)
4188
4189 if is_suppressed(path):
4190 LOG.info('suppressed activate request on %s', path)
4191 continue
4192
4193 LOG.info('Activating %s', path)
4194 with activate_lock:
4195 try:
4196 # never map dmcrypt cyphertext devices
4197 (cluster, osd_id) = mount_activate(
4198 dev=path,
4199 activate_key_template=args.activate_key_template,
4200 init=args.mark_init,
4201 dmcrypt=False,
4202 dmcrypt_key_dir='',
4203 )
4204 start_daemon(
4205 cluster=cluster,
4206 osd_id=osd_id,
4207 )
4208
4209 except Exception as e:
4210 print(
4211 '{prog}: {msg}'.format(prog=args.prog, msg=e),
4212 file=sys.stderr
4213 )
4214
4215 err = True
4216
4217 if err:
4218 raise Error('One or more partitions failed to activate')
4219
4220
4221 ###########################
4222
4223 def is_swap(dev):
4224 dev = os.path.realpath(dev)
4225 with open(PROCDIR + '/swaps', 'rb') as proc_swaps:
4226 for line in proc_swaps.readlines()[1:]:
4227 fields = line.split()
4228 if len(fields) < 3:
4229 continue
4230 swaps_dev = fields[0]
4231 if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
4232 swaps_dev = os.path.realpath(swaps_dev)
4233 if swaps_dev == dev:
4234 return True
4235 return False
4236
4237
4238 def get_oneliner(base, name):
4239 path = os.path.join(base, name)
4240 if os.path.isfile(path):
4241 with open(path, 'rb') as _file:
4242 return _bytes2str(_file.readline().rstrip())
4243 return None
4244
4245
4246 def get_dev_fs(dev):
4247 if FREEBSD:
4248 fstype, _, ret = command(
4249 [
4250 'fstyp',
4251 '-u',
4252 dev,
4253 ],
4254 )
4255 if ret == 0:
4256 return fstype
4257 else:
4258 fscheck, _, _ = command(
4259 [
4260 'blkid',
4261 '-s',
4262 'TYPE',
4263 dev,
4264 ],
4265 )
4266 if 'TYPE' in fscheck:
4267 fstype = fscheck.split()[1].split('"')[1]
4268 return fstype
4269 return None
4270
4271
4272 def split_dev_base_partnum(dev):
4273 if is_mpath(dev):
4274 partnum = partnum_mpath(dev)
4275 base = get_partition_base_mpath(dev)
4276 else:
4277 b = block_path(dev)
4278 partnum = open(os.path.join(b, 'partition')).read().strip()
4279 base = get_partition_base(dev)
4280 return base, partnum
4281
4282
4283 def get_partition_type(part):
4284 return get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
4285
4286
4287 def get_partition_uuid(part):
4288 return get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
4289
4290
4291 def get_blkid_partition_info(dev, what=None):
4292 out, _, _ = command(
4293 [
4294 'blkid',
4295 '-o',
4296 'udev',
4297 '-p',
4298 dev,
4299 ]
4300 )
4301 p = {}
4302 for line in out.splitlines():
4303 (key, value) = line.split('=')
4304 p[key] = value
4305 if what:
4306 return p.get(what)
4307 else:
4308 return p
4309
4310
4311 def more_osd_info(path, uuid_map, desc):
4312 desc['ceph_fsid'] = get_oneliner(path, 'ceph_fsid')
4313 if desc['ceph_fsid']:
4314 desc['cluster'] = find_cluster_by_uuid(desc['ceph_fsid'])
4315 desc['whoami'] = get_oneliner(path, 'whoami')
4316 for name in Space.NAMES:
4317 uuid = get_oneliner(path, name + '_uuid')
4318 if uuid:
4319 desc[name + '_uuid'] = uuid.lower()
4320 if desc[name + '_uuid'] in uuid_map:
4321 desc[name + '_dev'] = uuid_map[desc[name + '_uuid']]
4322
4323
4324 def list_dev_osd(dev, uuid_map, desc):
4325 desc['mount'] = is_mounted(dev)
4326 desc['fs_type'] = get_dev_fs(dev)
4327 desc['state'] = 'unprepared'
4328 if desc['mount']:
4329 desc['state'] = 'active'
4330 more_osd_info(desc['mount'], uuid_map, desc)
4331 elif desc['fs_type']:
4332 try:
4333 tpath = mount(dev=dev, fstype=desc['fs_type'], options='')
4334 if tpath:
4335 try:
4336 magic = get_oneliner(tpath, 'magic')
4337 if magic is not None:
4338 desc['magic'] = magic
4339 desc['state'] = 'prepared'
4340 more_osd_info(tpath, uuid_map, desc)
4341 finally:
4342 unmount(tpath)
4343 except MountError:
4344 pass
4345
4346
4347 def list_dev_lockbox(dev, uuid_map, desc):
4348 desc['mount'] = is_mounted(dev)
4349 desc['fs_type'] = get_dev_fs(dev)
4350 desc['state'] = 'unprepared'
4351 if desc['mount']:
4352 desc['state'] = 'active'
4353 desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
4354 elif desc['fs_type']:
4355 try:
4356 tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
4357 args = ['mount', '-t', 'ext4', dev, tpath]
4358 LOG.debug('Mounting lockbox ' + str(" ".join(args)))
4359 command_check_call(args)
4360 magic = get_oneliner(tpath, 'magic')
4361 if magic is not None:
4362 desc['magic'] = magic
4363 desc['state'] = 'prepared'
4364 desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
4365 unmount(tpath)
4366 except subprocess.CalledProcessError:
4367 pass
4368 if desc.get('osd_uuid') in uuid_map:
4369 desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4370
4371
4372 def list_format_lockbox_plain(dev):
4373 desc = []
4374 if dev.get('lockbox_for'):
4375 desc.append('for ' + dev['lockbox_for'])
4376 elif dev.get('osd_uuid'):
4377 desc.append('for osd ' + dev['osd_uuid'])
4378 return desc
4379
4380
4381 def list_format_more_osd_info_plain(dev):
4382 desc = []
4383 if dev.get('ceph_fsid'):
4384 if dev.get('cluster'):
4385 desc.append('cluster ' + dev['cluster'])
4386 else:
4387 desc.append('unknown cluster ' + dev['ceph_fsid'])
4388 if dev.get('whoami'):
4389 desc.append('osd.%s' % dev['whoami'])
4390 for name in Space.NAMES:
4391 if dev.get(name + '_dev'):
4392 desc.append(name + ' %s' % dev[name + '_dev'])
4393 return desc
4394
4395
4396 def list_format_dev_plain(dev, prefix=''):
4397 desc = []
4398 if dev['ptype'] == PTYPE['regular']['osd']['ready']:
4399 desc = (['ceph data', dev['state']] +
4400 list_format_more_osd_info_plain(dev))
4401 elif dev['ptype'] in (PTYPE['regular']['lockbox']['ready'],
4402 PTYPE['mpath']['lockbox']['ready']):
4403 desc = (['ceph lockbox', dev['state']] +
4404 list_format_lockbox_plain(dev))
4405 elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
4406 dmcrypt = dev['dmcrypt']
4407 if not dmcrypt['holders']:
4408 desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
4409 'not currently mapped']
4410 elif len(dmcrypt['holders']) == 1:
4411 holder = get_dev_path(dmcrypt['holders'][0])
4412 desc = ['ceph data (dmcrypt %s %s)' %
4413 (dmcrypt['type'], holder)]
4414 desc += list_format_more_osd_info_plain(dev)
4415 else:
4416 desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
4417 'holders: ' + ','.join(dmcrypt['holders'])]
4418 elif Ptype.is_regular_space(dev['ptype']):
4419 name = Ptype.space_ptype_to_name(dev['ptype'])
4420 desc.append('ceph ' + name)
4421 if dev.get(name + '_for'):
4422 desc.append('for %s' % dev[name + '_for'])
4423 elif Ptype.is_dmcrypt_space(dev['ptype']):
4424 name = Ptype.space_ptype_to_name(dev['ptype'])
4425 dmcrypt = dev['dmcrypt']
4426 if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
4427 holder = get_dev_path(dmcrypt['holders'][0])
4428 desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
4429 (dmcrypt['type'], holder)]
4430 else:
4431 desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
4432 if dev.get(name + '_for'):
4433 desc.append('for %s' % dev[name + '_for'])
4434 else:
4435 desc.append(dev['type'])
4436 if dev.get('fs_type'):
4437 desc.append(dev['fs_type'])
4438 elif dev.get('ptype'):
4439 desc.append(dev['ptype'])
4440 if dev.get('mount'):
4441 desc.append('mounted on %s' % dev['mount'])
4442 return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4443
4444
4445 def list_format_plain(devices):
4446 lines = []
4447 for device in devices:
4448 if device.get('partitions'):
4449 lines.append('%s :' % device['path'])
4450 for p in sorted(device['partitions'], key=lambda x: x['path']):
4451 lines.append(list_format_dev_plain(dev=p,
4452 prefix=' '))
4453 else:
4454 lines.append(list_format_dev_plain(dev=device,
4455 prefix=''))
4456 return "\n".join(lines)
4457
4458
4459 def list_dev(dev, uuid_map, space_map):
4460 info = {
4461 'path': dev,
4462 'dmcrypt': {},
4463 }
4464
4465 info['is_partition'] = is_partition(dev)
4466 if info['is_partition']:
4467 ptype = get_partition_type(dev)
4468 info['uuid'] = get_partition_uuid(dev)
4469 else:
4470 ptype = 'unknown'
4471 info['ptype'] = ptype
4472 LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
4473 if ptype in (PTYPE['regular']['osd']['ready'],
4474 PTYPE['mpath']['osd']['ready']):
4475 info['type'] = 'data'
4476 if ptype == PTYPE['mpath']['osd']['ready']:
4477 info['multipath'] = True
4478 list_dev_osd(dev, uuid_map, info)
4479 elif ptype in (PTYPE['regular']['lockbox']['ready'],
4480 PTYPE['mpath']['lockbox']['ready']):
4481 info['type'] = 'lockbox'
4482 if ptype == PTYPE['mpath']['osd']['ready']:
4483 info['multipath'] = True
4484 list_dev_lockbox(dev, uuid_map, info)
4485 elif ptype == PTYPE['plain']['osd']['ready']:
4486 holders = is_held(dev)
4487 info['type'] = 'data'
4488 info['dmcrypt']['holders'] = holders
4489 info['dmcrypt']['type'] = 'plain'
4490 if len(holders) == 1:
4491 list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
4492 elif ptype == PTYPE['luks']['osd']['ready']:
4493 holders = is_held(dev)
4494 info['type'] = 'data'
4495 info['dmcrypt']['holders'] = holders
4496 info['dmcrypt']['type'] = 'LUKS'
4497 if len(holders) == 1:
4498 list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
4499 elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
4500 name = Ptype.space_ptype_to_name(ptype)
4501 info['type'] = name
4502 if ptype == PTYPE['mpath'][name]['ready']:
4503 info['multipath'] = True
4504 if info.get('uuid') in space_map:
4505 info[name + '_for'] = space_map[info['uuid']]
4506 elif Ptype.is_plain_space(ptype):
4507 name = Ptype.space_ptype_to_name(ptype)
4508 holders = is_held(dev)
4509 info['type'] = name
4510 info['dmcrypt']['type'] = 'plain'
4511 info['dmcrypt']['holders'] = holders
4512 if info.get('uuid') in space_map:
4513 info[name + '_for'] = space_map[info['uuid']]
4514 elif Ptype.is_luks_space(ptype):
4515 name = Ptype.space_ptype_to_name(ptype)
4516 holders = is_held(dev)
4517 info['type'] = name
4518 info['dmcrypt']['type'] = 'LUKS'
4519 info['dmcrypt']['holders'] = holders
4520 if info.get('uuid') in space_map:
4521 info[name + '_for'] = space_map[info['uuid']]
4522 else:
4523 path = is_mounted(dev)
4524 fs_type = get_dev_fs(dev)
4525 if is_swap(dev):
4526 info['type'] = 'swap'
4527 else:
4528 info['type'] = 'other'
4529 if fs_type:
4530 info['fs_type'] = fs_type
4531 if path:
4532 info['mount'] = path
4533
4534 return info
4535
4536
4537 def list_devices():
4538 partmap = list_all_partitions()
4539
4540 uuid_map = {}
4541 space_map = {}
4542 for base, parts in sorted(partmap.items()):
4543 for p in parts:
4544 dev = get_dev_path(p)
4545 part_uuid = get_partition_uuid(dev)
4546 if part_uuid:
4547 uuid_map[part_uuid] = dev
4548 ptype = get_partition_type(dev)
4549 LOG.debug("main_list: " + dev +
4550 " ptype = " + str(ptype) +
4551 " uuid = " + str(part_uuid))
4552 if ptype in Ptype.get_ready_by_name('osd'):
4553 if Ptype.is_dmcrypt(ptype, 'osd'):
4554 holders = is_held(dev)
4555 if len(holders) != 1:
4556 continue
4557 dev_to_mount = get_dev_path(holders[0])
4558 else:
4559 dev_to_mount = dev
4560
4561 fs_type = get_dev_fs(dev_to_mount)
4562 if fs_type is not None:
4563 mount_options = get_mount_options(cluster='ceph',
4564 fs_type=fs_type)
4565 try:
4566 tpath = mount(dev=dev_to_mount,
4567 fstype=fs_type, options=mount_options)
4568 try:
4569 for name in Space.NAMES:
4570 space_uuid = get_oneliner(tpath,
4571 name + '_uuid')
4572 if space_uuid:
4573 space_map[space_uuid.lower()] = dev
4574 finally:
4575 unmount(tpath)
4576 except MountError:
4577 pass
4578
4579 LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
4580 str(uuid_map) + ", space_map = " + str(space_map))
4581
4582 devices = []
4583 for base, parts in sorted(partmap.items()):
4584 if parts:
4585 disk = {'path': get_dev_path(base)}
4586 partitions = []
4587 for p in sorted(parts):
4588 partitions.append(list_dev(get_dev_path(p),
4589 uuid_map,
4590 space_map))
4591 disk['partitions'] = partitions
4592 devices.append(disk)
4593 else:
4594 device = list_dev(get_dev_path(base), uuid_map, space_map)
4595 device['path'] = get_dev_path(base)
4596 devices.append(device)
4597 LOG.debug("list_devices: " + str(devices))
4598 return devices
4599
4600
4601 def list_zfs():
4602 try:
4603 out, err, ret = command(
4604 [
4605 'zfs',
4606 'list',
4607 '-o', 'name,mountpoint'
4608 ]
4609 )
4610 except subprocess.CalledProcessError as e:
4611 LOG.info('zfs list -o name,mountpoint '
4612 'fails.\n (Error: %s)' % e)
4613 raise
4614 lines = out.splitlines()
4615 for line in lines[1:]:
4616 vdevline = line.split()
4617 if os.path.exists(os.path.join(vdevline[1], 'active')):
4618 elems = os.path.split(vdevline[1])
4619 print(vdevline[0], "ceph data, active, cluster ceph,", elems[1],
4620 "mounted on:", vdevline[1])
4621 else:
4622 print(vdevline[0] + " other, zfs, mounted on: " + vdevline[1])
4623
4624
4625 def main_list(args):
4626 with activate_lock:
4627 if FREEBSD:
4628 main_list_freebsd(args)
4629 else:
4630 main_list_protected(args)
4631
4632
4633 def main_list_protected(args):
4634 devices = list_devices()
4635 if args.path:
4636 paths = []
4637 for path in args.path:
4638 if os.path.exists(path):
4639 paths.append(os.path.realpath(path))
4640 else:
4641 paths.append(path)
4642 selected_devices = []
4643 for device in devices:
4644 for path in paths:
4645 if re.search(path + '$', device['path']):
4646 selected_devices.append(device)
4647 else:
4648 selected_devices = devices
4649 if args.format == 'json':
4650 print(json.dumps(selected_devices))
4651 else:
4652 output = list_format_plain(selected_devices)
4653 if output:
4654 print(output)
4655
4656
4657 def main_list_freebsd(args):
4658 # Currently accomodate only ZFS Filestore partitions
4659 # return a list of VDEVs and mountpoints
4660 # > zfs list
4661 # NAME USED AVAIL REFER MOUNTPOINT
4662 # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
4663 # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
4664 list_zfs()
4665
4666
4667 ###########################
4668 #
4669 # Mark devices that we want to suppress activates on with a
4670 # file like
4671 #
4672 # /var/lib/ceph/tmp/suppress-activate.sdb
4673 #
4674 # where the last bit is the sanitized device name (/dev/X without the
4675 # /dev/ prefix) and the is_suppress() check matches a prefix. That
4676 # means suppressing sdb will stop activate on sdb1, sdb2, etc.
4677 #
4678
4679 def is_suppressed(path):
4680 disk = os.path.realpath(path)
4681 try:
4682 if (not disk.startswith('/dev/') or
4683 not ldev_is_diskdevice(disk)):
4684 return False
4685 base = get_dev_name(disk)
4686 while len(base):
4687 if os.path.exists(SUPPRESS_PREFIX + base): # noqa
4688 return True
4689 base = base[:-1]
4690 except:
4691 return False
4692
4693
4694 def set_suppress(path):
4695 disk = os.path.realpath(path)
4696 if not os.path.exists(disk):
4697 raise Error('does not exist', path)
4698 if not ldev_is_diskdevice(path):
4699 raise Error('not a block device', path)
4700 base = get_dev_name(disk)
4701
4702 with open(SUPPRESS_PREFIX + base, 'w') as f: # noqa
4703 pass
4704 LOG.info('set suppress flag on %s', base)
4705
4706
4707 def unset_suppress(path):
4708 disk = os.path.realpath(path)
4709 if not os.path.exists(disk):
4710 raise Error('does not exist', path)
4711 if not ldev_is_diskdevice(path):
4712 raise Error('not a block device', path)
4713 assert disk.startswith('/dev/')
4714 base = get_dev_name(disk)
4715
4716 fn = SUPPRESS_PREFIX + base # noqa
4717 if not os.path.exists(fn):
4718 raise Error('not marked as suppressed', path)
4719
4720 try:
4721 os.unlink(fn)
4722 LOG.info('unset suppress flag on %s', base)
4723 except OSError as e:
4724 raise Error('failed to unsuppress', e)
4725
4726
4727 def main_suppress(args):
4728 set_suppress(args.path)
4729
4730
4731 def main_unsuppress(args):
4732 unset_suppress(args.path)
4733
4734
4735 def main_zap(args):
4736 for dev in args.dev:
4737 zap(dev)
4738
4739
4740 def main_trigger(args):
4741 LOG.debug("main_trigger: " + str(args))
4742 if is_systemd() and not args.sync:
4743 # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
4744 escaped_dev = args.dev[1:].replace('-', '\\x2d')
4745 service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
4746 LOG.info('systemd detected, triggering %s' % service)
4747 command(
4748 [
4749 'systemctl',
4750 '--no-block',
4751 'restart',
4752 service,
4753 ]
4754 )
4755 return
4756 if is_upstart() and not args.sync:
4757 LOG.info('upstart detected, triggering ceph-disk task')
4758 command(
4759 [
4760 'initctl',
4761 'emit',
4762 'ceph-disk',
4763 'dev={dev}'.format(dev=args.dev),
4764 'pid={pid}'.format(pid=os.getpid()),
4765 ]
4766 )
4767 return
4768
4769 if get_ceph_user() == 'ceph':
4770 command_check_call(['chown', 'ceph:ceph', args.dev])
4771 parttype = get_partition_type(args.dev)
4772 partid = get_partition_uuid(args.dev)
4773
4774 LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
4775 dev=args.dev,
4776 parttype=parttype,
4777 partid=partid,
4778 ))
4779
4780 ceph_disk = ['ceph-disk']
4781 if args.verbose:
4782 ceph_disk.append('--verbose')
4783
4784 if parttype in (PTYPE['regular']['osd']['ready'],
4785 PTYPE['mpath']['osd']['ready']):
4786 out, err, ret = command(
4787 ceph_disk +
4788 [
4789 'activate',
4790 args.dev,
4791 ]
4792 )
4793
4794 elif parttype in (PTYPE['plain']['osd']['ready'],
4795 PTYPE['luks']['osd']['ready']):
4796 out, err, ret = command(
4797 ceph_disk +
4798 [
4799 'activate',
4800 '--dmcrypt',
4801 args.dev,
4802 ]
4803 )
4804
4805 elif parttype in (PTYPE['regular']['journal']['ready'],
4806 PTYPE['mpath']['journal']['ready']):
4807 out, err, ret = command(
4808 ceph_disk +
4809 [
4810 'activate-journal',
4811 args.dev,
4812 ]
4813 )
4814
4815 elif parttype in (PTYPE['plain']['journal']['ready'],
4816 PTYPE['luks']['journal']['ready']):
4817 out, err, ret = command(
4818 ceph_disk +
4819 [
4820 'activate-journal',
4821 '--dmcrypt',
4822 args.dev,
4823 ]
4824 )
4825
4826 elif parttype in (PTYPE['regular']['block']['ready'],
4827 PTYPE['regular']['block.db']['ready'],
4828 PTYPE['regular']['block.wal']['ready'],
4829 PTYPE['mpath']['block']['ready'],
4830 PTYPE['mpath']['block.db']['ready'],
4831 PTYPE['mpath']['block.wal']['ready']):
4832 out, err, ret = command(
4833 ceph_disk +
4834 [
4835 'activate-block',
4836 args.dev,
4837 ]
4838 )
4839
4840 elif parttype in (PTYPE['plain']['block']['ready'],
4841 PTYPE['plain']['block.db']['ready'],
4842 PTYPE['plain']['block.wal']['ready'],
4843 PTYPE['luks']['block']['ready'],
4844 PTYPE['luks']['block.db']['ready'],
4845 PTYPE['luks']['block.wal']['ready']):
4846 out, err, ret = command(
4847 ceph_disk +
4848 [
4849 'activate-block',
4850 '--dmcrypt',
4851 args.dev,
4852 ]
4853 )
4854
4855 elif parttype in (PTYPE['regular']['lockbox']['ready'],
4856 PTYPE['mpath']['lockbox']['ready']):
4857 out, err, ret = command(
4858 ceph_disk +
4859 [
4860 'activate-lockbox',
4861 args.dev,
4862 ]
4863 )
4864
4865 else:
4866 raise Error('unrecognized partition type %s' % parttype)
4867
4868 if ret != 0:
4869 LOG.info(out)
4870 LOG.error(err)
4871 raise Error('return code ' + str(ret))
4872 else:
4873 LOG.debug(out)
4874 LOG.debug(err)
4875
4876
4877 def main_fix(args):
4878 # A hash table containing 'path': ('uid', 'gid', blocking, recursive)
4879 fix_table = [
4880 ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
4881 ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
4882 ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
4883 ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
4884 ('/etc/ceph', 'root', ROOTGROUP, True, True),
4885 ('/var/run/ceph', 'ceph', 'ceph', True, True),
4886 ('/var/log/ceph', 'ceph', 'ceph', True, True),
4887 ('/var/log/radosgw', 'ceph', 'ceph', True, True),
4888 ('/var/lib/ceph', 'ceph', 'ceph', True, False),
4889 ]
4890
4891 # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
4892 for directory in glob.glob('/var/lib/ceph/*'):
4893 if directory == '/var/lib/ceph/osd':
4894 fix_table.append((directory, 'ceph', 'ceph', True, False))
4895 else:
4896 fix_table.append((directory, 'ceph', 'ceph', True, True))
4897
4898 # Relabel/chown the osds recursively and in parallel
4899 for directory in glob.glob('/var/lib/ceph/osd/*'):
4900 fix_table.append((directory, 'ceph', 'ceph', False, True))
4901
4902 LOG.debug("fix_table: " + str(fix_table))
4903
4904 # The lists of background processes
4905 all_processes = []
4906 permissions_processes = []
4907 selinux_processes = []
4908
4909 # Preliminary checks
4910 if args.selinux or args.all:
4911 out, err, ret = command(['selinuxenabled'])
4912 if ret:
4913 LOG.error('SELinux is not enabled, please enable it, first.')
4914 raise Error('no SELinux')
4915
4916 for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
4917 out, err, ret = command(['pgrep', daemon])
4918 if ret == 0:
4919 LOG.error(daemon + ' is running, please stop it, first')
4920 raise Error(daemon + ' running')
4921
4922 # Relabel the basic system data without the ceph files
4923 if args.system or args.all:
4924 c = ['restorecon', '-R', '/']
4925 for directory, _, _, _, _ in fix_table:
4926 # Skip /var/lib/ceph subdirectories
4927 if directory.startswith('/var/lib/ceph/'):
4928 continue
4929 c.append('-e')
4930 c.append(directory)
4931
4932 out, err, ret = command(c)
4933
4934 if ret:
4935 LOG.error("Failed to restore labels of the underlying system")
4936 LOG.error(err)
4937 raise Error("basic restore failed")
4938
4939 # Use find to relabel + chown ~simultaenously
4940 if args.all:
4941 for directory, uid, gid, blocking, recursive in fix_table:
4942 # Skip directories/files that are not installed
4943 if not os.access(directory, os.F_OK):
4944 continue
4945
4946 c = [
4947 'find',
4948 directory,
4949 '-exec',
4950 'chown',
4951 ':'.join((uid, gid)),
4952 '{}',
4953 '+',
4954 '-exec',
4955 'restorecon',
4956 '{}',
4957 '+',
4958 ]
4959
4960 # Just pass -maxdepth 0 for non-recursive calls
4961 if not recursive:
4962 c += ['-maxdepth', '0']
4963
4964 if blocking:
4965 out, err, ret = command(c)
4966
4967 if ret:
4968 LOG.error("Failed to fix " + directory)
4969 LOG.error(err)
4970 raise Error(directory + " fix failed")
4971 else:
4972 all_processes.append(command_init(c))
4973
4974 LOG.debug("all_processes: " + str(all_processes))
4975 for process in all_processes:
4976 out, err, ret = command_wait(process)
4977 if ret:
4978 LOG.error("A background find process failed")
4979 LOG.error(err)
4980 raise Error("background failed")
4981
4982 # Fix permissions
4983 if args.permissions:
4984 for directory, uid, gid, blocking, recursive in fix_table:
4985 # Skip directories/files that are not installed
4986 if not os.access(directory, os.F_OK):
4987 continue
4988
4989 if recursive:
4990 c = [
4991 'chown',
4992 '-R',
4993 ':'.join((uid, gid)),
4994 directory
4995 ]
4996 else:
4997 c = [
4998 'chown',
4999 ':'.join((uid, gid)),
5000 directory
5001 ]
5002
5003 if blocking:
5004 out, err, ret = command(c)
5005
5006 if ret:
5007 LOG.error("Failed to chown " + directory)
5008 LOG.error(err)
5009 raise Error(directory + " chown failed")
5010 else:
5011 permissions_processes.append(command_init(c))
5012
5013 LOG.debug("permissions_processes: " + str(permissions_processes))
5014 for process in permissions_processes:
5015 out, err, ret = command_wait(process)
5016 if ret:
5017 LOG.error("A background permissions process failed")
5018 LOG.error(err)
5019 raise Error("background failed")
5020
5021 # Fix SELinux labels
5022 if args.selinux:
5023 for directory, uid, gid, blocking, recursive in fix_table:
5024 # Skip directories/files that are not installed
5025 if not os.access(directory, os.F_OK):
5026 continue
5027
5028 if recursive:
5029 c = [
5030 'restorecon',
5031 '-R',
5032 directory
5033 ]
5034 else:
5035 c = [
5036 'restorecon',
5037 directory
5038 ]
5039
5040 if blocking:
5041 out, err, ret = command(c)
5042
5043 if ret:
5044 LOG.error("Failed to restore labels for " + directory)
5045 LOG.error(err)
5046 raise Error(directory + " relabel failed")
5047 else:
5048 selinux_processes.append(command_init(c))
5049
5050 LOG.debug("selinux_processes: " + str(selinux_processes))
5051 for process in selinux_processes:
5052 out, err, ret = command_wait(process)
5053 if ret:
5054 LOG.error("A background selinux process failed")
5055 LOG.error(err)
5056 raise Error("background failed")
5057
5058 LOG.info(
5059 "The ceph files has been fixed, please reboot "
5060 "the system for the changes to take effect."
5061 )
5062
5063
5064 def setup_statedir(dir):
5065 # XXX The following use of globals makes linting
5066 # really hard. Global state in Python is iffy and
5067 # should be avoided.
5068 global STATEDIR
5069 STATEDIR = dir
5070
5071 if not os.path.exists(STATEDIR):
5072 os.mkdir(STATEDIR)
5073 if not os.path.exists(STATEDIR + "/tmp"):
5074 os.mkdir(STATEDIR + "/tmp")
5075
5076 global prepare_lock
5077 prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
5078
5079 global activate_lock
5080 activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
5081
5082 global SUPPRESS_PREFIX
5083 SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5084
5085
5086 def setup_sysconfdir(dir):
5087 global SYSCONFDIR
5088 SYSCONFDIR = dir
5089
5090
5091 def parse_args(argv):
5092 parser = argparse.ArgumentParser(
5093 'ceph-disk',
5094 )
5095 parser.add_argument(
5096 '-v', '--verbose',
5097 action='store_true', default=None,
5098 help='be more verbose',
5099 )
5100 parser.add_argument(
5101 '--log-stdout',
5102 action='store_true', default=None,
5103 help='log to stdout',
5104 )
5105 parser.add_argument(
5106 '--prepend-to-path',
5107 metavar='PATH',
5108 default='/usr/bin',
5109 help=('prepend PATH to $PATH for backward compatibility '
5110 '(default /usr/bin)'),
5111 )
5112 parser.add_argument(
5113 '--statedir',
5114 metavar='PATH',
5115 default='/var/lib/ceph',
5116 help=('directory in which ceph state is preserved '
5117 '(default /var/lib/ceph)'),
5118 )
5119 parser.add_argument(
5120 '--sysconfdir',
5121 metavar='PATH',
5122 default='/etc/ceph',
5123 help=('directory in which ceph configuration files are found '
5124 '(default /etc/ceph)'),
5125 )
5126 parser.add_argument(
5127 '--setuser',
5128 metavar='USER',
5129 default=None,
5130 help='use the given user for subprocesses, rather than ceph or root'
5131 )
5132 parser.add_argument(
5133 '--setgroup',
5134 metavar='GROUP',
5135 default=None,
5136 help='use the given group for subprocesses, rather than ceph or root'
5137 )
5138 parser.set_defaults(
5139 # we want to hold on to this, for later
5140 prog=parser.prog,
5141 )
5142
5143 subparsers = parser.add_subparsers(
5144 title='subcommands',
5145 description='valid subcommands',
5146 help='sub-command help',
5147 )
5148
5149 Prepare.set_subparser(subparsers)
5150 make_activate_parser(subparsers)
5151 make_activate_lockbox_parser(subparsers)
5152 make_activate_block_parser(subparsers)
5153 make_activate_journal_parser(subparsers)
5154 make_activate_all_parser(subparsers)
5155 make_list_parser(subparsers)
5156 make_suppress_parser(subparsers)
5157 make_deactivate_parser(subparsers)
5158 make_destroy_parser(subparsers)
5159 make_zap_parser(subparsers)
5160 make_trigger_parser(subparsers)
5161 make_fix_parser(subparsers)
5162
5163 args = parser.parse_args(argv)
5164 return args
5165
5166
5167 def make_fix_parser(subparsers):
5168 fix_parser = subparsers.add_parser(
5169 'fix',
5170 formatter_class=argparse.RawDescriptionHelpFormatter,
5171 description=textwrap.fill(textwrap.dedent("""\
5172 """)),
5173 help='fix SELinux labels and/or file permissions')
5174
5175 fix_parser.add_argument(
5176 '--system',
5177 action='store_true',
5178 default=False,
5179 help='fix SELinux labels for the non-ceph system data'
5180 )
5181 fix_parser.add_argument(
5182 '--selinux',
5183 action='store_true',
5184 default=False,
5185 help='fix SELinux labels for ceph data'
5186 )
5187 fix_parser.add_argument(
5188 '--permissions',
5189 action='store_true',
5190 default=False,
5191 help='fix file permissions for ceph data'
5192 )
5193 fix_parser.add_argument(
5194 '--all',
5195 action='store_true',
5196 default=False,
5197 help='perform all the fix-related operations'
5198 )
5199 fix_parser.set_defaults(
5200 func=main_fix,
5201 )
5202 return fix_parser
5203
5204
5205 def make_trigger_parser(subparsers):
5206 trigger_parser = subparsers.add_parser(
5207 'trigger',
5208 formatter_class=argparse.RawDescriptionHelpFormatter,
5209 description=textwrap.fill(textwrap.dedent("""\
5210 The partition given in argument is activated. The type of the
5211 partition (data, lockbox, journal etc.) is detected by its
5212 type. If the init system is upstart or systemd, the activation is
5213 delegated to it and runs asynchronously, which
5214 helps reduce the execution time of udev actions.
5215 """)),
5216 help='activate any device (called by udev)')
5217 trigger_parser.add_argument(
5218 'dev',
5219 help=('device'),
5220 )
5221 trigger_parser.add_argument(
5222 '--cluster',
5223 metavar='NAME',
5224 default='ceph',
5225 help='cluster name to assign this disk to',
5226 )
5227 trigger_parser.add_argument(
5228 '--dmcrypt',
5229 action='store_true', default=None,
5230 help='map devices with dm-crypt',
5231 )
5232 trigger_parser.add_argument(
5233 '--dmcrypt-key-dir',
5234 metavar='KEYDIR',
5235 default='/etc/ceph/dmcrypt-keys',
5236 help='directory where dm-crypt keys are stored',
5237 )
5238 trigger_parser.add_argument(
5239 '--sync',
5240 action='store_true', default=None,
5241 help='do operation synchronously; do not trigger systemd',
5242 )
5243 trigger_parser.set_defaults(
5244 func=main_trigger,
5245 )
5246 return trigger_parser
5247
5248
5249 def make_activate_parser(subparsers):
5250 activate_parser = subparsers.add_parser(
5251 'activate',
5252 formatter_class=argparse.RawDescriptionHelpFormatter,
5253 description=textwrap.fill(textwrap.dedent("""\
5254 Activate the OSD found at PATH (can be a directory
5255 or a device partition, possibly encrypted). When
5256 activated for the first time, a unique OSD id is obtained
5257 from the cluster. If PATH is a directory, a symbolic
5258 link is added in {statedir}/osd/ceph-$id. If PATH is
5259 a partition, it is mounted on {statedir}/osd/ceph-$id.
5260 Finally, the OSD daemon is run.
5261
5262 If the OSD depends on auxiliary partitions (journal, block, ...)
5263 they need to be available otherwise activation will fail. It
5264 may happen if a journal is encrypted and cryptsetup was not
5265 run yet.
5266 """.format(statedir=STATEDIR))),
5267 help='Activate a Ceph OSD')
5268 activate_parser.add_argument(
5269 '--mount',
5270 action='store_true', default=None,
5271 help='mount a block device [deprecated, ignored]',
5272 )
5273 activate_parser.add_argument(
5274 '--activate-key',
5275 metavar='PATH',
5276 help='bootstrap-osd keyring path template (%(default)s)',
5277 dest='activate_key_template',
5278 )
5279 activate_parser.add_argument(
5280 '--mark-init',
5281 metavar='INITSYSTEM',
5282 help='init system to manage this dir',
5283 default='auto',
5284 choices=INIT_SYSTEMS,
5285 )
5286 activate_parser.add_argument(
5287 '--no-start-daemon',
5288 action='store_true', default=None,
5289 help='do not start the daemon',
5290 )
5291 activate_parser.add_argument(
5292 'path',
5293 metavar='PATH',
5294 help='path to block device or directory',
5295 )
5296 activate_parser.add_argument(
5297 '--dmcrypt',
5298 action='store_true', default=None,
5299 help='map DATA and/or JOURNAL devices with dm-crypt',
5300 )
5301 activate_parser.add_argument(
5302 '--dmcrypt-key-dir',
5303 metavar='KEYDIR',
5304 default='/etc/ceph/dmcrypt-keys',
5305 help='directory where dm-crypt keys are stored',
5306 )
5307 activate_parser.add_argument(
5308 '--reactivate',
5309 action='store_true', default=False,
5310 help='activate the deactived OSD',
5311 )
5312 activate_parser.set_defaults(
5313 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5314 func=main_activate,
5315 )
5316 return activate_parser
5317
5318
5319 def make_activate_lockbox_parser(subparsers):
5320 parser = subparsers.add_parser(
5321 'activate-lockbox',
5322 formatter_class=argparse.RawDescriptionHelpFormatter,
5323 description=textwrap.fill(textwrap.dedent("""\
5324 Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
5325 where $uuid uniquely identifies the OSD that needs this lockbox
5326 to retrieve keys from the monitor and unlock its partitions.
5327
5328 If the OSD has one or more auxiliary devices (journal, block, ...)
5329 symbolic links are created at {statedir}/osd-lockbox/$other_uuid
5330 and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
5331 allow a journal encrypted in a partition identified by $other_uuid to
5332 fetch the keys it needs from the monitor.
5333
5334 Finally the OSD is activated, as it would be with ceph-disk activate.
5335 """.format(statedir=STATEDIR))),
5336 help='Activate a Ceph lockbox')
5337 parser.add_argument(
5338 '--activate-key',
5339 help='bootstrap-osd keyring path template (%(default)s)',
5340 dest='activate_key_template',
5341 )
5342 parser.add_argument(
5343 '--dmcrypt-key-dir',
5344 metavar='KEYDIR',
5345 default='/etc/ceph/dmcrypt-keys',
5346 help='directory where dm-crypt keys are stored',
5347 )
5348 parser.add_argument(
5349 'path',
5350 metavar='PATH',
5351 help='path to block device',
5352 )
5353 parser.set_defaults(
5354 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5355 func=main_activate_lockbox,
5356 )
5357 return parser
5358
5359
5360 def make_activate_block_parser(subparsers):
5361 return make_activate_space_parser('block', subparsers)
5362
5363
5364 def make_activate_journal_parser(subparsers):
5365 return make_activate_space_parser('journal', subparsers)
5366
5367
5368 def make_activate_space_parser(name, subparsers):
5369 activate_space_parser = subparsers.add_parser(
5370 'activate-%s' % name,
5371 formatter_class=argparse.RawDescriptionHelpFormatter,
5372 description=textwrap.fill(textwrap.dedent("""\
5373 Activating a {name} partition is only meaningfull
5374 if it is encrypted and it will map it using
5375 cryptsetup.
5376
5377 Finally the corresponding OSD is activated,
5378 as it would be with ceph-disk activate.
5379 """.format(name=name))),
5380 help='Activate an OSD via its %s device' % name)
5381 activate_space_parser.add_argument(
5382 'dev',
5383 metavar='DEV',
5384 help='path to %s block device' % name,
5385 )
5386 activate_space_parser.add_argument(
5387 '--activate-key',
5388 metavar='PATH',
5389 help='bootstrap-osd keyring path template (%(default)s)',
5390 dest='activate_key_template',
5391 )
5392 activate_space_parser.add_argument(
5393 '--mark-init',
5394 metavar='INITSYSTEM',
5395 help='init system to manage this dir',
5396 default='auto',
5397 choices=INIT_SYSTEMS,
5398 )
5399 activate_space_parser.add_argument(
5400 '--dmcrypt',
5401 action='store_true', default=None,
5402 help=('map data and/or auxiliariy (journal, etc.) '
5403 'devices with dm-crypt'),
5404 )
5405 activate_space_parser.add_argument(
5406 '--dmcrypt-key-dir',
5407 metavar='KEYDIR',
5408 default='/etc/ceph/dmcrypt-keys',
5409 help='directory where dm-crypt keys are stored',
5410 )
5411 activate_space_parser.add_argument(
5412 '--reactivate',
5413 action='store_true', default=False,
5414 help='activate the deactived OSD',
5415 )
5416 activate_space_parser.set_defaults(
5417 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5418 func=lambda args: main_activate_space(name, args),
5419 )
5420 return activate_space_parser
5421
5422
5423 def make_activate_all_parser(subparsers):
5424 activate_all_parser = subparsers.add_parser(
5425 'activate-all',
5426 formatter_class=argparse.RawDescriptionHelpFormatter,
5427 description=textwrap.fill(textwrap.dedent("""\
5428 Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
5429 The partitions containing auxiliary devices (journal, block, ...)
5430 are not activated.
5431 """)),
5432 help='Activate all tagged OSD partitions')
5433 activate_all_parser.add_argument(
5434 '--activate-key',
5435 metavar='PATH',
5436 help='bootstrap-osd keyring path template (%(default)s)',
5437 dest='activate_key_template',
5438 )
5439 activate_all_parser.add_argument(
5440 '--mark-init',
5441 metavar='INITSYSTEM',
5442 help='init system to manage this dir',
5443 default='auto',
5444 choices=INIT_SYSTEMS,
5445 )
5446 activate_all_parser.set_defaults(
5447 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5448 func=main_activate_all,
5449 )
5450 return activate_all_parser
5451
5452
5453 def make_list_parser(subparsers):
5454 list_parser = subparsers.add_parser(
5455 'list',
5456 formatter_class=argparse.RawDescriptionHelpFormatter,
5457 description=textwrap.fill(textwrap.dedent("""\
5458 Display all partitions on the system and their
5459 associated Ceph information, if any.
5460 """)),
5461 help='List disks, partitions, and Ceph OSDs')
5462 list_parser.add_argument(
5463 '--format',
5464 help='output format',
5465 default='plain',
5466 choices=['json', 'plain'],
5467 )
5468 list_parser.add_argument(
5469 'path',
5470 metavar='PATH',
5471 nargs='*',
5472 help='path to block devices, relative to /sys/block',
5473 )
5474 list_parser.set_defaults(
5475 func=main_list,
5476 )
5477 return list_parser
5478
5479
5480 def make_suppress_parser(subparsers):
5481 suppress_parser = subparsers.add_parser(
5482 'suppress-activate',
5483 formatter_class=argparse.RawDescriptionHelpFormatter,
5484 description=textwrap.fill(textwrap.dedent("""\
5485 Add a prefix to the list of suppressed device names
5486 so that they are ignored by all activate* subcommands.
5487 """)),
5488 help='Suppress activate on a device (prefix)')
5489 suppress_parser.add_argument(
5490 'path',
5491 metavar='PATH',
5492 help='path to block device or directory',
5493 )
5494 suppress_parser.set_defaults(
5495 func=main_suppress,
5496 )
5497
5498 unsuppress_parser = subparsers.add_parser(
5499 'unsuppress-activate',
5500 formatter_class=argparse.RawDescriptionHelpFormatter,
5501 description=textwrap.fill(textwrap.dedent("""\
5502 Remove a prefix from the list of suppressed device names
5503 so that they are no longer ignored by all
5504 activate* subcommands.
5505 """)),
5506 help='Stop suppressing activate on a device (prefix)')
5507 unsuppress_parser.add_argument(
5508 'path',
5509 metavar='PATH',
5510 help='path to block device or directory',
5511 )
5512 unsuppress_parser.set_defaults(
5513 func=main_unsuppress,
5514 )
5515 return suppress_parser
5516
5517
5518 def make_deactivate_parser(subparsers):
5519 deactivate_parser = subparsers.add_parser(
5520 'deactivate',
5521 formatter_class=argparse.RawDescriptionHelpFormatter,
5522 description=textwrap.fill(textwrap.dedent("""\
5523 Deactivate the OSD located at PATH. It stops the OSD daemon
5524 and optionally marks it out (with --mark-out). The content of
5525 the OSD is left untouched.
5526
5527 By default, the, ready, active, INIT-specific files are
5528 removed (so that it is not automatically re-activated by the
5529 udev rules or ceph-disk trigger) and the file deactive is
5530 created to remember the OSD is deactivated.
5531
5532 If the --once option is given, the ready, active, INIT-specific
5533 files are not removed and the OSD will reactivate whenever
5534 ceph-disk trigger is run on one of the devices (journal, data,
5535 block, lockbox, ...).
5536
5537 If the OSD is dmcrypt, remove the data dmcrypt map. When
5538 deactivate finishes, the OSD is down.
5539 """)),
5540 help='Deactivate a Ceph OSD')
5541 deactivate_parser.add_argument(
5542 '--cluster',
5543 metavar='NAME',
5544 default='ceph',
5545 help='cluster name to assign this disk to',
5546 )
5547 deactivate_parser.add_argument(
5548 'path',
5549 metavar='PATH',
5550 nargs='?',
5551 help='path to block device or directory',
5552 )
5553 deactivate_parser.add_argument(
5554 '--deactivate-by-id',
5555 metavar='<id>',
5556 help='ID of OSD to deactive'
5557 )
5558 deactivate_parser.add_argument(
5559 '--mark-out',
5560 action='store_true', default=False,
5561 help='option to mark the osd out',
5562 )
5563 deactivate_parser.add_argument(
5564 '--once',
5565 action='store_true', default=False,
5566 help='does not need --reactivate to activate again',
5567 )
5568 deactivate_parser.set_defaults(
5569 func=main_deactivate,
5570 )
5571
5572
5573 def make_destroy_parser(subparsers):
5574 destroy_parser = subparsers.add_parser(
5575 'destroy',
5576 formatter_class=argparse.RawDescriptionHelpFormatter,
5577 description=textwrap.fill(textwrap.dedent("""\ Destroy the OSD located at PATH. It removes the OSD from the
5578 cluster and marks it destroyed. An OSD must be down before it
5579 can be destroyed. Once it is destroyed, a new OSD can be created
5580 in its place, reusing the same OSD id and position (e.g. after
5581 a failed HDD or SSD is replaced). Alternatively, if the
5582 --purge option is also specified, the OSD is removed from the
5583 CRUSH map and the OSD id is deallocated.""")),
5584 help='Destroy a Ceph OSD')
5585 destroy_parser.add_argument(
5586 '--cluster',
5587 metavar='NAME',
5588 default='ceph',
5589 help='cluster name to assign this disk to',
5590 )
5591 destroy_parser.add_argument(
5592 'path',
5593 metavar='PATH',
5594 nargs='?',
5595 help='path to block device or directory',
5596 )
5597 destroy_parser.add_argument(
5598 '--destroy-by-id',
5599 metavar='<id>',
5600 help='ID of OSD to destroy'
5601 )
5602 destroy_parser.add_argument(
5603 '--dmcrypt-key-dir',
5604 metavar='KEYDIR',
5605 default='/etc/ceph/dmcrypt-keys',
5606 help=('directory where dm-crypt keys are stored '
5607 '(If you don\'t know how it work, '
5608 'dont use it. we have default value)'),
5609 )
5610 destroy_parser.add_argument(
5611 '--zap',
5612 action='store_true', default=False,
5613 help='option to erase data and partition',
5614 )
5615 destroy_parser.add_argument(
5616 '--purge',
5617 action='store_true', default=False,
5618 help='option to remove OSD from CRUSH map and deallocate the id',
5619 )
5620 destroy_parser.set_defaults(
5621 func=main_destroy,
5622 )
5623
5624
5625 def make_zap_parser(subparsers):
5626 zap_parser = subparsers.add_parser(
5627 'zap',
5628 formatter_class=argparse.RawDescriptionHelpFormatter,
5629 description=textwrap.fill(textwrap.dedent("""\
5630 Zap/erase/destroy a device's partition table and contents. It
5631 actually uses sgdisk and it's option --zap-all to
5632 destroy both GPT and MBR data structures so that the disk
5633 becomes suitable for repartitioning.
5634 """)),
5635 help='Zap/erase/destroy a device\'s partition table (and contents)')
5636 zap_parser.add_argument(
5637 'dev',
5638 metavar='DEV',
5639 nargs='+',
5640 help='path to block device',
5641 )
5642 zap_parser.set_defaults(
5643 func=main_zap,
5644 )
5645 return zap_parser
5646
5647
5648 def main(argv):
5649 args = parse_args(argv)
5650
5651 setup_logging(args.verbose, args.log_stdout)
5652
5653 if args.prepend_to_path != '':
5654 path = os.environ.get('PATH', os.defpath)
5655 os.environ['PATH'] = args.prepend_to_path + ":" + path
5656
5657 if args.func.__name__ != 'main_trigger':
5658 # trigger may run when statedir is unavailable and does not use it
5659 setup_statedir(args.statedir)
5660 setup_sysconfdir(args.sysconfdir)
5661
5662 global CEPH_PREF_USER
5663 CEPH_PREF_USER = args.setuser
5664 global CEPH_PREF_GROUP
5665 CEPH_PREF_GROUP = args.setgroup
5666
5667 if args.verbose:
5668 args.func(args)
5669 else:
5670 main_catch(args.func, args)
5671
5672
5673 def setup_logging(verbose, log_stdout):
5674 loglevel = logging.WARNING
5675 if verbose:
5676 loglevel = logging.DEBUG
5677
5678 if log_stdout:
5679 ch = logging.StreamHandler(stream=sys.stdout)
5680 ch.setLevel(loglevel)
5681 formatter = logging.Formatter('%(funcName)s: %(message)s')
5682 ch.setFormatter(formatter)
5683 LOG.addHandler(ch)
5684 LOG.setLevel(loglevel)
5685 else:
5686 logging.basicConfig(
5687 level=loglevel,
5688 format='%(funcName)s: %(message)s',
5689 )
5690
5691
5692 def main_catch(func, args):
5693
5694 try:
5695 func(args)
5696
5697 except Error as e:
5698 raise SystemExit(
5699 '{prog}: {msg}'.format(
5700 prog=args.prog,
5701 msg=e,
5702 )
5703 )
5704
5705 except CephDiskException as error:
5706 exc_name = error.__class__.__name__
5707 raise SystemExit(
5708 '{prog} {exc_name}: {msg}'.format(
5709 prog=args.prog,
5710 exc_name=exc_name,
5711 msg=error,
5712 )
5713 )
5714
5715
5716 def run():
5717 main(sys.argv[1:])
5718
5719
5720 if __name__ == '__main__':
5721 main(sys.argv[1:])
5722 warned_about = {}