]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-disk/ceph_disk/main.py
update sources to 12.2.7
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
7 #
8 # Author: Loic Dachary <loic@dachary.org>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
13 # any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
19 #
20
21 from __future__ import print_function
22
23 import argparse
24 import base64
25 import errno
26 import fcntl
27 import functools
28 import json
29 import logging
30 import os
31 import platform
32 import re
33 import subprocess
34 import stat
35 import sys
36 import tempfile
37 import uuid
38 import time
39 import shlex
40 import shutil
41 import pwd
42 import grp
43 import textwrap
44 import glob
45
46 CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
47 CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'
48
49 KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'
50
51 PTYPE = {
52 'regular': {
53 'journal': {
54 # identical because creating a journal is atomic
55 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
56 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
57 },
58 'block': {
59 # identical because creating a block is atomic
60 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
61 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
62 },
63 'block.db': {
64 # unlike journal/block above, 'ready' and 'tobe' differ here
65 'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
66 'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
67 },
68 'block.wal': {
69 # unlike journal/block above, 'ready' and 'tobe' differ here
70 'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
71 'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
72 },
73 'osd': {
74 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
75 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
76 },
77 'lockbox': {
78 'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
79 'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
80 },
81 },
82 'luks': {
83 'journal': {
84 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
85 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
86 },
87 'block': {
88 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
89 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
90 },
91 'block.db': {
92 'ready': '166418da-c469-4022-adf4-b30afd37f176',
93 'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
94 },
95 'block.wal': {
96 'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
97 'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
98 },
99 'osd': {
100 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
101 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
102 },
103 },
104 'plain': {
105 'journal': {
106 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
107 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
108 },
109 'block': {
110 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
111 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
112 },
113 'block.db': {
114 'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
115 'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
116 },
117 'block.wal': {
118 'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
119 'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
120 },
121 'osd': {
122 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
123 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
124 },
125 },
126 'mpath': {
127 'journal': {
128 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
129 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
130 },
131 'block': {
132 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
133 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
134 },
135 'block.db': {
136 'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
137 'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
138 },
139 'block.wal': {
140 'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
141 'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
142 },
143 'osd': {
144 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
145 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
146 },
147 'lockbox': {
148 'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
149 'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
150 },
151 },
152 }
153
try:
    # Probe for https://bugs.python.org/issue23098: os.major() may
    # overflow on large device numbers.
    os.major(0x80002b00)
except OverflowError:
    # Replace both helpers with pure-Python bit arithmetic.
    def _major(devid):
        return ((devid >> 8) & 0xfff) | ((devid >> 32) & ~0xfff)

    def _minor(devid):
        return (devid & 0xff) | ((devid >> 12) & ~0xff)

    os.major = _major
    os.minor = _minor
160
161
class Ptype(object):
    """Static lookup helpers over the module-level PTYPE table."""

    @staticmethod
    def get_ready_by_type(what):
        """Return the 'ready' id of every entry under PTYPE[what]."""
        return [entry['ready'] for entry in PTYPE[what].values()]

    @staticmethod
    def get_ready_by_name(name):
        """Return the 'ready' id of `name` from each group defining it."""
        return [group[name]['ready']
                for group in PTYPE.values()
                if name in group]

    @staticmethod
    def is_regular_space(ptype):
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """True if ptype is the 'ready' id of a Space.NAMES entry of `what`."""
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' id back to its Space.NAMES name.

        :raises: ValueError if no group has a matching entry.
        """
        matches = (name
                   for group in PTYPE.values()
                   for name in Space.NAMES
                   if ptype == group[name]['ready'])
        for name in matches:
            return name
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """True if ptype is the 'ready' id of `name` in 'plain' or 'luks'."""
        return any(ptype == PTYPE[what][name]['ready']
                   for what in ('plain', 'luks'))
216
217
218 SYSFS = '/sys'
219
# Platform-dependent defaults, selected once at import time.
FREEBSD = platform.system() == 'FreeBSD'
if FREEBSD:
    DEFAULT_FS_TYPE = 'zfs'
    PROCDIR = '/compat/linux/proc'
    # FreeBSD does not have blockdevices any more
    BLOCKDIR = '/dev'
    ROOTGROUP = 'wheel'
else:
    DEFAULT_FS_TYPE = 'xfs'
    PROCDIR = '/proc'
    BLOCKDIR = '/sys/block'
    ROOTGROUP = 'root'
233
234 """
235 OSD STATUS Definition
236 """
237 OSD_STATUS_OUT_DOWN = 0
238 OSD_STATUS_OUT_UP = 1
239 OSD_STATUS_IN_DOWN = 2
240 OSD_STATUS_IN_UP = 3
241
# Mount options, keyed by filesystem type.
MOUNT_OPTIONS = {
    'btrfs': 'noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    'ext4': 'noatime,user_xattr',
    'xfs': 'noatime,inode64',
}
251
# mkfs argument lists, keyed by filesystem type.
MKFS_ARGS = {
    'btrfs': [
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    'xfs': [
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
    'zfs': [
        '-o', 'atime=off',
    ],
}
271
272 INIT_SYSTEMS = [
273 'upstart',
274 'sysvinit',
275 'systemd',
276 'openrc',
277 'bsdrc',
278 'auto',
279 'none',
280 ]
281
282 STATEDIR = '/var/lib/ceph'
283
284 SYSCONFDIR = '/etc/ceph'
285
286 prepare_lock = None
287 activate_lock = None
288 SUPPRESS_PREFIX = None
289
290 # only warn once about some things
291 warned_about = {}
292
293 # Nuke the TERM variable to avoid confusing any subprocesses we call.
294 # For example, libreadline will print weird control sequences for some
295 # TERM values.
296 if 'TERM' in os.environ:
297 del os.environ['TERM']
298
299 LOG_NAME = __name__
300 if LOG_NAME == '__main__':
301 LOG_NAME = os.path.basename(sys.argv[0])
302 LOG = logging.getLogger(LOG_NAME)
303
304 # Allow user-preferred values for subprocess user and group
305 CEPH_PREF_USER = None
306 CEPH_PREF_GROUP = None
307
308
class FileLock(object):
    """
    Context manager holding an exclusive ``fcntl.lockf`` lock on a file.

    The file `fn` is created if missing. The instance is not re-entrant:
    entering it while it is already held trips an assertion.
    """

    def __init__(self, fn):
        self.fn = fn
        self.fd = None

    def __enter__(self):
        # Compare with None: 0 is a valid file descriptor.
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except BaseException:
            # Do not leak the descriptor if locking fails or is interrupted.
            os.close(fd)
            raise
        self.fd = fd

    def __exit__(self, exc_type, exc_val, exc_tb):
        assert self.fd is not None
        fd, self.fd = self.fd, None
        try:
            fcntl.lockf(fd, fcntl.LOCK_UN)
        finally:
            # Closing also drops the lock, so always close.
            os.close(fd)
324
325
class Error(Exception):
    """
    Error
    """
    # The class docstring is runtime data: __str__ uses it as the message
    # prefix, so each subclass sets its message through its docstring.

    def __str__(self):
        # `basestring` only exists on Python 2.
        try:
            text_type = basestring
        except NameError:
            text_type = str
        parts = [_bytes2str(self.__doc__.strip())]
        for arg in self.args:
            if not isinstance(arg, text_type):
                arg = str(arg)
            parts.append(_bytes2str(arg))
        return ': '.join(parts)
339
340
class MountError(Error):
    """
    Mounting filesystem failed
    """
    # Error.__str__ uses the docstring above as the message prefix.
345
346
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
    # Error.__str__ uses the docstring above as the message prefix.
351
352
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
    # Error.__str__ uses the docstring above as the message prefix.
357
358
class TruncatedLineError(Error):
    """
    Line is truncated
    """
    # Error.__str__ uses the docstring above as the message prefix.
363
364
class TooManyLinesError(Error):
    """
    Too many lines
    """
    # Error.__str__ uses the docstring above as the message prefix.
369
370
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
    # Error.__str__ uses the docstring above as the message prefix.
375
376
class CephDiskException(Exception):
    """
    Base exception for ceph-disk carrying a custom (ad-hoc) message,
    meant to be caught and dealt with when main() is executed
    """
383
384
class ExecutableNotFound(CephDiskException):
    """
    Raised when an executable is not available in PATH
    """
390
391
def is_systemd():
    """
    Report whether the command name of PID 1 contains 'systemd'.
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as handle:
        init_name = handle.read()
    return 'systemd' in init_name
398
399
def is_upstart():
    """
    Report whether ``init --version`` mentions upstart on stdout.
    """
    stdout, _stderr, _returncode = command(['init', '--version'])
    return 'upstart' in stdout
406
407
def maybe_mkdir(*a, **kw):
    """
    Create a directory unless it already exists.

    A symlink sitting at the target path (dangling or not) is removed
    first. Arguments are forwarded unchanged to ``os.mkdir``.

    :raises: OSError for any mkdir failure other than EEXIST.
    """
    path = a[0] if a else kw['path']
    # islink() uses lstat, so dangling symlinks are detected too; it also
    # takes only the path, so an extra positional mode is not a problem.
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
422 else:
423 raise
424
425
def which(executable):
    """
    Find an executable: search $PATH (or os.defpath), then a fixed list
    of common bin/sbin directories. Returns the path, or None.
    """
    search_dirs = (os.environ.get('PATH') or os.defpath).split(os.pathsep)
    search_dirs = search_dirs + [
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ]
    for directory in search_dirs:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
445
446
447 def _get_command_executable(arguments):
448 """
449 Return the full path for an executable, raise if the executable is not
450 found. If the executable has already a full path do not perform any checks.
451 """
452 if os.path.isabs(arguments[0]): # an absolute path
453 return arguments
454 executable = which(arguments[0])
455 if not executable:
456 command_msg = 'Could not run command: %s' % ' '.join(arguments)
457 executable_msg = '%s not in path.' % arguments[0]
458 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
459
460 # swap the old executable for the new one
461 arguments[0] = executable
462 return arguments
463
464
def command(arguments, **kwargs):
    """
    Run a command to completion with stdout and stderr captured.

    The executable is resolved first, so a missing binary raises a
    helpful error. Extra keyword arguments go to ``subprocess.Popen``.

    .. note:: This should be the preferred way of calling
       ``subprocess.Popen``.

    :return: tuple (stdout, stderr, returncode), output decoded to text.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    proc = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout, stderr = proc.communicate()

    return _bytes2str(stdout), _bytes2str(stderr), proc.returncode
490
491
def command_with_stdin(arguments, stdin):
    """
    Run a command, feeding `stdin` to its standard input.

    :raises: SystemExit if the command exits with a non-zero status.
    :return: The command's standard output, as returned by communicate().
    """
    LOG.info("Running command with stdin: " + " ".join(arguments))
    proc = subprocess.Popen(
        arguments,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate(stdin)
    LOG.debug(stdout)
    if proc.returncode == 0:
        return stdout

    LOG.error(stderr)
    raise SystemExit(
        "'{cmd}' failed with status code {returncode}".format(
            cmd=arguments,
            returncode=proc.returncode,
        )
    )
510
511
512 def _bytes2str(string):
513 return string.decode('utf-8') if isinstance(string, bytes) else string
514
515
def command_init(arguments, **kwargs):
    """
    Start a command without waiting for it, stdout and stderr piped.

    The executable is resolved first, so a missing binary raises a
    helpful error. Extra keyword arguments go to ``subprocess.Popen``.

    :return: The started ``subprocess.Popen`` object.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    return subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
538
539
def command_wait(process):
    """
    Wait for a started process to finish and collect its output.

    :return: tuple (stdout, stderr, returncode), output decoded to text.
    """
    stdout, stderr = process.communicate()
    return _bytes2str(stdout), _bytes2str(stderr), process.returncode
548
549
def command_check_call(arguments, exit=False):
    """
    Run ``subprocess.check_call`` after resolving the executable, so a
    missing binary raises a helpful error.

    With ``exit`` set to ``True`` a failing command causes a clean (sans
    traceback) SystemExit; otherwise the CalledProcessError propagates.
    """
    arguments = _get_command_executable(arguments)
    cmdline = ' '.join(arguments)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(arguments)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=cmdline,
                returncode=error.returncode,
            )
        )
578
579
580 #
581 # An alternative block_path implementation would be
582 #
583 # name = basename(dev)
584 # return /sys/devices/virtual/block/$name
585 #
586 # It is however more fragile because it relies on the fact
587 # that the basename of the device the user will use always
588 # matches the one the driver will use. On Ubuntu 14.04, for
589 # instance, when multipath creates a partition table on
590 #
591 # /dev/mapper/353333330000007d0 -> ../dm-0
592 #
593 # it will create partition devices named
594 #
595 # /dev/mapper/353333330000007d0-part1
596 #
597 # which is the same device as /dev/dm-1 but not a symbolic
598 # link to it:
599 #
600 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
601 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
602 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
603 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
604 #
605 # Using the basename in this case fails.
606 #
607
608
def block_path(dev):
    """
    Return the ``<SYSFS>/dev/block/<major>:<minor>`` path of a device.

    On FreeBSD the device path itself is returned unchanged.
    """
    if FREEBSD:
        return dev
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS, M=os.major(rdev), m=os.minor(rdev))
616
617
def get_dm_uuid(dev):
    """
    Read the device-mapper uuid of a device from sysfs.

    :return: Raw contents of ``<block_path>/dm/uuid``, or False if that
             file does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # Close the handle deterministically instead of relying on the GC.
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
626
627
def is_mpath(dev):
    """
    Truthy if the path is managed by multipath.

    The result is a truthy/falsy value (a regex match object, None,
    False or an empty string), not a strict bool. Always False on FreeBSD.
    """
    if FREEBSD:
        return False
    uuid = get_dm_uuid(dev)
    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    return (uuid and
            (re.match(r'part\d+-mpath-', uuid) or
             re.match(r'mpath-', uuid)))
638
639
def get_dev_name(path):
    """
    Turn a /dev path into a device name, '/' becoming '!'. e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The path must start with '/dev/'.
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    return '!'.join(path[len(prefix):].split('/'))
655
656
def get_dev_path(name):
    """
    Turn a device name into its /dev path, '!' becoming '/'. e.g.::

        sdb -> /dev/sdb, cciss!c0d1 -> /dev/cciss/c0d1

    """
    return '/dev/{0}'.format('/'.join(name.split('!')))
667
668
def get_dev_relpath(name):
    """
    Turn a device name (cciss!c0d1) into a path relative to /dev.
    """
    return '/'.join(name.split('!'))
674
675
def get_dev_size(dev, size='megabytes'):
    """
    Return the size of a device, found by seeking to its end.

    A failure while seeking is logged as a warning and None is returned;
    a failure to open the device propagates.

    :param dev: the device to measure
    :param size: 'bytes' or 'megabytes'; anything else means megabytes
    """
    megabyte = 1024 * 1024
    unit = {'bytes': 1, 'megabytes': megabyte}.get(size, megabyte)
    handle = os.open(dev, os.O_RDONLY)
    try:
        return os.lseek(handle, 0, os.SEEK_END) // unit
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(handle)
697
698
def stmode_is_diskdevice(dmode):
    """True if a stat mode describes a disk device."""
    if not stat.S_ISBLK(dmode):
        # FreeBSD does not have block devices
        # All disks are character devices
        return FREEBSD and stat.S_ISCHR(dmode)
    return True
706
707
def dev_is_diskdevice(dev):
    """True if `dev`, following symlinks, is a disk device."""
    return stmode_is_diskdevice(os.stat(dev).st_mode)
711
712
def ldev_is_diskdevice(dev):
    """True if `dev` itself (symlinks not followed) is a disk device."""
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
716
717
def path_is_diskdevice(path):
    """True if the real path behind `path` is a disk device."""
    return dev_is_diskdevice(os.path.realpath(path))
721
722
def get_partition_mpath(dev, pnum):
    """
    Return the first multipath holder of `dev` whose dm uuid starts with
    ``part<pnum>-mpath-``, or None if there is none.
    """
    matches = list_partitions_mpath(
        dev, "part{pnum}-mpath-".format(pnum=pnum))
    return matches[0] if matches else None
730
731
def retry(on_error=Exception, max_tries=10, wait=0.2, backoff=0):
    """
    Decorator factory: retry a call while it raises `on_error`.

    Up to ``max_tries - 1`` attempts swallow `on_error` and sleep
    ``wait + backoff * attempt`` seconds; a final attempt is then made
    whose exception, if any, propagates.
    """
    def wrapper(func):
        @functools.wraps(func)
        def repeat(*args, **kwargs):
            attempt = 0
            while attempt < max_tries - 1:
                try:
                    return func(*args, **kwargs)
                except on_error:
                    time.sleep(wait + backoff * attempt)
                attempt += 1
            # last chance: let any failure reach the caller
            return func(*args, **kwargs)
        return repeat
    return wrapper
744
745
@retry(Error)
def get_partition_dev(dev, pnum):
    """
    Return the /dev path of partition `pnum` of `dev`.

    Partitions are assumed to be named like the base device followed by
    optional intervening characters (like 'p') and the number, e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    :raises: Error if no such partition is found.
    """
    where = ""
    if is_mpath(dev):
        partname = get_partition_mpath(dev, pnum)
    else:
        name = get_dev_name(os.path.realpath(dev))
        sys_entry = os.path.join(BLOCKDIR, name)
        where = " in %s" % sys_entry
        suffix = str(pnum)
        candidates = [entry for entry in os.listdir(sys_entry)
                      if entry.startswith(name) and entry.endswith(suffix)]
        # we want the shortest name that starts with the base name
        # and ends with the partition number (first one wins on ties)
        partname = min(candidates, key=len) if candidates else None
    if not partname:
        raise Error('partition %d for %s does not appear to exist%s' %
                    (pnum, dev, where))
    return get_dev_path(partname)
777
778
def list_all_partitions():
    """
    Return a dict mapping each device name to its list of partitions.

    Names come from BLOCKDIR, or on FreeBSD from the fourth column of
    ``<PROCDIR>/partitions``.
    """
    # Initialise before branching: the FreeBSD branch previously used
    # this dict without ever creating it.
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(
                        get_dev_path(name))
    return dev_part_list
799
800
def list_partitions(dev):
    """List the partitions of `dev`, multipath-aware."""
    real = os.path.realpath(dev)
    lister = list_partitions_mpath if is_mpath(real) \
        else list_partitions_device
    return lister(real)
807
808
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    Return the holders of `dev` whose dm uuid matches `part_re`.

    :param part_re: regular expression matched (``re.match``) against
                    each holder's ``dm/uuid`` contents
    """
    holders = os.path.join(block_path(dev), 'holders')
    partitions = []
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # Close each uuid file promptly rather than leaking the handle.
        with open(uuid_path, 'r') as uuid_file:
            uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
        if re.match(part_re, uuid):
            partitions.append(holder)
    return partitions
820
821
def list_partitions_device(dev):
    """
    Return the entries of the device's sysfs directory whose name
    starts with the device name
    """
    prefix = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(prefix)]
832
833
def get_partition_base(dev):
    """
    Return the /dev path of the whole device holding a partition.

    :raises: Error if `dev` is not a block device, is not a partition,
             or has no parent under /sys/block.
    """
    dev = os.path.realpath(dev)
    if not ldev_is_diskdevice(dev):
        raise Error('not a block device', dev)

    sys_block = '/sys/block'
    name = get_dev_name(dev)
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', dev)

    # the parent is the whole device with an entry named after us
    for candidate in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, candidate, name)):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', dev)
851
852
def is_partition_mpath(dev):
    """
    True if the dm uuid of `dev` starts with ``part<N>-mpath-``.

    Returns False, instead of raising TypeError, when the device has no
    dm uuid.
    """
    uuid = get_dm_uuid(dev)
    # get_dm_uuid() returns False when there is no dm/uuid file.
    return bool(uuid and re.match(r'part\d+-mpath-', uuid))
856
857
def partnum_mpath(dev):
    """
    Return the partition number, as a string, taken from the
    ``part<N>-mpath-`` part of the device's dm uuid.

    :raises: IndexError if the uuid contains no such pattern.
    """
    uuid = get_dm_uuid(dev)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return re.findall(r'part(\d+)-mpath-', uuid)[0]
861
862
def get_partition_base_mpath(dev):
    """
    Return the /dev/mapper path of the device behind a multipath
    partition, using the dm name of its first listed slave.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # Close the handle deterministically instead of relying on the GC.
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
870
871
def is_partition(dev):
    """
    Tell whether a device path is a partition (True) or a full
    disk (False).

    :raises: Error if it is neither.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    info = os.lstat(dev)
    if not stmode_is_diskdevice(info.st_mode):
        raise Error('not a block device', dev)

    # whole disks have their own entry under BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    devnum = (os.major(info.st_rdev), os.minor(info.st_rdev))
    if not os.path.exists('/sys/dev/block/%d:%d/partition' % devnum):
        raise Error('not a disk or partition', dev)
    return True
895
896
def is_mounted(dev):
    """
    Check if the given device is mounted.

    :return: The mount point of the first matching line in
             ``<PROCDIR>/mounts``, or None if the device is not mounted.
    """
    dev = os.path.realpath(dev)
    # The mounts file is read as bytes, so compare as bytes: on Python 3
    # a bytes path never equals a str path and nothing would ever match.
    if not isinstance(dev, bytes):
        dev = dev.encode('utf-8')
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev, path = fields[0], fields[1]
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return _bytes2str(path)
    return None
914
915
def is_held(dev):
    """
    List the holders of a device (e.g., a dm-crypt mapping).

    Multipath devices always yield an empty list.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    part = get_dev_name(os.path.realpath(dev))

    # full disk?
    whole = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(whole):
        return os.listdir(whole)

    # partition? try ever shorter prefixes of the name as the parent disk
    for end in range(len(part), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
941
942
def verify_not_in_use(dev, check_partitions=False):
    """
    Ensure a device (path) is neither mounted nor held by a
    device-mapper mapping.

    :param check_partitions: when set and `dev` is not itself a
                             partition, also check each of its partitions
    :raises: Error if device is in use.
    """
    def _raise_if_busy(device):
        if is_mounted(device):
            raise Error('Device is mounted', device)
        holders = is_held(device)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % device, ','.join(holders))

    assert os.path.exists(dev)
    _raise_if_busy(dev)

    if check_partitions and not is_partition(dev):
        for partname in list_partitions(dev):
            _raise_if_busy(get_dev_path(partname))
968
969
def must_be_one_line(line):
    """
    Validate that the input is exactly one newline-terminated line.

    :raises: TruncatedLineError if the trailing newline is missing,
             TooManyLinesError if another newline remains.
    :return: The line content without its trailing newline.
    """
    text = _bytes2str(line)

    if text[-1:] != '\n':
        raise TruncatedLineError(text)
    body = text[:-1]
    if '\n' in body:
        raise TooManyLinesError(body)
    return body
985
986
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :raises: Error if the file is not exactly one terminated line.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # Close the handle deterministically instead of relying on the GC.
        with open(path, 'rb') as source:
            line = source.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
1014
1015
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline. The data goes to a temporary file which is flushed,
    fsync'ed and then renamed over the final path.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # fsync() only sees data already handed to the OS, so drain
        # Python's own buffer first.
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1029
1030
def init_get():
    """
    Ask 'ceph-detect-init' for the init system, with 'sysvinit' passed
    as its --default.
    """
    detected = _check_output(
        args=['ceph-detect-init', '--default', 'sysvinit'],
    )
    return must_be_one_line(detected)
1043
1044
def check_osd_magic(path):
    """
    Check that this path has the Ceph OSD magic.

    :raises: BadMagicError if the 'magic' file is missing (probably not
             mkfs'ed yet) or does not hold the expected value.
    """
    # a missing file reads as None, which never equals the magic
    if read_one_line(path, 'magic') != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1058
1059
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :raises: Error if it is not.
    """
    if re.match(r'^[0-9]+$', osd_id) is None:
        raise Error('osd id is not numeric', osd_id)
1066
1067
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
    path,
):
    """
    Allocates an OSD id on the given cluster.

    If the lockbox directory for `fsid` holds a 'whoami' file, that id
    is reused and the lockbox 'osd_keyring' is moved into `path`.
    Otherwise ``ceph osd new`` is run, passing along the optional
    'wanttobe' id found in `path`.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """
    lockbox_path = os.path.join(STATEDIR, 'osd-lockbox', fsid)
    lockbox_osd_id = read_one_line(lockbox_path, 'whoami')
    osd_keyring = os.path.join(path, 'keyring')
    if lockbox_osd_id:
        LOG.debug('Getting OSD id from Lockbox...')
        shutil.move(os.path.join(lockbox_path, 'osd_keyring'),
                    osd_keyring)
        path_set_context(osd_keyring)
        os.unlink(os.path.join(lockbox_path, 'whoami'))
        return lockbox_osd_id

    LOG.debug('Allocating OSD id...')
    secrets = Secrets()
    try:
        wanttobe = read_one_line(path, 'wanttobe')
        wanttobe_path = os.path.join(path, 'wanttobe')
        if os.path.exists(wanttobe_path):
            os.unlink(wanttobe_path)
        new_cmd = [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            '-i', '-',
            'osd', 'new',
            fsid,
        ]
        if wanttobe:
            new_cmd.append(wanttobe)
        osd_id = command_with_stdin(new_cmd, secrets.get_json())
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(osd_id)
    check_osd_id(osd_id)
    secrets.write_osd_keyring(osd_keyring, osd_id)
    return osd_id
1117
1118
def get_osd_id(path):
    """
    Read the OSD id from the 'whoami' file at the given path.

    :return: The validated id, or None if there is no 'whoami' file.
    """
    osd_id = read_one_line(path, 'whoami')
    if osd_id is None:
        return None
    check_osd_id(osd_id)
    return osd_id
1127
1128
def get_ceph_user():
    """
    Pick the user for subprocesses: CEPH_PREF_USER when set (the
    program exits with status 2 if that user is unknown), else 'ceph'
    when it exists, else 'root'.
    """
    preferred = CEPH_PREF_USER

    if preferred is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(preferred)
    except KeyError:
        print("No such user:", preferred)
        sys.exit(2)
    return preferred
1145
1146
def get_ceph_group():
    """
    Pick the group for subprocesses: CEPH_PREF_GROUP when set (the
    program exits with status 2 if that group is unknown), else 'ceph'
    when it exists, else 'root'.
    """
    preferred = CEPH_PREF_GROUP

    if preferred is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(preferred)
    except KeyError:
        print("No such group:", preferred)
        sys.exit(2)
    return preferred
1163
1164
def path_set_context(path):
    """
    Recursively restore the SELinux context of `path` (when restorecon
    is available) and chown it to ceph:ceph (when the ceph user is the
    selected one).
    """
    restorecon_available = which('restorecon')
    if restorecon_available:
        # restore selinux context to default policy values
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
1173
1174
def _check_output(args=None, **kwargs):
    """
    Run a command through command() and return its stdout.

    :raises: subprocess.CalledProcessError on a non-zero exit status,
             with ``output`` set to stdout followed by stderr.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return _bytes2str(out)
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
1183
1184
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # There is no stderr to report here: command() raised before
        # returning anything, so `err` is unbound at this point.
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1218
1219
def get_conf_with_default(cluster, variable):
    """
    Ask ``ceph-osd --show-config-value`` for a configuration value.

    The existing documentation states that this fails for variables
    that are not defined in the common config options.

    :raises: Error when the ceph-osd invocation fails.
    :return: the first line of the command output.
    """
    show_config = [
        'ceph-osd',
        '--cluster={0}'.format(cluster),
        '--show-config-value={0}'.format(variable),
    ]
    try:
        out = _check_output(args=show_config, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error('getting variable from configuration failed', e)

    first_line, _, _ = str(out).partition('\n')
    return first_line
1248
1249
def get_fsid(cluster):
    """
    Return the lower-cased ``fsid`` configured for ``cluster``.

    :raises: Error when the configured value is the all-zero uuid.
    """
    # uuids from boost always default to 'the empty uuid'
    empty_uuid = '00000000-0000-0000-0000-000000000000'
    fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    if fsid != empty_uuid:
        return fsid.lower()
    raise Error('getting cluster uuid from configuration failed')
1261
1262
def get_dmcrypt_key_path(_uuid, key_dir, luks):
    """
    Build the path of the dmcrypt key file for ``_uuid``.

    The file is named after the uuid, with a ``.luks.key`` suffix when
    ``luks`` is true.

    :return: the path inside ``key_dir``; its existence is not checked.
    """
    filename = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, filename)
1279
1280
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Locate the dm-crypt key for the partition ``_uuid``.

    A key file in ``key_dir`` (the layout used before lockbox) takes
    precedence. Otherwise the lockbox directory under STATEDIR is
    consulted and the key is fetched with ``ceph config-key get``.

    :return: a one element tuple holding the key file path for the
             legacy layout, or the base64-decoded key otherwise.
    :raises: Error if no key source exists, the cluster cannot be
             determined, the key-management-mode is unknown or the
             key cannot be fetched.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        return (legacy_path,)
    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if os.path.exists(path):
        mode = get_oneliner(path, 'key-management-mode')
        osd_uuid = get_oneliner(path, 'osd-uuid')
        ceph_fsid = read_one_line(path, 'ceph_fsid')
        if ceph_fsid is None:
            LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
                        "for cluster name")
            cluster = 'ceph'
        else:
            cluster = find_cluster_by_uuid(ceph_fsid)
            if cluster is None:
                raise Error('No cluster conf found in ' + SYSCONFDIR +
                            ' with fsid %s' % ceph_fsid)

        if mode == KEY_MANAGEMENT_MODE_V1:
            key, stderr, ret = command(
                [
                    'ceph',
                    '--cluster', cluster,
                    '--name',
                    'client.osd-lockbox.' + osd_uuid,
                    '--keyring',
                    os.path.join(path, 'keyring'),
                    'config-key',
                    'get',
                    'dm-crypt/osd/' + osd_uuid + '/luks',
                ],
            )
            LOG.debug("stderr " + stderr)
            # An assert would vanish under ``python -O`` and the output
            # of a failed command would then be decoded as if it were
            # the key, so the status is checked explicitly.
            if ret != 0:
                raise Error('unable to fetch dm-crypt key for osd',
                            osd_uuid, stderr)
            return base64.b64decode(key)
        else:
            raise Error('unknown key-management-mode ' + str(mode))
    raise Error('unable to read dm-crypt key', path, legacy_path)
1324
1325
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map ``rawdev`` with cryptsetup under the name ``_uuid``.

    ``key`` is either a tuple whose first item is the path of a key
    file (the layout used before lockbox) or the key itself, which is
    then fed to cryptsetup on stdin. With ``luks`` the device is
    opened with luksOpen, after a luksFormat when ``format_dev`` is
    true; otherwise a plain mapping is created.

    :return: the device reported by dmcrypt_is_mapped when the mapping
             already exists, else ``/dev/mapper/<_uuid>``.
    :raises: Error if one of the commands fails.
    """
    already_mapped = dmcrypt_is_mapped(_uuid)
    if already_mapped:
        return already_mapped

    if not isinstance(key, tuple):
        # the key is supplied on stdin
        keypath = '-'
    else:
        # legacy, before lockbox
        assert os.path.exists(key[0])
        keypath = key[0]
        key = None

    dev = '/dev/mapper/' + _uuid
    key_file = ['--key-file', keypath]
    luksFormat_args = (['cryptsetup', '--batch-mode'] + key_file +
                       ['luksFormat', rawdev] + cryptsetup_parameters)
    luksOpen_args = ['cryptsetup'] + key_file + ['luksOpen', rawdev, _uuid]
    create_args = (['cryptsetup'] + key_file +
                   ['create', _uuid, rawdev] + cryptsetup_parameters)

    try:
        if not luks:
            # Plain mode has no format function, nor any validation
            # that the key is correct.
            command_with_stdin(create_args, key)
        else:
            if format_dev:
                command_with_stdin(luksFormat_args, key)
            command_with_stdin(luksOpen_args, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', dev])
        return dev
    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
1388
1389
@retry(Error, max_tries=10, wait=0.5, backoff=1.0)
def dmcrypt_unmap(_uuid):
    """
    Remove the cryptsetup mapping ``/dev/mapper/<_uuid>`` if it exists.

    :raises: Error when ``cryptsetup remove`` fails.
    """
    mapped = '/dev/mapper/' + _uuid
    if os.path.exists(mapped):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
        except subprocess.CalledProcessError as e:
            raise Error('unable to unmap device', _uuid, e)
1398
1399
def mount(
    dev,
    fstype,
    options,
):
    """
    Mount ``dev`` on a freshly created ``mnt.*`` directory below
    ``STATEDIR + '/tmp'``.

    When ``options`` is None the MOUNT_OPTIONS entry for ``fstype`` is
    used (an empty string when there is none). ``restorecon`` is run on
    the mount point when the tool is available.

    :raises: ValueError when ``dev`` or ``fstype`` is None, MountError
             when a command fails; the temporary directory is then
             removed on a best-effort basis.
    :return: the path of the mount point.
    """
    # sanity check: none of the arguments are None
    for label, value in (('dev', dev), ('fstype', fstype)):
        if value is None:
            raise ValueError('%s may not be None' % label)

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp needs its parent directory to exist already
    tmp_root = STATEDIR + '/tmp'
    if not os.path.exists(tmp_root):
        os.makedirs(tmp_root)

    path = tempfile.mkdtemp(prefix='mnt.', dir=tmp_root)
    mount_cmd = ['mount', '-t', fstype, '-o', options, '--', dev, path]
    try:
        LOG.debug('Mounting %s on %s with options %s', dev, path, options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', path])
    except subprocess.CalledProcessError as e:
        try:
            os.rmdir(path)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return path
1457
1458
@retry(UnmountError, max_tries=3, wait=0.5, backoff=1.0)
def unmount(
    path,
    do_rm=True,
):
    """
    Unmount ``path`` with /bin/umount and, unless ``do_rm`` is false,
    remove the directory afterwards.

    :raises: UnmountError when the umount command fails.
    """
    LOG.debug('Unmounting %s', path)
    try:
        command_check_call(['/bin/umount', '--', path])
    except subprocess.CalledProcessError as e:
        raise UnmountError(e)
    if do_rm:
        os.rmdir(path)
1481
1482
1483 ###########################################
1484
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from ``parted --machine`` output.

    :param partitions: text in which every partition line starts with
                       the partition number.
    :return: list of int, one per line that begins with digits, in the
             order they appear; empty when there is none. A real list
             (not the lazy ``map`` object of Python 3) is returned so
             that callers can test it for emptiness and hand it to
             ``max()``.
    """
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1488
1489
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    :param dev: path of the device handed to ``parted``.
    :return: Index number (> 1 if there is already a partition on the
             device) or 1 if there is no partition table or parted
             fails.
    :raises: Error when the parted output is empty or does not have
             the expected shape.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        # the message names the same three markers that are tested
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # Only the first occurrence delimits the header; splitting once
    # keeps a repeated device path from breaking the unpacking.
    partitions = lines.split(real_dev, 1)[1]
    # materialized because a Python 3 map object is always truthy
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    return 1
1528
1529
def check_journal_reqs(args):
    """
    Run the three ``ceph-osd --check-*-journal`` probes for
    ``args.cluster``.

    :return: a tuple of three booleans, in the order allows, wants,
             needs; each one is True when the matching probe exited
             with status 0.
    """
    log_file = "/var/log/ceph/$cluster-osd-check.log"

    def exited_with_zero(check):
        _, _, status = command([
            'ceph-osd', check,
            '-i', '0',
            '--log-file', log_file,
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        return not status

    checks = (
        '--check-allows-journal',
        '--check-wants-journal',
        '--check-needs-journal',
    )
    return tuple([exited_with_zero(check) for check in checks])
1557
1558
def update_partition(dev, description):
    """
    Make the kernel re-read the partition table of ``dev``.

    Must be called after modifying a partition table so the kernel
    knows about the change and fires udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The udevadm settle before each partprobe waits for ongoing udev
    events to complete, in case one of them relies on an existing
    partition on dev. The final udevadm settle guarantees to the
    caller that all udev events related to the partition table change
    have been processed, i.e. the 95-ceph-osd.rules actions and mode
    changes, group changes etc. are complete.

    partprobe is tried up to five times, sleeping 60 seconds after a
    failure whose output mentions 'unable to inform the kernel' or
    'Device or resource busy'; any other failure is re-raised.

    :raises: Error when all attempts failed.
    """
    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    partprobe = _get_command_executable(['partprobe'])[0]
    for _attempt in range(5):
        command_check_call(['udevadm', 'settle', '--timeout=600'])
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            transient = ('unable to inform the kernel' in error or
                         'Device or resource busy' in error)
            if not transient:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)',
                      dev, error)
            time.sleep(60)
        else:
            break
    else:
        raise Error('partprobe %s failed : %s' % (dev, error))
    command_check_call(['udevadm', 'settle', '--timeout=600'])
1592
1593
def zap_linux(dev):
    """
    Wipe every partition of ``dev`` and then its partition table.

    Each partition gets ``wipefs --all`` followed by zeros written
    with dd. The last 33 * 4096 bytes of the device are then zeroed,
    sgdisk resets the table and update_partition() is called.

    :raises: Error when one of the commands fails.
    """
    try:
        # Thoroughly wipe all partitions of any traces of
        # filesystems or OSD journals.
        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for partname in list_partitions(dev):
            partition = get_dev_path(partname)
            command_check_call(['wipefs', '--all', partition])

            # A typical bluestore device has
            # 1. a 100M xfs data partition
            # 2. a bluestore_block_size block partition
            # 3. a bluestore_block_db_size block.db partition
            # 4. a bluestore_block_wal_size block.wal partition
            # so the bits storing the meta information of bluestore's
            # collections must be wiped as well, to prevent an OSD from
            # comparing the meta data (like OSD id and fsid) stored on
            # the zapped device with the ones passed in. We assume that
            # the bluestore allocator puts this meta data at the
            # beginning of the block partition. Without knowing the
            # actual layout of bluefs, an extra 10M is added to be on
            # the safe side. If the partition was formatted with a
            # filesystem, 10MB is more than enough to nuke its
            # superblock.
            count = min(PrepareBluestoreData.SPACE_SIZE + 10,
                        get_dev_size(partition))
            dd_cmd = [
                'dd',
                'if=/dev/zero',
                'of={0}'.format(partition),
                'bs=1M',
                'count={0}'.format(count),
            ]
            command_check_call(dd_cmd)

        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups. sgdisk
        # isn't too thorough.
        lba_size = 4096
        size = 33 * lba_size
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-size, os.SEEK_END)
            dev_file.write(b'\0' * size)

        command_check_call(['sgdisk', '--zap-all', '--', dev])
        command_check_call(['sgdisk', '--clear', '--mbrtogpt', '--', dev])
        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1674
1675
def zap_freebsd(dev):
    """
    Destroy the partition table of ``dev`` with ``gpart destroy -F``.

    :raises: Error when gpart fails.
    """
    # For FreeBSD we just need to zap the partition.
    destroy_cmd = ['gpart', 'destroy', '-F', dev]
    try:
        command_check_call(destroy_cmd)
    except subprocess.CalledProcessError as e:
        raise Error(e)
1690
1691
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    ``dev`` is resolved with realpath first and must be a block device
    that is not a partition.

    :raises: Error when ``dev`` is not a full block device.
    """
    dev = os.path.realpath(dev)
    mode = os.stat(dev).st_mode
    is_full_disk = stat.S_ISBLK(mode) and not is_partition(dev)
    if not is_full_disk:
        raise Error('not full block device; cannot zap', dev)
    zapper = zap_freebsd if FREEBSD else zap_linux
    zapper(dev)
1704
1705
def adjust_symlink(target, path):
    """
    Make ``path`` a symlink pointing at ``target``.

    A regular file at ``path`` is removed, and so is a symlink that
    points somewhere else; a symlink that already points at ``target``
    is left untouched. Anything else found at ``path`` is not removed
    before the symlink creation is attempted.

    :raises: Error, carrying the underlying OS error, when the old
             entry cannot be inspected or removed or when the symlink
             cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        # Only filesystem errors are translated: a bare except would
        # also turn KeyboardInterrupt and SystemExit into Error.
        except (OSError, IOError) as e:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path, e)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except (OSError, IOError) as e:
            raise Error('unable to create symlink %s -> %s' % (path, target),
                        e)
1730
1731
def get_mount_options(cluster, fs_type):
    """
    Look up the mount options configured for ``fs_type``.

    ``osd_mount_options_<fs_type>`` is consulted first and returned
    with all whitespace removed. When it is not set, the value of
    ``osd_fs_mount_options_<fs_type>`` is returned as is.
    NOTE(review): the fallback value is not stripped of whitespace;
    confirm whether that is intended.

    :return: the option string, or None when neither variable is set.
    """
    primary = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(fstype=fs_type),
    )
    if primary is not None:
        # remove whitespaces
        return "".join(primary.split())
    return get_conf(
        cluster,
        variable='osd_fs_mount_options_{fstype}'.format(fstype=fs_type),
    )
1750
1751
class Device(object):
    """
    A whole disk (never a partition) on which partitions are created
    with sgdisk and looked up by number.
    """

    def __init__(self, path, args):
        self.path = path
        self.args = args
        # filled lazily by get_dev_size(), get_partition() and
        # ptype_tobe_for_name() respectively
        self.dev_size = None
        self.partitions = {}
        self.ptype_map = None
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition called ``ceph <name>`` with sgdisk.

        :param uuid: GUID given to the new partition.
        :param name: role of the partition, used for its name and to
                     select its type code.
        :param size: size in M; when it is not positive the largest
                     free area is used.
        :param num: partition number; 0 selects the next free index.
        :raises: Error when ``size`` exceeds the device size.
        :return: the number of the created partition.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)
        if size > 0:
            new_spec = '--new={num}:0:+{size}M'.format(num=num, size=size)
            dev_size = self.get_dev_size()
            if size > dev_size:
                LOG.error('refusing to create %s on %s', name, self.path)
                LOG.error('%s size (%sM) is bigger than device (%sM)',
                          name, size, dev_size)
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, dev_size, name))
        else:
            new_spec = '--largest-new={num}'.format(num=num)

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new_spec,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type registered for ``name``.

        'data' is an alias of 'osd'. 'lockbox' is looked up directly
        in PTYPE, under 'mpath' or 'regular' depending on the device;
        every other name goes through the ptype map of a partition
        object built for this device.
        """
        LOG.debug("name = " + name)
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if name == 'data':
            name = 'osd'
        if self.ptype_map is None:
            template = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = template.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """
        Return the partition object for number ``num``, building and
        caching it on first use.
        """
        if num in self.partitions:
            return self.partitions[num]
        partition_dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=partition_dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return partition

    def get_dev_size(self):
        """
        Return the result of get_dev_size() for this device, computed
        once and cached.
        """
        if self.dev_size is not None:
            return self.dev_size
        self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        """Build the Device for ``path``."""
        return Device(path, args)
1827
1828
class DevicePartition(object):
    """
    A partition of a device, together with the table that maps
    partition roles to type codes.

    Subclasses override set_variables_ptype() to select another
    section of PTYPE.
    """

    def __init__(self, args):
        self.args = args
        # unknown until a setter is called or a getter looks it up
        self.num = self.rawdev = self.dev = None
        self.uuid = self.ptype = self.ptype_map = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the partition uuid of rawdev, looked up once."""
        if self.uuid is not None:
            return self.uuid
        self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type of rawdev, looked up once."""
        if self.ptype is not None:
            return self.ptype
        self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        # dev and rawdev start out as the same device
        self.dev = self.rawdev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type registered for ``name``."""
        return self.ptype_map[name]['ready']

    @staticmethod
    @retry(OSError)
    def factory(path, dev, args):
        """
        Build the partition object matching the device and ``args``.

        A multipath ``path`` or ``dev`` takes precedence over the
        dmcrypt type ('luks' or 'plain'); anything else yields a plain
        DevicePartition. ``dev`` is set on the result.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_multipath = any(
            candidate is not None and is_mpath(candidate)
            for candidate in (path, dev))
        if on_multipath:
            partition_class = DevicePartitionMultipath
        elif dmcrypt_type == 'luks':
            partition_class = DevicePartitionCryptLuks
        elif dmcrypt_type == 'plain':
            partition_class = DevicePartitionCryptPlain
        else:
            partition_class = DevicePartition
        partition = partition_class(args)
        partition.set_dev(dev)
        return partition
1888
1889
class DevicePartitionMultipath(DevicePartition):
    """Partition whose type codes come from PTYPE['mpath']."""

    def set_variables_ptype(self):
        mpath_types = PTYPE['mpath']
        self.ptype_map = mpath_types
1894
1895
class DevicePartitionCrypt(DevicePartition):
    """
    Base class of the dm-crypt backed partitions.

    Subclasses override setup_crypt() to load the key and extend the
    cryptsetup parameters; map() and unmap() use a luks() method that
    this class does not define itself.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        self.osd_dm_key = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        pass

    def map(self):
        """Map rawdev through dm-crypt (formatting enabled) and make
        ``dev`` the mapped device."""
        self.setup_crypt()
        mapping_options = dict(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = _dmcrypt_map(**mapping_options)

    def unmap(self):
        """Remove the dm-crypt mapping and make ``dev`` the raw device
        again."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        self.setup_crypt()
        self.map()
1928
1929
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """dm-crypt partition in plain mode, typed from PTYPE['plain']."""

    def luks(self):
        return False

    def setup_crypt(self):
        """
        Add ``--key-size`` to the cryptsetup parameters and load the
        key. Does nothing once a key has been loaded.
        """
        if self.osd_dm_key is not None:
            return

        self.cryptsetup_parameters.extend(
            ['--key-size', str(self.dmcrypt_keysize)])

        key_dir = self.args.dmcrypt_key_dir
        self.osd_dm_key = get_dmcrypt_key(self.get_uuid(), key_dir, False)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1947
1948
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """dm-crypt partition in LUKS mode, typed from PTYPE['luks']."""

    def luks(self):
        return True

    def setup_crypt(self):
        """
        Extend the cryptsetup parameters if needed and load the key.
        Does nothing once a key has been loaded.
        """
        if self.osd_dm_key is not None:
            return

        # A key size of 1024 is not forced into the
        # cryptsetup_parameters, as we want the cryptsetup defaults
        # to prevail for the actual LUKS key lengths.
        if self.dmcrypt_keysize != 1024:
            self.cryptsetup_parameters.extend(
                ['--key-size', str(self.dmcrypt_keysize)])

        key_dir = self.args.dmcrypt_key_dir
        self.osd_dm_key = get_dmcrypt_key(self.get_uuid(), key_dir, True)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']
1973
1974
class Prepare(object):
    """
    Entry point of the ``prepare`` subcommand.

    Holds the options shared by all object stores; the actual work is
    done by the ``_prepare`` method of the class chosen by factory().
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """Return the parser holding the options common to every
        ``prepare`` flavor."""
        parser = argparse.ArgumentParser(add_help=False)
        options = (
            ('--cluster', dict(
                metavar='NAME',
                default='ceph',
                help='cluster name to assign this disk to',
            )),
            ('--cluster-uuid', dict(
                metavar='UUID',
                help='cluster uuid to assign this disk to',
            )),
            ('--osd-uuid', dict(
                metavar='UUID',
                help='unique OSD uuid to assign this disk to',
            )),
            ('--osd-id', dict(
                metavar='ID',
                help='unique OSD id to assign this disk to',
            )),
            ('--crush-device-class', dict(
                help='crush device class to assign this disk to',
            )),
            ('--dmcrypt', dict(
                action='store_true', default=None,
                help='encrypt DATA and/or JOURNAL devices with dm-crypt',
            )),
            ('--dmcrypt-key-dir', dict(
                metavar='KEYDIR',
                default='/etc/ceph/dmcrypt-keys',
                help='directory where dm-crypt keys are stored',
            )),
            ('--prepare-key', dict(
                metavar='PATH',
                help='bootstrap-osd keyring path template (%(default)s)',
                default='{statedir}/bootstrap-osd/{cluster}.keyring',
                dest='prepare_key_template',
            )),
            ('--no-locking', dict(
                action='store_true', default=None,
                help='let many prepare\'s run in parallel',
            )),
        )
        for flag, settings in options:
            parser.add_argument(flag, **settings)
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """Register the ``prepare`` subcommand on ``subparsers`` and
        return its parser; Prepare.main becomes its ``func``."""
        parents = (
            [
                Prepare.parser(),
                PrepareData.parser(),
                Lockbox.parser(),
            ] +
            PrepareFilestore.parent_parsers() +
            PrepareBluestore.parent_parsers()
        )
        description = textwrap.fill(textwrap.dedent("""\
            If the --bluestore argument is given, a bluestore objectstore
            will be created. If --filestore is provided, a legacy FileStore
            objectstore will be created. If neither is specified, we default
            to BlueStore.

            When an entire device is prepared for bluestore, two
            partitions are created. The first partition is for metadata,
            the second partition is for blocks that contain data.

            Unless explicitly specified with --block.db or
            --block.wal, the bluestore DB and WAL data is stored on
            the main block device. For instance:

            ceph-disk prepare --bluestore /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block, db, and wal data (the rest of the disk)


            If either --block.db or --block.wal are specified to be
            the same whole device, they will be created as partition
            three and four respectively. For instance:

            ceph-disk prepare --bluestore \\
            --block.db /dev/sdc \\
            --block.wal /dev/sdc \\
            /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block (the rest of the disk)
            /dev/sdc3 for db
            /dev/sdc4 for wal

            """))
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description,
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(func=Prepare.main)
        return parser

    def prepare(self):
        """Run ``_prepare``, holding prepare_lock unless --no-locking
        was given."""
        if self.args.no_locking:
            self._prepare()
            return
        with prepare_lock:
            self._prepare()

    @staticmethod
    def factory(args):
        """Return the bluestore or filestore flavor depending on
        ``args.bluestore``."""
        flavor = PrepareBluestore if args.bluestore else PrepareFilestore
        return flavor(args)

    @staticmethod
    def main(args):
        Prepare.factory(args).prepare()
2109
2110
class PrepareFilestore(Prepare):
    """Prepare an OSD made of a data store and a journal."""

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # a lockbox is only built when dm-crypt was requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """Return the extra parsers contributed to ``prepare``."""
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        """Prepare the lockbox when dm-crypt is on, then the data with
        its journal."""
        encrypted = self.data.args.dmcrypt
        if encrypted:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2130
2131
class PrepareBluestore(Prepare):
    """Prepare an OSD made of a data store and block, block.db and
    block.wal spaces."""

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # a lockbox is only built when dm-crypt was requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """Return the parser with the object store selection flags;
        both write to ``bluestore``, which defaults to True."""
        store_parser = argparse.ArgumentParser(add_help=False)
        store_parser.add_argument(
            '--bluestore',
            dest='bluestore',
            action='store_true', default=True,
            help='bluestore objectstore',
        )
        store_parser.add_argument(
            '--filestore',
            dest='bluestore',
            action='store_false',
            help='filestore objectstore',
        )
        return store_parser

    @staticmethod
    def parent_parsers():
        """Return the extra parsers contributed to ``prepare``."""
        contributors = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [contributor.parser() for contributor in contributors]

    def _prepare(self):
        """
        Prepare the lockbox when dm-crypt is on, then the data together
        with block.db and block.wal (each only when its option is set)
        and finally block.
        """
        data_args = self.data.args
        if data_args.dmcrypt:
            self.lockbox.prepare()
        optional = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        spaces = [space for option, space in optional
                  if getattr(data_args, option)]
        spaces.append(self.block)
        self.data.prepare(*spaces)
2179
2180
class Space(object):
    """Namespace for the names of the known spaces."""

    NAMES = (
        'block',
        'journal',
        'block.db',
        'block.wal',
    )
2184
2185
2186 class PrepareSpace(object):
2187
2188 NONE = 0
2189 FILE = 1
2190 DEVICE = 2
2191
2192 def __init__(self, args):
2193 self.args = args
2194 self.set_type()
2195 self.space_size = self.get_space_size()
2196 if getattr(self.args, self.name + '_uuid') is None:
2197 setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
2198 self.space_symlink = None
2199 self.space_dmcrypt = None
2200
def set_type(self):
    """
    Classify the space as NONE, FILE or DEVICE and store it in
    ``self.type``.

    When the space is wanted, the data is a whole disk and neither
    ``args.<name>`` nor ``args.<name>_file`` is set, the space is
    colocated with the data by copying ``args.data`` into
    ``args.<name>``.

    :raises: Error when ``args.<name>_dev`` or ``args.<name>_file``
             contradicts what is found, or when the path is neither a
             disk device nor a regular file.
    """
    name = self.name
    args = self.args
    if (self.wants_space() and
            dev_is_diskdevice(args.data) and
            not is_partition(args.data) and
            getattr(args, name) is None and
            getattr(args, name + '_file') is None):
        LOG.info('Will colocate %s with data on %s',
                 name, args.data)
        setattr(args, name, args.data)

    if getattr(args, name) is None:
        if getattr(args, name + '_dev'):
            raise Error('%s is unspecified; not a block device' %
                        name.capitalize(), getattr(args, name))
        self.type = self.NONE
        return

    if not os.path.exists(getattr(args, name)):
        if getattr(args, name + '_dev'):
            raise Error('%s does not exist; not a block device' %
                        name.capitalize(), getattr(args, name))
        # a path that does not exist yet is treated as a file
        self.type = self.FILE
        return

    mode = os.stat(getattr(args, name)).st_mode
    if stmode_is_diskdevice(mode):
        if getattr(args, name + '_file'):
            # capitalize() must be called: formatting the bound method
            # put "<built-in method capitalize ...>" in the message
            raise Error('%s is not a regular file' % name.capitalize(),
                        getattr(args, name))
        self.type = self.DEVICE
        return

    if stat.S_ISREG(mode):
        if getattr(args, name + '_dev'):
            raise Error('%s is not a block device' % name.capitalize(),
                        getattr(args, name))
        self.type = self.FILE
        return

    raise Error('%s %s is neither a block device nor regular file' %
                (name.capitalize(), getattr(args, name)))
2244
def is_none(self):
    """Tell whether the space was classified as NONE."""
    return self.NONE == self.type
2247
def is_file(self):
    """Tell whether the space was classified as FILE."""
    return self.FILE == self.type
2250
def is_device(self):
    """Tell whether the space was classified as DEVICE."""
    return self.DEVICE == self.type
2253
2254 @staticmethod
2255 def parser(name, positional=True):
2256 parser = argparse.ArgumentParser(add_help=False)
2257 parser.add_argument(
2258 '--%s-uuid' % name,
2259 metavar='UUID',
2260 help='unique uuid to assign to the %s' % name,
2261 )
2262 parser.add_argument(
2263 '--%s-file' % name,
2264 action='store_true', default=None,
2265 help='verify that %s is a file' % name.upper(),
2266 )
2267 parser.add_argument(
2268 '--%s-dev' % name,
2269 action='store_true', default=None,
2270 help='verify that %s is a block device' % name.upper(),
2271 )
2272
2273 if positional:
2274 parser.add_argument(
2275 name,
2276 metavar=name.upper(),
2277 nargs='?',
2278 help=('path to OSD %s disk block device;' % name +
2279 ' leave out to store %s in file' % name),
2280 )
2281 return parser
2282
def wants_space(self):
    """Tell whether the space is wanted; always True here."""
    wanted = True
    return wanted
2285
def populate_data_path(self, path):
    """
    Dispatch to the populate_data_path_* method matching the type of
    the space; nothing is done for NONE.

    :raises: Error when the type is not one of the known constants.
    """
    kind = self.type
    if kind == self.NONE:
        return
    if kind == self.DEVICE:
        self.populate_data_path_device(path)
        return
    if kind == self.FILE:
        self.populate_data_path_file(path)
        return
    raise Error('unexpected type ', self.type)
2295
def populate_data_path_file(self, path):
    """
    Record the space in the data directory ``path``.

    The uuid of the space, when set, is written to ``<name>_uuid`` and
    the ``<name>`` symlink is adjusted when a symlink target is known.
    """
    space_uuid = self.name + '_uuid'
    uuid_value = getattr(self.args, space_uuid)
    if uuid_value is not None:
        write_one_line(path, space_uuid, uuid_value)
    if self.space_symlink is None:
        return
    link_path = os.path.join(path, self.name)
    adjust_symlink(self.space_symlink, link_path)
2304
def populate_data_path_device(self, path):
    """
    Do what populate_data_path_file does and then keep the
    ``<name>_dmcrypt`` entry of ``path`` in line with space_dmcrypt:
    the symlink is adjusted when a target is known, otherwise the entry
    is removed if it can be.
    """
    self.populate_data_path_file(path)

    dmcrypt_link = os.path.join(path, self.name + '_dmcrypt')
    if self.space_dmcrypt is None:
        try:
            os.unlink(dmcrypt_link)
        except OSError:
            # best effort; a failure to remove the entry is ignored
            pass
        return
    adjust_symlink(self.space_dmcrypt, dmcrypt_link)
2316
def prepare(self):
    """
    Dispatch to prepare_device or prepare_file according to the type of
    the space; nothing is done for NONE.

    :raises: Error when the type is not one of the known constants.
    """
    kind = self.type
    if kind == self.NONE:
        return
    if kind == self.DEVICE:
        self.prepare_device()
        return
    if kind == self.FILE:
        self.prepare_file()
        return
    raise Error('unexpected type ', self.type)
2326
def prepare_file(self):
    """
    Use the file named by ``args.<name>`` as the space.

    A missing file is created empty and handed to path_set_context.
    The file then becomes the symlink target of the space.
    """
    space_filename = getattr(self.args, self.name)
    already_there = os.path.exists(space_filename)
    if not already_there:
        LOG.debug('Creating %s file %s with size 0'
                  ' (ceph-osd will resize and allocate)',
                  self.name,
                  space_filename)
        open(space_filename, 'wb').close()
        path_set_context(space_filename)

    LOG.debug('%s is file %s',
              self.name.capitalize(),
              space_filename)
    LOG.warning('OSD will not be hot-swappable if %s is '
                'not the same device as the osd data' %
                self.name)
    self.space_symlink = space_filename
2345
def prepare_device(self):
    """Prepare a block device (whole disk or partition) as the space.

    If ``self.args.<name>`` is an existing partition it is either reused
    (when its partition type matches the one expected for this space) or
    symlinked directly. Otherwise a new partition is created on the
    device, formatted/mapped when it is a dmcrypt partition, and tagged
    with the partition type for this space.

    Sets ``self.space_symlink`` and, with --dmcrypt, ``self.space_dmcrypt``.

    Raises Error when an existing partition is given together with
    --dmcrypt, or when a partition that should be reused has no
    corresponding device node.
    """
    space_dev = getattr(self.args, self.name)
    uuid_attr = self.name + '_uuid'
    reusing_partition = False

    if is_partition(space_dev):
        LOG.debug('%s %s is a partition',
                  self.name.capitalize(), space_dev)
        partition = DevicePartition.factory(
            path=None, dev=space_dev, args=self.args)
        if isinstance(partition, DevicePartitionCrypt):
            raise Error(space_dev +
                        ' partition already exists'
                        ' and --dmcrypt specified')
        LOG.warning('OSD will not be hot-swappable' +
                    ' if ' + self.name + ' is not' +
                    ' the same device as the osd data')
        if partition.get_ptype() == partition.ptype_for_name(self.name):
            LOG.debug('%s %s was previously prepared with '
                      'ceph-disk. Reusing it.',
                      self.name.capitalize(),
                      space_dev)
            reusing_partition = True
            # Read and reuse the partition uuid from this journal's
            # previous life. We reuse the uuid instead of changing it
            # because udev does not reliably notice changes to an
            # existing partition's GUID. See
            # http://tracker.ceph.com/issues/10146
            setattr(self.args, uuid_attr, partition.get_uuid())
            LOG.debug('Reusing %s with uuid %s',
                      self.name,
                      getattr(self.args, uuid_attr))
        else:
            LOG.warning('%s %s was not prepared with '
                        'ceph-disk. Symlinking directly.',
                        self.name.capitalize(),
                        space_dev)
            self.space_symlink = space_dev
            return

    # Read the uuid only now: the reuse branch above may have replaced it.
    space_uuid = getattr(self.args, uuid_attr)
    self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
        uuid=space_uuid)

    if self.args.dmcrypt:
        self.space_dmcrypt = self.space_symlink
        self.space_symlink = '/dev/mapper/{uuid}'.format(
            uuid=space_uuid)

    if reusing_partition:
        # confirm that the space_symlink exists. It should since
        # this was an active space
        # in the past. Continuing otherwise would be futile.
        # An explicit raise (rather than assert) keeps the check
        # when python runs with -O and gives a readable message.
        if not os.path.exists(self.space_symlink):
            raise Error('%s %s does not exist, cannot reuse partition %s'
                        % (self.name, self.space_symlink, space_dev))
        return

    num = self.desired_partition_number()

    if num == 0:
        LOG.warning('OSD will not be hot-swappable if %s '
                    'is not the same device as the osd data',
                    self.name)

    device = Device.factory(space_dev, self.args)
    num = device.create_partition(
        uuid=space_uuid,
        name=self.name,
        size=self.space_size,
        num=num)

    partition = device.get_partition(num)

    LOG.debug('%s is GPT partition %s',
              self.name.capitalize(),
              self.space_symlink)

    if isinstance(partition, DevicePartitionCrypt):
        partition.format()
        partition.map()

    # Tag the new partition with the type expected for this space.
    command_check_call(
        [
            'sgdisk',
            '--typecode={num}:{uuid}'.format(
                num=num,
                uuid=partition.ptype_for_name(self.name),
            ),
            '--',
            space_dev,
        ],
    )
    update_partition(space_dev, 'prepared')

    LOG.debug('%s is GPT partition %s',
              self.name.capitalize(),
              self.space_symlink)
2439
2440
class PrepareJournal(PrepareSpace):
    """PrepareSpace specialisation for the 'journal' space."""

    def __init__(self, args):
        self.name = 'journal'
        requirements = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = requirements

        if args.journal:
            if not self.allows_journal:
                raise Error(
                    'journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """Return the ``wants_journal`` flag from check_journal_reqs."""
        return self.wants_journal

    def get_space_size(self):
        """Return the ``osd_journal_size`` setting as an int."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """Return 2 when journal and data share a disk, otherwise 0."""
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        return 2 if self.args.journal == self.args.data else 0

    @staticmethod
    def parser():
        return PrepareSpace.parser('journal')
2475
2476
class PrepareBluestoreBlock(PrepareSpace):
    """PrepareSpace specialisation for the 'block' space."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the size in MB from ``bluestore_block_size``.

        Returns 0 (get as much space as possible) when the option is
        not set.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        # Floor division: '/' would produce a float on Python 3, while
        # the result on Python 2 is unchanged.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block and data share a device, otherwise 0."""
        if self.args.block == self.args.data:
            num = 2
        else:
            num = 0
        return num

    @staticmethod
    def parser():
        return PrepareSpace.parser('block')
2504
2505
class PrepareBluestoreBlockDB(PrepareSpace):
    """PrepareSpace specialisation for the 'block.db' space."""

    def __init__(self, args):
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the block.db size in MB.

        Uses ``bluestore_block_db_size`` when it is set and non-zero.
        Otherwise falls back to 1/100th of ``bluestore_block_size``,
        with a floor of 1024 MB (also used when neither is set).
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is None or int(block_db_size) == 0:
            block_size = get_conf(
                cluster=self.args.cluster,
                variable='bluestore_block_size',
            )
            if block_size is None:
                return 1024  # MB
            # Floor division keeps the size an integer on Python 3;
            # the result on Python 2 is unchanged.
            size = int(block_size) // 100 // 1048576
            return max(size, 1024)  # MB
        else:
            return int(block_db_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 3 when block.db and data share a device, otherwise 0."""
        if getattr(self.args, 'block.db') == self.args.data:
            num = 3
        else:
            num = 0
        return num

    def wants_space(self):
        return False

    @staticmethod
    def parser():
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2549
2550
class PrepareBluestoreBlockWAL(PrepareSpace):
    """PrepareSpace specialisation for the 'block.wal' space."""

    def __init__(self, args):
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the size in MB from ``bluestore_block_wal_size``.

        Returns 576 MB when the option is not set.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        # Floor division: '/' would produce a float on Python 3, while
        # the result on Python 2 is unchanged.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal and data share a device, otherwise 0."""
        if getattr(self.args, 'block.wal') == self.args.data:
            num = 4
        else:
            num = 0
        return num

    def wants_space(self):
        return False

    @staticmethod
    def parser():
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2587
2588
class CryptHelpers(object):
    """Static helpers that read the dmcrypt related configuration."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return ``osd_cryptsetup_parameters`` split shell-style.

        Returns an empty list when the option is not set.
        """
        raw = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        return [] if raw is None else shlex.split(raw)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dmcrypt key size for the configured dmcrypt type.

        ``osd_dmcrypt_key_size`` wins when set; otherwise 1024 for luks
        and 256 for plain. Returns 0 when dmcrypt is not in use.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        kind = CryptHelpers.get_dmcrypt_type(args)

        if kind == 'luks':
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024 if configured is None else int(configured)

        if kind == 'plain':
            if configured is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured)
            return int(configured)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain', or None when dmcrypt is not requested.

        Raises Error when ``osd_dmcrypt_type`` holds any other value.
        """
        if not hasattr(args, 'dmcrypt') or not args.dmcrypt:
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2649
2650
class Secrets(object):
    """Holds a freshly generated cephx secret in ``self.keys``."""

    def __init__(self):
        # ceph-authtool prints the generated key on stdout.
        generated, stderr, ret = command(
            ['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        assert ret == 0
        self.keys = {'cephx_secret': generated.strip()}

    def write_osd_keyring(self, keyring, osd_id):
        """Create *keyring* holding the cephx secret for ``osd.<osd_id>``."""
        entity = 'osd.' + str(osd_id)
        authtool_cmd = [
            'ceph-authtool', keyring,
            '--create-keyring',
            '--name', entity,
            '--add-key', self.keys['cephx_secret'],
        ]
        command_check_call(authtool_cmd)
        path_set_context(keyring)

    def get_json(self):
        """Return ``self.keys`` serialised as an ascii JSON bytearray."""
        serialised = json.dumps(self.keys)
        return bytearray(serialised, 'ascii')
2673
2674
class LockboxSecrets(Secrets):
    """Secrets extended with a dmcrypt key and a lockbox cephx secret."""

    def __init__(self, args):
        super(LockboxSecrets, self).__init__()

        key_size = CryptHelpers.get_dmcrypt_keysize(args)
        # read() needs an integer count: floor division avoids the
        # float that '/' yields on Python 3. The context manager closes
        # the /dev/urandom handle.
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key).decode('ascii')

        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        assert ret == 0

        self.keys.update({
            'dmcrypt_key': base64_key,
            'cephx_lockbox_secret': secret.strip(),
        })

    def write_lockbox_keyring(self, path, osd_uuid):
        """Create ``<path>/keyring`` for ``client.osd-lockbox.<osd_uuid>``."""
        keyring = os.path.join(path, 'keyring')
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'client.osd-lockbox.' + osd_uuid,
                '--add-key', self.keys['cephx_lockbox_secret'],
            ])
        path_set_context(keyring)
2703
2704
class Lockbox(object):
    """Creates, populates and activates the lockbox partition."""

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.device = None

        # Without an explicit --lockbox, the lockbox goes on the data device.
        if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
            self.args.lockbox = self.args.data

    def set_partition(self, partition):
        self.partition = partition

    @staticmethod
    def parser():
        """Return the argparse parser for the lockbox options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--lockbox',
            help='path to the device to store the lockbox',
        )
        parser.add_argument(
            '--lockbox-uuid',
            metavar='UUID',
            help='unique lockbox uuid',
        )
        return parser

    def create_partition(self):
        """Create the 10 MB lockbox partition (number 5) and return it."""
        self.device = Device.factory(self.args.lockbox, argparse.Namespace())
        partition_number = 5
        self.device.create_partition(uuid=self.args.lockbox_uuid,
                                     name='lockbox',
                                     num=partition_number,
                                     size=10)  # MB
        return self.device.get_partition(partition_number)

    def set_or_create_partition(self):
        """Use ``args.lockbox`` if it is a partition, else create one."""
        if is_partition(self.args.lockbox):
            LOG.debug('OSD lockbox device %s is a partition',
                      self.args.lockbox)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.lockbox, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('lockbox')
            if ptype not in ready:
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, str(ready)))
        else:
            LOG.debug('Creating lockbox partition on %s',
                      self.args.lockbox)
            self.partition = self.create_partition()

    def create_key(self):
        """Register the OSD with ``ceph osd new`` and store its keys.

        Writes the lockbox keyring, ``whoami``, ``osd_keyring`` and
        ``key-management-mode`` under the lockbox mount point.
        """
        cluster = self.args.cluster
        bootstrap = self.args.prepare_key_template.format(cluster=cluster,
                                                          statedir=STATEDIR)
        path = self.get_mount_point()
        secrets = LockboxSecrets(self.args)
        id_arg = [self.args.osd_id] if self.args.osd_id else []
        osd_id = command_with_stdin(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                '-i', '-',
                'osd', 'new', self.args.osd_uuid,
            ] + id_arg,
            secrets.get_json()
        )
        secrets.write_lockbox_keyring(path, self.args.osd_uuid)
        osd_id = must_be_one_line(osd_id)
        check_osd_id(osd_id)
        write_one_line(path, 'whoami', osd_id)
        secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id)
        write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)

    def symlink_spaces(self, path):
        """Link every space uuid to the lockbox mount point.

        For each name in Space.NAMES with a non-empty ``<name>_uuid``
        argument, points ``STATEDIR/osd-lockbox/<uuid>`` at the mount
        point and records the uuid in ``<path>/<name>-uuid``.
        """
        target = self.get_mount_point()
        for name in Space.NAMES:
            space_uuid = getattr(self.args, name + '_uuid', None)
            if not space_uuid:
                continue
            symlink = os.path.join(STATEDIR, 'osd-lockbox', space_uuid)
            adjust_symlink(target, symlink)
            write_one_line(path, name + '-uuid', space_uuid)

    def populate(self):
        """Make an ext4 filesystem on the lockbox, mount and fill it."""
        maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
        dev = self.partition.get_dev()
        args = ['mkfs', '-t', 'ext4', dev]
        # Pass the values as logging arguments so %s is really filled in.
        LOG.debug('Creating lockbox fs on %s: %s', dev, " ".join(args))
        command_check_call(args)
        path = self.get_mount_point()
        maybe_mkdir(path)
        args = ['mount', '-t', 'ext4', dev, path]
        LOG.debug('Mounting lockbox ' + " ".join(args))
        command_check_call(args)
        write_one_line(path, 'osd-uuid', self.args.osd_uuid)
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        self.create_key()
        self.symlink_spaces(path)
        write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
        # self.device is only set when create_partition() made the
        # partition; only then is its type code set here.
        if self.device is not None:
            command_check_call(
                [
                    'sgdisk',
                    '--typecode={num}:{uuid}'.format(
                        num=self.partition.get_partition_number(),
                        uuid=self.partition.ptype_for_name('lockbox'),
                    ),
                    '--',
                    get_partition_base(self.partition.get_dev()),
                ],
            )

    def get_mount_point(self):
        return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)

    def get_osd_uuid(self):
        return self.args.osd_uuid

    def activate(self):
        """Mount the lockbox read-only and trigger its dependent devices.

        Does nothing when the lockbox partition is already mounted.
        """
        dev = self.partition.get_dev()
        mounted_at = is_mounted(dev)
        if mounted_at:
            LOG.info("Lockbox already mounted at " + mounted_at)
            return

        # The final mount point is derived from the osd uuid, which is
        # stored inside the lockbox: mount it on a scratch directory
        # first to read it.
        tmp_path = tempfile.mkdtemp(
            prefix='mnt.',
            dir=STATEDIR + '/tmp',
        )
        try:
            args = ['mount', '-t', 'ext4', '-o', 'ro', dev, tmp_path]
            LOG.debug('Mounting lockbox temporarily ' + " ".join(args))
            command_check_call(args)
            try:
                self.args.osd_uuid = get_oneliner(tmp_path, 'osd-uuid')
            finally:
                # Never leave the scratch mount behind, even on error.
                command_check_call(['umount', tmp_path])
        finally:
            # Best effort: the scratch directory is of no further use.
            try:
                os.rmdir(tmp_path)
            except OSError as e:
                LOG.debug('Unable to remove %s: %s', tmp_path, e)

        args = ['mount', '-t', 'ext4', '-o', 'ro',
                dev,
                self.get_mount_point()]
        # Log the command that is actually run for the final mount.
        LOG.debug('Mounting lockbox readonly ' + " ".join(args))
        command_check_call(args)
        for name in Space.NAMES + ('osd',):
            uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
            if os.path.exists(uuid_path):
                part_uuid = get_oneliner(self.get_mount_point(),
                                         name + '-uuid')
                trigger_dev = os.path.join('/dev/disk/by-partuuid/',
                                           part_uuid.lower())
                command_check_call(['ceph-disk', 'trigger', trigger_dev])

    def prepare(self):
        verify_not_in_use(self.args.lockbox, check_partitions=True)
        self.set_or_create_partition()
        self.populate()
2862
2863
class PrepareData(object):
    """Base class preparing the OSD data, on a directory or a device."""

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.set_type()

        # Fill in the identifiers the caller did not provide.
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Set ``self.type`` to FILE (directory) or DEVICE.

        Raises Error when ``args.data`` is neither.
        """
        mode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(mode):
            self.type = self.FILE
            return
        if stmode_is_diskdevice(mode):
            self.type = self.DEVICE
            return
        raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return the argparse parser for the data related options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        boolean_flags = (
            ('--zap-disk',
             'destroy the partition table (and content) of a disk'),
            ('--data-dir', 'verify that DATA is a dir'),
            ('--data-dev', 'verify that DATA is a block device'),
        )
        for flag, description in boolean_flags:
            parser.add_argument(
                flag,
                action='store_true', default=None,
                help=description,
            )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Write the OSD identification files into *path*.

        Does nothing when *path* already holds a ``magic`` file.
        Afterwards each entry of *to_prepare_list* populates *path* too.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return

        LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        if self.args.osd_id:
            write_one_line(path, 'wanttobe', self.args.osd_id)
        if self.args.crush_device_class:
            write_one_line(path, 'crush_device_class',
                           self.args.crush_device_class)
        # 'magic' is written after the other files; its presence is
        # what the check at the top of this method looks for.
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for space in to_prepare_list:
            space.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Dispatch to ``prepare_device`` or ``prepare_file``."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
            return
        if self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
            return
        raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare a directory as OSD data, along with its spaces."""
        data_dir = self.args.data

        if not os.path.exists(data_dir):
            raise Error('data path for directory does not exist',
                        self.args.data)

        if self.args.data_dev:
            raise Error('data path is not a block device', self.args.data)

        for space in to_prepare_list:
            space.prepare()

        self.populate_data_path_file(self.args.data, *to_prepare_list)

    def sanity_checks(self):
        """Check that the data device exists and is not in use."""
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        check_partitions = not self.args.dmcrypt
        verify_not_in_use(self.args.data,
                          check_partitions=check_partitions)

    def set_variables(self):
        """Resolve fs type, mkfs arguments, mount options and osd uuid."""
        cluster = self.args.cluster

        # --fs-type wins; otherwise try each option in turn and fall
        # back to DEFAULT_FS_TYPE.
        for variable in ('osd_mkfs_type', 'osd_fs_type'):
            if self.args.fs_type is not None:
                break
            self.args.fs_type = get_conf(
                cluster=self.args.cluster,
                variable=variable,
            )
        if self.args.fs_type is None:
            self.args.fs_type = DEFAULT_FS_TYPE

        # The second option name is only consulted when the first one
        # is not set.
        for template in ('osd_mkfs_options_{fstype}',
                         'osd_fs_mkfs_options_{fstype}'):
            self.mkfs_args = get_conf(
                cluster=self.args.cluster,
                variable=template.format(fstype=self.args.fs_type),
            )
            if self.mkfs_args is not None:
                break

        self.mount_options = get_mount_options(cluster=self.args.cluster,
                                               fs_type=self.args.fs_type)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Run the checks common to all backends, zapping if requested."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition number 1 on the data device and return it."""
        partition_number = 1
        device = Device.factory(self.args.data, self.args)
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """Set ``self.partition`` from ``args.data``, creating it if needed."""
        if not is_partition(self.args.data):
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()
            return

        LOG.debug('OSD data device %s is a partition',
                  self.args.data)
        self.partition = DevicePartition.factory(
            path=None, dev=self.args.data, args=self.args)
        ptype = self.partition.get_ptype()
        ready = Ptype.get_ready_by_name('osd')
        if ptype not in ready:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, str(ready)))

    def populate_data_path_device(self, *to_prepare_list):
        """Make the filesystem on the data partition and populate it.

        The partition is mapped first (and unmapped at the end) when it
        is a dmcrypt partition. When ``args.data`` is a whole device,
        the partition is then tagged as an osd partition and udev is
        triggered.
        """
        partition = self.partition
        encrypted = isinstance(partition, DevicePartitionCrypt)

        if encrypted:
            partition.map()

        try:
            mkfs_cmd = ['mkfs', '-t', self.args.fs_type]
            if self.mkfs_args is None:
                mkfs_cmd.extend(MKFS_ARGS.get(self.args.fs_type, []))
            else:
                mkfs_cmd.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    mkfs_cmd.extend(['-f'])  # always force
            mkfs_cmd.extend(['--', partition.get_dev()])
            LOG.debug('Creating %s fs on %s',
                      self.args.fs_type, partition.get_dev())
            command_check_call(mkfs_cmd, exit=True)

            path = mount(dev=partition.get_dev(),
                         fstype=self.args.fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if encrypted:
                partition.unmap()

        if is_partition(self.args.data):
            return

        typecode = '--typecode=%d:%s' % (partition.get_partition_number(),
                                         partition.ptype_for_name('osd'))
        command_check_call(
            ['sgdisk', typecode, '--', self.args.data],
            exit=True,
        )
        update_partition(self.args.data, 'prepared')
        command_check_call(['udevadm', 'trigger',
                            '--action=add',
                            '--sysname-match',
                            os.path.basename(partition.rawdev)])
3099
3100
class PrepareFilestoreData(PrepareData):
    """PrepareData for the filestore backend."""

    def get_space_size(self):
        return 0  # get as much space as possible

    def prepare_device(self, *to_prepare_list):
        """Prepare the spaces first, then the data partition."""
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate *path* and record the 'filestore' type in it."""
        parent = super(PrepareFilestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3117
3118
class PrepareBluestoreData(PrepareData):
    """PrepareData for the bluestore backend."""

    SPACE_SIZE = 100

    def get_space_size(self):
        return self.SPACE_SIZE  # MB

    def prepare_device(self, *to_prepare_list):
        """Set up the data partition first, then prepare the spaces."""
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        self.set_data_partition()
        for space in to_prepare_list:
            space.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate *path* and record the 'bluestore' type in it."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3136
3137
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap and run ``ceph-osd --mkfs`` on *path*.

    The objectstore type is read from the ``type`` file in *path*.
    Raises Error when it is neither 'bluestore' nor 'filestore'.
    """
    monmap = os.path.join(path, 'activate.monmap')
    getmap_cmd = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'mon', 'getmap', '-o', monmap,
    ]
    command_check_call(getmap_cmd)

    osd_type = read_one_line(path, 'type')
    if osd_type not in ('bluestore', 'filestore'):
        raise Error('unrecognized objectstore type %s' % osd_type)

    mkfs_cmd = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    # Only filestore is given a journal; the option keeps its position
    # right after --osd-data.
    if osd_type == 'filestore':
        mkfs_cmd += ['--osd-journal', os.path.join(path, 'journal')]
    mkfs_cmd += [
        '--osd-uuid', fsid,
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    command_check_call(mkfs_cmd)
3189
3190
def get_mount_point(cluster, osd_id):
    """Return ``STATEDIR/osd/<cluster>-<osd_id>``."""
    osd_dir = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', osd_dir)
3197
3198
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """Mount *dev* at the final osd location and lazily unmount *path*."""
    LOG.debug('Moving mount to final location...')
    final_path = get_mount_point(cluster, osd_id)
    maybe_mkdir(final_path)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others. Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013. Instead, mount the original device at the final
    # location.
    mount_cmd = ['/bin/mount', '-o', mount_options, '--', dev, final_path]
    command_check_call(mount_cmd)

    # -l: lazy, in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
3240
3241
3242 #
3243 # For upgrade purposes, to make sure there are no competing units,
3244 # both --runtime unit and the default should be disabled. There can be
3245 # two units at the same time: one with --runtime and another without
3246 # it. If, for any reason (manual or ceph-disk) the two units co-exist
3247 # they will compete with each other.
3248 #
def systemd_disable(
    path,
    osd_id,
):
    """Disable the ceph-osd unit, both the default and --runtime one."""
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    # ensure there is no duplicate ceph-osd@.service
    for extra in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra)
3262
3263
def systemd_start(
    path,
    osd_id,
):
    """Disable any existing unit, then enable and start the ceph-osd unit.

    The unit is enabled with --runtime when *path* is a mount point.
    """
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    style = ['--runtime'] if os.path.ismount(path) else []
    command_check_call(['systemctl', 'enable', unit] + style)
    command_check_call(['systemctl', 'start', unit])
3287
3288
def systemd_stop(
    path,
    osd_id,
):
    """Disable the ceph-osd unit for *osd_id*, then stop it."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3301
3302
def start_daemon(
    cluster,
    osd_id,
):
    """Start the osd through the init system its data dir is tagged with.

    The tag is a file named after the init system in the osd data
    directory. Raises Error when no tag is found or the start command
    fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged(init_system):
        return os.path.exists(os.path.join(path, init_system))

    try:
        if tagged('upstart'):
            emit_cmd = [
                '/sbin/initctl',
                # use emit, not start, because start would fail if the
                # instance was already running
                'emit',
                # since the daemon starting doesn't guarantee much about
                # the service being operational anyway, don't bother
                # waiting for it
                '--no-wait',
                '--',
                'ceph-osd',
                'cluster={cluster}'.format(cluster=cluster),
                'id={osd_id}'.format(osd_id=osd_id),
            ]
            command_check_call(emit_cmd)
        elif tagged('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged('systemd'):
            systemd_start(path, osd_id)
        elif tagged('openrc'):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            # The per-osd script is a symlink to the generic one.
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call([osd_script, 'start'])
        elif tagged('bsdrc'):
            command_check_call(
                [
                    '/usr/sbin/service', 'ceph', 'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            message = ('{cluster} osd.{osd_id} '
                       'is not tagged with an init system'
                       .format(
                           cluster=cluster,
                           osd_id=osd_id,
                       ))
            raise Error(message)
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3377
3378
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the osd through the init system its data dir is tagged with.

    The tag is a file named after the init system in the osd data
    directory. Raises Error when no tag is found or the stop command
    fails.
    """
    LOG.debug('Stopping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # One list element per argument, like every other branch:
            # a single 'script stop osd.N' string would be taken as the
            # name of the executable.
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3433
3434
def detect_fstype(dev):
    """Return the filesystem type reported for *dev*.

    Uses ``fstyp`` when FREEBSD is set and ``blkid`` otherwise; the
    output must be exactly one line.
    """
    if FREEBSD:
        probe_cmd = ['fstyp', '-u', dev]
    else:
        probe_cmd = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    output = _check_output(args=probe_cmd)
    return must_be_one_line(output)
3458
3459
def dmcrypt_is_mapped(uuid):
    """Return ``/dev/mapper/<uuid>`` if that path exists, else None."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3466
3467
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the dmcrypt partition *dev* and return ``_dmcrypt_map``'s result.

    The partition type decides between plain (key size 256) and luks.
    Raises Error when the type matches neither.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        luks, cryptsetup_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        luks, cryptsetup_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=cryptsetup_parameters,
        luks=luks,
        format_dev=False,
    )
3489
3490
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """
    Mount the OSD data device dev on the path returned by mount(),
    activate the OSD found there with activate() and hand the mount
    over to move_mount() unless something already sits at the final
    location.

    When dmcrypt is set, dev is first mapped with dmcrypt_map() and the
    mapped device is used from then on. An OSD carrying a `deactive`
    marker file is only activated when reactivate is set; otherwise it
    is unmounted again and Error is raised.

    Returns the tuple (cluster, osd_id).

    Raises FilesystemTypeError when the filesystem type of the device
    cannot be detected, and Error when the OSD is deactivated or when
    another OSD already occupies the final location.
    """

    if dmcrypt:
        # NOTE(review): this value is reassigned below before it is read;
        # the call looks redundant here -- confirm before removing.
        part_uuid = get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # check if the disk is deactive, change the journal owner, group
    # mode for correct user and group.
    if os.path.exists(os.path.join(path, 'deactive')):
        # logging to syslog will help us easy to know udev triggered failure
        if not reactivate:
            unmount(path)
            # we need to unmap again because dmcrypt map will create again
            # on bootup stage (due to deactivate)
            if '/dev/mapper/' in dev:
                part_uuid = dev.replace('/dev/mapper/', '')
                dmcrypt_unmap(part_uuid)
            LOG.info('OSD deactivated! reactivate with: --reactivate')
            raise Error('OSD deactivated! reactivate with: --reactivate')
        # flag to activate a deactive osd.
        deactive = True
    else:
        deactive = False

    osd_id = None
    cluster = None
    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        active = False
        other = False
        src_dev = os.stat(path).st_dev
        try:
            dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)).st_dev
            if src_dev == dst_dev:
                # the final location is on the very device we just mounted
                active = True
            else:
                parent_dev = os.stat(STATEDIR + '/osd').st_dev
                if dst_dev != parent_dev:
                    # the final location is on a device different from
                    # its parent directory
                    other = True
                elif os.listdir(get_mount_point(cluster, osd_id)):
                    LOG.info(get_mount_point(cluster, osd_id) +
                             " is not empty, won't override")
                    other = True

        except OSError:
            # stat or listdir failed: neither flag is set and the mount
            # is moved into place below
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            # the unmount announced in the message is done by the
            # except clause below
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:
        # catch everything so our mount is always undone; the original
        # exception is re-raised unchanged
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3598
3599
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD stored in the directory path.

    After activate() succeeded and unless init is None or 'none', a
    symlink STATEDIR/osd/<cluster>-<osd_id> pointing to path is created
    when path is not already that location. A symlink found there with
    a different target is replaced.

    Returns the tuple (cluster, osd_id).

    Raises Error when path does not exist or when the symlink cannot
    be removed or created.
    """

    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError:
                        # only filesystem errors are translated; anything
                        # else (e.g. KeyboardInterrupt) must propagate
                        raise Error('unable to remove old symlink', canonical)
                else:
                    # already pointing where we want it
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path))

    return cluster, osd_id
3639
3640
def find_cluster_by_uuid(_uuid):
    """
    Return the name of the cluster whose conf file in SYSCONFDIR
    carries the fsid _uuid, or None when no conf file matches.

    A single ceph.conf without any fsid is accepted as a fallback.
    """
    wanted = _uuid.lower()
    if not os.path.exists(SYSCONFDIR):
        return None
    without_fsid = []
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith('.conf'):
            continue
        name = entry[:-len('.conf')]
        try:
            fsid = get_fsid(name)
        except Error as e:
            # only a missing fsid is tolerated, other failures propagate
            if 'getting cluster uuid from configuration failed' not in str(e):
                raise e
            without_fsid.append(name)
            continue
        if fsid == wanted:
            return name
    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if without_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3669
3670
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD whose data directory is mounted (or located) at
    path.

    The cluster is looked up from the `ceph_fsid` file, an OSD id is
    allocated and written to `whoami` when the directory has none yet,
    mkfs() is run as long as the `ready` file is missing, the init
    system marker file is created and the `active` file is written.

    activate_key_template is formatted with the cluster name and
    STATEDIR to obtain the keyring path. init is the init system to
    mark the OSD with; None or 'none' creates no marker, 'auto' picks
    the `init` configuration value or else the result of init_get().

    Returns the tuple (osd_id, cluster) -- note the order.

    Raises Error when the cluster uuid, the cluster conf or the OSD
    uuid cannot be determined.
    """

    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        # first activation: get an id and remember it in the data dir
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
            path=path,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    if not os.path.exists(os.path.join(path, 'ready')):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            # the configuration wins over the detected init system
            conf_val = get_conf(
                cluster=cluster,
                variable='init'
            )
            if conf_val is not None:
                init = conf_val
            else:
                init = init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        # create (or truncate) the empty marker file named after the
        # init system
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for other in INIT_SYSTEMS:
        if other != init:
            try:
                os.unlink(os.path.join(path, other))
            except OSError:
                pass

    if not os.path.exists(os.path.join(path, 'active')):
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3748
3749
def main_activate(args):
    """
    Activate the OSD found at args.path, which is either a block
    device (handled by mount_activate()) or a directory (handled by
    activate_dir()), then start the daemon unless args.no_start_daemon
    is set.

    Nothing is done when activation of args.path is suppressed, and the
    daemon is not started when the journal symlink of the OSD points to
    something that is not there yet.

    Raises Error when args.path does not exist, is neither a directory
    nor a block device, or carries the multipath OSD partition type
    without being a multipath device.
    """
    cluster = None
    osd_id = None

    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stmode_is_diskdevice(mode):
            if (is_partition(args.path) and
                    (get_partition_type(args.path) ==
                     PTYPE['mpath']['osd']['ready']) and
                    not is_mpath(args.path)):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

            # args is handed to DevicePartition.factory() below with the
            # cluster that was just discovered
            args.cluster = cluster
            if args.dmcrypt:
                for name in Space.NAMES:
                    # Check if encrypted device in journal
                    dev_path = os.path.join(osd_data, name + '_dmcrypt')
                    if not os.path.exists(dev_path):
                        continue
                    partition = DevicePartition.factory(
                        path=None,
                        dev=dev_path,
                        args=args)
                    partition.rawdev = args.path
                    partition.map()

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
            LOG.info("activate: Journal not present, not starting, yet")
            return

        # without an init system the ceph-osd binary is run directly
        if (not args.no_start_daemon and args.mark_init == 'none'):
            command_check_call(
                [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={journal}'.format(journal=osd_journal),
                ],
            )

        if (not args.no_start_daemon and
                args.mark_init not in (None, 'none')):

            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3831
3832
def main_activate_lockbox(args):
    """
    Run main_activate_lockbox_protected(args) while holding
    activate_lock.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3836
3837
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox stored on the partition args.path.

    The caller is expected to hold activate_lock (see
    main_activate_lockbox).
    """
    lockbox_partition = DevicePartition.factory(path=None,
                                                dev=args.path,
                                                args=args)
    box = Lockbox(args)
    box.set_partition(lockbox_partition)
    box.activate()
3845
3846
3847 ###########################
3848
def _mark_osd_out(cluster, osd_id):
    """
    Mark osd.<osd_id> out with `ceph osd out`.

    cluster is the name of the cluster the command is sent to; it is
    passed with --cluster in the same way _check_osd_status() does, so
    that clusters not named `ceph` are addressed correctly.
    osd_id must be an integer (it is formatted with %d).
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    command([
        'ceph',
        'osd',
        'out',
        'osd.%d' % osd_id,
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
    ])
3857
3858
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is read from the JSON output of `ceph osd dump` for the
    given cluster. Error is raised when osd_id is not listed there.
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    found = False
    status_code = 0
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # compare by value: identity of small integers is an
            # implementation detail and `is 1` warns on recent pythons
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not osd.%s in osd tree!' % osd_id)
    return status_code
3891
3892
def _remove_osd_directory_files(mounted_path, cluster):
    """
    To remove the 'ready', 'active', INIT-specific files.

    mounted_path is the OSD data directory and cluster is the name
    used to look up the `init` configuration value. A file that is
    already gone is only logged, so the function can be run again
    after a partial removal.
    """
    for marker in ('ready', 'active'):
        marker_path = os.path.join(mounted_path, marker)
        if os.path.exists(marker_path):
            os.remove(marker_path)
            LOG.info('Remove `%s` file.', marker)
        else:
            LOG.info('`%s` file is already removed.', marker)

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    conf_val = get_conf(
        cluster=cluster,
        variable='init'
    )
    if conf_val is not None:
        init = conf_val
    else:
        init = init_get()
    # the init marker may be absent; treat it like the two files above
    # instead of failing after they have already been removed
    init_path = os.path.join(mounted_path, init)
    if os.path.exists(init_path):
        os.remove(init_path)
        LOG.info('Remove `%s` file.', init)
    else:
        LOG.info('`%s` file is already removed.', init)
    return
3921
3922
def main_deactivate(args):
    """
    Run main_deactivate_locked(args) while holding activate_lock.
    """
    with activate_lock:
        main_deactivate_locked(args)
3926
3927
def main_deactivate_locked(args):
    """
    Deactivate the OSD designated by args.deactivate_by_id or
    args.path: stop the daemon if it is up, optionally mark the OSD
    out, flag the data directory as deactivated (unless args.once) and
    unmount it, unmapping the dmcrypt devices when there are any.

    Nothing more is done when the OSD is already down. Error is raised
    when no partition matches.
    """
    osd_id = args.deactivate_by_id
    path = args.path
    target_dev = None
    devices = list_devices()

    # list all devices and found we need; the last match wins
    for device in devices:
        if 'partitions' not in device:
            continue
        for candidate in device.get('partitions'):
            if ((osd_id and
                 'whoami' in candidate and
                 candidate['whoami'] == osd_id) or
                (path and
                 'path' in candidate and
                 candidate['path'] == path)):
                target_dev = candidate
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # set up all we need variable
    osd_id = target_dev['whoami']
    part_type = target_dev['ptype']
    mounted_path = target_dev['mount']
    dmcrypt = Ptype.is_dmcrypt(part_type, 'osd')

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    is_in = status_code in (OSD_STATUS_IN_UP, OSD_STATUS_IN_DOWN)
    if is_in and args.mark_out is True:
        _mark_osd_out(args.cluster, int(osd_id))
    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_OUT_UP):
        stop_daemon(args.cluster, osd_id)
    elif status_code in (OSD_STATUS_IN_DOWN, OSD_STATUS_OUT_DOWN):
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    if not args.once:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Write deactivate to osd directory!
        with open(os.path.join(mounted_path, 'deactive'), 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path, do_rm=not args.once)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return

    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    command(['umount', os.path.join(lockbox, target_dev['uuid'])])

    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        space_key = name + '_uuid'
        if space_key in target_dev:
            dmcrypt_unmap(target_dev[space_key])
3993
3994 ###########################
3995
3996
def _remove_lockbox(uuid, cluster):
    """
    Unmount the lockbox of the partition uuid and delete the symlinks
    of the lockbox directory that point to it.

    Does nothing when STATEDIR/osd-lockbox does not exist. The cluster
    argument is not used.
    """
    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox')
    if not os.path.exists(lockbox_dir):
        return
    canonical = os.path.join(lockbox_dir, uuid)
    command(['umount', canonical])
    for entry in os.listdir(lockbox_dir):
        link = os.path.join(lockbox_dir, entry)
        if not os.path.islink(link):
            continue
        if os.readlink(link) == canonical:
            os.unlink(link)
4007
4008
def destroy_lookup_device(args, predicate, description):
    """
    Return (dmcrypt, partition) for the first partition reported by
    list_devices() for which predicate(partition) is true.

    dmcrypt tells whether the returned partition has dmcrypt
    information attached. description is only used in the Error raised
    when no partition matches.
    """
    devices = list_devices()
    # first pass: activate every lockbox partition that is not mounted
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['type'] == 'lockbox':
                if not is_mounted(partition['path']):
                    main_activate_lockbox_protected(
                        argparse.Namespace(verbose=args.verbose,
                                           path=partition['path']))
    # second pass: refresh the OSD information of dmcrypt partitions
    # through their mapped device, then evaluate the predicate
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['dmcrypt']:
                dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
                if dmcrypt_path:
                    unmap = False
                else:
                    # map it just long enough to read the OSD information
                    dmcrypt_path = dmcrypt_map(partition['path'],
                                               args.dmcrypt_key_dir)
                    unmap = True
                list_dev_osd(dmcrypt_path, {}, partition)
                if unmap:
                    dmcrypt_unmap(partition['uuid'])
                dmcrypt = True
            else:
                dmcrypt = False
            if predicate(partition):
                return dmcrypt, partition
    raise Error('found no device matching ', description)
4037
4038
def main_destroy(args):
    """
    Run main_destroy_locked(args) while holding activate_lock.
    """
    with activate_lock:
        main_destroy_locked(args)
4042
4043
def main_destroy_locked(args):
    """
    Destroy (or purge when args.purge is set) the OSD designated by
    args.path or args.destroy_by_id, unmap its dmcrypt devices and
    remove its lockbox when it is encrypted, and zap the base device
    when args.zap is set.

    Raises Error when neither a path nor an OSD id is given, when the
    path is not a partition, when no matching device is found or when
    the OSD is still up.
    """
    osd_id = args.destroy_by_id
    path = args.path
    target_dev = None

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)

    if path:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # without this, target_dev would still be None below
        raise Error('a partition path or an osd id is required')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
       status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    if args.purge:
        action = 'purge'
    else:
        action = 'destroy'
    LOG.info("Prepare to %s osd.%s" % (action, osd_id))
    command([
        'ceph',
        'osd',
        action,
        'osd.%s' % osd_id,
        '--yes-i-really-mean-it',
        # address the same cluster the status was just read from
        '--cluster={cluster}'.format(
            cluster=args.cluster,
        ),
    ])

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4103
4104
def get_space_osd_uuid(name, path):
    """
    Return the first line printed by `ceph-osd --get-device-fsid`
    for the block device path.

    name is the kind of space and is only used in messages. Error is
    raised when path does not exist, is not a block device, carries a
    multipath journal/block partition type without being a multipath
    device, or when ceph-osd fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not path_is_diskdevice(path):
        raise Error('%s is not a block device' % path)

    mpath_ptypes = (PTYPE['mpath']['journal']['ready'],
                    PTYPE['mpath']['block']['ready'])
    if (is_partition(path) and
            get_partition_type(path) in mpath_ptypes and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    try:
        out = _check_output(
            args=['ceph-osd', '--get-device-fsid', path],
            close_fds=True,
        )
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    # only the first line of the output is of interest
    value = str(out).partition('\n')[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
4136
4137
def main_activate_space(name, args):
    """
    Activate the OSD that uses the space (name tells which kind)
    stored on args.dev, then start its daemon.

    The OSD uuid read from the space leads to the data partition under
    /dev/disk/by-partuuid/. Nothing is done when the request is
    suppressed for either device or when the data partition is not
    there yet. Error is raised when args.dev does not exist.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    if is_suppressed(args.dev):
        LOG.info('suppressed activate request on space %s', args.dev)
        return

    with activate_lock:
        if args.dmcrypt:
            space_dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        else:
            space_dev = args.dev
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, space_dev)
        data_path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(data_path):
            LOG.info('suppressed activate request on %s', data_path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(data_path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        cluster, osd_id = mount_activate(
            dev=data_path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(cluster=cluster, osd_id=osd_id)
4185
4186
4187 ###########################
4188
4189
def main_activate_all(args):
    """
    Activate and start every OSD data partition listed in
    /dev/disk/by-parttypeuuid.

    Entries are named <partition type>.<partition uuid>. dmcrypt
    partitions are activated through /dev/mapper/<uuid> and are never
    mapped here. A failure is reported on stderr and the scan goes on;
    Error is raised at the end when at least one activation failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return
    failed = False
    for name in os.listdir(scan_dir):
        if '.' not in name:
            continue
        (tag, part_uuid) = name.split('.')

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, name)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(cluster=cluster, osd_id=osd_id)
            except Exception as e:
                print(
                    '{prog}: {msg}'.format(prog=args.prog, msg=e),
                    file=sys.stderr
                )
                failed = True

    if failed:
        raise Error('One or more partitions failed to activate')
4238
4239
4240 ###########################
4241
def is_swap(dev):
    """
    Return True if dev is listed as a swap device in PROCDIR/swaps,
    False otherwise.

    Both dev and the listed devices are resolved with
    os.path.realpath() before being compared.
    """
    dev = os.path.realpath(dev)
    # text mode: the entries must be str to compare equal to dev, with
    # 'rb' they are bytes on python 3 and the comparison never matches
    with open(PROCDIR + '/swaps', 'r') as proc_swaps:
        # the first line is the column header
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4255
4256
def get_oneliner(base, name):
    """
    Return the first line of the file base/name, without trailing
    whitespace and converted with _bytes2str(), or None when that
    path is not a regular file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as _file:
        first_line = _file.readline()
        return _bytes2str(first_line.rstrip())
4263
4264
def get_dev_fs(dev):
    """
    Return the filesystem type of dev, or None when it cannot be
    determined.

    On FreeBSD the output of `fstyp -u` is returned as is when the
    command exits with 0; elsewhere the type is extracted from the
    TYPE="..." field printed by `blkid -s TYPE`.
    """
    if FREEBSD:
        out, _, ret = command(['fstyp', '-u', dev])
        if ret == 0:
            return out
        return None

    out, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in out:
        return None
    # second whitespace separated field, text between the first quotes
    type_field = out.split()[1]
    return type_field.split('"')[1]
4289
4290
def split_dev_base_partnum(dev):
    """
    Return the tuple (base, partnum) for the partition dev.

    Multipath devices are handled by partnum_mpath() and
    get_partition_base_mpath(); for any other device the partition
    number is read from the `partition` file found under
    block_path(dev).
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # close the file explicitly instead of leaving it to the
        # garbage collector
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4300
4301
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for part, or
    None when blkid does not report it.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_TYPE')
4304
4305
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for part, or
    None when blkid does not report it.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_UUID')
4308
4309
def get_blkid_partition_info(dev, what=None):
    """
    Run `blkid -o udev -p` on dev and parse its KEY=VALUE output.

    Returns the value of the key `what` (None when blkid did not
    report it), or the whole dictionary when `what` is not given.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # split on the first '=' only: a value (a partition name for
        # instance) may itself contain '='
        key, separator, value = line.partition('=')
        if not separator:
            # not a KEY=VALUE line (e.g. blank), nothing to record
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
4328
4329
def more_osd_info(path, uuid_map, desc):
    """
    Fill desc with what the OSD data directory path tells: ceph_fsid,
    the cluster it belongs to, whoami and, for each kind of space, the
    lower-cased <name>_uuid plus the <name>_dev found for it in
    uuid_map.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4341
4342
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the OSD data device dev in desc: mount point, filesystem
    type, state (unprepared, prepared or active) and the OSD details
    gathered by more_osd_info().

    A device that is not mounted but has a filesystem is mounted just
    long enough to be inspected; a MountError is silently ignored.
    """
    mounted_on = is_mounted(dev)
    desc['mount'] = mounted_on
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if mounted_on:
        desc['state'] = 'active'
        more_osd_info(mounted_on, uuid_map, desc)
        return
    if not desc['fs_type']:
        return
    try:
        tpath = mount(dev=dev, fstype=desc['fs_type'], options='')
        if not tpath:
            return
        try:
            magic = get_oneliner(tpath, 'magic')
            if magic is not None:
                desc['magic'] = magic
                desc['state'] = 'prepared'
                more_osd_info(tpath, uuid_map, desc)
        finally:
            unmount(tpath)
    except MountError:
        pass
4364
4365
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device dev in desc: mount point, filesystem
    type, state (unprepared, prepared or active), the uuid of the OSD
    it belongs to and, when uuid_map knows that uuid, the matching
    device as lockbox_for.

    A lockbox that is not mounted but has a filesystem is mounted on a
    temporary directory just long enough to be inspected; a failure of
    the mount command is silently ignored.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
        try:
            args = ['mount', '-t', 'ext4', dev, tpath]
            LOG.debug('Mounting lockbox ' + str(" ".join(args)))
            try:
                command_check_call(args)
            except subprocess.CalledProcessError:
                # nothing got mounted: do not leave the temporary
                # mount point behind
                try:
                    os.rmdir(tpath)
                except OSError:
                    pass
                raise
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # always undo our mount, as list_dev_osd() does
                unmount(tpath)
        except subprocess.CalledProcessError:
            pass
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4389
4390
def list_format_lockbox_plain(dev):
    """
    Return the list of words telling what the lockbox dev is for: the
    device it belongs to when known, else the OSD uuid it carries,
    else nothing.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4398
4399
def list_format_more_osd_info_plain(dev):
    """
    Return the list of words describing the OSD stored on dev: its
    cluster (or the unknown cluster fsid), its osd id and the device
    of each space it uses.
    """
    desc = []
    ceph_fsid = dev.get('ceph_fsid')
    if ceph_fsid:
        cluster = dev.get('cluster')
        if cluster:
            desc.append('cluster ' + cluster)
        else:
            desc.append('unknown cluster ' + ceph_fsid)
    whoami = dev.get('whoami')
    if whoami:
        desc.append('osd.%s' % whoami)
    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            desc.append(name + ' %s' % space_dev)
    return desc
4413
4414
def list_format_dev_plain(dev, prefix=''):
    """
    Return the one line, human readable description of the device or
    partition dev (a dictionary as built by list_dev()):
    prefix, the device path and a comma separated list of words that
    depends on the partition type.
    """
    desc = []
    if dev['ptype'] == PTYPE['regular']['osd']['ready']:
        # regular OSD data partition
        desc = (['ceph data', dev['state']] +
                list_format_more_osd_info_plain(dev))
    elif dev['ptype'] in (PTYPE['regular']['lockbox']['ready'],
                          PTYPE['mpath']['lockbox']['ready']):
        desc = (['ceph lockbox', dev['state']] +
                list_format_lockbox_plain(dev))
    elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
        # encrypted OSD data partition: the details are only shown
        # when exactly one device holds it
        dmcrypt = dev['dmcrypt']
        if not dmcrypt['holders']:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(dmcrypt['holders'])]
    elif Ptype.is_regular_space(dev['ptype']):
        name = Ptype.space_ptype_to_name(dev['ptype'])
        desc.append('ceph ' + name)
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(dev['ptype']):
        name = Ptype.space_ptype_to_name(dev['ptype'])
        dmcrypt = dev['dmcrypt']
        if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # anything else: show its type, then the filesystem type or
        # else the partition type, then where it is mounted
        desc.append(dev['type'])
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4462
4463
def list_format_plain(devices):
    """
    Return the multi-line, human readable listing of devices.

    A device with partitions gets a header line followed by one line
    per partition, sorted by path; a device without partitions gets a
    single line.
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if not partitions:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
            continue
        lines.append('%s :' % device['path'])
        by_path = sorted(partitions, key=lambda part: part['path'])
        lines.extend(list_format_dev_plain(dev=part, prefix=' ')
                     for part in by_path)
    return "\n".join(lines)
4476
4477
def list_dev(dev, uuid_map, space_map):
    """
    Return a dictionary describing the device or partition dev.

    It always holds path, dmcrypt, is_partition, ptype and type; the
    other entries depend on the partition type. uuid_map maps
    partition uuids to devices and space_map maps the uuid of a space
    partition to the data device using it.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # compare with the multipath *lockbox* type: the osd type can
        # never match inside this branch
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # the OSD can only be inspected through its single mapped device
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        # not a ceph partition: report swap or other
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4554
4555
def list_devices():
    """
    Return the list of dictionaries describing every device returned
    by list_all_partitions().

    A device with partitions is reported as {'path': ...,
    'partitions': [...]} where each partition is described by
    list_dev(); a device without partitions is described by list_dev()
    directly.
    """
    partmap = list_all_partitions()

    # first pass: build the lookup tables list_dev() needs
    #   uuid_map:  partition uuid -> partition device
    #   space_map: uuid of a space partition -> data partition using it
    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for p in parts:
            dev = get_dev_path(p)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype in Ptype.get_ready_by_name('osd'):
                if Ptype.is_dmcrypt(ptype, 'osd'):
                    # an encrypted partition is only inspected when
                    # exactly one device holds it
                    holders = is_held(dev)
                    if len(holders) != 1:
                        continue
                    dev_to_mount = get_dev_path(holders[0])
                else:
                    dev_to_mount = dev

                fs_type = get_dev_fs(dev_to_mount)
                if fs_type is not None:
                    mount_options = get_mount_options(cluster='ceph',
                                                      fs_type=fs_type)
                    try:
                        # mount just long enough to read the
                        # <name>_uuid files
                        tpath = mount(dev=dev_to_mount,
                                      fstype=fs_type, options=mount_options)
                        try:
                            for name in Space.NAMES:
                                space_uuid = get_oneliner(tpath,
                                                          name + '_uuid')
                                if space_uuid:
                                    space_map[space_uuid.lower()] = dev
                        finally:
                            unmount(tpath)
                    except MountError:
                        pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    # second pass: describe every device and partition
    devices = []
    for base, parts in sorted(partmap.items()):
        if parts:
            disk = {'path': get_dev_path(base)}
            partitions = []
            for p in sorted(parts):
                partitions.append(list_dev(get_dev_path(p),
                                           uuid_map,
                                           space_map))
            disk['partitions'] = partitions
            devices.append(disk)
        else:
            device = list_dev(get_dev_path(base), uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
    LOG.debug("list_devices: " + str(devices))
    return devices
4618
4619
def list_zfs():
    """
    Print one line per dataset listed by
    `zfs list -o name,mountpoint`.

    A dataset whose mount point contains an `active` file is reported
    as active ceph data, any other one as a plain zfs mount. A failure
    of the zfs command is logged and re-raised.
    """
    try:
        out, err, ret = command(['zfs', 'list', '-o', 'name,mountpoint'])
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise
    # skip the header line
    for line in out.splitlines()[1:]:
        vdevline = line.split()
        vdev, mountpoint = vdevline[0], vdevline[1]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            osd_dir = os.path.split(mountpoint)[1]
            print(vdev, "ceph data, active, cluster ceph,", osd_dir,
                  "mounted on:", mountpoint)
        else:
            print(vdev + " other, zfs, mounted on: " + mountpoint)
4642
4643
def main_list(args):
    """Entry point of the ``list`` sub-command.

    Holds ``activate_lock`` while delegating to the FreeBSD or the generic
    listing implementation, depending on the ``FREEBSD`` flag.
    """
    with activate_lock:
        lister = main_list_freebsd if FREEBSD else main_list_protected
        lister(args)
4650
4651
def main_list_protected(args):
    """List devices, optionally restricted to the PATHs given in ``args``.

    :param args: parsed arguments; ``args.path`` is a (possibly empty) list
                 of device paths and ``args.format`` is ``'json'`` or
                 ``'plain'``.

    Each PATH that exists is resolved with os.path.realpath(); a PATH that
    does not exist is used verbatim.  A device is selected when one of the
    resulting strings matches the end of its ``path``.  A device matched by
    several PATHs is reported once only.
    """
    devices = list_devices()
    if args.path:
        paths = [
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        ]
        # NOTE(review): each PATH is used as an unescaped regular
        # expression anchored at the end of the device path; this is kept
        # for backward compatibility with callers passing patterns.
        selected_devices = [
            device for device in devices
            if any(re.search(path + '$', device['path']) for path in paths)
        ]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
    else:
        output = list_format_plain(selected_devices)
        if output:
            print(output)
4674
4675
def main_list_freebsd(args):
    """FreeBSD implementation of the ``list`` sub-command.

    Currently accommodates only ZFS Filestore partitions: prints the
    VDEVs and their mountpoints, e.g. for

      > zfs list
      NAME USED AVAIL REFER MOUNTPOINT
      osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
      osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1

    ``args`` is accepted for interface symmetry and is not used.
    """
    list_zfs()
4684
4685
###########################
#
# Mark devices that we want to suppress activates on with a
# file like
#
#   /var/lib/ceph/tmp/suppress-activate.sdb
#
# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix.  That
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
#
4697
def is_suppressed(path):
    """Tell whether activation is suppressed for the device at ``path``.

    :param path: path to a device; it is resolved with os.path.realpath().
    :return: True if a suppress marker file exists for the device name or
             for any of its non-empty prefixes, False otherwise (including
             when ``path`` is not under /dev/, is not a disk device, or
             when the check itself fails).
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not ldev_is_diskdevice(disk)):
            return False
        base = get_dev_name(disk)
        # Try the full name first, then ever shorter prefixes, so that a
        # marker for "sdb" also covers "sdb1", "sdb2", ...
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # Best effort: a failing check means "not suppressed".  Unlike a
        # bare except this lets KeyboardInterrupt/SystemExit propagate.
        return False
    # No marker found: answer with an explicit boolean rather than None.
    return False
4711
4712
def set_suppress(path):
    """Create the suppress marker file for the device at ``path``.

    :raises Error: if the resolved path does not exist or ``path`` is not
                   a disk device.
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)

    base = get_dev_name(disk)
    marker = SUPPRESS_PREFIX + base  # noqa
    # An empty file is enough, only its existence is checked.
    open(marker, 'w').close()
    LOG.info('set suppress flag on %s', base)
4724
4725
def unset_suppress(path):
    """Remove the suppress marker file of the device at ``path``.

    :raises Error: if the resolved path does not exist, ``path`` is not a
                   disk device, no marker file exists for it, or the
                   marker file cannot be removed.
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    assert disk.startswith('/dev/')

    base = get_dev_name(disk)
    marker = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(marker):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(marker)
    except OSError as e:
        raise Error('failed to unsuppress', e)
    else:
        LOG.info('unset suppress flag on %s', base)
4744
4745
def main_suppress(args):
    """Entry point of ``suppress-activate``: flag ``args.path``."""
    target = args.path
    set_suppress(target)
4748
4749
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unflag ``args.path``."""
    target = args.path
    unset_suppress(target)
4752
4753
def main_zap(args):
    """Entry point of ``zap``: zap every device of ``args.dev`` in order."""
    devices = iter(args.dev)
    for device in devices:
        zap(device)
4757
4758
def main_trigger(args):
    """Entry point of ``trigger``: activate the partition ``args.dev``.

    Unless ``args.sync`` is set, the work is handed over to systemd (by
    restarting the matching ``ceph-disk@`` unit) or to upstart (by emitting
    a ``ceph-disk`` event) when one of them is detected, and the function
    returns immediately.

    Otherwise the partition type of the device selects the ``ceph-disk``
    sub-command that is run synchronously.

    :raises Error: if the partition type is not recognized or the
                   sub-command returns a non-zero code.
    """
    LOG.debug("main_trigger: " + str(args))
    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        unit_instance = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=unit_instance)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return
    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command([
            'initctl',
            'emit',
            'ceph-disk',
            'dev={dev}'.format(dev=args.dev),
            'pid={pid}'.format(pid=os.getpid()),
        ])
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    clear_modes = ('regular', 'mpath')
    crypt_modes = ('plain', 'luks')
    block_names = ('block', 'block.db', 'block.wal')
    # (PTYPE modes, PTYPE names, sub-command with its flags), tried in order
    activations = (
        (clear_modes, ('osd',), ['activate']),
        (crypt_modes, ('osd',), ['activate', '--dmcrypt']),
        (clear_modes, ('journal',), ['activate-journal']),
        (crypt_modes, ('journal',), ['activate-journal', '--dmcrypt']),
        (clear_modes, block_names, ['activate-block']),
        (crypt_modes, block_names, ['activate-block', '--dmcrypt']),
        (clear_modes, ('lockbox',), ['activate-lockbox']),
    )
    for modes, names, subcommand in activations:
        ready = tuple(PTYPE[mode][name]['ready']
                      for mode in modes
                      for name in names)
        if parttype in ready:
            break
    else:
        raise Error('unrecognized partition type %s' % parttype)

    out, err, ret = command(ceph_disk + subcommand + [args.dev])

    if ret == 0:
        LOG.debug(out)
        LOG.debug(err)
        return
    LOG.info(out)
    LOG.error(err)
    raise Error('return code ' + str(ret))
4894
4895
def _fix_find_command(directory, uid, gid, recursive):
    """Build the find command that chowns and relabels ``directory``."""
    c = ['find', directory]
    if not recursive:
        # -maxdepth is a global find option: it goes before the actions.
        c += ['-maxdepth', '0']
    c += [
        '-exec', 'chown', ':'.join((uid, gid)), '{}', '+',
        '-exec', 'restorecon', '{}', '+',
    ]
    return c


def _fix_chown_command(directory, uid, gid, recursive):
    """Build the chown command for ``directory``."""
    c = ['chown']
    if recursive:
        c.append('-R')
    return c + [':'.join((uid, gid)), directory]


def _fix_restorecon_command(directory, uid, gid, recursive):
    """Build the restorecon command for ``directory``."""
    c = ['restorecon']
    if recursive:
        c.append('-R')
    return c + [directory]


def _fix_start(fix_table, build_command, failed_log, failed_suffix):
    """Run ``build_command`` for every installed entry of ``fix_table``.

    Blocking entries are run to completion and raise Error on a non-zero
    return code.  The other entries are started with command_init() and
    the resulting process objects are returned for _fix_wait().
    """
    background = []
    for directory, uid, gid, blocking, recursive in fix_table:
        # Skip directories/files that are not installed
        if not os.access(directory, os.F_OK):
            continue

        c = build_command(directory, uid, gid, recursive)
        if not blocking:
            background.append(command_init(c))
            continue

        out, err, ret = command(c)
        if ret:
            LOG.error(failed_log + directory)
            LOG.error(err)
            raise Error(directory + failed_suffix)
    return background


def _fix_wait(processes, label, failed_log):
    """Wait for ``processes``; raise Error on the first non-zero code."""
    LOG.debug(label + ": " + str(processes))
    for process in processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error(failed_log)
            LOG.error(err)
            raise Error("background failed")


def main_fix(args):
    """Entry point of ``fix``: repair ownership and/or SELinux labels.

    :param args: parsed arguments with the boolean attributes ``system``,
                 ``selinux``, ``permissions`` and ``all``.
    :raises Error: if SELinux is required but not enabled, if one of the
                   ceph daemons is found running, or if any of the
                   commands run returns a non-zero code.
    """
    # A list of (path, uid, gid, blocking, recursive) tuples
    fix_table = [
        ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
        ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
        ('/etc/ceph', 'root', ROOTGROUP, True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/radosgw', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        recursive = directory != '/var/lib/ceph/osd'
        fix_table.append((directory, 'ceph', 'ceph', True, recursive))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # The lists of background processes
    all_processes = []
    permissions_processes = []
    selinux_processes = []

    # Preliminary checks
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for directory, _, _, _, _ in fix_table:
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            c.append('-e')
            c.append(directory)

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # Use find to relabel + chown ~simultaneously
    if args.all:
        all_processes = _fix_start(
            fix_table, _fix_find_command,
            "Failed to fix ", " fix failed")
    _fix_wait(all_processes, "all_processes",
              "A background find process failed")

    # Fix permissions
    if args.permissions:
        permissions_processes = _fix_start(
            fix_table, _fix_chown_command,
            "Failed to chown ", " chown failed")
    _fix_wait(permissions_processes, "permissions_processes",
              "A background permissions process failed")

    # Fix SELinux labels
    if args.selinux:
        selinux_processes = _fix_start(
            fix_table, _fix_restorecon_command,
            "Failed to restore labels for ", " relabel failed")
    _fix_wait(selinux_processes, "selinux_processes",
              "A background selinux process failed")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5081
5082
def setup_statedir(dir):
    """Record ``dir`` as the state directory and derive globals from it.

    Creates ``dir`` and its ``tmp`` sub-directory when they do not exist,
    then sets the module globals STATEDIR, prepare_lock, activate_lock and
    SUPPRESS_PREFIX.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX

    STATEDIR = dir
    tmpdir = STATEDIR + "/tmp"
    for needed in (STATEDIR, tmpdir):
        if not os.path.exists(needed):
            os.mkdir(needed)

    prepare_lock = FileLock(tmpdir + '/ceph-disk.prepare.lock')
    activate_lock = FileLock(tmpdir + '/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = tmpdir + '/suppress-activate.'
5103
5104
def setup_sysconfdir(dir):
    """Record ``dir`` in the module global SYSCONFDIR."""
    global SYSCONFDIR
    sysconfdir = dir
    SYSCONFDIR = sysconfdir
5108
5109
def parse_args(argv):
    """Build the ``ceph-disk`` command line parser and parse ``argv``.

    :param argv: list of command line arguments, program name excluded.
    :return: the namespace produced by argparse; it also carries ``prog``
             (the parser's program name).
    """
    parser = argparse.ArgumentParser('ceph-disk')

    # Options common to every sub-command: (flags, add_argument keywords)
    global_options = (
        (('-v', '--verbose'),
         dict(action='store_true', default=None,
              help='be more verbose')),
        (('--log-stdout',),
         dict(action='store_true', default=None,
              help='log to stdout')),
        (('--prepend-to-path',),
         dict(metavar='PATH', default='/usr/bin',
              help=('prepend PATH to $PATH for backward compatibility '
                    '(default /usr/bin)'))),
        (('--statedir',),
         dict(metavar='PATH', default='/var/lib/ceph',
              help=('directory in which ceph state is preserved '
                    '(default /var/lib/ceph)'))),
        (('--sysconfdir',),
         dict(metavar='PATH', default='/etc/ceph',
              help=('directory in which ceph configuration files are found '
                    '(default /etc/ceph)'))),
        (('--setuser',),
         dict(metavar='USER', default=None,
              help='use the given user for subprocesses, '
                   'rather than ceph or root')),
        (('--setgroup',),
         dict(metavar='GROUP', default=None,
              help='use the given group for subprocesses, '
                   'rather than ceph or root')),
    )
    for flags, keywords in global_options:
        parser.add_argument(*flags, **keywords)

    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    Prepare.set_subparser(subparsers)
    registrations = (
        make_activate_parser,
        make_activate_lockbox_parser,
        make_activate_block_parser,
        make_activate_journal_parser,
        make_activate_all_parser,
        make_list_parser,
        make_suppress_parser,
        make_deactivate_parser,
        make_destroy_parser,
        make_zap_parser,
        make_trigger_parser,
        make_fix_parser,
    )
    for register in registrations:
        register(subparsers)

    return parser.parse_args(argv)
5184
5185
def make_fix_parser(subparsers):
    """Register the ``fix`` sub-command and return its parser."""
    parser = subparsers.add_parser(
        'fix',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        """)),
        help='fix SELinux labels and/or file permissions')

    # All options are plain boolean switches, off by default.
    switches = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, text in switches:
        parser.add_argument(flag, action='store_true', default=False,
                            help=text)

    parser.set_defaults(func=main_fix)
    return parser
5222
5223
def make_trigger_parser(subparsers):
    """Register the ``trigger`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        The partition given in argument is activated. The type of the
        partition (data, lockbox, journal etc.) is detected by its
        type. If the init system is upstart or systemd, the activation is
        delegated to it and runs asynchronously, which
        helps reduce the execution time of udev actions.
        """))
    parser = subparsers.add_parser(
        'trigger',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='activate any device (called by udev)')

    parser.add_argument('dev', help='device')
    parser.add_argument('--cluster', metavar='NAME', default='ceph',
                        help='cluster name to assign this disk to')
    parser.add_argument('--dmcrypt', action='store_true', default=None,
                        help='map devices with dm-crypt')
    parser.add_argument('--dmcrypt-key-dir', metavar='KEYDIR',
                        default='/etc/ceph/dmcrypt-keys',
                        help='directory where dm-crypt keys are stored')
    parser.add_argument('--sync', action='store_true', default=None,
                        help='do operation synchronously; '
                             'do not trigger systemd')

    parser.set_defaults(func=main_trigger)
    return parser
5266
5267
def make_activate_parser(subparsers):
    """Register the ``activate`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Activate the OSD found at PATH (can be a directory
        or a device partition, possibly encrypted). When
        activated for the first time, a unique OSD id is obtained
        from the cluster. If PATH is a directory, a symbolic
        link is added in {statedir}/osd/ceph-$id. If PATH is
        a partition, it is mounted on {statedir}/osd/ceph-$id.
        Finally, the OSD daemon is run.

        If the OSD depends on auxiliary partitions (journal, block, ...)
        they need to be available otherwise activation will fail. It
        may happen if a journal is encrypted and cryptsetup was not
        run yet.
        """.format(statedir=STATEDIR)))
    parser = subparsers.add_parser(
        'activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph OSD')

    parser.add_argument('--mount', action='store_true', default=None,
                        help='mount a block device [deprecated, ignored]')
    parser.add_argument('--activate-key', metavar='PATH',
                        help='bootstrap-osd keyring path template '
                             '(%(default)s)',
                        dest='activate_key_template')
    parser.add_argument('--mark-init', metavar='INITSYSTEM',
                        help='init system to manage this dir',
                        default='auto', choices=INIT_SYSTEMS)
    parser.add_argument('--no-start-daemon', action='store_true',
                        default=None, help='do not start the daemon')
    parser.add_argument('path', metavar='PATH',
                        help='path to block device or directory')
    parser.add_argument('--dmcrypt', action='store_true', default=None,
                        help='map DATA and/or JOURNAL devices with dm-crypt')
    parser.add_argument('--dmcrypt-key-dir', metavar='KEYDIR',
                        default='/etc/ceph/dmcrypt-keys',
                        help='directory where dm-crypt keys are stored')
    parser.add_argument('--reactivate', action='store_true', default=False,
                        help='activate the deactived OSD')

    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate,
    )
    return parser
5336
5337
def make_activate_lockbox_parser(subparsers):
    """Register the ``activate-lockbox`` sub-command; return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
        where $uuid uniquely identifies the OSD that needs this lockbox
        to retrieve keys from the monitor and unlock its partitions.

        If the OSD has one or more auxiliary devices (journal, block, ...)
        symbolic links are created at {statedir}/osd-lockbox/$other_uuid
        and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
        allow a journal encrypted in a partition identified by $other_uuid to
        fetch the keys it needs from the monitor.

        Finally the OSD is activated, as it would be with ceph-disk activate.
        """.format(statedir=STATEDIR)))
    lockbox_parser = subparsers.add_parser(
        'activate-lockbox',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph lockbox')

    lockbox_parser.add_argument(
        '--activate-key',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template')
    lockbox_parser.add_argument(
        '--dmcrypt-key-dir', metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    lockbox_parser.add_argument(
        'path', metavar='PATH',
        help='path to block device')

    lockbox_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_lockbox,
    )
    return lockbox_parser
5377
5378
def make_activate_block_parser(subparsers):
    """Register ``activate-block`` and return its parser."""
    block_parser = make_activate_space_parser('block', subparsers)
    return block_parser
5381
5382
def make_activate_journal_parser(subparsers):
    """Register ``activate-journal`` and return its parser."""
    journal_parser = make_activate_space_parser('journal', subparsers)
    return journal_parser
5385
5386
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` sub-command and return its parser.

    :param name: name of the auxiliary space, used in the sub-command
                 name, the help texts and passed to main_activate_space().
    :param subparsers: the argparse sub-parsers action to register with.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activating a {name} partition is only meaningfull
        if it is encrypted and it will map it using
        cryptsetup.

        Finally the corresponding OSD is activated,
        as it would be with ceph-disk activate.
        """.format(name=name)))
    parser = subparsers.add_parser(
        'activate-%s' % name,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate an OSD via its %s device' % name)

    parser.add_argument('dev', metavar='DEV',
                        help='path to %s block device' % name)
    parser.add_argument('--activate-key', metavar='PATH',
                        help='bootstrap-osd keyring path template '
                             '(%(default)s)',
                        dest='activate_key_template')
    parser.add_argument('--mark-init', metavar='INITSYSTEM',
                        help='init system to manage this dir',
                        default='auto', choices=INIT_SYSTEMS)
    parser.add_argument('--dmcrypt', action='store_true', default=None,
                        help=('map data and/or auxiliariy (journal, etc.) '
                              'devices with dm-crypt'))
    parser.add_argument('--dmcrypt-key-dir', metavar='KEYDIR',
                        default='/etc/ceph/dmcrypt-keys',
                        help='directory where dm-crypt keys are stored')
    parser.add_argument('--reactivate', action='store_true', default=False,
                        help='activate the deactived OSD')

    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        # bind the space name: the entry point only receives args
        func=lambda args: main_activate_space(name, args),
    )
    return parser
5440
5441
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
        The partitions containing auxiliary devices (journal, block, ...)
        are not activated.
        """))
    parser = subparsers.add_parser(
        'activate-all',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate all tagged OSD partitions')

    parser.add_argument('--activate-key', metavar='PATH',
                        help='bootstrap-osd keyring path template '
                             '(%(default)s)',
                        dest='activate_key_template')
    parser.add_argument('--mark-init', metavar='INITSYSTEM',
                        help='init system to manage this dir',
                        default='auto', choices=INIT_SYSTEMS)

    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_all,
    )
    return parser
5470
5471
def make_list_parser(subparsers):
    """Register the ``list`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Display all partitions on the system and their
        associated Ceph information, if any.
        """))
    parser = subparsers.add_parser(
        'list',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='List disks, partitions, and Ceph OSDs')

    parser.add_argument('--format', help='output format',
                        default='plain', choices=['json', 'plain'])
    parser.add_argument('path', metavar='PATH', nargs='*',
                        help='path to block devices, relative to /sys/block')

    parser.set_defaults(func=main_list)
    return parser
5497
5498
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    :return: the parser of ``suppress-activate`` (the parser of
             ``unsuppress-activate`` is registered but not returned).
    """
    path_help = 'path to block device or directory'

    suppress = subparsers.add_parser(
        'suppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Add a prefix to the list of suppressed device names
        so that they are ignored by all activate* subcommands.
        """)),
        help='Suppress activate on a device (prefix)')
    suppress.add_argument('path', metavar='PATH', help=path_help)
    suppress.set_defaults(func=main_suppress)

    unsuppress = subparsers.add_parser(
        'unsuppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Remove a prefix from the list of suppressed device names
        so that they are no longer ignored by all
        activate* subcommands.
        """)),
        help='Stop suppressing activate on a device (prefix)')
    unsuppress.add_argument('path', metavar='PATH', help=path_help)
    unsuppress.set_defaults(func=main_unsuppress)

    return suppress
5535
5536
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` sub-command (nothing is returned)."""
    description = textwrap.fill(textwrap.dedent("""\
        Deactivate the OSD located at PATH. It stops the OSD daemon
        and optionally marks it out (with --mark-out). The content of
        the OSD is left untouched.

        By default, the, ready, active, INIT-specific files are
        removed (so that it is not automatically re-activated by the
        udev rules or ceph-disk trigger) and the file deactive is
        created to remember the OSD is deactivated.

        If the --once option is given, the ready, active, INIT-specific
        files are not removed and the OSD will reactivate whenever
        ceph-disk trigger is run on one of the devices (journal, data,
        block, lockbox, ...).

        If the OSD is dmcrypt, remove the data dmcrypt map. When
        deactivate finishes, the OSD is down.
        """))
    parser = subparsers.add_parser(
        'deactivate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Deactivate a Ceph OSD')

    parser.add_argument('--cluster', metavar='NAME', default='ceph',
                        help='cluster name to assign this disk to')
    parser.add_argument('path', metavar='PATH', nargs='?',
                        help='path to block device or directory')
    parser.add_argument('--deactivate-by-id', metavar='<id>',
                        help='ID of OSD to deactive')
    parser.add_argument('--mark-out', action='store_true', default=False,
                        help='option to mark the osd out')
    parser.add_argument('--once', action='store_true', default=False,
                        help='does not need --reactivate to activate again')

    parser.set_defaults(func=main_deactivate)
5590
5591
def make_destroy_parser(subparsers):
    """Register the ``destroy`` sub-command (nothing is returned)."""
    # The backslash must be the last character of its line: it is a line
    # continuation that keeps the first line aligned with the others so
    # that textwrap.dedent() can strip the common margin.
    destroy_parser = subparsers.add_parser(
        'destroy',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Destroy the OSD located at PATH. It removes the OSD from the
        cluster and marks it destroyed. An OSD must be down before it
        can be destroyed. Once it is destroyed, a new OSD can be created
        in its place, reusing the same OSD id and position (e.g. after
        a failed HDD or SSD is replaced). Alternatively, if the
        --purge option is also specified, the OSD is removed from the
        CRUSH map and the OSD id is deallocated.""")),
        help='Destroy a Ceph OSD')
    destroy_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    destroy_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    destroy_parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy'
    )
    destroy_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    destroy_parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    destroy_parser.add_argument(
        '--purge',
        action='store_true', default=False,
        help='option to remove OSD from CRUSH map and deallocate the id',
    )
    destroy_parser.set_defaults(
        func=main_destroy,
    )
5642
5643
def make_zap_parser(subparsers):
    """Register the ``zap`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Zap/erase/destroy a device's partition table and contents. It
        actually uses sgdisk and it's option --zap-all to
        destroy both GPT and MBR data structures so that the disk
        becomes suitable for repartitioning.
        """))
    parser = subparsers.add_parser(
        'zap',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Zap/erase/destroy a device\'s partition table (and contents)')

    # one or more devices may be given
    parser.add_argument('dev', metavar='DEV', nargs='+',
                        help='path to block device')

    parser.set_defaults(func=main_zap)
    return parser
5665
5666
def main(argv):
    """Parse ``argv``, set up the global state and run the sub-command.

    With ``--verbose`` the sub-command is called directly so exceptions
    propagate; otherwise it is run through main_catch().
    """
    global CEPH_PREF_USER, CEPH_PREF_GROUP

    args = parse_args(argv)

    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = ":".join((args.prepend_to_path, current))

    if args.func.__name__ != 'main_trigger':
        # trigger may run when statedir is unavailable and does not use it
        setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if not args.verbose:
        main_catch(args.func, args)
    else:
        args.func(args)
5690
5691
def setup_logging(verbose, log_stdout):
    """Configure logging.

    :param verbose: when true the level is DEBUG, otherwise WARNING.
    :param log_stdout: when true a stdout handler is attached to ``LOG``;
                       otherwise logging.basicConfig() is used.
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format='%(funcName)s: %(message)s',
        )
        return

    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(loglevel)
    handler.setFormatter(logging.Formatter('%(funcName)s: %(message)s'))
    LOG.addHandler(handler)
    LOG.setLevel(loglevel)
5709
5710
def main_catch(func, args):
    """Call ``func(args)`` and turn known failures into SystemExit.

    An ``Error`` exits with ``<prog>: <message>``; a ``CephDiskException``
    exits with ``<prog> <exception class name>: <message>``.  ``prog`` is
    read from ``args.prog``.
    """
    try:
        func(args)

    except Error as e:
        message = '{prog}: {msg}'.format(
            prog=args.prog,
            msg=e,
        )
        raise SystemExit(message)

    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
5733
5734
def run():
    """Run main() with the process command line, program name excluded."""
    argv = sys.argv[1:]
    main(argv)
5737
5738
if __name__ == '__main__':
    # Executed as a script: same as run(), i.e. main(sys.argv[1:]).
    run()
    warned_about = {}