]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-disk/ceph_disk/main.py
update sources to v12.1.1
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2015, 2016 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
7 #
8 # Author: Loic Dachary <loic@dachary.org>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
13 # any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
19 #
20
21 from __future__ import print_function
22
23 import argparse
24 import base64
25 import errno
26 import fcntl
27 import json
28 import logging
29 import os
30 import platform
31 import re
32 import subprocess
33 import stat
34 import sys
35 import tempfile
36 import uuid
37 import time
38 import shlex
39 import pwd
40 import grp
41 import textwrap
42 import glob
43
# Magic strings; check_osd_magic() compares the OSD one against the
# 'magic' file of a data directory.
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'

# Value of the lockbox 'key-management-mode' file understood by
# get_dmcrypt_key().
KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'

# Partition type GUIDs, indexed as PTYPE[<flavour>][<name>][<state>] where
# <flavour> is 'regular', 'luks', 'plain' or 'mpath', <name> is the kind of
# partition and <state> is 'ready' or 'tobe'.
# Only the 'regular' and 'mpath' flavours carry a 'lockbox' entry.
PTYPE = {
    'regular': {
        'journal': {
            # identical because creating a journal is atomic
            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block': {
            # identical because creating a block is atomic
            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block.db': {
            # NOTE(review): unlike 'journal' and 'block' above, the
            # 'ready' and 'tobe' values differ here
            'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
            'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
        },
        'block.wal': {
            # NOTE(review): unlike 'journal' and 'block' above, the
            # 'ready' and 'tobe' values differ here
            'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
            'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
        },
        'lockbox': {
            'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
            'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
        },
    },
    'luks': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '166418da-c469-4022-adf4-b30afd37f176',
            'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
        },
        'block.wal': {
            'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
            'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'plain': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
            'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
        },
        'block.wal': {
            'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
            'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'mpath': {
        'journal': {
            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
        },
        'block': {
            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
        },
        'block.db': {
            'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
            'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
        },
        'block.wal': {
            'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
            'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
        },
        'osd': {
            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
        },
        'lockbox': {
            'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
            'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
        },
    },
}
151
152
class Ptype(object):
    """
    Static lookup helpers over the PTYPE partition type GUID table.

    Only the 'ready' GUIDs are consulted by these helpers.
    """

    @staticmethod
    def get_ready_by_type(what):
        """
        Return the 'ready' GUID of every entry of flavour ``what``.
        """
        ready = []
        for entry in PTYPE[what].values():
            ready.append(entry['ready'])
        return ready

    @staticmethod
    def get_ready_by_name(name):
        """
        Return the 'ready' GUID of ``name`` for every flavour defining it.
        """
        ready = []
        for flavour in PTYPE.values():
            if name in flavour:
                ready.append(flavour[name]['ready'])
        return ready

    @staticmethod
    def is_regular_space(ptype):
        """True if ptype is the 'ready' GUID of a 'regular' space."""
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        """True if ptype is the 'ready' GUID of an 'mpath' space."""
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        """True if ptype is the 'ready' GUID of a 'plain' space."""
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        """True if ptype is the 'ready' GUID of a 'luks' space."""
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """
        True if ptype is the 'ready' GUID of one of Space.NAMES in the
        ``what`` flavour.
        """
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Return the space name whose 'ready' GUID is ptype.

        :raises: ValueError if no flavour has a matching space.
        """
        for flavour in PTYPE.values():
            found = next((name for name in Space.NAMES
                          if ptype == flavour[name]['ready']), None)
            if found is not None:
                return found
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """
        True if ptype is a dm-crypt ('plain' or 'luks') 'ready' GUID of
        one of Space.NAMES.
        """
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """
        True if ptype is the 'plain' or 'luks' 'ready' GUID of ``name``.
        """
        return any(ptype == PTYPE[flavour][name]['ready']
                   for flavour in ('plain', 'luks'))
207
208
# Root of sysfs; block_path() builds its <SYSFS>/dev/block/<major>:<minor>
# paths underneath it.
SYSFS = '/sys'

# Platform dependent defaults, chosen once when the module is imported.
if platform.system() == 'FreeBSD':
    FREEBSD = True
    DEFAULT_FS_TYPE = 'zfs'
    PROCDIR = '/compat/linux/proc'
    # FreeBSD does not have blockdevices any more
    BLOCKDIR = '/dev'
else:
    FREEBSD = False
    DEFAULT_FS_TYPE = 'xfs'
    PROCDIR = '/proc'
    BLOCKDIR = '/sys/block'

"""
OSD STATUS Definition
"""
# The four in/out x up/down combinations, numbered 0 to 3.
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3
230
# Mount options, keyed by filesystem type.
MOUNT_OPTIONS = dict(
    btrfs='noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    ext4='noatime,user_xattr',
    xfs='noatime,inode64',
)

# Extra mkfs arguments, keyed by filesystem type.
MKFS_ARGS = dict(
    btrfs=[
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    xfs=[
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
    zfs=[
        '-o', 'atime=off'
    ],
)
260
# Recognised init system names, plus the 'auto' and 'none' pseudo values.
INIT_SYSTEMS = [
    'upstart',
    'sysvinit',
    'systemd',
    'openrc',
    'bsdrc',
    'auto',
    'none',
]

# Base directory of the ceph state; get_dmcrypt_key() looks for
# <STATEDIR>/osd-lockbox/<uuid> in it.
STATEDIR = '/var/lib/ceph'

# Directory holding the cluster configuration files.
SYSCONFDIR = '/etc/ceph'

# Placeholders; presumably assigned elsewhere in the file (not visible
# from here) -- TODO confirm.
prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}

# Nuke the TERM variable to avoid confusing any subprocesses we call.
# For example, libreadline will print weird control sequences for some
# TERM values.
if 'TERM' in os.environ:
    del os.environ['TERM']

# Name the logger after the module, or after the script when run directly.
LOG_NAME = __name__
if LOG_NAME == '__main__':
    LOG_NAME = os.path.basename(sys.argv[0])
LOG = logging.getLogger(LOG_NAME)

# Allow user-preferred values for subprocess user and group
# (read by get_ceph_user() and get_ceph_group(); None means no preference)
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
296
297
class FileLock(object):
    """
    Context manager holding an exclusive ``fcntl.lockf`` lock on a file.

    The file ``fn`` is created if it does not exist. The lock is taken
    on entry and released on exit; the same instance must not be
    entered again while it is held.
    """

    def __init__(self, fn):
        """
        :param fn: path of the lock file
        """
        self.fn = fn
        self.fd = None

    def __enter__(self):
        """
        Open the lock file and block until the exclusive lock is taken.

        :return: this FileLock instance
        """
        # compare with None: 0 is a valid file descriptor
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except BaseException:
            # do not leak the descriptor when the lock cannot be taken
            os.close(fd)
            raise
        self.fd = fd
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Release the lock and close the lock file.
        """
        assert self.fd is not None
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # closing also drops the lock, so always close and reset
            os.close(self.fd)
            self.fd = None
313
314
class Error(Exception):
    """
    Error
    """
    # NOTE: the class docstring is runtime data, not just documentation:
    # __str__ uses it (stripped) as the leading part of the message, so
    # each subclass states its message in its own docstring.

    def __str__(self):
        # A subclass without a docstring has __doc__ set to None
        # (docstrings are not inherited); fall back to the class name
        # instead of failing while formatting the error.
        doc = _bytes2str((self.__doc__ or type(self).__name__).strip())
        try:
            str_type = basestring
        except NameError:
            str_type = str
        parts = [doc]
        for arg in self.args:
            # decode bytes before anything else so that they are not
            # rendered as "b'...'" by str() on Python 3
            arg = _bytes2str(arg)
            if not isinstance(arg, str_type):
                arg = _bytes2str(str(arg))
            parts.append(arg)
        return ': '.join(parts)
328
329
# The docstring of each class below is what Error.__str__ prints in front
# of the exception arguments, so they are messages rather than plain
# documentation.
class MountError(Error):
    """
    Mounting filesystem failed
    """


class UnmountError(Error):
    """
    Unmounting filesystem failed
    """


class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """


# Raised by must_be_one_line() when the text lacks a final newline.
class TruncatedLineError(Error):
    """
    Line is truncated
    """


# Raised by must_be_one_line() when the text holds more than one line.
class TooManyLinesError(Error):
    """
    Too many lines
    """


class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
364
365
class CephDiskException(Exception):
    """
    Root of the ceph-disk exceptions carrying a custom (ad-hoc) message,
    meant to be caught and dealt with when main() is executed
    """


class ExecutableNotFound(CephDiskException):
    """
    Reports an executable that is not available in PATH
    """
379
380
def is_systemd():
    """
    Tell whether the command name of process 1 contains 'systemd'
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as comm:
        content = comm.read()
    return 'systemd' in content
387
388
def is_upstart():
    """
    Tell whether the output of ``init --version`` mentions upstart
    """
    stdout, _stderr, _returncode = command(['init', '--version'])
    return 'upstart' in stdout
395
396
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    The arguments are those of ``os.mkdir``; the first positional
    argument is the path of the directory.

    :raises: OSError if the directory cannot be created for a reason
             other than already existing.
    """
    path = a[0]
    # remove any symlink, if it is there. os.path.islink() does not
    # follow the link, so a dangling symlink is removed as well
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        # an already existing directory is fine
        if e.errno != errno.EEXIST:
            raise
413
414
def which(executable):
    """
    Find the location of an executable.

    The directories of PATH (or os.defpath when PATH is unset or empty)
    are searched first, followed by a fixed list of usual binary
    directories.

    :return: the path of the first executable regular file found, or
             None if there is none.
    """
    search_path = (os.environ.get('PATH') or os.defpath).split(os.pathsep)
    search_path.extend([
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ])

    for directory in search_path:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
434
435
436 def _get_command_executable(arguments):
437 """
438 Return the full path for an executable, raise if the executable is not
439 found. If the executable has already a full path do not perform any checks.
440 """
441 if os.path.isabs(arguments[0]): # an absolute path
442 return arguments
443 executable = which(arguments[0])
444 if not executable:
445 command_msg = 'Could not run command: %s' % ' '.join(arguments)
446 executable_msg = '%s not in path.' % arguments[0]
447 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
448
449 # swap the old executable for the new one
450 arguments[0] = executable
451 return arguments
452
453
def command(arguments, **kwargs):
    """
    Run a command to completion with ``subprocess.Popen``, after making
    sure that the executable exists (a helpful error is raised if it
    does not).

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are handed over to ``subprocess.Popen``.

    :return: the tuple (stdout, stderr, returncode), with both outputs
             decoded.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    child = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout, stderr = child.communicate()

    return _bytes2str(stdout), _bytes2str(stderr), child.returncode
479
480
481 def _bytes2str(string):
482 return string.decode('utf-8') if isinstance(string, bytes) else string
483
484
def command_init(arguments, **kwargs):
    """
    Start a command with ``subprocess.Popen`` without waiting for it,
    after making sure that the executable exists (a helpful error is
    raised if it does not).

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are handed over to ``subprocess.Popen``.

    :return: the process, with stdout and stderr piped; see
             command_wait() to collect its output.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    return subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
507
508
def command_wait(process):
    """
    Wait for the process to finish and collect its output.

    :return: the tuple (stdout, stderr, returncode), with both outputs
             decoded.
    """
    stdout, stderr = process.communicate()
    returncode = process.returncode
    return _bytes2str(stdout), _bytes2str(stderr), returncode
517
518
def command_check_call(arguments, exit=False):
    """
    Run a command with ``subprocess.check_call``, after making sure that
    the executable exists (a helpful error is raised if it does not).

    When ``exit`` is set to ``True`` a failing command results in a
    clean (sans traceback) system exit instead of a
    ``subprocess.CalledProcessError``.

    .. note:: This should be the preferred way of calling
    ``subprocess.check_call`` since it provides the caller with the safety net
    of making sure that executables *will* be found and will error nicely
    otherwise.
    """
    arguments = _get_command_executable(arguments)
    cmdline = ' '.join(arguments)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(arguments)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        message = "'{cmd}' failed with status code {returncode}".format(
            cmd=cmdline,
            returncode=error.returncode,
        )
        raise SystemExit(message)
547
548
549 #
550 # An alternative block_path implementation would be
551 #
552 # name = basename(dev)
553 # return /sys/devices/virtual/block/$name
554 #
555 # It is however more fragile because it relies on the fact
556 # that the basename of the device the user will use always
557 # matches the one the driver will use. On Ubuntu 14.04, for
558 # instance, when multipath creates a partition table on
559 #
560 # /dev/mapper/353333330000007d0 -> ../dm-0
561 #
562 # it will create partition devices named
563 #
564 # /dev/mapper/353333330000007d0-part1
565 #
566 # which is the same device as /dev/dm-1 but not a symbolic
567 # link to it:
568 #
569 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
570 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
571 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
572 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
573 #
574 # Using the basename in this case fails.
575 #
576
577
def block_path(dev):
    """
    Return the sysfs directory of a device, found through its major and
    minor numbers. On FreeBSD the device path is returned unchanged.
    """
    if FREEBSD:
        return dev
    devno = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS,
        M=os.major(devno),
        m=os.minor(devno),
    )
585
586
def get_dm_uuid(dev):
    """
    Read the device-mapper uuid of a device from its ``dm/uuid`` file.

    :return: the content of the file, unstripped, or False if the file
             does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # close the file right away instead of leaving it to the garbage
    # collector
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
595
596
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The result is only meant to be used as a boolean: it is a falsy
    value or a regular expression match object. Always False on FreeBSD.
    """
    if FREEBSD:
        return False
    uuid = get_dm_uuid(dev)
    # raw strings: '\d' in a plain string literal is an invalid escape
    # sequence
    return (uuid and
            (re.match(r'part\d+-mpath-', uuid) or
             re.match(r'mpath-', uuid)))
607
608
def get_dev_name(path):
    """
    Turn a device path into a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The name is the path relative to /dev with every '/' replaced by
    '!'. The path must start with /dev/.
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    relative = path[len(prefix):]
    return '!'.join(relative.split('/'))
624
625
def get_dev_path(name):
    """
    Turn a device name (cciss!c0d1) into a device path, e.g.::

        sdb -> /dev/sdb, cciss!c0d1 -> /dev/cciss/c0d1

    """
    relative = '/'.join(name.split('!'))
    return '/dev/' + relative
636
637
def get_dev_relpath(name):
    """
    Turn a device name (cciss!c0d1) into a path relative to /dev
    """
    return '/'.join(name.split('!'))
643
644
def get_dev_size(dev, size='megabytes'):
    """
    Attempt to get the size of a device so that we can prevent errors
    from actions to devices that are smaller, and improve error reporting.

    The size is found by seeking to the end of the device. Because we
    want to avoid breakage in case this approach is not robust, a failure
    to get the size only results in a warning.

    :param dev: the device to calculate the size
    :param size: bytes or megabytes; anything else means megabytes
    :return: the size, rounded down, or None if it could not be found
    """
    megabyte = 1024 * 1024
    units = {'bytes': 1, 'megabytes': megabyte}
    fd = os.open(dev, os.O_RDONLY)
    try:
        end = os.lseek(fd, 0, os.SEEK_END)
        # default to megabytes
        return end // units.get(size, megabyte)
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(fd)
    return None
666
667
def stmode_is_diskdevice(dmode):
    """
    Tell whether a stat mode is the one of a disk device: a block
    device or, on FreeBSD, a character device.
    """
    # FreeBSD does not have block devices
    # All disks are character devices
    return stat.S_ISBLK(dmode) or (FREEBSD and stat.S_ISCHR(dmode))
675
676
def dev_is_diskdevice(dev):
    """
    Tell whether ``dev`` is a disk device, following symbolic links.
    """
    return stmode_is_diskdevice(os.stat(dev).st_mode)
680
681
def ldev_is_diskdevice(dev):
    """
    Tell whether ``dev`` itself is a disk device, without following a
    symbolic link.
    """
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
685
686
def path_is_diskdevice(path):
    """
    Tell whether the real path behind ``path`` is a disk device.
    """
    return dev_is_diskdevice(os.path.realpath(path))
690
691
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of the multipath device ``dev`` whose dm
    uuid is the one of partition number ``pnum``, or None.
    """
    pattern = "part{pnum}-mpath-".format(pnum=pnum)
    found = list_partitions_mpath(dev, pattern)
    return found[0] if found else None
699
700
def get_partition_dev(dev, pnum):
    """
    get the device path of a partition

    assume that partitions are named like the base dev,
    with a number, and optionally
    some intervening characters (like 'p'). e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    The lookup is retried up to 10 times, 0.2 seconds apart, when the
    partition is not found.

    :raises: Error if the partition still cannot be found.
    """
    max_retry = 10
    for attempt in range(max_retry + 1):
        where = ""
        if is_mpath(dev):
            partname = get_partition_mpath(dev, pnum)
        else:
            name = get_dev_name(os.path.realpath(dev))
            sys_entry = os.path.join(BLOCKDIR, name)
            where = " in %s" % sys_entry
            # we want the shortest name that starts with the base name
            # and ends with the partition number
            candidates = [entry for entry in os.listdir(sys_entry)
                          if entry.startswith(name) and
                          entry.endswith(str(pnum))]
            partname = min(candidates, key=len) if candidates else None

        if partname:
            if attempt:
                LOG.info('Found partition %d for %s after %d tries' %
                         (pnum, dev, attempt))
            return get_dev_path(partname)

        if attempt >= max_retry:
            raise Error('partition %d for %s does not appear to exist%s' %
                        (pnum, dev, where))

        LOG.info('Try %d/%d : partition %d for %s does not exist%s' %
                 (attempt + 1, max_retry, pnum, dev, where))
        time.sleep(.2)
742
743
def list_all_partitions():
    """
    Return a dict mapping each device name to the list of its partitions

    The device names come from BLOCKDIR, or from the PROCDIR partitions
    table on FreeBSD.
    """
    # initialised before the platform switch: the FreeBSD branch used to
    # fill a dict that was only created on the other branch
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                # the name is the fourth column; shorter lines are skipped
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(get_dev_path(name))
    return dev_part_list
764
765
def list_partitions(dev):
    """
    Return the list of partitions of a device, multipath or not.
    """
    real_dev = os.path.realpath(dev)
    if is_mpath(real_dev):
        lister = list_partitions_mpath
    else:
        lister = list_partitions_device
    return lister(real_dev)
772
773
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    Return the holders of a multipath device whose dm uuid matches
    ``part_re`` (by default, the uuid of any partition).

    :param dev: the multipath device
    :param part_re: regular expression matched at the start of the dm
                    uuid of each holder
    """
    partitions = []
    holders = os.path.join(block_path(dev), 'holders')
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # close each file right away instead of leaking one handle per
        # holder
        with open(uuid_path, 'r') as uuid_file:
            dm_uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path +
                  " uuid = " + dm_uuid)
        if re.match(part_re, dm_uuid):
            partitions.append(holder)
    return partitions
785
786
def list_partitions_device(dev):
    """
    Return the entries of the sysfs directory of a device whose name
    starts with the device name, i.e. its partitions
    """
    basename = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(basename)]
797
798
def get_partition_base(dev):
    """
    Return the path of the device a partition belongs to

    :raises: Error if ``dev`` is not a disk device, is not a partition
             or has no parent device.
    """
    # NOTE(review): /sys/block is hard-coded here while other helpers go
    # through BLOCKDIR -- confirm whether that is intended
    sys_block = '/sys/block'

    dev = os.path.realpath(dev)
    if not ldev_is_diskdevice(dev):
        raise Error('not a block device', dev)

    name = get_dev_name(dev)
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', dev)

    # the base is the device with an entry for the partition
    for basename in os.listdir(sys_block):
        candidate = os.path.join(sys_block, basename, name)
        if os.path.exists(candidate):
            return get_dev_path(basename)
    raise Error('no parent device for partition', dev)
816
817
def is_partition_mpath(dev):
    """
    True if the dm uuid of ``dev`` is the one of a multipath partition.
    """
    uuid = get_dm_uuid(dev)
    # get_dm_uuid() returns False when there is no dm uuid, which
    # re.match() would reject with a TypeError
    return bool(uuid and re.match(r'part\d+-mpath-', uuid))
821
822
def partnum_mpath(dev):
    """
    Return the partition number, as a string, found in the dm uuid of a
    multipath partition.

    :raises: Error if the dm uuid is missing or holds no partition
             number.
    """
    uuid = get_dm_uuid(dev)
    # get_dm_uuid() returns False when there is no dm uuid
    numbers = re.findall(r'part(\d+)-mpath-', uuid) if uuid else []
    if not numbers:
        raise Error('not a multipath partition', dev)
    return numbers[0]
826
827
def get_partition_base_mpath(dev):
    """
    Return the /dev/mapper path of the device a multipath partition
    belongs to, using the dm name of its first slave.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # close the file right away instead of leaving it to the garbage
    # collector
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
835
836
def is_partition(dev):
    """
    Tell whether a device path is a partition (True) or a full disk
    (False).

    :raises: Error if ``dev`` is not a disk device, or is neither a disk
             nor a partition.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    st = os.lstat(dev)
    if not stmode_is_diskdevice(st.st_mode):
        raise Error('not a block device', dev)

    # a full disk has its own entry in BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    # NOTE(review): /sys is hard-coded here rather than taken from SYSFS
    devno = (os.major(st.st_rdev), os.minor(st.st_rdev))
    if os.path.exists('/sys/dev/block/%d:%d/partition' % devno):
        return True

    raise Error('not a disk or partition', dev)
860
861
def is_mounted(dev):
    """
    Check if the given device is mounted.

    :param dev: path of the device; it is resolved with
                os.path.realpath before being looked up
    :return: the mount point, as text, of the first matching entry of
             the PROCDIR mounts table, or None if there is none.
    """
    dev = os.path.realpath(dev)
    # The mounts table is read in binary mode, so its fields are bytes.
    # Compare bytes with bytes: on Python 3 comparing bytes with str is
    # always False and no device would ever be reported as mounted.
    if not isinstance(dev, bytes):
        dev = dev.encode('utf-8')
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            # only existing absolute paths can be devices
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return _bytes2str(path)
    return None
879
880
def is_held(dev):
    """
    List the holders of a device, i.e. the devices using it (e.g., a
    dm-crypt mapping)

    :return: the entries of the sysfs ``holders`` directory of the
             device; an empty list for a multipath device or if no such
             directory is found.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    part = get_dev_name(os.path.realpath(dev))

    # full disk?
    directory = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(directory):
        return os.listdir(directory)

    # partition? try every prefix of the name as the base device,
    # longest first
    for end in range(len(part), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
906
907
def verify_not_in_use(dev, check_partitions=False):
    """
    Make sure a device (path) is not in use (e.g. mounted or
    in use by device-mapper).

    :param dev: path of the device; it must exist
    :param check_partitions: when ``dev`` is not a partition, also
                             verify each of its partitions
    :raises: Error if device is in use.
    """
    assert os.path.exists(dev)
    if is_mounted(dev):
        raise Error('Device is mounted', dev)
    holders = is_held(dev)
    if holders:
        raise Error('Device %s is in use by a device-mapper '
                    'mapping (dm-crypt?)' % dev, ','.join(holders))

    if not check_partitions or is_partition(dev):
        return

    for partition in (get_dev_path(p) for p in list_partitions(dev)):
        if is_mounted(partition):
            raise Error('Device is mounted', partition)
        holders = is_held(partition)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)'
                        % partition, ','.join(holders))
933
934
def must_be_one_line(line):
    """
    Make sure the given text is exactly one newline-terminated line.

    :raises: TruncatedLineError if the final newline is missing,
             TooManyLinesError if there is more than one line.
    :return: Content of the line, without the newline.
    """
    text = _bytes2str(line)
    body, newline = text[:-1], text[-1:]

    if newline != '\n':
        raise TruncatedLineError(text)
    if '\n' in body:
        raise TooManyLinesError(body)
    return body
950
951
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :param parent: directory of the file
    :param name: name of the file
    :raises: Error if the content is not exactly one line.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # close the file right away instead of leaving it to the
        # garbage collector
        with open(path, 'rb') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
979
980
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline. The line is written to a temporary file next to the
    destination, which is then renamed over it.

    :param parent: directory of the file
    :param name: name of the file
    :param text: the line, without newline
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # the file object is buffered: flush it first, or fsync() has
        # nothing to write to the disk yet
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
994
995
def init_get():
    """
    Ask 'ceph-detect-init' for the init system, with sysvinit as the
    default

    :return: the single line printed by the command.
    """
    argv = [
        'ceph-detect-init',
        '--default', 'sysvinit',
    ]
    return must_be_one_line(_check_output(args=argv))
1008
1009
def check_osd_magic(path):
    """
    Make sure the 'magic' file found in ``path`` holds the Ceph OSD
    magic.

    :raises: BadMagicError if this does not look like a Ceph OSD data
             dir.
    """
    # a missing file (probably not mkfs'ed yet) reads as None, which is
    # rejected just like a wrong magic
    if read_one_line(path, 'magic') != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1023
1024
def check_osd_id(osd_id):
    """
    Make sure the osd id is made of decimal digits only.

    :raises: Error if it is not.
    """
    if re.match(r'^[0-9]+$', osd_id) is None:
        raise Error('osd id is not numeric', osd_id)
1031
1032
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
):
    """
    Allocates an OSD id on the given cluster, by running
    ``ceph osd create`` as client.bootstrap-osd.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """

    LOG.debug('Allocating OSD id...')
    argv = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'osd', 'create', '--concise',
        fsid,
    ]
    try:
        output = _check_output(args=argv)
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(output)
    check_osd_id(osd_id)
    return osd_id
1062
1063
def get_osd_id(path):
    """
    Read the OSD id from the 'whoami' file of the OSD at the given path.

    :return: the validated OSD id, or None if there is no such file.
    """
    whoami = read_one_line(path, 'whoami')
    if whoami is None:
        return None
    check_osd_id(whoami)
    return whoami
1072
1073
def get_ceph_user():
    """
    Return the user to run things as.

    That is CEPH_PREF_USER when it is set; the program exits with
    status 2 if there is no such user. Otherwise it is 'ceph' when that
    user exists, and 'root' when it does not.
    """
    if CEPH_PREF_USER is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(CEPH_PREF_USER)
    except KeyError:
        print("No such user:", CEPH_PREF_USER)
        sys.exit(2)
    return CEPH_PREF_USER
1090
1091
def get_ceph_group():
    """
    Return the group to run things as.

    That is CEPH_PREF_GROUP when it is set; the program exits with
    status 2 if there is no such group. Otherwise it is 'ceph' when that
    group exists, and 'root' when it does not.
    """
    if CEPH_PREF_GROUP is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(CEPH_PREF_GROUP)
    except KeyError:
        print("No such group:", CEPH_PREF_GROUP)
        sys.exit(2)
    return CEPH_PREF_GROUP
1108
1109
def path_set_context(path):
    """
    Recursively restore the selinux context of a path, if restorecon is
    available, and give it to ceph:ceph if the ceph user is in use.
    """
    # restore selinux context to default policy values
    restorecon = which('restorecon')
    if restorecon:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
1118
1119
def _check_output(args=None, **kwargs):
    """
    Run a command with command() and return its standard output.

    :raises: subprocess.CalledProcessError if the return code is not
             zero; its ``output`` is the standard output followed by
             the standard error.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return _bytes2str(out)
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
1128
1129
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster, by running ``ceph-conf --lookup``.

    :param cluster: name of the cluster
    :param variable: name of the configuration variable
    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # there is no stderr to report when the command could not run
        # at all: referring to it here used to fail with an
        # UnboundLocalError instead of raising the intended Error
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    # only the first line of the output is the value
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1163
1164
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code, by running
    ``ceph-osd --show-config-value``.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if the command fails.
    :return: the first line of the output of the command.
    """
    argv = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        out = _check_output(args=argv, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line, _sep, _rest = str(out).partition('\n')
    return first_line
1193
1194
def get_fsid(cluster):
    """
    Get the fsid of the cluster, in lower case.

    :return: The fsid or raises Error.
    """
    value = get_conf_with_default(cluster=cluster, variable='fsid')
    if value is not None:
        return value.lower()
    raise Error('getting cluster uuid from configuration failed')
1205
1206
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Build the path of a dmcrypt key file: the uuid in ``key_dir``, with
    a ``.luks.key`` suffix when ``luks`` is true.

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    filename = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, filename)
1223
1224
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Locate the dm-crypt key for the partition ``_uuid``.

    Two storage schemes are tried, in this order:

    1. a legacy key file in ``key_dir`` (see get_dmcrypt_key_path);
    2. a lockbox directory ``STATEDIR/osd-lockbox/<_uuid>``, in which
       case the key is fetched with ``ceph config-key get`` using the
       keyring found in that directory.

    :return: A one-element tuple holding the key file path for the
             legacy scheme, or the base64-decoded key for the lockbox
             scheme.
    :raises: Error if no key can be found, if the lockbox has no
             usable cluster, if the key-management-mode is unknown or
             if the ``ceph config-key get`` command fails.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        return (legacy_path,)
    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if os.path.exists(path):
        mode = get_oneliner(path, 'key-management-mode')
        osd_uuid = get_oneliner(path, 'osd-uuid')
        ceph_fsid = read_one_line(path, 'ceph_fsid')
        if ceph_fsid is None:
            raise Error('No cluster uuid assigned.')
        cluster = find_cluster_by_uuid(ceph_fsid)
        if cluster is None:
            raise Error('No cluster conf found in ' + SYSCONFDIR +
                        ' with fsid %s' % ceph_fsid)

        if mode == KEY_MANAGEMENT_MODE_V1:
            key, stderr, ret = command(
                [
                    'ceph',
                    '--cluster', cluster,
                    '--name',
                    'client.osd-lockbox.' + osd_uuid,
                    '--keyring',
                    os.path.join(path, 'keyring'),
                    'config-key',
                    'get',
                    'dm-crypt/osd/' + osd_uuid + '/luks',
                ],
            )
            LOG.debug("stderr %s", stderr)
            # An explicit check instead of an assert: asserts are
            # stripped with -O, which would let the output of a failed
            # command be decoded and returned as if it were the key.
            if ret != 0:
                raise Error(
                    'unable to fetch dm-crypt key for osd ' + osd_uuid,
                    'ceph config-key get exited with status %s' % ret,
                )
            return base64.b64decode(key)
        else:
            raise Error('unknown key-management-mode ' + str(mode))
    raise Error('unable to read dm-crypt key', path, legacy_path)
1265
1266
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map ``rawdev`` through dm-crypt as ``/dev/mapper/<_uuid>``.

    If the mapping already exists (per dmcrypt_is_mapped) it is
    returned as is and nothing else is done.

    :param rawdev: the underlying device to map.
    :param key: either a one-element tuple holding the path of a
                legacy key file, or the key itself, which is then fed
                to cryptsetup on stdin (``--key-file -``).
    :param _uuid: name of the mapping.
    :param cryptsetup_parameters: list of extra cryptsetup arguments,
                appended to the luksFormat and create commands.
    :param luks: use LUKS (luksFormat/luksOpen) when true, plain
                mode (create) otherwise.
    :param format_dev: in LUKS mode, run luksFormat before luksOpen.
    :return: The path of the mapped device.
    :raises: Error if the legacy key file is missing or if a
             cryptsetup / chown command fails.
    """
    dev = dmcrypt_is_mapped(_uuid)
    if dev:
        return dev

    if isinstance(key, tuple):
        # legacy, before lockbox
        keypath = key[0]
        if not os.path.exists(keypath):
            raise Error('dm-crypt key file does not exist', keypath)
        key = None
    else:
        keypath = '-'
    dev = '/dev/mapper/' + _uuid
    luksFormat_args = [
        'cryptsetup',
        '--batch-mode',
        '--key-file',
        keypath,
        'luksFormat',
        rawdev,
    ] + cryptsetup_parameters

    luksOpen_args = [
        'cryptsetup',
        '--key-file',
        keypath,
        'luksOpen',
        rawdev,
        _uuid,
    ]

    create_args = [
        'cryptsetup',
        '--key-file',
        keypath,
        'create',
        _uuid,
        rawdev,
    ] + cryptsetup_parameters

    def run(args, stdin):
        # Run one cryptsetup command, feeding it the key on stdin.
        LOG.info(" ".join(args))
        process = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        out, err = process.communicate(stdin)
        LOG.debug(out)
        LOG.error(err)
        # Raise CalledProcessError rather than asserting, so that the
        # handler below turns the failure into an Error like it does
        # for the chown call.
        if process.returncode != 0:
            raise subprocess.CalledProcessError(
                process.returncode, args, out)

    try:
        if luks:
            if format_dev:
                run(luksFormat_args, key)
            run(luksOpen_args, key)
        else:
            # Plain mode has no format function, nor any validation
            # that the key is correct.
            run(create_args, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', dev])
        return dev

    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
1341
1342
def dmcrypt_unmap(
    _uuid
):
    """
    Remove the dm-crypt mapping ``/dev/mapper/<_uuid>`` if it exists.

    ``cryptsetup remove`` is retried with an increasing delay; after
    the tenth retry fails an Error is raised.
    """
    if not os.path.exists('/dev/mapper/' + _uuid):
        return

    max_retries = 10
    for attempt in range(max_retries + 1):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
            return
        except subprocess.CalledProcessError as e:
            if attempt == max_retries:
                raise Error('unable to unmap device', _uuid, e)
            # incremental backoff before the next attempt
            time.sleep(0.5 + attempt * 1.0)
1359
1360
def mount(
    dev,
    fstype,
    options,
):
    """
    Mount a device with the given filesystem type and mount options
    on a freshly created temporary directory under STATEDIR/tmp.

    :param dev: device to mount, must not be None.
    :param fstype: filesystem type, must not be None.
    :param options: mount options; when None, the MOUNT_OPTIONS entry
                    for ``fstype`` (or an empty string) is used.
    :return: The path of the mount point.
    :raises: ValueError if dev or fstype is None, MountError if the
             mount command fails.
    """
    # sanity check: none of the arguments are None
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp expects its parent directory to exist already
    tmp_root = STATEDIR + '/tmp'
    if not os.path.exists(tmp_root):
        os.makedirs(tmp_root)

    path = tempfile.mkdtemp(prefix='mnt.', dir=tmp_root)
    try:
        LOG.debug('Mounting %s on %s with options %s', dev, path, options)
        mount_cmd = ['mount', '-t', fstype, '-o', options, '--', dev, path]
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', path])
    except subprocess.CalledProcessError as e:
        # best effort removal of the now useless mount point
        try:
            os.rmdir(path)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return path
1418
1419
def unmount(
    path,
):
    """
    Unmount the given mount point and remove its directory.

    :raises: UnmountError if umount still fails after three retries.
    """
    max_retries = 3
    for attempt in range(max_retries + 1):
        try:
            LOG.debug('Unmounting %s', path)
            command_check_call(['/bin/umount', '--', path])
        except subprocess.CalledProcessError as e:
            # on failure, retry 3 times with incremental backoff
            if attempt == max_retries:
                raise UnmountError(e)
            time.sleep(0.5 + attempt * 1.0)
        else:
            break

    os.rmdir(path)
1447
1448
1449 ###########################################
1450
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from ``parted --machine`` output.

    Every line starting with digits contributes one number.

    :param partitions: the partition lines printed by parted.
    :return: A list of int, empty if no line starts with a number.
    """
    # A real list (not a lazy map object) so that callers can test it
    # for emptiness on python 3 as well.
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1454
1455
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    The index is derived from the output of
    ``parted --machine -- <dev> print``.

    :return: Index number (> 1 if there is already a partition on the
             device) or 1 if there is no partition table.
    :raises: Error if the parted output is empty or does not have the
             expected format.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # Split on the first occurrence only: the unpacking must not fail
    # if the device path shows up again later in the output.
    _, partitions = lines.split(real_dev, 1)
    # Materialize the numbers: a lazy map object is always true on
    # python 3 and max() would then fail on an empty partition table.
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    else:
        return 1
1494
1495
def check_journal_reqs(args):
    """
    Ask ceph-osd whether the osd backend allows, wants and needs a
    journal.

    ceph-osd is run once per question, in that order.

    :param args: parsed arguments, only ``args.cluster`` is used.
    :return: A tuple of three booleans (allows_journal, wants_journal,
             needs_journal), each true when the matching ceph-osd
             check returned 0.
    """
    def osd_check(flag):
        # Run a single ceph-osd --check-*-journal query.
        _, _, ret = command([
            'ceph-osd', flag,
            '-i', '0',
            '--log-file', '$run_dir/$cluster-osd-check.log',
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        return not ret

    allows = osd_check('--check-allows-journal')
    wants = osd_check('--check-wants-journal')
    needs = osd_check('--check-needs-journal')
    return (allows, wants, needs)
1522
1523
def update_partition(dev, description):
    """
    Must be called after modifying a partition table so the kernel
    know about the change and fire udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The first udevadm settle waits for ongoing udev events to
    complete, just in case one of them rely on an existing partition
    on dev.  The second udevadm settle guarantees to the caller that
    all udev events related to the partition table change have been
    processed, i.e. the 95-ceph-osd.rules actions and mode changes,
    group changes etc. are complete.

    partprobe is attempted up to five times, sleeping 60 seconds
    after a failure caused by a busy device; any other failure is
    re-raised immediately.

    :param dev: the device whose partition table changed.
    :param description: a word describing the change, for logging.
    :raises: Error if partprobe failed on all attempts.
    """
    def settle():
        # Wait for the udev event queue to drain.
        command_check_call(['udevadm', 'settle', '--timeout=600'])

    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    partprobe = _get_command_executable(['partprobe'])[0]
    for _ in range(5):
        settle()
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
            break
        except subprocess.CalledProcessError as e:
            error = e.output
            if not ('unable to inform the kernel' in error or
                    'Device or resource busy' in error):
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
    else:
        # the loop ran out of attempts without a successful partprobe
        raise Error('partprobe %s failed : %s' % (dev, error))
    settle()
1557
1558
def zap_linux(dev):
    """
    Wipe the partitions and the partition table of ``dev`` on Linux.

    Each existing partition is cleaned with wipefs and has its first
    10M overwritten with zeros, the end of the device is zeroed, and
    sgdisk then destroys and recreates an empty GPT.

    :raises: Error if one of the external commands fails.
    """
    try:
        # Thoroughly wipe all partitions of any traces of
        # Filesystems or OSD Journals
        #
        # In addition we need to write 10M of data to each partition
        # to make sure that after re-creating the same partition
        # there is no trace left of any previous Filesystem or OSD
        # Journal

        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for partname in list_partitions(dev):
            partition = get_dev_path(partname)
            command_check_call(
                [
                    'wipefs',
                    '--all',
                    partition,
                ],
            )

            command_check_call(
                [
                    'dd',
                    'if=/dev/zero',
                    'of={path}'.format(path=partition),
                    'bs=1M',
                    'count=10',
                ],
            )

        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups.  sgdisk
        # isn't too thorough.
        # Zero the last 33 blocks of the device, assuming 4096 byte
        # logical blocks.
        lba_size = 4096
        size = 33 * lba_size
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-size, os.SEEK_END)
            dev_file.write(size * b'\0')

        command_check_call(
            [
                'sgdisk',
                '--zap-all',
                '--',
                dev,
            ],
        )
        command_check_call(
            [
                'sgdisk',
                '--clear',
                '--mbrtogpt',
                '--',
                dev,
            ],
        )
        # let the kernel and udev catch up with the new, empty table
        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1622
1623
def zap_freebsd(dev):
    """
    Destroy the partition table of ``dev`` on FreeBSD.

    :raises: Error if the gpart command fails.
    """
    # For FreeBSD we just need to zap the partition.
    gpart_destroy = ['gpart', 'destroy', '-F', dev]
    try:
        command_check_call(gpart_destroy)
    except subprocess.CalledProcessError as e:
        raise Error(e)
1638
1639
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    :param dev: path of a whole block device; symlinks are resolved.
    :raises: Error if dev is not a block device or is a partition.
    """
    dev = os.path.realpath(dev)
    mode = os.stat(dev).st_mode
    is_whole_disk = stat.S_ISBLK(mode) and not is_partition(dev)
    if not is_whole_disk:
        raise Error('not full block device; cannot zap', dev)

    # dispatch to the platform specific implementation
    zap_platform = zap_freebsd if FREEBSD else zap_linux
    zap_platform(dev)
1652
1653
def adjust_symlink(target, path):
    """
    Make sure ``path`` is a symlink pointing to ``target``.

    A regular file found at ``path``, or a symlink pointing somewhere
    else, is removed first.  A symlink that already points to
    ``target`` is left untouched.  Anything else found at ``path`` is
    not removed, so creating the symlink over it fails.

    :raises: Error if the old entry cannot be inspected or removed,
             or if the symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        # Only filesystem errors are expected here; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except (OSError, IOError) as e:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path, e)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except (OSError, IOError) as e:
            raise Error('unable to create symlink %s -> %s' % (path, target),
                        e)
1678
1679
def get_mount_options(cluster, fs_type):
    """
    Look up the mount options configured for a filesystem type.

    ``osd_mount_options_<fs_type>`` is looked up first and returned
    with all whitespace removed.  If it is not set, the value of
    ``osd_fs_mount_options_<fs_type>`` is returned unmodified.

    :return: The mount options, or None if neither option is set.
    """
    options = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(fstype=fs_type),
    )
    if options is not None:
        # remove whitespaces
        return "".join(options.split())

    return get_conf(
        cluster,
        variable='osd_fs_mount_options_{fstype}'.format(fstype=fs_type),
    )
1698
1699
class Device(object):
    """
    A whole disk (not a partition) on which partitions are created.

    The device size and the DevicePartition objects are computed on
    first use and cached on the instance.
    """

    def __init__(self, path, args):
        self.args = args
        self.path = path
        self.dev_size = None
        self.partitions = {}
        self.ptype_map = None
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a GPT partition with sgdisk.

        :param uuid: partition GUID to assign.
        :param name: role of the partition; it selects the 'tobe'
                     type code and is used in the partition label.
        :param size: size in MB, 0 to take the largest free block.
        :param num: partition number, 0 to use the next free index.
        :return: The number of the partition that was created.
        :raises: Error if size is bigger than the device size.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size > 0:
            if size > self.get_dev_size():
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, self.get_dev_size()))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, self.get_dev_size(), name))
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
        else:
            new = '--largest-new={num}'.format(num=num)

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type code for the given role.

        'data' is an alias for 'osd'.  The lockbox type only depends
        on the device being multipath or not; for other roles the
        type map of the matching DevicePartition class is used.
        """
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if self.ptype_map is None:
            # borrow the type map from a throw-away partition object
            probe = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = probe.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """
        Return the (cached) DevicePartition for partition ``num``.
        """
        if num in self.partitions:
            return self.partitions[num]
        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return self.partitions[num]

    def get_dev_size(self):
        """
        Return the (cached) size of the device, from get_dev_size().
        """
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        """
        Build a Device for ``path``.
        """
        return Device(path, args)
1775
1776
class DevicePartition(object):
    """
    A partition of a device, without multipath or encryption.

    ``rawdev`` is the partition itself and ``dev`` the device to use
    to access its content; set_dev() sets both to the same value.
    The partition uuid and type are read on first use and cached.
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        self.rawdev = None
        self.dev = None
        self.uuid = None
        self.ptype_map = None
        self.ptype = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the (cached) partition uuid of rawdev."""
        if self.uuid is None:
            self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the (cached) partition type of rawdev."""
        if self.ptype is None:
            self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        # dev and rawdev start out as the same device
        self.rawdev = dev
        self.dev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        """Select the partition type map matching this class."""
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type code for a role."""
        return self.ptype_map[name]['ready']

    @staticmethod
    def factory(path, dev, args):
        """
        Build the DevicePartition flavor matching the device and args.

        Multipath takes precedence over encryption: if either
        ``path`` or ``dev`` is a multipath device a
        DevicePartitionMultipath is returned, whatever the dmcrypt
        type is.  The new object has its dev set to ``dev``.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_multipath = ((path is not None and is_mpath(path)) or
                        (dev is not None and is_mpath(dev)))
        if on_multipath:
            partition = DevicePartitionMultipath(args)
        elif dmcrypt_type == 'luks':
            partition = DevicePartitionCryptLuks(args)
        elif dmcrypt_type == 'plain':
            partition = DevicePartitionCryptPlain(args)
        else:
            partition = DevicePartition(args)
        partition.set_dev(dev)
        return partition
1835
1836
class DevicePartitionMultipath(DevicePartition):
    """
    A partition of a multipath device.

    It only differs from DevicePartition by the partition type map.
    """

    def set_variables_ptype(self):
        # use the partition types reserved for multipath devices
        self.ptype_map = PTYPE['mpath']
1841
1842
class DevicePartitionCrypt(DevicePartition):
    """
    Common part of the dm-crypt encrypted partitions.

    Subclasses provide luks() and a setup_crypt() that fetches the
    key into ``osd_dm_key``.  Once mapped, ``dev`` is the dm-crypt
    device while ``rawdev`` stays the underlying partition.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        self.osd_dm_key = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        """Hook for subclasses, nothing to do here."""
        pass

    def map(self):
        """Map rawdev through dm-crypt and make dev the mapping."""
        self.setup_crypt()
        mapped_dev = _dmcrypt_map(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = mapped_dev

    def unmap(self):
        """Remove the dm-crypt mapping and point dev back at rawdev."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        """Format the partition, which is done as part of map()."""
        self.setup_crypt()
        self.map()
1875
1876
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """A partition encrypted with dm-crypt in plain mode."""

    def luks(self):
        return False

    def setup_crypt(self):
        """
        Fetch the key and add the key size to the cryptsetup
        parameters.  Does nothing once the key is known.
        """
        if self.osd_dm_key is None:
            self.cryptsetup_parameters += [
                '--key-size', str(self.dmcrypt_keysize)]

            self.osd_dm_key = get_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir,
                False)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1894
1895
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """A partition encrypted with dm-crypt in LUKS mode."""

    def luks(self):
        return True

    def setup_crypt(self):
        """
        Fetch the key and, unless the key size is 1024, add the key
        size to the cryptsetup parameters.  Does nothing once the key
        is known.
        """
        if self.osd_dm_key is None:
            # A key size of 1024 is not forced into the
            # cryptsetup_parameters, as we want the cryptsetup
            # defaults to prevail for the actual LUKS key lengths.
            if self.dmcrypt_keysize != 1024:
                self.cryptsetup_parameters += ['--key-size',
                                               str(self.dmcrypt_keysize)]

            self.osd_dm_key = get_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir,
                True)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']
1920
1921
class Prepare(object):
    """
    Entry point of the ``prepare`` subcommand.

    The actual work is done by the ``_prepare`` method of the
    subclass selected by factory().
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """
        Build the parser for the options shared by all objectstores.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )
        parser.add_argument(
            '--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to',
        )
        parser.add_argument(
            '--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to',
        )
        parser.add_argument(
            '--crush-device-class',
            help='crush device class to assign this disk to',
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt',
        )
        parser.add_argument(
            '--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )
        parser.add_argument(
            '--prepare-key',
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            default='{statedir}/bootstrap-osd/{cluster}.keyring',
            dest='prepare_key_template',
        )
        parser.add_argument(
            '--no-locking',
            action='store_true', default=None,
            help='let many prepare\'s run in parallel',
        )
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """
        Register the ``prepare`` subcommand on ``subparsers``.

        The subcommand inherits the options of the data, lockbox,
        filestore and bluestore parsers and runs Prepare.main.

        :return: The parser of the subcommand.
        """
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
            Lockbox.parser(),
        ]
        parents.extend(PrepareFilestore.parent_parsers())
        parents.extend(PrepareBluestore.parent_parsers())
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=textwrap.fill(textwrap.dedent("""\
            If the --bluestore argument is given, a bluestore objectstore
            will be created. If --filestore is provided, a legacy FileStore
            objectstore will be created. If neither is specified, we default
            to BlueStore.

            When an entire device is prepared for bluestore, two
            partitions are created. The first partition is for metadata,
            the second partition is for blocks that contain data.

            Unless explicitly specified with --block.db or
            --block.wal, the bluestore DB and WAL data is stored on
            the main block device. For instance:

            ceph-disk prepare --bluestore /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block, db, and wal data (the rest of the disk)


            If either --block.db or --block.wal are specified to be
            the same whole device, they will be created as partition
            three and four respectively. For instance:

            ceph-disk prepare --bluestore \\
            --block.db /dev/sdc \\
            --block.wal /dev/sdc \\
            /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block (the rest of the disk)
            /dev/sdc3 for db
            /dev/sdc4 for wal

            """)),
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(
            func=Prepare.main,
        )
        return parser

    def prepare(self):
        """
        Run ``_prepare``, holding prepare_lock unless --no-locking
        was given.
        """
        if self.args.no_locking:
            self._prepare()
        else:
            with prepare_lock:
                self._prepare()

    @staticmethod
    def factory(args):
        """
        Return a PrepareBluestore if args.bluestore is true, a
        PrepareFilestore otherwise.
        """
        if args.bluestore:
            return PrepareBluestore(args)
        else:
            return PrepareFilestore(args)

    @staticmethod
    def main(args):
        """Function run by the ``prepare`` subcommand."""
        Prepare.factory(args).prepare()
2051
2052
class PrepareFilestore(Prepare):
    """
    Prepare an OSD using the filestore objectstore: a data space and
    a journal, plus a lockbox when --dmcrypt is set.
    """

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # the lockbox attribute only exists for encrypted OSDs
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """Return the parsers of the filestore specific options."""
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        """Prepare the lockbox (if any), then data and journal."""
        encrypted = self.data.args.dmcrypt
        if encrypted:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2072
2073
class PrepareBluestore(Prepare):
    """
    Prepare an OSD using the bluestore objectstore: a data space, a
    block space and optional block.db / block.wal spaces, plus a
    lockbox when --dmcrypt is set.
    """

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # the lockbox attribute only exists for encrypted OSDs
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """
        Build the parser of the objectstore selection options; both
        store into ``bluestore``, which defaults to True.
        """
        store_parser = argparse.ArgumentParser(add_help=False)
        store_parser.add_argument(
            '--bluestore',
            dest='bluestore',
            action='store_true', default=True,
            help='bluestore objectstore',
        )
        store_parser.add_argument(
            '--filestore',
            dest='bluestore',
            action='store_false',
            help='filestore objectstore',
        )
        return store_parser

    @staticmethod
    def parent_parsers():
        """Return the parsers of the bluestore specific options."""
        parsers = [PrepareBluestore.parser()]
        parsers.append(PrepareBluestoreBlock.parser())
        parsers.append(PrepareBluestoreBlockDB.parser())
        parsers.append(PrepareBluestoreBlockWAL.parser())
        return parsers

    def _prepare(self):
        """
        Prepare the lockbox (if any), then the data together with
        block.db and block.wal (when requested) and block, in that
        order.
        """
        if self.data.args.dmcrypt:
            self.lockbox.prepare()
        optional_spaces = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        spaces = [space for attr, space in optional_spaces
                  if getattr(self.data.args, attr)]
        spaces.append(self.block)
        self.data.prepare(*spaces)
2121
2122
class Space(object):
    """Holder for the names of the known kinds of space."""

    # every kind of space an OSD can use besides its data
    NAMES = ('block', 'journal', 'block.db', 'block.wal')
2126
2127
class PrepareSpace(object):
    """
    Base class to prepare a space (journal, block, ...) of an OSD.

    A space is either absent (NONE), a regular file (FILE) or a
    block device (DEVICE).  Subclasses must set ``self.name`` before
    calling this constructor and must implement get_space_size() and
    desired_partition_number().

    The options of a space are stored in ``args`` under names
    derived from ``self.name``: ``<name>``, ``<name>_uuid``,
    ``<name>_file`` and ``<name>_dev``.
    """

    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        # make sure the space has a uuid
        if getattr(self.args, self.name + '_uuid') is None:
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        self.space_symlink = None
        self.space_dmcrypt = None

    def set_type(self):
        """
        Set ``self.type`` to NONE, FILE or DEVICE.

        When the space is wanted but was not specified and the data
        is a whole disk, the space is colocated with the data.

        :raises: Error if the space contradicts the ``--<name>-dev``
                 or ``--<name>-file`` option, or is neither a block
                 device nor a regular file.
        """
        name = self.name
        args = self.args
        if (self.wants_space() and
                dev_is_diskdevice(args.data) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        if getattr(args, name) is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.NONE
            return

        if not os.path.exists(getattr(args, name)):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), getattr(args, name))
            # a missing path will be created as a file
            self.type = self.FILE
            return

        mode = os.stat(getattr(args, name)).st_mode
        if stmode_is_diskdevice(mode):
            if getattr(args, name + '_file'):
                raise Error('%s is not a regular file' % name.capitalize(),
                            getattr(args, name))
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            getattr(args, name))
            self.type = self.FILE
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), getattr(args, name)))

    def is_none(self):
        return self.type == self.NONE

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser(name, positional=True):
        """
        Build the parser for the options of the space ``name``.

        :param name: name of the space, used to derive option names.
        :param positional: also add an optional positional argument
                           named after the space.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )

        if positional:
            parser.add_argument(
                name,
                metavar=name.upper(),
                nargs='?',
                help=('path to OSD %s disk block device;' % name +
                      ' leave out to store %s in file' % name),
            )
        return parser

    def wants_space(self):
        """Tell if the space is wanted; always true by default."""
        return True

    def populate_data_path(self, path):
        """
        Record the space in the data directory ``path``, according
        to its type.
        """
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """
        Write the ``<name>_uuid`` file and adjust the ``<name>``
        symlink in ``path``.
        """
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

    def populate_data_path_device(self, path):
        """
        Same as populate_data_path_file, and also maintain the
        ``<name>_dmcrypt`` symlink: adjusted when the space is
        encrypted, removed (if present) otherwise.
        """
        self.populate_data_path_file(path)

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                pass

    def prepare(self):
        """Prepare the space according to its type."""
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """
        Create the file backing the space if it does not exist and
        make it the target of the space symlink.
        """
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            # create an empty file
            with open(space_filename, 'wb'):
                pass
            path_set_context(space_filename)

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        self.space_symlink = space_filename

    def prepare_device(self):
        """
        Prepare the device backing the space.

        An existing partition is either reused (when its type shows
        it was prepared by ceph-disk) or symlinked directly.  On a
        whole device a new partition is created, mapped through
        dm-crypt if required, and marked ready.
        """
        reusing_partition = False

        if is_partition(getattr(self.args, self.name)):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), getattr(self.args, self.name))
            partition = DevicePartition.factory(
                path=None, dev=getattr(self.args, self.name), args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(getattr(self.args, self.name) +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          getattr(self.args, self.name))
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID.  See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, self.name + '_uuid', partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, self.name + '_uuid'))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            getattr(self.args, self.name))
                self.space_symlink = getattr(self.args, self.name)
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, self.name + '_uuid'))

        if self.args.dmcrypt:
            # the symlink points to the mapped device, the raw
            # partition is remembered separately
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, self.name + '_uuid'))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(getattr(self.args, self.name), self.args)
        num = device.create_partition(
            uuid=getattr(self.args, self.name + '_uuid'),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()
            partition.map()

        # switch the partition type from 'tobe' to 'ready'
        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                getattr(self.args, self.name),
            ],
        )
        update_partition(getattr(self.args, self.name), 'prepared')

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2381
2382
class PrepareJournal(PrepareSpace):
    """
    Prepare the journal of a filestore OSD.

    The osd backend is asked (see check_journal_reqs) whether it
    allows, wants and needs a journal.
    """

    def __init__(self, args):
        self.name = 'journal'
        journal_reqs = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = journal_reqs

        if args.journal and not self.allows_journal:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """The journal is wanted if the osd backend wants one."""
        return self.wants_journal

    def get_space_size(self):
        """Return the value of ``osd_journal_size`` as an int."""
        journal_size = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(journal_size)

    def desired_partition_number(self):
        """
        Return 2 when the journal shares the disk with the osd data,
        0 (no preference) otherwise.
        """
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        shares_data_disk = self.args.journal == self.args.data
        return 2 if shares_data_disk else 0

    @staticmethod
    def parser():
        """Build the parser of the journal options."""
        return PrepareSpace.parser('journal')
2417
2418
class PrepareBluestoreBlock(PrepareSpace):
    """PrepareSpace specialization for the bluestore 'block' space."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block space in MB, as an int.

        0 is returned when bluestore_block_size is not configured, which
        means: get as much space as possible.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        else:
            # floor division: '/' would yield a float with python3
            return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block and data share a device, 0 otherwise."""
        if self.args.block == self.args.data:
            num = 2
        else:
            num = 0
        return num

    @staticmethod
    def parser():
        """Return the PrepareSpace argument parser for 'block'."""
        return PrepareSpace.parser('block')
2446
2447
class PrepareBluestoreBlockDB(PrepareSpace):
    """PrepareSpace specialization for the bluestore 'block.db' space."""

    def __init__(self, args):
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block.db space in MB, as an int.

        When bluestore_block_db_size is unset or 0, the size is 1% of
        bluestore_block_size with a minimum of 1024 MB (1024 MB when
        bluestore_block_size is unset too).
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is None or int(block_db_size) == 0:
            block_size = get_conf(
                cluster=self.args.cluster,
                variable='bluestore_block_size',
            )
            if block_size is None:
                return 1024  # MB
            # floor division: '/' would yield a float with python3
            size = int(block_size) // 100 // 1048576
            return max(size, 1024)  # MB
        else:
            return int(block_db_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 3 when block.db and data share a device, 0 otherwise."""
        if getattr(self.args, 'block.db') == self.args.data:
            num = 3
        else:
            num = 0
        return num

    def wants_space(self):
        """Always return False."""
        return False

    @staticmethod
    def parser():
        """Return an argument parser accepting the --block.db option."""
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2491
2492
class PrepareBluestoreBlockWAL(PrepareSpace):
    """PrepareSpace specialization for the bluestore 'block.wal' space."""

    def __init__(self, args):
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block.wal space in MB, as an int.

        576 MB is used when bluestore_block_wal_size is not configured.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        else:
            # floor division: '/' would yield a float with python3
            return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal and data share a device, 0 otherwise."""
        if getattr(self.args, 'block.wal') == self.args.data:
            num = 4
        else:
            num = 0
        return num

    def wants_space(self):
        """Always return False."""
        return False

    @staticmethod
    def parser():
        """Return an argument parser accepting the --block.wal option."""
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2529
2530
class CryptHelpers(object):
    """Static helpers reading the dmcrypt related configuration values."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return osd_cryptsetup_parameters split into an argument list.

        An empty list is returned when the variable is not set.
        """
        raw_parameters = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        return [] if raw_parameters is None else shlex.split(raw_parameters)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dmcrypt key size for the dmcrypt type in use.

        osd_dmcrypt_key_size is used when set; otherwise the default is
        1024 for 'luks' and 256 for 'plain'. 0 is returned when
        get_dmcrypt_type() reports neither of them.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        kind = CryptHelpers.get_dmcrypt_type(args)

        if kind == 'luks':
            # As LUKS will hash the 'passphrase' in .luks.key into a
            # key, the default is large so that it stays a reasonable
            # value even if not updated for some time.
            return 1024 if configured is None else int(configured)

        if kind == 'plain':
            if configured is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured)
            return int(configured)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain' or None when dmcrypt is not requested.

        'luks' is the default when osd_dmcrypt_type is not set. Error is
        raised for any other configured value.
        """
        if not (hasattr(args, 'dmcrypt') and args.dmcrypt):
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )

        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2591
2592
class Lockbox(object):
    """Prepare and activate the lockbox partition of a dmcrypt OSD.

    The lockbox is an ext4 partition mounted under
    STATEDIR/osd-lockbox/<osd uuid>. It stores the osd uuid, the
    cluster fsid, the uuid of each space and a keyring for
    client.osd-lockbox.<osd uuid>.
    """

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.device = None

        # the lockbox lives on the data device unless told otherwise
        if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
            self.args.lockbox = self.args.data

    def set_partition(self, partition):
        """Use an existing partition object as the lockbox partition."""
        self.partition = partition

    @staticmethod
    def parser():
        """Return an argument parser for --lockbox and --lockbox-uuid."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--lockbox',
            help='path to the device to store the lockbox',
        )
        parser.add_argument(
            '--lockbox-uuid',
            metavar='UUID',
            help='unique lockbox uuid',
        )
        return parser

    def create_partition(self):
        """Create partition number 3 (10 MB) on the lockbox device.

        Returns the partition object obtained from the device.
        """
        self.device = Device.factory(self.args.lockbox, argparse.Namespace())
        partition_number = 3
        self.device.create_partition(uuid=self.args.lockbox_uuid,
                                     name='lockbox',
                                     num=partition_number,
                                     size=10)  # MB
        return self.device.get_partition(partition_number)

    def set_or_create_partition(self):
        """Set self.partition from args.lockbox, creating it if needed.

        When args.lockbox already is a partition it is used as is and a
        warning is logged if its type is not a ready lockbox type.
        """
        if is_partition(self.args.lockbox):
            LOG.debug('OSD lockbox device %s is a partition',
                      self.args.lockbox)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.lockbox, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('lockbox')
            if ptype not in ready:
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, str(ready)))
        else:
            LOG.debug('Creating osd partition on %s',
                      self.args.lockbox)
            self.partition = self.create_partition()

    def create_key(self):
        """Create the dmcrypt key and the keyring giving access to it.

        A random key is stored with 'ceph config-key put' under
        dm-crypt/osd/<osd uuid>/luks. The keyring of
        client.osd-lockbox.<osd uuid>, which is only allowed to get
        that key, is written in the lockbox mount point.

        Raises Error when the keyring cannot be obtained.
        """
        key_size = CryptHelpers.get_dmcrypt_keysize(self.args)
        # key_size is in bits; floor division because read() requires
        # an int and '/' would yield a float with python3
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key)
        cluster = self.args.cluster
        bootstrap = self.args.prepare_key_template.format(cluster=cluster,
                                                          statedir=STATEDIR)
        command_check_call(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                'config-key',
                'put',
                'dm-crypt/osd/' + self.args.osd_uuid + '/luks',
                base64_key,
            ],
        )
        keyring, stderr, ret = command(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                'auth',
                'get-or-create',
                'client.osd-lockbox.' + self.args.osd_uuid,
                'mon',
                ('allow command "config-key get" with key="dm-crypt/osd/' +
                 self.args.osd_uuid + '/luks"'),
            ],
        )
        LOG.debug('stderr %s', stderr)
        # an explicit check: an assert would be skipped with python -O
        if ret != 0:
            raise Error('unable to get or create the keyring of '
                        'client.osd-lockbox.' + self.args.osd_uuid,
                        stderr)
        path = self.get_mount_point()
        with open(os.path.join(path, 'keyring'), 'w') as keyring_file:
            keyring_file.write(keyring)
        write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)

    def symlink_spaces(self, path):
        """Record each space uuid and link it to the lockbox mount point.

        For every name in Space.NAMES with a <name>_uuid argument set,
        STATEDIR/osd-lockbox/<uuid> is made a symlink to the lockbox
        mount point and the uuid is written in path/<name>-uuid.
        """
        target = self.get_mount_point()
        for name in Space.NAMES:
            if (hasattr(self.args, name + '_uuid') and
                    getattr(self.args, name + '_uuid')):
                uuid = getattr(self.args, name + '_uuid')
                symlink = os.path.join(STATEDIR, 'osd-lockbox', uuid)
                adjust_symlink(target, symlink)
                write_one_line(path, name + '-uuid', uuid)

    def populate(self):
        """Format, mount and fill the lockbox partition.

        The partition type is set last, and only when the partition was
        created by create_partition() (self.device is set).
        """
        maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
        args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
        LOG.debug('Creating lockbox fs on %s: %s',
                  self.partition.get_dev(), " ".join(args))
        command_check_call(args)
        path = self.get_mount_point()
        maybe_mkdir(path)
        args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
        LOG.debug('Mounting lockbox ' + str(" ".join(args)))
        command_check_call(args)
        write_one_line(path, 'osd-uuid', self.args.osd_uuid)
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        self.create_key()
        self.symlink_spaces(path)
        write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
        if self.device is not None:
            command_check_call(
                [
                    'sgdisk',
                    '--typecode={num}:{uuid}'.format(
                        num=self.partition.get_partition_number(),
                        uuid=self.partition.ptype_for_name('lockbox'),
                    ),
                    '--',
                    get_partition_base(self.partition.get_dev()),
                ],
            )

    def get_mount_point(self):
        """Return STATEDIR/osd-lockbox/<osd uuid>."""
        return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)

    def get_osd_uuid(self):
        """Return the osd uuid found in the arguments."""
        return self.args.osd_uuid

    def activate(self):
        """Mount the lockbox read-only and trigger its partitions.

        Nothing is done when the lockbox partition is already mounted.
        Otherwise it is first mounted on a temporary directory to read
        the osd uuid, which determines the final mount point, then
        'ceph-disk trigger' is run for each <name>-uuid file found.
        """
        path = is_mounted(self.partition.get_dev())
        if path:
            LOG.info("Lockbox already mounted at " + path)
            return

        path = tempfile.mkdtemp(
            prefix='mnt.',
            dir=STATEDIR + '/tmp',
        )
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                path]
        LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
        command_check_call(args)
        try:
            self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
        finally:
            # never leave the temporary mount behind
            command_check_call(['umount', path])
            try:
                os.rmdir(path)
            except OSError as e:
                LOG.debug('Unable to remove temporary mount point %s: %s',
                          path, e)
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                self.get_mount_point()]
        LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
        command_check_call(args)
        for name in Space.NAMES + ('osd',):
            uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
            if os.path.exists(uuid_path):
                uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
                dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
                args = ['ceph-disk', 'trigger', dev]
                command_check_call(args)

    def prepare(self):
        """Verify the lockbox device is unused, then create and fill it."""
        verify_not_in_use(self.args.lockbox, check_partitions=True)
        self.set_or_create_partition()
        self.populate()
2764
2765
class PrepareData(object):
    """Prepare the data store of an OSD, on a directory or a device.

    Derived classes are expected to provide get_space_size() (it is
    called by create_data_partition) and to extend prepare_device().
    """

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.set_type()

        # fill in the uuids that were not given on the command line
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Set self.type to FILE or DEVICE from the mode of args.data.

        Raises Error when args.data is neither a directory nor a disk
        device.
        """
        mode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(mode):
            self.type = self.FILE
            return
        if stmode_is_diskdevice(mode):
            self.type = self.DEVICE
            return
        raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        """Return True when the data path is a directory."""
        return self.type == self.FILE

    def is_device(self):
        """Return True when the data path is a disk device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return an argument parser for the data related arguments."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        # the three flags default to None (not False) when absent
        flags = (
            ('--zap-disk',
             'destroy the partition table (and content) of a disk'),
            ('--data-dir',
             'verify that DATA is a dir'),
            ('--data-dev',
             'verify that DATA is a block device'),
        )
        for flag, description in flags:
            parser.add_argument(
                flag,
                action='store_true', default=None,
                help=description,
            )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a directory based data path."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Write the identification files of the OSD in path.

        Nothing is done when path already contains a 'magic' file.
        Each object of to_prepare_list is then asked to populate path.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return

        LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        if self.args.crush_device_class:
            write_one_line(path, 'crush_device_class',
                           self.args.crush_device_class)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for space in to_prepare_list:
            space.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Prepare the data path according to its type."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
        elif self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare each space, then populate the data directory.

        Raises Error when the directory does not exist or when
        --data-dev was given.
        """
        data = self.args.data

        if not os.path.exists(data):
            raise Error('data path for directory does not exist', data)

        if self.args.data_dev:
            raise Error('data path is not a block device', data)

        for space in to_prepare_list:
            space.prepare()

        self.populate_data_path_file(data, *to_prepare_list)

    def sanity_checks(self):
        """Raise unless the data device exists and is not in use."""
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        # partitions are only checked when dmcrypt is not requested
        verify_not_in_use(self.args.data,
                          check_partitions=not self.args.dmcrypt)

    def set_variables(self):
        """Resolve the fs type, mkfs arguments and mount options.

        The fs type comes from --fs-type, osd_mkfs_type, osd_fs_type or
        DEFAULT_FS_TYPE, in that order of preference.
        """
        cluster = self.args.cluster

        for variable in ('osd_mkfs_type', 'osd_fs_type'):
            if self.args.fs_type is not None:
                break
            self.args.fs_type = get_conf(cluster=cluster, variable=variable)
        if self.args.fs_type is None:
            self.args.fs_type = DEFAULT_FS_TYPE

        self.mkfs_args = get_conf(
            cluster=cluster,
            variable='osd_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mkfs_args is None:
            self.mkfs_args = get_conf(
                cluster=cluster,
                variable='osd_fs_mkfs_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )

        self.mount_options = get_mount_options(cluster=cluster,
                                               fs_type=self.args.fs_type)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Check the device, resolve variables and zap it on request."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition number 1 on the data device and return it.

        The size of the partition is given by get_space_size().
        """
        number = 1
        device = Device.factory(self.args.data, self.args)
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=number,
                                size=self.get_space_size())
        return device.get_partition(number)

    def set_data_partition(self):
        """Set self.partition from args.data, creating it if needed.

        When args.data already is a partition it is used as is and a
        warning is logged if its type is not a ready osd type.
        """
        if not is_partition(self.args.data):
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()
            return

        LOG.debug('OSD data device %s is a partition',
                  self.args.data)
        self.partition = DevicePartition.factory(
            path=None, dev=self.args.data, args=self.args)
        ptype = self.partition.get_ptype()
        ready = Ptype.get_ready_by_name('osd')
        if ptype not in ready:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, str(ready)))

    def populate_data_path_device(self, *to_prepare_list):
        """Create the file system on the data partition and populate it.

        The partition is mapped for the duration of the operation when
        it is a DevicePartitionCrypt. When args.data is a whole device,
        the partition type is set to the 'osd' type afterwards and a
        udev add event is triggered for the partition.
        """
        partition = self.partition
        encrypted = isinstance(partition, DevicePartitionCrypt)

        if encrypted:
            partition.map()

        try:
            mkfs_command = ['mkfs', '-t', self.args.fs_type]
            if self.mkfs_args is None:
                mkfs_command.extend(MKFS_ARGS.get(self.args.fs_type, []))
            else:
                mkfs_command.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    mkfs_command.extend(['-f'])  # always force
            mkfs_command.extend(['--', partition.get_dev()])
            LOG.debug('Creating %s fs on %s',
                      self.args.fs_type, partition.get_dev())
            command_check_call(mkfs_command, exit=True)

            path = mount(dev=partition.get_dev(),
                         fstype=self.args.fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if encrypted:
                partition.unmap()

        if is_partition(self.args.data):
            return

        typecode = '--typecode=%d:%s' % (partition.get_partition_number(),
                                         partition.ptype_for_name('osd'))
        command_check_call(
            ['sgdisk', typecode, '--', self.args.data],
            exit=True,
        )
        update_partition(self.args.data, 'prepared')
        command_check_call(['udevadm', 'trigger',
                            '--action=add',
                            '--sysname-match',
                            os.path.basename(partition.rawdev)])
2999
3000
class PrepareFilestoreData(PrepareData):
    """PrepareData specialization writing the 'filestore' type."""

    def get_space_size(self):
        """Return 0, meaning: get as much space as possible."""
        return 0

    def prepare_device(self, *to_prepare_list):
        """Prepare the spaces first, then the data partition."""
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path and tag it with the 'filestore' type."""
        parent = super(PrepareFilestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3017
3018
class PrepareBluestoreData(PrepareData):
    """PrepareData specialization writing the 'bluestore' type."""

    def get_space_size(self):
        """Return the size of the data partition: 100 MB."""
        return 100

    def prepare_device(self, *to_prepare_list):
        """Set the data partition first, then prepare the spaces."""
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        self.set_data_partition()
        for space in to_prepare_list:
            space.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path and tag it with the 'bluestore' type."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3035
3036
#
# Temporary workaround: if ceph-osd --mkfs does not
# complete within 5 minutes, assume it is blocked
# because of http://tracker.ceph.com/issues/13522
# and retry a few times.
#
# Replace the calls to this function with command_check_call
# when http://tracker.ceph.com/issues/13522 is fixed
#
def ceph_osd_mkfs(arguments):
    """Run arguments under timeout(1), retrying when the timeout fires.

    One attempt is made for each delay listed in the
    CEPH_OSD_MKFS_DELAYS environment variable (five delays of 300 by
    default). Error is raised when no attempt succeeds; a failure that
    is not a timeout (exit code other than 124) is not retried.
    """
    timeout = _get_command_executable(['timeout'])
    error = 'unknown error'
    delays = os.environ.get('CEPH_OSD_MKFS_DELAYS',
                            '300 300 300 300 300').split()
    for delay in delays:
        try:
            _check_output(timeout + [delay] + arguments)
        except subprocess.CalledProcessError as e:
            error = e.output
            if e.returncode != 124:
                # a genuine failure: give up immediately
                break
            # timeout fired, retry
            LOG.debug('%s timed out : %s (retry)'
                      % (str(arguments), error))
        else:
            return
    raise Error('%s failed : %s' % (str(arguments), error))
3065
3066
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap and run ceph-osd --mkfs --mkkey on path.

    The object store type is read from the 'type' file found in path.
    Error is raised when it is neither 'bluestore' nor 'filestore'.
    """
    monmap = os.path.join(path, 'activate.monmap')
    getmap = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'mon', 'getmap', '-o', monmap,
    ]
    command_check_call(getmap)

    osd_type = read_one_line(path, 'type')
    if osd_type not in ('bluestore', 'filestore'):
        raise Error('unrecognized objectstore type %s' % osd_type)

    arguments = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '--mkkey',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type == 'filestore':
        # only filestore is given a journal
        arguments += ['--osd-journal', os.path.join(path, 'journal')]
    arguments += [
        '--osd-uuid', fsid,
        '--keyring', os.path.join(path, 'keyring'),
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    ceph_osd_mkfs(arguments)
3122
3123
def auth_key(
    path,
    cluster,
    osd_id,
    keyring,
):
    """Register the key found in path/keyring for osd.<osd_id>.

    The 'allow profile osd' mon capability is tried first; when the
    command exits with EINVAL it is tried again with 'allow rwx'. Any
    other failure is raised to the caller.
    """
    def auth_add(mon_caps):
        command_check_call(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', keyring,
                'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
                '-i', os.path.join(path, 'keyring'),
                'osd', 'allow *',
                'mon', mon_caps,
            ],
        )

    try:
        # try dumpling+ cap scheme
        auth_add('allow profile osd')
    except subprocess.CalledProcessError as err:
        if err.returncode != errno.EINVAL:
            raise
        # try old cap scheme
        auth_add('allow rwx')
3161
3162
def get_mount_point(cluster, osd_id):
    """Return STATEDIR/osd/<cluster>-<osd_id>."""
    parent = STATEDIR + '/osd'
    leaf = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(parent, leaf)
3169
3170
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """Mount dev on the OSD mount point and lazily unmount path.

    When mount_options is None, the options registered for fstype in
    MOUNT_OPTIONS are used (an empty string if there are none).
    """
    LOG.debug('Moving mount to final location...')
    osd_data = get_mount_point(cluster, osd_id)
    maybe_mkdir(osd_data)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others.  Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013.  Instead, mount the original device at the final
    # location.
    mount_command = ['/bin/mount', '-o', mount_options, '--', dev, osd_data]
    command_check_call(mount_command)

    # -l: lazy, in case someone else is peeking at the wrong moment
    umount_command = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_command)
3212
3213
#
# For upgrade purposes, to make sure there are no competing units,
# both the --runtime unit and the default one should be disabled.
# There can be two units at the same time: one with --runtime and
# another without it. If, for any reason (manual or ceph-disk), the
# two units co-exist they will compete with each other.
#
def systemd_disable(
    path,
    osd_id,
):
    """Disable ceph-osd@<osd_id>, both without and with --runtime."""
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    # ensure there is no duplicate ceph-osd@.service
    for extra in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra)
3234
3235
def systemd_start(
    path,
    osd_id,
):
    """Enable and start ceph-osd@<osd_id>.

    The unit is disabled first; it is then enabled with --runtime when
    is_mounted(path) reports a mount, and without it otherwise.
    """
    systemd_disable(path, osd_id)

    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    enable = ['systemctl', 'enable', unit]
    if is_mounted(path):
        enable.append('--runtime')

    command_check_call(enable)
    command_check_call(['systemctl', 'start', unit])
3259
3260
def systemd_stop(
    path,
    osd_id,
):
    """Disable ceph-osd@<osd_id>, then stop it."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3273
3274
def start_daemon(
    cluster,
    osd_id,
):
    """Start the OSD with the init system its data directory is tagged with.

    The tag is a file named upstart, sysvinit, systemd, openrc or
    bsdrc in STATEDIR/osd/<cluster>-<osd_id>. Error is raised when
    there is no such file or when the start command fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged(init_system):
        return os.path.exists(os.path.join(path, init_system))

    try:
        if tagged('upstart'):
            # emit is used instead of start, because start would fail
            # if the instance was already running. Since the daemon
            # starting doesn't guarantee much about the service being
            # operational anyway, --no-wait avoids waiting for it.
            command_check_call(
                [
                    '/sbin/initctl',
                    'emit',
                    '--no-wait',
                    '--',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged('systemd'):
            systemd_start(path, osd_id)
        elif tagged('openrc'):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            # the per OSD script is a symlink to the base script
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call([osd_script, 'start'])
        elif tagged('bsdrc'):
            command_check_call(
                [
                    '/usr/sbin/service', 'ceph', 'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3349
3350
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the OSD with the init system its data directory is tagged with.

    The tag is a file named upstart, sysvinit, systemd, openrc or
    bsdrc in STATEDIR/osd/<cluster>-<osd_id>. Error is raised when
    there is no such file or when the stop command fails.
    """
    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # one list element per argument: a single
            # 'script stop osd.N' string would be taken as the path
            # of the program to run
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3405
3406
def detect_fstype(dev):
    """Return the file system type of dev.

    fstyp is used when FREEBSD is set, /sbin/blkid otherwise. The
    output of the command is validated with must_be_one_line().
    """
    if FREEBSD:
        probe = ['fstyp', '-u', dev]
    else:
        probe = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    output = _check_output(args=probe)
    return must_be_one_line(output)
3430
3431
def dmcrypt_is_mapped(uuid):
    """Return /dev/mapper/<uuid> if that path exists, None otherwise."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3438
3439
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the dmcrypt partition dev and return what _dmcrypt_map returns.

    The partition type of dev tells whether it is 'plain' or 'luks';
    Error is raised when it is neither.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        luks, cryptsetup_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        luks, cryptsetup_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
    # the device is already formatted: only map it
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=cryptsetup_parameters,
        luks=luks,
        format_dev=False,
    )
3461
3462
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """Mount dev, activate the OSD it contains and move the mount in place.

    dev is mapped with dmcrypt_map() first when dmcrypt is set. It is
    then mounted on the path returned by mount(), activated with
    activate() and finally mounted at its final location with
    move_mount(), unless the same device is already mounted there.

    Returns the (cluster, osd_id) tuple.

    Raises FilesystemTypeError when the file system type of dev cannot
    be detected, and Error when the OSD is deactivated while reactivate
    is not set or when something else already occupies the final
    location. Whenever the activation fails, path is unmounted before
    the exception is re-raised.
    """

    if dmcrypt:
        part_uuid = get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # check if the disk is deactive, change the journal owner, group
    # mode for correct user and group.
    if os.path.exists(os.path.join(path, 'deactive')):
        # logging to syslog will help us easy to know udev triggered failure
        if not reactivate:
            unmount(path)
            # we need to unmap again because dmcrypt map will create again
            # on bootup stage (due to deactivate)
            if '/dev/mapper/' in dev:
                part_uuid = dev.replace('/dev/mapper/', '')
                dmcrypt_unmap(part_uuid)
            LOG.info('OSD deactivated! reactivate with: --reactivate')
            raise Error('OSD deactivated! reactivate with: --reactivate')
        # flag to activate a deactive osd.
        deactive = True
    else:
        deactive = False

    osd_id = None
    cluster = None
    try:
        # note: activate() returns (osd_id, cluster) while this
        # function returns (cluster, osd_id)
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        active = False
        other = False
        src_dev = os.stat(path).st_dev
        try:
            dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)).st_dev
            if src_dev == dst_dev:
                # our device is what is found at the final location
                active = True
            else:
                parent_dev = os.stat(STATEDIR + '/osd').st_dev
                if dst_dev != parent_dev:
                    # the final location is on a device that is
                    # neither ours nor the one of its parent directory
                    other = True
                elif os.listdir(get_mount_point(cluster, osd_id)):
                    LOG.info(get_mount_point(cluster, osd_id) +
                             " is not empty, won't override")
                    other = True

        except OSError:
            # a failing os.stat / os.listdir leaves active and other
            # unchanged
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:
        # deliberately catches everything: the exception is re-raised
        # once our mount is undone
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3570
3571
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate an OSD whose data is stored in a plain directory.

    Runs activate() on path. Unless init is None or 'none', the
    canonical location STATEDIR/osd/<cluster>-<osd_id> is then made a
    symlink to path; an existing symlink pointing elsewhere is replaced.

    Returns the (cluster, osd_id) tuple. Raises Error when path does not
    exist or when the symlink cannot be removed or created.
    """
    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError:
                        # only filesystem failures are translated; things
                        # like KeyboardInterrupt must propagate untouched
                        raise Error('unable to remove old symlink', canonical)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path))

    return cluster, osd_id
3611
3612
def find_cluster_by_uuid(_uuid):
    """
    Find a cluster name by searching /etc/ceph/*.conf for a conf file
    with the right uuid.

    Returns the cluster name, or None when SYSCONFDIR does not exist or
    no configuration matches. As a special case, 'ceph' is returned when
    ceph.conf is the only configuration lacking an fsid.
    """
    _uuid = _uuid.lower()
    no_fsid = []
    if not os.path.exists(SYSCONFDIR):
        return None
    for conf_file in os.listdir(SYSCONFDIR):
        if not conf_file.endswith('.conf'):
            continue
        cluster = conf_file[:-5]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            if 'getting cluster uuid from configuration failed' not in str(e):
                # bare raise keeps the original traceback intact
                raise
            no_fsid.append(cluster)
        else:
            if fsid == _uuid:
                return cluster
    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3641
3642
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Bring the OSD data directory at path into the activated state.

    Steps, in order: verify the OSD magic, resolve the cluster from the
    stored ceph_fsid, allocate an OSD id if 'whoami' is missing, run
    mkfs unless 'ready' exists, write the init system marker (removing
    markers of the other init systems), and authorize the key unless
    'active' exists.

    Returns (osd_id, cluster). Raises Error when the cluster uuid, the
    cluster configuration or the OSD uuid cannot be determined.
    """
    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        # first activation: ask the cluster for an id and persist it
        osd_id = allocate_osd_id(cluster=cluster, fsid=fsid, keyring=keyring)
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    ready_marker = os.path.join(path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(path=path, cluster=cluster, osd_id=osd_id, fsid=fsid,
             keyring=keyring)

    if init not in (None, 'none'):
        if init == 'auto':
            # the configuration wins over auto-detection
            configured = get_conf(cluster=cluster, variable='init')
            init = configured if configured is not None else init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for other in INIT_SYSTEMS:
        if other == init:
            continue
        try:
            os.unlink(os.path.join(path, other))
        except OSError:
            pass

    active_marker = os.path.join(path, 'active')
    if not os.path.exists(active_marker):
        LOG.debug('Authorizing OSD key...')
        auth_key(path=path, cluster=cluster, osd_id=osd_id, keyring=keyring)
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3726
3727
def main_activate(args):
    """
    Entry point of the activate sub-command.

    args.path may be a disk device (handled by mount_activate) or a
    directory (handled by activate_dir). After activation the daemon is
    started unless args.no_start_daemon is set: directly through
    ceph-osd when args.mark_init is 'none', through start_daemon for any
    other non-None init marker. Nothing is started while the journal
    symlink is dangling.

    Raises Error when the path does not exist, is neither a directory
    nor a disk device, or carries the multipath OSD partition type
    without being a multipath device.
    """
    cluster = None
    osd_id = None

    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stmode_is_diskdevice(mode):
            looks_like_mpath_osd = (
                is_partition(args.path) and
                get_partition_type(args.path) ==
                PTYPE['mpath']['osd']['ready']
            )
            if looks_like_mpath_osd and not is_mpath(args.path):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        journal_dangling = (os.path.islink(osd_journal) and
                            not os.access(osd_journal, os.F_OK))
        if journal_dangling:
            LOG.info("activate: Journal not present, not starting, yet")
            return

        if args.no_start_daemon:
            return

        if args.mark_init == 'none':
            # no init system involved: run the daemon ourselves
            command_check_call(
                [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={journal}'.format(journal=osd_journal),
                ],
            )
        elif args.mark_init is not None:
            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3795
3796
def main_activate_lockbox(args):
    """
    Activate a lockbox partition while holding activate_lock.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3800
3801
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox found on args.path.

    The caller is expected to hold activate_lock (see
    main_activate_lockbox).
    """
    lockbox = Lockbox(args)
    lockbox.set_partition(
        DevicePartition.factory(path=None, dev=args.path, args=args))
    lockbox.activate()
3809
3810
3811 ###########################
3812
def _mark_osd_out(cluster, osd_id):
    """
    Mark osd.<osd_id> out with `ceph osd out`.

    cluster is handed to the ceph CLI so the command reaches the same
    cluster that _check_osd_status queried. osd_id must be an integer
    because it is formatted with %d.
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    command([
        'ceph',
        '--cluster', cluster,
        'osd',
        'out',
        'osd.%d' % osd_id,
    ])
3821
3822
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is computed from the JSON output of `ceph osd dump`.
    Raises Error when osd_id is not listed in that output.
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    found = False
    status_code = 0
    out, _, _ = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # compare by value: `is 1` only works by accident of the
            # interpreter's small-integer caching
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not osd.%s in osd tree!' % osd_id)
    return status_code
3855
3856
def _remove_osd_directory_files(mounted_path, cluster):
    """
    Remove the 'ready', 'active' and init system marker files from the
    OSD data directory mounted at mounted_path.

    The init system name comes from the cluster configuration when it is
    set there, from init_get() otherwise.
    """
    ready_marker = os.path.join(mounted_path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.info('`ready` file is already removed.')
    else:
        os.remove(ready_marker)
        LOG.info('Remove `ready` file.')

    active_marker = os.path.join(mounted_path, 'active')
    if not os.path.exists(active_marker):
        LOG.info('`active` file is already removed.')
    else:
        os.remove(active_marker)
        LOG.info('Remove `active` file.')

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    init = get_conf(
        cluster=cluster,
        variable='init'
    )
    if init is None:
        init = init_get()
    os.remove(os.path.join(mounted_path, init))
    LOG.info('Remove `%s` file.', init)
    return
3885
3886
def main_deactivate(args):
    """
    Entry point of the deactivate sub-command; serialized through
    activate_lock.
    """
    with activate_lock:
        main_deactivate_locked(args)
3890
3891
def main_deactivate_locked(args):
    """
    Deactivate the OSD selected by args.deactivate_by_id or args.path.

    The daemon is stopped when the OSD is up and the OSD is marked out
    when args.mark_out is set. Unless args.once is set, the marker files
    are removed and a 'deactive' file is written. Finally the data
    partition is unmounted and, for dmcrypt OSDs, the lockbox is
    unmounted and the mappings are removed. Nothing is touched on disk
    when the OSD is already down.

    Raises Error when no partition matches.
    """
    wanted_id = args.deactivate_by_id
    wanted_path = args.path
    target_dev = None

    # list all devices and found we need
    # (no early exit: the last matching partition is the one used)
    for device in list_devices():
        if 'partitions' not in device:
            continue
        for dev_part in device.get('partitions'):
            if (wanted_id and
                    'whoami' in dev_part and
                    dev_part['whoami'] == wanted_id):
                target_dev = dev_part
            elif (wanted_path and
                    'path' in dev_part and
                    dev_part['path'] == wanted_path):
                target_dev = dev_part
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # set up all we need variable
    osd_id = target_dev['whoami']
    mounted_path = target_dev['mount']
    dmcrypt = False
    if Ptype.is_dmcrypt(target_dev['ptype'], 'osd'):
        dmcrypt = True

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code == OSD_STATUS_IN_UP:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_IN_DOWN:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        LOG.info("OSD already out/down. Do not do anything now.")
        return
    elif status_code == OSD_STATUS_OUT_UP:
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_OUT_DOWN:
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    if not args.once:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Write deactivate to osd directory!
        deactive_marker = os.path.join(mounted_path, 'deactive')
        with open(deactive_marker, 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return

    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    command(['umount', os.path.join(lockbox, target_dev['uuid'])])

    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        space_key = name + '_uuid'
        if space_key in target_dev:
            dmcrypt_unmap(target_dev[space_key])
3957
3958 ###########################
3959
3960
def _remove_from_crush_map(cluster, osd_id):
    """
    Remove osd.<osd_id> from the crush map of the given cluster with
    `ceph osd crush remove`.
    """
    LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
    command([
        'ceph',
        # without this the command always targets the default cluster
        '--cluster', cluster,
        'osd',
        'crush',
        'remove',
        'osd.%s' % osd_id,
    ])
3970
3971
def _delete_osd_auth_key(cluster, osd_id):
    """
    Delete the cephx key of osd.<osd_id> in the given cluster with
    `ceph auth del`.
    """
    LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
    command([
        'ceph',
        # without this the command always targets the default cluster
        '--cluster', cluster,
        'auth',
        'del',
        'osd.%s' % osd_id,
    ])
3980
3981
def _deallocate_osd_id(cluster, osd_id):
    """
    Release osd_id in the given cluster with `ceph osd rm`.
    """
    LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
    command([
        'ceph',
        # without this the command always targets the default cluster
        '--cluster', cluster,
        'osd',
        'rm',
        '%s' % osd_id,
    ])
3990
3991
def _remove_lockbox(uuid, cluster):
    """
    Remove everything tied to the lockbox of the partition uuid.

    Deletes the client.osd-lockbox.<uuid> auth entry and the
    dm-crypt/osd/<uuid>/luks config-key, then, when the lockbox
    directory exists under STATEDIR, unmounts the lockbox and unlinks
    every symlink in that directory that points at it.
    """
    ceph = ['ceph', '--cluster', cluster]
    command(ceph + ['auth', 'del', 'client.osd-lockbox.' + uuid])
    command(ceph + ['config-key', 'del', 'dm-crypt/osd/' + uuid + '/luks'])

    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    if os.path.exists(lockbox):
        canonical = os.path.join(lockbox, uuid)
        command(['umount', canonical])
        for entry in os.listdir(lockbox):
            candidate = os.path.join(lockbox, entry)
            if os.path.islink(candidate) and \
                    os.readlink(candidate) == canonical:
                os.unlink(candidate)
4016
4017
def destroy_lookup_device(args, predicate, description):
    """
    Find the first partition for which predicate(partition) is true.

    All unmounted lockbox partitions are activated first. Each dmcrypt
    partition is then inspected with list_dev_osd through its mapped
    device; a mapping created only for that inspection is removed again
    afterwards.

    Returns (dmcrypt, partition), dmcrypt telling whether the partition
    is encrypted. Raises Error when nothing matches; description is
    attached to that error.
    """
    devices = list_devices()

    for device in devices:
        for partition in device.get('partitions', []):
            if (partition['type'] == 'lockbox' and
                    not is_mounted(partition['path'])):
                lockbox_args = argparse.Namespace(verbose=args.verbose,
                                                  path=partition['path'])
                main_activate_lockbox_protected(lockbox_args)

    for device in devices:
        for partition in device.get('partitions', []):
            if partition['dmcrypt']:
                mapped = dmcrypt_is_mapped(partition['uuid'])
                # only undo mappings we had to create ourselves
                temporary = not mapped
                if temporary:
                    mapped = dmcrypt_map(partition['path'],
                                         args.dmcrypt_key_dir)
                list_dev_osd(mapped, {}, partition)
                if temporary:
                    dmcrypt_unmap(partition['uuid'])
                dmcrypt = True
            else:
                dmcrypt = False
            if predicate(partition):
                return dmcrypt, partition
    raise Error('found no device matching ', description)
4046
4047
def main_destroy(args):
    """
    Entry point of the destroy sub-command; serialized through
    activate_lock.
    """
    with activate_lock:
        main_destroy_locked(args)
4051
4052
def main_destroy_locked(args):
    """
    Destroy the OSD selected by args.path or args.destroy_by_id.

    The OSD must be down. It is removed from the crush map, its cephx
    key is deleted and its id is deallocated. For dmcrypt OSDs the space
    mappings and the lockbox are removed as well. When args.zap is True
    the base device is zapped.

    Raises Error when neither a path nor an id is given, when the path
    is not a partition, when no device matches, or when the OSD is
    still up.
    """
    osd_id = args.destroy_by_id
    path = args.path

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # fail cleanly instead of subscripting a None target below
        raise Error('either a partition path or an osd id must be given')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
            status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    # Remove OSD from crush map
    _remove_from_crush_map(args.cluster, osd_id)

    # Remove OSD cephx key
    _delete_osd_auth_key(args.cluster, osd_id)

    # Deallocate OSD ID
    _deallocate_osd_id(args.cluster, osd_id)

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4108
4109
def get_space_osd_uuid(name, path):
    """
    Return the OSD uuid recorded in the space (journal, block, ...)
    device at path, as printed by `ceph-osd --get-device-fsid`.

    name is only used in log and error messages.

    Raises Error when path does not exist, is not a disk device, carries
    a multipath partition type without being a multipath device, or when
    ceph-osd fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    # the check used to be inverted and rejected every real disk device
    if not path_is_diskdevice(path):
        raise Error('%s is not a block device' % path)

    if (is_partition(path) and
            get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
                                         PTYPE['mpath']['block']['ready']) and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    try:
        out = _check_output(
            args=[
                'ceph-osd',
                '--get-device-fsid',
                path,
            ],
            close_fds=True,
        )
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    # only the first line of the output carries the uuid
    value = str(out).split('\n', 1)[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
4141
4142
def main_activate_space(name, args):
    """
    Activate the OSD that owns the space device args.dev.

    The OSD uuid stored in the space device gives the data partition
    under /dev/disk/by-partuuid/. That partition is activated and its
    daemon started, unless activation is suppressed or the data
    partition is not present yet.

    name identifies the kind of space; it is passed to
    get_space_osd_uuid.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    with activate_lock:
        dev = args.dev
        if args.dmcrypt:
            dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        (cluster, osd_id) = mount_activate(
            dev=path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(cluster=cluster, osd_id=osd_id)
4186
4187
4188 ###########################
4189
4190
def main_activate_all(args):
    """
    Activate every ready OSD data partition listed in
    /dev/disk/by-parttypeuuid.

    A failure on one partition is reported on stderr and does not stop
    the scan; once the scan is over Error is raised if any partition
    failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return
    failed = False
    for name in os.listdir(scan_dir):
        if '.' not in name:
            continue
        # entries are named <partition type uuid>.<partition uuid>
        (tag, part_uuid) = name.split('.')

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, name)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )

            except Exception as e:
                print(
                    '{prog}: {msg}'.format(prog=args.prog, msg=e),
                    file=sys.stderr
                )

                failed = True

    if failed:
        raise Error('One or more partitions failed to activate')
4239
4240
4241 ###########################
4242
def is_swap(dev):
    """
    Tell whether dev is listed as a swap device in PROCDIR/swaps.

    Both dev and the entries of the swaps file are resolved with
    os.path.realpath before being compared. The header line of the file
    and lines with fewer than three fields are ignored.
    """
    dev = os.path.realpath(dev)
    with open(PROCDIR + '/swaps', 'rb') as proc_swaps:
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if not isinstance(swaps_dev, str):
                # Python 3 hands back bytes here, which would never
                # compare equal to the str path in dev
                swaps_dev = swaps_dev.decode(sys.getfilesystemencoding(),
                                             'surrogateescape')
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4256
4257
def get_oneliner(base, name):
    """
    Return the first line of the file base/name with trailing
    whitespace removed, or None when that path is not a regular file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as _file:
        first_line = _file.readline()
        return _bytes2str(first_line.rstrip())
4264
4265
def get_dev_fs(dev):
    """
    Return the filesystem type found on dev, or None when it cannot be
    determined.

    On FreeBSD the output of `fstyp -u` is returned when the command
    exits with status 0. Elsewhere the quoted TYPE value is extracted
    from the output of `blkid -s TYPE`.
    """
    if FREEBSD:
        out, _, status = command(['fstyp', '-u', dev])
        if status == 0:
            return out
        return None

    out, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in out:
        return None
    # output looks like: <dev>: TYPE="<fstype>"
    type_field = out.split()[1]
    return type_field.split('"')[1]
4290
4291
def split_dev_base_partnum(dev):
    """
    Split the partition dev into (base, partnum).

    For multipath devices both values come from the mpath helpers.
    Otherwise the partition number is read from the 'partition' file
    under block_path(dev) and the base from get_partition_base.
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # close the file deterministically instead of leaking the handle
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4301
4302
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for part.
    """
    ptype = get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
    return ptype
4305
4306
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for part.
    """
    part_uuid = get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
    return part_uuid
4309
4310
def get_blkid_partition_info(dev, what=None):
    """
    Parse the KEY=VALUE output of `blkid -o udev -p dev`.

    Returns the value stored under the key what (None when absent), or
    the whole dictionary when what is not given.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # split on the first '=' only so that values containing '='
        # survive, and skip lines that are not KEY=VALUE at all
        key, sep, value = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
4329
4330
def more_osd_info(path, uuid_map, desc):
    """
    Fill desc with what the OSD data directory at path says about it.

    Sets 'ceph_fsid', 'cluster' (only when an fsid is present) and
    'whoami'. For each space name it sets '<name>_uuid' when the file
    exists and '<name>_dev' when that uuid is a key of uuid_map.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        uuid_key = name + '_uuid'
        space_uuid = get_oneliner(path, uuid_key)
        if not space_uuid:
            continue
        space_uuid = space_uuid.lower()
        desc[uuid_key] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4342
4343
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the OSD data device dev in desc.

    Sets 'mount', 'fs_type' and 'state'. A mounted device is 'active'.
    An unmounted device with a filesystem is mounted temporarily and is
    'prepared' when it contains a 'magic' file. Anything else stays
    'unprepared'. A failure to mount is ignored.
    """
    mount_point = is_mounted(dev)
    desc['mount'] = mount_point
    fs_type = get_dev_fs(dev)
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mount_point:
        desc['state'] = 'active'
        more_osd_info(mount_point, uuid_map, desc)
        return

    if not fs_type:
        return

    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if tpath:
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    more_osd_info(tpath, uuid_map, desc)
            finally:
                unmount(tpath)
    except MountError:
        pass
4365
4366
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device dev in desc.

    Sets 'mount', 'fs_type' and 'state'. A mounted lockbox is 'active'.
    An unmounted one with a filesystem is mounted temporarily (as ext4)
    and is 'prepared' when it contains a 'magic' file. 'osd_uuid' is
    read from the 'osd-uuid' file, and 'lockbox_for' is set when that
    uuid is a key of uuid_map. A failing mount command is ignored.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        try:
            tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
            args = ['mount', '-t', 'ext4', dev, tpath]
            LOG.debug('Mounting lockbox ' + str(" ".join(args)))
            command_check_call(args)
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # never leave the lockbox mounted, even if reading
                # its content fails (same pattern as list_dev_osd)
                unmount(tpath)
        except subprocess.CalledProcessError:
            pass
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4390
4391
def list_format_lockbox_plain(dev):
    """
    Return the plain-text description fragments of a lockbox: what it
    is a lockbox for when known, the OSD uuid otherwise, or nothing.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4399
4400
def list_format_more_osd_info_plain(dev):
    """
    Return the plain-text description fragments of an OSD: its cluster
    (or the fsid of an unknown cluster), its osd id and the device of
    each space it uses.
    """
    desc = []
    fsid = dev.get('ceph_fsid')
    if fsid:
        cluster = dev.get('cluster')
        if cluster:
            desc.append('cluster ' + cluster)
        else:
            desc.append('unknown cluster ' + fsid)
    whoami = dev.get('whoami')
    if whoami:
        desc.append('osd.%s' % whoami)
    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            desc.append(name + ' %s' % space_dev)
    return desc
4414
4415
def list_format_dev_plain(dev, prefix=''):
    """
    Format one device or partition description as a single line of the
    plain-text listing: prefix, the device path, then a comma separated
    description that depends on the partition type.
    """
    desc = []
    if dev['ptype'] == PTYPE['regular']['osd']['ready']:
        # regular OSD data partition
        desc = (['ceph data', dev['state']] +
                list_format_more_osd_info_plain(dev))
    elif dev['ptype'] in (PTYPE['regular']['lockbox']['ready'],
                          PTYPE['mpath']['lockbox']['ready']):
        desc = (['ceph lockbox', dev['state']] +
                list_format_lockbox_plain(dev))
    elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
        # encrypted OSD data: details are only known through the
        # single device holding the partition
        dmcrypt = dev['dmcrypt']
        if not dmcrypt['holders']:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(dmcrypt['holders'])]
    elif Ptype.is_regular_space(dev['ptype']):
        name = Ptype.space_ptype_to_name(dev['ptype'])
        desc.append('ceph ' + name)
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(dev['ptype']):
        name = Ptype.space_ptype_to_name(dev['ptype'])
        dmcrypt = dev['dmcrypt']
        if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # anything else: generic type, then filesystem or partition
        # type, then mount point
        desc.append(dev['type'])
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4463
4464
def list_format_plain(devices):
    """
    Render the device list as plain text.

    A device with partitions gets a '<path> :' header followed by one
    line per partition, sorted by path. A device without partitions is
    rendered as a single line.
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if not partitions:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
            continue
        lines.append('%s :' % device['path'])
        for part in sorted(partitions, key=lambda x: x['path']):
            lines.append(list_format_dev_plain(dev=part,
                                               prefix=' '))
    return "\n".join(lines)
4477
4478
def list_dev(dev, uuid_map, space_map):
    """
    Build the description dictionary of the device or partition dev.

    The result always has 'path', 'dmcrypt', 'is_partition', 'ptype'
    and 'type'. The other keys depend on the partition type: OSD data
    and lockbox partitions are inspected with list_dev_osd and
    list_dev_lockbox, space partitions get '<name>_for' when their uuid
    is a key of space_map, and any other device gets its filesystem
    type and mount point when known.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # compare with the mpath *lockbox* type: the mpath osd type can
        # never match inside this branch
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # the OSD can only be inspected through its single mapped device
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4555
4556
def list_devices():
    """
    Return the description of every device reported by
    list_all_partitions().

    A device with partitions is a dictionary with 'path' and a
    'partitions' list of list_dev() results. A device without
    partitions is the list_dev() result itself.
    """
    partmap = list_all_partitions()

    # first pass: map each partition uuid to its device, and each space
    # uuid recorded inside an OSD data partition to that data partition
    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for p in parts:
            dev = get_dev_path(p)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype in Ptype.get_ready_by_name('osd'):
                if Ptype.is_dmcrypt(ptype, 'osd'):
                    # encrypted data can only be read through its
                    # single mapped device; skip it otherwise
                    holders = is_held(dev)
                    if len(holders) != 1:
                        continue
                    dev_to_mount = get_dev_path(holders[0])
                else:
                    dev_to_mount = dev

                fs_type = get_dev_fs(dev_to_mount)
                if fs_type is not None:
                    mount_options = get_mount_options(cluster='ceph',
                                                      fs_type=fs_type)
                    try:
                        tpath = mount(dev=dev_to_mount,
                                      fstype=fs_type, options=mount_options)
                        try:
                            for name in Space.NAMES:
                                space_uuid = get_oneliner(tpath,
                                                          name + '_uuid')
                                if space_uuid:
                                    space_map[space_uuid.lower()] = dev
                        finally:
                            unmount(tpath)
                    except MountError:
                        # a partition that cannot be mounted simply
                        # contributes nothing to space_map
                        pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    # second pass: build the description of every device
    devices = []
    for base, parts in sorted(partmap.items()):
        if parts:
            disk = {'path': get_dev_path(base)}
            partitions = []
            for p in sorted(parts):
                partitions.append(list_dev(get_dev_path(p),
                                           uuid_map,
                                           space_map))
            disk['partitions'] = partitions
            devices.append(disk)
        else:
            device = list_dev(get_dev_path(base), uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
    LOG.debug("list_devices: " + str(devices))
    return devices
4619
4620
def list_zfs():
    """
    Print one line per dataset reported by `zfs list -o name,mountpoint`.

    A dataset whose mount point contains an 'active' file is printed as
    active ceph data, any other one as a plain zfs mount. A
    CalledProcessError from the command is logged and re-raised.
    """
    try:
        out, err, ret = command(
            [
                'zfs',
                'list',
                '-o', 'name,mountpoint'
            ]
        )
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise
    # the first line is the column header
    for line in out.splitlines()[1:]:
        fields = line.split()
        vdev, mountpoint = fields[0], fields[1]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            osd_name = os.path.split(mountpoint)[1]
            print(vdev, "ceph data, active, cluster ceph,", osd_name,
                  "mounted on:", mountpoint)
        else:
            print(vdev + " other, zfs, mounted on: " + mountpoint)
4643
4644
def main_list(args):
    """
    Entry point of the list sub-command; serialized through
    activate_lock and dispatched on the platform.
    """
    with activate_lock:
        lister = main_list_freebsd if FREEBSD else main_list_protected
        lister(args)
4651
4652
def main_list_protected(args):
    """
    Print the devices, as JSON when args.format is 'json' and as plain
    text otherwise.

    When args.path is given only devices whose path ends with one of the
    given paths are kept. An existing path is resolved with realpath
    first, and each path is used as a regular expression.
    """
    devices = list_devices()
    if args.path:
        patterns = [
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        ]
        # a device is listed once per pattern it matches
        selected_devices = [
            device
            for device in devices
            for pattern in patterns
            if re.search(pattern + '$', device['path'])
        ]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
        return

    output = list_format_plain(selected_devices)
    if output:
        print(output)
4675
4676
def main_list_freebsd(args):
    """
    FreeBSD flavour of the list sub-command; args is not used.
    """
    # Currently accommodates only ZFS Filestore partitions
    # return a list of VDEVs and mountpoints
    # > zfs list
    # NAME USED AVAIL REFER MOUNTPOINT
    # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
    # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
    list_zfs()
4685
4686
###########################
#
# Mark devices that we want to suppress activates on with a
# file like
#
#  /var/lib/ceph/tmp/suppress-activate.sdb
#
# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix.  That
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
#
4698
def is_suppressed(path):
    """Tell whether activation is suppressed for the device at *path*.

    The device name is looked up under SUPPRESS_PREFIX and then
    shortened one character at a time, so a suppress flag set on a
    prefix of the name (sdb) also covers longer names (sdb1, sdb2).

    Returns True when a matching suppress flag exists and False in
    every other case: the path does not resolve to something under
    /dev/, it is not a disk device, no flag matches, or the lookup
    itself fails.
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not ldev_is_diskdevice(disk)):
            return False
        base = get_dev_name(disk)
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # Best effort: a failing lookup means "not suppressed". Only
        # Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate.
        return False
    # No prefix of the device name carries a suppress flag.
    return False
4712
4713
def set_suppress(path):
    """Create the suppress flag file for the device at *path*.

    The flag is an empty file named SUPPRESS_PREFIX followed by the
    device name returned by get_dev_name().

    Raises Error when the resolved path does not exist or when *path*
    is not a disk device.
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Same validation as unset_suppress(): refuse anything that is
    # *not* a disk device (the test used to be inverted).
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    base = get_dev_name(disk)

    # Only the existence of the file matters, its content is empty.
    with open(SUPPRESS_PREFIX + base, 'w'):  # noqa
        pass
    LOG.info('set suppress flag on %s', base)
4725
4726
def unset_suppress(path):
    """Remove the suppress flag file of the device at *path*.

    Raises Error when the resolved path does not exist, when *path* is
    not a disk device, when no suppress flag exists for the device or
    when removing the flag file fails.
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    assert disk.startswith('/dev/')

    base = get_dev_name(disk)
    flag_file = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(flag_file):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(flag_file)
        LOG.info('unset suppress flag on %s', base)
    except OSError as unlink_error:
        raise Error('failed to unsuppress', unlink_error)
4745
4746
def main_suppress(args):
    """Entry point of ``suppress-activate``: flag ``args.path``."""
    target = args.path
    set_suppress(target)
4749
4750
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unflag ``args.path``."""
    target = args.path
    unset_suppress(target)
4753
4754
def main_zap(args):
    """Entry point of ``zap``: zap every device of ``args.dev`` in order."""
    devices = iter(args.dev)
    for device in devices:
        zap(device)
4758
4759
def main_trigger(args):
    """Entry point of ``ceph-disk trigger``.

    Unless ``args.sync`` is set, the work is handed over to systemd
    (restart of the ceph-disk@<dev>.service unit) or to upstart (emit
    of a ceph-disk event) when one of them is detected, and the
    function returns right away.

    Otherwise the partition type of ``args.dev`` selects the ceph-disk
    activate* sub-command that is run on the device.

    Raises Error when the partition type is not recognized or when the
    sub-command returns a non-zero code.
    """
    LOG.debug("main_trigger: " + str(args))

    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        escaped_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return

    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        emit = ['initctl', 'emit', 'ceph-disk']
        emit.append('dev={dev}'.format(dev=args.dev))
        emit.append('pid={pid}'.format(pid=os.getpid()))
        command(emit)
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    clear = ('regular', 'mpath')
    encrypted = ('plain', 'luks')
    block_kinds = ('block', 'block.db', 'block.wal')
    # Rows are tried in order: (PTYPE flavours, partition kinds,
    # sub-command with its options). The first row whose "ready"
    # partition types contain parttype wins.
    dispatch = (
        (clear, ('osd',), ['activate']),
        (encrypted, ('osd',), ['activate', '--dmcrypt']),
        (clear, ('journal',), ['activate-journal']),
        (encrypted, ('journal',), ['activate-journal', '--dmcrypt']),
        (clear, block_kinds, ['activate-block']),
        (encrypted, block_kinds, ['activate-block', '--dmcrypt']),
        (clear, ('lockbox',), ['activate-lockbox']),
    )
    for flavours, kinds, subcommand in dispatch:
        ready_types = [
            PTYPE[flavour][kind]['ready']
            for flavour in flavours
            for kind in kinds
        ]
        if parttype in ready_types:
            out, err, ret = command(ceph_disk + subcommand + [args.dev])
            break
    else:
        raise Error('unrecognized partition type %s' % parttype)

    if ret == 0:
        LOG.debug(out)
        LOG.debug(err)
        return

    LOG.info(out)
    LOG.error(err)
    raise Error('return code ' + str(ret))
4895
4896
def main_fix(args):
    """Entry point of ``ceph-disk fix``.

    Builds a table of ceph related paths and, depending on the
    ``--system``, ``--all``, ``--permissions`` and ``--selinux``
    options found in ``args``, restores the SELinux labels and/or the
    ownership of these paths.

    Table entries marked as blocking are processed one after the other
    with command() and a failure is reported immediately. The other
    entries are started with command_init() and collected later with
    command_wait().

    Raises Error when SELinux is required but not enabled, when one of
    the ceph daemons is found by pgrep, or when any of the commands
    returns a non-zero code.
    """
    # A list of (path, uid, gid, blocking, recursive) tuples
    fix_table = [
        ('/usr/bin/ceph-mon', 'root', 'root', True, False),
        ('/usr/bin/ceph-mds', 'root', 'root', True, False),
        ('/usr/bin/ceph-osd', 'root', 'root', True, False),
        ('/usr/bin/radosgw', 'root', 'root', True, False),
        ('/etc/ceph', 'root', 'root', True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/radosgw', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        if directory == '/var/lib/ceph/osd':
            fix_table.append((directory, 'ceph', 'ceph', True, False))
        else:
            fix_table.append((directory, 'ceph', 'ceph', True, True))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # The lists of background processes, one per kind of fix
    all_processes = []
    permissions_processes = []
    selinux_processes = []

    # Preliminary checks: relabeling needs SELinux to be enabled
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    # Refuse to run while a ceph daemon is found by pgrep
    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for directory, _, _, _, _ in fix_table:
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            # Each remaining table entry is passed to restorecon with -e
            c.append('-e')
            c.append(directory)

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # Use find to relabel + chown ~simultaneously
    if args.all:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            c = [
                'find',
                directory,
                '-exec',
                'chown',
                ':'.join((uid, gid)),
                '{}',
                '+',
                '-exec',
                'restorecon',
                '{}',
                '+',
            ]

            # Just pass -maxdepth 0 for non-recursive calls
            # NOTE(review): -maxdepth ends up after the -exec actions;
            # GNU find documents it as a global option that belongs
            # before them - confirm this placement is intended.
            if not recursive:
                c += ['-maxdepth', '0']

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to fix " + directory)
                    LOG.error(err)
                    raise Error(directory + " fix failed")
            else:
                # Started now, collected by the loop below
                all_processes.append(command_init(c))

    LOG.debug("all_processes: " + str(all_processes))
    for process in all_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background find process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix permissions
    if args.permissions:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            if recursive:
                c = [
                    'chown',
                    '-R',
                    ':'.join((uid, gid)),
                    directory
                ]
            else:
                c = [
                    'chown',
                    ':'.join((uid, gid)),
                    directory
                ]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to chown " + directory)
                    LOG.error(err)
                    raise Error(directory + " chown failed")
            else:
                # Started now, collected by the loop below
                permissions_processes.append(command_init(c))

    LOG.debug("permissions_processes: " + str(permissions_processes))
    for process in permissions_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background permissions process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix SELinux labels
    if args.selinux:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            if recursive:
                c = [
                    'restorecon',
                    '-R',
                    directory
                ]
            else:
                c = [
                    'restorecon',
                    directory
                ]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to restore labels for " + directory)
                    LOG.error(err)
                    raise Error(directory + " relabel failed")
            else:
                # Started now, collected by the loop below
                selinux_processes.append(command_init(c))

    LOG.debug("selinux_processes: " + str(selinux_processes))
    for process in selinux_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background selinux process failed")
            LOG.error(err)
            raise Error("background failed")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5082
5083
def setup_statedir(dir):
    """Record *dir* as the state directory and derive globals from it.

    Creates *dir* and its ``tmp`` sub-directory when they do not exist
    yet, then sets the module level STATEDIR, prepare_lock,
    activate_lock and SUPPRESS_PREFIX.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX

    STATEDIR = dir

    for needed in (STATEDIR, STATEDIR + "/tmp"):
        if not os.path.exists(needed):
            os.mkdir(needed)

    prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
    activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5104
5105
def setup_sysconfdir(dir):
    """Record *dir* in the module level SYSCONFDIR."""
    globals()['SYSCONFDIR'] = dir
5109
5110
def parse_args(argv):
    """Build the ceph-disk command line parser and parse *argv*.

    The global options are registered first, then every sub-command
    adds its own sub-parser. Returns the namespace produced by
    argparse; it also carries ``prog``, the program name.
    """
    parser = argparse.ArgumentParser('ceph-disk')

    # (option strings, add_argument keywords), in display order
    global_options = (
        (('-v', '--verbose'), dict(
            action='store_true', default=None,
            help='be more verbose',
        )),
        (('--log-stdout',), dict(
            action='store_true', default=None,
            help='log to stdout',
        )),
        (('--prepend-to-path',), dict(
            metavar='PATH',
            default='/usr/bin',
            help=('prepend PATH to $PATH for backward compatibility '
                  '(default /usr/bin)'),
        )),
        (('--statedir',), dict(
            metavar='PATH',
            default='/var/lib/ceph',
            help=('directory in which ceph state is preserved '
                  '(default /var/lib/ceph)'),
        )),
        (('--sysconfdir',), dict(
            metavar='PATH',
            default='/etc/ceph',
            help=('directory in which ceph configuration files are found '
                  '(default /etc/ceph)'),
        )),
        (('--setuser',), dict(
            metavar='USER',
            default=None,
            help=('use the given user for subprocesses, '
                  'rather than ceph or root'),
        )),
        (('--setgroup',), dict(
            metavar='GROUP',
            default=None,
            help=('use the given group for subprocesses, '
                  'rather than ceph or root'),
        )),
    )
    for option_strings, keywords in global_options:
        parser.add_argument(*option_strings, **keywords)

    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    Prepare.set_subparser(subparsers)
    for register in (
            make_activate_parser,
            make_activate_lockbox_parser,
            make_activate_block_parser,
            make_activate_journal_parser,
            make_activate_all_parser,
            make_list_parser,
            make_suppress_parser,
            make_deactivate_parser,
            make_destroy_parser,
            make_zap_parser,
            make_trigger_parser,
            make_fix_parser,
    ):
        register(subparsers)

    return parser.parse_args(argv)
5185
5186
def make_fix_parser(subparsers):
    """Register the ``fix`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
    """))
    parser = subparsers.add_parser(
        'fix',
        help='fix SELinux labels and/or file permissions',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # All the switches are plain booleans that default to False.
    switches = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, explanation in switches:
        parser.add_argument(
            flag,
            action='store_true',
            default=False,
            help=explanation,
        )

    parser.set_defaults(func=main_fix)
    return parser
5223
5224
def make_trigger_parser(subparsers):
    """Register the ``trigger`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
    The partition given in argument is activated. The type of the
    partition (data, lockbox, journal etc.) is detected by its
    type. If the init system is upstart or systemd, the activation is
    delegated to it and runs asynchronously, which
    helps reduce the execution time of udev actions.
    """))
    parser = subparsers.add_parser(
        'trigger',
        help='activate any device (called by udev)',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument('dev', help='device')
    parser.add_argument(
        '--cluster', metavar='NAME', default='ceph',
        help='cluster name to assign this disk to',
    )
    parser.add_argument(
        '--dmcrypt', action='store_true', default=None,
        help='map devices with dm-crypt',
    )
    parser.add_argument(
        '--dmcrypt-key-dir', metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--sync', action='store_true', default=None,
        help='do operation synchronously; do not trigger systemd',
    )

    parser.set_defaults(func=main_trigger)
    return parser
5267
5268
def make_activate_parser(subparsers):
    """Register the ``activate`` sub-command and return its parser."""
    raw_description = """\
    Activate the OSD found at PATH (can be a directory
    or a device partition, possibly encrypted). When
    activated for the first time, a unique OSD id is obtained
    from the cluster. If PATH is a directory, a symbolic
    link is added in {statedir}/osd/ceph-$id. If PATH is
    a partition, it is mounted on {statedir}/osd/ceph-$id.
    Finally, the OSD daemon is run.

    If the OSD depends on auxiliary partitions (journal, block, ...)
    they need to be available otherwise activation will fail. It
    may happen if a journal is encrypted and cryptsetup was not
    run yet.
    """.format(statedir=STATEDIR)
    parser = subparsers.add_parser(
        'activate',
        help='Activate a Ceph OSD',
        description=textwrap.fill(textwrap.dedent(raw_description)),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        '--mount', action='store_true', default=None,
        help='mount a block device [deprecated, ignored]',
    )
    parser.add_argument(
        '--activate-key', metavar='PATH',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    parser.add_argument(
        '--mark-init', metavar='INITSYSTEM',
        default='auto', choices=INIT_SYSTEMS,
        help='init system to manage this dir',
    )
    parser.add_argument(
        '--no-start-daemon', action='store_true', default=None,
        help='do not start the daemon',
    )
    parser.add_argument(
        'path', metavar='PATH',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--dmcrypt', action='store_true', default=None,
        help='map DATA and/or JOURNAL devices with dm-crypt',
    )
    parser.add_argument(
        '--dmcrypt-key-dir', metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--reactivate', action='store_true', default=False,
        help='activate the deactived OSD',
    )

    parser.set_defaults(
        func=main_activate,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return parser
5337
5338
def make_activate_lockbox_parser(subparsers):
    """Register the ``activate-lockbox`` sub-command, return its parser."""
    raw_description = """\
    Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
    where $uuid uniquely identifies the OSD that needs this lockbox
    to retrieve keys from the monitor and unlock its partitions.

    If the OSD has one or more auxiliary devices (journal, block, ...)
    symbolic links are created at {statedir}/osd-lockbox/$other_uuid
    and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
    allow a journal encrypted in a partition identified by $other_uuid to
    fetch the keys it needs from the monitor.

    Finally the OSD is activated, as it would be with ceph-disk activate.
    """.format(statedir=STATEDIR)
    lockbox_parser = subparsers.add_parser(
        'activate-lockbox',
        help='Activate a Ceph lockbox',
        description=textwrap.fill(textwrap.dedent(raw_description)),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    lockbox_parser.add_argument(
        '--activate-key',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    lockbox_parser.add_argument(
        '--dmcrypt-key-dir', metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    lockbox_parser.add_argument(
        'path', metavar='PATH',
        help='path to block device',
    )

    lockbox_parser.set_defaults(
        func=main_activate_lockbox,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return lockbox_parser
5378
5379
def make_activate_block_parser(subparsers):
    """Register the ``activate-block`` sub-command, return its parser."""
    block_parser = make_activate_space_parser('block', subparsers)
    return block_parser
5382
5383
def make_activate_journal_parser(subparsers):
    """Register the ``activate-journal`` sub-command, return its parser."""
    journal_parser = make_activate_space_parser('journal', subparsers)
    return journal_parser
5386
5387
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` sub-command and return its parser.

    *name* is the kind of auxiliary device (the callers in this file
    pass ``block`` and ``journal``); it is used in the sub-command
    name, in the help texts and is handed to main_activate_space()
    together with the parsed arguments.
    """
    raw_description = """\
    Activating a {name} partition is only meaningfull
    if it is encrypted and it will map it using
    cryptsetup.

    Finally the corresponding OSD is activated,
    as it would be with ceph-disk activate.
    """.format(name=name)
    space_parser = subparsers.add_parser(
        'activate-%s' % name,
        help='Activate an OSD via its %s device' % name,
        description=textwrap.fill(textwrap.dedent(raw_description)),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    space_parser.add_argument(
        'dev', metavar='DEV',
        help='path to %s block device' % name,
    )
    space_parser.add_argument(
        '--activate-key', metavar='PATH',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    space_parser.add_argument(
        '--mark-init', metavar='INITSYSTEM',
        default='auto', choices=INIT_SYSTEMS,
        help='init system to manage this dir',
    )
    space_parser.add_argument(
        '--dmcrypt', action='store_true', default=None,
        help=('map data and/or auxiliariy (journal, etc.) '
              'devices with dm-crypt'),
    )
    space_parser.add_argument(
        '--dmcrypt-key-dir', metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    space_parser.add_argument(
        '--reactivate', action='store_true', default=False,
        help='activate the deactived OSD',
    )

    space_parser.set_defaults(
        func=lambda args: main_activate_space(name, args),
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return space_parser
5441
5442
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
    Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
    The partitions containing auxiliary devices (journal, block, ...)
    are not activated.
    """))
    parser = subparsers.add_parser(
        'activate-all',
        help='Activate all tagged OSD partitions',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        '--activate-key', metavar='PATH',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    parser.add_argument(
        '--mark-init', metavar='INITSYSTEM',
        default='auto', choices=INIT_SYSTEMS,
        help='init system to manage this dir',
    )

    parser.set_defaults(
        func=main_activate_all,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return parser
5471
5472
def make_list_parser(subparsers):
    """Register the ``list`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
    Display all partitions on the system and their
    associated Ceph information, if any.
    """))
    parser = subparsers.add_parser(
        'list',
        help='List disks, partitions, and Ceph OSDs',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        '--format',
        default='plain', choices=['json', 'plain'],
        help='output format',
    )
    parser.add_argument(
        'path', metavar='PATH', nargs='*',
        help='path to block devices, relative to /sys/block',
    )

    parser.set_defaults(func=main_list)
    return parser
5498
5499
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    Both sub-commands take a single PATH argument. Only the parser of
    ``suppress-activate`` is returned.
    """
    raw_formatter = argparse.RawDescriptionHelpFormatter

    suppress_description = textwrap.fill(textwrap.dedent("""\
    Add a prefix to the list of suppressed device names
    so that they are ignored by all activate* subcommands.
    """))
    suppress_parser = subparsers.add_parser(
        'suppress-activate',
        help='Suppress activate on a device (prefix)',
        description=suppress_description,
        formatter_class=raw_formatter,
    )
    suppress_parser.add_argument(
        'path', metavar='PATH',
        help='path to block device or directory',
    )
    suppress_parser.set_defaults(func=main_suppress)

    unsuppress_description = textwrap.fill(textwrap.dedent("""\
    Remove a prefix from the list of suppressed device names
    so that they are no longer ignored by all
    activate* subcommands.
    """))
    unsuppress_parser = subparsers.add_parser(
        'unsuppress-activate',
        help='Stop suppressing activate on a device (prefix)',
        description=unsuppress_description,
        formatter_class=raw_formatter,
    )
    unsuppress_parser.add_argument(
        'path', metavar='PATH',
        help='path to block device or directory',
    )
    unsuppress_parser.set_defaults(func=main_unsuppress)

    return suppress_parser
5536
5537
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` sub-command (nothing is returned)."""
    description = textwrap.fill(textwrap.dedent("""\
    Deactivate the OSD located at PATH. It stops the OSD daemon
    and optionally marks it out (with --mark-out). The content of
    the OSD is left untouched.

    By default, the, ready, active, INIT-specific files are
    removed (so that it is not automatically re-activated by the
    udev rules or ceph-disk trigger) and the file deactive is
    created to remember the OSD is deactivated.

    If the --once option is given, the ready, active, INIT-specific
    files are not removed and the OSD will reactivate whenever
    ceph-disk trigger is run on one of the devices (journal, data,
    block, lockbox, ...).

    If the OSD is dmcrypt, remove the data dmcrypt map. When
    deactivate finishes, the OSD is down.
    """))
    parser = subparsers.add_parser(
        'deactivate',
        help='Deactivate a Ceph OSD',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        '--cluster', metavar='NAME', default='ceph',
        help='cluster name to assign this disk to',
    )
    parser.add_argument(
        'path', metavar='PATH', nargs='?',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--deactivate-by-id', metavar='<id>',
        help='ID of OSD to deactive'
    )
    parser.add_argument(
        '--mark-out', action='store_true', default=False,
        help='option to mark the osd out',
    )
    parser.add_argument(
        '--once', action='store_true', default=False,
        help='does not need --reactivate to activate again',
    )

    parser.set_defaults(func=main_deactivate)
5591
5592
def make_destroy_parser(subparsers):
    """Register the ``destroy`` sub-command (nothing is returned)."""
    description = textwrap.fill(textwrap.dedent("""\
    Destroy the OSD located at PATH.
    It removes the OSD from the cluster, the crushmap and
    deallocates the OSD id. An OSD must be down before it
    can be destroyed.
    """))
    parser = subparsers.add_parser(
        'destroy',
        help='Destroy a Ceph OSD',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        '--cluster', metavar='NAME', default='ceph',
        help='cluster name to assign this disk to',
    )
    parser.add_argument(
        'path', metavar='PATH', nargs='?',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--destroy-by-id', metavar='<id>',
        help='ID of OSD to destroy'
    )
    parser.add_argument(
        '--dmcrypt-key-dir', metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    parser.add_argument(
        '--zap', action='store_true', default=False,
        help='option to erase data and partition',
    )

    parser.set_defaults(func=main_destroy)
5637
5638
def make_zap_parser(subparsers):
    """Register the ``zap`` sub-command and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
    Zap/erase/destroy a device's partition table and contents. It
    actually uses sgdisk and it's option --zap-all to
    destroy both GPT and MBR data structures so that the disk
    becomes suitable for repartitioning.
    """))
    parser = subparsers.add_parser(
        'zap',
        help='Zap/erase/destroy a device\'s partition table (and contents)',
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        'dev', metavar='DEV', nargs='+',
        help='path to block device',
    )

    parser.set_defaults(func=main_zap)
    return parser
5660
5661
def main(argv):
    """Parse *argv*, set up the global state and run the sub-command.

    With ``--verbose`` the sub-command is called directly; otherwise it
    goes through main_catch().
    """
    global CEPH_PREF_USER, CEPH_PREF_GROUP

    args = parse_args(argv)
    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current_path = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = args.prepend_to_path + ":" + current_path

    # trigger may run when statedir is unavailable and does not use it
    if args.func.__name__ != 'main_trigger':
        setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if not args.verbose:
        main_catch(args.func, args)
    else:
        args.func(args)
5685
5686
def setup_logging(verbose, log_stdout):
    """Configure logging for the command line tool.

    The level is DEBUG when *verbose* is true and WARNING otherwise.
    When *log_stdout* is true a handler writing to sys.stdout is added
    to LOG, otherwise logging.basicConfig() is called.
    """
    message_format = '%(funcName)s: %(message)s'
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(level=loglevel, format=message_format)
        return

    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setLevel(loglevel)
    stdout_handler.setFormatter(logging.Formatter(message_format))
    LOG.addHandler(stdout_handler)
    LOG.setLevel(loglevel)
5704
5705
def main_catch(func, args):
    """Call ``func(args)`` and turn known errors into a SystemExit.

    An Error exits with ``<prog>: <message>`` and a CephDiskException
    exits with ``<prog> <exception class name>: <message>``, where
    ``<prog>`` is ``args.prog``.
    """
    try:
        func(args)
    except Error as e:
        message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
        raise SystemExit(message)
    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
5728
5729
def run():
    """Run main() with the command line arguments of the process."""
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
5732
5733
5734 if __name__ == '__main__':
5735 main(sys.argv[1:])
5736 warned_about = {}