]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-disk/ceph_disk/main.py
update sources to v12.2.4
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
CommitLineData
7c673cae
FG
1#!/usr/bin/env python
2#
c07f9fc5 3# Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com>
7c673cae
FG
4# Copyright (C) 2014 Inktank <info@inktank.com>
5# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6# Copyright (C) 2014 Catalyst.net Ltd
7#
8# Author: Loic Dachary <loic@dachary.org>
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU Library Public License as published by
12# the Free Software Foundation; either version 2, or (at your option)
13# any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU Library Public License for more details.
19#
20
21from __future__ import print_function
22
23import argparse
24import base64
25import errno
26import fcntl
3efd9988 27import functools
7c673cae
FG
28import json
29import logging
30import os
31import platform
32import re
33import subprocess
34import stat
35import sys
36import tempfile
37import uuid
38import time
39import shlex
c07f9fc5 40import shutil
7c673cae
FG
41import pwd
42import grp
43import textwrap
44import glob
45
# Magic strings compared against the 'magic' file of an OSD data dir
# (see check_osd_magic) / lockbox volume.
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'

KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'

# Partition type GUID table.
#
# Layout: PTYPE[family][name][state] -> GUID string, where
#   family is one of 'regular', 'luks', 'plain', 'mpath',
#   name   is 'journal', 'block', 'block.db', 'block.wal', 'osd' and,
#          for 'regular' and 'mpath' only, 'lockbox',
#   state  is 'ready' or 'tobe'.
# The Ptype helpers only ever compare against the 'ready' values.
PTYPE = {
    'regular': {
        'journal': {
            # identical because creating a journal is atomic
            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block': {
            # identical because creating a block is atomic
            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block.db': {
            # NOTE(review): the original comment here claimed the two values
            # are identical; they actually differ in the trailing digits.
            'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
            'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
        },
        'block.wal': {
            # NOTE(review): the original comment here claimed the two values
            # are identical; they actually differ in the trailing digits.
            'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
            'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
        },
        'lockbox': {
            'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
            'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
        },
    },
    'luks': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '166418da-c469-4022-adf4-b30afd37f176',
            'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
        },
        'block.wal': {
            'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
            'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'plain': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
            'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
        },
        'block.wal': {
            'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
            'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'mpath': {
        'journal': {
            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
        },
        'block': {
            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
        },
        'block.db': {
            'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
            'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
        },
        'block.wal': {
            'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
            'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
        },
        'osd': {
            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
        },
        'lockbox': {
            'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
            'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
        },
    },
}
153
b32b8144
FG
# Probe os.major() with a device number that has the high bit set. If the
# interpreter raises OverflowError for it, replace os.major/os.minor with
# pure-Python bit-twiddling equivalents for the rest of the process.
try:
    # see https://bugs.python.org/issue23098
    os.major(0x80002b00)
except OverflowError:
    os.major = lambda devid: ((devid >> 8) & 0xfff) | ((devid >> 32) & ~0xfff)
    os.minor = lambda devid: (devid & 0xff) | ((devid >> 12) & ~0xff)
160
7c673cae
FG
161
class Ptype(object):
    """
    Static lookup helpers over the PTYPE partition-type GUID table.

    Every helper compares against the 'ready' GUIDs only.
    """

    @staticmethod
    def get_ready_by_type(what):
        """Return the 'ready' GUID of every entry of PTYPE[what]."""
        return [entry['ready'] for entry in PTYPE[what].values()]

    @staticmethod
    def get_ready_by_name(name):
        """Return the 'ready' GUID of `name` for each family defining it."""
        return [family[name]['ready']
                for family in PTYPE.values()
                if name in family]

    @staticmethod
    def is_what_space(what, ptype):
        """True if ptype is the 'ready' GUID of a Space.NAMES entry
        in the PTYPE[what] family."""
        return any(ptype == PTYPE[what][space]['ready']
                   for space in Space.NAMES)

    @staticmethod
    def is_regular_space(ptype):
        """is_what_space() for the 'regular' family."""
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        """is_what_space() for the 'mpath' family."""
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        """is_what_space() for the 'plain' family."""
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        """is_what_space() for the 'luks' family."""
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' GUID back to its Space.NAMES entry.

        :raises: ValueError if no family lists ptype as a 'ready' GUID.
        """
        for family in PTYPE.values():
            found = next((space for space in Space.NAMES
                          if ptype == family[space]['ready']), None)
            if found is not None:
                return found
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """True if ptype is a 'plain' or 'luks' GUID of any Space.NAMES
        entry."""
        return any(Ptype.is_dmcrypt(ptype, space) for space in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """True if ptype is the 'ready' GUID of `name` in the 'plain' or
        'luks' family."""
        return any(ptype == PTYPE[family][name]['ready']
                   for family in ('plain', 'luks'))
216
217
# Root of sysfs; block_path() builds "<SYSFS>/dev/block/<major>:<minor>".
SYSFS = '/sys'

# Platform-dependent defaults, chosen once at import time.
if platform.system() == 'FreeBSD':
    FREEBSD = True
    DEFAULT_FS_TYPE = 'zfs'
    PROCDIR = '/compat/linux/proc'
    # FreeBSD does not have blockdevices any more
    BLOCKDIR = '/dev'
    ROOTGROUP = 'wheel'
else:
    FREEBSD = False
    DEFAULT_FS_TYPE = 'xfs'
    PROCDIR = '/proc'
    BLOCKDIR = '/sys/block'
    ROOTGROUP = 'root'

"""
OSD STATUS Definition
"""
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3

# Mount options keyed by filesystem type.
MOUNT_OPTIONS = dict(
    btrfs='noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    ext4='noatime,user_xattr',
    xfs='noatime,inode64',
)

# Extra mkfs arguments keyed by filesystem type.
MKFS_ARGS = dict(
    btrfs=[
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    xfs=[
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
    zfs=[
        '-o', 'atime=off'
    ],
)

# Init system names known to this module.
INIT_SYSTEMS = [
    'upstart',
    'sysvinit',
    'systemd',
    'openrc',
    'bsdrc',
    'auto',
    'none',
]

STATEDIR = '/var/lib/ceph'

SYSCONFDIR = '/etc/ceph'

# Module-level placeholders; they are assigned elsewhere in the file
# (not visible in this part of it).
prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}

# Nuke the TERM variable to avoid confusing any subprocesses we call.
# For example, libreadline will print weird control sequences for some
# TERM values.
if 'TERM' in os.environ:
    del os.environ['TERM']

# Logger is named after the module, or after the script's basename when
# the file is executed directly.
LOG_NAME = __name__
if LOG_NAME == '__main__':
    LOG_NAME = os.path.basename(sys.argv[0])
LOG = logging.getLogger(LOG_NAME)

# Allow user-preferred values for subprocess user and group
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
307
308
class FileLock(object):
    """
    Context manager holding an exclusive ``fcntl.lockf`` lock on a file.

    The file ``fn`` is created if missing. The lock is taken on entry and
    released (and the descriptor closed) on exit. Instances are not
    re-entrant: entering an already-entered lock trips an assertion.
    """

    def __init__(self, fn):
        self.fn = fn
        self.fd = None

    def __enter__(self):
        # compare with None: 0 is a legal file descriptor
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        locked = False
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
            locked = True
        finally:
            # do not leak the descriptor when taking the lock fails
            if not locked:
                os.close(fd)
        self.fd = fd
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        assert self.fd is not None
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # always close and reset, even if unlocking raised
            os.close(self.fd)
            self.fd = None
324
325
class Error(Exception):
    """
    Error
    """
    # NOTE: the class docstring is runtime text. __str__ uses the
    # (stripped) docstring of the concrete class as the message prefix,
    # followed by the exception arguments, all joined with ': '.

    def __str__(self):
        try:
            str_type = basestring
        except NameError:
            str_type = str
        # __doc__ is not inherited by subclasses; fall back to the class
        # name so that an undocumented subclass still formats cleanly.
        doc = _bytes2str((self.__doc__ or type(self).__name__).strip())
        parts = [doc]
        for arg in self.args:
            # decode bytes first so they are not rendered as "b'...'"
            text = _bytes2str(arg)
            if not isinstance(text, str_type):
                text = _bytes2str(str(text))
            parts.append(text)
        return ': '.join(parts)
339
340
# Error subclass. The docstring below is runtime text: Error.__str__ uses
# it as the prefix of the formatted message.
class MountError(Error):
    """
    Mounting filesystem failed
    """
345
346
# Error subclass. The docstring below is runtime text: Error.__str__ uses
# it as the prefix of the formatted message.
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
351
352
# Error subclass raised by check_osd_magic(). The docstring below is
# runtime text: Error.__str__ uses it as the prefix of the message.
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
357
358
# Error subclass raised by must_be_one_line() when the trailing newline is
# missing. The docstring below is runtime text used by Error.__str__.
class TruncatedLineError(Error):
    """
    Line is truncated
    """
363
364
# Error subclass raised by must_be_one_line() when more than one line is
# present. The docstring below is runtime text used by Error.__str__.
class TooManyLinesError(Error):
    """
    Too many lines
    """
369
370
# Error subclass. The docstring below is runtime text: Error.__str__ uses
# it as the prefix of the formatted message.
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
375
376
class CephDiskException(Exception):
    """
    Root of the ceph-disk exceptions that carry a custom (ad-hoc) message,
    meant to be caught and reported when main() is executed.
    """
383
384
class ExecutableNotFound(CephDiskException):
    """
    Signals that a required executable is not available in PATH.
    """
390
391
def is_systemd():
    """
    Tell whether the name of process 1, as read from
    ``<PROCDIR>/1/comm``, contains 'systemd'.
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as comm:
        init_name = comm.read()
    return 'systemd' in init_name
398
399
def is_upstart():
    """
    Tell whether ``init --version`` mentions 'upstart' on its stdout.
    """
    stdout, _stderr, _returncode = command(['init', '--version'])
    return 'upstart' in stdout
406
407
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    Arguments are forwarded unchanged to ``os.mkdir``; the first
    positional argument (or the ``path`` keyword) is the directory path.
    An already existing directory is not an error; any other ``OSError``
    from ``os.mkdir`` propagates.
    """
    path = a[0] if a else kw['path']
    # remove any symlink, if it is there.. islink() is used rather than
    # exists() so that a dangling symlink is detected as well.
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
424
425
def which(executable):
    """
    Locate an executable.

    The directories of $PATH (or ``os.defpath`` when PATH is unset or
    empty) are searched first, followed by a fixed list of common binary
    directories.

    :return: Path of the first regular, executable match, or None.
    """
    search_dirs = (os.environ.get('PATH') or os.defpath).split(os.pathsep)
    search_dirs = search_dirs + [
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ]

    for directory in search_dirs:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
445
446
447def _get_command_executable(arguments):
448 """
449 Return the full path for an executable, raise if the executable is not
450 found. If the executable has already a full path do not perform any checks.
451 """
452 if os.path.isabs(arguments[0]): # an absolute path
453 return arguments
454 executable = which(arguments[0])
455 if not executable:
456 command_msg = 'Could not run command: %s' % ' '.join(arguments)
457 executable_msg = '%s not in path.' % arguments[0]
458 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
459
460 # swap the old executable for the new one
461 arguments[0] = executable
462 return arguments
463
464
def command(arguments, **kwargs):
    """
    Run a command to completion through ``subprocess.Popen``, after
    checking that its executable can be found.

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since a missing executable is reported with a helpful error instead
    of an obscure failure.

    Extra keyword arguments are handed to ``subprocess.Popen``.

    :return: A tuple (stdout, stderr, returncode); both outputs decoded.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    proc = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout, stderr = proc.communicate()

    return _bytes2str(stdout), _bytes2str(stderr), proc.returncode
490
491
c07f9fc5
FG
def command_with_stdin(arguments, stdin):
    """
    Run a command, feeding ``stdin`` to its standard input.

    :param arguments: argv list, passed to ``subprocess.Popen`` as is
                      (the executable is not resolved with which()).
    :param stdin: data for the child's stdin; text is encoded as UTF-8
                  because the pipe is opened in binary mode.
    :raises: SystemExit if the command exits with a non-zero status.
    :return: The raw (undecoded) stdout of the command.
    """
    LOG.info("Running command with stdin: " + " ".join(arguments))
    if stdin is not None and not isinstance(stdin, (bytes, bytearray)):
        # communicate() on a binary pipe rejects text on Python 3
        stdin = stdin.encode('utf-8')
    process = subprocess.Popen(
        arguments,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out, err = process.communicate(stdin)
    LOG.debug(out)
    if process.returncode != 0:
        LOG.error(err)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=arguments,
                returncode=process.returncode,
            )
        )
    return out
510
511
7c673cae
FG
512def _bytes2str(string):
513 return string.decode('utf-8') if isinstance(string, bytes) else string
514
515
def command_init(arguments, **kwargs):
    """
    Start a command through ``subprocess.Popen`` without waiting for it,
    after checking that its executable can be found.

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since a missing executable is reported with a helpful error instead
    of an obscure failure.

    Extra keyword arguments are handed to ``subprocess.Popen``.

    :return: The started process (stdout and stderr are pipes).
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    return subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
538
539
def command_wait(process):
    """
    Block until the process finishes and collect its decoded output.

    :return: A tuple (stdout, stderr, returncode).
    """
    stdout, stderr = process.communicate()
    return _bytes2str(stdout), _bytes2str(stderr), process.returncode
548
549
def command_check_call(arguments, exit=False):
    """
    Run ``subprocess.check_call`` after checking that the executable can
    be found.

    When ``exit`` is ``True`` a failing command ends the program with a
    clean (sans traceback) ``SystemExit``; otherwise the
    ``subprocess.CalledProcessError`` is re-raised.

    .. note:: This should be the preferred way of calling
    ``subprocess.check_call`` since a missing executable is reported with
    a helpful error instead of an obscure failure.
    """
    argv = _get_command_executable(arguments)
    cmdline = ' '.join(argv)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(argv)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=cmdline,
                returncode=error.returncode,
            )
        )
578
579
7c673cae
FG
580#
581# An alternative block_path implementation would be
582#
583# name = basename(dev)
584# return /sys/devices/virtual/block/$name
585#
586# It is however more fragile because it relies on the fact
587# that the basename of the device the user will use always
588# matches the one the driver will use. On Ubuntu 14.04, for
589# instance, when multipath creates a partition table on
590#
591# /dev/mapper/353333330000007d0 -> ../dm-0
592#
593# it will create partition devices named
594#
595# /dev/mapper/353333330000007d0-part1
596#
597# which is the same device as /dev/dm-1 but not a symbolic
598# link to it:
599#
600# ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
601# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
602# lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
603# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
604#
605# Using the basename in this case fails.
606#
607
608
def block_path(dev):
    """
    Return the ``<SYSFS>/dev/block/<major>:<minor>`` path of a device,
    computed from the device number of its resolved path.

    On FreeBSD the device path is returned unchanged.
    """
    if FREEBSD:
        return dev
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS, M=os.major(rdev), m=os.minor(rdev))
616
617
def get_dm_uuid(dev):
    """
    Read the device-mapper uuid of a device from ``<block_path>/dm/uuid``.

    :return: The raw file content (not stripped), or False when the
             device has no such sysfs file.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # use a context manager so the file handle is closed deterministically
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
626
627
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The decision is based on the device-mapper uuid of the device, which
    must start with ``mpath-`` or ``part<N>-mpath-``. Always False on
    FreeBSD and for devices without a device-mapper uuid.

    :return: A real bool.
    """
    if FREEBSD:
        return False
    dm_uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return bool(dm_uuid and re.match(r'(part\d+-)?mpath-', dm_uuid))
638
639
def get_dev_name(path):
    """
    Convert a device path into a device name, e.g.::

        /dev/sda        -> sda
        /dev/cciss/c0d1 -> cciss!c0d1

    The name is the part of the path after ``/dev/`` with every '/'
    turned into '!'. The path must start with ``/dev/``.
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    return '!'.join(path[len(prefix):].split('/'))
655
656
def get_dev_path(name):
    """
    Convert a device name into its path under /dev, e.g.::

        sdb        -> /dev/sdb
        cciss!c0d1 -> /dev/cciss/c0d1

    """
    return '/dev/' + '/'.join(name.split('!'))
667
668
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) into a path relative to /dev
    (cciss/c0d1).
    """
    return '/'.join(name.split('!'))
674
675
def get_dev_size(dev, size='megabytes'):
    """
    Measure a device by seeking to its end.

    Knowing the size lets callers refuse actions on devices that are too
    small and report better errors. A failure to measure is only logged
    as a warning (None is returned); a failure to open the device
    propagates.

    :param dev: the device to measure
    :param size: 'bytes' or 'megabytes'; anything else means megabytes
    :return: The size in the requested unit, rounded down.
    """
    unit_divisors = {'bytes': 1, 'megabytes': 1024 * 1024}
    handle = os.open(dev, os.O_RDONLY)
    try:
        total_bytes = os.lseek(handle, 0, os.SEEK_END)
        # default to megabytes
        return total_bytes // unit_divisors.get(size, 1024 * 1024)
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(handle)
    return None
697
698
224ce89b
WB
def stmode_is_diskdevice(dmode):
    """
    Tell whether a ``st_mode`` value denotes a disk device: a block
    device, or on FreeBSD a character device.
    """
    # FreeBSD does not have block devices
    # All disks are character devices
    return stat.S_ISBLK(dmode) or (FREEBSD and stat.S_ISCHR(dmode))
706
707
def dev_is_diskdevice(dev):
    """Apply stmode_is_diskdevice() to ``os.stat(dev)`` (follows links)."""
    return stmode_is_diskdevice(os.stat(dev).st_mode)
711
712
def ldev_is_diskdevice(dev):
    """Apply stmode_is_diskdevice() to ``os.lstat(dev)`` (no link
    following)."""
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
716
717
def path_is_diskdevice(path):
    """Resolve ``path`` with realpath and test it with
    dev_is_diskdevice()."""
    return dev_is_diskdevice(os.path.realpath(path))
721
722
7c673cae
FG
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of a multipath device whose dm uuid starts
    with ``part<pnum>-mpath-``, or None if there is none.
    """
    matching = list_partitions_mpath(
        dev, "part{pnum}-mpath-".format(pnum=pnum))
    return matching[0] if matching else None
730
731
3efd9988
FG
def retry(on_error=Exception, max_tries=10, wait=0.2, backoff=0):
    """
    Decorator factory: call the function again when it raises.

    The first ``max_tries - 1`` attempts swallow ``on_error`` and sleep
    ``wait + backoff * <attempt index>`` seconds before trying again; the
    final attempt lets any exception propagate.

    :param on_error: exception class (or tuple of classes) to retry on
    :param max_tries: total number of attempts (at least one is made)
    :param wait: base delay between attempts, in seconds
    :param backoff: extra delay added per attempt already made
    """
    def wrapper(func):
        @functools.wraps(func)
        def repeat(*args, **kwargs):
            attempt = 0
            while attempt < max_tries - 1:
                try:
                    return func(*args, **kwargs)
                except on_error:
                    time.sleep(wait + backoff * attempt)
                attempt += 1
            # last chance: failures are no longer intercepted
            return func(*args, **kwargs)
        return repeat
    return wrapper
744
745
@retry(Error)
def get_partition_dev(dev, pnum):
    """
    get the device name for a partition

    assume that partitions are named like the base dev,
    with a number, and optionally
    some intervening characters (like 'p'). e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    The intervening characters must not be digits, so that asking for
    partition 1 can never be answered with partition 11 or 21.

    :raises: Error if no such partition is found (the lookup is retried
             by the decorator before the error reaches the caller).
    :return: The /dev path of the partition.
    """
    partname = None
    error_msg = ""
    if is_mpath(dev):
        partname = get_partition_mpath(dev, pnum)
    else:
        name = get_dev_name(os.path.realpath(dev))
        sys_entry = os.path.join(BLOCKDIR, name)
        error_msg = " in %s" % sys_entry
        # what follows the base name: optional non-digit separator,
        # then exactly the partition number
        suffix_re = re.compile(r'\D*' + re.escape(str(pnum)) + r'\Z')
        for f in os.listdir(sys_entry):
            if not f.startswith(name):
                continue
            if not suffix_re.match(f[len(name):]):
                continue
            # we want the shortest name that starts with the base name
            # and ends with the partition number
            if not partname or len(f) < len(partname):
                partname = f
    if partname:
        return get_dev_path(partname)
    else:
        raise Error('partition %d for %s does not appear to exist%s' %
                    (pnum, dev, error_msg))
7c673cae
FG
777
778
def list_all_partitions():
    """
    Return a dict mapping each device name to the list of its partitions.

    Device names come from the entries of BLOCKDIR or, on FreeBSD, from
    the fourth column of ``<PROCDIR>/partitions``.
    """
    # initialised up front: the FreeBSD branch used to hit a NameError
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(get_dev_path(name))
    return dev_part_list
799
800
def list_partitions(dev):
    """
    List the partitions of a device, dispatching on whether its resolved
    path is managed by multipath.
    """
    dev = os.path.realpath(dev)
    lister = list_partitions_mpath if is_mpath(dev) else list_partitions_device
    return lister(dev)
807
808
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    List the holders of a multipath device whose device-mapper uuid
    matches (at its start) the regular expression ``part_re``.

    :return: A list of holder names, in ``os.listdir`` order.
    """
    p = block_path(dev)
    partitions = []
    holders = os.path.join(p, 'holders')
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # context manager: do not leak one file handle per holder
        with open(uuid_path, 'r') as uuid_file:
            dm_uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path +
                  " uuid = " + dm_uuid)
        if re.match(part_re, dm_uuid):
            partitions.append(holder)
    return partitions
820
821
def list_partitions_device(dev):
    """
    List the entries of the device's block_path() directory whose name
    starts with the device name, i.e. its partitions.
    """
    prefix = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(prefix)]
832
833
def get_partition_base(dev):
    """
    Find the whole-disk device a partition belongs to.

    :raises: Error if dev is not a disk device, is itself a whole disk,
             or no disk under /sys/block contains it.
    :return: The /dev path of the parent device.
    """
    dev = os.path.realpath(dev)
    if not ldev_is_diskdevice(dev):
        raise Error('not a block device', dev)

    sys_block = '/sys/block'
    name = get_dev_name(dev)
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', dev)

    # find the base
    for candidate in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, candidate, name)):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', dev)
851
852
def is_partition_mpath(dev):
    """
    True if the device-mapper uuid of dev starts with ``part<N>-mpath-``.

    False (rather than a TypeError) for devices that have no
    device-mapper uuid, for which get_dm_uuid() returns False.
    """
    dm_uuid = get_dm_uuid(dev)
    return bool(dm_uuid and re.match(r'part\d+-mpath-', dm_uuid))
856
857
def partnum_mpath(dev):
    """
    Extract the partition number, as a string, from the
    ``part<N>-mpath-`` portion of the device-mapper uuid of dev.

    The caller must pass a multipath partition: the lookup fails with an
    exception if the uuid does not contain that pattern.
    """
    dm_uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', dm_uuid)[0]
861
862
def get_partition_base_mpath(dev):
    """
    Return the ``/dev/mapper/<name>`` path of the device a multipath
    partition sits on, using the ``dm/name`` of its first sysfs slave.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # context manager: close the sysfs file deterministically
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
870
871
def is_partition(dev):
    """
    Tell a partition (True) from a full disk (False).

    :raises: Error if dev is not a disk device, or is neither listed in
             BLOCKDIR nor flagged as a partition in sysfs.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    st = os.lstat(dev)
    if not stmode_is_diskdevice(st.st_mode):
        raise Error('not a block device', dev)

    # whole disks have their own entry in BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    devnum = (os.major(st.st_rdev), os.minor(st.st_rdev))
    if not os.path.exists('/sys/dev/block/%d:%d/partition' % devnum):
        raise Error('not a disk or partition', dev)
    return True
895
896
def is_mounted(dev):
    """
    Check if the given device is mounted.

    ``<PROCDIR>/mounts`` is scanned for an entry whose (resolved) source
    device is the (resolved) device given.

    :return: The mount point of the first matching entry, or None.
    """
    dev = os.path.realpath(dev)
    # The mounts file is read in binary mode, so compare bytes with
    # bytes: comparing them with a text path never matches on Python 3.
    dev_bytes = dev if isinstance(dev, bytes) else dev.encode('utf-8')
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev_bytes:
                    return _bytes2str(path)
    return None
914
915
def is_held(dev):
    """
    List the holders of a device, i.e. the devices stacked on top of it
    (e.g., a dm-crypt mapping).

    :return: The content of the device's sysfs ``holders`` directory;
             an empty list for multipath devices or when no such
             directory is found.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    part = get_dev_name(os.path.realpath(dev))

    # full disk?
    disk_holders = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(disk_holders):
        return os.listdir(disk_holders)

    # partition? try every prefix of the name, longest first, as the
    # name of the disk it belongs to
    for end in range(len(part), 0, -1):
        part_holders = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(part_holders):
            return os.listdir(part_holders)
    return []
941
942
def verify_not_in_use(dev, check_partitions=False):
    """
    Make sure a device (path) is neither mounted nor held by a
    device-mapper mapping.

    With ``check_partitions`` set and dev being a whole disk, each of its
    partitions is verified the same way.

    :raises: Error if device is in use.
    """
    assert os.path.exists(dev)

    def ensure_free(device):
        if is_mounted(device):
            raise Error('Device is mounted', device)
        holders = is_held(device)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % device, ','.join(holders))

    ensure_free(dev)

    if check_partitions and not is_partition(dev):
        for partname in list_partitions(dev):
            ensure_free(get_dev_path(partname))
968
969
def must_be_one_line(line):
    """
    Validate that the input is exactly one newline-terminated line.

    :raises: TruncatedLineError if the trailing newline is missing,
             TooManyLinesError if another newline precedes it.
    :return: The decoded line, without its trailing newline.
    """
    text = _bytes2str(line)

    if not text.endswith('\n'):
        raise TruncatedLineError(text)
    content = text[:-1]
    if '\n' in content:
        raise TooManyLinesError(content)
    return content
985
986
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :raises: Error if the file content is not exactly one line.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # context manager: do not leak the file handle
        with open(path, 'rb') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
1014
1015
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline.

    The line is written to ``<path>.<pid>.tmp``, synced to disk, given
    its context with path_set_context() and then renamed over the final
    path, so that readers never observe a partially written file.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # fsync() only covers what reached the OS: push Python's own
        # buffer out first
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1029
1030
def init_get():
    """
    Ask 'ceph-detect-init' (defaulting to sysvinit) for the init system
    and return its single line of output.
    """
    detected = _check_output(
        args=['ceph-detect-init', '--default', 'sysvinit'],
    )
    return must_be_one_line(detected)
1043
1044
def check_osd_magic(path):
    """
    Verify that the 'magic' file under path holds the Ceph OSD magic.

    :raises: BadMagicError if the file is missing or holds anything
             else, i.e. this does not look like a Ceph OSD data dir.
    """
    magic = read_one_line(path, 'magic')
    # a missing file (None) most likely means: not mkfs'ed yet
    if magic is None or magic != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1058
1059
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :param osd_id: the id, as a string
    :raises: Error unless osd_id consists of ASCII digits only.
    """
    # \Z instead of $: '$' would also accept an id with a trailing newline
    if not re.match(r'^[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
1066
1067
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
    path,
):
    """
    Allocates an OSD id on the given cluster.

    If ``<STATEDIR>/osd-lockbox/<fsid>/whoami`` holds an id, that id is
    reused: the lockbox 'osd_keyring' is moved to ``<path>/keyring`` and
    the lockbox 'whoami' file is removed. Otherwise a new id is requested
    with ``ceph osd new <fsid> [<wanttobe>]`` and the OSD keyring is
    written to ``<path>/keyring``.

    :param cluster: cluster name, passed to ``ceph --cluster``
    :param fsid: passed to ``ceph osd new``; also names the lockbox dir
    :param keyring: keyring file used as client.bootstrap-osd
    :param path: OSD data directory ('wanttobe' is read from and removed
                 in it, 'keyring' is written to it)
    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """
    lockbox_path = os.path.join(STATEDIR, 'osd-lockbox', fsid)
    lockbox_osd_id = read_one_line(lockbox_path, 'whoami')
    osd_keyring = os.path.join(path, 'keyring')
    if lockbox_osd_id:
        LOG.debug('Getting OSD id from Lockbox...')
        osd_id = lockbox_osd_id
        shutil.move(os.path.join(lockbox_path, 'osd_keyring'),
                    osd_keyring)
        path_set_context(osd_keyring)
        # the id is consumed: a later call will allocate a fresh one
        os.unlink(os.path.join(lockbox_path, 'whoami'))
        return osd_id

    LOG.debug('Allocating OSD id...')
    secrets = Secrets()
    try:
        # 'wanttobe', when present, holds the id to ask for; the file is
        # removed once read
        wanttobe = read_one_line(path, 'wanttobe')
        if os.path.exists(os.path.join(path, 'wanttobe')):
            os.unlink(os.path.join(path, 'wanttobe'))
        id_arg = wanttobe and [wanttobe] or []
        # '-i -' makes ceph read its input from stdin, which is fed
        # with secrets.get_json()
        osd_id = command_with_stdin(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', keyring,
                '-i', '-',
                'osd', 'new',
                fsid,
            ] + id_arg,
            secrets.get_json()
        )
    # NOTE(review): command_with_stdin() reports a failing command with
    # SystemExit, not CalledProcessError, so this handler looks
    # unreachable for command failures - confirm.
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(osd_id)
    check_osd_id(osd_id)
    secrets.write_osd_keyring(osd_keyring, osd_id)
    return osd_id
1117
1118
def get_osd_id(path):
    """
    Read the OSD id stored in the 'whoami' file of the OSD at path.

    :raises: Error if the stored id is not numeric.
    :return: The id, or None if there is no 'whoami' file.
    """
    osd_id = read_one_line(path, 'whoami')
    if osd_id is None:
        return None
    check_osd_id(osd_id)
    return osd_id
1127
1128
def get_ceph_user():
    """
    Pick the user to run subprocesses as.

    A preferred user (CEPH_PREF_USER) wins but must exist: otherwise a
    message is printed and the program exits with status 2. Without a
    preference, 'ceph' is used if that user exists, else 'root'.
    """
    preferred = CEPH_PREF_USER

    if preferred is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(preferred)
    except KeyError:
        print("No such user:", preferred)
        sys.exit(2)
    return preferred
1145
1146
def get_ceph_group():
    """
    Pick the group to run subprocesses as.

    A preferred group (CEPH_PREF_GROUP) wins but must exist: otherwise a
    message is printed and the program exits with status 2. Without a
    preference, 'ceph' is used if that group exists, else 'root'.
    """
    preferred = CEPH_PREF_GROUP

    if preferred is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(preferred)
    except KeyError:
        print("No such group:", preferred)
        sys.exit(2)
    return preferred
1163
1164
def path_set_context(path):
    """
    Recursively reset the SELinux context of path (when 'restorecon' is
    available) and hand it over to ceph:ceph (when get_ceph_user() is
    'ceph'). The outcome of either command is not checked.
    """
    # restore selinux context to default policy values
    restorecon = which('restorecon')
    if restorecon:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    runs_as_ceph = get_ceph_user() == 'ceph'
    if runs_as_ceph:
        command(['chown', '-R', 'ceph:ceph', path])
1173
1174
def _check_output(args=None, **kwargs):
    """
    Run command() and return its stdout.

    :raises: subprocess.CalledProcessError on a non-zero exit status;
             its ``output`` is stdout followed by stderr.
    """
    stdout, stderr, status = command(args, **kwargs)
    if not status:
        return _bytes2str(stdout)
    failure = subprocess.CalledProcessError(status, args[0])
    failure.output = stdout + stderr
    raise failure
1183
1184
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    The lookup is done with ``ceph-conf --name=osd. --lookup``; only the
    first line of its output is used.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # 'err' is not bound when command() itself raised, so only the
        # exception can be reported here
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1218
1219
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code.

    The value is obtained from ``ceph-osd --show-config-value``; only
    the first line of the output is returned.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if ceph-osd exits with a non zero status.
    """
    argv = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        output = _check_output(args=argv, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line, _sep, _rest = str(output).partition('\n')
    return first_line
1248
1249
def get_fsid(cluster):
    """
    Get the fsid of the cluster.

    :raises: Error if the configuration only yields the all-zero uuid.
    :return: The fsid in lower case.
    """
    # uuids from boost always default to 'the empty uuid'
    empty_uuid = '00000000-0000-0000-0000-000000000000'

    fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    if fsid != empty_uuid:
        return fsid.lower()
    raise Error('getting cluster uuid from configuration failed')
1261
1262
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Get path to dmcrypt key file.

    The file is named after the uuid, with a ``.luks.key`` suffix when
    luks is true.

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    filename = (_uuid + ".luks.key") if luks else _uuid
    return os.path.join(key_dir, filename)
1279
1280
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Find the dm-crypt key for the partition identified by _uuid.

    A legacy key file under key_dir takes precedence. Otherwise the
    lockbox directory STATEDIR/osd-lockbox/<_uuid> is consulted and the
    key is fetched with ``ceph config-key get`` using the lockbox
    keyring.

    :return: a one element tuple holding the legacy key file path, or
             the base64 decoded key obtained via the lockbox.
    :raises: Error if no cluster matches the recorded ceph_fsid, the
             key-management-mode is unknown, the ceph command fails or
             neither a legacy key file nor a lockbox exists.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        return (legacy_path,)
    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if os.path.exists(path):
        mode = get_oneliner(path, 'key-management-mode')
        osd_uuid = get_oneliner(path, 'osd-uuid')
        ceph_fsid = read_one_line(path, 'ceph_fsid')
        if ceph_fsid is None:
            LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
                        "for cluster name")
            cluster = 'ceph'
        else:
            cluster = find_cluster_by_uuid(ceph_fsid)
            if cluster is None:
                raise Error('No cluster conf found in ' + SYSCONFDIR +
                            ' with fsid %s' % ceph_fsid)

        if mode == KEY_MANAGEMENT_MODE_V1:
            key, stderr, ret = command(
                [
                    'ceph',
                    '--cluster', cluster,
                    '--name',
                    'client.osd-lockbox.' + osd_uuid,
                    '--keyring',
                    os.path.join(path, 'keyring'),
                    'config-key',
                    'get',
                    'dm-crypt/osd/' + osd_uuid + '/luks',
                ],
            )
            LOG.debug("stderr " + stderr)
            # An explicit check instead of an assert: asserts vanish
            # under -O and whatever the failed command printed would
            # then be decoded and used as the encryption key.
            if ret != 0:
                raise Error('unable to fetch dm-crypt key for osd uuid ' +
                            str(osd_uuid), stderr)
            return base64.b64decode(key)
        else:
            raise Error('unknown key-management-mode ' + str(mode))
    raise Error('unable to read dm-crypt key', path, legacy_path)
1324
1325
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map rawdev with cryptsetup under the name _uuid.

    key is either a tuple whose first element is the path of a legacy
    key file, or the key itself, which is then fed to cryptsetup on
    stdin. With luks the device is opened with luksOpen (after a
    luksFormat when format_dev is true); otherwise a plain mapping is
    created.

    :return: the already existing mapping reported by
             dmcrypt_is_mapped(), or /dev/mapper/<_uuid> once mapped.
    :raises: Error if one of the commands fails.
    """
    existing = dmcrypt_is_mapped(_uuid)
    if existing:
        return existing

    keypath = '-'
    if isinstance(key, tuple):
        # legacy, before lockbox
        assert os.path.exists(key[0])
        keypath = key[0]
        key = None

    dev = '/dev/mapper/' + _uuid
    key_file_opt = ['--key-file', keypath]

    luksFormat_args = (['cryptsetup', '--batch-mode'] + key_file_opt +
                       ['luksFormat', rawdev] + cryptsetup_parameters)
    luksOpen_args = (['cryptsetup'] + key_file_opt +
                     ['luksOpen', rawdev, _uuid])
    create_args = (['cryptsetup'] + key_file_opt +
                   ['create', _uuid, rawdev] + cryptsetup_parameters)

    try:
        if not luks:
            # Plain mode has no format function, nor any validation
            # that the key is correct.
            command_with_stdin(create_args, key)
        else:
            if format_dev:
                command_with_stdin(luksFormat_args, key)
            command_with_stdin(luksOpen_args, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', dev])
        return dev

    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
1388
1389
3efd9988
FG
@retry(Error, max_tries=10, wait=0.5, backoff=1.0)
def dmcrypt_unmap(_uuid):
    """
    Remove the dm-crypt mapping /dev/mapper/<_uuid> if it exists.

    :raises: Error if ``cryptsetup remove`` fails (this is the
             exception type handed to the retry decorator).
    """
    mapping = '/dev/mapper/' + _uuid
    if os.path.exists(mapping):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
        except subprocess.CalledProcessError as e:
            raise Error('unable to unmap device', _uuid, e)
7c673cae
FG
1398
1399
def mount(
    dev,
    fstype,
    options,
):
    """
    Mounts a device with given filesystem type and mount options on a
    freshly created ``mnt.*`` directory under STATEDIR/tmp.

    :raises: ValueError if dev or fstype is None, MountError if the
             mount command fails (the directory is removed again).
    :return: the path of the mount point.
    """
    # sanity check: dev and fstype are mandatory
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp expects 'dir' to exist on the system, make sure it does
    tmp_root = STATEDIR + '/tmp'
    if not os.path.exists(tmp_root):
        os.makedirs(tmp_root)

    mountpoint = tempfile.mkdtemp(prefix='mnt.', dir=tmp_root)
    mount_argv = ['mount', '-t', fstype, '-o', options, '--', dev, mountpoint]
    try:
        LOG.debug('Mounting %s on %s with options %s',
                  dev, mountpoint, options)
        command_check_call(mount_argv)
        if which('restorecon'):
            command(['restorecon', mountpoint])
    except subprocess.CalledProcessError as e:
        # best effort cleanup of the directory created above
        try:
            os.rmdir(mountpoint)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return mountpoint
1457
1458
@retry(UnmountError, max_tries=3, wait=0.5, backoff=1.0)
def unmount(
    path,
    do_rm=True,
):
    """
    Unmount the given mount point and, unless do_rm is false, remove
    the directory afterwards.

    :raises: UnmountError if /bin/umount fails (this is the exception
             type handed to the retry decorator).
    """
    LOG.debug('Unmounting %s', path)
    try:
        command_check_call(['/bin/umount', '--', path])
    except subprocess.CalledProcessError as e:
        raise UnmountError(e)

    if do_rm:
        os.rmdir(path)
1481
1482
1483###########################################
1484
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from ``parted --machine`` output.

    Every line that starts with digits contributes that number.

    :param partitions: the part of the parted output listing partitions.
    :return: a list of ints, empty if no line starts with a digit. A
             real list (not a ``map`` iterator) is returned so callers
             can test it for emptiness on Python 3 as well.
    """
    numbers_as_strings = re.findall(r'^\d+', partitions, re.MULTILINE)
    return [int(number) for number in numbers_as_strings]
1488
1489
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    The index is derived from the output of ``parted --machine``: one
    more than the highest partition number listed after the device
    line.

    :raises: Error if parted prints nothing or its output lacks the
             expected unit marker or the device path.
    :return: Index number (> 1 if there is already a partition on the device)
             or 1 if there is no partition table.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # Split once only: everything after the first mention of the device
    # is the partition listing, even if the path shows up again later.
    _, partitions = lines.split(real_dev, 1)
    # Materialize the numbers so the emptiness test below is meaningful
    # even if the helper hands back an iterator (map() on Python 3).
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    return 1
1528
1529
def check_journal_reqs(args):
    """
    Ask ceph-osd whether the object store allows, wants and needs a
    journal.

    ceph-osd is run once per question for cluster args.cluster; an exit
    status of zero is reported as True.

    :return: tuple (allows_journal, wants_journal, needs_journal)
    """
    def probe(check_flag):
        _, _, status = command([
            'ceph-osd', check_flag,
            '-i', '0',
            '--log-file', '$run_dir/$cluster-osd-check.log',
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        return not status

    allows_journal = probe('--check-allows-journal')
    wants_journal = probe('--check-wants-journal')
    needs_journal = probe('--check-needs-journal')
    return (allows_journal, wants_journal, needs_journal)
1556
1557
def update_partition(dev, description):
    """
    Must be called after modifying a partition table so the kernel
    knows about the change and fires udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The first udevadm settle waits for ongoing udev events to
    complete, just in case one of them relies on an existing partition
    on dev. The second udevadm settle guarantees to the caller that
    all udev events related to the partition table change have been
    processed, i.e. the 95-ceph-osd.rules actions and mode changes,
    group changes etc. are complete.

    partprobe is attempted up to five times, sleeping 60 seconds after
    each failure that looks like a busy device; any other failure is
    re-raised immediately.

    :raises: Error if partprobe still fails after the last attempt.
    """
    def settle():
        command_check_call(['udevadm', 'settle', '--timeout=600'])

    LOG.debug('Calling partprobe on %s device %s', description, dev)
    partprobe = _get_command_executable(['partprobe'])[0]
    error = 'unknown error'
    for _attempt in range(5):
        settle()
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            device_busy = ('unable to inform the kernel' in error or
                           'Device or resource busy' in error)
            if not device_busy:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed with a "busy" kind of error
        raise Error('partprobe %s failed : %s' % (dev, error))
    settle()
1591
1592
def zap_linux(dev):
    """
    Wipe every partition of dev and then its partition table.

    Each partition is cleared with wipefs and has its beginning
    overwritten with zeros; the end of the device is zeroed as well,
    sgdisk resets the partition table and the kernel is told about the
    change with update_partition().

    :raises: Error if one of the external commands fails.
    """
    try:
        # Thoroughly wipe all partitions of any traces of
        # Filesystems or OSD Journals
        #
        # In addition we need to write 110M (read following comment for more
        # details on the context of this magic number) of data to each
        # partition to make sure that after re-creating the same partition
        # there is no trace left of any previous Filesystem or OSD
        # Journal

        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for partname in list_partitions(dev):
            partition = get_dev_path(partname)
            command_check_call(['wipefs', '--all', partition])

            # a typical bluestore device has
            # 1. a 100M xfs data partition
            # 2. a bluestore_block_size block partition
            # 3. a bluestore_block_db_size block.db partition
            # 4. a bluestore_block_wal_size block.wal partition
            # so we need to wipe out the bits storing bluestore's
            # collections' meta information in that case, to prevent the
            # OSD from comparing the meta data, like OSD id and fsid,
            # stored on the device to be zapped with the ones passed in.
            # here, we assume that the allocator of bluestore puts these
            # meta data at the beginning of the block partition. without
            # knowing the actual layout of the bluefs, we add an extra
            # 10M to be on the safe side. if this partition was formatted
            # for a filesystem, 10MB would be more than enough to nuke
            # its superblock.
            zero_count = min(PrepareBluestoreData.SPACE_SIZE + 10,
                             get_dev_size(partition))
            dd_argv = [
                'dd',
                'if=/dev/zero',
                'of={path}'.format(path=partition),
                'bs=1M',
                'count={count}'.format(count=zero_count),
            ]
            command_check_call(dd_argv)

        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups. sgdisk
        # isn't too thorough.
        lba_size = 4096
        tail_size = 33 * lba_size
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-tail_size, os.SEEK_END)
            dev_file.write(tail_size * b'\0')

        command_check_call(['sgdisk', '--zap-all', '--', dev])
        command_check_call(['sgdisk', '--clear', '--mbrtogpt', '--', dev])
        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1673
1674
224ce89b
WB
def zap_freebsd(dev):
    """
    Destroy the partitioning of dev with ``gpart destroy -F``.

    :raises: Error if gpart fails.
    """
    # For FreeBSD we just need to zap the partition.
    gpart_argv = ['gpart', 'destroy', '-F', dev]
    try:
        command_check_call(gpart_argv)
    except subprocess.CalledProcessError as e:
        raise Error(e)
1689
1690
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    The path is resolved first and must point at a block device that
    is not a partition.

    :raises: Error if dev is not a whole block device.
    """
    dev = os.path.realpath(dev)
    mode = os.stat(dev).st_mode
    whole_disk = stat.S_ISBLK(mode) and not is_partition(dev)
    if not whole_disk:
        raise Error('not full block device; cannot zap', dev)

    platform_zap = zap_freebsd if FREEBSD else zap_linux
    platform_zap(dev)
1703
1704
7c673cae
FG
def adjust_symlink(target, path):
    """
    Make path a symlink pointing at target.

    A regular file at path, or a symlink pointing somewhere else, is
    removed first. A symlink that already points at target is left
    alone. Anything else found at path is not removed, so creating the
    symlink fails and is reported as Error.

    :raises: Error if the old entry cannot be inspected or removed, or
             if the symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        except OSError as e:
            # only filesystem errors are translated; a bare except
            # would also swallow KeyboardInterrupt and SystemExit
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path, e)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except OSError as e:
            raise Error('unable to create symlink %s -> %s' % (path, target),
                        e)
1729
1730
def get_mount_options(cluster, fs_type):
    """
    Look up the mount options configured for fs_type.

    ``osd_mount_options_<fs_type>`` is consulted first and returned
    with all whitespace removed. If it is not set,
    ``osd_fs_mount_options_<fs_type>`` is returned exactly as get_conf()
    reports it, which may be None.
    """
    preferred = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(fstype=fs_type),
    )
    if preferred is not None:
        # remove whitespaces
        return "".join(preferred.split())

    # NOTE(review): the fallback value is not stripped of whitespace,
    # unlike the preferred one -- confirm whether that is intended.
    return get_conf(
        cluster,
        variable='osd_fs_mount_options_{fstype}'.format(fstype=fs_type),
    )
1749
1750
class Device(object):
    """
    A whole disk (not a partition) on which partitions are created
    with sgdisk.

    The device size and the partition objects returned by
    get_partition() are cached on the instance.
    """

    def __init__(self, path, args):
        self.path = path
        self.args = args
        # filled in lazily by get_dev_size() / ptype_tobe_for_name()
        self.dev_size = None
        self.ptype_map = None
        # partition number -> DevicePartition
        self.partitions = {}
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition called 'ceph <name>' with the given GUID.

        :param size: size in M, or 0 to use the largest free area.
        :param num: partition number, or 0 to use the next free index.
        :raises: Error if size is bigger than the device.
        :return: the number of the new partition.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size > 0:
            dev_size = self.get_dev_size()
            if size > dev_size:
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, dev_size))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, dev_size, name))
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
        else:
            new = '--largest-new={num}'.format(num=num)

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_argv = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_argv, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type GUID for a partition name.

        'data' is an alias for 'osd'. The lockbox type only depends on
        whether the device is multipath; every other name is looked up
        in the ptype map of a partition built for this device.
        """
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if self.ptype_map is None:
            template = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = template.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """Return the (cached) DevicePartition for partition num."""
        if num in self.partitions:
            return self.partitions[num]
        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return self.partitions[num]

    def get_dev_size(self):
        """Return the size reported by get_dev_size(), cached."""
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        return Device(path, args)
1826
1827
class DevicePartition(object):
    """
    A partition of a Device.

    This base class stands for a regular partition where the raw
    device and the usable device are the same. factory() picks the
    subclass matching multipath or dm-crypt setups.
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        # dev is the device to use, rawdev the underlying partition
        self.dev = None
        self.rawdev = None
        # uuid and ptype are looked up lazily by their getters
        self.uuid = None
        self.ptype = None
        self.ptype_map = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the partition uuid of rawdev, cached."""
        if self.uuid is None:
            self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type of rawdev, cached."""
        if self.ptype is None:
            self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        """Use dev both as the raw and as the usable device."""
        self.rawdev = dev
        self.dev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type GUID for name."""
        return self.ptype_map[name]['ready']

    @staticmethod
    @retry(OSError)
    def factory(path, dev, args):
        """
        Build the partition object for dev.

        Multipath takes precedence over dm-crypt: if either path or dev
        is a multipath device a DevicePartitionMultipath is returned,
        otherwise the class is chosen from the dmcrypt type in args.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        multipath = any(candidate is not None and is_mpath(candidate)
                        for candidate in (path, dev))
        if multipath:
            partition_class = DevicePartitionMultipath
        elif dmcrypt_type == 'luks':
            partition_class = DevicePartitionCryptLuks
        elif dmcrypt_type == 'plain':
            partition_class = DevicePartitionCryptPlain
        else:
            partition_class = DevicePartition
        partition = partition_class(args)
        partition.set_dev(dev)
        return partition
1887
1888
class DevicePartitionMultipath(DevicePartition):
    """Partition of a multipath device; uses the 'mpath' type GUIDs."""

    def set_variables_ptype(self):
        mpath_types = PTYPE['mpath']
        self.ptype_map = mpath_types
1893
1894
class DevicePartitionCrypt(DevicePartition):
    """
    Base class for dm-crypt backed partitions.

    Subclasses provide luks() and setup_crypt(); the latter is expected
    to fill in osd_dm_key before the partition is mapped.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        self.osd_dm_key = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        pass

    def map(self):
        """Map rawdev through dm-crypt and make dev the mapped device."""
        self.setup_crypt()
        mapped = _dmcrypt_map(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = mapped

    def unmap(self):
        """Remove the dm-crypt mapping and fall back to rawdev."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        self.setup_crypt()
        self.map()
1927
1928
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """dm-crypt partition in plain (non LUKS) mode."""

    def luks(self):
        return False

    def setup_crypt(self):
        """
        Add the key size to the cryptsetup parameters and fetch the
        key. Does nothing once the key has been obtained.
        """
        if self.osd_dm_key is None:
            self.cryptsetup_parameters += ['--key-size',
                                           str(self.dmcrypt_keysize)]

            self.osd_dm_key = get_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir,
                False)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1946
1947
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """dm-crypt partition in LUKS mode."""

    def luks(self):
        return True

    def setup_crypt(self):
        """
        Add the key size to the cryptsetup parameters (unless it is
        1024) and fetch the key. Does nothing once the key has been
        obtained.
        """
        if self.osd_dm_key is not None:
            return

        # A key size of 1024 is not forced into the
        # cryptsetup_parameters, as we want the cryptsetup defaults
        # to prevail for the actual LUKS key lengths.
        if self.dmcrypt_keysize != 1024:
            self.cryptsetup_parameters += ['--key-size',
                                           str(self.dmcrypt_keysize)]

        self.osd_dm_key = get_dmcrypt_key(
            self.get_uuid(), self.args.dmcrypt_key_dir,
            True)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']
1972
1973
class Prepare(object):
    """
    Entry point of the ``prepare`` subcommand.

    factory() selects the bluestore or filestore implementation, whose
    _prepare() is run by prepare(), under prepare_lock unless
    --no-locking was given.
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """Return the parser for the options shared by all prepares."""
        parser = argparse.ArgumentParser(add_help=False)
        add = parser.add_argument
        add('--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to')
        add('--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to')
        add('--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to')
        add('--osd-id',
            metavar='ID',
            help='unique OSD id to assign this disk to')
        add('--crush-device-class',
            help='crush device class to assign this disk to')
        add('--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt')
        add('--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored')
        add('--prepare-key',
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            default='{statedir}/bootstrap-osd/{cluster}.keyring',
            dest='prepare_key_template')
        add('--no-locking',
            action='store_true', default=None,
            help='let many prepare\'s run in parallel')
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """Register the ``prepare`` subcommand on subparsers."""
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
            Lockbox.parser(),
        ]
        parents += PrepareFilestore.parent_parsers()
        parents += PrepareBluestore.parent_parsers()
        description = textwrap.fill(textwrap.dedent("""\
            If the --bluestore argument is given, a bluestore objectstore
            will be created. If --filestore is provided, a legacy FileStore
            objectstore will be created. If neither is specified, we default
            to BlueStore.

            When an entire device is prepared for bluestore, two
            partitions are created. The first partition is for metadata,
            the second partition is for blocks that contain data.

            Unless explicitly specified with --block.db or
            --block.wal, the bluestore DB and WAL data is stored on
            the main block device. For instance:

            ceph-disk prepare --bluestore /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block, db, and wal data (the rest of the disk)


            If either --block.db or --block.wal are specified to be
            the same whole device, they will be created as partition
            three and four respectively. For instance:

            ceph-disk prepare --bluestore \\
            --block.db /dev/sdc \\
            --block.wal /dev/sdc \\
            /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block (the rest of the disk)
            /dev/sdc3 for db
            /dev/sdc4 for wal

            """))
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description,
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(
            func=Prepare.main,
        )
        return parser

    def prepare(self):
        """Run _prepare(), serialized by prepare_lock unless disabled."""
        if not self.args.no_locking:
            with prepare_lock:
                self._prepare()
        else:
            self._prepare()

    @staticmethod
    def factory(args):
        """Return the Prepare subclass selected by args.bluestore."""
        prepare_class = PrepareBluestore if args.bluestore else PrepareFilestore
        return prepare_class(args)

    @staticmethod
    def main(args):
        Prepare.factory(args).prepare()
2108
2109
class PrepareFilestore(Prepare):
    """Prepare an OSD with a FileStore data area and a journal."""

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # the lockbox only exists when dm-crypt was requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        """Prepare the lockbox (with dm-crypt), then data and journal."""
        wants_dmcrypt = self.data.args.dmcrypt
        if wants_dmcrypt:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2129
2130
class PrepareBluestore(Prepare):
    """Prepare an OSD with BlueStore block, block.db and block.wal."""

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # the lockbox only exists when dm-crypt was requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """
        Return the parser for the object store selection.

        --bluestore and --filestore share the ``bluestore``
        destination, which defaults to True.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--bluestore',
            dest='bluestore',
            action='store_true', default=True,
            help='bluestore objectstore',
        )
        parser.add_argument(
            '--filestore',
            dest='bluestore',
            action='store_false',
            help='filestore objectstore',
        )
        return parser

    @staticmethod
    def parent_parsers():
        parser_owners = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [owner.parser() for owner in parser_owners]

    def _prepare(self):
        """
        Prepare the lockbox (with dm-crypt) and then the data together
        with block.db and block.wal, when requested, and block.
        """
        data_args = self.data.args
        if data_args.dmcrypt:
            self.lockbox.prepare()
        optional_spaces = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        to_prepare_list = [space for attribute, space in optional_spaces
                           if getattr(data_args, attribute)]
        to_prepare_list.append(self.block)
        self.data.prepare(*to_prepare_list)
2178
2179
class Space(object):
    """Holder for the names of the spaces an OSD can be given."""

    # every name a space may go by
    NAMES = (
        'block',
        'journal',
        'block.db',
        'block.wal',
    )
2183
2184
2185class PrepareSpace(object):
2186
2187 NONE = 0
2188 FILE = 1
2189 DEVICE = 2
2190
2191 def __init__(self, args):
2192 self.args = args
2193 self.set_type()
2194 self.space_size = self.get_space_size()
2195 if getattr(self.args, self.name + '_uuid') is None:
2196 setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
2197 self.space_symlink = None
2198 self.space_dmcrypt = None
2199
2200 def set_type(self):
2201 name = self.name
2202 args = self.args
7c673cae 2203 if (self.wants_space() and
224ce89b 2204 dev_is_diskdevice(args.data) and
7c673cae
FG
2205 not is_partition(args.data) and
2206 getattr(args, name) is None and
2207 getattr(args, name + '_file') is None):
2208 LOG.info('Will colocate %s with data on %s',
2209 name, args.data)
2210 setattr(args, name, args.data)
2211
2212 if getattr(args, name) is None:
2213 if getattr(args, name + '_dev'):
2214 raise Error('%s is unspecified; not a block device' %
2215 name.capitalize(), getattr(args, name))
2216 self.type = self.NONE
2217 return
2218
2219 if not os.path.exists(getattr(args, name)):
2220 if getattr(args, name + '_dev'):
2221 raise Error('%s does not exist; not a block device' %
2222 name.capitalize(), getattr(args, name))
2223 self.type = self.FILE
2224 return
2225
2226 mode = os.stat(getattr(args, name)).st_mode
224ce89b 2227 if stmode_is_diskdevice(mode):
7c673cae
FG
2228 if getattr(args, name + '_file'):
2229 raise Error('%s is not a regular file' % name.capitalize,
2230 getattr(args, name))
2231 self.type = self.DEVICE
2232 return
2233
2234 if stat.S_ISREG(mode):
2235 if getattr(args, name + '_dev'):
2236 raise Error('%s is not a block device' % name.capitalize,
2237 getattr(args, name))
2238 self.type = self.FILE
2239 return
2240
2241 raise Error('%s %s is neither a block device nor regular file' %
2242 (name.capitalize, getattr(args, name)))
2243
2244 def is_none(self):
2245 return self.type == self.NONE
2246
2247 def is_file(self):
2248 return self.type == self.FILE
2249
2250 def is_device(self):
2251 return self.type == self.DEVICE
2252
2253 @staticmethod
2254 def parser(name, positional=True):
2255 parser = argparse.ArgumentParser(add_help=False)
2256 parser.add_argument(
2257 '--%s-uuid' % name,
2258 metavar='UUID',
2259 help='unique uuid to assign to the %s' % name,
2260 )
2261 parser.add_argument(
2262 '--%s-file' % name,
2263 action='store_true', default=None,
2264 help='verify that %s is a file' % name.upper(),
2265 )
2266 parser.add_argument(
2267 '--%s-dev' % name,
2268 action='store_true', default=None,
2269 help='verify that %s is a block device' % name.upper(),
2270 )
2271
2272 if positional:
2273 parser.add_argument(
2274 name,
2275 metavar=name.upper(),
2276 nargs='?',
2277 help=('path to OSD %s disk block device;' % name +
2278 ' leave out to store %s in file' % name),
2279 )
2280 return parser
2281
2282 def wants_space(self):
2283 return True
2284
2285 def populate_data_path(self, path):
2286 if self.type == self.DEVICE:
2287 self.populate_data_path_device(path)
2288 elif self.type == self.FILE:
2289 self.populate_data_path_file(path)
2290 elif self.type == self.NONE:
2291 pass
2292 else:
2293 raise Error('unexpected type ', self.type)
2294
2295 def populate_data_path_file(self, path):
2296 space_uuid = self.name + '_uuid'
2297 if getattr(self.args, space_uuid) is not None:
2298 write_one_line(path, space_uuid,
2299 getattr(self.args, space_uuid))
2300 if self.space_symlink is not None:
2301 adjust_symlink(self.space_symlink,
2302 os.path.join(path, self.name))
2303
2304 def populate_data_path_device(self, path):
2305 self.populate_data_path_file(path)
2306
2307 if self.space_dmcrypt is not None:
2308 adjust_symlink(self.space_dmcrypt,
2309 os.path.join(path, self.name + '_dmcrypt'))
2310 else:
2311 try:
2312 os.unlink(os.path.join(path, self.name + '_dmcrypt'))
2313 except OSError:
2314 pass
2315
2316 def prepare(self):
2317 if self.type == self.DEVICE:
2318 self.prepare_device()
2319 elif self.type == self.FILE:
2320 self.prepare_file()
2321 elif self.type == self.NONE:
2322 pass
2323 else:
2324 raise Error('unexpected type ', self.type)
2325
def prepare_file(self):
    """Use a plain file as the space, creating it empty if missing.

    The file named by the ``<name>`` argument is created with size 0 when
    it does not exist yet, and becomes ``self.space_symlink``.
    """
    target = getattr(self.args, self.name)

    if not os.path.exists(target):
        LOG.debug('Creating %s file %s with size 0'
                  ' (ceph-osd will resize and allocate)',
                  self.name,
                  target)
        # Opening for writing is enough to create the empty file.
        with open(target, 'wb'):
            pass
        path_set_context(target)

    LOG.debug('%s is file %s', self.name.capitalize(), target)
    LOG.warning('OSD will not be hot-swappable if %s is '
                'not the same device as the osd data',
                self.name)
    self.space_symlink = target
2344
def prepare_device(self):
    """Prepare a block device (whole disk or partition) as the space.

    If the ``<name>`` argument is already a partition:
      * a dmcrypt partition is rejected with ``Error``;
      * a partition carrying the ceph-disk ptype for this space is
        reused, keeping its partition uuid;
      * any other partition is symlinked directly and nothing else is
        done.

    Otherwise a new partition is created on the device, formatted and
    mapped when it is a dmcrypt partition, and tagged with the ptype for
    this space via ``sgdisk``.

    Raises ``Error`` when a reused partition has no device node at the
    expected path.
    """
    dev = getattr(self.args, self.name)
    uuid_attr = self.name + '_uuid'
    reusing_partition = False

    if is_partition(dev):
        LOG.debug('%s %s is a partition', self.name.capitalize(), dev)
        partition = DevicePartition.factory(
            path=None, dev=dev, args=self.args)
        if isinstance(partition, DevicePartitionCrypt):
            raise Error(dev +
                        ' partition already exists'
                        ' and --dmcrypt specified')
        LOG.warning('OSD will not be hot-swappable'
                    ' if %s is not'
                    ' the same device as the osd data',
                    self.name)
        if partition.get_ptype() == partition.ptype_for_name(self.name):
            LOG.debug('%s %s was previously prepared with '
                      'ceph-disk. Reusing it.',
                      self.name.capitalize(),
                      dev)
            reusing_partition = True
            # Read and reuse the partition uuid from this journal's
            # previous life. We reuse the uuid instead of changing it
            # because udev does not reliably notice changes to an
            # existing partition's GUID. See
            # http://tracker.ceph.com/issues/10146
            setattr(self.args, uuid_attr, partition.get_uuid())
            LOG.debug('Reusing %s with uuid %s',
                      self.name,
                      getattr(self.args, uuid_attr))
        else:
            LOG.warning('%s %s was not prepared with '
                        'ceph-disk. Symlinking directly.',
                        self.name.capitalize(),
                        dev)
            self.space_symlink = dev
            return

    self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
        uuid=getattr(self.args, uuid_attr))

    if self.args.dmcrypt:
        self.space_dmcrypt = self.space_symlink
        self.space_symlink = '/dev/mapper/{uuid}'.format(
            uuid=getattr(self.args, uuid_attr))

    if reusing_partition:
        # The symlink should exist since this was an active space in the
        # past; continuing otherwise would be futile. An explicit Error
        # is used instead of assert so the check survives `python -O`.
        if not os.path.exists(self.space_symlink):
            raise Error('%s does not exist, cannot reuse %s %s' %
                        (self.space_symlink, self.name, dev))
        return

    num = self.desired_partition_number()

    if num == 0:
        LOG.warning('OSD will not be hot-swappable if %s '
                    'is not the same device as the osd data',
                    self.name)

    device = Device.factory(dev, self.args)
    # create_partition returns the partition number actually used.
    num = device.create_partition(
        uuid=getattr(self.args, uuid_attr),
        name=self.name,
        size=self.space_size,
        num=num)

    partition = device.get_partition(num)

    LOG.debug('%s is GPT partition %s',
              self.name.capitalize(),
              self.space_symlink)

    if isinstance(partition, DevicePartitionCrypt):
        partition.format()
        partition.map()

    # Tag the new partition with the ptype matching this space.
    command_check_call(
        [
            'sgdisk',
            '--typecode={num}:{uuid}'.format(
                num=num,
                uuid=partition.ptype_for_name(self.name),
            ),
            '--',
            dev,
        ],
    )
    update_partition(dev, 'prepared')

    LOG.debug('%s is GPT partition %s',
              self.name.capitalize(),
              self.space_symlink)
2438
2439
class PrepareJournal(PrepareSpace):
    """Prepare the ``journal`` space of an OSD."""

    def __init__(self, args):
        self.name = 'journal'
        journal_reqs = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = journal_reqs

        if args.journal and not self.allows_journal:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """Return the ``wants_journal`` flag from check_journal_reqs."""
        return self.wants_journal

    def get_space_size(self):
        """Return ``osd_journal_size`` from the cluster config as an int."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """Return 2 when journal and data share a disk, else 0."""
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        return 2 if self.args.journal == self.args.data else 0

    @staticmethod
    def parser():
        """Return the argument parser for the journal space."""
        return PrepareSpace.parser('journal')
2474
2475
class PrepareBluestoreBlock(PrepareSpace):
    """Prepare the bluestore ``block`` space of an OSD."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the block size in MB, or 0 when not configured.

        ``bluestore_block_size`` is read from the cluster config and
        divided by 1048576. Floor division keeps the result an int on
        both Python 2 and Python 3.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block and data share a disk, else 0."""
        if self.args.block == self.args.data:
            return 2
        return 0

    @staticmethod
    def parser():
        """Return the argument parser for the block space."""
        return PrepareSpace.parser('block')
2503
2504
class PrepareBluestoreBlockDB(PrepareSpace):
    """Prepare the bluestore ``block.db`` space of an OSD."""

    def __init__(self, args):
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the block.db size in MB.

        Uses ``bluestore_block_db_size`` when it is set and non-zero.
        Otherwise falls back to 1% of ``bluestore_block_size`` with a
        floor of 1024 MB, or 1024 MB when neither is configured. Floor
        division keeps the result an int on both Python 2 and Python 3.
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is not None and int(block_db_size) != 0:
            return int(block_db_size) // 1048576  # MB

        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )
        if block_size is None:
            return 1024  # MB
        size = int(block_size) // 100 // 1048576
        return max(size, 1024)  # MB

    def desired_partition_number(self):
        """Return 3 when block.db and data share a disk, else 0."""
        if getattr(self.args, 'block.db') == self.args.data:
            return 3
        return 0

    def wants_space(self):
        """block.db always answers False here."""
        return False

    @staticmethod
    def parser():
        """Return the parser, extended with the ``--block.db`` option."""
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2548
2549
class PrepareBluestoreBlockWAL(PrepareSpace):
    """Prepare the bluestore ``block.wal`` space of an OSD."""

    def __init__(self, args):
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the block.wal size in MB (576 when not configured).

        ``bluestore_block_wal_size`` is read from the cluster config and
        divided by 1048576. Floor division keeps the result an int on
        both Python 2 and Python 3.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal and data share a disk, else 0."""
        if getattr(self.args, 'block.wal') == self.args.data:
            return 4
        return 0

    def wants_space(self):
        """block.wal always answers False here."""
        return False

    @staticmethod
    def parser():
        """Return the parser, extended with the ``--block.wal`` option."""
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2586
2587
class CryptHelpers(object):
    """Static helpers reading the dmcrypt-related cluster configuration."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return ``osd_cryptsetup_parameters`` split shell-style.

        An unset option yields an empty list.
        """
        raw = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        return [] if raw is None else shlex.split(raw)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dmcrypt key size for the configured dmcrypt type.

        Defaults are 1024 for luks and 256 for plain; 0 is returned when
        dmcrypt is not in use.
        """
        keysize_str = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        kind = CryptHelpers.get_dmcrypt_type(args)

        if kind == 'luks':
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024 if keysize_str is None else int(keysize_str)

        if kind == 'plain':
            if keysize_str is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % keysize_str)
            return int(keysize_str)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain', or None when dmcrypt is not requested.

        Raises ``Error`` when ``osd_dmcrypt_type`` holds any other value.
        """
        if not (hasattr(args, 'dmcrypt') and args.dmcrypt):
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        # luks is the default when nothing is configured.
        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2648
2649
c07f9fc5
FG
class Secrets(object):
    """Holds a freshly generated cephx secret for an OSD."""

    def __init__(self):
        # ceph-authtool prints a newly generated key on stdout.
        out, err, status = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + err)
        assert status == 0
        self.keys = dict(cephx_secret=out.strip())

    def write_osd_keyring(self, keyring, osd_id):
        """Create ``keyring`` holding the cephx secret for ``osd.<osd_id>``."""
        entity = 'osd.' + str(osd_id)
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', entity,
                '--add-key', self.keys['cephx_secret'],
            ])
        path_set_context(keyring)

    def get_json(self):
        """Return the keys serialized as an ASCII JSON bytearray."""
        serialized = json.dumps(self.keys)
        return bytearray(serialized, 'ascii')
2672
2673
class LockboxSecrets(Secrets):
    """Secrets extended with a dmcrypt key and a lockbox cephx secret."""

    def __init__(self, args):
        super(LockboxSecrets, self).__init__()

        # The key size is expressed in bits; read that many random bytes.
        # Floor division keeps the byte count an int on Python 3, and the
        # context manager closes /dev/urandom once the key is read.
        key_size = CryptHelpers.get_dmcrypt_keysize(args)
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key).decode('ascii')

        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        assert ret == 0

        self.keys.update({
            'dmcrypt_key': base64_key,
            'cephx_lockbox_secret': secret.strip(),
        })

    def write_lockbox_keyring(self, path, osd_uuid):
        """Create ``<path>/keyring`` for ``client.osd-lockbox.<osd_uuid>``."""
        keyring = os.path.join(path, 'keyring')
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'client.osd-lockbox.' + osd_uuid,
                '--add-key', self.keys['cephx_lockbox_secret'],
            ])
        path_set_context(keyring)
2702
2703
7c673cae
FG
class Lockbox(object):
    """Create, populate and activate the lockbox partition of an OSD.

    The lockbox is a small ext4 partition mounted under
    ``STATEDIR/osd-lockbox/<osd_uuid>`` that stores keyrings and the
    uuids of the spaces belonging to the OSD.
    """

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.device = None

        # When no lockbox device was given, put it on the data device.
        if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
            self.args.lockbox = self.args.data

    def set_partition(self, partition):
        """Use an already known partition as the lockbox."""
        self.partition = partition

    @staticmethod
    def parser():
        """Return the parser for the ``--lockbox*`` options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--lockbox',
            help='path to the device to store the lockbox',
        )
        parser.add_argument(
            '--lockbox-uuid',
            metavar='UUID',
            help='unique lockbox uuid',
        )
        return parser

    def create_partition(self):
        """Create the 10 MB lockbox partition (number 5) and return it."""
        self.device = Device.factory(self.args.lockbox, argparse.Namespace())
        partition_number = 5
        self.device.create_partition(uuid=self.args.lockbox_uuid,
                                     name='lockbox',
                                     num=partition_number,
                                     size=10)  # MB
        return self.device.get_partition(partition_number)

    def set_or_create_partition(self):
        """Adopt ``args.lockbox`` if it is a partition, else create one."""
        if is_partition(self.args.lockbox):
            LOG.debug('OSD lockbox device %s is a partition',
                      self.args.lockbox)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.lockbox, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('lockbox')
            if ptype not in ready:
                LOG.warning('incorrect partition UUID: %s, expected %s',
                            ptype, str(ready))
        else:
            LOG.debug('Creating osd partition on %s',
                      self.args.lockbox)
            self.partition = self.create_partition()

    def create_key(self):
        """Register the OSD with ``ceph osd new`` and store its keys.

        The generated secrets are sent on stdin; the returned osd id is
        validated and written to ``whoami`` in the lockbox, next to the
        lockbox keyring and the OSD keyring.
        """
        cluster = self.args.cluster
        bootstrap = self.args.prepare_key_template.format(cluster=cluster,
                                                          statedir=STATEDIR)
        path = self.get_mount_point()
        secrets = LockboxSecrets(self.args)
        # Only pass an explicit id when the user asked for one.
        id_arg = [self.args.osd_id] if self.args.osd_id else []
        osd_id = command_with_stdin(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                '-i', '-',
                'osd', 'new', self.args.osd_uuid,
            ] + id_arg,
            secrets.get_json()
        )
        secrets.write_lockbox_keyring(path, self.args.osd_uuid)
        osd_id = must_be_one_line(osd_id)
        check_osd_id(osd_id)
        write_one_line(path, 'whoami', osd_id)
        secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id)
        write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)

    def symlink_spaces(self, path):
        """Link every known space uuid to the lockbox mount point.

        For each space with a ``<name>_uuid`` argument set, a symlink
        ``STATEDIR/osd-lockbox/<uuid>`` is pointed at the lockbox mount
        point and the uuid is recorded in ``<path>/<name>-uuid``.
        """
        target = self.get_mount_point()
        for name in Space.NAMES:
            space_uuid = getattr(self.args, name + '_uuid', None)
            if not space_uuid:
                continue
            symlink = os.path.join(STATEDIR, 'osd-lockbox', space_uuid)
            adjust_symlink(target, symlink)
            write_one_line(path, name + '-uuid', space_uuid)

    def populate(self):
        """Format, mount and fill the lockbox partition."""
        maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
        args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
        # Pass the values as logging arguments so %s is actually expanded.
        LOG.debug('Creating lockbox fs on %s: %s',
                  self.partition.get_dev(), " ".join(args))
        command_check_call(args)
        path = self.get_mount_point()
        maybe_mkdir(path)
        args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
        LOG.debug('Mounting lockbox ' + str(" ".join(args)))
        command_check_call(args)
        write_one_line(path, 'osd-uuid', self.args.osd_uuid)
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        self.create_key()
        self.symlink_spaces(path)
        write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
        # Only tag the partition when it was created by create_partition().
        if self.device is not None:
            command_check_call(
                [
                    'sgdisk',
                    '--typecode={num}:{uuid}'.format(
                        num=self.partition.get_partition_number(),
                        uuid=self.partition.ptype_for_name('lockbox'),
                    ),
                    '--',
                    get_partition_base(self.partition.get_dev()),
                ],
            )

    def get_mount_point(self):
        """Return ``STATEDIR/osd-lockbox/<osd_uuid>``."""
        return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)

    def get_osd_uuid(self):
        """Return the OSD uuid stored in the arguments."""
        return self.args.osd_uuid

    def activate(self):
        """Mount the lockbox read-only and trigger its partitions.

        The partition is first mounted on a temporary directory to learn
        the OSD uuid, then mounted at its final location. Every uuid
        recorded in the lockbox is handed to ``ceph-disk trigger``.
        Nothing is done when the partition is already mounted.
        """
        dev = self.partition.get_dev()
        path = is_mounted(dev)
        if path:
            LOG.info("Lockbox already mounted at " + path)
            return

        path = tempfile.mkdtemp(
            prefix='mnt.',
            dir=STATEDIR + '/tmp',
        )
        args = ['mount', '-t', 'ext4', '-o', 'ro', dev, path]
        LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
        command_check_call(args)
        try:
            self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
        finally:
            # Never leave the temporary mount behind, even on failure.
            command_check_call(['umount', path])
        try:
            os.rmdir(path)
        except OSError as e:
            # Cleanup is best effort; a leftover directory is harmless.
            LOG.warning('unable to remove temporary directory %s: %s',
                        path, e)

        # Build the final argv before logging so the message matches the
        # command that is really executed.
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                dev,
                self.get_mount_point()]
        LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
        command_check_call(args)
        for name in Space.NAMES + ('osd',):
            uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
            if os.path.exists(uuid_path):
                part_uuid = get_oneliner(self.get_mount_point(),
                                         name + '-uuid')
                part_dev = os.path.join('/dev/disk/by-partuuid/',
                                        part_uuid.lower())
                command_check_call(['ceph-disk', 'trigger', part_dev])

    def prepare(self):
        """Check the device is free, then create and fill the lockbox."""
        verify_not_in_use(self.args.lockbox, check_partitions=True)
        self.set_or_create_partition()
        self.populate()
2861
2862
class PrepareData(object):
    """Prepare the OSD data area, which is a directory or a block device.

    Subclasses are expected to provide ``get_space_size()``, which
    ``create_data_partition`` calls.
    """

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.set_type()

        # Fill in the identifiers the user did not provide.
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Set ``self.type`` to FILE for a directory, DEVICE for a disk.

        Raises ``Error`` when ``args.data`` is neither.
        """
        mode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(mode):
            self.type = self.FILE
            return
        if not stmode_is_diskdevice(mode):
            raise Error('not a dir or block device', self.args.data)
        self.type = self.DEVICE

    def is_file(self):
        """Return True when the data area is a directory."""
        return self.FILE == self.type

    def is_device(self):
        """Return True when the data area is a block device."""
        return self.DEVICE == self.type

    @staticmethod
    def parser():
        """Return the parser for the data-related options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        # Boolean switches; None (not False) marks "not given".
        switches = (
            ('--zap-disk',
             'destroy the partition table (and content) of a disk'),
            ('--data-dir', 'verify that DATA is a dir'),
            ('--data-dev', 'verify that DATA is a block device'),
        )
        for flag, description in switches:
            parser.add_argument(
                flag,
                action='store_true', default=None,
                help=description,
            )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a directory-backed data path."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Write the OSD identity files into ``path``.

        Nothing is done when ``<path>/magic`` already exists. Otherwise
        the identity files are written and each object in
        ``to_prepare_list`` populates ``path`` in turn.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return

        LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        if self.args.osd_id:
            write_one_line(path, 'wanttobe', self.args.osd_id)
        if self.args.crush_device_class:
            write_one_line(path, 'crush_device_class',
                           self.args.crush_device_class)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for space in to_prepare_list:
            space.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Prepare the data area according to ``self.type``."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
            return
        if self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
            return
        raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare a directory-backed data area and its spaces."""
        data_dir = self.args.data

        if not os.path.exists(data_dir):
            raise Error('data path for directory does not exist',
                        data_dir)

        if self.args.data_dev:
            raise Error('data path is not a block device', data_dir)

        for space in to_prepare_list:
            space.prepare()

        self.populate_data_path_file(self.args.data, *to_prepare_list)

    def sanity_checks(self):
        """Verify the data device exists and is not in use."""
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        verify_not_in_use(self.args.data,
                          check_partitions=not self.args.dmcrypt)

    def set_variables(self):
        """Resolve fs type, mkfs arguments, mount options and osd uuid.

        Each value not given on the command line is looked up in the
        cluster configuration, newer option name first, then the older
        one.
        """
        cluster = self.args.cluster

        if self.args.fs_type is None:
            self.args.fs_type = get_conf(
                cluster=cluster,
                variable='osd_mkfs_type',
            )
            if self.args.fs_type is None:
                self.args.fs_type = get_conf(
                    cluster=cluster,
                    variable='osd_fs_type',
                )
            if self.args.fs_type is None:
                self.args.fs_type = DEFAULT_FS_TYPE

        self.mkfs_args = get_conf(
            cluster=cluster,
            variable='osd_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mkfs_args is None:
            self.mkfs_args = get_conf(
                cluster=cluster,
                variable='osd_fs_mkfs_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )

        self.mount_options = get_mount_options(cluster=cluster,
                                               fs_type=self.args.fs_type)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Run the checks, resolve the variables and zap if requested."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition number 1 as the data partition; return it."""
        partition_number = 1
        device = Device.factory(self.args.data, self.args)
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """Adopt ``args.data`` if it is a partition, else create one."""
        if not is_partition(self.args.data):
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()
            return

        LOG.debug('OSD data device %s is a partition',
                  self.args.data)
        self.partition = DevicePartition.factory(
            path=None, dev=self.args.data, args=self.args)
        ptype = self.partition.get_ptype()
        ready = Ptype.get_ready_by_name('osd')
        if ptype not in ready:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, str(ready)))

    def populate_data_path_device(self, *to_prepare_list):
        """Make the filesystem on the data partition and populate it.

        A dmcrypt partition is mapped for the duration of the operation.
        When ``args.data`` is a whole device the partition is finally
        tagged as an osd partition and a udev ``add`` event is triggered
        for it.
        """
        partition = self.partition
        encrypted = isinstance(partition, DevicePartitionCrypt)

        if encrypted:
            partition.map()

        try:
            mkfs_cmd = ['mkfs', '-t', self.args.fs_type]
            if self.mkfs_args is not None:
                mkfs_cmd.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    mkfs_cmd.extend(['-f'])  # always force
            else:
                mkfs_cmd.extend(MKFS_ARGS.get(self.args.fs_type, []))
            mkfs_cmd.extend(['--', partition.get_dev()])
            LOG.debug('Creating %s fs on %s',
                      self.args.fs_type, partition.get_dev())
            command_check_call(mkfs_cmd, exit=True)

            path = mount(dev=partition.get_dev(),
                         fstype=self.args.fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if encrypted:
                partition.unmap()

        if not is_partition(self.args.data):
            typecode = '--typecode=%d:%s' % (
                partition.get_partition_number(),
                partition.ptype_for_name('osd'))
            command_check_call(
                ['sgdisk', typecode, '--', self.args.data],
                exit=True,
            )
            update_partition(self.args.data, 'prepared')
            command_check_call(['udevadm', 'trigger',
                                '--action=add',
                                '--sysname-match',
                                os.path.basename(partition.rawdev)])
3098
3099
class PrepareFilestoreData(PrepareData):
    """Data preparation for filestore OSDs."""

    def get_space_size(self):
        """Return 0, i.e. take as much space as possible."""
        return 0  # get as much space as possible

    def prepare_device(self, *to_prepare_list):
        """Prepare the spaces first, then the data partition."""
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate ``path`` and mark it as a filestore OSD."""
        parent = super(PrepareFilestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3116
7c673cae
FG
3117
class PrepareBluestoreData(PrepareData):
    """Data preparation for bluestore OSDs."""

    # Size of the data partition, in MB.
    SPACE_SIZE = 100

    def get_space_size(self):
        """Return the data partition size in MB."""
        return self.SPACE_SIZE  # MB

    def prepare_device(self, *to_prepare_list):
        """Create the data partition first, then prepare the spaces."""
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        self.set_data_partition()
        for space in to_prepare_list:
            space.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate ``path`` and mark it as a bluestore OSD."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3135
3136
7c673cae
FG
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap and run ``ceph-osd --mkfs`` on ``path``.

    The objectstore type is read from ``<path>/type``. Filestore gets an
    extra ``--osd-journal <path>/journal`` argument; any type other than
    bluestore or filestore raises ``Error``.
    """
    monmap = os.path.join(path, 'activate.monmap')
    getmap_cmd = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'mon', 'getmap', '-o', monmap,
    ]
    command_check_call(getmap_cmd)

    osd_type = read_one_line(path, 'type')
    if osd_type not in ('bluestore', 'filestore'):
        raise Error('unrecognized objectstore type %s' % osd_type)

    # Both backends share the same command line; filestore only adds the
    # journal location between --osd-data and --osd-uuid.
    mkfs_cmd = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type == 'filestore':
        mkfs_cmd += ['--osd-journal', os.path.join(path, 'journal')]
    mkfs_cmd += [
        '--osd-uuid', fsid,
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    command_check_call(mkfs_cmd)
7c673cae
FG
3188
3189
7c673cae
FG
def get_mount_point(cluster, osd_id):
    """Return ``STATEDIR/osd/<cluster>-<osd_id>``."""
    osd_dirname = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', osd_dirname)
3196
3197
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """Mount ``dev`` at its final OSD location and drop the old mount.

    ``dev`` is mounted on the directory returned by ``get_mount_point``
    and the mount at ``path`` is then lazily unmounted.
    """
    LOG.debug('Moving mount to final location...')
    osd_data = get_mount_point(cluster, osd_id)
    maybe_mkdir(osd_data)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others.  Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013.  Instead, mount the original device at the final
    # location.
    mount_cmd = ['/bin/mount', '-o', mount_options, '--', dev, osd_data]
    command_check_call(mount_cmd)

    # lazy, in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
3239
3240
3241#
3242# For upgrade purposes, to make sure there are no competing units,
3243# both --runtime unit and the default should be disabled. There can be
3244# two units at the same time: one with --runtime and another without
3245# it. If, for any reason (manual or ceph-disk) the two units co-exist
3246# they will compete with each other.
3247#
def systemd_disable(
    path,
    osd_id,
):
    """Disable both the persistent and the --runtime ceph-osd unit.

    ``path`` is accepted for symmetry with the other systemd helpers and
    is not used here.
    """
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    # ensure there is no duplicate ceph-osd@.service
    for extra_flags in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra_flags)
3261
3262
def systemd_start(
    path,
    osd_id,
):
    """Enable and start the ceph-osd unit for ``osd_id``.

    Any existing unit is disabled first. The unit is enabled with
    ``--runtime`` when ``path`` is a mount point, without it otherwise.
    """
    systemd_disable(path, osd_id)

    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    enable_cmd = ['systemctl', 'enable', unit]
    if os.path.ismount(path):
        enable_cmd.append('--runtime')

    command_check_call(enable_cmd)
    command_check_call(['systemctl', 'start', unit])
3286
3287
def systemd_stop(
    path,
    osd_id,
):
    """Disable, then stop, the ceph-osd unit for ``osd_id``."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3300
3301
def start_daemon(
    cluster,
    osd_id,
):
    """Start the OSD daemon through the init system it is tagged with.

    The init system is identified by a marker file (``upstart``,
    ``sysvinit``, ``systemd``, ``openrc`` or ``bsdrc``) in the OSD data
    directory. Raises ``Error`` when no marker is found or when the
    start command fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged_with(init_system):
        # The data directory holds an empty file named after its init.
        return os.path.exists(os.path.join(path, init_system))

    osd_name = 'osd.{osd_id}'.format(osd_id=osd_id)

    try:
        if tagged_with('upstart'):
            command_check_call(
                [
                    '/sbin/initctl',
                    # use emit, not start, because start would fail if the
                    # instance was already running
                    'emit',
                    # since the daemon starting doesn't guarantee much about
                    # the service being operational anyway, don't bother
                    # waiting for it
                    '--no-wait',
                    '--',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged_with('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    osd_name,
                ],
            )
        elif tagged_with('systemd'):
            systemd_start(path, osd_id)
        elif tagged_with('openrc'):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            # Each OSD gets its own init script, a symlink to the base.
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call([osd_script, 'start'])
        elif tagged_with('bsdrc'):
            command_check_call(
                ['/usr/sbin/service', 'ceph', 'start', osd_name],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3376
3377
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the OSD daemon through the init system it is tagged with.

    The init system is identified by a marker file (``upstart``,
    ``sysvinit``, ``systemd``, ``openrc`` or ``bsdrc``) in the OSD data
    directory. Raises ``Error`` when no marker is found or when the stop
    command fails.
    """
    LOG.debug('Stopping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # Each word must be its own argv element: the command is
            # executed without a shell, so a single string containing
            # spaces would be looked up as the program name and fail.
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3432
3433
def detect_fstype(dev):
    """Return the filesystem type found on ``dev``.

    ``fstyp`` is used on FreeBSD and ``blkid`` elsewhere; the output must
    consist of exactly one line.
    """
    if FREEBSD:
        probe_cmd = ['fstyp', '-u', dev]
    else:
        probe_cmd = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    raw_output = _check_output(args=probe_cmd)
    return must_be_one_line(raw_output)
3457
3458
def dmcrypt_is_mapped(uuid):
    """Return ``/dev/mapper/<uuid>`` if that path exists, else None."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3465
3466
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the dmcrypt partition ``dev`` and return ``_dmcrypt_map``'s result.

    The partition type decides between plain mode (with a 256-bit key
    size) and luks mode. Raises ``Error`` when the partition type is
    neither.
    """
    ptype = get_partition_type(dev)

    if ptype in Ptype.get_ready_by_type('plain'):
        is_luks, extra_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        is_luks, extra_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, is_luks)
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=extra_parameters,
        luks=is_luks,
        format_dev=False,
    )
3488
3489
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """
    Mount the OSD data device ``dev`` on a temporary path, activate it
    and move it into its final position.

    When ``dmcrypt`` is set, ``dev`` is first mapped with dmcrypt_map().
    A data directory carrying a ``deactive`` marker is refused unless
    ``reactivate`` is true; in that case the marker is removed once
    activate() has succeeded.

    Returns the tuple ``(cluster, osd_id)``.

    Raises FilesystemTypeError when the filesystem type cannot be
    detected, and Error when the OSD is deactivated or when something
    else already occupies the final mount point.
    """
    if dmcrypt:
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # A 'deactive' marker means the OSD was deliberately deactivated.
    deactive = os.path.exists(os.path.join(path, 'deactive'))
    if deactive and not reactivate:
        unmount(path)
        # we need to unmap again because dmcrypt map will create again
        # on bootup stage (due to deactivate)
        if '/dev/mapper/' in dev:
            dmcrypt_unmap(dev.replace('/dev/mapper/', ''))
        # logging (in addition to raising) makes udev triggered
        # failures easy to spot in the log
        LOG.info('OSD deactivated! reactivate with: --reactivate')
        raise Error('OSD deactivated! reactivate with: --reactivate')

    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Activation succeeded: a reactivated OSD loses its marker.
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is
        # already mounted there
        active = False
        other = False
        src_dev = os.stat(path).st_dev
        try:
            dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)).st_dev
            if src_dev == dst_dev:
                active = True
            else:
                parent_dev = os.stat(STATEDIR + '/osd').st_dev
                if dst_dev != parent_dev:
                    other = True
                elif os.listdir(get_mount_point(cluster, osd_id)):
                    LOG.info(get_mount_point(cluster, osd_id) +
                             " is not empty, won't override")
                    other = True
        except OSError:
            # stat/listdir failed: treated as "nothing in position"
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:  # noqa: E722 - deliberate: always unmount, then re-raise
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3597
3598
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD whose data lives in the directory ``path``.

    Unless ``init`` is None or 'none', a symlink pointing at ``path`` is
    created at STATEDIR/osd/<cluster>-<osd_id>; an existing symlink with
    a different target is replaced.

    Returns the tuple ``(cluster, osd_id)``. Raises Error when ``path``
    does not exist or when the symlink cannot be removed or created.
    """
    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError as e:
                        # only filesystem errors are translated; anything
                        # else (e.g. KeyboardInterrupt) propagates as is
                        raise Error('unable to remove old symlink',
                                    canonical, e)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError as e:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path), e)

    return cluster, osd_id
3638
3639
def find_cluster_by_uuid(_uuid):
    """
    Look through the ``*.conf`` files of SYSCONFDIR for the cluster whose
    fsid equals ``_uuid`` (compared in lower case) and return its name.

    Returns None when SYSCONFDIR is missing or nothing matches, except
    that a lone ``ceph.conf`` without any fsid is accepted as 'ceph'.
    """
    wanted = _uuid.lower()
    if not os.path.exists(SYSCONFDIR):
        return None

    suffix = '.conf'
    without_fsid = []
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith(suffix):
            continue
        cluster = entry[:-len(suffix)]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            if 'getting cluster uuid from configuration failed' not in str(e):
                raise e
            without_fsid.append(cluster)
            continue
        if fsid == wanted:
            return cluster

    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if without_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3668
3669
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Bring the OSD data directory ``path`` into the activated state.

    Reads ``ceph_fsid`` and ``fsid`` from the directory, resolves the
    cluster name, allocates an osd id when none is recorded yet, runs
    mkfs() while no ``ready`` marker exists, tags the directory with the
    init system and finally writes the ``active`` marker.

    Returns the tuple ``(osd_id, cluster)``. Raises Error when the
    cluster uuid, the cluster configuration or the OSD uuid is missing.
    """
    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
            path=path,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    ready_marker = os.path.join(path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            # the configuration wins over auto-detection
            configured = get_conf(
                cluster=cluster,
                variable='init'
            )
            init = configured if configured is not None else init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for candidate in INIT_SYSTEMS:
        if candidate == init:
            continue
        try:
            os.unlink(os.path.join(path, candidate))
        except OSError:
            pass

    if not os.path.exists(os.path.join(path, 'active')):
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3747
3748
def main_activate(args):
    """
    Entry point of the activate subcommand for ``args.path``.

    A disk device is handled by mount_activate() (followed by mapping
    the ``<name>_dmcrypt`` spaces when ``args.dmcrypt`` is set), a
    directory by activate_dir(). Anything else raises Error. Unless
    ``args.no_start_daemon`` is set the OSD is then started, either
    directly with ``ceph-osd`` (mark_init == 'none') or through
    start_daemon().
    """
    cluster = None
    osd_id = None

    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stmode_is_diskdevice(mode):
            if (is_partition(args.path) and
                    (get_partition_type(args.path) ==
                     PTYPE['mpath']['osd']['ready']) and
                    not is_mpath(args.path)):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

            args.cluster = cluster
            if args.dmcrypt:
                for name in Space.NAMES:
                    # Check if encrypted device in journal
                    dev_path = os.path.join(osd_data, name + '_dmcrypt')
                    if os.path.exists(dev_path):
                        partition = DevicePartition.factory(
                            path=None,
                            dev=dev_path,
                            args=args)
                        partition.rawdev = args.path
                        partition.map()

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
            LOG.info("activate: Journal not present, not starting, yet")
            return

        if args.no_start_daemon:
            return

        if args.mark_init == 'none':
            # no init system: run the daemon ourselves
            command_check_call(
                [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={journal}'.format(journal=osd_journal),
                ],
            )
        elif args.mark_init is not None:
            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3830
3831
def main_activate_lockbox(args):
    """
    Run main_activate_lockbox_protected() while holding activate_lock.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3835
3836
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox on ``args.path``; the caller is expected to
    hold activate_lock (see main_activate_lockbox()).
    """
    lockbox = Lockbox(args)
    lockbox.set_partition(
        DevicePartition.factory(path=None, dev=args.path, args=args))
    lockbox.activate()
3844
3845
3846###########################
3847
def _mark_osd_out(cluster, osd_id):
    """
    Ask the cluster ``cluster`` to mark osd ``osd_id`` (an int) out.

    The cluster name is passed to the ``ceph`` command line, as
    _check_osd_status() does, so that a non-default cluster is
    addressed rather than the default one.
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    command([
        'ceph',
        'osd',
        'out',
        'osd.%d' % osd_id,
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
    ])
3856
3857
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is computed from the JSON output of ``ceph osd dump``.
    Raises Error when ``osd_id`` is not listed in that output.
    """
    LOG.info("Checking osd id: %s ...", osd_id)
    found = False
    status_code = 0
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # compare by value: identity ('is 1') only works by accident
            # of the interpreter's small integer caching
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not find osd.%s in osd tree!' % osd_id)
    return status_code
3890
3891
def _remove_osd_directory_files(mounted_path, cluster):
    """
    Delete the 'ready' and 'active' markers (when present) and the
    init system marker from the OSD directory ``mounted_path``.

    The init system name comes from the ``init`` configuration value
    of ``cluster`` and falls back to init_get().
    """
    markers = (
        ('ready', 'Remove `ready` file.',
         '`ready` file is already removed.'),
        ('active', 'Remove `active` file.',
         '`active` file is already removed.'),
    )
    for marker, removed_msg, missing_msg in markers:
        marker_path = os.path.join(mounted_path, marker)
        if os.path.exists(marker_path):
            os.remove(marker_path)
            LOG.info(removed_msg)
        else:
            LOG.info(missing_msg)

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    init = get_conf(
        cluster=cluster,
        variable='init'
    )
    if init is None:
        init = init_get()
    os.remove(os.path.join(mounted_path, init))
    LOG.info('Remove `%s` file.', init)
    return
3920
3921
def main_deactivate(args):
    """
    Run main_deactivate_locked() while holding activate_lock.
    """
    with activate_lock:
        main_deactivate_locked(args)
3925
3926
def main_deactivate_locked(args):
    """
    Deactivate the OSD selected by ``args.deactivate_by_id`` or
    ``args.path``.

    The daemon is stopped when it is up (after optionally marking the
    OSD out), the OSD directory is flagged with a ``deactive`` marker
    unless ``args.once`` is set, and the data partition is unmounted.
    For dmcrypt OSDs the lockbox is unmounted and the mappings removed.

    Raises Error when no partition matches.
    """
    wanted_id = args.deactivate_by_id
    wanted_path = args.path
    target_dev = None

    # Walk every partition and keep the last one that matches.
    for device in list_devices():
        if 'partitions' not in device:
            continue
        for dev_part in device.get('partitions'):
            if (wanted_id and
                    'whoami' in dev_part and
                    dev_part['whoami'] == wanted_id):
                target_dev = dev_part
            elif (wanted_path and
                    'path' in dev_part and
                    dev_part['path'] == wanted_path):
                target_dev = dev_part
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # Collect everything needed from the matching partition.
    osd_id = target_dev['whoami']
    part_type = target_dev['ptype']
    mounted_path = target_dev['mount']
    dmcrypt = bool(Ptype.is_dmcrypt(part_type, 'osd'))

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    osd_is_in = status_code in (OSD_STATUS_IN_UP, OSD_STATUS_IN_DOWN)
    if osd_is_in and args.mark_out is True:
        _mark_osd_out(args.cluster, int(osd_id))
    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_OUT_UP):
        stop_daemon(args.cluster, osd_id)
    elif status_code in (OSD_STATUS_IN_DOWN, OSD_STATUS_OUT_DOWN):
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    if not args.once:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Leave the deactivation marker in the osd directory.
        deactive_marker = os.path.join(mounted_path, 'deactive')
        with open(deactive_marker, 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path, do_rm=not args.once)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if dmcrypt:
        lockbox = os.path.join(STATEDIR, 'osd-lockbox')
        command(['umount', os.path.join(lockbox, target_dev['uuid'])])

        dmcrypt_unmap(target_dev['uuid'])
        for name in Space.NAMES:
            uuid_key = name + '_uuid'
            if uuid_key in target_dev:
                dmcrypt_unmap(target_dev[uuid_key])
3992
3993###########################
3994
3995
def _remove_lockbox(uuid, cluster):
    """
    Unmount the lockbox of ``uuid`` below STATEDIR/osd-lockbox and
    delete every symlink in that directory that points at it.

    Does nothing when the lockbox directory does not exist.
    """
    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    if not os.path.exists(lockbox):
        return
    canonical = os.path.join(lockbox, uuid)
    command(['umount', canonical])
    for entry in os.listdir(lockbox):
        candidate = os.path.join(lockbox, entry)
        if not os.path.islink(candidate):
            continue
        if os.readlink(candidate) == canonical:
            os.unlink(candidate)
4006
4007
def destroy_lookup_device(args, predicate, description):
    """
    Return ``(dmcrypt, partition)`` for the first partition accepted by
    ``predicate``.

    Unmounted lockbox partitions are activated first. Each dmcrypt
    partition is mapped if needed so that list_dev_osd() can fill in
    its OSD details, and unmapped again when this function did the
    mapping. Raises Error when no partition matches; ``description``
    is attached to that error.
    """
    devices = list_devices()

    # First pass: make sure every lockbox is mounted.
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['type'] != 'lockbox':
                continue
            if not is_mounted(partition['path']):
                main_activate_lockbox_protected(
                    argparse.Namespace(verbose=args.verbose,
                                       path=partition['path']))

    # Second pass: inspect the partitions and apply the predicate.
    for device in devices:
        for partition in device.get('partitions', []):
            dmcrypt = bool(partition['dmcrypt'])
            if dmcrypt:
                dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
                mapped_here = not dmcrypt_path
                if mapped_here:
                    dmcrypt_path = dmcrypt_map(partition['path'],
                                               args.dmcrypt_key_dir)
                list_dev_osd(dmcrypt_path, {}, partition)
                if mapped_here:
                    dmcrypt_unmap(partition['uuid'])
            if predicate(partition):
                return dmcrypt, partition
    raise Error('found no device matching ', description)
4036
4037
def main_destroy(args):
    """
    Run main_destroy_locked() while holding activate_lock.
    """
    with activate_lock:
        main_destroy_locked(args)
4041
4042
def main_destroy_locked(args):
    """
    Destroy (or purge, with ``args.purge``) the OSD selected by
    ``args.path`` or ``args.destroy_by_id``.

    The OSD must be down. For dmcrypt OSDs the space mappings and the
    lockbox are removed; with ``args.zap`` the base device is zapped.

    Raises Error when neither a path nor an osd id is given, when the
    path is not a partition, when no device matches, or when the OSD
    is still up.
    """
    osd_id = args.destroy_by_id
    path = args.path

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # without a selector there is nothing to look up; fail with a
        # proper Error instead of subscripting None below
        raise Error('a partition path or an osd id is required '
                    'to destroy an osd')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
       status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    if args.purge:
        action = 'purge'
    else:
        action = 'destroy'
    LOG.info("Prepare to %s osd.%s" % (action, osd_id))
    command([
        'ceph',
        'osd',
        action,
        'osd.%s' % osd_id,
        '--yes-i-really-mean-it',
    ])

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4102
4103
def get_space_osd_uuid(name, path):
    """
    Return the first line printed by ``ceph-osd --get-device-fsid`` for
    the space device ``path``; ``name`` is only used in messages.

    Raises Error when ``path`` is missing, is not a disk device, is a
    multipath-typed partition on a non-multipath device, or when the
    ceph-osd call fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not path_is_diskdevice(path):
        raise Error('%s is not a block device' % path)

    if (is_partition(path) and
            get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
                                         PTYPE['mpath']['block']['ready']) and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    query = ['ceph-osd', '--get-device-fsid', path]
    try:
        out = _check_output(args=query, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    value = str(out).split('\n', 1)[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
4135
4136
def main_activate_space(name, args):
    """
    Activate the OSD that owns the ``name`` space device ``args.dev``.

    The OSD uuid is read from the (possibly dmcrypt mapped) space
    device and the data partition is looked up below
    /dev/disk/by-partuuid/. When that partition is suppressed or not
    present yet nothing happens; otherwise it is mounted, activated
    and its daemon started.

    Raises Error when ``args.dev`` does not exist.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    if is_suppressed(args.dev):
        LOG.info('suppressed activate request on space %s', args.dev)
        return

    with activate_lock:
        if args.dmcrypt:
            dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        else:
            dev = args.dev
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        (cluster, osd_id) = mount_activate(
            dev=path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
        )
4184
4185
4186###########################
4187
4188
def main_activate_all(args):
    """
    Activate every ready OSD data partition listed in
    /dev/disk/by-parttypeuuid.

    Entries are named ``<type uuid>.<partition uuid>``. dmcrypt
    partitions are activated through their /dev/mapper device. A
    failure on one partition is printed to stderr and the scan goes
    on; Error is raised at the end if any activation failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return

    failed = False
    for name in os.listdir(scan_dir):
        if '.' not in name:
            continue
        (tag, part_uuid) = name.split('.')

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, name)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )
            except Exception as e:
                print(
                    '{prog}: {msg}'.format(prog=args.prog, msg=e),
                    file=sys.stderr
                )
                failed = True

    if failed:
        raise Error('One or more partitions failed to activate')
4237
4238
4239###########################
4240
def is_swap(dev):
    """
    Return True when ``dev`` is listed as a swap device in
    PROCDIR/swaps, False otherwise.

    Both ``dev`` and the listed devices are resolved with
    os.path.realpath() before being compared.
    """
    dev = os.path.realpath(dev)
    # Text mode on purpose: in binary mode the fields are bytes under
    # Python 3 and would never compare equal to the str ``dev``.
    with open(PROCDIR + '/swaps', 'r') as proc_swaps:
        # the first line is the column header
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4254
4255
def get_oneliner(base, name):
    """
    Return the first line of the file ``base``/``name``, trailing
    whitespace stripped and converted with _bytes2str(), or None when
    it is not a regular file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as _file:
        first_line = _file.readline()
        return _bytes2str(first_line.rstrip())
4262
4263
def get_dev_fs(dev):
    """
    Return the filesystem type found on ``dev`` or None.

    When FREEBSD is set the output of ``fstyp -u`` is returned if the
    command exits with 0. Otherwise the quoted TYPE value is extracted
    from the output of ``blkid -s TYPE``.
    """
    if FREEBSD:
        fstype, _, ret = command(['fstyp', '-u', dev])
        if ret == 0:
            return fstype
        return None

    fscheck, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in fscheck:
        return None
    # second whitespace separated field; the value sits between quotes
    type_field = fscheck.split()[1]
    return type_field.split('"')[1]
4288
4289
def split_dev_base_partnum(dev):
    """
    Return ``(base, partnum)`` for the partition ``dev``.

    Multipath devices go through partnum_mpath() and
    get_partition_base_mpath(). For other devices the number is read
    from the ``partition`` file below block_path(dev) and the base
    comes from get_partition_base().
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # 'with' closes the file instead of leaving it to the
        # garbage collector
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4299
4300
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for ``part``.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_TYPE')
4303
4304
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for ``part``.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_UUID')
4307
4308
def get_blkid_partition_info(dev, what=None):
    """
    Run ``blkid -o udev -p`` on ``dev`` and parse its KEY=VALUE lines.

    Returns the value stored under ``what`` (None when absent) if
    ``what`` is given, otherwise the whole dictionary.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # Split on the first '=' only so a value may itself contain
        # '='; lines without any '=' carry no key/value pair.
        key, sep, value = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    return p
4327
4328
def more_osd_info(path, uuid_map, desc):
    """
    Fill ``desc`` with what the OSD directory ``path`` says about
    itself: ceph_fsid, cluster (when the fsid is set), whoami, and for
    each space name its lower-cased ``<name>_uuid`` plus the matching
    ``<name>_dev`` when the uuid is a key of ``uuid_map``.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4340
4341
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the OSD data device ``dev`` in ``desc``.

    ``desc['state']`` ends up as 'active' when the device is mounted,
    'prepared' when it can be mounted temporarily and holds a ``magic``
    file, and 'unprepared' otherwise. A MountError during the temporary
    mount is ignored.
    """
    mount_point = is_mounted(dev)
    fs_type = get_dev_fs(dev)
    desc['mount'] = mount_point
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mount_point:
        desc['state'] = 'active'
        more_osd_info(mount_point, uuid_map, desc)
        return
    if not fs_type:
        return

    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if tpath:
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    more_osd_info(tpath, uuid_map, desc)
            finally:
                unmount(tpath)
    except MountError:
        pass
4363
4364
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device ``dev`` in ``desc``.

    ``desc['state']`` ends up as 'active' when the device is mounted,
    'prepared' when it can be mounted temporarily (as ext4) and holds a
    ``magic`` file, and 'unprepared' otherwise. ``desc['lockbox_for']``
    is set when the lockbox osd uuid is a key of ``uuid_map``.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
        mounted = False
        try:
            args = ['mount', '-t', 'ext4', dev, tpath]
            LOG.debug('Mounting lockbox ' + str(" ".join(args)))
            command_check_call(args)
            mounted = True
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # never leave the lockbox mounted, even if reading fails
                unmount(tpath)
        except subprocess.CalledProcessError:
            if not mounted:
                # the mount itself failed: nothing to unmount, but do
                # not leave the empty temporary directory behind
                try:
                    os.rmdir(tpath)
                except OSError:
                    LOG.debug('Unable to remove %s', tpath)
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4388
4389
def list_format_lockbox_plain(dev):
    """
    Return the plain text fragments describing what the lockbox
    ``dev`` is for: the owning device when known, else its osd uuid,
    else nothing.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4397
4398
def list_format_more_osd_info_plain(dev):
    """
    Return the plain text fragments for the OSD details of ``dev``:
    cluster (or the unknown cluster fsid), osd id, and the device of
    each space that is known.
    """
    fragments = []
    if dev.get('ceph_fsid'):
        if dev.get('cluster'):
            fragments.append('cluster ' + dev['cluster'])
        else:
            fragments.append('unknown cluster ' + dev['ceph_fsid'])
    if dev.get('whoami'):
        fragments.append('osd.%s' % dev['whoami'])
    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            fragments.append(name + ' %s' % space_dev)
    return fragments
4412
4413
def list_format_dev_plain(dev, prefix=''):
    """
    Format the device description ``dev`` as one line of plain text:
    ``prefix``, the device path, then a comma separated summary that
    depends on the partition type.
    """
    ptype = dev['ptype']
    desc = []
    if ptype == PTYPE['regular']['osd']['ready']:
        desc = (['ceph data', dev['state']] +
                list_format_more_osd_info_plain(dev))
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        desc = (['ceph lockbox', dev['state']] +
                list_format_lockbox_plain(dev))
    elif Ptype.is_dmcrypt(ptype, 'osd'):
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if not holders:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(holders)]
    elif Ptype.is_regular_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        desc.append('ceph ' + name)
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        dmcrypt = dev['dmcrypt']
        if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # not a ceph partition: type, then filesystem or partition type
        desc.append(dev['type'])
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4461
4462
def list_format_plain(devices):
    """
    Render ``devices`` as plain text, one line per device or partition.

    A device with partitions gets a ``<path> :`` header followed by its
    partitions sorted by path and prefixed with a space.
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if not partitions:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
            continue
        lines.append('%s :' % device['path'])
        for part in sorted(device['partitions'], key=lambda x: x['path']):
            lines.append(list_format_dev_plain(dev=part,
                                               prefix=' '))
    return "\n".join(lines)
4475
4476
def list_dev(dev, uuid_map, space_map):
    """
    Build and return the dictionary describing the device ``dev``.

    The result always has 'path', 'dmcrypt', 'is_partition', 'ptype'
    and 'type'; partitions also get 'uuid'. Depending on the partition
    type the OSD, lockbox, dmcrypt holder or space owner details are
    added. ``uuid_map`` is handed to the OSD and lockbox helpers, and
    ``space_map`` is used to find the owner of a space partition.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # must test the lockbox type: in this branch ptype can never
        # be the multipath *osd* type
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # only an unambiguous mapping can be inspected
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4553
4554
def list_devices():
    """
    Return the list of device descriptions for all known disks.

    A first pass over every partition builds ``uuid_map`` (partition
    uuid -> device) and, by temporarily mounting each OSD data
    partition, ``space_map`` (space uuid -> data device). A second pass
    builds one dictionary per disk with list_dev(); disks with
    partitions carry them under 'partitions'.
    """
    partmap = list_all_partitions()

    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for p in parts:
            dev = get_dev_path(p)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype not in Ptype.get_ready_by_name('osd'):
                continue

            if Ptype.is_dmcrypt(ptype, 'osd'):
                holders = is_held(dev)
                if len(holders) != 1:
                    continue
                dev_to_mount = get_dev_path(holders[0])
            else:
                dev_to_mount = dev

            fs_type = get_dev_fs(dev_to_mount)
            if fs_type is None:
                continue
            mount_options = get_mount_options(cluster='ceph',
                                              fs_type=fs_type)
            try:
                tpath = mount(dev=dev_to_mount,
                              fstype=fs_type, options=mount_options)
                try:
                    for name in Space.NAMES:
                        space_uuid = get_oneliner(tpath,
                                                  name + '_uuid')
                        if space_uuid:
                            space_map[space_uuid.lower()] = dev
                finally:
                    unmount(tpath)
            except MountError:
                pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        if parts:
            disk = {'path': get_dev_path(base)}
            disk['partitions'] = [
                list_dev(get_dev_path(p), uuid_map, space_map)
                for p in sorted(parts)
            ]
            devices.append(disk)
        else:
            device = list_dev(get_dev_path(base), uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
    LOG.debug("list_devices: " + str(devices))
    return devices
4617
4618
def list_zfs():
    """Print one line per ZFS dataset reported by ``zfs list``.

    ``zfs list -o name,mountpoint`` is run and its first output line
    (the column header) is ignored. A dataset whose mountpoint contains
    an ``active`` entry is reported as active ceph data, any other
    dataset is reported as "other, zfs".

    Lines with fewer than two whitespace separated fields (blank or
    truncated lines) are skipped instead of raising IndexError.

    Raises subprocess.CalledProcessError if running the command raises
    it; the failure is logged before being re-raised.
    """
    try:
        out, err, ret = command(
            [
                'zfs',
                'list',
                '-o', 'name,mountpoint'
            ]
        )
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise

    # The first line is the NAME/MOUNTPOINT header.
    for line in out.splitlines()[1:]:
        fields = line.split()
        if len(fields) < 2:
            # No mountpoint column to inspect on this line.
            continue
        name, mountpoint = fields[0], fields[1]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            # The last path component of the mountpoint is printed as
            # the OSD identifier.
            print(name, "ceph data, active, cluster ceph,",
                  os.path.split(mountpoint)[1],
                  "mounted on:", mountpoint)
        else:
            print(name + " other, zfs, mounted on: " + mountpoint)
4641
4642
def main_list(args):
    """Entry point of ``ceph-disk list``.

    The listing is produced while holding ``activate_lock``; FreeBSD
    uses the ZFS based listing, every other platform the partition
    based one.
    """
    with activate_lock:
        if not FREEBSD:
            main_list_protected(args)
        else:
            main_list_freebsd(args)
4649
4650
def main_list_protected(args):
    """Print the devices found by ``list_devices()``.

    When ``args.path`` is not empty only the matching devices are
    shown. Each requested path that exists is first resolved with
    ``os.path.realpath``; a device is selected when the resulting text,
    used as a regular expression anchored at the end, matches the
    device path. A device matching several requested paths is selected
    only once.

    The result is printed as JSON when ``args.format`` is ``'json'``,
    otherwise through ``list_format_plain()`` (nothing is printed when
    that output is empty).
    """
    devices = list_devices()
    if args.path:
        paths = [
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        ]
        # any() stops at the first matching path so that a device is not
        # appended once per matching argument.
        selected_devices = [
            device for device in devices
            if any(re.search(path + '$', device['path']) for path in paths)
        ]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
    else:
        output = list_format_plain(selected_devices)
        if output:
            print(output)
4673
4674
def main_list_freebsd(args):
    """List the devices on FreeBSD; ``args`` is not used.

    Currently accommodates only ZFS Filestore partitions: the VDEVs and
    their mountpoints are listed, based on output such as

      > zfs list
      NAME   USED  AVAIL  REFER  MOUNTPOINT
      osd0  1.01G  1.32T  1.01G  /var/lib/ceph/osd/osd.0
      osd1  1.01G  1.32T  1.01G  /var/lib/ceph/osd/osd.1
    """
    list_zfs()
4683
4684
###########################
#
# Mark devices that we want to suppress activates on with a
# file like
#
#   /var/lib/ceph/tmp/suppress-activate.sdb
#
# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix.  That
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
#
4696
def is_suppressed(path):
    """Tell whether activation is suppressed for the device at ``path``.

    ``path`` is resolved with ``os.path.realpath``. The device name is
    then shortened one trailing character at a time and the device is
    considered suppressed as soon as ``SUPPRESS_PREFIX + <name>``
    exists, which is how suppressing ``sdb`` also covers ``sdb1``.

    Returns True if a suppress file is found. Returns False otherwise,
    including when the resolved path is not under ``/dev/``, is not a
    disk device, or when any error occurs while checking.
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not ldev_is_diskdevice(disk)):
            return False
        base = get_dev_name(disk)
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # Best effort: a failing check means "not suppressed". Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit through.
        return False
    # No prefix of the device name is marked: say so explicitly rather
    # than falling off the end and returning None.
    return False
4710
4711
def set_suppress(path):
    """Mark the device at ``path`` so that activation is suppressed.

    An empty file named ``SUPPRESS_PREFIX + <device name>`` is created
    (or truncated).

    Raises Error if the resolved path does not exist or if ``path`` is
    not a disk device.
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)

    dev_name = get_dev_name(resolved)
    # Only the existence of the marker matters, so it is left empty.
    open(SUPPRESS_PREFIX + dev_name, 'w').close()  # noqa
    LOG.info('set suppress flag on %s', dev_name)
4723
4724
def unset_suppress(path):
    """Remove the suppress marker of the device at ``path``.

    Raises Error if the resolved path does not exist, if ``path`` is not
    a disk device, if the device is not marked as suppressed or if the
    marker file cannot be removed.
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    assert resolved.startswith('/dev/')

    dev_name = get_dev_name(resolved)
    marker = SUPPRESS_PREFIX + dev_name  # noqa
    if not os.path.exists(marker):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(marker)
        LOG.info('unset suppress flag on %s', dev_name)
    except OSError as e:
        raise Error('failed to unsuppress', e)
4743
4744
def main_suppress(args):
    """Entry point of ``suppress-activate``: mark ``args.path``."""
    set_suppress(path=args.path)
4747
4748
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unmark ``args.path``."""
    unset_suppress(path=args.path)
4751
4752
def main_zap(args):
    """Entry point of ``zap``: zap each device of ``args.dev`` in order."""
    devices = args.dev
    for device in devices:
        zap(device)
4756
4757
def main_trigger(args):
    """Activate the device ``args.dev`` according to its partition type.

    Unless ``args.sync`` is set, the work is handed over to the init
    system when one is detected: with systemd the matching
    ``ceph-disk@<dev>.service`` unit is restarted without blocking, with
    upstart a ``ceph-disk`` event is emitted. In both cases the function
    returns immediately afterwards.

    Otherwise the partition type of the device is looked up and the
    corresponding ``ceph-disk activate*`` subcommand is run, with
    ``--dmcrypt`` added for plain and LUKS partition types.

    Raises Error if the partition type is not recognized or if the
    ceph-disk subcommand returns a non-zero code.
    """
    LOG.debug("main_trigger: " + str(args))
    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        # The first character of the path is dropped and dashes escaped.
        unit_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=unit_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return
    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command([
            'initctl',
            'emit',
            'ceph-disk',
            'dev={dev}'.format(dev=args.dev),
            'pid={pid}'.format(pid=os.getpid()),
        ])
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    # Select the subcommand (and --dmcrypt for encrypted partitions)
    # matching the partition type; the command itself is run once below.
    if parttype in (PTYPE['regular']['osd']['ready'],
                    PTYPE['mpath']['osd']['ready']):
        subcommand = ['activate']

    elif parttype in (PTYPE['plain']['osd']['ready'],
                      PTYPE['luks']['osd']['ready']):
        subcommand = ['activate', '--dmcrypt']

    elif parttype in (PTYPE['regular']['journal']['ready'],
                      PTYPE['mpath']['journal']['ready']):
        subcommand = ['activate-journal']

    elif parttype in (PTYPE['plain']['journal']['ready'],
                      PTYPE['luks']['journal']['ready']):
        subcommand = ['activate-journal', '--dmcrypt']

    elif parttype in (PTYPE['regular']['block']['ready'],
                      PTYPE['regular']['block.db']['ready'],
                      PTYPE['regular']['block.wal']['ready'],
                      PTYPE['mpath']['block']['ready'],
                      PTYPE['mpath']['block.db']['ready'],
                      PTYPE['mpath']['block.wal']['ready']):
        subcommand = ['activate-block']

    elif parttype in (PTYPE['plain']['block']['ready'],
                      PTYPE['plain']['block.db']['ready'],
                      PTYPE['plain']['block.wal']['ready'],
                      PTYPE['luks']['block']['ready'],
                      PTYPE['luks']['block.db']['ready'],
                      PTYPE['luks']['block.wal']['ready']):
        subcommand = ['activate-block', '--dmcrypt']

    elif parttype in (PTYPE['regular']['lockbox']['ready'],
                      PTYPE['mpath']['lockbox']['ready']):
        subcommand = ['activate-lockbox']

    else:
        raise Error('unrecognized partition type %s' % parttype)

    out, err, ret = command(ceph_disk + subcommand + [args.dev])

    if ret == 0:
        LOG.debug(out)
        LOG.debug(err)
        return

    LOG.info(out)
    LOG.error(err)
    raise Error('return code ' + str(ret))
4893
4894
def main_fix(args):
    """Fix the ownership and/or the SELinux labels of the ceph files.

    The steps depend on the flags found in ``args``:

    - ``selinux`` or ``all``: fail unless ``selinuxenabled`` succeeds;
    - always: fail if ``pgrep`` finds a running ceph daemon;
    - ``system`` or ``all``: run ``restorecon -R /`` excluding the ceph
      paths of the fix table (entries under ``/var/lib/ceph/`` are not
      listed as exclusions);
    - ``all``: chown and relabel each existing path with one ``find``;
    - ``permissions``: chown each existing path;
    - ``selinux``: run restorecon on each existing path.

    Fix table entries flagged as blocking are processed one after the
    other, the others are started in the background and waited for once
    all of them are running.

    Raises Error as soon as a check or a command fails.
    """
    # A list of (path, uid, gid, blocking, recursive) entries.
    fix_table = [
        ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
        ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
        ('/etc/ceph', 'root', ROOTGROUP, True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/radosgw', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        recursive = directory != '/var/lib/ceph/osd'
        fix_table.append((directory, 'ceph', 'ceph', True, recursive))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # The lists of background processes
    all_processes = []
    permissions_processes = []
    selinux_processes = []

    # Preliminary checks
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        # pgrep returning 0 means at least one process matched.
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for entry in fix_table:
            directory = entry[0]
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            c += ['-e', directory]

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # Use find to relabel + chown ~simultaneously
    if args.all:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            c = ['find', directory]

            # Just pass -maxdepth 0 for non-recursive calls. It is a
            # global option and therefore goes before the -exec actions:
            # GNU find warns when it is given after them.
            if not recursive:
                c += ['-maxdepth', '0']

            c += [
                '-exec',
                'chown',
                ':'.join((uid, gid)),
                '{}',
                '+',
                '-exec',
                'restorecon',
                '{}',
                '+',
            ]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to fix " + directory)
                    LOG.error(err)
                    raise Error(directory + " fix failed")
            else:
                all_processes.append(command_init(c))

    LOG.debug("all_processes: " + str(all_processes))
    for process in all_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background find process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix permissions
    if args.permissions:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            c = ['chown']
            if recursive:
                c.append('-R')
            c += [':'.join((uid, gid)), directory]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to chown " + directory)
                    LOG.error(err)
                    raise Error(directory + " chown failed")
            else:
                permissions_processes.append(command_init(c))

    LOG.debug("permissions_processes: " + str(permissions_processes))
    for process in permissions_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background permissions process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix SELinux labels
    if args.selinux:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            c = ['restorecon']
            if recursive:
                c.append('-R')
            c.append(directory)

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to restore labels for " + directory)
                    LOG.error(err)
                    raise Error(directory + " relabel failed")
            else:
                selinux_processes.append(command_init(c))

    LOG.debug("selinux_processes: " + str(selinux_processes))
    for process in selinux_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background selinux process failed")
            LOG.error(err)
            raise Error("background failed")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5080
5081
def setup_statedir(dir):
    """Set the state directory to ``dir`` and the globals derived from it.

    ``dir`` and its ``tmp`` subdirectory are created when they do not
    exist. The prepare and activate file locks as well as the prefix of
    the suppress-activate marker files all live in that ``tmp``
    subdirectory.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX
    STATEDIR = dir

    for directory in (STATEDIR, STATEDIR + "/tmp"):
        if not os.path.exists(directory):
            os.mkdir(directory)

    prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
    activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5102
5103
def setup_sysconfdir(dir):
    """Record ``dir`` in the module-level ``SYSCONFDIR`` global."""
    global SYSCONFDIR
    SYSCONFDIR = dir
5107
5108
def parse_args(argv):
    """Parse the ceph-disk command line ``argv`` and return the namespace.

    The global options are declared first, then each subcommand
    registers its own sub-parser. The namespace also carries ``prog``,
    the program name of the parser.
    """
    parser = argparse.ArgumentParser(prog='ceph-disk')

    # Global flags
    parser.add_argument('-v', '--verbose',
                        action='store_true', default=None,
                        help='be more verbose')
    parser.add_argument('--log-stdout',
                        action='store_true', default=None,
                        help='log to stdout')

    # Global locations
    parser.add_argument('--prepend-to-path',
                        metavar='PATH',
                        default='/usr/bin',
                        help=('prepend PATH to $PATH for backward '
                              'compatibility (default /usr/bin)'))
    parser.add_argument('--statedir',
                        metavar='PATH',
                        default='/var/lib/ceph',
                        help=('directory in which ceph state is preserved '
                              '(default /var/lib/ceph)'))
    parser.add_argument('--sysconfdir',
                        metavar='PATH',
                        default='/etc/ceph',
                        help=('directory in which ceph configuration files '
                              'are found (default /etc/ceph)'))

    # Identity used for subprocesses
    parser.add_argument('--setuser',
                        metavar='USER',
                        default=None,
                        help=('use the given user for subprocesses, '
                              'rather than ceph or root'))
    parser.add_argument('--setgroup',
                        metavar='GROUP',
                        default=None,
                        help=('use the given group for subprocesses, '
                              'rather than ceph or root'))

    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    # The registration order is kept as is.
    register_subcommands = (
        Prepare.set_subparser,
        make_activate_parser,
        make_activate_lockbox_parser,
        make_activate_block_parser,
        make_activate_journal_parser,
        make_activate_all_parser,
        make_list_parser,
        make_suppress_parser,
        make_deactivate_parser,
        make_destroy_parser,
        make_zap_parser,
        make_trigger_parser,
        make_fix_parser,
    )
    for register in register_subcommands:
        register(subparsers)

    return parser.parse_args(argv)
5183
5184
def make_fix_parser(subparsers):
    """Register the ``fix`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is ``main_fix``.
    """
    # The description is intentionally left empty, as it was.
    description = textwrap.fill(textwrap.dedent("""\
        """))
    fix_parser = subparsers.add_parser(
        'fix',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='fix SELinux labels and/or file permissions')

    # All the options are plain boolean switches, off by default.
    switches = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, help_text in switches:
        fix_parser.add_argument(
            flag,
            action='store_true',
            default=False,
            help=help_text
        )

    fix_parser.set_defaults(func=main_fix)
    return fix_parser
5221
5222
def make_trigger_parser(subparsers):
    """Register the ``trigger`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is ``main_trigger``.
    """
    description = textwrap.fill(textwrap.dedent("""\
        The partition given in argument is activated. The type of the
        partition (data, lockbox, journal etc.) is detected by its
        type. If the init system is upstart or systemd, the activation is
        delegated to it and runs asynchronously, which
        helps reduce the execution time of udev actions.
        """))
    trigger_parser = subparsers.add_parser(
        'trigger',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='activate any device (called by udev)')

    trigger_parser.add_argument('dev', help='device')
    trigger_parser.add_argument('--cluster',
                                metavar='NAME',
                                default='ceph',
                                help='cluster name to assign this disk to')
    trigger_parser.add_argument('--dmcrypt',
                                action='store_true', default=None,
                                help='map devices with dm-crypt')
    trigger_parser.add_argument('--dmcrypt-key-dir',
                                metavar='KEYDIR',
                                default='/etc/ceph/dmcrypt-keys',
                                help=('directory where dm-crypt keys '
                                      'are stored'))
    trigger_parser.add_argument('--sync',
                                action='store_true', default=None,
                                help=('do operation synchronously; '
                                      'do not trigger systemd'))

    trigger_parser.set_defaults(func=main_trigger)
    return trigger_parser
5265
5266
def make_activate_parser(subparsers):
    """Register the ``activate`` subcommand on ``subparsers``.

    The description embeds the value ``STATEDIR`` has when the parser is
    built. Returns the new parser, whose ``func`` default is
    ``main_activate``.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activate the OSD found at PATH (can be a directory
        or a device partition, possibly encrypted). When
        activated for the first time, a unique OSD id is obtained
        from the cluster. If PATH is a directory, a symbolic
        link is added in {statedir}/osd/ceph-$id. If PATH is
        a partition, it is mounted on {statedir}/osd/ceph-$id.
        Finally, the OSD daemon is run.

        If the OSD depends on auxiliary partitions (journal, block, ...)
        they need to be available otherwise activation will fail. It
        may happen if a journal is encrypted and cryptsetup was not
        run yet.
        """.format(statedir=STATEDIR)))
    activate_parser = subparsers.add_parser(
        'activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph OSD')

    activate_parser.add_argument(
        '--mount',
        action='store_true', default=None,
        help='mount a block device [deprecated, ignored]')
    activate_parser.add_argument(
        '--activate-key',
        metavar='PATH',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)')
    activate_parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        choices=INIT_SYSTEMS,
        default='auto',
        help='init system to manage this dir')
    activate_parser.add_argument(
        '--no-start-daemon',
        action='store_true', default=None,
        help='do not start the daemon')
    activate_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory')
    activate_parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help='map DATA and/or JOURNAL devices with dm-crypt')
    activate_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    activate_parser.add_argument(
        '--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD')

    activate_parser.set_defaults(
        func=main_activate,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return activate_parser
5335
5336
def make_activate_lockbox_parser(subparsers):
    """Register the ``activate-lockbox`` subcommand on ``subparsers``.

    The description embeds the value ``STATEDIR`` has when the parser is
    built. Returns the new parser, whose ``func`` default is
    ``main_activate_lockbox``.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
        where $uuid uniquely identifies the OSD that needs this lockbox
        to retrieve keys from the monitor and unlock its partitions.

        If the OSD has one or more auxiliary devices (journal, block, ...)
        symbolic links are created at {statedir}/osd-lockbox/$other_uuid
        and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
        allow a journal encrypted in a partition identified by $other_uuid to
        fetch the keys it needs from the monitor.

        Finally the OSD is activated, as it would be with ceph-disk activate.
        """.format(statedir=STATEDIR)))
    parser = subparsers.add_parser(
        'activate-lockbox',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph lockbox')

    parser.add_argument(
        '--activate-key',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)')
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device')

    parser.set_defaults(
        func=main_activate_lockbox,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return parser
5376
5377
def make_activate_block_parser(subparsers):
    """Register ``activate-block`` on ``subparsers``; return its parser."""
    block_parser = make_activate_space_parser('block', subparsers)
    return block_parser
5380
5381
def make_activate_journal_parser(subparsers):
    """Register ``activate-journal`` on ``subparsers``; return its parser."""
    journal_parser = make_activate_space_parser('journal', subparsers)
    return journal_parser
5384
5385
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` subcommand on ``subparsers``.

    ``name`` is used in the subcommand name and in the help texts; it is
    also bound in the ``func`` default, which calls
    ``main_activate_space(name, args)``. Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activating a {name} partition is only meaningfull
        if it is encrypted and it will map it using
        cryptsetup.

        Finally the corresponding OSD is activated,
        as it would be with ceph-disk activate.
        """.format(name=name)))
    activate_space_parser = subparsers.add_parser(
        'activate-%s' % name,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate an OSD via its %s device' % name)

    activate_space_parser.add_argument(
        'dev',
        metavar='DEV',
        help='path to %s block device' % name)
    activate_space_parser.add_argument(
        '--activate-key',
        metavar='PATH',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)')
    activate_space_parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        choices=INIT_SYSTEMS,
        default='auto',
        help='init system to manage this dir')
    activate_space_parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help=('map data and/or auxiliariy (journal, etc.) '
              'devices with dm-crypt'))
    activate_space_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    activate_space_parser.add_argument(
        '--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD')

    activate_space_parser.set_defaults(
        # Kept as a lambda: main() looks at func.__name__.
        func=lambda args: main_activate_space(name, args),
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return activate_space_parser
5439
5440
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is
    ``main_activate_all``.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
        The partitions containing auxiliary devices (journal, block, ...)
        are not activated.
        """))
    activate_all_parser = subparsers.add_parser(
        'activate-all',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate all tagged OSD partitions')

    activate_all_parser.add_argument(
        '--activate-key',
        metavar='PATH',
        dest='activate_key_template',
        help='bootstrap-osd keyring path template (%(default)s)')
    activate_all_parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        choices=INIT_SYSTEMS,
        default='auto',
        help='init system to manage this dir')

    activate_all_parser.set_defaults(
        func=main_activate_all,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return activate_all_parser
5469
5470
def make_list_parser(subparsers):
    """Register the ``list`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is ``main_list``.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Display all partitions on the system and their
        associated Ceph information, if any.
        """))
    list_parser = subparsers.add_parser(
        'list',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='List disks, partitions, and Ceph OSDs')

    list_parser.add_argument(
        '--format',
        choices=['json', 'plain'],
        default='plain',
        help='output format')
    # Zero or more paths may be given.
    list_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='*',
        help='path to block devices, relative to /sys/block')

    list_parser.set_defaults(func=main_list)
    return list_parser
5496
5497
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    Both subcommands take a single PATH argument; their ``func``
    defaults are ``main_suppress`` and ``main_unsuppress``. Only the
    ``suppress-activate`` parser is returned.
    """
    suppress_description = textwrap.fill(textwrap.dedent("""\
        Add a prefix to the list of suppressed device names
        so that they are ignored by all activate* subcommands.
        """))
    suppress_parser = subparsers.add_parser(
        'suppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=suppress_description,
        help='Suppress activate on a device (prefix)')
    suppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory')
    suppress_parser.set_defaults(func=main_suppress)

    unsuppress_description = textwrap.fill(textwrap.dedent("""\
        Remove a prefix from the list of suppressed device names
        so that they are no longer ignored by all
        activate* subcommands.
        """))
    unsuppress_parser = subparsers.add_parser(
        'unsuppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=unsuppress_description,
        help='Stop suppressing activate on a device (prefix)')
    unsuppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory')
    unsuppress_parser.set_defaults(func=main_unsuppress)

    return suppress_parser
5534
5535
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is
    ``main_deactivate``. Returning the parser matches the other
    ``make_*_parser`` functions; callers ignoring the result are not
    affected.
    """
    deactivate_parser = subparsers.add_parser(
        'deactivate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Deactivate the OSD located at PATH. It stops the OSD daemon
        and optionally marks it out (with --mark-out). The content of
        the OSD is left untouched.

        By default, the, ready, active, INIT-specific files are
        removed (so that it is not automatically re-activated by the
        udev rules or ceph-disk trigger) and the file deactive is
        created to remember the OSD is deactivated.

        If the --once option is given, the ready, active, INIT-specific
        files are not removed and the OSD will reactivate whenever
        ceph-disk trigger is run on one of the devices (journal, data,
        block, lockbox, ...).

        If the OSD is dmcrypt, remove the data dmcrypt map. When
        deactivate finishes, the OSD is down.
        """)),
        help='Deactivate a Ceph OSD')
    deactivate_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    # PATH is optional at the argparse level.
    deactivate_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    deactivate_parser.add_argument(
        '--deactivate-by-id',
        metavar='<id>',
        help='ID of OSD to deactive'
    )
    deactivate_parser.add_argument(
        '--mark-out',
        action='store_true', default=False,
        help='option to mark the osd out',
    )
    deactivate_parser.add_argument(
        '--once',
        action='store_true', default=False,
        help='does not need --reactivate to activate again',
    )
    deactivate_parser.set_defaults(
        func=main_deactivate,
    )
    return deactivate_parser
5589
5590
def make_destroy_parser(subparsers):
    """Register the ``destroy`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is ``main_destroy``.
    Returning the parser matches the other ``make_*_parser`` functions;
    callers ignoring the result are not affected.
    """
    destroy_parser = subparsers.add_parser(
        'destroy',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Destroy the OSD located at PATH. It removes the OSD from the
        cluster and marks it destroyed. An OSD must be down before it
        can be destroyed. Once it is destroyed, a new OSD can be created
        in its place, reusing the same OSD id and position (e.g. after
        a failed HDD or SSD is replaced). Alternatively, if the
        --purge option is also specified, the OSD is removed from the
        CRUSH map and the OSD id is deallocated.""")),
        help='Destroy a Ceph OSD')
    destroy_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    # PATH is optional at the argparse level.
    destroy_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    destroy_parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy'
    )
    destroy_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    destroy_parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    destroy_parser.add_argument(
        '--purge',
        action='store_true', default=False,
        help='option to remove OSD from CRUSH map and deallocate the id',
    )
    destroy_parser.set_defaults(
        func=main_destroy,
    )
    return destroy_parser
5641
5642
def make_zap_parser(subparsers):
    """Register the ``zap`` subcommand on ``subparsers``.

    Returns the new parser, whose ``func`` default is ``main_zap``.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Zap/erase/destroy a device's partition table and contents. It
        actually uses sgdisk and it's option --zap-all to
        destroy both GPT and MBR data structures so that the disk
        becomes suitable for repartitioning.
        """))
    zap_parser = subparsers.add_parser(
        'zap',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Zap/erase/destroy a device\'s partition table (and contents)')

    # At least one device is required.
    zap_parser.add_argument(
        'dev',
        metavar='DEV',
        nargs='+',
        help='path to block device')

    zap_parser.set_defaults(func=main_zap)
    return zap_parser
5664
5665
def main(argv):
    """Parse ``argv``, set up the global state and run the subcommand.

    With ``--verbose`` the subcommand is called directly; otherwise it
    goes through ``main_catch()``.
    """
    global CEPH_PREF_USER, CEPH_PREF_GROUP

    args = parse_args(argv)

    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current_path = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = ':'.join((args.prepend_to_path, current_path))

    # trigger may run when statedir is unavailable and does not use it
    runs_trigger = args.func.__name__ == 'main_trigger'
    if not runs_trigger:
        setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if not args.verbose:
        main_catch(args.func, args)
    else:
        args.func(args)
5690
def setup_logging(verbose, log_stdout):
    """Configure logging for the command line tool.

    The level is DEBUG when ``verbose`` is true, WARNING otherwise.
    When ``log_stdout`` is true a handler writing to ``sys.stdout`` is
    attached to ``LOG``; otherwise ``logging.basicConfig`` is used.
    Messages are prefixed with the name of the logging function.
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format='%(funcName)s: %(message)s',
        )
        return

    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setLevel(loglevel)
    stdout_handler.setFormatter(
        logging.Formatter('%(funcName)s: %(message)s'))
    LOG.addHandler(stdout_handler)
    LOG.setLevel(loglevel)
5708
5709
def main_catch(func, args):
    """Call ``func(args)`` and turn the known errors into SystemExit.

    ``Error`` is reported as ``<prog>: <message>`` and
    ``CephDiskException`` as ``<prog> <exception class>: <message>``,
    ``<prog>`` being ``args.prog``. Other exceptions propagate.
    """
    try:
        func(args)

    except Error as e:
        message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
        raise SystemExit(message)

    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=type(error).__name__,
            msg=error,
        )
        raise SystemExit(message)
5732
5733
def run():
    """Run ``main()`` with the arguments found after the program name."""
    cli_args = sys.argv[1:]
    main(cli_args)
5736
5737
if __name__ == '__main__':
    # Script entry point: run() calls main(sys.argv[1:]).
    run()
    # NOTE(review): nothing in the visible code reads warned_about;
    # it is kept because its purpose cannot be confirmed from here.
    warned_about = {}