]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-disk/ceph_disk/main.py
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
CommitLineData
7c673cae
FG
1#!/usr/bin/env python
2#
3# Copyright (C) 2015, 2016 Red Hat <contact@redhat.com>
4# Copyright (C) 2014 Inktank <info@inktank.com>
5# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6# Copyright (C) 2014 Catalyst.net Ltd
7#
8# Author: Loic Dachary <loic@dachary.org>
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU Library Public License as published by
12# the Free Software Foundation; either version 2, or (at your option)
13# any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU Library Public License for more details.
19#
20
21from __future__ import print_function
22
23import argparse
24import base64
25import errno
26import fcntl
27import json
28import logging
29import os
30import platform
31import re
32import subprocess
33import stat
34import sys
35import tempfile
36import uuid
37import time
38import shlex
39import pwd
40import grp
41import textwrap
42import glob
43
# Expected content of an OSD data directory's 'magic' file
# (compared against by check_osd_magic()).
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
# NOTE(review): presumably the lockbox counterpart of the magic above;
# it is not referenced in this part of the file -- confirm.
CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'

# Value the 'key-management-mode' one-liner is compared with when a
# dm-crypt key is looked up.
KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'

# Identifier table indexed as PTYPE[kind][name][state] where
#   kind  is 'regular', 'luks', 'plain' or 'mpath',
#   name  is 'journal', 'block', 'block.db', 'block.wal', 'osd' or 'lockbox',
#   state is 'ready' or 'tobe'.
# NOTE(review): the values look like GPT partition type GUIDs -- confirm.
PTYPE = {
    'regular': {
        'journal': {
            # identical because creating a journal is atomic
            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block': {
            # identical because creating a block is atomic
            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block.db': {
            # NOTE: unlike 'journal' and 'block' above, 'ready' and
            # 'tobe' are NOT identical here
            'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
            'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
        },
        'block.wal': {
            # NOTE: unlike 'journal' and 'block' above, 'ready' and
            # 'tobe' are NOT identical here
            'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
            'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
        },
        'lockbox': {
            'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
            'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
        },
    },
    # NOTE: 'luks' and 'plain' have no 'lockbox' entry, and their
    # 'journal' and 'block' entries share the same 'tobe' value.
    'luks': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '166418da-c469-4022-adf4-b30afd37f176',
            'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
        },
        'block.wal': {
            'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
            'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'plain': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
            'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
        },
        'block.wal': {
            'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
            'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'mpath': {
        'journal': {
            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
        },
        'block': {
            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
        },
        'block.db': {
            'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
            'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
        },
        'block.wal': {
            'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
            'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
        },
        'osd': {
            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
        },
        'lockbox': {
            'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
            'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
        },
    },
}
151
152
class Ptype(object):
    """
    Stateless lookups over the PTYPE table, all based on the 'ready'
    identifier of its entries.
    """

    @staticmethod
    def get_ready_by_type(what):
        """
        Collect the 'ready' identifier of every name listed under
        PTYPE[what].
        """
        ready = []
        for entry in PTYPE[what].values():
            ready.append(entry['ready'])
        return ready

    @staticmethod
    def get_ready_by_name(name):
        """
        Collect the 'ready' identifier of ``name`` from every kind in
        PTYPE that defines it.
        """
        ready = []
        for names in PTYPE.values():
            if name in names:
                ready.append(names[name]['ready'])
        return ready

    @staticmethod
    def is_regular_space(ptype):
        """True if ptype is the 'ready' id of a 'regular' space."""
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        """True if ptype is the 'ready' id of an 'mpath' space."""
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        """True if ptype is the 'ready' id of a 'plain' space."""
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        """True if ptype is the 'ready' id of a 'luks' space."""
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """
        True if ptype equals the 'ready' id of one of the Space.NAMES
        entries of PTYPE[what].
        """
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' id back to the Space.NAMES entry it belongs to.

        :raises: ValueError if no kind in PTYPE uses ptype for a space.
        """
        for names in PTYPE.values():
            for name in Space.NAMES:
                if ptype == names[name]['ready']:
                    return name
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """
        True if ptype is the 'ready' id of a 'plain' or 'luks' space.
        """
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """
        True if ptype is the 'ready' id of ``name`` in the 'plain' or
        'luks' kind.
        """
        return any(ptype == PTYPE[what][name]['ready']
                   for what in ('plain', 'luks'))
207
208
# Root of the sysfs tree; block_path() builds its result under it.
SYSFS = '/sys'

# Platform dependent defaults, chosen once when the module is loaded.
if platform.system() == 'FreeBSD':
    FREEBSD = True
    DEFAULT_FS_TYPE = 'zfs'
    PROCDIR = '/compat/linux/proc'
    # FreeBSD does not have blockdevices any more
    BLOCKDIR = '/dev'
else:
    FREEBSD = False
    DEFAULT_FS_TYPE = 'xfs'
    PROCDIR = '/proc'
    BLOCKDIR = '/sys/block'

"""
OSD STATUS Definition
"""
# NOTE(review): the names suggest a combination of in/out and up/down
# states; how the values are consumed is not visible here -- confirm.
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3

# Mount options, keyed by filesystem type.
MOUNT_OPTIONS = dict(
    btrfs='noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    ext4='noatime,user_xattr',
    xfs='noatime,inode64',
)

# Extra mkfs arguments, keyed by filesystem type.
MKFS_ARGS = dict(
    btrfs=[
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    xfs=[
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
    zfs=[
        '-o', 'atime=off'
    ],
)

# Names of the known init systems, plus the 'auto' and 'none' entries.
INIT_SYSTEMS = [
    'upstart',
    'sysvinit',
    'systemd',
    'openrc',
    'bsdrc',
    'auto',
    'none',
]

STATEDIR = '/var/lib/ceph'

SYSCONFDIR = '/etc/ceph'

# Module level placeholders, all unset at import time.
# NOTE(review): presumably assigned later in the file -- confirm.
prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}

# Nuke the TERM variable to avoid confusing any subprocesses we call.
# For example, libreadline will print weird control sequences for some
# TERM values.
if 'TERM' in os.environ:
    del os.environ['TERM']

# Logger named after the module, or after the invoked program when the
# module is run as a script.
LOG_NAME = __name__
if LOG_NAME == '__main__':
    LOG_NAME = os.path.basename(sys.argv[0])
LOG = logging.getLogger(LOG_NAME)

# Allow user-preferred values for subprocess user and group
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
296
297
class FileLock(object):
    """
    Exclusive ``fcntl.lockf`` lock on a file, used as a context manager.

    The file is created if it does not exist. The lock is taken when the
    ``with`` block is entered and released (and the file closed) when it
    is left.
    """

    def __init__(self, fn):
        """
        :param fn: path of the file to lock
        """
        self.fn = fn
        self.fd = None

    def __enter__(self):
        """
        Open the file and block until the exclusive lock is acquired.

        :return: the lock object itself
        """
        # compare with None: 0 is a valid file descriptor
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except BaseException:
            # do not leak the descriptor if locking fails or is interrupted
            os.close(fd)
            raise
        self.fd = fd
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Release the lock and close the file.
        """
        assert self.fd is not None
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # always close, even if unlocking fails
            os.close(self.fd)
            self.fd = None
313
314
class Error(Exception):
    """
    Error
    """

    # NOTE: the class docstring is runtime data: __str__ uses it (stripped)
    # as the first component of the message, so subclasses customize the
    # message prefix through their own docstring.

    def __str__(self):
        # basestring only exists on Python 2
        try:
            text_types = basestring
        except NameError:
            text_types = str
        pieces = [_bytes2str(self.__doc__.strip())]
        for arg in self.args:
            if not isinstance(arg, text_types):
                arg = str(arg)
            pieces.append(_bytes2str(arg))
        return ': '.join(pieces)
328
329
class MountError(Error):
    """
    Mounting filesystem failed
    """
    # NOTE: Error.__str__ uses this docstring as the message prefix, so
    # its wording is visible at runtime.
334
335
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
    # NOTE: Error.__str__ uses this docstring as the message prefix, so
    # its wording is visible at runtime.
340
341
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
    # NOTE: Error.__str__ uses this docstring as the message prefix, so
    # its wording is visible at runtime.
346
347
class TruncatedLineError(Error):
    """
    Line is truncated
    """
    # NOTE: Error.__str__ uses this docstring as the message prefix, so
    # its wording is visible at runtime.
352
353
class TooManyLinesError(Error):
    """
    Too many lines
    """
    # NOTE: Error.__str__ uses this docstring as the message prefix, so
    # its wording is visible at runtime.
358
359
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
    # NOTE: Error.__str__ uses this docstring as the message prefix, so
    # its wording is visible at runtime.
364
365
class CephDiskException(Exception):
    """
    Root of the ceph-disk exceptions that carry a custom (ad-hoc)
    message; they will be caught and dealt with when main() is executed.
    """
372
373
class ExecutableNotFound(CephDiskException):
    """
    Raised when an executable cannot be located in PATH.
    """
379
380
def is_systemd():
    """
    Tell whether systemd is running, by looking for 'systemd' in the
    ``1/comm`` file under PROCDIR.
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as comm:
        init_name = comm.read()
    return 'systemd' in init_name
387
388
def is_upstart():
    """
    Tell whether upstart is running, by looking for 'upstart' in the
    standard output of ``init --version``.
    """
    stdout = command(['init', '--version'])[0]
    return 'upstart' in stdout
395
396
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    Arguments are passed through to ``os.mkdir``; the first positional
    argument is the path. A symlink found at that path is removed even
    when it is dangling. An already existing path is not an error.

    :raises: OSError if the directory cannot be created for a reason
             other than already existing.
    """
    path = a[0] if a else kw['path']
    # islink() does not follow the link, so a dangling symlink is
    # detected too (os.path.exists() would report it as missing and
    # os.mkdir() would then fail with EEXIST, leaving it in place)
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
413
414
def which(executable):
    """
    Locate an executable.

    The directories of the PATH environment variable (or ``os.defpath``
    when it is unset or empty) are searched first, followed by a fixed
    list of common binary directories.

    :return: Path of the first regular, executable file found, or None.
    """
    search_path = os.environ.get('PATH') or os.defpath
    directories = search_path.split(os.pathsep)
    directories.extend([
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ])

    for directory in directories:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
434
435
436def _get_command_executable(arguments):
437 """
438 Return the full path for an executable, raise if the executable is not
439 found. If the executable has already a full path do not perform any checks.
440 """
441 if os.path.isabs(arguments[0]): # an absolute path
442 return arguments
443 executable = which(arguments[0])
444 if not executable:
445 command_msg = 'Could not run command: %s' % ' '.join(arguments)
446 executable_msg = '%s not in path.' % arguments[0]
447 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
448
449 # swap the old executable for the new one
450 arguments[0] = executable
451 return arguments
452
453
def command(arguments, **kwargs):
    """
    Run a command with ``subprocess.Popen`` and wait for it to finish,
    after checking that the executable exists so that a helpful error
    is raised when it does not.

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are handed over to ``subprocess.Popen``.

    :return: A (stdout, stderr, returncode) tuple, the outputs being
             decoded by _bytes2str.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    child = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout, stderr = child.communicate()

    return _bytes2str(stdout), _bytes2str(stderr), child.returncode
479
480
481def _bytes2str(string):
482 return string.decode('utf-8') if isinstance(string, bytes) else string
483
484
def command_init(arguments, **kwargs):
    """
    Start a command with ``subprocess.Popen`` without waiting for it to
    finish, after checking that the executable exists so that a helpful
    error is raised when it does not.

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are handed over to ``subprocess.Popen``.

    :return: The process, with stdout and stderr connected to pipes.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    return subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
507
508
def command_wait(process):
    """
    Wait for ``process`` to finish and collect its output.

    :return: A (stdout, stderr, returncode) tuple, the outputs being
             decoded by _bytes2str.
    """
    stdout, stderr = process.communicate()
    returncode = process.returncode
    return _bytes2str(stdout), _bytes2str(stderr), returncode
517
518
def command_check_call(arguments, exit=False):
    """
    Run a command with ``subprocess.check_call``, after checking that
    the executable exists so that a helpful error is raised when it
    does not.

    .. note:: This should be the preferred way of calling
    ``subprocess.check_call`` since it provides the caller with the safety net
    of making sure that executables *will* be found and will error nicely
    otherwise.

    :param exit: when ``True`` a failing command ends in a clean (sans
                 traceback) system exit instead of a CalledProcessError
    :raises: subprocess.CalledProcessError, or SystemExit when ``exit``
             is set, if the command returns a non-zero status.
    """
    arguments = _get_command_executable(arguments)
    cmdline = ' '.join(arguments)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(arguments)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        message = "'{cmd}' failed with status code {returncode}".format(
            cmd=cmdline,
            returncode=error.returncode,
        )
        raise SystemExit(message)
547
548
def platform_distro():
    """
    Name of the distribution of the current machine, as reported by
    platform_information(), lower cased and without leading or trailing
    whitespace. An empty string is returned when the name is unknown.
    """
    name = platform_information()[0]
    if not name:
        name = ''
    return name.strip().lower()
556
557
def _linux_distribution():
    """
    Return a (distro, release, codename) tuple for a Linux host.

    ``platform.linux_distribution()`` is used when the interpreter
    provides it. It was removed in Python 3.8, in which case the values
    are read from the os-release file instead; fields that cannot be
    found are empty strings.
    """
    legacy = getattr(platform, 'linux_distribution', None)
    if legacy is not None:
        return legacy()

    fields = {}
    for candidate in ('/etc/os-release', '/usr/lib/os-release'):
        try:
            with open(candidate, 'rb') as os_release:
                for raw in os_release:
                    line = raw.decode('utf-8', 'replace').strip()
                    key, sep, value = line.partition('=')
                    if sep and not key.startswith('#'):
                        fields[key] = value.strip('"\'')
        except (IOError, OSError):
            continue
        break
    return (
        fields.get('NAME', ''),
        fields.get('VERSION_ID', ''),
        fields.get('VERSION_CODENAME', ''),
    )


def platform_information():
    """
    Describe the platform the program is running on.

    On FreeBSD the values are taken from ``platform.system()`` and
    ``platform.version()``, elsewhere from the Linux distribution
    information. A missing codename is guessed for Debian and
    Virtuozzo linux.

    :return: A (distro, release, codename) tuple of stripped strings.
    """
    if FREEBSD:
        distro = platform.system()
        version_fields = platform.version().split()
        release = version_fields[1]
        codename = version_fields[3]
    else:
        distro, release, codename = _linux_distribution()
    # this could be an empty string in Debian
    if not codename and 'debian' in distro.lower():
        debian_codenames = {
            '8': 'jessie',
            '7': 'wheezy',
            '6': 'squeeze',
        }
        major_version = release.split('.')[0]
        codename = debian_codenames.get(major_version, '')

        # In order to support newer jessie/sid, wheezy/sid strings we test
        # this if sid is buried in the minor, we should use sid anyway.
        if not codename and '/' in release:
            major, minor = release.split('/')
            if minor == 'sid':
                codename = minor
            else:
                codename = major
    # this could be an empty string in Virtuozzo linux
    if not codename and 'virtuozzo linux' in distro.lower():
        codename = 'virtuozzo'

    return (
        str(distro).strip(),
        str(release).strip(),
        str(codename).strip()
    )
595
596#
597# An alternative block_path implementation would be
598#
599# name = basename(dev)
600# return /sys/devices/virtual/block/$name
601#
602# It is however more fragile because it relies on the fact
603# that the basename of the device the user will use always
604# matches the one the driver will use. On Ubuntu 14.04, for
605# instance, when multipath creates a partition table on
606#
607# /dev/mapper/353333330000007d0 -> ../dm-0
608#
609# it will create partition devices named
610#
611# /dev/mapper/353333330000007d0-part1
612#
613# which is the same device as /dev/dm-1 but not a symbolic
614# link to it:
615#
616# ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
617# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
618# lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
619# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
620#
621# Using the basename in this case fails.
622#
623
624
def block_path(dev):
    """
    Return the sysfs directory of a device, located through its
    major:minor numbers rather than its name. On FreeBSD ``dev`` is
    returned unchanged.
    """
    if FREEBSD:
        return dev
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS,
        M=os.major(rdev),
        m=os.minor(rdev),
    )
632
633
def get_dm_uuid(dev):
    """
    Read the ``dm/uuid`` file found in the sysfs directory of ``dev``.

    :return: The content of the file, unstripped, or False if the file
             does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # close the file deterministically instead of relying on the
    # garbage collector
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
642
643
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The decision is based on the dm uuid of the device, which must start
    with ``mpath-`` or ``part<N>-mpath-``. The result is meant to be
    used as a truth value: it is a match object, or a false value.
    Always False on FreeBSD.
    """
    if FREEBSD:
        return False
    dm_uuid = get_dm_uuid(dev)
    # raw strings: '\d' in a regular literal is an invalid escape sequence
    return (dm_uuid and
            (re.match(r'part\d+-mpath-', dm_uuid) or
             re.match(r'mpath-', dm_uuid)))
654
655
def get_dev_name(path):
    """
    Convert a device path into a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The leading ``/dev/`` is dropped and every remaining ``/`` becomes
    a ``!``, so that a device "name" looks like::

        sdb
        cciss!c0d1

    The path must start with ``/dev/``.
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    return path[len(prefix):].replace('/', '!')
671
672
def get_dev_path(name):
    """
    Convert a device name (cciss!c0d1) into a path under /dev.

    Every ``!`` becomes a ``/``, so that a device "path" looks like::

        /dev/sdb
        /dev/cciss/c0d1

    """
    relpath = name.replace('!', '/')
    return '/dev/' + relpath
683
684
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) into a path relative to /dev,
    by turning every ``!`` into a ``/``.
    """
    return '/'.join(name.split('!'))
690
691
def get_dev_size(dev, size='megabytes'):
    """
    Measure a device by seeking to its end, so that errors from actions
    on devices that are too small can be prevented and reported better.

    To avoid breakage in case this approach is not robust, a failure to
    measure only results in a warning being logged, and None is returned.

    :param size: bytes or megabytes (anything else means megabytes)
    :param dev: the device to calculate the size
    :return: The size, rounded down to a whole number of units.
    """
    units = {'bytes': 1, 'megabytes': 1024 * 1024}
    fd = os.open(dev, os.O_RDONLY)
    try:
        total = os.lseek(fd, 0, os.SEEK_END)
        unit = units.get(size, 1024 * 1024)  # default to megabytes
        return total // unit
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(fd)
713
714
def get_partition_mpath(dev, pnum):
    """
    Find partition number ``pnum`` of the multipath device ``dev``.

    :return: The first holder of ``dev`` whose dm uuid starts with
             ``part<pnum>-mpath-``, or None if there is none.
    """
    matches = list_partitions_mpath(
        dev, "part{pnum}-mpath-".format(pnum=pnum))
    return matches[0] if matches else None
722
723
def get_partition_dev(dev, pnum):
    """
    Return the device path of partition ``pnum`` of ``dev``.

    Partitions are assumed to be named like the base device, followed
    by the partition number, with optionally some intervening
    characters (like 'p') in between, e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    The lookup is retried a few times, sleeping in between, before
    giving up.

    :raises: Error if the partition cannot be found.
    """
    max_retry = 10
    for attempt in range(max_retry + 1):
        found = None
        where = ""
        if is_mpath(dev):
            found = get_partition_mpath(dev, pnum)
        else:
            name = get_dev_name(os.path.realpath(dev))
            sys_entry = os.path.join(BLOCKDIR, name)
            where = " in %s" % sys_entry
            candidates = [
                entry for entry in os.listdir(sys_entry)
                if entry.startswith(name) and entry.endswith(str(pnum))
            ]
            # we want the shortest name that starts with the base name
            # and ends with the partition number
            if candidates:
                found = min(candidates, key=len)

        if found:
            if attempt:
                LOG.info('Found partition %d for %s after %d tries' %
                         (pnum, dev, attempt))
            return get_dev_path(found)

        if attempt >= max_retry:
            raise Error('partition %d for %s does not appear to exist%s' %
                        (pnum, dev, where))

        LOG.info('Try %d/%d : partition %d for %s does not exist%s' %
                 (attempt + 1, max_retry, pnum, dev, where))
        time.sleep(.2)
765
766
def list_all_partitions():
    """
    Return the devices of the machine together with their partitions.

    Devices are enumerated from BLOCKDIR, or from the ``partitions``
    file under PROCDIR on FreeBSD.

    :return: A dict mapping each device name to the result of
             list_partitions() for that device.
    """
    # initialized before branching: the FreeBSD branch used to fill a
    # dictionary that was only created in the other branch
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(get_dev_path(name))
    return dev_part_list
787
788
def list_partitions(dev):
    """
    List the partitions of ``dev`` (resolved with realpath first),
    using the multipath lookup when the device is managed by multipath
    and the plain device lookup otherwise.
    """
    real = os.path.realpath(dev)
    if is_mpath(real):
        lister = list_partitions_mpath
    else:
        lister = list_partitions_device
    return lister(real)
795
796
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    List the holders of ``dev`` whose dm uuid matches ``part_re``.

    :param dev: the multipath device to inspect
    :param part_re: regular expression the beginning of the dm uuid of
                    a holder must match
    :return: The names of the matching holders.
    """
    holders = os.path.join(block_path(dev), 'holders')
    partitions = []
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # close each file right away rather than leaking one handle
        # per holder
        with open(uuid_path, 'r') as uuid_file:
            dm_uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path +
                  " uuid = " + dm_uuid)
        if re.match(part_re, dm_uuid):
            partitions.append(holder)
    return partitions
808
809
def list_partitions_device(dev):
    """
    List the entries of the sysfs directory of ``dev`` whose name starts
    with the name of the device, i.e. its partitions.
    """
    prefix = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(prefix)]
820
821
def get_partition_base(dev):
    """
    Find the device a partition belongs to.

    :raises: Error if ``dev`` is not a block device, is not a partition,
             or if no device under /sys/block contains it.
    :return: The path of the base device.
    """
    sys_block = '/sys/block'
    real = os.path.realpath(dev)
    if not stat.S_ISBLK(os.lstat(real).st_mode):
        raise Error('not a block device', real)

    name = get_dev_name(real)
    # whole devices have their own entry, partitions do not
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', real)

    # find the base
    for candidate in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, candidate, name)):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', real)
839
840
def is_partition_mpath(dev):
    """
    True if the dm uuid of ``dev`` starts with ``part<N>-mpath-``, i.e.
    if it is a partition of a multipath device. False when the device
    has no dm uuid at all.
    """
    dm_uuid = get_dm_uuid(dev)
    if not dm_uuid:
        # get_dm_uuid() returns False when there is no dm uuid, which
        # re.match() would reject with a TypeError
        return False
    return bool(re.match(r'part\d+-mpath-', dm_uuid))
844
845
def partnum_mpath(dev):
    """
    Extract the partition number from the dm uuid of a multipath
    partition.

    :raises: IndexError if the dm uuid does not contain
             ``part<N>-mpath-``.
    :return: The partition number, as a string.
    """
    dm_uuid = get_dm_uuid(dev)
    # raw string: '\d' in a regular literal is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', dm_uuid)[0]
849
850
def get_partition_base_mpath(dev):
    """
    Find the multipath device a multipath partition belongs to.

    The dm name of the first slave of ``dev`` is looked up in sysfs.

    :return: The path of the base device under /dev/mapper.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # close the file deterministically instead of relying on the
    # garbage collector
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
858
859
def is_partition(dev):
    """
    Tell a partition from a full disk.

    :raises: Error if ``dev`` is not a block device, or is neither a
             disk nor a partition.
    :return: True for a partition, False for a full disk.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    real = os.path.realpath(dev)
    info = os.lstat(real)
    if not stat.S_ISBLK(info.st_mode):
        raise Error('not a block device', real)

    # full disks have their own entry in BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(real))):
        return False

    # make sure it is a partition of something else
    numbers = (os.major(info.st_rdev), os.minor(info.st_rdev))
    if os.path.exists('/sys/dev/block/%d:%d/partition' % numbers):
        return True

    raise Error('not a disk or partition', real)
883
884
def is_mounted(dev):
    """
    Check if the given device is mounted.

    The ``mounts`` file under PROCDIR is searched for an existing,
    absolute device path resolving to the same real path as ``dev``.

    :return: The mount point of the first match, or None if the device
             is not mounted.
    """
    dev = os.path.realpath(dev)
    # The file is read as bytes, so compare bytes with bytes: on
    # Python 3 a str would never be equal to the fields read from it.
    if not isinstance(dev, bytes):
        dev = dev.encode('utf-8')
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return _bytes2str(path)
    return None
902
903
def is_held(dev):
    """
    List the devices holding ``dev`` (e.g., a dm-crypt mapping).

    :return: The content of the ``holders`` directory of the device in
             /sys/block; an empty list if it cannot be found or if the
             device is managed by multipath.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    part = get_dev_name(os.path.realpath(dev))

    # full disk?
    directory = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(directory):
        return os.listdir(directory)

    # partition? try ever shorter prefixes of its name as the base device
    for end in range(len(part), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
929
930
def verify_not_in_use(dev, check_partitions=False):
    """
    Make sure a given device (path) is not in use (e.g. mounted or
    in use by device-mapper).

    :param check_partitions: when set and ``dev`` is not a partition,
                             its partitions are verified as well
    :raises: Error if device is in use.
    """
    assert os.path.exists(dev)

    def _ensure_unused(path):
        # raise if path is mounted or held by another device
        if is_mounted(path):
            raise Error('Device is mounted', path)
        holders = is_held(path)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % path, ','.join(holders))

    _ensure_unused(dev)

    if check_partitions and not is_partition(dev):
        for partname in list_partitions(dev):
            _ensure_unused(get_dev_path(partname))
956
957
def must_be_one_line(line):
    """
    Make sure the given text is exactly one line, newline terminated.

    :raises: TruncatedLineError if the text does not end with a newline,
             TooManyLinesError if it contains more than one.
    :return: Content of the line, without the trailing newline.
    """
    text = _bytes2str(line)

    if text[-1:] != '\n':
        raise TruncatedLineError(text)
    content = text[:-1]
    if '\n' in content:
        raise TooManyLinesError(content)
    return content
973
974
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :param parent: directory containing the file
    :param name: name of the file
    :raises: Error if the file does not contain exactly one newline
             terminated line, IOError if it cannot be read.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # the with statement closes the file even if reading fails
        with open(path, 'rb') as one_liner:
            line = one_liner.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
1002
1003
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline.

    The line is written to a temporary file next to the destination,
    which is synced to disk, given its context by path_set_context()
    and then renamed over the destination.

    :param parent: directory containing the file
    :param name: name of the file
    :param text: content of the line, without newline
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # the data sits in the buffer of the file object until it is
        # flushed: without this, fsync() would have nothing to sync
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1017
1018
def init_get():
    """
    Ask 'ceph-detect-init' for the init system, 'sysvinit' being passed
    as its default.

    :return: The single line printed by the command.
    """
    detected = _check_output(
        args=['ceph-detect-init', '--default', 'sysvinit'],
    )
    return must_be_one_line(detected)
1031
1032
def check_osd_magic(path):
    """
    Make sure the 'magic' file found in path contains the Ceph OSD magic.

    :raises: BadMagicError if this does not look like a Ceph OSD data
    dir.
    """
    # a missing file (probably not mkfs'ed yet) is read as None, which
    # does not match either
    if read_one_line(path, 'magic') != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1046
1047
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :param osd_id: the id to verify, as a string
    :raises: Error if the id is not made of digits only.
    """
    # \Z rather than $: '$' also matches right before a trailing
    # newline, which would let an id such as '12\n' through
    if not re.match(r'^[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
1054
1055
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
):
    """
    Allocates an OSD id on the given cluster, by running
    ``ceph osd create`` as client.bootstrap-osd.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """

    LOG.debug('Allocating OSD id...')
    create = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'osd', 'create', '--concise',
        fsid,
    ]
    try:
        output = _check_output(args=create)
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(output)
    check_osd_id(osd_id)
    return osd_id
1085
1086
def get_osd_id(path):
    """
    Read the id of the OSD at the given path from its 'whoami' file.

    :raises: Error if the id is not numeric.
    :return: The OSD id, or None if the file does not exist.
    """
    whoami = read_one_line(path, 'whoami')
    if whoami is None:
        return None
    check_osd_id(whoami)
    return whoami
1095
1096
def get_ceph_user():
    """
    Name of the user ceph runs as.

    The preferred user (CEPH_PREF_USER) is returned when one is set;
    the program exits with status 2 if it does not exist. Otherwise
    'ceph' is returned if that user exists, 'root' if it does not.
    """
    if CEPH_PREF_USER is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(CEPH_PREF_USER)
    except KeyError:
        print("No such user:", CEPH_PREF_USER)
        sys.exit(2)
    return CEPH_PREF_USER
1113
1114
def get_ceph_group():
    """
    Name of the group ceph runs as.

    The preferred group (CEPH_PREF_GROUP) is returned when one is set;
    the program exits with status 2 if it does not exist. Otherwise
    'ceph' is returned if that group exists, 'root' if it does not.
    """
    if CEPH_PREF_GROUP is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(CEPH_PREF_GROUP)
    except KeyError:
        print("No such group:", CEPH_PREF_GROUP)
        sys.exit(2)
    return CEPH_PREF_GROUP
1131
1132
def path_set_context(path):
    """
    Recursively restore the selinux context of path, when restorecon is
    available, and make ceph:ceph its owner when the ceph user is 'ceph'.
    """
    # restore selinux context to default policy values
    selinux_tool = which('restorecon')
    if selinux_tool:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
1141
1142
def _check_output(args=None, **kwargs):
    """
    Run a command with command() and return its standard output.

    :raises: subprocess.CalledProcessError if the command returns a
             non-zero status; its ``output`` attribute is the standard
             output followed by the standard error of the command.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return _bytes2str(out)
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
1151
1152
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster, by running ``ceph-conf --lookup``.

    :param cluster: name of the cluster
    :param variable: name of the configuration variable
    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # the standard error of the command is not available here:
        # nothing was returned since the call itself failed
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1186
1187
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code, by running
    ``ceph-osd --show-config-value``.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if the command fails.
    :return: The first line printed by the command.
    """
    show_value = [
        'ceph-osd',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--show-config-value={variable}'.format(
            variable=variable,
        ),
    ]
    try:
        out = _check_output(args=show_value, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line = str(out).split('\n', 1)[0]
    return first_line
1216
1217
def get_fsid(cluster):
    """
    Get the fsid of the cluster.

    :param cluster: cluster name handed to get_conf_with_default().
    :return: The lower-cased fsid.
    :raises: Error if no fsid could be obtained.
    """
    fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    # get_conf_with_default() returns the first output line as a str,
    # so a missing value shows up as '' rather than None.
    if not fsid:
        raise Error('getting cluster uuid from configuration failed')
    return fsid.lower()
1228
1229
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Build the location of the dmcrypt key file for _uuid in key_dir.

    LUKS keys carry a ".luks.key" suffix, plain keys are named after
    the uuid alone.

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    filename = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, filename)
1246
1247
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Locate or fetch the dm-crypt key for the partition _uuid.

    A key file found under key_dir (the layout used before the
    lockbox existed) takes precedence. Otherwise the lockbox
    directory STATEDIR/osd-lockbox/<_uuid> is consulted and the key
    is fetched from the cluster with ``ceph config-key get``.

    :param _uuid: uuid naming the key file / lockbox directory.
    :param key_dir: directory searched for a legacy key file.
    :param luks: whether the legacy key file uses the LUKS naming.
    :return: A one-element tuple holding the legacy key file path, or
             the base64-decoded key obtained from the cluster.
    :raises: Error if no key can be found, the cluster cannot be
             identified, the key-management-mode is unknown or the
             ceph command fails.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        return (legacy_path,)
    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if os.path.exists(path):
        mode = get_oneliner(path, 'key-management-mode')
        osd_uuid = get_oneliner(path, 'osd-uuid')
        ceph_fsid = read_one_line(path, 'ceph_fsid')
        if ceph_fsid is None:
            raise Error('No cluster uuid assigned.')
        cluster = find_cluster_by_uuid(ceph_fsid)
        if cluster is None:
            raise Error('No cluster conf found in ' + SYSCONFDIR +
                        ' with fsid %s' % ceph_fsid)

        if mode == KEY_MANAGEMENT_MODE_V1:
            key, stderr, ret = command(
                [
                    'ceph',
                    '--cluster', cluster,
                    '--name',
                    'client.osd-lockbox.' + osd_uuid,
                    '--keyring',
                    os.path.join(path, 'keyring'),
                    'config-key',
                    'get',
                    'dm-crypt/osd/' + osd_uuid + '/luks',
                ],
            )
            LOG.debug("stderr " + stderr)
            # An explicit check instead of assert: asserts vanish
            # under -O and the output of a failed command must never
            # be decoded and used as a key.
            if ret != 0:
                raise Error('unable to get dm-crypt key from the cluster',
                            osd_uuid, stderr)
            return base64.b64decode(key)
        else:
            raise Error('unknown key-management-mode ' + str(mode))
    raise Error('unable to read dm-crypt key', path, legacy_path)
1288
1289
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map rawdev through dm-crypt as /dev/mapper/<_uuid>.

    :param rawdev: the device to encrypt / open.
    :param key: either a tuple whose first element is the path of a
                key file (legacy, before lockbox), or the key itself,
                which is then fed to cryptsetup on stdin.
    :param _uuid: name of the mapping.
    :param cryptsetup_parameters: extra arguments appended to the
                luksFormat and create commands.
    :param luks: use luksFormat/luksOpen when true, plain
                ``cryptsetup create`` otherwise.
    :param format_dev: run luksFormat before luksOpen (LUKS only).
    :return: The mapped device; the value reported by
             dmcrypt_is_mapped() when that is truthy, otherwise
             '/dev/mapper/' + _uuid.
    :raises: Error if cryptsetup or the final chown fails.
    """
    dev = dmcrypt_is_mapped(_uuid)
    if dev:
        return dev

    if isinstance(key, tuple):
        # legacy, before lockbox
        assert os.path.exists(key[0])
        keypath = key[0]
        key = None
    else:
        # '-' makes cryptsetup read the key from stdin
        keypath = '-'
    dev = '/dev/mapper/' + _uuid
    luksFormat_args = [
        'cryptsetup',
        '--batch-mode',
        '--key-file',
        keypath,
        'luksFormat',
        rawdev,
    ] + cryptsetup_parameters

    luksOpen_args = [
        'cryptsetup',
        '--key-file',
        keypath,
        'luksOpen',
        rawdev,
        _uuid,
    ]

    create_args = [
        'cryptsetup',
        '--key-file',
        keypath,
        'create',
        _uuid,
        rawdev,
    ] + cryptsetup_parameters

    def run(args, stdin):
        # Run one cryptsetup command, feeding it stdin (the key, or
        # None when a key file is used).
        LOG.info(" ".join(args))
        process = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        out, err = process.communicate(stdin)
        LOG.debug(out)
        LOG.error(err)
        if process.returncode != 0:
            # Raise what the except clause below expects, so that a
            # cryptsetup failure is reported as an Error instead of
            # an AssertionError (or ignored entirely under -O).
            failure = subprocess.CalledProcessError(
                process.returncode, args[0])
            failure.output = err
            raise failure

    try:
        if luks:
            if format_dev:
                run(luksFormat_args, key)
            run(luksOpen_args, key)
        else:
            # Plain mode has no format function, nor any validation
            # that the key is correct.
            run(create_args, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', dev])
        return dev

    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
1364
1365
def dmcrypt_unmap(
    _uuid
):
    """
    Remove the dm-crypt mapping /dev/mapper/<_uuid> if it exists.

    ``cryptsetup remove`` is attempted up to eleven times, sleeping a
    little longer after every failure.

    :raises: Error when the last attempt fails as well.
    """
    if not os.path.exists('/dev/mapper/' + _uuid):
        return
    last_attempt = 10
    for attempt in range(last_attempt + 1):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
        except subprocess.CalledProcessError as e:
            if attempt == last_attempt:
                raise Error('unable to unmap device', _uuid, e)
            # incremental backoff before trying again
            time.sleep(0.5 + attempt * 1.0)
        else:
            return
1382
1383
def mount(
    dev,
    fstype,
    options,
):
    """
    Mount a device with the given filesystem type and mount options
    on a temporary directory created under STATEDIR/tmp.

    When options is None the defaults registered for fstype in
    MOUNT_OPTIONS are used.

    :return: The path of the mount point.
    :raises: ValueError if dev or fstype is None, MountError if the
             mount command fails (the temporary directory is then
             removed on a best-effort basis).
    """
    # both the device and the filesystem type are mandatory
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # fall back to the per-filesystem defaults
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp needs its parent directory to exist already
    tmp_root = STATEDIR + '/tmp'
    if not os.path.exists(tmp_root):
        os.makedirs(tmp_root)

    mountpoint = tempfile.mkdtemp(prefix='mnt.', dir=tmp_root)
    mount_cmd = ['mount', '-t', fstype, '-o', options, '--', dev, mountpoint]
    try:
        LOG.debug('Mounting %s on %s with options %s',
                  dev, mountpoint, options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', mountpoint])
    except subprocess.CalledProcessError as e:
        try:
            os.rmdir(mountpoint)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return mountpoint
1441
1442
def unmount(
    path,
):
    """
    Unmount path and remove the (then empty) mount point directory.

    A failing umount is retried three times with an increasing
    delay.

    :raises: UnmountError when the fourth attempt fails too.
    """
    max_retries = 3
    for attempt in range(max_retries + 1):
        try:
            LOG.debug('Unmounting %s', path)
            command_check_call(['/bin/umount', '--', path])
        except subprocess.CalledProcessError as e:
            if attempt == max_retries:
                raise UnmountError(e)
            # incremental backoff before the next try
            time.sleep(0.5 + attempt * 1.0)
        else:
            break

    os.rmdir(path)
1470
1471
1472###########################################
1473
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from ``parted --machine`` output.

    Every line that starts with digits contributes one number.

    :param partitions: the text following the device line of the
                       parted output.
    :return: A list of int, empty when no line starts with a number.
             A real list (not a map object) so that callers can test
             it for emptiness on Python 3 as well as Python 2.
    """
    numbers_as_strings = re.findall(r'^\d+', partitions, re.MULTILINE)
    return [int(number) for number in numbers_as_strings]
1477
1478
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    The partition table is read with ``parted --machine -- dev print``.

    :param dev: path of the device to inspect.
    :return: Index number (> 1 if there is already a partition on the
             device) or 1 if there is no partition table or parted
             fails.
    :raises: Error if the parted output is empty or does not have the
             expected shape.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # maxsplit=1: everything after the first mention of the device is
    # the partition listing, even if the path shows up again later.
    _, partitions = lines.split(real_dev, 1)
    # materialize the result so that the emptiness test below is
    # meaningful even if an iterator is handed back
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    else:
        return 1
1517
1518
def check_journal_reqs(args):
    """
    Ask ceph-osd whether the osd backend allows, wants and needs a
    journal.

    ceph-osd is run once per question with the matching
    --check-<question>-journal flag; an exit status of zero counts as
    a yes.

    :return: The tuple (allows_journal, wants_journal, needs_journal)
             of booleans.
    """
    answers = []
    for question in ('allows', 'wants', 'needs'):
        _, _, status = command([
            'ceph-osd', '--check-%s-journal' % question,
            '-i', '0',
            '--log-file', '$run_dir/$cluster-osd-check.log',
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        answers.append(status)
    allows_journal, wants_journal, needs_journal = answers
    return (not allows_journal, not wants_journal, not needs_journal)
1545
1546
def update_partition(dev, description):
    """
    Tell the kernel about a modified partition table.

    This must run after every partition table change so that the
    kernel learns about it and fires the matching udev events. As a
    side effect partprobe removes the partitions and adds them
    again. udevadm settle is called before partprobe to let pending
    udev events finish (one of them may rely on an existing
    partition of dev) and once more at the end, so that the caller
    knows all events caused by the change (the 95-ceph-osd.rules
    actions, mode and group changes etc.) have been processed.

    partprobe is tried up to five times, one minute apart, as long
    as it fails because the device is busy or the kernel could not
    be informed.

    :raises: Error if partprobe never succeeded, or
             subprocess.CalledProcessError for any other failure.
    """
    def settle():
        command_check_call(['udevadm', 'settle', '--timeout=600'])

    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    partprobe = _get_command_executable(['partprobe'])[0]
    for _ in range(5):
        settle()
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            transient = ('unable to inform the kernel' in error or
                         'Device or resource busy' in error)
            if not transient:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed
        raise Error('partprobe %s failed : %s' % (dev, error))
    settle()
1580
1581
def zap(dev):
    """
    Wipe every partition of a whole disk and reset its partition table.

    Each existing partition is cleared with wipefs and has its first
    10M overwritten with zeros, the end of the disk (where GPT
    partition table backups may live) is zeroed, then sgdisk
    recreates an empty GPT and update_partition() informs the
    kernel.

    :raises: Error if dev is not a whole block device or if one of
             the external commands fails.
    """
    dev = os.path.realpath(dev)
    mode = os.stat(dev).st_mode
    whole_disk = stat.S_ISBLK(mode) and not is_partition(dev)
    if not whole_disk:
        raise Error('not full block device; cannot zap', dev)
    try:
        # Remove every trace of filesystems or OSD journals from the
        # existing partitions. wipefs alone is not enough: 10M of
        # zeros are written as well, so that a partition re-created
        # at the same place does not show remains of its previous
        # content.
        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for name in list_partitions(dev):
            target = get_dev_path(name)
            command_check_call(['wipefs', '--all', target])
            command_check_call([
                'dd',
                'if=/dev/zero',
                'of={path}'.format(path=target),
                'bs=1M',
                'count=10',
            ])

        LOG.debug('Zapping partition table on %s', dev)

        # sgdisk isn't too thorough, so also try to wipe out any GPT
        # partition table backups at the end of the disk.
        lba_size = 4096
        tail_bytes = 33 * lba_size
        with open(dev, 'wb') as disk:
            disk.seek(-tail_bytes, os.SEEK_END)
            disk.write(b'\0' * tail_bytes)

        command_check_call(['sgdisk', '--zap-all', '--', dev])
        command_check_call(['sgdisk', '--clear', '--mbrtogpt', '--', dev])

        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1653
1654
def adjust_symlink(target, path):
    """
    Make sure path is a symlink pointing to target.

    A regular file or a symlink pointing elsewhere found at path is
    removed first; a symlink that already points to target is left
    alone.

    :param target: what the symlink must point to.
    :param path: where the symlink lives.
    :raises: Error if the old entry cannot be inspected or removed,
             or if the symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        except OSError:
            # Only filesystem errors are translated; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except OSError:
            raise Error('unable to create symlink %s -> %s' % (path, target))
1679
1680
def get_mount_options(cluster, fs_type):
    """
    Get the configured mount options for the given filesystem type.

    osd_mount_options_<fs_type> is looked up first, then
    osd_fs_mount_options_<fs_type>. Whitespace is removed from the
    value whichever of the two variables provided it.

    :param cluster: cluster name handed to get_conf().
    :param fs_type: filesystem type used to build the variable names.
    :return: The mount options without whitespace, or None when
             neither variable is set.
    """
    for template in ('osd_mount_options_{fstype}',
                     'osd_fs_mount_options_{fstype}'):
        mount_options = get_conf(
            cluster,
            variable=template.format(
                fstype=fs_type,
            ),
        )
        if mount_options is not None:
            # remove whitespaces
            return "".join(mount_options.split())
    return None
1699
1700
class Device(object):
    """
    A whole (non-partition) device on which partitions are created
    with sgdisk and then looked up by number.
    """

    def __init__(self, path, args):
        self.args = args
        self.path = path
        # filled on demand by get_dev_size()
        self.dev_size = None
        # partition number -> partition object, see get_partition()
        self.partitions = {}
        # filled on demand by ptype_tobe_for_name()
        self.ptype_map = None
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a "ceph <name>" partition on the device with sgdisk.

        :param uuid: partition GUID to assign.
        :param name: kind of partition, used for the label and to
                     pick the "tobe" partition type.
        :param size: size in M; 0 asks for the largest possible
                     partition.
        :param num: partition number; 0 picks the next free one.
        :return: The number of the partition that was created.
        :raises: Error if size exceeds the size of the device.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)
        if size > 0:
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
            dev_size = self.get_dev_size()
            if size > dev_size:
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, dev_size))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, dev_size, name))
        else:
            new = '--largest-new={num}'.format(num=num)

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the "tobe" partition type for a partition of kind name.

        'data' is an alias of 'osd'; 'lockbox' only depends on
        whether the device is multipath.
        """
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if self.ptype_map is None:
            probe = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = probe.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """Return the (cached) partition object for partition num."""
        if num in self.partitions:
            return self.partitions[num]
        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return self.partitions[num]

    def get_dev_size(self):
        """Return the device size, querying it on first use only."""
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        """Build a Device for path."""
        return Device(path, args)
1776
1777
class DevicePartition(object):
    """
    A partition of a device: its number, device path, uuid and
    partition type, plus the partition type map that applies to it.
    Subclasses override set_variables_ptype() to select another map.
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        self.rawdev = None
        self.dev = None
        # uuid and ptype are looked up on demand
        self.uuid = None
        self.ptype_map = None
        self.ptype = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the partition uuid of rawdev, looked up once."""
        if self.uuid is not None:
            return self.uuid
        self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type of rawdev, looked up once."""
        if self.ptype is not None:
            return self.ptype
        self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        # until something maps it elsewhere, dev and rawdev coincide
        self.rawdev = dev
        self.dev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the "ready" partition type for a partition kind."""
        entry = self.ptype_map[name]
        return entry['ready']

    @staticmethod
    def factory(path, dev, args):
        """
        Build the partition object matching the device and args.

        Multipath wins over dmcrypt: if path or dev is multipath a
        DevicePartitionMultipath is returned, otherwise the dmcrypt
        type derived from args selects the LUKS or plain flavor,
        with DevicePartition as the fallback. dev is recorded on
        the new object.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        if any(candidate is not None and is_mpath(candidate)
               for candidate in (path, dev)):
            partition_class = DevicePartitionMultipath
        elif dmcrypt_type == 'luks':
            partition_class = DevicePartitionCryptLuks
        elif dmcrypt_type == 'plain':
            partition_class = DevicePartitionCryptPlain
        else:
            partition_class = DevicePartition
        partition = partition_class(args)
        partition.set_dev(dev)
        return partition
1836
1837
class DevicePartitionMultipath(DevicePartition):
    """A partition that uses the multipath partition types."""

    def set_variables_ptype(self):
        # same lookups as the parent class, against the 'mpath' table
        mpath_types = PTYPE['mpath']
        self.ptype_map = mpath_types
1842
1843
class DevicePartitionCrypt(DevicePartition):
    """
    Common part of the dm-crypt partitions. Subclasses provide
    luks() and a setup_crypt() that obtains the key.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        # set by setup_crypt() in the subclasses
        self.osd_dm_key = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        pass

    def map(self):
        """Map rawdev through dm-crypt and make dev the mapped device."""
        self.setup_crypt()
        mapped = _dmcrypt_map(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = mapped

    def unmap(self):
        """Drop the dm-crypt mapping; dev is the raw device again."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        self.setup_crypt()
        self.map()
1876
1877
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """A dm-crypt partition in plain (non LUKS) mode."""

    def luks(self):
        return False

    def setup_crypt(self):
        """Fetch the key and add the key size option, only once."""
        already_set_up = self.osd_dm_key is not None
        if already_set_up:
            return

        self.cryptsetup_parameters.extend(
            ['--key-size', str(self.dmcrypt_keysize)])

        key_dir = self.args.dmcrypt_key_dir
        self.osd_dm_key = get_dmcrypt_key(self.get_uuid(), key_dir, False)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1895
1896
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """A dm-crypt partition in LUKS mode."""

    def luks(self):
        return True

    def setup_crypt(self):
        """Fetch the key and add the key size option, only once."""
        already_set_up = self.osd_dm_key is not None
        if already_set_up:
            return

        # A key size of 1024 is not forced into the
        # cryptsetup_parameters: the cryptsetup defaults must
        # prevail for the actual LUKS key lengths in that case.
        if self.dmcrypt_keysize != 1024:
            self.cryptsetup_parameters.extend(
                ['--key-size', str(self.dmcrypt_keysize)])

        key_dir = self.args.dmcrypt_key_dir
        self.osd_dm_key = get_dmcrypt_key(self.get_uuid(), key_dir, True)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']
1921
1922
class Prepare(object):
    """
    Entry point of the ``prepare`` subcommand: declares its command
    line and dispatches to the filestore or bluestore implementation.
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """Return the parser holding the options common to all OSDs."""
        options = (
            ('--cluster', dict(
                metavar='NAME',
                default='ceph',
                help='cluster name to assign this disk to',
            )),
            ('--cluster-uuid', dict(
                metavar='UUID',
                help='cluster uuid to assign this disk to',
            )),
            ('--osd-uuid', dict(
                metavar='UUID',
                help='unique OSD uuid to assign this disk to',
            )),
            ('--crush-device-class', dict(
                help='crush device class to assign this disk to',
            )),
            ('--dmcrypt', dict(
                action='store_true', default=None,
                help='encrypt DATA and/or JOURNAL devices with dm-crypt',
            )),
            ('--dmcrypt-key-dir', dict(
                metavar='KEYDIR',
                default='/etc/ceph/dmcrypt-keys',
                help='directory where dm-crypt keys are stored',
            )),
            ('--prepare-key', dict(
                metavar='PATH',
                help='bootstrap-osd keyring path template (%(default)s)',
                default='{statedir}/bootstrap-osd/{cluster}.keyring',
                dest='prepare_key_template',
            )),
            ('--no-locking', dict(
                action='store_true', default=None,
                help="let many prepare's run in parallel",
            )),
        )
        common = argparse.ArgumentParser(add_help=False)
        for flag, settings in options:
            common.add_argument(flag, **settings)
        return common

    @staticmethod
    def set_subparser(subparsers):
        """Register the ``prepare`` subcommand and return its parser."""
        parents = (
            [Prepare.parser(), PrepareData.parser(), Lockbox.parser()] +
            PrepareFilestore.parent_parsers() +
            PrepareBluestore.parent_parsers()
        )
        description = textwrap.fill(textwrap.dedent("""\
        If the --bluestore argument is given, a bluestore objectstore
        will be used instead of the legacy filestore objectstore.

        When an entire device is prepared for bluestore, two
        partitions are created. The first partition is for metadata,
        the second partition is for blocks that contain data.

        Unless explicitly specified with --block.db or
        --block.wal, the bluestore DB and WAL data is stored on
        the main block device. For instance:

        ceph-disk prepare --bluestore /dev/sdc

        Will create

        /dev/sdc1 for osd metadata
        /dev/sdc2 for block, db, and wal data (the rest of the disk)


        If either --block.db or --block.wal are specified to be
        the same whole device, they will be created as partition
        three and four respectively. For instance:

        ceph-disk prepare --bluestore \\
        --block.db /dev/sdc \\
        --block.wal /dev/sdc \\
        /dev/sdc

        Will create

        /dev/sdc1 for osd metadata
        /dev/sdc2 for block (the rest of the disk)
        /dev/sdc3 for db
        /dev/sdc4 for wal

        """))
        prepare_parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description,
            help='Prepare a directory or disk for a Ceph OSD',
        )
        prepare_parser.set_defaults(func=Prepare.main)
        return prepare_parser

    def prepare(self):
        """Run _prepare(), under prepare_lock unless --no-locking."""
        if self.args.no_locking:
            self._prepare()
            return
        with prepare_lock:
            self._prepare()

    @staticmethod
    def factory(args):
        """Pick the bluestore or filestore flavor according to args."""
        flavor = PrepareBluestore if args.bluestore else PrepareFilestore
        return flavor(args)

    @staticmethod
    def main(args):
        preparer = Prepare.factory(args)
        preparer.prepare()
2050
2051
class PrepareFilestore(Prepare):
    """Prepare an OSD backed by filestore: a data store and a journal."""

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # the lockbox only exists when dm-crypt is requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """Return the parsers contributing filestore specific options."""
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        wants_dmcrypt = self.data.args.dmcrypt
        if wants_dmcrypt:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2071
2072
class PrepareBluestore(Prepare):
    """
    Prepare an OSD backed by bluestore: a data store, a block space
    and, when requested, separate block.db and block.wal spaces.
    """

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # the lockbox only exists when dm-crypt is requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """Return the parser declaring the --bluestore switch."""
        bluestore = argparse.ArgumentParser(add_help=False)
        bluestore.add_argument(
            '--bluestore',
            action='store_true',
            default=None,
            help='bluestore objectstore',
        )
        return bluestore

    @staticmethod
    def parent_parsers():
        """Return the parsers contributing bluestore specific options."""
        contributors = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [contributor.parser() for contributor in contributors]

    def _prepare(self):
        if self.data.args.dmcrypt:
            self.lockbox.prepare()
        # block.db and block.wal only take part when they were given
        # on the command line; block always comes last
        optional_spaces = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        spaces = [space for attribute, space in optional_spaces
                  if getattr(self.data.args, attribute)]
        spaces.append(self.block)
        self.data.prepare(*spaces)
2113
2114
class Space(object):
    """Holds the names of the spaces known to this file."""

    NAMES = (
        'block',
        'journal',
        'block.db',
        'block.wal',
    )
2118
2119
class PrepareSpace(object):
    """
    Base class preparing a named space (journal, block, ...) that
    lives next to the OSD data, in a file or on a block device.

    Subclasses must set self.name before calling __init__ and
    provide get_space_size() and desired_partition_number(). The
    command line attributes <name>, <name>_uuid, <name>_file and
    <name>_dev are read from (and partly written to) args.
    """

    # possible values of self.type, decided by set_type()
    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        # make sure the space has a uuid even if none was given
        if getattr(self.args, self.name + '_uuid') is None:
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        # what <data>/<name> must link to, set by prepare_*()
        self.space_symlink = None
        # the by-partuuid path when dm-crypt is used, set by
        # prepare_device()
        self.space_dmcrypt = None

    def set_type(self):
        """
        Decide whether the space is NONE, a FILE or a DEVICE.

        When the space is wanted, the data is a whole block device
        and nothing was specified for the space, it is colocated
        with the data (args.<name> is set to args.data).

        :raises: Error if the --<name>-dev / --<name>-file
                 verification fails, or if the path is neither a
                 block device nor a regular file.
        """
        name = self.name
        args = self.args
        dmode = os.stat(args.data).st_mode
        if (self.wants_space() and
                stat.S_ISBLK(dmode) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        if getattr(args, name) is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.NONE
            return

        if not os.path.exists(getattr(args, name)):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), getattr(args, name))
            # a file that does not exist yet, prepare_file() creates it
            self.type = self.FILE
            return

        mode = os.stat(getattr(args, name)).st_mode
        if stat.S_ISBLK(mode):
            if getattr(args, name + '_file'):
                # capitalize() must be called: formatting the bound
                # method would put its repr into the message
                raise Error('%s is not a regular file' % name.capitalize(),
                            getattr(args, name))
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            getattr(args, name))
            self.type = self.FILE
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), getattr(args, name)))

    def is_none(self):
        return self.type == self.NONE

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser(name, positional=True):
        """
        Return a parser declaring the options of the space name.

        :param name: name of the space; yields --<name>-uuid,
                     --<name>-file and --<name>-dev.
        :param positional: also declare an optional positional
                     argument called name.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )

        if positional:
            parser.add_argument(
                name,
                metavar=name.upper(),
                nargs='?',
                help=('path to OSD %s disk block device;' % name +
                      ' leave out to store %s in file' % name),
            )
        return parser

    def wants_space(self):
        return True

    def populate_data_path(self, path):
        """Record the space in the data directory path, per type."""
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """Write <name>_uuid and adjust the <name> symlink in path."""
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

    def populate_data_path_device(self, path):
        """
        Same as populate_data_path_file() plus the <name>_dmcrypt
        symlink, which is removed when dm-crypt is not used.
        """
        self.populate_data_path_file(path)

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                # best effort: the link usually does not exist
                pass

    def prepare(self):
        """Prepare the space according to its type."""
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """Create the (empty) file if needed and link to it."""
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            space_file = open(space_filename, 'wb')
            space_file.close()
            path_set_context(space_filename)

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        self.space_symlink = space_filename

    def prepare_device(self):
        """
        Prepare the space on a block device.

        An existing partition is either reused (when its partition
        type shows it was prepared before) or symlinked to directly.
        On a whole device a new partition is created, mapped through
        dm-crypt if requested, and its partition type is set to the
        "ready" value.

        :raises: Error if an existing partition is given together
                 with --dmcrypt.
        """
        reusing_partition = False

        if is_partition(getattr(self.args, self.name)):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), getattr(self.args, self.name))
            partition = DevicePartition.factory(
                path=None, dev=getattr(self.args, self.name), args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(getattr(self.args, self.name) +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          getattr(self.args, self.name))
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID. See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, self.name + '_uuid', partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, self.name + '_uuid'))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            getattr(self.args, self.name))
                self.space_symlink = getattr(self.args, self.name)
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, self.name + '_uuid'))

        if self.args.dmcrypt:
            # link to the mapped device and remember the raw one
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, self.name + '_uuid'))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(getattr(self.args, self.name), self.args)
        num = device.create_partition(
            uuid=getattr(self.args, self.name + '_uuid'),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()
            partition.map()

        # switch the partition type from "tobe" to "ready"
        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                getattr(self.args, self.name),
            ],
        )
        update_partition(getattr(self.args, self.name), 'prepared')

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2374
2375
class PrepareJournal(PrepareSpace):
    """The journal space of a filestore OSD."""

    def __init__(self, args):
        self.name = 'journal'
        requirements = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = requirements

        journal_forbidden = not self.allows_journal
        if args.journal and journal_forbidden:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        return self.wants_journal

    def get_space_size(self):
        """Return osd_journal_size as configured for the cluster."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        # when the disk is shared between osd data and journal the
        # journal is partition number 2, otherwise 0 is returned
        shares_data_disk = self.args.journal == self.args.data
        return 2 if shares_data_disk else 0

    @staticmethod
    def parser():
        return PrepareSpace.parser('journal')
2410
2411
class PrepareBluestoreBlock(PrepareSpace):
    """PrepareSpace specialization for the bluestore 'block' space."""

    def __init__(self, args):
        # self.name must be set before the base class constructor runs.
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block space in MB.

        Returns 0 (get as much space as possible) when
        bluestore_block_size is not configured.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        # Floor division keeps the result an int under Python 3 too;
        # true division would return a float size.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block and data share a disk, 0 otherwise."""
        if self.args.block == self.args.data:
            num = 2
        else:
            num = 0
        return num

    @staticmethod
    def parser():
        """Return the argument parser for the block space."""
        return PrepareSpace.parser('block')
2439
2440
class PrepareBluestoreBlockDB(PrepareSpace):
    """PrepareSpace specialization for the bluestore 'block.db' space."""

    def __init__(self, args):
        # self.name must be set before the base class constructor runs.
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block.db space in MB.

        Falls back to 20480 MB when bluestore_block_db_size is not
        configured.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_size is None:
            return 20480  # MB, default value
        # Floor division keeps the result an int under Python 3 too;
        # true division would return a float size.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 3 when block.db and data share a disk, 0 otherwise."""
        # 'block.db' is not a valid identifier, hence getattr().
        if getattr(self.args, 'block.db') == self.args.data:
            num = 3
        else:
            num = 0
        return num

    def wants_space(self):
        """A block.db space is never requested implicitly."""
        return False

    @staticmethod
    def parser():
        """Return the argument parser, with an optional --block.db."""
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2477
2478
class PrepareBluestoreBlockWAL(PrepareSpace):
    """PrepareSpace specialization for the bluestore 'block.wal' space."""

    def __init__(self, args):
        # self.name must be set before the base class constructor runs.
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block.wal space in MB.

        Falls back to 576 MB when bluestore_block_wal_size is not
        configured.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        # Floor division keeps the result an int under Python 3 too;
        # true division would return a float size.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal and data share a disk, 0 otherwise."""
        # 'block.wal' is not a valid identifier, hence getattr().
        if getattr(self.args, 'block.wal') == self.args.data:
            num = 4
        else:
            num = 0
        return num

    def wants_space(self):
        """A block.wal space is never requested implicitly."""
        return False

    @staticmethod
    def parser():
        """Return the argument parser, with an optional --block.wal."""
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2515
2516
class CryptHelpers(object):
    """Static helpers reading the dmcrypt settings of a cluster."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return osd_cryptsetup_parameters split into a list of words.

        An empty list is returned when the option is not set.
        """
        raw = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        return [] if raw is None else shlex.split(raw)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dmcrypt key size for the active dmcrypt type.

        Returns 0 when dmcrypt is not in use.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        kind = CryptHelpers.get_dmcrypt_type(args)

        if kind == 'luks':
            if configured is not None:
                return int(configured)
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024

        if kind == 'plain':
            if configured is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured)
            return int(configured)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain', or None when dmcrypt is not requested.

        Raises Error when osd_dmcrypt_type holds any other value.
        """
        if not hasattr(args, 'dmcrypt') or not args.dmcrypt:
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        # luks is the default when nothing is configured
        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2577
2578
class Lockbox(object):
    """Create, populate and activate the lockbox partition of an OSD.

    The lockbox is an ext4 partition mounted at
    STATEDIR/osd-lockbox/<osd uuid>. populate() writes into it the OSD
    uuid, the cluster fsid, a keyring for client.osd-lockbox.<osd uuid>,
    the key management mode and the uuid of every space partition. The
    dm-crypt key itself is stored with 'ceph config-key put' by
    create_key().
    """

    def __init__(self, args):
        self.args = args
        self.partition = None
        # only set when the lockbox partition is created by this object
        self.device = None

        # the lockbox lives on the data device unless told otherwise
        if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
            self.args.lockbox = self.args.data

    def set_partition(self, partition):
        """Use an already existing partition object as the lockbox."""
        self.partition = partition

    @staticmethod
    def parser():
        """Return the argument parser for --lockbox and --lockbox-uuid."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--lockbox',
            help='path to the device to store the lockbox',
        )
        parser.add_argument(
            '--lockbox-uuid',
            metavar='UUID',
            help='unique lockbox uuid',
        )
        return parser

    def create_partition(self):
        """Create the 10MB lockbox as partition 3 and return it."""
        self.device = Device.factory(self.args.lockbox, argparse.Namespace())
        partition_number = 3
        self.device.create_partition(uuid=self.args.lockbox_uuid,
                                     name='lockbox',
                                     num=partition_number,
                                     size=10)  # MB
        return self.device.get_partition(partition_number)

    def set_or_create_partition(self):
        """Set self.partition, creating the partition when needed.

        When args.lockbox is already a partition it is used as is and a
        warning is logged if its partition type is not a ready lockbox
        type.
        """
        if is_partition(self.args.lockbox):
            LOG.debug('OSD lockbox device %s is a partition',
                      self.args.lockbox)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.lockbox, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('lockbox')
            if ptype not in ready:
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, str(ready)))
        else:
            LOG.debug('Creating osd partition on %s',
                      self.args.lockbox)
            self.partition = self.create_partition()

    def create_key(self):
        """Generate the dm-crypt key and the lockbox keyring.

        A random key of key_size / 8 bytes is stored base64 encoded
        under dm-crypt/osd/<osd uuid>/luks with 'ceph config-key put'.
        A client.osd-lockbox.<osd uuid> key allowed to read it is then
        created and written to the 'keyring' file of the lockbox.

        Raises Error when the keyring cannot be created.
        """
        key_size = CryptHelpers.get_dmcrypt_keysize(self.args)
        # Floor division: read() rejects the float that true division
        # returns under Python 3. The file is closed right after use.
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        # b64encode() returns bytes under Python 3; decode it so the
        # command line below is made of text only.
        base64_key = base64.b64encode(key).decode('ascii')
        cluster = self.args.cluster
        bootstrap = self.args.prepare_key_template.format(cluster=cluster,
                                                          statedir=STATEDIR)
        command_check_call(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                'config-key',
                'put',
                'dm-crypt/osd/' + self.args.osd_uuid + '/luks',
                base64_key,
            ],
        )
        keyring, stderr, ret = command(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                'auth',
                'get-or-create',
                'client.osd-lockbox.' + self.args.osd_uuid,
                'mon',
                ('allow command "config-key get" with key="dm-crypt/osd/' +
                 self.args.osd_uuid + '/luks"'),
            ],
        )
        LOG.debug('stderr %s', stderr)
        # not an assert: the check must survive python -O
        if ret != 0:
            raise Error('unable to create the lockbox keyring for osd',
                        self.args.osd_uuid)
        path = self.get_mount_point()
        with open(os.path.join(path, 'keyring'), 'w') as keyring_file:
            keyring_file.write(keyring)
        write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)

    def symlink_spaces(self, path):
        """Link each space uuid to the lockbox and record it in path."""
        target = self.get_mount_point()
        for name in Space.NAMES:
            if (hasattr(self.args, name + '_uuid') and
                    getattr(self.args, name + '_uuid')):
                uuid = getattr(self.args, name + '_uuid')
                symlink = os.path.join(STATEDIR, 'osd-lockbox', uuid)
                adjust_symlink(target, symlink)
                write_one_line(path, name + '-uuid', uuid)

    def populate(self):
        """Format, mount and fill the lockbox partition.

        The partition typecode is only set, as the very last step, when
        the partition was created by create_partition().
        """
        maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
        args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
        LOG.debug('Creating lockbox fs on %s: %s',
                  self.partition.get_dev(), " ".join(args))
        command_check_call(args)
        path = self.get_mount_point()
        maybe_mkdir(path)
        args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
        LOG.debug('Mounting lockbox ' + str(" ".join(args)))
        command_check_call(args)
        write_one_line(path, 'osd-uuid', self.args.osd_uuid)
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        self.create_key()
        self.symlink_spaces(path)
        write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
        if self.device is not None:
            command_check_call(
                [
                    'sgdisk',
                    '--typecode={num}:{uuid}'.format(
                        num=self.partition.get_partition_number(),
                        uuid=self.partition.ptype_for_name('lockbox'),
                    ),
                    '--',
                    get_partition_base(self.partition.get_dev()),
                ],
            )

    def get_mount_point(self):
        """Return STATEDIR/osd-lockbox/<osd uuid>."""
        return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)

    def get_osd_uuid(self):
        """Return the uuid of the OSD this lockbox belongs to."""
        return self.args.osd_uuid

    def activate(self):
        """Mount the lockbox read-only and trigger its partitions.

        The partition is first mounted on a temporary directory to read
        the OSD uuid, which determines the final mount point. Nothing
        is done when the partition is already mounted.
        """
        path = is_mounted(self.partition.get_dev())
        if path:
            LOG.info("Lockbox already mounted at " + path)
            return

        path = tempfile.mkdtemp(
            prefix='mnt.',
            dir=STATEDIR + '/tmp',
        )
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                path]
        LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
        command_check_call(args)
        self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
        command_check_call(['umount', path])
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                self.get_mount_point()]
        # log after args is rebuilt so the message shows the command
        # that is actually run, not the temporary mount above
        LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
        command_check_call(args)
        for name in Space.NAMES + ('osd',):
            uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
            if os.path.exists(uuid_path):
                uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
                dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
                args = ['ceph-disk', 'trigger', dev]
                command_check_call(args)

    def prepare(self):
        """Check the device is unused, then create and fill the lockbox."""
        verify_not_in_use(self.args.lockbox, check_partitions=True)
        self.set_or_create_partition()
        self.populate()
2750
2751
class PrepareData(object):
    """Prepare the data store of an OSD: a directory or a block device.

    create_data_partition() calls self.get_space_size(), which this
    class does not define; subclasses are expected to provide it.
    """

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.set_type()

        parsed = self.args
        if parsed.cluster_uuid is None:
            parsed.cluster_uuid = get_fsid(cluster=parsed.cluster)
        if parsed.osd_uuid is None:
            parsed.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Set self.type to FILE for a directory, DEVICE for a block device.

        Raises Error for any other kind of path.
        """
        mode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(mode):
            self.type = self.FILE
            return
        if stat.S_ISBLK(mode):
            self.type = self.DEVICE
            return
        raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        """Return True when the data path is a directory."""
        return self.type == self.FILE

    def is_device(self):
        """Return True when the data path is a block device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return the argument parser for the data related options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        # the three flags below share the same action and default
        flags = (
            ('--zap-disk',
             'destroy the partition table (and content) of a disk'),
            ('--data-dir', 'verify that DATA is a dir'),
            ('--data-dev', 'verify that DATA is a block device'),
        )
        for flag, description in flags:
            parser.add_argument(
                flag,
                action='store_true', default=None,
                help=description,
            )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a data directory; delegates to populate_data_path."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Write the identity files of the OSD into path.

        Does nothing when path already contains a 'magic' file.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return

        LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        device_class = self.args.crush_device_class
        if device_class:
            write_one_line(path, 'crush_device_class', device_class)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for space in to_prepare_list:
            space.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Dispatch to prepare_device or prepare_file based on self.type."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
            return
        if self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
            return
        raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare every space, then populate the data directory."""
        data = self.args.data

        if not os.path.exists(data):
            raise Error('data path for directory does not exist',
                        data)

        if self.args.data_dev:
            raise Error('data path is not a block device', data)

        for space in to_prepare_list:
            space.prepare()

        self.populate_data_path_file(data, *to_prepare_list)

    def sanity_checks(self):
        """Verify the data device exists and is not in use."""
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        verify_not_in_use(self.args.data,
                          check_partitions=not self.args.dmcrypt)

    def set_variables(self):
        """Resolve fs_type, mkfs_args, mount_options and osd_uuid."""
        cluster = self.args.cluster

        # command line first, then osd_mkfs_type, then osd_fs_type,
        # then the built-in default
        for variable in ('osd_mkfs_type', 'osd_fs_type'):
            if self.args.fs_type is None:
                self.args.fs_type = get_conf(
                    cluster=cluster,
                    variable=variable,
                )
        if self.args.fs_type is None:
            self.args.fs_type = DEFAULT_FS_TYPE

        mkfs_args = None
        for template in ('osd_mkfs_options_{fstype}',
                         'osd_fs_mkfs_options_{fstype}'):
            mkfs_args = get_conf(
                cluster=cluster,
                variable=template.format(fstype=self.args.fs_type),
            )
            if mkfs_args is not None:
                break
        self.mkfs_args = mkfs_args

        self.mount_options = get_mount_options(cluster=cluster,
                                               fs_type=self.args.fs_type)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Run the checks shared by every device based data store."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create the data partition as partition 1 and return it."""
        number = 1
        device = Device.factory(self.args.data, self.args)
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=number,
                                size=self.get_space_size())
        return device.get_partition(number)

    def set_data_partition(self):
        """Set self.partition, creating the partition when needed."""
        if not is_partition(self.args.data):
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()
            return

        LOG.debug('OSD data device %s is a partition',
                  self.args.data)
        self.partition = DevicePartition.factory(
            path=None, dev=self.args.data, args=self.args)
        ptype = self.partition.get_ptype()
        ready = Ptype.get_ready_by_name('osd')
        if ptype not in ready:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, str(ready)))

    def populate_data_path_device(self, *to_prepare_list):
        """Make the file system on the data partition and populate it.

        When args.data is a whole device, the partition typecode is set
        and udev is triggered afterwards.
        """
        partition = self.partition
        encrypted = isinstance(partition, DevicePartitionCrypt)

        if encrypted:
            partition.map()

        try:
            fs_type = self.args.fs_type
            mkfs_cmd = ['mkfs', '-t', fs_type]
            if self.mkfs_args is None:
                mkfs_cmd.extend(MKFS_ARGS.get(fs_type, []))
            else:
                mkfs_cmd.extend(self.mkfs_args.split())
                if fs_type == 'xfs':
                    mkfs_cmd.extend(['-f'])  # always force
            mkfs_cmd.extend(['--', partition.get_dev()])
            LOG.debug('Creating %s fs on %s',
                      fs_type, partition.get_dev())
            command_check_call(mkfs_cmd, exit=True)

            path = mount(dev=partition.get_dev(),
                         fstype=fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if isinstance(partition, DevicePartitionCrypt):
                partition.unmap()

        if is_partition(self.args.data):
            return

        typecode = '--typecode=%d:%s' % (partition.get_partition_number(),
                                         partition.ptype_for_name('osd'))
        command_check_call(
            ['sgdisk', typecode, '--', self.args.data],
            exit=True,
        )
        update_partition(self.args.data, 'prepared')
        command_check_call(['udevadm', 'trigger',
                            '--action=add',
                            '--sysname-match',
                            os.path.basename(partition.rawdev)])
2985
2986
class PrepareFilestoreData(PrepareData):
    """PrepareData variant that prepares the spaces before the data."""

    def get_space_size(self):
        """Return 0, i.e. get as much space as possible."""
        return 0

    def prepare_device(self, *to_prepare_list):
        """Run the base checks, prepare the spaces, then the data."""
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        # the spaces are prepared before the data partition is set
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)
2998
2999
class PrepareBluestoreData(PrepareData):
    """PrepareData variant that tags the data path as 'bluestore'."""

    def get_space_size(self):
        """Return the size of the data partition: 100 MB."""
        return 100

    def prepare_device(self, *to_prepare_list):
        """Run the base checks, set the data partition, then the spaces."""
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        # the data partition is set before the spaces are prepared
        self.set_data_partition()
        for space in to_prepare_list:
            space.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path as the base class does, then write the type."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3016
3017
#
# Temporary workaround: if ceph-osd --mkfs does not
# complete within 5 minutes, assume it is blocked
# because of http://tracker.ceph.com/issues/13522
# and retry a few times.
#
# Replace the calls to this function with command_check_call
# once http://tracker.ceph.com/issues/13522 is fixed.
#
def ceph_osd_mkfs(arguments):
    """Run arguments under timeout(1), retrying when the timeout fires.

    The delays, one per attempt, come from the whitespace separated
    CEPH_OSD_MKFS_DELAYS environment variable and default to five
    attempts of 300 each. Raises Error when no attempt succeeds or an
    attempt fails for a reason other than the timeout.
    """
    timeout = _get_command_executable(['timeout'])
    delays = os.environ.get('CEPH_OSD_MKFS_DELAYS',
                            '300 300 300 300 300').split()
    error = 'unknown error'
    for delay in delays:
        try:
            _check_output(timeout + [delay] + arguments)
        except subprocess.CalledProcessError as e:
            error = e.output
            if e.returncode != 124:
                # a genuine failure: retrying is pointless
                break
            # exit status 124: timeout fired, retry
            LOG.debug('%s timed out : %s (retry)'
                      % (str(arguments), error))
        else:
            return
    raise Error('%s failed : %s' % (str(arguments), error))
3046
3047
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap into path and run ceph-osd --mkfs --mkkey.

    The --osd-journal option is passed unless the 'type' file found in
    path reads 'bluestore'.
    """
    monmap = os.path.join(path, 'activate.monmap')
    getmap = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'mon', 'getmap', '-o', monmap,
    ]
    command_check_call(getmap)

    osd_type = read_one_line(path, 'type')

    mkfs_cmd = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '--mkkey',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type != 'bluestore':
        mkfs_cmd += ['--osd-journal', os.path.join(path, 'journal')]
    mkfs_cmd += [
        '--osd-uuid', fsid,
        '--keyring', os.path.join(path, 'keyring'),
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    ceph_osd_mkfs(mkfs_cmd)
3101
3102
def auth_key(
    path,
    cluster,
    osd_id,
    keyring,
):
    """Register the keyring found in path as the key of osd.<osd_id>.

    The 'allow profile osd' monitor capability is tried first; when the
    command exits with EINVAL it is run again with 'allow rwx'. Any
    other failure is propagated.
    """
    def add_with_mon_cap(mon_cap):
        command_check_call(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', keyring,
                'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
                '-i', os.path.join(path, 'keyring'),
                'osd', 'allow *',
                'mon', mon_cap,
            ],
        )

    try:
        # try dumpling+ cap scheme
        add_with_mon_cap('allow profile osd')
    except subprocess.CalledProcessError as err:
        if err.returncode != errno.EINVAL:
            raise
        # try old cap scheme
        add_with_mon_cap('allow rwx')
3140
3141
def get_mount_point(cluster, osd_id):
    """Return STATEDIR/osd/<cluster>-<osd_id>."""
    dirname = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', dirname)
3148
3149
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """Mount dev at the final OSD location, then lazily unmount path."""
    LOG.debug('Moving mount to final location...')
    osd_data = get_mount_point(cluster, osd_id)
    maybe_mkdir(osd_data)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others.  Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013.  Instead, mount the original device at the final
    # location.
    mount_cmd = ['/bin/mount', '-o', mount_options, '--', dev, osd_data]
    command_check_call(mount_cmd)

    # -l: lazy, in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
3191
3192
#
# For upgrade purposes, to make sure there are no competing units,
# both the --runtime unit and the default one should be disabled.
# There can be two units at the same time: one with --runtime and
# another without it. If, for any reason (manual or ceph-disk), the
# two units co-exist they will compete with each other.
#
def systemd_disable(
    path,
    osd_id,
):
    """Disable ceph-osd@<osd_id>, both plain and with --runtime."""
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    # ensure there is no duplicate ceph-osd@.service
    for extra in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra)
3213
3214
def systemd_start(
    path,
    osd_id,
):
    """Enable and start ceph-osd@<osd_id>.

    The unit is enabled with --runtime when path is a mount point.
    """
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    enable = ['systemctl', 'enable', unit]
    if is_mounted(path):
        enable = enable + ['--runtime']
    command_check_call(enable)
    command_check_call(['systemctl', 'start', unit])
3238
3239
def systemd_stop(
    path,
    osd_id,
):
    """Disable then stop ceph-osd@<osd_id>."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3252
3253
def start_daemon(
    cluster,
    osd_id,
):
    """Start the daemon of osd.<osd_id> with its init system.

    The init system is picked from the marker file (upstart, sysvinit,
    systemd, openrc or bsdrc, checked in that order) found in
    STATEDIR/osd/<cluster>-<osd_id>.

    Raises Error when no marker file exists or the start command fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    # use emit, not start, because start would fail if the
                    # instance was already running
                    'emit',
                    # since the daemon starting doesn't guarantee much about
                    # the service being operational anyway, don't bother
                    # waiting for it
                    '--no-wait',
                    '--',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            else:
                svc = '/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_start(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call(
                [
                    osd_script,
                    'start',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # One list element per argument, like every other call
            # here: a single 'script start osd.N' string would be
            # looked up as the name of the executable.
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3328
3329
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the daemon of osd.<osd_id> with its init system.

    The init system is picked from the marker file (upstart, sysvinit,
    systemd, openrc or bsdrc, checked in that order) found in
    STATEDIR/osd/<cluster>-<osd_id>.

    Raises Error when no marker file exists or the stop command fails.
    """
    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # One list element per argument, like every other call
            # here: a single 'script stop osd.N' string would be
            # looked up as the name of the executable.
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3384
3385
def detect_fstype(dev):
    """Return the file system type of dev.

    The type is probed with fstyp when FREEBSD is set and with blkid
    otherwise; the probe output must be exactly one line.
    """
    if FREEBSD:
        probe = ['fstyp', '-u', dev]
    else:
        probe = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    output = _check_output(args=probe)
    return must_be_one_line(output)
3409
3410
def dmcrypt_is_mapped(uuid):
    """Return /dev/mapper/<uuid> when that path exists, None otherwise."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3417
3418
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the dmcrypt partition dev and return what _dmcrypt_map returns.

    Whether the partition is plain or luks is decided from its
    partition type; Error is raised when it is neither.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        luks, cryptsetup_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        luks, cryptsetup_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=cryptsetup_parameters,
        luks=luks,
        format_dev=False,
    )
3440
3441
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """Mount dev on a temporary directory and activate the OSD it holds.

    After activation the mount is moved to the final OSD location
    unless the same device is already mounted there. Returns the tuple
    (cluster, osd_id).

    Raises FilesystemTypeError when the file system type of dev cannot
    be detected, and Error when the OSD is tagged 'deactive' and
    reactivate is false, or when something else already occupies the
    final location.
    """

    if dmcrypt:
        part_uuid = get_partition_uuid(dev)
        # from here on dev is whatever dmcrypt_map() returns
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # check if the disk is deactive, change the journal owner, group
    # mode for correct user and group.
    if os.path.exists(os.path.join(path, 'deactive')):
        # logging to syslog will help us easy to know udev triggered failure
        if not reactivate:
            unmount(path)
            # we need to unmap again because dmcrypt map will create again
            # on bootup stage (due to deactivate)
            if '/dev/mapper/' in dev:
                part_uuid = dev.replace('/dev/mapper/', '')
                dmcrypt_unmap(part_uuid)
            LOG.info('OSD deactivated! reactivate with: --reactivate')
            raise Error('OSD deactivated! reactivate with: --reactivate')
        # flag to activate a deactive osd.
        deactive = True
    else:
        deactive = False

    osd_id = None
    cluster = None
    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        active = False
        other = False
        # compare the device ids of the temporary mount and of the
        # final location
        src_dev = os.stat(path).st_dev
        try:
            dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)).st_dev
            if src_dev == dst_dev:
                active = True
            else:
                parent_dev = os.stat(STATEDIR + '/osd').st_dev
                if dst_dev != parent_dev:
                    # the final location is on another device than its
                    # parent directory
                    other = True
                elif os.listdir(get_mount_point(cluster, osd_id)):
                    LOG.info(get_mount_point(cluster, osd_id) +
                             " is not empty, won't override")
                    other = True

        except OSError:
            # a failing stat()/listdir() leaves active and other False,
            # so the mount is moved below
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            # the except clause below unmounts the temporary mount
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:
        # deliberately catches everything: the temporary mount is
        # released and the exception is re-raised untouched
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3549
3550
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """Activate the OSD stored in the directory path.

    Unless init is None or 'none', a symlink to path is created at
    STATEDIR/osd/<cluster>-<osd_id> when path is not already that
    location; a symlink pointing elsewhere is replaced.

    Returns the tuple (cluster, osd_id). Raises Error when path does
    not exist or the symlink cannot be removed or created.
    """

    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    # Only file system errors are turned into Error; a
                    # bare except would also swallow KeyboardInterrupt
                    # and SystemExit.
                    try:
                        os.unlink(canonical)
                    except OSError:
                        raise Error('unable to remove old symlink', canonical)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path))

    return cluster, osd_id
3590
3591
def find_cluster_by_uuid(_uuid):
    """
    Return the name of the cluster whose conf file in SYSCONFDIR
    declares the fsid ``_uuid`` (compared lower-cased), or None.

    If no conf file matches and ceph.conf is the only conf file that
    has no fsid at all, 'ceph' is returned anyway.
    """
    wanted = _uuid.lower()
    if not os.path.exists(SYSCONFDIR):
        return None

    suffix = '.conf'
    without_fsid = []
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith(suffix):
            continue
        cluster = entry[:-len(suffix)]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            if 'getting cluster uuid from configuration failed' not in str(e):
                raise e
            without_fsid.append(cluster)
            continue
        if fsid == wanted:
            return cluster

    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if without_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3620
3621
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Bring the OSD data directory ``path`` into the activated state.

    Steps, in order: check the OSD magic, resolve the cluster name from
    the ``ceph_fsid`` entry, read the OSD uuid from ``fsid``, allocate
    an OSD id if none is recorded yet, run mkfs if there is no
    ``ready`` marker, write the init system marker, and authorize the
    OSD key if there is no ``active`` marker.

    ``activate_key_template`` is formatted with ``cluster`` and
    ``statedir`` to obtain the keyring path. ``init`` is the init
    system marker name; None or 'none' writes no marker, 'auto' takes
    the value from the cluster configuration or from init_get().

    Returns the tuple (osd_id, cluster).
    Raises Error when the cluster uuid, the cluster conf or the OSD
    uuid cannot be determined.
    """

    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    # reuse the recorded id if there is one, otherwise allocate a new
    # one and record it in `whoami`
    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    if not os.path.exists(os.path.join(path, 'ready')):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            # the cluster configuration wins over auto-detection
            conf_val = get_conf(
                cluster=cluster,
                variable='init'
            )
            if conf_val is not None:
                init = conf_val
            else:
                init = init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        # create (or truncate) the empty marker file named after the
        # init system, then hand it to path_set_context()
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for other in INIT_SYSTEMS:
        if other != init:
            try:
                os.unlink(os.path.join(path, other))
            except OSError:
                # best effort: the marker usually does not exist
                pass

    if not os.path.exists(os.path.join(path, 'active')):
        LOG.debug('Authorizing OSD key...')
        auth_key(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            keyring=keyring,
        )
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3705
3706
def main_activate(args):
    """
    Entry point of the ``activate`` subcommand.

    ``args.path`` must be either a block device (handled by
    mount_activate()) or a directory (handled by activate_dir()).
    Nothing is done when the path is suppressed. After activation the
    OSD daemon is started unless ``args.no_start_daemon`` is set or the
    journal symlink points at a device that is not present yet.

    Raises Error if the path does not exist, is neither a directory nor
    a block device, or carries the multipath OSD partition type without
    being a multipath device.
    """
    cluster = None
    osd_id = None

    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stat.S_ISBLK(mode):
            # a partition typed as multipath OSD must really be
            # accessed through a multipath device
            if (is_partition(args.path) and
                    (get_partition_type(args.path) ==
                     PTYPE['mpath']['osd']['ready']) and
                    not is_mpath(args.path)):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
            LOG.info("activate: Journal not present, not starting, yet")
            return

        # with --mark-init none the daemon is run directly instead of
        # going through start_daemon()
        if (not args.no_start_daemon and args.mark_init == 'none'):
            command_check_call(
                [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={journal}'.format(journal=osd_journal),
                ],
            )

        if (not args.no_start_daemon and
                args.mark_init not in (None, 'none')):

            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3774
3775
def main_activate_lockbox(args):
    """
    Entry point of the lockbox activation: run
    main_activate_lockbox_protected() while holding activate_lock.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3779
3780
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox found on the partition ``args.path``.

    The caller is expected to hold activate_lock.
    """
    lockbox = Lockbox(args)
    lockbox.set_partition(
        DevicePartition.factory(path=None, dev=args.path, args=args))
    lockbox.activate()
3788
3789
3790###########################
3791
def _mark_osd_out(cluster, osd_id):
    """
    Ask the monitors of ``cluster`` to mark osd.<osd_id> out.

    ``osd_id`` must be an integer (it is formatted with %d).
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    command([
        'ceph',
        # address the requested cluster instead of the default one
        '--cluster', cluster,
        'osd',
        'out',
        'osd.%d' % osd_id,
    ])
3800
3801
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is read from the JSON output of `ceph osd dump` for
    ``cluster``. Raises Error if ``osd_id`` is not listed there.
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    found = False
    status_code = 0
    out, _, _ = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # compare by value: identity (`is`) on integers only works
            # by accident of the interpreter's small-int cache
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not osd.%s in osd tree!' % osd_id)
    return status_code
3834
3835
def _remove_osd_directory_files(mounted_path, cluster):
    """
    Delete the `ready` and `active` markers (when present) and the
    init system marker from the OSD data directory ``mounted_path``.

    The init system name comes from the `init` setting of ``cluster``
    or, when that is unset, from init_get().
    """
    ready_marker = os.path.join(mounted_path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.info('`ready` file is already removed.')
    else:
        os.remove(ready_marker)
        LOG.info('Remove `ready` file.')

    active_marker = os.path.join(mounted_path, 'active')
    if not os.path.exists(active_marker):
        LOG.info('`active` file is already removed.')
    else:
        os.remove(active_marker)
        LOG.info('Remove `active` file.')

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    init = get_conf(
        cluster=cluster,
        variable='init'
    )
    if init is None:
        init = init_get()
    os.remove(os.path.join(mounted_path, init))
    LOG.info('Remove `%s` file.', init)
3864
3865
def main_deactivate(args):
    """
    Entry point of the ``deactivate`` subcommand: run
    main_deactivate_locked() while holding activate_lock.
    """
    with activate_lock:
        main_deactivate_locked(args)
3869
3870
def main_deactivate_locked(args):
    """
    Deactivate the OSD selected by ``args.deactivate_by_id`` or
    ``args.path``.

    Depending on the status reported by _check_osd_status() the OSD is
    optionally marked out (``args.mark_out``) and its daemon stopped.
    Unless ``args.once`` is set the activation markers are removed and
    a `deactive` marker is written. The data directory is then
    unmounted and, for dmcrypt OSDs, the lockbox is unmounted and the
    device mappings are removed.

    Nothing further is done when the OSD is already down.
    Raises Error when no partition matches.
    """
    osd_id = args.deactivate_by_id
    path = args.path
    target_dev = None
    dmcrypt = False
    devices = list_devices()

    # list all devices and found we need
    # (no early exit: the last matching partition wins)
    for device in devices:
        if 'partitions' in device:
            for dev_part in device.get('partitions'):
                if (osd_id and
                        'whoami' in dev_part and
                        dev_part['whoami'] == osd_id):
                    target_dev = dev_part
                elif (path and
                        'path' in dev_part and
                        dev_part['path'] == path):
                    target_dev = dev_part
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # set up all we need variable
    osd_id = target_dev['whoami']
    part_type = target_dev['ptype']
    mounted_path = target_dev['mount']
    if Ptype.is_dmcrypt(part_type, 'osd'):
        dmcrypt = True

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code == OSD_STATUS_IN_UP:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_IN_DOWN:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        LOG.info("OSD already out/down. Do not do anything now.")
        return
    elif status_code == OSD_STATUS_OUT_UP:
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_OUT_DOWN:
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    if not args.once:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Write deactivate to osd directory!
        with open(os.path.join(mounted_path, 'deactive'), 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if dmcrypt:
        # the lockbox of this OSD is mounted under
        # STATEDIR/osd-lockbox/<partition uuid>
        lockbox = os.path.join(STATEDIR, 'osd-lockbox')
        command(['umount', os.path.join(lockbox, target_dev['uuid'])])

        # unmap the data partition, then every space partition
        # recorded for this OSD
        dmcrypt_unmap(target_dev['uuid'])
        for name in Space.NAMES:
            if name + '_uuid' in target_dev:
                dmcrypt_unmap(target_dev[name + '_uuid'])
3936
3937###########################
3938
3939
def _remove_from_crush_map(cluster, osd_id):
    """
    Remove osd.<osd_id> from the crush map of ``cluster``.
    """
    LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
    command([
        'ceph',
        # address the requested cluster instead of the default one
        '--cluster', cluster,
        'osd',
        'crush',
        'remove',
        'osd.%s' % osd_id,
    ])
3949
3950
def _delete_osd_auth_key(cluster, osd_id):
    """
    Delete the cephx key of osd.<osd_id> in ``cluster``.
    """
    LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
    command([
        'ceph',
        # address the requested cluster instead of the default one
        '--cluster', cluster,
        'auth',
        'del',
        'osd.%s' % osd_id,
    ])
3959
3960
def _deallocate_osd_id(cluster, osd_id):
    """
    Release the id ``osd_id`` in ``cluster`` with `ceph osd rm`.
    """
    LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
    command([
        'ceph',
        # address the requested cluster instead of the default one
        '--cluster', cluster,
        'osd',
        'rm',
        '%s' % osd_id,
    ])
3969
3970
def _remove_lockbox(uuid, cluster):
    """
    Forget the lockbox of the OSD partition ``uuid``: delete its auth
    entity and its LUKS config-key in ``cluster``, unmount it and
    remove the symlinks in STATEDIR/osd-lockbox that point at it.
    """
    ceph = ['ceph', '--cluster', cluster]
    command(ceph + ['auth', 'del', 'client.osd-lockbox.' + uuid])
    command(ceph + ['config-key', 'del', 'dm-crypt/osd/' + uuid + '/luks'])

    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    if os.path.exists(lockbox):
        canonical = os.path.join(lockbox, uuid)
        command(['umount', canonical])
        for entry in os.listdir(lockbox):
            candidate = os.path.join(lockbox, entry)
            if not os.path.islink(candidate):
                continue
            if os.readlink(candidate) == canonical:
                os.unlink(candidate)
3995
3996
def destroy_lookup_device(args, predicate, description):
    """
    Find the first partition for which ``predicate(partition)`` is true.

    Unmounted lockbox partitions are activated first. Each partition
    with a non-empty 'dmcrypt' entry is then mapped if necessary,
    inspected with list_dev_osd() and unmapped again if it was mapped
    here, before the predicate is applied.

    Returns the tuple (dmcrypt, partition) where ``dmcrypt`` tells
    whether the partition has a non-empty 'dmcrypt' entry.
    Raises Error, mentioning ``description``, when nothing matches.
    """
    devices = list_devices()
    # first pass: make sure every lockbox is mounted
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['type'] == 'lockbox':
                if not is_mounted(partition['path']):
                    main_activate_lockbox_protected(
                        argparse.Namespace(verbose=args.verbose,
                                           path=partition['path']))
    # second pass: look for the partition accepted by the predicate
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['dmcrypt']:
                dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
                if dmcrypt_path:
                    unmap = False
                else:
                    dmcrypt_path = dmcrypt_map(partition['path'],
                                               args.dmcrypt_key_dir)
                    unmap = True
                # updates `partition` in place with what is found on
                # the mapped device
                list_dev_osd(dmcrypt_path, {}, partition)
                if unmap:
                    dmcrypt_unmap(partition['uuid'])
                dmcrypt = True
            else:
                dmcrypt = False
            if predicate(partition):
                return dmcrypt, partition
    raise Error('found no device matching ', description)
4025
4026
def main_destroy(args):
    """
    Entry point of the ``destroy`` subcommand: run
    main_destroy_locked() while holding activate_lock.
    """
    with activate_lock:
        main_destroy_locked(args)
4030
4031
def main_destroy_locked(args):
    """
    Destroy the OSD selected by ``args.path`` or ``args.destroy_by_id``.

    The OSD must be down. It is removed from the crush map, its cephx
    key is deleted and its id is deallocated. For dmcrypt OSDs the
    space partitions are unmapped and the lockbox is removed. With
    ``args.zap`` the base device is zapped as well.

    Raises Error if neither a path nor an osd id is given, if the path
    is not a partition, if no device matches or if the OSD is up.
    """
    osd_id = args.destroy_by_id
    path = args.path
    target_dev = None

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)

    if path:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # without a selector there is nothing to look up; fail with a
        # clear message instead of a TypeError further down
        raise Error('either a partition path or an osd id is required')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
            status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    # Remove OSD from crush map
    _remove_from_crush_map(args.cluster, osd_id)

    # Remove OSD cephx key
    _delete_osd_auth_key(args.cluster, osd_id)

    # Deallocate OSD ID
    _deallocate_osd_id(args.cluster, osd_id)

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4087
4088
def get_space_osd_uuid(name, path):
    """
    Return the first line printed by `ceph-osd --get-device-fsid` for
    the block device ``path``.

    ``name`` (journal, block, ...) is only used in messages.
    Raises Error if ``path`` is missing, is not a block device, has a
    multipath journal/block partition type without being a multipath
    device, or if ceph-osd fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not stat.S_ISBLK(os.stat(path).st_mode):
        raise Error('%s is not a block device' % path)

    mpath_ptypes = (PTYPE['mpath']['journal']['ready'],
                    PTYPE['mpath']['block']['ready'])
    if (is_partition(path) and
            get_partition_type(path) in mpath_ptypes and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    cmd = ['ceph-osd', '--get-device-fsid', path]
    try:
        out = _check_output(args=cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    # only the first line of the output is the uuid
    value = str(out).partition('\n')[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
4121
4122
def main_activate_space(name, args):
    """
    Activate the OSD that owns the space device ``args.dev``.

    ``name`` is the kind of space and is passed on to
    get_space_osd_uuid(). The OSD uuid found on the device is looked
    up under /dev/disk/by-partuuid/ and that data partition is
    activated with mount_activate(), then the daemon is started.

    Returns without doing anything when the data partition is
    suppressed or not present yet.
    Raises Error if ``args.dev`` does not exist.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    cluster = None
    osd_id = None
    osd_uuid = None
    dev = None
    with activate_lock:
        if args.dmcrypt:
            dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        else:
            dev = args.dev
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        (cluster, osd_id) = mount_activate(
            dev=path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
        )
4166
4167
4168###########################
4169
4170
def main_activate_all(args):
    """
    Activate every OSD data partition listed in
    /dev/disk/by-parttypeuuid and start its daemon.

    Entries are named <partition type>.<partition uuid>. A failure on
    one partition is printed to stderr and the scan continues.
    Raises Error at the end if any partition failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return

    failed = False
    for entry in os.listdir(scan_dir):
        if '.' not in entry:
            continue
        (tag, part_uuid) = entry.split('.')

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        # dmcrypt partitions are activated through their mapping
        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, entry)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )
            except Exception as e:
                message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
                print(message, file=sys.stderr)
                failed = True

    if failed:
        raise Error('One or more partitions failed to activate')
4219
4220
4221###########################
4222
def is_swap(dev):
    """
    Return True if ``dev`` is listed as a swap device in
    PROCDIR/swaps, False otherwise.

    Both ``dev`` and the listed devices are compared after symlink
    resolution.
    """
    dev = os.path.realpath(dev)
    # read as text: in binary mode the fields are bytes on Python 3
    # and would never compare equal to the str in `dev`
    with open(PROCDIR + '/swaps', 'r') as proc_swaps:
        # the first line is the column header
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4236
4237
def get_oneliner(base, name):
    """
    Return the first line of the file ``name`` in directory ``base``
    with trailing whitespace stripped, or None if it is not a regular
    file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as _file:
        first_line = _file.readline()
        return _bytes2str(first_line.rstrip())
4244
4245
def get_dev_fs(dev):
    """
    Return the file system type of ``dev`` or None if it cannot be
    determined.

    On FreeBSD the output of `fstyp -u` is returned as is when the
    command succeeds; elsewhere the TYPE value is extracted from the
    output of `blkid -s TYPE`.
    """
    if FREEBSD:
        fstype, _, ret = command(['fstyp', '-u', dev])
        return fstype if ret == 0 else None

    fscheck, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in fscheck:
        return None
    # the second field of the output is TYPE="<fstype>"
    type_field = fscheck.split()[1]
    return type_field.split('"')[1]
4270
4271
def split_dev_base_partnum(dev):
    """
    Split the partition ``dev`` into its base device and its partition
    number.

    Returns the tuple (base, partnum). For non multipath devices
    ``partnum`` is the stripped content of the `partition` file found
    under block_path(dev).
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # close the file deterministically instead of relying on
        # garbage collection
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4281
4282
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for ``part``,
    or None if blkid does not report it.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_TYPE')
4285
4286
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for ``part``,
    or None if blkid does not report it.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_UUID')
4289
4290
def get_blkid_partition_info(dev, what=None):
    """
    Run `blkid -o udev -p` on ``dev`` and parse its KEY=VALUE output.

    Returns the value stored under ``what`` (None if absent) when
    ``what`` is given, otherwise the whole dictionary.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # split on the first '=' only so values containing '=' are
        # kept intact; lines without '=' carry no key/value pair
        key, sep, value = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
4309
4310
def more_osd_info(path, uuid_map, desc):
    """
    Fill ``desc`` with what is recorded in the OSD data directory
    ``path``: ceph_fsid, cluster (when ceph_fsid is set), whoami and,
    for each space name, <name>_uuid plus <name>_dev when the uuid is
    a key of ``uuid_map``.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        uuid_key = name + '_uuid'
        recorded = get_oneliner(path, uuid_key)
        if not recorded:
            continue
        space_uuid = recorded.lower()
        desc[uuid_key] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4322
4323
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the OSD data device ``dev`` in ``desc``.

    Sets mount, fs_type and state ('active' when mounted, 'prepared'
    when a temporary mount shows a magic file, 'unprepared'
    otherwise) and adds the details gathered by more_osd_info().
    """
    mount_point = is_mounted(dev)
    desc['mount'] = mount_point
    fs_type = get_dev_fs(dev)
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mount_point:
        desc['state'] = 'active'
        more_osd_info(mount_point, uuid_map, desc)
        return
    if not fs_type:
        return

    # not mounted: look inside through a temporary mount
    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if not tpath:
            return
        try:
            magic = get_oneliner(tpath, 'magic')
            if magic is not None:
                desc['magic'] = magic
                desc['state'] = 'prepared'
                more_osd_info(tpath, uuid_map, desc)
        finally:
            unmount(tpath)
    except MountError:
        pass
4345
4346
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device ``dev`` in ``desc``.

    Sets mount, fs_type, state ('active' when mounted, 'prepared' when
    a temporary mount shows a magic file, 'unprepared' otherwise) and
    osd_uuid, plus lockbox_for when the osd uuid is a key of
    ``uuid_map``.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
        args = ['mount', '-t', 'ext4', dev, tpath]
        LOG.debug('Mounting lockbox ' + str(" ".join(args)))
        mounted = False
        try:
            command_check_call(args)
            mounted = True
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # never leave the temporary mount behind, even if
                # reading the markers fails
                unmount(tpath)
        except subprocess.CalledProcessError:
            if not mounted:
                # the mount itself failed: drop the empty directory
                # created above so that it does not accumulate
                try:
                    os.rmdir(tpath)
                except OSError:
                    pass
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4370
4371
def list_format_lockbox_plain(dev):
    """
    Return the plain text fragments telling what the lockbox ``dev``
    is for: the owning device when known, else the osd uuid, else
    nothing.
    """
    lockbox_for = dev.get('lockbox_for')
    if lockbox_for:
        return ['for ' + lockbox_for]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4379
4380
def list_format_more_osd_info_plain(dev):
    """
    Return the plain text fragments describing the OSD found on
    ``dev``: cluster, osd id and the device of each space.
    """
    desc = []
    ceph_fsid = dev.get('ceph_fsid')
    if ceph_fsid:
        cluster = dev.get('cluster')
        if cluster:
            desc.append('cluster ' + cluster)
        else:
            desc.append('unknown cluster ' + ceph_fsid)
    whoami = dev.get('whoami')
    if whoami:
        desc.append('osd.%s' % whoami)
    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            desc.append(name + ' %s' % space_dev)
    return desc
4394
4395
def list_format_dev_plain(dev, prefix=''):
    """
    Return the one line plain text description of ``dev`` as
    '<prefix><path> <comma separated details>'.

    The details depend on the partition type: ceph data, ceph lockbox,
    dmcrypt ceph data, a (possibly dmcrypt) ceph space, or anything
    else.
    """
    ptype = dev['ptype']
    desc = []
    if ptype == PTYPE['regular']['osd']['ready']:
        desc = ['ceph data', dev['state']]
        desc += list_format_more_osd_info_plain(dev)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        desc = ['ceph lockbox', dev['state']]
        desc += list_format_lockbox_plain(dev)
    elif Ptype.is_dmcrypt(ptype, 'osd'):
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if not holders:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(holders)]
    elif Ptype.is_regular_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        desc.append('ceph ' + name)
        owner = dev.get(name + '_for')
        if owner:
            desc.append('for %s' % owner)
    elif Ptype.is_dmcrypt_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if holders and len(holders) == 1:
            holder = get_dev_path(holders[0])
            label = ' (dmcrypt %s %s)' % (dmcrypt['type'], holder)
        else:
            label = ' (dmcrypt %s)' % dmcrypt['type']
        desc = ['ceph ' + name + label]
        owner = dev.get(name + '_for')
        if owner:
            desc.append('for %s' % owner)
    else:
        desc.append(dev['type'])
        # the file system type is preferred over the raw partition type
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4443
4444
def list_format_plain(devices):
    """
    Return the plain text listing of ``devices``, one line per device
    or partition.

    A device with partitions gets a '<path> :' header followed by its
    partitions sorted by path.
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if not partitions:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
            continue
        lines.append('%s :' % device['path'])
        for part in sorted(partitions, key=lambda x: x['path']):
            lines.append(list_format_dev_plain(dev=part,
                                               prefix=' '))
    return "\n".join(lines)
4457
4458
def list_dev(dev, uuid_map, space_map):
    """
    Return a dictionary describing the device or partition ``dev``.

    The dictionary always has 'path', 'dmcrypt', 'is_partition',
    'ptype' and 'type'; the other entries depend on the partition
    type. ``uuid_map`` is passed on to list_dev_osd() and
    list_dev_lockbox(); ``space_map`` is used to tell which device a
    space partition belongs to, keyed by the partition uuid.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # compare with the multipath *lockbox* type: the osd type can
        # never match inside this branch
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # the content can only be inspected through a single mapping
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        # not a ceph partition type: report swap or a generic device
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4535
4536
def list_devices():
    """
    Return the description of every device found by
    list_all_partitions().

    A device with partitions is returned as
    {'path': ..., 'partitions': [list_dev() results]}; a device
    without partitions is returned as the list_dev() result itself.

    A first pass builds ``uuid_map`` (partition uuid -> device) and
    ``space_map`` (space uuid recorded in an OSD data partition -> that
    data partition) which list_dev() needs to link partitions
    together.
    """
    partmap = list_all_partitions()

    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for p in parts:
            dev = get_dev_path(p)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype in Ptype.get_ready_by_name('osd'):
                if Ptype.is_dmcrypt(ptype, 'osd'):
                    # an encrypted partition is only readable through
                    # its single holder; skip it otherwise
                    holders = is_held(dev)
                    if len(holders) != 1:
                        continue
                    dev_to_mount = get_dev_path(holders[0])
                else:
                    dev_to_mount = dev

                fs_type = get_dev_fs(dev_to_mount)
                if fs_type is not None:
                    mount_options = get_mount_options(cluster='ceph',
                                                      fs_type=fs_type)
                    try:
                        # temporary mount to read the <name>_uuid
                        # entries of the OSD
                        tpath = mount(dev=dev_to_mount,
                                      fstype=fs_type, options=mount_options)
                        try:
                            for name in Space.NAMES:
                                space_uuid = get_oneliner(tpath,
                                                          name + '_uuid')
                                if space_uuid:
                                    space_map[space_uuid.lower()] = dev
                        finally:
                            unmount(tpath)
                    except MountError:
                        # best effort: an unmountable partition simply
                        # contributes nothing to space_map
                        pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        if parts:
            disk = {'path': get_dev_path(base)}
            partitions = []
            for p in sorted(parts):
                partitions.append(list_dev(get_dev_path(p),
                                           uuid_map,
                                           space_map))
            disk['partitions'] = partitions
            devices.append(disk)
        else:
            device = list_dev(get_dev_path(base), uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
    LOG.debug("list_devices: " + str(devices))
    return devices
4599
4600
def list_zfs():
    """
    Print one line per entry of `zfs list -o name,mountpoint`.

    An entry whose mountpoint contains an `active` file is reported as
    active ceph data, any other entry as a plain zfs file system.
    A failure of the zfs command is logged and re-raised.
    """
    zfs_cmd = ['zfs', 'list', '-o', 'name,mountpoint']
    try:
        out, err, ret = command(zfs_cmd)
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise
    # the first line is the column header
    for line in out.splitlines()[1:]:
        vdevline = line.split()
        mountpoint = vdevline[1]
        vdev = vdevline[0]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            osd_name = os.path.split(mountpoint)[1]
            print(vdev, "ceph data, active, cluster ceph,", osd_name,
                  "mounted on:", mountpoint)
        else:
            print(vdev + " other, zfs, mounted on: " + mountpoint)
4623
4624
def main_list(args):
    """
    Entry point of the ``list`` subcommand: run the FreeBSD or the
    generic implementation while holding activate_lock.
    """
    with activate_lock:
        lister = main_list_freebsd if FREEBSD else main_list_protected
        lister(args)
4631
4632
def main_list_protected(args):
    """
    Print the devices returned by list_devices(), as JSON when
    ``args.format`` is 'json' and as plain text otherwise.

    When ``args.path`` is given only the devices whose path ends with
    one of these paths are printed; existing paths are resolved with
    os.path.realpath() first.
    """
    devices = list_devices()
    if args.path:
        paths = []
        for path in args.path:
            if os.path.exists(path):
                paths.append(os.path.realpath(path))
            else:
                paths.append(path)
        # the paths are matched literally (re.escape) so characters
        # such as '.' or '+' are not read as regular expression
        # syntax, and a device is listed once even if several paths
        # match it
        patterns = [re.escape(path) + '$' for path in paths]
        selected_devices = [
            device for device in devices
            if any(re.search(pattern, device['path'])
                   for pattern in patterns)
        ]
    else:
        selected_devices = devices
    if args.format == 'json':
        print(json.dumps(selected_devices))
    else:
        output = list_format_plain(selected_devices)
        if output:
            print(output)
4655
4656
def main_list_freebsd(args):
    """
    FreeBSD implementation of the ``list`` subcommand; ``args`` is not
    used.
    """
    # Currently accommodate only ZFS Filestore partitions
    # return a list of VDEVs and mountpoints
    # > zfs list
    # NAME USED AVAIL REFER MOUNTPOINT
    # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
    # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
    list_zfs()
4665
4666
4667###########################
4668#
4669# Mark devices that we want to suppress activates on with a
4670# file like
4671#
4672# /var/lib/ceph/tmp/suppress-activate.sdb
4673#
4674# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix. That
4676# means suppressing sdb will stop activate on sdb1, sdb2, etc.
4677#
4678
def is_suppressed(path):
    """Tell whether activation has been suppressed for ``path``.

    ``path`` is resolved with os.path.realpath().  Only block devices
    located under ``/dev/`` can be suppressed.  The device name is then
    shortened one character at a time and each prefix is looked up as
    ``SUPPRESS_PREFIX + prefix``, so a suppress file for a whole disk
    also covers its partitions.

    :param path: path to a device (symbolic links are followed)
    :return: True if a matching suppress file exists, False otherwise
             (including when the device cannot be inspected)
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not stat.S_ISBLK(os.lstat(disk).st_mode)):
            return False
        base = get_dev_name(disk)
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # Best effort: a device that cannot be inspected is reported as
        # not suppressed.  Unlike a bare except, this does not swallow
        # KeyboardInterrupt or SystemExit.
        return False
    # No prefix of the device name has a suppress file.
    return False
4692
4693
def set_suppress(path):
    """Create the suppress file for the block device found at ``path``.

    :param path: path to a block device (symbolic links are followed)
    :raises Error: if the resolved path does not exist or is not a
                   block device
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Inspect the resolved device rather than the path given by the
    # caller: lstat() on a symbolic link describes the link itself, which
    # is never a block device.  is_suppressed() checks the resolved path
    # in the same way.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    base = get_dev_name(disk)

    # The mere existence of the (empty) file is the flag.
    with open(SUPPRESS_PREFIX + base, 'w'):  # noqa
        pass
    LOG.info('set suppress flag on %s', base)
4705
4706
def unset_suppress(path):
    """Remove the suppress file of the block device found at ``path``.

    :param path: path to a block device (symbolic links are followed)
    :raises Error: if the resolved path does not exist, is not a block
                   device, is not located under ``/dev/``, is not
                   currently marked as suppressed, or if the suppress
                   file cannot be removed
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Inspect the resolved device rather than the path given by the
    # caller: lstat() on a symbolic link describes the link itself, which
    # is never a block device.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    # Explicit check instead of an assert, which is stripped when python
    # runs with -O and would not be reported as an Error to the user.
    if not disk.startswith('/dev/'):
        raise Error('not under /dev', path)
    base = get_dev_name(disk)

    fn = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(fn):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(fn)
    except OSError as e:
        raise Error('failed to unsuppress', e)
    LOG.info('unset suppress flag on %s', base)
4725
4726
def main_suppress(args):
    """Entry point of ``suppress-activate``: flag ``args.path``."""
    target = args.path
    set_suppress(target)
4729
4730
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unflag ``args.path``."""
    target = args.path
    unset_suppress(target)
4733
4734
def main_zap(args):
    """Entry point of ``zap``: zap each device of ``args.dev`` in order."""
    devices = args.dev
    for device in devices:
        zap(device)
4738
4739
def main_trigger(args):
    """Entry point of ``trigger``: activate the partition ``args.dev``.

    Unless ``args.sync`` is set, the work is delegated to systemd (restart
    of the ``ceph-disk@<dev>.service`` unit) or to upstart (``ceph-disk``
    event) when one of them is detected, and the function returns at once.

    Otherwise the partition type is read and the matching
    ``ceph-disk activate*`` subcommand is run.

    :raises Error: if the partition type is not recognized or if the
                   subcommand returns a non-zero code
    """
    LOG.debug("main_trigger: " + str(args))
    asynchronous = not args.sync

    if is_systemd() and asynchronous:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        escaped_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return

    if is_upstart() and asynchronous:
        LOG.info('upstart detected, triggering ceph-disk task')
        command([
            'initctl',
            'emit',
            'ceph-disk',
            'dev={dev}'.format(dev=args.dev),
            'pid={pid}'.format(pid=os.getpid()),
        ])
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    # Select the subcommand (and its options) from the partition type.
    # The device is always the last argument.
    if parttype in (PTYPE['regular']['osd']['ready'],
                    PTYPE['mpath']['osd']['ready']):
        subcommand = ['activate']

    elif parttype in (PTYPE['plain']['osd']['ready'],
                      PTYPE['luks']['osd']['ready']):
        subcommand = ['activate', '--dmcrypt']

    elif parttype in (PTYPE['regular']['journal']['ready'],
                      PTYPE['mpath']['journal']['ready']):
        subcommand = ['activate-journal']

    elif parttype in (PTYPE['plain']['journal']['ready'],
                      PTYPE['luks']['journal']['ready']):
        subcommand = ['activate-journal', '--dmcrypt']

    elif parttype in (PTYPE['regular']['block']['ready'],
                      PTYPE['regular']['block.db']['ready'],
                      PTYPE['regular']['block.wal']['ready'],
                      PTYPE['mpath']['block']['ready'],
                      PTYPE['mpath']['block.db']['ready'],
                      PTYPE['mpath']['block.wal']['ready']):
        subcommand = ['activate-block']

    elif parttype in (PTYPE['plain']['block']['ready'],
                      PTYPE['plain']['block.db']['ready'],
                      PTYPE['plain']['block.wal']['ready'],
                      PTYPE['luks']['block']['ready'],
                      PTYPE['luks']['block.db']['ready'],
                      PTYPE['luks']['block.wal']['ready']):
        subcommand = ['activate-block', '--dmcrypt']

    elif parttype in (PTYPE['regular']['lockbox']['ready'],
                      PTYPE['mpath']['lockbox']['ready']):
        subcommand = ['activate-lockbox']

    else:
        raise Error('unrecognized partition type %s' % parttype)

    out, err, ret = command(ceph_disk + subcommand + [args.dev])

    if ret != 0:
        LOG.info(out)
        LOG.error(err)
        raise Error('return code ' + str(ret))

    LOG.debug(out)
    LOG.debug(err)
4875
4876
def main_fix(args):
    """Entry point of ``fix``: repair ownership and/or SELinux labels.

    Depending on ``args.system``, ``args.selinux``, ``args.permissions``
    and ``args.all``, the ceph directories are chown'ed to ``ceph:ceph``
    and/or relabeled with ``restorecon``.  The directories found in
    ``/var/lib/ceph/osd`` are processed in the background and waited for
    at the end of each step; everything else is processed synchronously.

    :raises Error: if SELinux is required but not enabled, if a ceph
                   daemon is running or if any of the commands fails
    """
    # A list of (path, uid, gid, blocking, recursive) tuples
    fix_table = [
        ('/etc/ceph', 'ceph', 'ceph', True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        descend = directory != '/var/lib/ceph/osd'
        fix_table.append((directory, 'ceph', 'ceph', True, descend))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # The lists of background processes
    all_processes = []
    permissions_processes = []
    selinux_processes = []

    # Preliminary checks
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for entry in fix_table:
            directory = entry[0]
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            c.extend(['-e', directory])

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # Use find to relabel + chown ~simultaneously
    if args.all:
        for directory, uid, gid, blocking, recursive in fix_table:
            owner = ':'.join((uid, gid))
            c = [
                'find', directory,
                '-exec', 'chown', owner, '{}', '+',
                '-exec', 'restorecon', '{}', '+',
            ]

            # Just pass -maxdepth 0 for non-recursive calls
            if not recursive:
                c.extend(['-maxdepth', '0'])

            if not blocking:
                all_processes.append(command_init(c))
                continue

            out, err, ret = command(c)
            if ret:
                LOG.error("Failed to fix " + directory)
                LOG.error(err)
                raise Error(directory + " fix failed")

    LOG.debug("all_processes: " + str(all_processes))
    for process in all_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background find process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix permissions
    if args.permissions:
        for directory, uid, gid, blocking, recursive in fix_table:
            owner = ':'.join((uid, gid))
            if recursive:
                c = ['chown', '-R', owner, directory]
            else:
                c = ['chown', owner, directory]

            if not blocking:
                permissions_processes.append(command_init(c))
                continue

            out, err, ret = command(c)
            if ret:
                LOG.error("Failed to chown " + directory)
                LOG.error(err)
                raise Error(directory + " chown failed")

    LOG.debug("permissions_processes: " + str(permissions_processes))
    for process in permissions_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background permissions process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix SELinux labels
    if args.selinux:
        for directory, uid, gid, blocking, recursive in fix_table:
            if recursive:
                c = ['restorecon', '-R', directory]
            else:
                c = ['restorecon', directory]

            if not blocking:
                selinux_processes.append(command_init(c))
                continue

            out, err, ret = command(c)
            if ret:
                LOG.error("Failed to restore labels for " + directory)
                LOG.error(err)
                raise Error(directory + " relabel failed")

    LOG.debug("selinux_processes: " + str(selinux_processes))
    for process in selinux_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background selinux process failed")
            LOG.error(err)
            raise Error("background failed")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5045
5046
def setup_statedir(dir):
    """Set the module level state that depends on the state directory.

    ``dir`` and its ``tmp`` subdirectory are created when missing, then
    the ``STATEDIR``, ``prepare_lock``, ``activate_lock`` and
    ``SUPPRESS_PREFIX`` globals are set accordingly.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX
    STATEDIR = dir

    tmp_dir = STATEDIR + "/tmp"
    for needed in (STATEDIR, tmp_dir):
        if not os.path.exists(needed):
            os.mkdir(needed)

    prepare_lock = FileLock(tmp_dir + '/ceph-disk.prepare.lock')
    activate_lock = FileLock(tmp_dir + '/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = tmp_dir + '/suppress-activate.'
5067
5068
def setup_sysconfdir(dir):
    """Record ``dir`` in the module level ``SYSCONFDIR`` global."""
    global SYSCONFDIR
    SYSCONFDIR = dir
5072
5073
def parse_args(argv):
    """Build the ``ceph-disk`` command line parser and parse ``argv``.

    The global options are declared first, then each subcommand registers
    its own sub-parser.

    :param argv: list of command line arguments, program name excluded
    :return: the namespace returned by argparse
    """
    parser = argparse.ArgumentParser('ceph-disk')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=None,
        help='be more verbose',
    )
    parser.add_argument(
        '--log-stdout',
        action='store_true', default=None,
        help='log to stdout',
    )
    parser.add_argument(
        '--prepend-to-path',
        metavar='PATH',
        default='/usr/bin',
        help='prepend PATH to $PATH for backward compatibility '
             '(default /usr/bin)',
    )
    parser.add_argument(
        '--statedir',
        metavar='PATH',
        default='/var/lib/ceph',
        help='directory in which ceph state is preserved '
             '(default /var/lib/ceph)',
    )
    parser.add_argument(
        '--sysconfdir',
        metavar='PATH',
        default='/etc/ceph',
        help='directory in which ceph configuration files are found '
             '(default /etc/ceph)',
    )
    parser.add_argument(
        '--setuser',
        metavar='USER',
        default=None,
        help='use the given user for subprocesses, rather than ceph or root',
    )
    parser.add_argument(
        '--setgroup',
        metavar='GROUP',
        default=None,
        help='use the given group for subprocesses, rather than ceph or root',
    )
    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    # The registration order is the order shown in the help output.
    Prepare.set_subparser(subparsers)
    for register in (
            make_activate_parser,
            make_activate_lockbox_parser,
            make_activate_block_parser,
            make_activate_journal_parser,
            make_activate_all_parser,
            make_list_parser,
            make_suppress_parser,
            make_deactivate_parser,
            make_destroy_parser,
            make_zap_parser,
            make_trigger_parser,
            make_fix_parser,
    ):
        register(subparsers)

    return parser.parse_args(argv)
5148
5149
def make_fix_parser(subparsers):
    """Register the ``fix`` subcommand and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        """))
    parser = subparsers.add_parser(
        'fix',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='fix SELinux labels and/or file permissions',
    )

    # All the options are plain flags that default to False.
    flags = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, flag_help in flags:
        parser.add_argument(
            flag,
            action='store_true',
            default=False,
            help=flag_help,
        )

    parser.set_defaults(func=main_fix)
    return parser
5186
5187
def make_trigger_parser(subparsers):
    """Register the ``trigger`` subcommand and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        The partition given in argument is activated. The type of the
        partition (data, lockbox, journal etc.) is detected by its
        type. If the init system is upstart or systemd, the activation is
        delegated to it and runs asynchronously, which
        helps reduce the execution time of udev actions.
        """))
    parser = subparsers.add_parser(
        'trigger',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='activate any device (called by udev)',
    )
    parser.add_argument('dev', help='device')
    parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help='map devices with dm-crypt',
    )
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--sync',
        action='store_true', default=None,
        help='do operation synchronously; do not trigger systemd',
    )
    parser.set_defaults(func=main_trigger)
    return parser
5230
5231
def make_activate_parser(subparsers):
    """Register the ``activate`` subcommand and return its parser.

    The description embeds the value ``STATEDIR`` has when this function
    is called.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activate the OSD found at PATH (can be a directory
        or a device partition, possibly encrypted). When
        activated for the first time, a unique OSD id is obtained
        from the cluster. If PATH is a directory, a symbolic
        link is added in {statedir}/osd/ceph-$id. If PATH is
        a partition, it is mounted on {statedir}/osd/ceph-$id.
        Finally, the OSD daemon is run.

        If the OSD depends on auxiliary partitions (journal, block, ...)
        they need to be available otherwise activation will fail. It
        may happen if a journal is encrypted and cryptsetup was not
        run yet.
        """.format(statedir=STATEDIR)))
    parser = subparsers.add_parser(
        'activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph OSD',
    )
    parser.add_argument(
        '--mount',
        action='store_true', default=None,
        help='mount a block device [deprecated, ignored]',
    )
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.add_argument(
        '--no-start-daemon',
        action='store_true', default=None,
        help='do not start the daemon',
    )
    parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help='map DATA and/or JOURNAL devices with dm-crypt',
    )
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD',
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate,
    )
    return parser
5300
5301
def make_activate_lockbox_parser(subparsers):
    """Register the ``activate-lockbox`` subcommand and return its parser.

    The description embeds the value ``STATEDIR`` has when this function
    is called.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
        where $uuid uniquely identifies the OSD that needs this lockbox
        to retrieve keys from the monitor and unlock its partitions.

        If the OSD has one or more auxiliary devices (journal, block, ...)
        symbolic links are created at {statedir}/osd-lockbox/$other_uuid
        and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
        allow a journal encrypted in a partition identified by $other_uuid to
        fetch the keys it needs from the monitor.

        Finally the OSD is activated, as it would be with ceph-disk activate.
        """.format(statedir=STATEDIR)))
    lockbox_parser = subparsers.add_parser(
        'activate-lockbox',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph lockbox',
    )
    lockbox_parser.add_argument(
        '--activate-key',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    lockbox_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    lockbox_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device',
    )
    lockbox_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_lockbox,
    )
    return lockbox_parser
5341
5342
def make_activate_block_parser(subparsers):
    """Register the ``activate-block`` subcommand and return its parser."""
    block_parser = make_activate_space_parser('block', subparsers)
    return block_parser
5345
5346
def make_activate_journal_parser(subparsers):
    """Register the ``activate-journal`` subcommand and return its parser."""
    journal_parser = make_activate_space_parser('journal', subparsers)
    return journal_parser
5349
5350
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` subcommand and return its parser.

    :param name: kind of auxiliary device, used in the subcommand name
                 and in the help texts (the callers visible here pass
                 'block' and 'journal')
    :param subparsers: the argparse sub-parsers object to register with
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activating a {name} partition is only meaningfull
        if it is encrypted and it will map it using
        cryptsetup.

        Finally the corresponding OSD is activated,
        as it would be with ceph-disk activate.
        """.format(name=name)))

    def activate_space(args):
        # Bind the kind of device to the generic entry point.
        return main_activate_space(name, args)

    parser = subparsers.add_parser(
        'activate-%s' % name,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate an OSD via its %s device' % name,
    )
    parser.add_argument(
        'dev',
        metavar='DEV',
        help='path to %s block device' % name,
    )
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help='map data and/or auxiliariy (journal, etc.) '
             'devices with dm-crypt',
    )
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD',
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=activate_space,
    )
    return parser
5404
5405
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` subcommand and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
        The partitions containing auxiliary devices (journal, block, ...)
        are not activated.
        """))
    parser = subparsers.add_parser(
        'activate-all',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate all tagged OSD partitions',
    )
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_all,
    )
    return parser
5434
5435
def make_list_parser(subparsers):
    """Register the ``list`` subcommand and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Display all partitions on the system and their
        associated Ceph information, if any.
        """))
    parser = subparsers.add_parser(
        'list',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='List disks, partitions, and Ceph OSDs',
    )
    parser.add_argument(
        '--format',
        help='output format',
        default='plain',
        choices=['json', 'plain'],
    )
    # Zero or more paths: without any, everything is listed.
    parser.add_argument(
        'path',
        metavar='PATH',
        nargs='*',
        help='path to block devices, relative to /sys/block',
    )
    parser.set_defaults(func=main_list)
    return parser
5461
5462
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    :return: the parser of ``suppress-activate`` (the parser of
             ``unsuppress-activate`` is registered but not returned)
    """
    suppress_description = textwrap.fill(textwrap.dedent("""\
        Add a prefix to the list of suppressed device names
        so that they are ignored by all activate* subcommands.
        """))
    suppress_parser = subparsers.add_parser(
        'suppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=suppress_description,
        help='Suppress activate on a device (prefix)',
    )
    suppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    suppress_parser.set_defaults(func=main_suppress)

    unsuppress_description = textwrap.fill(textwrap.dedent("""\
        Remove a prefix from the list of suppressed device names
        so that they are no longer ignored by all
        activate* subcommands.
        """))
    unsuppress_parser = subparsers.add_parser(
        'unsuppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=unsuppress_description,
        help='Stop suppressing activate on a device (prefix)',
    )
    unsuppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    unsuppress_parser.set_defaults(func=main_unsuppress)

    return suppress_parser
5499
5500
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` subcommand (nothing is returned)."""
    description = textwrap.fill(textwrap.dedent("""\
        Deactivate the OSD located at PATH. It stops the OSD daemon
        and optionally marks it out (with --mark-out). The content of
        the OSD is left untouched.

        By default, the, ready, active, INIT-specific files are
        removed (so that it is not automatically re-activated by the
        udev rules or ceph-disk trigger) and the file deactive is
        created to remember the OSD is deactivated.

        If the --once option is given, the ready, active, INIT-specific
        files are not removed and the OSD will reactivate whenever
        ceph-disk trigger is run on one of the devices (journal, data,
        block, lockbox, ...).

        If the OSD is dmcrypt, remove the data dmcrypt map. When
        deactivate finishes, the OSD is down.
        """))
    parser = subparsers.add_parser(
        'deactivate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Deactivate a Ceph OSD',
    )
    parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    # PATH is optional because the OSD can be given by id instead.
    parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--deactivate-by-id',
        metavar='<id>',
        help='ID of OSD to deactive',
    )
    parser.add_argument(
        '--mark-out',
        action='store_true', default=False,
        help='option to mark the osd out',
    )
    parser.add_argument(
        '--once',
        action='store_true', default=False,
        help='does not need --reactivate to activate again',
    )
    parser.set_defaults(func=main_deactivate)
5554
5555
def make_destroy_parser(subparsers):
    """Register the ``destroy`` subcommand (nothing is returned)."""
    description = textwrap.fill(textwrap.dedent("""\
        Destroy the OSD located at PATH.
        It removes the OSD from the cluster, the crushmap and
        deallocates the OSD id. An OSD must be down before it
        can be destroyed.
        """))
    parser = subparsers.add_parser(
        'destroy',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Destroy a Ceph OSD',
    )
    parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    # PATH is optional because the OSD can be given by id instead.
    parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy',
    )
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored '
             "(If you don't know how it work, "
             'dont use it. we have default value)',
    )
    parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    parser.set_defaults(func=main_destroy)
5600
5601
def make_zap_parser(subparsers):
    """Register the ``zap`` subcommand and return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Zap/erase/destroy a device's partition table and contents. It
        actually uses sgdisk and it's option --zap-all to
        destroy both GPT and MBR data structures so that the disk
        becomes suitable for repartitioning.
        """))
    parser = subparsers.add_parser(
        'zap',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help="Zap/erase/destroy a device's partition table (and contents)",
    )
    # At least one device is required.
    parser.add_argument(
        'dev',
        metavar='DEV',
        nargs='+',
        help='path to block device',
    )
    parser.set_defaults(func=main_zap)
    return parser
5623
5624
def main(argv):
    """Parse ``argv``, set up the global state and run the subcommand.

    With ``--verbose`` the subcommand is called directly; otherwise it
    goes through main_catch().

    :param argv: list of command line arguments, program name excluded
    """
    global CEPH_PREF_USER, CEPH_PREF_GROUP

    args = parse_args(argv)

    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current_path = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = ":".join((args.prepend_to_path, current_path))

    setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if args.verbose:
        args.func(args)
    else:
        main_catch(args.func, args)
5646
5647
def setup_logging(verbose, log_stdout):
    """Configure logging for the command line tool.

    :param verbose: when true the level is DEBUG, otherwise WARNING
    :param log_stdout: when true a handler writing to stdout is added to
                       ``LOG``, otherwise logging.basicConfig() is used
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING
    log_format = '%(funcName)s: %(message)s'

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format=log_format,
        )
        return

    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(loglevel)
    handler.setFormatter(logging.Formatter(log_format))
    LOG.addHandler(handler)
    LOG.setLevel(loglevel)
5665
5666
def main_catch(func, args):
    """Call ``func(args)`` and turn the known errors into SystemExit.

    An ``Error`` exits with ``<prog>: <message>`` and a
    ``CephDiskException`` exits with ``<prog> <exception class>:
    <message>``, where ``<prog>`` is ``args.prog``.  Any other exception
    propagates.
    """
    try:
        func(args)

    except Error as e:
        message = '{prog}: {msg}'.format(
            prog=args.prog,
            msg=e,
        )
        raise SystemExit(message)

    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
5689
5690
def run():
    """Run main() with the command line arguments of the process."""
    argv = sys.argv[1:]
    main(argv)
5693
5694
if __name__ == '__main__':
    # Executed as a script: same as run().
    run()
    warned_about = {}