]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-disk/ceph_disk/main.py
update sources to v12.1.3
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
CommitLineData
7c673cae
FG
1#!/usr/bin/env python
2#
c07f9fc5 3# Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com>
7c673cae
FG
4# Copyright (C) 2014 Inktank <info@inktank.com>
5# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6# Copyright (C) 2014 Catalyst.net Ltd
7#
8# Author: Loic Dachary <loic@dachary.org>
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU Library Public License as published by
12# the Free Software Foundation; either version 2, or (at your option)
13# any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU Library Public License for more details.
19#
20
21from __future__ import print_function
22
23import argparse
24import base64
25import errno
26import fcntl
27import json
28import logging
29import os
30import platform
31import re
32import subprocess
33import stat
34import sys
35import tempfile
36import uuid
37import time
38import shlex
c07f9fc5 39import shutil
7c673cae
FG
40import pwd
41import grp
42import textwrap
43import glob
44
45CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
46CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'
47
48KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'
49
50PTYPE = {
51 'regular': {
52 'journal': {
53 # identical because creating a journal is atomic
54 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
55 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
56 },
57 'block': {
58 # identical because creating a block is atomic
59 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
60 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
61 },
62 'block.db': {
63 # identical because creating a block is atomic
64 'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
65 'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
66 },
67 'block.wal': {
68 # identical because creating a block is atomic
69 'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
70 'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
71 },
72 'osd': {
73 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
74 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
75 },
76 'lockbox': {
77 'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
78 'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
79 },
80 },
81 'luks': {
82 'journal': {
83 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
84 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
85 },
86 'block': {
87 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
88 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
89 },
90 'block.db': {
91 'ready': '166418da-c469-4022-adf4-b30afd37f176',
92 'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
93 },
94 'block.wal': {
95 'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
96 'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
97 },
98 'osd': {
99 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
100 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
101 },
102 },
103 'plain': {
104 'journal': {
105 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
106 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
107 },
108 'block': {
109 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
110 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
111 },
112 'block.db': {
113 'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
114 'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
115 },
116 'block.wal': {
117 'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
118 'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
119 },
120 'osd': {
121 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
122 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
123 },
124 },
125 'mpath': {
126 'journal': {
127 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
128 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
129 },
130 'block': {
131 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
132 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
133 },
134 'block.db': {
135 'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
136 'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
137 },
138 'block.wal': {
139 'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
140 'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
141 },
142 'osd': {
143 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
144 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
145 },
146 'lockbox': {
147 'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
148 'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
149 },
150 },
151}
152
153
class Ptype(object):
    """
    Static lookup helpers over the module-level ``PTYPE`` table.

    ``PTYPE`` is indexed first by kind ('regular', 'luks', 'plain',
    'mpath'), then by partition name, then by state ('ready'/'tobe').
    """

    @staticmethod
    def get_ready_by_type(what):
        """Return the 'ready' value of every entry listed under ``what``."""
        return [entry['ready'] for entry in PTYPE[what].values()]

    @staticmethod
    def get_ready_by_name(name):
        """Return the 'ready' value of ``name`` for each kind defining it."""
        return [kind[name]['ready']
                for kind in PTYPE.values()
                if name in kind]

    @staticmethod
    def is_regular_space(ptype):
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """True if ``ptype`` is the 'ready' value of a Space name in ``what``."""
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' ptype back to its Space name.

        :raises: ValueError if no kind uses ``ptype`` for any Space name.
        """
        for kind in PTYPE.values():
            for candidate in Space.NAMES:
                if kind[candidate]['ready'] == ptype:
                    return candidate
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """True if ``ptype`` is a dmcrypt 'ready' value of any Space name."""
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """True if ``ptype`` is the plain or luks 'ready' value of ``name``."""
        return any(ptype == PTYPE[kind][name]['ready']
                   for kind in ('plain', 'luks'))
208
209
210SYSFS = '/sys'
211
212if platform.system() == 'FreeBSD':
213 FREEBSD = True
214 DEFAULT_FS_TYPE = 'zfs'
215 PROCDIR = '/compat/linux/proc'
216 # FreeBSD does not have blockdevices any more
217 BLOCKDIR = '/dev'
c07f9fc5 218 ROOTGROUP = 'wheel'
7c673cae
FG
219else:
220 FREEBSD = False
221 DEFAULT_FS_TYPE = 'xfs'
222 PROCDIR = '/proc'
223 BLOCKDIR = '/sys/block'
c07f9fc5 224 ROOTGROUP = 'root'
7c673cae
FG
225
226"""
227OSD STATUS Definition
228"""
229OSD_STATUS_OUT_DOWN = 0
230OSD_STATUS_OUT_UP = 1
231OSD_STATUS_IN_DOWN = 2
232OSD_STATUS_IN_UP = 3
233
234MOUNT_OPTIONS = dict(
235 btrfs='noatime,user_subvol_rm_allowed',
236 # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
237 # delay a moment before removing it fully because we did have some
238 # issues with ext4 before the xatts-in-leveldb work, and it seemed
239 # that user_xattr helped
240 ext4='noatime,user_xattr',
241 xfs='noatime,inode64',
242)
243
244MKFS_ARGS = dict(
245 btrfs=[
246 # btrfs requires -f, for the same reason as xfs (see comment below)
247 '-f',
248 '-m', 'single',
249 '-l', '32768',
250 '-n', '32768',
251 ],
252 xfs=[
253 # xfs insists on not overwriting previous fs; even if we wipe
254 # partition table, we often recreate it exactly the same way,
255 # so we'll see ghosts of filesystems past
256 '-f',
257 '-i', 'size=2048',
258 ],
259 zfs=[
260 '-o', 'atime=off'
261 ],
262)
263
264INIT_SYSTEMS = [
265 'upstart',
266 'sysvinit',
267 'systemd',
268 'openrc',
269 'bsdrc',
270 'auto',
271 'none',
272]
273
274STATEDIR = '/var/lib/ceph'
275
276SYSCONFDIR = '/etc/ceph'
277
278prepare_lock = None
279activate_lock = None
280SUPPRESS_PREFIX = None
281
282# only warn once about some things
283warned_about = {}
284
285# Nuke the TERM variable to avoid confusing any subprocesses we call.
286# For example, libreadline will print weird control sequences for some
287# TERM values.
288if 'TERM' in os.environ:
289 del os.environ['TERM']
290
291LOG_NAME = __name__
292if LOG_NAME == '__main__':
293 LOG_NAME = os.path.basename(sys.argv[0])
294LOG = logging.getLogger(LOG_NAME)
295
296# Allow user-preferred values for subprocess user and group
297CEPH_PREF_USER = None
298CEPH_PREF_GROUP = None
299
300
class FileLock(object):
    """
    Exclusive ``fcntl.lockf`` lock on a file, used as a context manager.

    The file is created if it does not exist. An instance is not
    reentrant: entering it while it is already held trips an assertion.
    """

    def __init__(self, fn):
        self.fn = fn
        self.fd = None

    def __enter__(self):
        # compare against None: 0 is a valid file descriptor
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except BaseException:
            # do not leak the descriptor when the lock cannot be taken
            os.close(fd)
            raise
        self.fd = fd
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        assert self.fd is not None
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # always release the descriptor, even if unlocking failed
            os.close(self.fd)
            self.fd = None
316
317
class Error(Exception):
    """
    Error
    """
    # NOTE: the class docstring is not just documentation: __str__ uses it
    # (stripped) as the message prefix, so subclasses customise their
    # message by overriding the docstring.

    def __str__(self):
        # __doc__ is None for a subclass declared without a docstring;
        # fall back to the class name instead of crashing on None.strip()
        doc = _bytes2str((self.__doc__ or type(self).__name__).strip())
        try:
            str_type = basestring  # Python 2
        except NameError:
            str_type = str  # Python 3
        args = [a if isinstance(a, str_type) else str(a) for a in self.args]
        return ': '.join([doc] + [_bytes2str(a) for a in args])
331
332
class MountError(Error):
    """
    Mounting filesystem failed
    """
    # the docstring above is the message prefix produced by Error.__str__
337
338
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
    # the docstring above is the message prefix produced by Error.__str__
343
344
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
    # the docstring above is the message prefix produced by Error.__str__
349
350
class TruncatedLineError(Error):
    """
    Line is truncated
    """
    # the docstring above is the message prefix produced by Error.__str__
355
356
class TooManyLinesError(Error):
    """
    Too many lines
    """
    # the docstring above is the message prefix produced by Error.__str__
361
362
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
    # the docstring above is the message prefix produced by Error.__str__
367
368
class CephDiskException(Exception):
    """
    Base class for ceph-disk exceptions that carry a custom (ad-hoc)
    message; they are meant to be caught and handled when main() runs.
    """
375
376
class ExecutableNotFound(CephDiskException):
    """
    Raised when a required executable cannot be found in PATH
    """
382
383
def is_systemd():
    """
    Detect whether systemd is running.

    Reads the command name of PID 1 under PROCDIR and looks for
    'systemd' in it.
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as comm:
        init_name = comm.read()
    return 'systemd' in init_name
390
391
def is_upstart():
    """
    Detect whether upstart is running.

    Runs ``init --version`` and looks for 'upstart' in its stdout.
    """
    stdout = command(['init', '--version'])[0]
    return 'upstart' in stdout
398
399
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    Arguments are forwarded unchanged to ``os.mkdir``; the first
    positional argument is the path. An already existing directory is
    not an error.
    """
    path = a[0]
    # Use lstat directly: os.path.exists() follows symlinks and reports
    # False for a dangling one, which would then never be removed.
    try:
        is_symlink = stat.S_ISLNK(os.lstat(path).st_mode)
    except OSError:
        # nothing at that path (or not inspectable): nothing to remove
        is_symlink = False
    if is_symlink:
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
416
417
def which(executable):
    """
    Find the location of an executable.

    Searches the directories of $PATH (or ``os.defpath`` when PATH is
    unset or empty) followed by a fixed list of common bin/sbin
    directories. Returns the first match that is a regular file with
    execute permission, or None when there is none.
    """
    search_dirs = (os.environ.get('PATH') or os.defpath).split(os.pathsep)
    search_dirs = search_dirs + [
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ]

    for directory in search_dirs:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
437
438
439def _get_command_executable(arguments):
440 """
441 Return the full path for an executable, raise if the executable is not
442 found. If the executable has already a full path do not perform any checks.
443 """
444 if os.path.isabs(arguments[0]): # an absolute path
445 return arguments
446 executable = which(arguments[0])
447 if not executable:
448 command_msg = 'Could not run command: %s' % ' '.join(arguments)
449 executable_msg = '%s not in path.' % arguments[0]
450 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
451
452 # swap the old executable for the new one
453 arguments[0] = executable
454 return arguments
455
456
def command(arguments, **kwargs):
    """
    Run a command through ``subprocess.Popen`` and wait for it, after
    making sure the executable exists (a helpful error is raised if it
    does not).

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are passed on to ``subprocess.Popen``.

    :return: a tuple (stdout, stderr, returncode), with both outputs
             decoded to text.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    process = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout, stderr = process.communicate()

    return _bytes2str(stdout), _bytes2str(stderr), process.returncode
482
483
c07f9fc5
FG
def command_with_stdin(arguments, stdin):
    """
    Run a command, feeding ``stdin`` to its standard input.

    Unlike ``command`` this does not resolve the executable first and
    does not decode the output.

    :param arguments: the argv list to execute
    :param stdin: data to send to the process (text or bytes)
    :raises: SystemExit if the command exits with a non-zero status
    :return: the raw stdout of the command
    """
    LOG.info("Running command with stdin: " + " ".join(arguments))
    # The pipes are opened in binary mode: on Python 3 communicate()
    # rejects text, so encode it first. Bytes (and Python 2 str) pass
    # through untouched.
    if stdin is not None and not isinstance(stdin, bytes):
        stdin = stdin.encode('utf-8')
    process = subprocess.Popen(
        arguments,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out, err = process.communicate(stdin)
    LOG.debug(out)
    if process.returncode != 0:
        LOG.error(err)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=arguments,
                returncode=process.returncode,
            )
        )
    return out
502
503
7c673cae
FG
504def _bytes2str(string):
505 return string.decode('utf-8') if isinstance(string, bytes) else string
506
507
def command_init(arguments, **kwargs):
    """
    Start a command through ``subprocess.Popen`` without waiting for it,
    after making sure the executable exists (a helpful error is raised
    if it does not).

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are passed on to ``subprocess.Popen``.

    :return: the started process, with stdout and stderr piped.
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    return subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
530
531
def command_wait(process):
    """
    Wait for ``process`` to finish and collect its output.

    :return: a tuple (stdout, stderr, returncode), with both outputs
             decoded to text.
    """
    stdout, stderr = process.communicate()
    status = process.returncode
    return _bytes2str(stdout), _bytes2str(stderr), status
540
541
def command_check_call(arguments, exit=False):
    """
    Run a command with ``subprocess.check_call`` after making sure the
    executable exists (a helpful error is raised if it does not).

    When ``exit`` is set to ``True`` a failing command results in a
    clean (sans traceback) system exit instead of a
    ``subprocess.CalledProcessError``.

    .. note:: This should be the preferred way of calling
    ``subprocess.check_call`` since it provides the caller with the safety net
    of making sure that executables *will* be found and will error nicely
    otherwise.
    """
    arguments = _get_command_executable(arguments)
    cmdline = ' '.join(arguments)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(arguments)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=cmdline,
                returncode=error.returncode,
            )
        )
570
571
7c673cae
FG
572#
573# An alternative block_path implementation would be
574#
575# name = basename(dev)
576# return /sys/devices/virtual/block/$name
577#
578# It is however more fragile because it relies on the fact
579# that the basename of the device the user will use always
580# matches the one the driver will use. On Ubuntu 14.04, for
581# instance, when multipath creates a partition table on
582#
583# /dev/mapper/353333330000007d0 -> ../dm-0
584#
585# it will create partition devices named
586#
587# /dev/mapper/353333330000007d0-part1
588#
589# which is the same device as /dev/dm-1 but not a symbolic
590# link to it:
591#
592# ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
593# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
594# lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
595# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
596#
597# Using the basename in this case fails.
598#
599
600
def block_path(dev):
    """
    Return the sysfs directory of a device, derived from its major:minor
    numbers rather than from its name.

    On FreeBSD the device path itself is returned.
    """
    if FREEBSD:
        return dev
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS,
        M=os.major(rdev),
        m=os.minor(rdev),
    )
608
609
def get_dm_uuid(dev):
    """
    Read the device-mapper uuid of a device from sysfs.

    :return: the content of the ``dm/uuid`` file under the device's
             block path (not stripped), or False if that file does not
             exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # context manager so the handle is closed deterministically
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
618
619
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The result is only meant to be used for its truth value: it is
    False on FreeBSD, a falsy value when the device has no dm uuid, and
    otherwise the result of matching the uuid against the multipath
    prefixes.
    """
    if FREEBSD:
        return False
    uuid = get_dm_uuid(dev)
    # raw strings: '\d' in a plain literal is an invalid escape sequence
    return (uuid and
            (re.match(r'part\d+-mpath-', uuid) or
             re.match(r'mpath-', uuid)))
630
631
def get_dev_name(path):
    """
    Convert a device path into a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The leading '/dev/' is dropped and every remaining '/' becomes '!',
    giving names such as::

        sdb
        cciss!c0d1

    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    relative = path[len(prefix):]
    return relative.replace('/', '!')
647
648
def get_dev_path(name):
    """
    Convert a device name (cciss!c0d1) into a path under /dev.

    Every '!' becomes '/', giving paths such as::

        /dev/sdb
        /dev/cciss/c0d1

    """
    relative = name.replace('!', '/')
    return '/dev/' + relative
659
660
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) into a path relative to /dev
    by turning every '!' into '/'.
    """
    return '/'.join(name.split('!'))
666
667
def get_dev_size(dev, size='megabytes'):
    """
    Attempt to get the size of a device so that we can prevent errors
    from actions to devices that are smaller, and improve error reporting.

    The size is found by seeking to the end of the device. Because we
    want to avoid breakage in case this approach is not robust, a
    failure to measure only logs a warning and yields None. Failing to
    open the device still raises.

    :param size: bytes or megabytes (anything else means megabytes)
    :param dev: the device to calculate the size
    :return: the size in the requested unit, rounded down, or None
    """
    handle = os.open(dev, os.O_RDONLY)
    unit_sizes = {'bytes': 1, 'megabytes': 1024 * 1024}
    try:
        total_bytes = os.lseek(handle, 0, os.SEEK_END)
        unit = unit_sizes.get(size, 1024 * 1024)  # default to megabytes
        return total_bytes // unit
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
        return None
    finally:
        os.close(handle)
689
690
224ce89b
WB
def stmode_is_diskdevice(dmode):
    """
    Tell whether a ``st_mode`` value describes a disk device.

    Block devices always qualify. Character devices qualify only on
    FreeBSD, which does not have block devices: all disks are character
    devices there.
    """
    if stat.S_ISBLK(dmode):
        return True
    return FREEBSD and stat.S_ISCHR(dmode)
698
699
def dev_is_diskdevice(dev):
    """True if ``dev`` (symlinks followed) is a disk device."""
    return stmode_is_diskdevice(os.stat(dev).st_mode)
703
704
def ldev_is_diskdevice(dev):
    """True if ``dev`` itself (symlinks not followed) is a disk device."""
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
708
709
def path_is_diskdevice(path):
    """True if ``path``, once fully resolved, is a disk device."""
    resolved = os.path.realpath(path)
    return dev_is_diskdevice(resolved)
713
714
7c673cae
FG
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of multipath device ``dev`` whose dm uuid
    marks it as partition number ``pnum``, or None if there is none.
    """
    part_re = "part{pnum}-mpath-".format(pnum=pnum)
    matches = list_partitions_mpath(dev, part_re)
    return matches[0] if matches else None
722
723
def get_partition_dev(dev, pnum):
    """
    get the device name for a partition

    assume that partitions are named like the base dev,
    with a number, and optionally
    some intervening characters (like 'p'). e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    The lookup is retried up to 10 times, 0.2 seconds apart, before
    giving up.

    :raises: Error if the partition still cannot be found.
    :return: the path of the partition device.
    """
    max_retry = 10
    suffix = str(pnum)
    for attempt in range(max_retry + 1):
        where = ""
        if is_mpath(dev):
            found = get_partition_mpath(dev, pnum)
        else:
            name = get_dev_name(os.path.realpath(dev))
            sys_entry = os.path.join(BLOCKDIR, name)
            where = " in %s" % sys_entry
            # we want the shortest name that starts with the base name
            # and ends with the partition number
            candidates = [entry for entry in os.listdir(sys_entry)
                          if entry.startswith(name) and
                          entry.endswith(suffix)]
            found = min(candidates, key=len) if candidates else None

        if found:
            if attempt:
                LOG.info('Found partition %d for %s after %d tries' %
                         (pnum, dev, attempt))
            return get_dev_path(found)

        if attempt >= max_retry:
            raise Error('partition %d for %s does not appear to exist%s' %
                        (pnum, dev, where))

        LOG.info('Try %d/%d : partition %d for %s does not exist%s' %
                 (attempt + 1, max_retry, pnum, dev, where))
        time.sleep(.2)
765
766
def list_all_partitions():
    """
    Return a list of devices and partitions

    The result is a dict mapping each device name to the list returned
    by ``list_partitions`` for it. Device names come from BLOCKDIR, or
    on FreeBSD from the fourth column of PROCDIR/partitions.
    """
    # initialised before branching: the FreeBSD branch used to fill a
    # dictionary that was only ever created on the other branch
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(get_dev_path(name))
    return dev_part_list
787
788
def list_partitions(dev):
    """
    Return the partition names of ``dev``, using the multipath lookup
    when the resolved device is managed by multipath.
    """
    resolved = os.path.realpath(dev)
    lister = (list_partitions_mpath if is_mpath(resolved)
              else list_partitions_device)
    return lister(resolved)
795
796
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    Return the holders of ``dev`` whose dm uuid matches ``part_re``.

    :param dev: the multipath device
    :param part_re: regular expression matched against the start of
                    each holder's dm uuid
    :return: list of holder names
    """
    holders = os.path.join(block_path(dev), 'holders')
    partitions = []
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # context manager so no handle is left open per holder
        with open(uuid_path, 'r') as uuid_file:
            uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
        if re.match(part_re, uuid):
            partitions.append(holder)
    return partitions
808
809
def list_partitions_device(dev):
    """
    Return a list of partitions on the given device name

    These are the entries of the device's block path whose name starts
    with the device name.
    """
    basename = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(basename)]
820
821
def get_partition_base(dev):
    """
    Get the base device for a partition

    :raises: Error if ``dev`` is not a disk device, is itself a whole
             device, or has no parent under /sys/block.
    :return: the path of the device holding the partition.
    """
    dev = os.path.realpath(dev)
    if not ldev_is_diskdevice(dev):
        raise Error('not a block device', dev)

    sys_block = '/sys/block'
    name = get_dev_name(dev)
    # whole devices have their own entry directly under /sys/block
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', dev)

    # find the base
    for parent in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, parent, name)):
            return get_dev_path(parent)
    raise Error('no parent device for partition', dev)
839
840
def is_partition_mpath(dev):
    """
    True if the dm uuid of ``dev`` marks it as a multipath partition.

    A device without a dm uuid is not a multipath partition.
    """
    uuid = get_dm_uuid(dev)
    # get_dm_uuid returns False when there is no uuid file; re.match
    # would raise TypeError on that, so short-circuit first
    return bool(uuid and re.match(r'part\d+-mpath-', uuid))
844
845
def partnum_mpath(dev):
    """
    Return the partition number found in the dm uuid of ``dev``.

    The number is returned as a string. The call fails if the uuid does
    not contain a ``part<N>-mpath-`` marker.
    """
    uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', uuid)[0]
849
850
def get_partition_base_mpath(dev):
    """
    Return the /dev/mapper path of the device holding a multipath
    partition, using the dm name of the partition's first slave.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # context manager so the handle is closed deterministically
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
858
859
def is_partition(dev):
    """
    Check whether a given device path is a partition or a full disk.

    :raises: Error if the path is neither a disk nor a partition.
    :return: True for a partition, False for a full disk.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    st = os.lstat(dev)
    if not stmode_is_diskdevice(st.st_mode):
        raise Error('not a block device', dev)

    # a full disk has its own entry under BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    numbers = (os.major(st.st_rdev), os.minor(st.st_rdev))
    if not os.path.exists('/sys/dev/block/%d:%d/partition' % numbers):
        raise Error('not a disk or partition', dev)
    return True
883
884
def is_mounted(dev):
    """
    Check if the given device is mounted.

    :return: the mount point of the first entry in PROCDIR/mounts whose
             resolved device is ``dev``, or None if there is none.
    """
    dev = os.path.realpath(dev)
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            # The file is read as bytes; decode the device so that it
            # can compare equal to ``dev``. On Python 3 bytes never
            # equal str, so the undecoded comparison never matched.
            mounts_dev = _bytes2str(fields[0])
            path = fields[1]
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return _bytes2str(path)
    return None
902
903
def is_held(dev):
    """
    Check if a device is held by another device (e.g., a dm-crypt mapping)

    :return: the list of holder names (empty when there is none, and
             always empty for multipath devices).
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    dev = os.path.realpath(dev)
    name = get_dev_name(dev)

    # full disk?
    directory = '/sys/block/{base}/holders'.format(base=name)
    if os.path.exists(directory):
        return os.listdir(directory)

    # partition? try every prefix of the name as the parent device,
    # longest first
    for end in range(len(name), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=name, base=name[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
929
930
def verify_not_in_use(dev, check_partitions=False):
    """
    Verify if a given device (path) is in use (e.g. mounted or
    in use by device-mapper).

    With ``check_partitions`` set and ``dev`` being a full disk, each
    of its partitions is verified as well.

    :raises: Error if device is in use.
    """
    def ensure_free(path):
        # the same pair of checks applies to the device and to each
        # of its partitions
        if is_mounted(path):
            raise Error('Device is mounted', path)
        holders = is_held(path)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % path, ','.join(holders))

    assert os.path.exists(dev)
    ensure_free(dev)

    if check_partitions and not is_partition(dev):
        for partname in list_partitions(dev):
            ensure_free(get_dev_path(partname))
956
957
def must_be_one_line(line):
    """
    Check that the given text is exactly one newline-terminated line.

    :raises: TruncatedLineError if the text does not end with a newline,
             TooManyLinesError if it contains another newline.
    :return: Content of the line, without the trailing newline.
    """
    line = _bytes2str(line)

    content, terminator = line[:-1], line[-1:]
    if terminator != '\n':
        raise TruncatedLineError(line)
    if '\n' in content:
        raise TooManyLinesError(content)
    return content
973
974
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :raises: Error if the file content is not exactly one line.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # context manager so the handle is closed deterministically
        with open(path, 'rb') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
1002
1003
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline. The text goes to a temporary file next to the
    destination, which is synced, handed to ``path_set_context`` and
    then renamed into place.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    payload = text.encode('utf-8') + b'\n'
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(payload)
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1017
1018
def init_get():
    """
    Get the init system by running 'ceph-detect-init', which is told
    to default to sysvinit.
    """
    detect_cmd = ['ceph-detect-init', '--default', 'sysvinit']
    return must_be_one_line(_check_output(args=detect_cmd))
1031
1032
def check_osd_magic(path):
    """
    Check that this path has the Ceph OSD magic.

    :raises: BadMagicError if this does not look like a Ceph OSD data
      dir.
    """
    magic = read_one_line(path, 'magic')
    # a missing magic file probably means the directory is not mkfs'ed
    # yet; it is rejected the same way as a wrong magic
    if magic is None or magic != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1046
1047
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :raises: Error if ``osd_id`` is not made of decimal digits only.
    """
    # \Z instead of $: '$' also matches just before a trailing newline,
    # which would let an id such as '12\n' through
    if not re.match(r'^[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
1054
1055
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
    path,
):
    """
    Allocates an OSD id on the given cluster.

    If the lockbox directory for ``fsid`` already holds an id, that id
    is used and the lockbox keyring is moved to ``path``. Otherwise a
    new OSD is requested with 'ceph osd new', asking for the id stored
    in the 'wanttobe' file of ``path`` when there is one.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """
    lockbox_path = os.path.join(STATEDIR, 'osd-lockbox', fsid)
    lockbox_osd_id = read_one_line(lockbox_path, 'whoami')
    osd_keyring = os.path.join(path, 'keyring')
    if lockbox_osd_id:
        LOG.debug('Getting OSD id from Lockbox...')
        shutil.move(os.path.join(lockbox_path, 'osd_keyring'),
                    osd_keyring)
        path_set_context(osd_keyring)
        os.unlink(os.path.join(lockbox_path, 'whoami'))
        return lockbox_osd_id

    LOG.debug('Allocating OSD id...')
    secrets = Secrets()
    try:
        wanttobe_path = os.path.join(path, 'wanttobe')
        wanttobe = read_one_line(path, 'wanttobe')
        # the requested id is consumed: the file is removed once read
        if os.path.exists(wanttobe_path):
            os.unlink(wanttobe_path)
        id_arg = [wanttobe] if wanttobe else []
        new_osd_cmd = [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            '-i', '-',
            'osd', 'new',
            fsid,
        ] + id_arg
        osd_id = command_with_stdin(new_osd_cmd, secrets.get_json())
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(osd_id)
    check_osd_id(osd_id)
    secrets.write_osd_keyring(osd_keyring, osd_id)
    return osd_id
1105
1106
def get_osd_id(path):
    """
    Gets the OSD id of the OSD at the given path.

    The id is read from the 'whoami' file; None is returned when that
    file does not exist.

    :raises: Error if the stored id is not numeric.
    """
    osd_id = read_one_line(path, 'whoami')
    if osd_id is None:
        return osd_id
    check_osd_id(osd_id)
    return osd_id
1115
1116
def get_ceph_user():
    """
    Return the user to run ceph subprocesses as.

    A preferred user (CEPH_PREF_USER) must exist: if it does not, a
    message is printed and the program exits with status 2. Without a
    preference, 'ceph' is used when that user exists, 'root' otherwise.
    """
    preferred = CEPH_PREF_USER

    if preferred is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(preferred)
    except KeyError:
        print("No such user:", preferred)
        sys.exit(2)
    return preferred
1133
1134
def get_ceph_group():
    """
    Return the group to run ceph subprocesses as.

    A preferred group (CEPH_PREF_GROUP) must exist: if it does not, a
    message is printed and the program exits with status 2. Without a
    preference, 'ceph' is used when that group exists, 'root' otherwise.
    """
    preferred = CEPH_PREF_GROUP

    if preferred is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(preferred)
    except KeyError:
        print("No such group:", preferred)
        sys.exit(2)
    return preferred
1151
1152
def path_set_context(path):
    """
    Reset the SELinux context and the ownership of ``path``, recursively.

    Each step is best effort: the outcome of the commands is not checked.
    """
    # restore selinux context to default policy values
    restorecon_available = which('restorecon')
    if restorecon_available:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    runs_as_ceph = get_ceph_user() == 'ceph'
    if runs_as_ceph:
        command(['chown', '-R', 'ceph:ceph', path])
1161
1162
def _check_output(args=None, **kwargs):
    """
    Run a command with ``command`` and return its stdout.

    :raises: subprocess.CalledProcessError on a non-zero exit status,
             with ``output`` set to stdout followed by stderr.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return _bytes2str(out)
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
1171
1172
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    The lookup is done with ``ceph-conf --lookup`` for the ``osd.``
    name; only the first line of its output is used.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # ``err`` is not bound when command() itself raised, so it
        # cannot be part of the report
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1206
1207
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code.

    The value is the first line printed by
    ``ceph-osd --show-config-value=<variable>``.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error when the ceph-osd invocation reports a failure.
    """
    cmdline = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        output = _check_output(args=cmdline, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line = str(output).split('\n', 1)[0]
    return first_line
1236
1237
def get_fsid(cluster):
    """
    Look up the ``fsid`` config value of *cluster*.

    :return: The fsid in lower case.
    :raises: Error if the lookup returns None.
    """
    cluster_fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    if cluster_fsid is not None:
        return cluster_fsid.lower()
    raise Error('getting cluster uuid from configuration failed')
1248
1249
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Build the path of the dmcrypt key file for *_uuid* in *key_dir*.

    LUKS keys carry a ``.luks.key`` suffix, other keys are named after
    the uuid alone.

    :return: Path to the dmcrypt key file, callers should check for
             existence.
    """
    filename = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, filename)
1266
1267
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Locate the dm-crypt key for the partition identified by *_uuid*.

    A legacy key file in *key_dir* takes precedence; otherwise the key
    is fetched with ``ceph config-key get`` using the keyring found in
    the lockbox directory STATEDIR/osd-lockbox/<_uuid>.

    :return: a one-element tuple holding the legacy key file path, or
             the base64-decoded key retrieved through the lockbox.
    :raises: Error if neither location exists, no cluster matches the
             lockbox ``ceph_fsid``, the key management mode is unknown
             or the ``ceph config-key get`` command fails.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        # legacy, before lockbox: the tuple tells callers this is a path
        return (legacy_path,)
    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if not os.path.exists(path):
        raise Error('unable to read dm-crypt key', path, legacy_path)

    mode = get_oneliner(path, 'key-management-mode')
    osd_uuid = get_oneliner(path, 'osd-uuid')
    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
                    "for cluster name")
        cluster = 'ceph'
    else:
        cluster = find_cluster_by_uuid(ceph_fsid)
        if cluster is None:
            raise Error('No cluster conf found in ' + SYSCONFDIR +
                        ' with fsid %s' % ceph_fsid)

    if mode != KEY_MANAGEMENT_MODE_V1:
        raise Error('unknown key-management-mode ' + str(mode))

    key, stderr, ret = command(
        [
            'ceph',
            '--cluster', cluster,
            '--name',
            'client.osd-lockbox.' + osd_uuid,
            '--keyring',
            os.path.join(path, 'keyring'),
            'config-key',
            'get',
            'dm-crypt/osd/' + osd_uuid + '/luks',
        ],
    )
    LOG.debug("stderr " + stderr)
    if ret != 0:
        # An explicit check instead of assert: asserts vanish under -O
        # and the output of a failed command must never be used as key.
        raise Error('unable to retrieve dm-crypt key for osd ' + osd_uuid +
                    ': ceph config-key get exited with status %s' % ret)
    return base64.b64decode(key)
1311
1312
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map *rawdev* through cryptsetup as /dev/mapper/<_uuid>.

    If dmcrypt_is_mapped() already reports a device for *_uuid* it is
    returned untouched. A tuple *key* is treated as the path of a key
    file; any other key is fed to cryptsetup on stdin (key file ``-``).
    With *luks*, the device is opened with luksOpen, after luksFormat
    when *format_dev* is set; without it a plain mapping is created.

    :return: the mapped device path.
    :raises: Error when a command reports CalledProcessError.
    """
    already_mapped = dmcrypt_is_mapped(_uuid)
    if already_mapped:
        return already_mapped

    if not isinstance(key, tuple):
        keypath = '-'
    else:
        # legacy, before lockbox
        assert os.path.exists(key[0])
        keypath, key = key[0], None

    dev = '/dev/mapper/' + _uuid
    key_file_opts = ['--key-file', keypath]

    luksFormat_args = (['cryptsetup', '--batch-mode'] + key_file_opts +
                       ['luksFormat', rawdev] + cryptsetup_parameters)
    luksOpen_args = (['cryptsetup'] + key_file_opts +
                     ['luksOpen', rawdev, _uuid])
    create_args = (['cryptsetup'] + key_file_opts +
                   ['create', _uuid, rawdev] + cryptsetup_parameters)

    try:
        if not luks:
            # Plain mode has no format function, nor any validation
            # that the key is correct.
            command_with_stdin(create_args, key)
        else:
            if format_dev:
                command_with_stdin(luksFormat_args, key)
            command_with_stdin(luksOpen_args, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', dev])
        return dev

    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
1375
1376
def dmcrypt_unmap(
    _uuid
):
    """
    Remove the /dev/mapper/<_uuid> mapping if it exists.

    ``cryptsetup remove`` is attempted up to eleven times, sleeping
    0.5s, 1.5s, 2.5s, ... between attempts.

    :raises: Error when the last attempt still fails.
    """
    if not os.path.exists('/dev/mapper/' + _uuid):
        return
    last_attempt = 10
    for attempt in range(last_attempt + 1):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
            return
        except subprocess.CalledProcessError as e:
            if attempt == last_attempt:
                raise Error('unable to unmap device', _uuid, e)
            time.sleep(0.5 + attempt * 1.0)
1393
1394
def mount(
    dev,
    fstype,
    options,
):
    """
    Mount *dev* on a fresh ``mnt.*`` directory under STATEDIR/tmp.

    When *options* is None the MOUNT_OPTIONS entry for *fstype* is used
    (empty string when there is none). restorecon is run on the mount
    point when it is available.

    :return: the path the device was mounted on.
    :raises: ValueError if dev or fstype is None, MountError when a
             command reports CalledProcessError (the temporary
             directory is then removed on a best-effort basis).
    """
    # sanity check: none of the arguments are None
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp expects 'dir' to exist, so create it when missing
    myTemp = STATEDIR + '/tmp'
    if not os.path.exists(myTemp):
        os.makedirs(myTemp)

    path = tempfile.mkdtemp(prefix='mnt.', dir=myTemp)
    mount_cmd = ['mount', '-t', fstype, '-o', options, '--', dev, path]
    try:
        LOG.debug('Mounting %s on %s with options %s', dev, path, options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', path])
    except subprocess.CalledProcessError as e:
        try:
            os.rmdir(path)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return path
1452
1453
def unmount(
    path,
    do_rm=True,
):
    """
    Unmount *path* and, unless *do_rm* is false, remove the directory.

    /bin/umount is attempted up to four times with an incremental
    backoff (0.5s, 1.5s, 2.5s).

    :raises: UnmountError when the last attempt still fails.
    """
    last_attempt = 3
    for attempt in range(last_attempt + 1):
        try:
            LOG.debug('Unmounting %s', path)
            command_check_call(['/bin/umount', '--', path])
            break
        except subprocess.CalledProcessError as e:
            # on failure, retry 3 times with incremental backoff
            if attempt == last_attempt:
                raise UnmountError(e)
            time.sleep(0.5 + attempt * 1.0)
    if do_rm:
        os.rmdir(path)
1483
1484
1485###########################################
1486
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from ``parted --machine`` output.

    :param partitions: the partition part of the parted output, one
                       partition per line, each starting with its number.
    :return: list of ints, one per line that starts with digits; empty
             when no line does. A real list (not a lazy ``map`` object)
             so that callers can test it for emptiness.
    """
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1490
1491
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    The index is derived from the output of
    ``parted --machine -- <dev> print``.

    :return: Index number (> 1 if there is already a partition on the device)
             or 1 if there is no partition table.
    :raises: Error if parted prints nothing or its output lacks the
             expected unit marker or the device path.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # Split on the first occurrence only so that unpacking cannot fail
    # should the device path show up more than once in the output.
    _, partitions = lines.split(real_dev, 1)
    # Materialize the result: an iterator would always be truthy and
    # max() would then fail on a device without partitions.
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    return 1
1530
1531
def check_journal_reqs(args):
    """
    Ask ceph-osd whether the backend allows, wants and needs a journal.

    ceph-osd is run once for each of ``--check-allows-journal``,
    ``--check-wants-journal`` and ``--check-needs-journal``; each
    answer is True when the corresponding run exits with status 0.

    :return: tuple (allows_journal, wants_journal, needs_journal)
    """
    answers = []
    for question in ('allows', 'wants', 'needs'):
        _, _, status = command([
            'ceph-osd', '--check-%s-journal' % question,
            '-i', '0',
            '--log-file', '$run_dir/$cluster-osd-check.log',
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        answers.append(not status)
    return tuple(answers)
1558
1559
def update_partition(dev, description):
    """
    Must be called after modifying a partition table so the kernel
    knows about the change and fires udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The first udevadm settle waits for ongoing udev events to
    complete, just in case one of them relies on an existing partition
    on dev. The second udevadm settle guarantees to the caller that
    all udev events related to the partition table change have been
    processed, i.e. the 95-ceph-osd.rules actions and mode changes,
    group changes etc. are complete.

    partprobe is tried up to five times, waiting 60 seconds after a
    failure whose output mentions 'unable to inform the kernel' or
    'Device or resource busy'; any other failure is re-raised.

    :raises: Error when all five attempts failed.
    """
    LOG.debug('Calling partprobe on %s device %s', description, dev)
    settle = ['udevadm', 'settle', '--timeout=600']
    retryable = ('unable to inform the kernel', 'Device or resource busy')
    error = 'unknown error'
    partprobe = _get_command_executable(['partprobe'])[0]
    for _attempt in range(5):
        command_check_call(settle)
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            if not any(marker in error for marker in retryable):
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed with a retryable error
        raise Error('partprobe %s failed : %s' % (dev, error))
    command_check_call(settle)
1593
1594
def zap_linux(dev):
    """
    Wipe every partition of *dev* and then its partition table.

    Each partition gets ``wipefs --all`` and its first 10M overwritten
    with zeros. The last 33 * 4096 bytes of the device are zeroed
    before sgdisk zaps and re-creates the table, and
    update_partition() is called at the end.

    :raises: Error when a command reports CalledProcessError.
    """
    try:
        # Thoroughly wipe all partitions of any traces of
        # Filesystems or OSD Journals
        #
        # In addition we need to write 10M of data to each partition
        # to make sure that after re-creating the same partition
        # there is no trace left of any previous Filesystem or OSD
        # Journal

        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for partname in list_partitions(dev):
            partition = get_dev_path(partname)
            command_check_call(['wipefs', '--all', partition])
            command_check_call(
                ['dd', 'if=/dev/zero', 'of=' + '{path}'.format(path=partition),
                 'bs=1M', 'count=10'],
            )

        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups. sgdisk
        # isn't too thorough.
        lba_size = 4096
        tail_size = 33 * lba_size
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-tail_size, os.SEEK_END)
            dev_file.write(tail_size * b'\0')

        for sgdisk_options in (['--zap-all'], ['--clear', '--mbrtogpt']):
            command_check_call(['sgdisk'] + sgdisk_options + ['--', dev])
        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1658
1659
224ce89b
WB
def zap_freebsd(dev):
    """
    Destroy the partitioning of *dev* with ``gpart destroy -F``.

    :raises: Error when gpart reports CalledProcessError.
    """
    # For FreeBSD we just need to zap the partition.
    destroy_cmd = ['gpart', 'destroy', '-F', dev]
    try:
        command_check_call(destroy_cmd)
    except subprocess.CalledProcessError as e:
        raise Error(e)
1674
1675
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    *dev* is resolved with os.path.realpath() first.

    :raises: Error if the resolved path is not a block device or is a
             partition.
    """
    dev = os.path.realpath(dev)
    dmode = os.stat(dev).st_mode
    is_whole_disk = stat.S_ISBLK(dmode) and not is_partition(dev)
    if not is_whole_disk:
        raise Error('not full block device; cannot zap', dev)
    zapper = zap_freebsd if FREEBSD else zap_linux
    zapper(dev)
1688
1689
7c673cae
FG
def adjust_symlink(target, path):
    """
    Make *path* a symlink pointing to *target*.

    An existing regular file, or a symlink with a different target, is
    removed first; a symlink that already points to *target* is left
    alone. Any other kind of existing entry is not removed, so the
    symlink creation is then expected to fail.

    :raises: Error if the old entry cannot be inspected or removed, or
             if the symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into an Error.
        except Exception:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except Exception:
            raise Error('unable to create symlink %s -> %s' % (path, target))
1714
1715
def get_mount_options(cluster, fs_type):
    """
    Get the configured mount options for file systems of *fs_type*.

    ``osd_mount_options_<fs_type>`` is looked up first, then
    ``osd_fs_mount_options_<fs_type>``.

    :return: the option string with all whitespace removed, or None
             when neither variable is set.
    """
    mount_options = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(
            fstype=fs_type,
        ),
    )
    if mount_options is None:
        mount_options = get_conf(
            cluster,
            variable='osd_fs_mount_options_{fstype}'.format(
                fstype=fs_type,
            ),
        )
    if mount_options is not None:
        # remove whitespaces, whichever variable the value came from
        mount_options = "".join(mount_options.split())
    return mount_options
1734
1735
class Device(object):
    """
    A whole (non-partition) device on which partitions are created
    with sgdisk.
    """

    def __init__(self, path, args):
        self.args = args
        self.path = path
        self.dev_size = None    # cached result of get_dev_size()
        self.partitions = {}    # partition number -> DevicePartition
        self.ptype_map = None   # filled in by ptype_tobe_for_name()
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create partition *num* named ``ceph <name>`` with GUID *uuid*.

        With num == 0 the next free index is used. A positive *size*
        (in M) creates a partition of that size, otherwise the largest
        possible partition is created.

        :return: the partition number.
        :raises: Error if *size* exceeds the device size.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)
        if size <= 0:
            new = '--largest-new={num}'.format(num=num)
        else:
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
            if size > self.get_dev_size():
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, self.get_dev_size()))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, self.get_dev_size(), name))

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type of *name* ('data' is an alias
        for 'osd').
        """
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if self.ptype_map is None:
            partition = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = partition.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """
        Return the DevicePartition for partition *num*, creating and
        caching it on first use.
        """
        try:
            return self.partitions[num]
        except KeyError:
            pass
        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return self.partitions[num]

    def get_dev_size(self):
        """
        Return the size reported by get_dev_size() for this device; the
        value is computed once and cached.
        """
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        return Device(path, args)
1811
1812
class DevicePartition(object):
    """
    A partition of a device, with lazily read uuid and type and the
    PTYPE map matching its flavor ('regular' here).
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        self.rawdev = None
        self.dev = None
        self.uuid = None
        self.ptype_map = None
        self.ptype = None
        self.set_variables_ptype()

    def get_uuid(self):
        # read from the raw device on first use, then cached
        if self.uuid is not None:
            return self.uuid
        self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        # read from the raw device on first use, then cached
        if self.ptype is not None:
            return self.ptype
        self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        # dev and rawdev start out identical
        self.dev = self.rawdev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type of *name*."""
        return self.ptype_map[name]['ready']

    @staticmethod
    def factory(path, dev, args):
        """
        Build the DevicePartition subclass matching *path* / *dev*:
        multipath if either is a multipath device, otherwise the class
        for the dmcrypt type of *args* ('luks' or 'plain'), otherwise a
        plain DevicePartition. The result has *dev* set.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_multipath = any(candidate is not None and is_mpath(candidate)
                           for candidate in (path, dev))
        if on_multipath:
            partition = DevicePartitionMultipath(args)
        elif dmcrypt_type == 'luks':
            partition = DevicePartitionCryptLuks(args)
        elif dmcrypt_type == 'plain':
            partition = DevicePartitionCryptPlain(args)
        else:
            partition = DevicePartition(args)
        partition.set_dev(dev)
        return partition
1871
1872
class DevicePartitionMultipath(DevicePartition):
    """DevicePartition that uses the 'mpath' partition types."""

    def set_variables_ptype(self):
        mpath_types = PTYPE['mpath']
        self.ptype_map = mpath_types
1877
1878
class DevicePartitionCrypt(DevicePartition):
    """
    Base class of the dm-crypt partitions: maps and unmaps the raw
    device through cryptsetup. Subclasses provide luks() and a
    setup_crypt() that obtains the key.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        self.osd_dm_key = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        pass

    def map(self):
        """Map the raw device (formatting it if needed) and point
        ``dev`` at the mapped device."""
        self.setup_crypt()
        mapped = _dmcrypt_map(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = mapped

    def unmap(self):
        """Remove the mapping and point ``dev`` back at the raw device."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        self.setup_crypt()
        self.map()
1911
1912
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """dm-crypt partition in plain (non-LUKS) mode."""

    def luks(self):
        return False

    def setup_crypt(self):
        """Add the key size to the cryptsetup parameters and fetch the
        key; does nothing once the key is known."""
        if self.osd_dm_key is None:
            keysize_opts = ['--key-size', str(self.dmcrypt_keysize)]
            self.cryptsetup_parameters += keysize_opts

            self.osd_dm_key = get_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir,
                False)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1930
1931
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """dm-crypt partition in LUKS mode."""

    def luks(self):
        return True

    def setup_crypt(self):
        """Fetch the key, adding an explicit key size to the cryptsetup
        parameters unless it is 1024; does nothing once the key is
        known."""
        if self.osd_dm_key is not None:
            return

        # A key size of 1024 is not forced into the
        # cryptsetup_parameters, as we want the cryptsetup defaults
        # to prevail for the actual LUKS key lengths.
        if self.dmcrypt_keysize != 1024:
            self.cryptsetup_parameters += ['--key-size',
                                           str(self.dmcrypt_keysize)]

        self.osd_dm_key = get_dmcrypt_key(
            self.get_uuid(), self.args.dmcrypt_key_dir,
            True)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']
1956
1957
class Prepare(object):
    """
    Entry point of the ``prepare`` subcommand: declares its options and
    dispatches to PrepareBluestore or PrepareFilestore.
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """Return the parser holding the options common to all
        objectstores."""
        parser = argparse.ArgumentParser(add_help=False)
        add = parser.add_argument
        add('--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to')
        add('--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to')
        add('--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to')
        add('--osd-id',
            metavar='ID',
            help='unique OSD id to assign this disk to')
        add('--crush-device-class',
            help='crush device class to assign this disk to')
        add('--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt')
        add('--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored')
        add('--prepare-key',
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            default='{statedir}/bootstrap-osd/{cluster}.keyring',
            dest='prepare_key_template')
        add('--no-locking',
            action='store_true', default=None,
            help='let many prepare\'s run in parallel')
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """Register the ``prepare`` subcommand on *subparsers* and
        return its parser."""
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
            Lockbox.parser(),
        ]
        parents += PrepareFilestore.parent_parsers()
        parents += PrepareBluestore.parent_parsers()
        description = textwrap.fill(textwrap.dedent("""\
        If the --bluestore argument is given, a bluestore objectstore
        will be created. If --filestore is provided, a legacy FileStore
        objectstore will be created. If neither is specified, we default
        to BlueStore.

        When an entire device is prepared for bluestore, two
        partitions are created. The first partition is for metadata,
        the second partition is for blocks that contain data.

        Unless explicitly specified with --block.db or
        --block.wal, the bluestore DB and WAL data is stored on
        the main block device. For instance:

        ceph-disk prepare --bluestore /dev/sdc

        Will create

        /dev/sdc1 for osd metadata
        /dev/sdc2 for block, db, and wal data (the rest of the disk)


        If either --block.db or --block.wal are specified to be
        the same whole device, they will be created as partition
        three and four respectively. For instance:

        ceph-disk prepare --bluestore \\
        --block.db /dev/sdc \\
        --block.wal /dev/sdc \\
        /dev/sdc

        Will create

        /dev/sdc1 for osd metadata
        /dev/sdc2 for block (the rest of the disk)
        /dev/sdc3 for db
        /dev/sdc4 for wal

        """))
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description,
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(
            func=Prepare.main,
        )
        return parser

    def prepare(self):
        """Run _prepare(), holding prepare_lock unless --no-locking was
        given."""
        if not self.args.no_locking:
            with prepare_lock:
                self._prepare()
        else:
            self._prepare()

    @staticmethod
    def factory(args):
        chosen = PrepareBluestore if args.bluestore else PrepareFilestore
        return chosen(args)

    @staticmethod
    def main(args):
        Prepare.factory(args).prepare()
2092
2093
class PrepareFilestore(Prepare):
    """Prepare an OSD made of a data space and a journal."""

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # the lockbox only exists when dmcrypt is requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        wants_dmcrypt = self.data.args.dmcrypt
        if wants_dmcrypt:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2113
2114
class PrepareBluestore(Prepare):
    """Prepare an OSD made of a data space plus block, block.db and
    block.wal spaces."""

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # the lockbox only exists when dmcrypt is requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """Return the parser for --bluestore / --filestore; both write
        the ``bluestore`` destination, which defaults to True."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument('--bluestore',
                            dest='bluestore',
                            action='store_true', default=True,
                            help='bluestore objectstore')
        parser.add_argument('--filestore',
                            dest='bluestore',
                            action='store_false',
                            help='filestore objectstore')
        return parser

    @staticmethod
    def parent_parsers():
        providers = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [provider.parser() for provider in providers]

    def _prepare(self):
        """Prepare the data space together with block and, when they
        were requested, block.db and block.wal."""
        data_args = self.data.args
        if data_args.dmcrypt:
            self.lockbox.prepare()
        optional_spaces = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        to_prepare_list = [space for option, space in optional_spaces
                           if getattr(data_args, option)]
        to_prepare_list.append(self.block)
        self.data.prepare(*to_prepare_list)
2162
2163
class Space(object):
    """Holder for the names of the spaces known to this module."""

    NAMES = (
        'block',
        'journal',
        'block.db',
        'block.wal',
    )
2167
2168
class PrepareSpace(object):
    """
    Base class for preparing one named space (``self.name``, set by
    subclasses before calling __init__) that is either absent, a file
    or a device.
    """

    # values of self.type
    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        # generate a uuid for the space unless one was given
        if getattr(self.args, self.name + '_uuid') is None:
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        self.space_symlink = None
        self.space_dmcrypt = None

    def set_type(self):
        """
        Set self.type to NONE, FILE or DEVICE from the space argument.

        When the space is wanted but unspecified and the data argument
        is a whole disk device, the space is colocated with the data.

        :raises: Error when the argument contradicts the --<name>-dev
                 or --<name>-file flag, or is neither a block device
                 nor a regular file.
        """
        name = self.name
        args = self.args
        if (self.wants_space() and
                dev_is_diskdevice(args.data) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        if getattr(args, name) is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.NONE
            return

        if not os.path.exists(getattr(args, name)):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.FILE
            return

        mode = os.stat(getattr(args, name)).st_mode
        if stmode_is_diskdevice(mode):
            if getattr(args, name + '_file'):
                # capitalize must be called, otherwise the message
                # shows the repr of the bound method
                raise Error('%s is not a regular file' % name.capitalize(),
                            getattr(args, name))
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            getattr(args, name))
            self.type = self.FILE
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), getattr(args, name)))

    def is_none(self):
        return self.type == self.NONE

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser(name, positional=True):
        """
        Return a parser with the --<name>-uuid, --<name>-file and
        --<name>-dev options and, if *positional*, an optional
        positional argument called *name*.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )

        if positional:
            parser.add_argument(
                name,
                metavar=name.upper(),
                nargs='?',
                help=('path to OSD %s disk block device;' % name +
                      ' leave out to store %s in file' % name),
            )
        return parser

    def wants_space(self):
        return True

    def populate_data_path(self, path):
        """Record this space in the data directory *path*, according to
        its type."""
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """Write the <name>_uuid file and adjust the <name> symlink
        under *path*."""
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

    def populate_data_path_device(self, path):
        """Same as populate_data_path_file(), plus the <name>_dmcrypt
        symlink, which is removed when dmcrypt is not in use."""
        self.populate_data_path_file(path)

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                # best effort: the link usually does not exist
                pass

    def prepare(self):
        """Prepare the space according to its type."""
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """Create the space file if it is missing and use it as the
        symlink target."""
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            space_file = open(space_filename, 'wb')
            space_file.close()
            path_set_context(space_filename)

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        self.space_symlink = space_filename

    def prepare_device(self):
        """
        Prepare a device or partition for the space and set
        space_symlink (and space_dmcrypt with --dmcrypt).

        An existing partition is either reused (when its type shows it
        was prepared before) or symlinked directly; on a whole device a
        new partition is created, mapped through dm-crypt if requested,
        and given its 'ready' type code.

        :raises: Error if an existing partition is given together with
                 --dmcrypt.
        """
        reusing_partition = False

        if is_partition(getattr(self.args, self.name)):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), getattr(self.args, self.name))
            partition = DevicePartition.factory(
                path=None, dev=getattr(self.args, self.name), args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(getattr(self.args, self.name) +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          getattr(self.args, self.name))
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID. See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, self.name + '_uuid', partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, self.name + '_uuid'))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            getattr(self.args, self.name))
                self.space_symlink = getattr(self.args, self.name)
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, self.name + '_uuid'))

        if self.args.dmcrypt:
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, self.name + '_uuid'))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(getattr(self.args, self.name), self.args)
        num = device.create_partition(
            uuid=getattr(self.args, self.name + '_uuid'),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()
            partition.map()

        # switch the partition from its 'tobe' to its 'ready' type
        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                getattr(self.args, self.name),
            ],
        )
        update_partition(getattr(self.args, self.name), 'prepared')

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2422
2423
class PrepareJournal(PrepareSpace):
    """PrepareSpace specialisation for the OSD 'journal' space."""

    def __init__(self, args):
        self.name = 'journal'
        requirements = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = requirements

        # Reject an explicit journal when the backend does not allow one.
        if args.journal and not self.allows_journal:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """Return the 'wants journal' flag from check_journal_reqs()."""
        return self.wants_journal

    def get_space_size(self):
        """Return the configured 'osd_journal_size' as an int."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """Return 2 when journal and data share a device, else 0."""
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        return 2 if self.args.journal == self.args.data else 0

    @staticmethod
    def parser():
        """Return the argument parser for the journal space."""
        return PrepareSpace.parser('journal')
2458
2459
class PrepareBluestoreBlock(PrepareSpace):
    """PrepareSpace specialisation for the bluestore 'block' space."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the block partition size in MB (0 = all available space).

        The size comes from the 'bluestore_block_size' configuration
        value, which is divided by 1048576 to obtain MB.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        # Floor division keeps the result an integer on Python 3 as
        # well; '/' would yield a float there.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block and data share a device, else 0."""
        if self.args.block == self.args.data:
            return 2
        return 0

    @staticmethod
    def parser():
        """Return the argument parser for the block space."""
        return PrepareSpace.parser('block')
2487
2488
class PrepareBluestoreBlockDB(PrepareSpace):
    """PrepareSpace specialisation for the bluestore 'block.db' space."""

    def __init__(self, args):
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the block.db partition size in MB.

        Uses 'bluestore_block_db_size' when it is set and non-zero.
        Otherwise falls back to 1% of 'bluestore_block_size', with a
        floor of 1024 MB (also used when neither value is configured).
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is not None and int(block_db_size) != 0:
            # Floor division keeps the result an integer on Python 3.
            return int(block_db_size) // 1048576  # MB

        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )
        if block_size is None:
            return 1024  # MB
        size = int(block_size) // 100 // 1048576
        return max(size, 1024)  # MB

    def desired_partition_number(self):
        """Return 3 when block.db and data share a device, else 0."""
        # The attribute name contains a dot, hence getattr().
        if getattr(self.args, 'block.db') == self.args.data:
            return 3
        return 0

    def wants_space(self):
        """block.db is never created unless explicitly requested."""
        return False

    @staticmethod
    def parser():
        """Return the argument parser exposing the --block.db option."""
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2532
2533
class PrepareBluestoreBlockWAL(PrepareSpace):
    """PrepareSpace specialisation for the bluestore 'block.wal' space."""

    def __init__(self, args):
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the block.wal partition size in MB.

        Uses 'bluestore_block_wal_size' when configured, otherwise the
        576 MB default.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        # Floor division keeps the result an integer on Python 3 as
        # well; '/' would yield a float there.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal and data share a device, else 0."""
        # The attribute name contains a dot, hence getattr().
        if getattr(self.args, 'block.wal') == self.args.data:
            return 4
        return 0

    def wants_space(self):
        """block.wal is never created unless explicitly requested."""
        return False

    @staticmethod
    def parser():
        """Return the argument parser exposing the --block.wal option."""
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2570
2571
class CryptHelpers(object):
    """Static helpers that read dmcrypt settings from the configuration."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return 'osd_cryptsetup_parameters' split into an argv list.

        An empty list is returned when the option is not set.
        """
        raw = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        return [] if raw is None else shlex.split(raw)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dmcrypt key size for the active dmcrypt type.

        Returns 0 when dmcrypt is not in use.  Without an explicit
        'osd_dmcrypt_key_size' the default is 1024 for luks and 256
        for plain.
        """
        keysize = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        kind = CryptHelpers.get_dmcrypt_type(args)

        if kind != 'luks' and kind != 'plain':
            return 0

        if keysize is None:
            if kind == 'luks':
                # As LUKS will hash the 'passphrase' in .luks.key
                # into a key, set a large default
                # so if not updated for some time, it is still a
                # reasonable value.
                return 1024
            # This value is hard-coded in the udev script
            return 256

        if kind == 'plain':
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % keysize)
        return int(keysize)

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain', or None when dmcrypt is not enabled.

        'luks' is the default when 'osd_dmcrypt_type' is unset.
        Raises Error for any other configured value.
        """
        if not (hasattr(args, 'dmcrypt') and args.dmcrypt):
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2632
2633
c07f9fc5
FG
class Secrets(object):
    """Holds the cephx secret generated for an OSD being prepared."""

    def __init__(self):
        # ceph-authtool prints a freshly generated key on stdout.
        generated, err_output, status = command(
            ['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + err_output)
        assert status == 0
        self.keys = {'cephx_secret': generated.strip()}

    def write_osd_keyring(self, keyring, osd_id):
        """Create *keyring* holding the cephx secret for osd.<osd_id>."""
        authtool = [
            'ceph-authtool', keyring,
            '--create-keyring',
            '--name', 'osd.' + str(osd_id),
            '--add-key', self.keys['cephx_secret'],
        ]
        command_check_call(authtool)
        path_set_context(keyring)

    def get_json(self):
        """Return the keys serialised as an ASCII JSON bytearray."""
        serialized = json.dumps(self.keys)
        return bytearray(serialized, 'ascii')
2656
2657
class LockboxSecrets(Secrets):
    """Secrets extended with a dmcrypt key and a lockbox cephx secret."""

    def __init__(self, args):
        super(LockboxSecrets, self).__init__()

        # key_size is in bits; read the matching number of random bytes.
        # Floor division keeps the count an integer on Python 3, where
        # file.read() rejects a float, and the context manager closes
        # the /dev/urandom handle.
        key_size = CryptHelpers.get_dmcrypt_keysize(args)
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key).decode('ascii')

        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        assert ret == 0

        self.keys.update({
            'dmcrypt_key': base64_key,
            'cephx_lockbox_secret': secret.strip(),
        })

    def write_lockbox_keyring(self, path, osd_uuid):
        """Create <path>/keyring for client.osd-lockbox.<osd_uuid>."""
        keyring = os.path.join(path, 'keyring')
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'client.osd-lockbox.' + osd_uuid,
                '--add-key', self.keys['cephx_lockbox_secret'],
            ])
        path_set_context(keyring)
2686
2687
7c673cae
FG
class Lockbox(object):
    """Creates, populates and activates the lockbox partition.

    The lockbox is a small ext4 partition mounted under
    STATEDIR/osd-lockbox/<osd uuid>; it holds the keyrings and the
    one-line metadata files written by populate() and create_key().
    """

    def __init__(self, args):
        self.args = args
        self.partition = None
        # Only set when this object created the partition itself.
        self.device = None

        # Default the lockbox location to the data device.
        if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
            self.args.lockbox = self.args.data

    def set_partition(self, partition):
        """Use an already known partition object as the lockbox."""
        self.partition = partition

    @staticmethod
    def parser():
        """Return the argument parser for the lockbox options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--lockbox',
            help='path to the device to store the lockbox',
        )
        parser.add_argument(
            '--lockbox-uuid',
            metavar='UUID',
            help='unique lockbox uuid',
        )
        return parser

    def create_partition(self):
        """Create the 10 MB lockbox partition (number 5) and return it."""
        self.device = Device.factory(self.args.lockbox, argparse.Namespace())
        partition_number = 5
        self.device.create_partition(uuid=self.args.lockbox_uuid,
                                     name='lockbox',
                                     num=partition_number,
                                     size=10)  # MB
        return self.device.get_partition(partition_number)

    def set_or_create_partition(self):
        """Adopt args.lockbox if it is a partition, else create one on it."""
        if is_partition(self.args.lockbox):
            LOG.debug('OSD lockbox device %s is a partition',
                      self.args.lockbox)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.lockbox, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('lockbox')
            if ptype not in ready:
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, str(ready)))
        else:
            # The message used to say 'osd partition', a copy-paste
            # leftover from the data partition code.
            LOG.debug('Creating lockbox partition on %s',
                      self.args.lockbox)
            self.partition = self.create_partition()

    def create_key(self):
        """Register the OSD with 'ceph osd new' and write its keyrings.

        The generated secrets are sent on stdin; the OSD id printed by
        the command is validated and stored in the 'whoami' file.
        """
        cluster = self.args.cluster
        bootstrap = self.args.prepare_key_template.format(cluster=cluster,
                                                          statedir=STATEDIR)
        path = self.get_mount_point()
        secrets = LockboxSecrets(self.args)
        # Pass the requested OSD id along only when one was given.
        id_arg = [self.args.osd_id] if self.args.osd_id else []
        osd_id = command_with_stdin(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                '-i', '-',
                'osd', 'new', self.args.osd_uuid,
            ] + id_arg,
            secrets.get_json()
        )
        secrets.write_lockbox_keyring(path, self.args.osd_uuid)
        osd_id = must_be_one_line(osd_id)
        check_osd_id(osd_id)
        write_one_line(path, 'whoami', osd_id)
        secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id)
        write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)

    def symlink_spaces(self, path):
        """Link every space uuid to the lockbox mount point.

        For each name in Space.NAMES with a non-empty '<name>_uuid'
        argument, STATEDIR/osd-lockbox/<uuid> is pointed at the mount
        point and the uuid is recorded in '<name>-uuid' under *path*.
        """
        target = self.get_mount_point()
        for name in Space.NAMES:
            uuid = getattr(self.args, name + '_uuid', None)
            if not uuid:
                continue
            symlink = os.path.join(STATEDIR, 'osd-lockbox', uuid)
            adjust_symlink(target, symlink)
            write_one_line(path, name + '-uuid', uuid)

    def populate(self):
        """Format and mount the lockbox, then write its content."""
        maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
        args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
        # Both placeholders now get an argument; the old message left a
        # literal '%s' in the log.
        LOG.debug('Creating lockbox fs on %s: %s',
                  self.partition.get_dev(), " ".join(args))
        command_check_call(args)
        path = self.get_mount_point()
        maybe_mkdir(path)
        args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
        LOG.debug('Mounting lockbox %s', " ".join(args))
        command_check_call(args)
        write_one_line(path, 'osd-uuid', self.args.osd_uuid)
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        self.create_key()
        self.symlink_spaces(path)
        write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
        if self.device is not None:
            # Tag the partition only when we created it ourselves.
            command_check_call(
                [
                    'sgdisk',
                    '--typecode={num}:{uuid}'.format(
                        num=self.partition.get_partition_number(),
                        uuid=self.partition.ptype_for_name('lockbox'),
                    ),
                    '--',
                    get_partition_base(self.partition.get_dev()),
                ],
            )

    def get_mount_point(self):
        """Return STATEDIR/osd-lockbox/<osd uuid>."""
        return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)

    def get_osd_uuid(self):
        """Return the OSD uuid held in the arguments."""
        return self.args.osd_uuid

    def activate(self):
        """Mount the lockbox read-only and trigger its dependent devices.

        The partition is first mounted on a temporary directory to read
        the OSD uuid, which determines the final mount point.  It is
        then mounted there and 'ceph-disk trigger' is run for every
        '<name>-uuid' file found in the lockbox.
        """
        path = is_mounted(self.partition.get_dev())
        if path:
            LOG.info("Lockbox already mounted at " + path)
            return

        path = tempfile.mkdtemp(
            prefix='mnt.',
            dir=STATEDIR + '/tmp',
        )
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                path]
        LOG.debug('Mounting lockbox temporarily %s', " ".join(args))
        command_check_call(args)
        self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
        command_check_call(['umount', path])
        # The temporary mount point is no longer needed; removal is
        # best-effort so a leftover directory never blocks activation.
        try:
            os.rmdir(path)
        except OSError as e:
            LOG.warning('Unable to remove temporary lockbox mount '
                        'point %s: %s', path, e)

        # Build the final command before logging it, so that the log
        # shows what is actually run.
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                self.get_mount_point()]
        LOG.debug('Mounting lockbox readonly %s', " ".join(args))
        command_check_call(args)
        for name in Space.NAMES + ('osd',):
            uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
            if os.path.exists(uuid_path):
                uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
                dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
                args = ['ceph-disk', 'trigger', dev]
                command_check_call(args)

    def prepare(self):
        """Check the device is unused, then create and fill the lockbox."""
        verify_not_in_use(self.args.lockbox, check_partitions=True)
        self.set_or_create_partition()
        self.populate()
2845
2846
class PrepareData(object):
    """Base class for preparing the OSD data directory or device."""

    # Kinds of data target, stored in self.type by set_type().
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.partition = None
        self.set_type()

        # Fill in the identifiers the caller did not provide.
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Classify args.data as FILE (a directory) or DEVICE.

        Raises Error when it is neither.
        """
        mode = os.stat(self.args.data).st_mode
        if stat.S_ISDIR(mode):
            self.type = self.FILE
            return
        if stmode_is_diskdevice(mode):
            self.type = self.DEVICE
            return
        raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        """Return True when the data target is a directory."""
        return self.type == self.FILE

    def is_device(self):
        """Return True when the data target is a disk device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return the argument parser for the data related options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        # Boolean switches that stay None when not given.
        switches = (
            ('--zap-disk',
             'destroy the partition table (and content) of a disk'),
            ('--data-dir', 'verify that DATA is a dir'),
            ('--data-dev', 'verify that DATA is a block device'),
        )
        for flag, description in switches:
            parser.add_argument(
                flag,
                action='store_true', default=None,
                help=description,
            )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a directory based data path."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Write the OSD identity files under *path*.

        Nothing is done when a 'magic' file is already present.  The
        'magic' file is written after the other identity files, then
        each item of *to_prepare_list* populates its own part.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return
        LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        if self.args.osd_id:
            write_one_line(path, 'wanttobe', self.args.osd_id)
        if self.args.crush_device_class:
            write_one_line(path, 'crush_device_class',
                           self.args.crush_device_class)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for item in to_prepare_list:
            item.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Dispatch to prepare_device() or prepare_file() by type."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
            return
        if self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
            return
        raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare an OSD whose data lives in a directory."""
        data_dir = self.args.data
        if not os.path.exists(data_dir):
            raise Error('data path for directory does not exist',
                        data_dir)

        if self.args.data_dev:
            raise Error('data path is not a block device', data_dir)

        for item in to_prepare_list:
            item.prepare()

        self.populate_data_path_file(data_dir, *to_prepare_list)

    def sanity_checks(self):
        """Check that the data device exists and is not in use."""
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        verify_not_in_use(self.args.data,
                          check_partitions=not self.args.dmcrypt)

    def set_variables(self):
        """Resolve fs type, mkfs arguments, mount options and OSD uuid."""
        cluster = self.args.cluster

        # Command line first, then the two configuration variables,
        # then the built-in default.
        for variable in ('osd_mkfs_type', 'osd_fs_type'):
            if self.args.fs_type is not None:
                break
            self.args.fs_type = get_conf(cluster=cluster, variable=variable)
        if self.args.fs_type is None:
            self.args.fs_type = DEFAULT_FS_TYPE

        self.mkfs_args = get_conf(
            cluster=cluster,
            variable='osd_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mkfs_args is None:
            self.mkfs_args = get_conf(
                cluster=cluster,
                variable='osd_fs_mkfs_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )

        self.mount_options = get_mount_options(cluster=cluster,
                                               fs_type=self.args.fs_type)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Run the checks shared by all device based preparations."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition 1 on the data device and return it."""
        partition_number = 1
        device = Device.factory(self.args.data, self.args)
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """Set self.partition, creating the partition when needed."""
        if not is_partition(self.args.data):
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()
            return

        LOG.debug('OSD data device %s is a partition',
                  self.args.data)
        self.partition = DevicePartition.factory(
            path=None, dev=self.args.data, args=self.args)
        ptype = self.partition.get_ptype()
        ready = Ptype.get_ready_by_name('osd')
        if ptype not in ready:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, str(ready)))

    def populate_data_path_device(self, *to_prepare_list):
        """Make the filesystem on the data partition and populate it.

        When args.data is a whole device, the partition is tagged with
        the 'osd' type code afterwards and a udev 'add' event is
        triggered for it.
        """
        partition = self.partition
        encrypted = isinstance(partition, DevicePartitionCrypt)

        if encrypted:
            partition.map()

        try:
            dev = partition.get_dev()

            mkfs_cmd = ['mkfs', '-t', self.args.fs_type]
            if self.mkfs_args is None:
                mkfs_cmd.extend(MKFS_ARGS.get(self.args.fs_type, []))
            else:
                mkfs_cmd.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    mkfs_cmd.append('-f')  # always force
            mkfs_cmd.extend(['--', dev])

            LOG.debug('Creating %s fs on %s', self.args.fs_type, dev)
            command_check_call(mkfs_cmd, exit=True)

            path = mount(dev=dev,
                         fstype=self.args.fs_type,
                         options=self.mount_options)
            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if encrypted:
                partition.unmap()

        if is_partition(self.args.data):
            return

        typecode = '--typecode=%d:%s' % (partition.get_partition_number(),
                                         partition.ptype_for_name('osd'))
        command_check_call(
            ['sgdisk', typecode, '--', self.args.data],
            exit=True,
        )
        update_partition(self.args.data, 'prepared')
        command_check_call(['udevadm', 'trigger',
                            '--action=add',
                            '--sysname-match',
                            os.path.basename(partition.rawdev)])
3082
3083
class PrepareFilestoreData(PrepareData):
    """Data preparation for OSDs using the filestore backend."""

    def get_space_size(self):
        """Return 0: the data partition takes all available space."""
        return 0  # get as much space as possible

    def prepare_device(self, *to_prepare_list):
        """Prepare the spaces first, then the data partition."""
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        for item in to_prepare_list:
            item.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate *path* and record the 'filestore' backend type."""
        parent = super(PrepareFilestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3100
7c673cae
FG
3101
class PrepareBluestoreData(PrepareData):
    """Data preparation for OSDs using the bluestore backend."""

    def get_space_size(self):
        """Return the size of the data partition in MB."""
        return 100  # MB

    def prepare_device(self, *to_prepare_list):
        """Set up the data partition first, then prepare the spaces."""
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        self.set_data_partition()
        for item in to_prepare_list:
            item.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate *path* and record the 'bluestore' backend type."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3118
3119
7c673cae
FG
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap and run 'ceph-osd --mkfs' on the data dir *path*.

    The object store type is read from the 'type' file in *path*;
    filestore additionally gets an '--osd-journal' argument pointing at
    <path>/journal.  Raises Error for any other type.
    """
    monmap = os.path.join(path, 'activate.monmap')
    command_check_call(
        [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            'mon', 'getmap', '-o', monmap,
        ],
    )

    osd_type = read_one_line(path, 'type')
    if osd_type != 'bluestore' and osd_type != 'filestore':
        raise Error('unrecognized objectstore type %s' % osd_type)

    # Both backends share the same command line except for the journal.
    mkfs_cmd = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type == 'filestore':
        mkfs_cmd += ['--osd-journal', os.path.join(path, 'journal')]
    mkfs_cmd += [
        '--osd-uuid', fsid,
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    command_check_call(mkfs_cmd)
7c673cae
FG
3171
3172
7c673cae
FG
def get_mount_point(cluster, osd_id):
    """Return the OSD data directory: STATEDIR/osd/<cluster>-<osd_id>."""
    osd_root = STATEDIR + '/osd'
    dirname = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(osd_root, dirname)
3179
3180
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """Mount *dev* at the final OSD location and drop the mount at *path*."""
    LOG.debug('Moving mount to final location...')
    final_dir = get_mount_point(cluster, osd_id)
    maybe_mkdir(final_dir)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others.  Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013.  Instead, mount the original device at the final
    # location.
    mount_cmd = ['/bin/mount', '-o', mount_options, '--', dev, final_dir]
    command_check_call(mount_cmd)

    # lazy (-l), in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
3222
3223
#
# For upgrade purposes, to make sure there are no competing units,
# both --runtime unit and the default should be disabled. There can be
# two units at the same time: one with --runtime and another without
# it. If, for any reason (manual or ceph-disk) the two units co-exist
# they will compete with each other.
#
def systemd_disable(
    path,
    osd_id,
):
    """Disable both the persistent and the --runtime ceph-osd@ unit."""
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    # ensure there is no duplicate ceph-osd@.service
    for extra in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra)
3244
3245
def systemd_start(
    path,
    osd_id,
):
    """Enable and start the ceph-osd@<osd_id> systemd unit.

    Any existing unit is disabled first.  The unit is enabled with
    --runtime when is_mounted(path) reports a mount.
    """
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)

    enable_cmd = ['systemctl', 'enable', unit]
    if is_mounted(path):
        enable_cmd.append('--runtime')
    command_check_call(enable_cmd)

    command_check_call(['systemctl', 'start', unit])
3269
3270
def systemd_stop(
    path,
    osd_id,
):
    """Disable and stop the ceph-osd@<osd_id> systemd unit."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3283
3284
def start_daemon(
    cluster,
    osd_id,
):
    """Start the OSD daemon with the init system its data dir is tagged for.

    The init system is identified by a marker file in the OSD data
    directory ('upstart', 'sysvinit', 'systemd', 'openrc' or 'bsdrc'),
    checked in that order.  Raises Error when no marker is found or
    when the start command fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged(init_name):
        # True when the data dir carries the marker file for init_name.
        return os.path.exists(os.path.join(path, init_name))

    osd_name = 'osd.{osd_id}'.format(osd_id=osd_id)

    try:
        if tagged('upstart'):
            command_check_call(
                [
                    '/sbin/initctl',
                    # use emit, not start, because start would fail if the
                    # instance was already running
                    'emit',
                    # since the daemon starting doesn't guarantee much about
                    # the service being operational anyway, don't bother
                    # waiting for it
                    '--no-wait',
                    '--',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    osd_name,
                ],
            )
        elif tagged('systemd'):
            systemd_start(path, osd_id)
        elif tagged('openrc'):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            # The per-OSD script is a symlink to the base script.
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call([osd_script, 'start'])
        elif tagged('bsdrc'):
            command_check_call(
                ['/usr/sbin/service', 'ceph', 'start', osd_name],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3359
3360
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the OSD daemon with the init system its data dir is tagged for.

    The init system is identified by a marker file in the OSD data
    directory ('upstart', 'sysvinit', 'systemd', 'openrc' or 'bsdrc'),
    checked in that order.  Raises Error when no marker is found or
    when the stop command fails.
    """
    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # Each argument must be its own list element: the command
            # is given as an argv list, so a single 'script stop osd.N'
            # string would be taken as the name of the executable.
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3415
3416
def detect_fstype(dev):
    """Return the filesystem type of *dev* as reported by the probe tool.

    Uses 'fstyp' when FREEBSD is set and '/sbin/blkid' otherwise.  The
    tool output is passed through must_be_one_line().
    """
    if FREEBSD:
        probe = ['fstyp', '-u', dev]
    else:
        probe = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    output = _check_output(args=probe)
    return must_be_one_line(output)
3440
3441
def dmcrypt_is_mapped(uuid):
    """Return /dev/mapper/<uuid> when that path exists, else None."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3448
3449
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the encrypted partition *dev* and return _dmcrypt_map()'s result.

    The partition type decides between plain and luks mode.  Raises
    Error when the type is neither.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        is_luks = False
        params = ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        is_luks = True
        params = []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, is_luks)
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=params,
        luks=is_luks,
        format_dev=False,
    )
3471
3472
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """Mount *dev* temporarily, activate the OSD and move it into place.

    Returns a (cluster, osd_id) tuple.  Raises FilesystemTypeError when
    the filesystem type cannot be detected and Error when the OSD is
    marked deactivated (without *reactivate*) or when something else is
    already mounted at the final location.
    """
    if dmcrypt:
        # The result is not used below; the call is kept as in the
        # original code.
        get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)

    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # A 'deactive' marker file means the OSD was deactivated on purpose.
    deactive = os.path.exists(os.path.join(path, 'deactive'))
    if deactive and not reactivate:
        # logging to syslog will help us easy to know udev triggered failure
        unmount(path)
        # we need to unmap again because dmcrypt map will create again
        # on bootup stage (due to deactivate)
        if '/dev/mapper/' in dev:
            dmcrypt_unmap(dev.replace('/dev/mapper/', ''))
        LOG.info('OSD deactivated! reactivate with: --reactivate')
        raise Error('OSD deactivated! reactivate with: --reactivate')

    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        already_active = False
        occupied = False
        our_dev = os.stat(path).st_dev
        try:
            final_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)).st_dev
            if our_dev == final_dev:
                already_active = True
            elif final_dev != os.stat(STATEDIR + '/osd').st_dev:
                # Something else is mounted on the final location.
                occupied = True
            elif os.listdir(get_mount_point(cluster, osd_id)):
                LOG.info(get_mount_point(cluster, osd_id) +
                         " is not empty, won't override")
                occupied = True
        except OSError:
            # Both flags stay False when any of the lookups fails.
            pass

        if already_active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif occupied:
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:
        # Deliberately catches everything: unmount, then re-raise.
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3580
3581
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD whose data lives in the directory ``path``.

    Unless ``init`` is None or 'none', a symlink to ``path`` is created
    at the canonical ``STATEDIR/osd/<cluster>-<osd_id>`` location (an
    existing symlink pointing elsewhere is replaced).

    Returns ``(cluster, osd_id)``.

    Raises Error when ``path`` does not exist or when the symlink cannot
    be removed or created.
    """
    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError:
                        # only filesystem errors are translated; things
                        # like KeyboardInterrupt must propagate untouched
                        raise Error('unable to remove old symlink', canonical)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path))

    return cluster, osd_id
3621
3622
def find_cluster_by_uuid(_uuid):
    """
    Return the name of the cluster whose conf file in SYSCONFDIR
    carries the fsid ``_uuid`` (compared lower-cased), or None.

    A lone ceph.conf without any fsid is accepted as cluster 'ceph'
    when no other conf file matched.
    """
    wanted = _uuid.lower()
    if not os.path.exists(SYSCONFDIR):
        return None

    tolerated = 'getting cluster uuid from configuration failed'
    suffix = '.conf'
    without_fsid = []
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith(suffix):
            continue
        cluster = entry[:-len(suffix)]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            if tolerated not in str(e):
                raise
            without_fsid.append(cluster)
            continue
        if fsid == wanted:
            return cluster

    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if without_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3651
3652
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Prepare the OSD data directory ``path`` for use.

    Checks the OSD magic, resolves the cluster from the stored
    ``ceph_fsid``, allocates an OSD id if ``whoami`` is missing, runs
    mkfs until the ``ready`` marker exists, records the init system
    marker and finally writes the ``active`` marker.

    Returns ``(osd_id, cluster)``.

    Raises Error when the cluster uuid, the cluster conf or the OSD
    uuid cannot be determined.
    """
    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        # first activation: get an id from the cluster and remember it
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
            path=path,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    ready_marker = os.path.join(path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            # the cluster configuration wins over auto-detection
            configured = get_conf(
                cluster=cluster,
                variable='init'
            )
            init = configured if configured is not None else init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for other in INIT_SYSTEMS:
        if other == init:
            continue
        try:
            os.unlink(os.path.join(path, other))
        except OSError:
            pass

    active_marker = os.path.join(path, 'active')
    if not os.path.exists(active_marker):
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3730
3731
def main_activate(args):
    """
    Entry point of the activate subcommand.

    ``args.path`` is either a disk device (mounted and activated with
    mount_activate) or a directory (activated in place with
    activate_dir).  Unless ``args.no_start_daemon`` is set the OSD is
    then started: directly through ``ceph-osd`` when ``args.mark_init``
    is 'none', through start_daemon for any other init system.

    Nothing is done for suppressed paths, and the daemon is not started
    while the journal symlink is dangling.

    Raises Error when the path does not exist or is neither a directory
    nor a block device.
    """
    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stmode_is_diskdevice(mode):
            if (is_partition(args.path) and
                    (get_partition_type(args.path) ==
                     PTYPE['mpath']['osd']['ready']) and
                    not is_mpath(args.path)):
                raise Error('%s is not a multipath block device' %
                            args.path)
            cluster, osd_id = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)
        elif stat.S_ISDIR(mode):
            cluster, osd_id = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path
        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
            LOG.info("activate: Journal not present, not starting, yet")
            return

        if args.no_start_daemon:
            return

        if args.mark_init == 'none':
            # no init system: run the daemon ourselves
            command_check_call(
                [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={journal}'.format(journal=osd_journal),
                ],
            )
        elif args.mark_init is not None:
            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3799
3800
def main_activate_lockbox(args):
    """
    Entry point of the lockbox activation: runs
    main_activate_lockbox_protected while holding ``activate_lock``.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3804
3805
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox found on the partition ``args.path``.

    Does no locking itself; main_activate_lockbox wraps it in
    ``activate_lock``.
    """
    lockbox_partition = DevicePartition.factory(
        path=None,
        dev=args.path,
        args=args,
    )

    box = Lockbox(args)
    box.set_partition(lockbox_partition)
    box.activate()
3813
3814
3815###########################
3816
def _mark_osd_out(cluster, osd_id):
    """
    Mark osd.<osd_id> out with ``ceph osd out``.

    ``cluster`` is passed to the ceph CLI as ``--cluster`` (the same way
    _check_osd_status does) so that the OSD is marked out in the cluster
    it belongs to rather than in the default one.  ``osd_id`` must be an
    integer.
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    cmd = [
        'ceph',
        'osd',
        'out',
        'osd.%d' % osd_id,
    ]
    if cluster:
        cmd.append('--cluster={cluster}'.format(cluster=cluster))
    command(cmd)
3825
3826
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is read from ``ceph osd dump --format json`` of the
    given cluster.  Raises Error when the dump has no entry for
    ``osd_id``.
    """
    LOG.info("Checking osd id: %s ...", osd_id)
    found = False
    status_code = 0
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # compare by value: identity checks against int literals
            # only work by accident of the interpreter's int caching
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not find osd.%s in osd tree!' % osd_id)
    return status_code
3859
3860
def _remove_osd_directory_files(mounted_path, cluster):
    """
    To remove the 'ready', 'active', INIT-specific files.

    The init system name comes from the ``init`` setting of the cluster
    configuration, falling back to init_get().  Markers that are already
    gone are only logged, so the function can be re-run safely.
    """
    for marker in ('ready', 'active'):
        marker_path = os.path.join(mounted_path, marker)
        if os.path.exists(marker_path):
            os.remove(marker_path)
            LOG.info('Remove `%s` file.', marker)
        else:
            LOG.info('`%s` file is already removed.', marker)

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    conf_val = get_conf(
        cluster=cluster,
        variable='init'
    )
    if conf_val is not None:
        init = conf_val
    else:
        init = init_get()

    # The init marker may be absent (it is not written when the OSD is
    # activated without an init system); that must not abort the
    # deactivation half way through.
    init_path = os.path.join(mounted_path, init)
    if os.path.exists(init_path):
        os.remove(init_path)
        LOG.info('Remove `%s` file.', init)
    else:
        LOG.info('`%s` file is already removed.', init)
    return
3889
3890
def main_deactivate(args):
    """
    Entry point of the deactivate subcommand: runs
    main_deactivate_locked while holding ``activate_lock``.
    """
    with activate_lock:
        main_deactivate_locked(args)
3894
3895
def main_deactivate_locked(args):
    """
    Deactivate the OSD selected by ``args.deactivate_by_id`` or
    ``args.path``.

    The OSD is optionally marked out (``args.mark_out``), its daemon is
    stopped, the data directory is flagged ``deactive`` (skipped with
    ``args.once``) and unmounted; dmcrypt mappings are torn down as
    well.  Nothing is done when the OSD is already down.

    Raises Error when no partition matches the request.
    """
    wanted_id = args.deactivate_by_id
    wanted_path = args.path
    target_dev = None

    # Walk every partition; when several match, the last one wins.
    for device in list_devices():
        for part in device.get('partitions', []):
            if (wanted_id and
                    'whoami' in part and
                    part['whoami'] == wanted_id):
                target_dev = part
            elif (wanted_path and
                    'path' in part and
                    part['path'] == wanted_path):
                target_dev = part
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # everything below works from the partition description
    osd_id = target_dev['whoami']
    mounted_path = target_dev['mount']
    dmcrypt = bool(Ptype.is_dmcrypt(target_dev['ptype'], 'osd'))

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if (status_code in (OSD_STATUS_IN_UP, OSD_STATUS_IN_DOWN) and
            args.mark_out is True):
        _mark_osd_out(args.cluster, int(osd_id))
    if status_code in (OSD_STATUS_IN_DOWN, OSD_STATUS_OUT_DOWN):
        LOG.info("OSD already out/down. Do not do anything now.")
        return
    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_OUT_UP):
        stop_daemon(args.cluster, osd_id)

    if not args.once:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Write deactivate to osd directory!
        deactive_marker = os.path.join(mounted_path, 'deactive')
        with open(deactive_marker, 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path, do_rm=not args.once)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return

    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    command(['umount', os.path.join(lockbox, target_dev['uuid'])])

    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        space_key = name + '_uuid'
        if space_key in target_dev:
            dmcrypt_unmap(target_dev[space_key])
3961
3962###########################
3963
3964
def _remove_lockbox(uuid, cluster):
    """
    Unmount the lockbox of the partition ``uuid`` and drop every
    symlink in the lockbox directory that points at it.

    Does nothing when STATEDIR/osd-lockbox does not exist.  ``cluster``
    is accepted but not used.
    """
    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox')
    if os.path.exists(lockbox_dir):
        mountpoint = os.path.join(lockbox_dir, uuid)
        command(['umount', mountpoint])
        for entry in os.listdir(lockbox_dir):
            candidate = os.path.join(lockbox_dir, entry)
            if not os.path.islink(candidate):
                continue
            if os.readlink(candidate) == mountpoint:
                os.unlink(candidate)
3975
3976
def destroy_lookup_device(args, predicate, description):
    """
    Find the first partition for which ``predicate(partition)`` is true.

    Lockbox partitions that are not mounted are activated first.
    dmcrypt partitions are mapped temporarily when necessary so that
    their OSD information can be read, and unmapped again afterwards.

    Returns ``(dmcrypt, partition)`` where ``dmcrypt`` tells whether the
    partition is encrypted.  Raises Error when nothing matches;
    ``description`` names what was looked for.
    """
    devices = list_devices()

    # Pass 1: make sure every lockbox is mounted.
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['type'] != 'lockbox':
                continue
            if is_mounted(partition['path']):
                continue
            main_activate_lockbox_protected(
                argparse.Namespace(verbose=args.verbose,
                                   path=partition['path']))

    # Pass 2: complete the description of encrypted partitions and
    # look for the match.
    for device in devices:
        for partition in device.get('partitions', []):
            dmcrypt = bool(partition['dmcrypt'])
            if dmcrypt:
                dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
                unmap = not dmcrypt_path
                if unmap:
                    dmcrypt_path = dmcrypt_map(partition['path'],
                                               args.dmcrypt_key_dir)
                list_dev_osd(dmcrypt_path, {}, partition)
                if unmap:
                    # leave the mapping as we found it
                    dmcrypt_unmap(partition['uuid'])
            if predicate(partition):
                return dmcrypt, partition
    raise Error('found no device matching ', description)
4005
4006
def main_destroy(args):
    """
    Entry point of the destroy subcommand: runs main_destroy_locked
    while holding ``activate_lock``.
    """
    with activate_lock:
        main_destroy_locked(args)
4010
4011
def main_destroy_locked(args):
    """
    Destroy (or purge, with ``args.purge``) the OSD selected by
    ``args.path`` or ``args.destroy_by_id``.

    The OSD must be down.  dmcrypt mappings and the lockbox are removed
    for encrypted OSDs and, with ``args.zap``, the base device is
    zapped.

    Raises Error when ``args.path`` is not a partition, when neither a
    path nor an osd id was given, when no device matches, or when the
    OSD is still up.
    """
    osd_id = args.destroy_by_id
    path = args.path

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # without this the code below fails with an obscure TypeError
        raise Error('no device to destroy: a partition path or an osd id '
                    'is required')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
       status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    if args.purge:
        action = 'purge'
    else:
        action = 'destroy'
    LOG.info("Prepare to %s osd.%s" % (action, osd_id))
    command([
        'ceph',
        'osd',
        action,
        'osd.%s' % osd_id,
        '--yes-i-really-mean-it',
    ])

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4071
4072
def get_space_osd_uuid(name, path):
    """
    Return the OSD uuid stored on the space device ``path`` (first line
    of ``ceph-osd --get-device-fsid``).

    ``name`` is the kind of space (used in messages only).

    Raises Error when ``path`` does not exist, is not a block device,
    is a multipath-typed partition on a non-multipath device, or when
    ceph-osd fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not path_is_diskdevice(path):
        raise Error('%s is not a block device' % path)

    mpath_ptypes = (PTYPE['mpath']['journal']['ready'],
                    PTYPE['mpath']['block']['ready'])
    if (is_partition(path) and
            get_partition_type(path) in mpath_ptypes and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    cmd = ['ceph-osd', '--get-device-fsid', path]
    try:
        out = _check_output(args=cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    # only the first line of the output is the uuid
    value = str(out).partition('\n')[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
4104
4105
def main_activate_space(name, args):
    """
    Activate the OSD that owns the space device ``args.dev`` (``name``
    is the kind of space).

    The OSD uuid is read from the space device and the matching data
    partition /dev/disk/by-partuuid/<uuid> is mounted, activated and
    started.  Returns silently when the request is suppressed or when
    the data device is not there yet.

    Raises Error when ``args.dev`` does not exist.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    if is_suppressed(args.dev):
        LOG.info('suppressed activate request on space %s', args.dev)
        return

    with activate_lock:
        dev = args.dev
        if args.dmcrypt:
            dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        cluster, osd_id = mount_activate(
            dev=path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
        )
4153
4154
4155###########################
4156
4157
def main_activate_all(args):
    """
    Activate every OSD data partition listed in
    /dev/disk/by-parttypeuuid.

    Entries are named ``<partition type>.<partition uuid>``.  dmcrypt
    partitions are activated through their /dev/mapper/<uuid> device
    and are never mapped here.  A failure on one partition is printed
    to stderr and does not prevent the others from being activated.

    Raises Error at the end when at least one partition failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return
    err = False
    for name in os.listdir(scan_dir):
        # split on the first dot only: an unexpected name with more
        # dots must be ignored, not abort the whole scan
        tag, dot, part_uuid = name.partition('.')
        if not dot:
            continue

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, name)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )

            except Exception as e:
                print(
                    '{prog}: {msg}'.format(prog=args.prog, msg=e),
                    file=sys.stderr
                )

                err = True

    if err:
        raise Error('One or more partitions failed to activate')
4206
4207
4208###########################
4209
def is_swap(dev):
    """
    Tell whether ``dev`` is listed as a swap area in PROCDIR/swaps.

    Both ``dev`` and the listed entries are resolved with
    os.path.realpath before being compared.
    """
    dev = os.path.realpath(dev)
    with open(PROCDIR + '/swaps', 'rb') as proc_swaps:
        # the first line is the column header
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            # The file is read as bytes: convert the path to str, or the
            # comparison with ``dev`` can never succeed on Python 3.
            swaps_dev = _bytes2str(fields[0])
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4223
4224
def get_oneliner(base, name):
    """
    Return the first line of the file ``name`` in directory ``base``,
    without trailing whitespace, or None when there is no such regular
    file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as _file:
        first_line = _file.readline()
        return _bytes2str(first_line.rstrip())
4231
4232
def get_dev_fs(dev):
    """
    Return the filesystem type found on ``dev`` or None.

    On FreeBSD the output of ``fstyp -u`` is returned as is when the
    command succeeds; elsewhere the type is extracted from the output
    of ``blkid -s TYPE``.
    """
    if FREEBSD:
        fstype, _, ret = command(['fstyp', '-u', dev])
        if ret == 0:
            return fstype
        return None

    fscheck, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in fscheck:
        return None
    # second word of the output, value between the double quotes
    type_field = fscheck.split()[1]
    return type_field.split('"')[1]
4257
4258
def split_dev_base_partnum(dev):
    """
    Split the partition ``dev`` into ``(base device, partition number)``.

    Multipath partitions are resolved with the mpath helpers; for other
    devices the partition number is read from the ``partition`` file in
    the directory returned by block_path.
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # close the file right away instead of leaving it to the
        # garbage collector
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4268
4269
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for ``part``
    (None when blkid does not report it).
    """
    ptype = get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
    return ptype
4272
4273
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for ``part``
    (None when blkid does not report it).
    """
    part_uuid = get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
    return part_uuid
4276
4277
def get_blkid_partition_info(dev, what=None):
    """
    Probe ``dev`` with ``blkid -o udev -p`` and return what it reports.

    With ``what`` set, the value of that key is returned (None when it
    is absent); otherwise the whole key/value dictionary is returned.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # Split on the first '=' only so that values containing '='
        # are kept whole; lines without any '=' are ignored.
        key, sep, value = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    return p
4296
4297
def more_osd_info(path, uuid_map, desc):
    """
    Add to ``desc`` what the OSD data directory ``path`` says about the
    OSD: ceph_fsid, cluster (when the fsid is known), whoami and, for
    every space, its lower-cased uuid plus the device ``uuid_map``
    associates with that uuid.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4309
4310
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the OSD data device ``dev`` in ``desc``.

    Sets mount, fs_type and state.  The state is 'active' when the
    device is mounted, 'prepared' when it can be mounted temporarily
    and holds a magic file, 'unprepared' otherwise.  OSD details are
    added with more_osd_info when available.
    """
    mountpoint = is_mounted(dev)
    fs_type = get_dev_fs(dev)
    desc['mount'] = mountpoint
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mountpoint:
        desc['state'] = 'active'
        more_osd_info(mountpoint, uuid_map, desc)
        return
    if not fs_type:
        return

    # not mounted: have a look inside through a temporary mount
    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if tpath:
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    more_osd_info(tpath, uuid_map, desc)
            finally:
                unmount(tpath)
    except MountError:
        pass
4332
4333
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device ``dev`` in ``desc``.

    Sets mount, fs_type and state ('active' when mounted, 'prepared'
    when a temporary ext4 mount shows a magic file, 'unprepared'
    otherwise), the osd_uuid found in the lockbox and, when
    ``uuid_map`` knows that uuid, lockbox_for.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        try:
            tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
            args = ['mount', '-t', 'ext4', dev, tpath]
            LOG.debug('Mounting lockbox ' + str(" ".join(args)))
            try:
                command_check_call(args)
            except subprocess.CalledProcessError:
                # nothing got mounted: do not leave the empty temporary
                # directory behind
                try:
                    os.rmdir(tpath)
                except OSError:
                    pass
                raise
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # always release the temporary mount, even when reading
                # the lockbox failed
                unmount(tpath)
        except subprocess.CalledProcessError:
            pass
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4357
4358
def list_format_lockbox_plain(dev):
    """
    Return the plain text fragments telling what the lockbox ``dev``
    is for: the device it belongs to when known, the OSD uuid
    otherwise, nothing when neither is known.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4366
4367
def list_format_more_osd_info_plain(dev):
    """
    Return the plain text fragments describing the OSD on ``dev``:
    cluster (or 'unknown cluster <fsid>'), osd id and the device of
    every space that is known.
    """
    desc = []

    ceph_fsid = dev.get('ceph_fsid')
    if ceph_fsid:
        cluster = dev.get('cluster')
        if cluster:
            desc.append('cluster ' + cluster)
        else:
            desc.append('unknown cluster ' + ceph_fsid)

    whoami = dev.get('whoami')
    if whoami:
        desc.append('osd.%s' % whoami)

    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            desc.append(name + ' %s' % space_dev)
    return desc
4381
4382
def list_format_dev_plain(dev, prefix=''):
    """
    Format the device description ``dev`` (as built by list_dev) as a
    single line: ``<prefix><path> <comma separated details>``.

    The details depend on the partition type: OSD data, lockbox,
    dmcrypt OSD data, regular or dmcrypt space, or anything else (type,
    filesystem or partition type, mount point).
    """
    ptype = dev['ptype']
    if ptype == PTYPE['regular']['osd']['ready']:
        desc = ['ceph data', dev['state']]
        desc += list_format_more_osd_info_plain(dev)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        desc = ['ceph lockbox', dev['state']]
        desc += list_format_lockbox_plain(dev)
    elif Ptype.is_dmcrypt(ptype, 'osd'):
        crypt = dev['dmcrypt']
        holders = crypt['holders']
        if not holders:
            desc = ['ceph data (dmcrypt %s)' % crypt['type'],
                    'not currently mapped']
        elif len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (crypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % crypt['type'],
                    'holders: ' + ','.join(holders)]
    elif Ptype.is_regular_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        desc = ['ceph ' + name]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        crypt = dev['dmcrypt']
        holders = crypt['holders']
        if holders and len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (crypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % crypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        desc = [dev['type']]
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4430
4431
def list_format_plain(devices):
    """
    Format the device list built by list_devices as plain text, one
    line per device or partition.  A disk with partitions gets a
    ``<path> :`` header followed by its partitions sorted by path.
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if not partitions:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
            continue
        lines.append('%s :' % device['path'])
        by_path = sorted(partitions, key=lambda part: part['path'])
        for part in by_path:
            lines.append(list_format_dev_plain(dev=part,
                                               prefix=' '))
    return "\n".join(lines)
4444
4445
def list_dev(dev, uuid_map, space_map):
    """
    Build the description (a dictionary) of the device or partition
    ``dev``.

    ``uuid_map`` maps partition uuids to devices and ``space_map`` maps
    space partition uuids to the data device using them.  The result
    always has path, dmcrypt, is_partition, ptype and type; the other
    keys depend on what the partition is used for.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # compare against the mpath *lockbox* type: the mpath osd type
        # can never show up in this branch
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # the OSD can only be inspected through a single mapped device
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        # not a ceph partition
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4522
4523
def list_devices():
    """
    Return the description of every disk known to list_all_partitions.

    A first pass over all partitions records which device carries which
    partition uuid and, by temporarily mounting every OSD data
    partition, which data device uses which space partition.  A second
    pass builds the descriptions with list_dev: disks with partitions
    get a ``partitions`` list, the others are described directly.
    """
    partmap = list_all_partitions()

    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for p in parts:
            dev = get_dev_path(p)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype not in Ptype.get_ready_by_name('osd'):
                continue

            dev_to_mount = dev
            if Ptype.is_dmcrypt(ptype, 'osd'):
                # only a partition mapped exactly once can be mounted
                holders = is_held(dev)
                if len(holders) != 1:
                    continue
                dev_to_mount = get_dev_path(holders[0])

            fs_type = get_dev_fs(dev_to_mount)
            if fs_type is None:
                continue
            mount_options = get_mount_options(cluster='ceph',
                                              fs_type=fs_type)
            try:
                tpath = mount(dev=dev_to_mount,
                              fstype=fs_type, options=mount_options)
                try:
                    for name in Space.NAMES:
                        space_uuid = get_oneliner(tpath,
                                                  name + '_uuid')
                        if space_uuid:
                            space_map[space_uuid.lower()] = dev
                finally:
                    unmount(tpath)
            except MountError:
                pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        base_path = get_dev_path(base)
        if parts:
            disk = {'path': base_path}
            disk['partitions'] = [
                list_dev(get_dev_path(p), uuid_map, space_map)
                for p in sorted(parts)
            ]
            devices.append(disk)
        else:
            device = list_dev(base_path, uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
    LOG.debug("list_devices: " + str(devices))
    return devices
4586
4587
def list_zfs():
    """
    Print one line per dataset reported by
    ``zfs list -o name,mountpoint``.

    A dataset whose mount point holds an ``active`` file is reported as
    active ceph data, any other one as a plain zfs filesystem.  A
    CalledProcessError from the zfs command is logged and re-raised.
    """
    try:
        out, _, _ = command(['zfs', 'list', '-o', 'name,mountpoint'])
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise

    # the first line is the column header
    for line in out.splitlines()[1:]:
        vdevline = line.split()
        dataset, mountpoint = vdevline[0], vdevline[1]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            elems = os.path.split(mountpoint)
            print(dataset, "ceph data, active, cluster ceph,", elems[1],
                  "mounted on:", mountpoint)
        else:
            print(dataset + " other, zfs, mounted on: " + mountpoint)
4610
4611
def main_list(args):
    """
    Entry point of the list subcommand: runs the FreeBSD or the generic
    implementation while holding ``activate_lock``.
    """
    with activate_lock:
        lister = main_list_freebsd if FREEBSD else main_list_protected
        lister(args)
4618
4619
def main_list_protected(args):
    """
    Print the devices found by list_devices, as JSON when
    ``args.format`` is 'json' and as plain text otherwise.

    With ``args.path`` only the devices whose path matches the end of
    one of the given paths are shown; existing paths are resolved with
    os.path.realpath first.  Each path is used as a regular expression.
    """
    devices = list_devices()
    if args.path:
        paths = [
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        ]
        # a device is listed once per matching path
        selected_devices = [
            device
            for device in devices
            for path in paths
            if re.search(path + '$', device['path'])
        ]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
        return
    output = list_format_plain(selected_devices)
    if output:
        print(output)
4642
4643
def main_list_freebsd(args):
    """FreeBSD implementation of ``list``: delegate to list_zfs().

    Currently accommodates only ZFS Filestore partitions. The listing
    is based on the VDEVs and mountpoints known to zfs, e.g.:

        > zfs list
        NAME USED AVAIL REFER MOUNTPOINT
        osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
        osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1

    ``args`` is accepted for interface compatibility and is not used.
    """
    list_zfs()
4652
4653
###########################
#
# Mark devices that we want to suppress activates on with a
# file like
#
#  /var/lib/ceph/tmp/suppress-activate.sdb
#
# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix.  That
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
#
4665
def is_suppressed(path):
    """Tell whether activation is suppressed for the device at *path*.

    The path is resolved with os.path.realpath(). A suppress marker
    matches when its suffix is a prefix of the device name: the device
    name is shortened one character at a time until a marker file
    ``SUPPRESS_PREFIX + name`` exists or the name is exhausted.

    Returns:
        True if a matching marker file exists. False otherwise, which
        includes paths outside /dev/, paths that are not disk devices,
        and any error raised while checking.
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not ldev_is_diskdevice(disk)):
            return False
        base = get_dev_name(disk)
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # Best effort: a device that cannot be inspected is reported as
        # not suppressed. KeyboardInterrupt and SystemExit are not
        # swallowed.
        return False
    # No marker matched any prefix of the device name.
    return False
4679
4680
def set_suppress(path):
    """Create the marker file that suppresses activation of *path*.

    Raises:
        Error: if the resolved path does not exist or if
            ldev_is_diskdevice() rejects *path*.
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    name = get_dev_name(resolved)

    marker = SUPPRESS_PREFIX + name  # noqa
    # Opening for writing is enough to create (or truncate) the marker.
    open(marker, 'w').close()
    LOG.info('set suppress flag on %s', name)
4692
4693
def unset_suppress(path):
    """Remove the marker file that suppresses activation of *path*.

    Raises:
        Error: if the resolved path does not exist, if
            ldev_is_diskdevice() rejects *path*, if no marker file
            exists for the device or if the marker cannot be removed.
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    assert resolved.startswith('/dev/')
    name = get_dev_name(resolved)

    marker = SUPPRESS_PREFIX + name  # noqa
    if not os.path.exists(marker):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(marker)
        LOG.info('unset suppress flag on %s', name)
    except OSError as e:
        raise Error('failed to unsuppress', e)
4712
4713
def main_suppress(args):
    """Entry point of ``suppress-activate``: mark ``args.path``."""
    set_suppress(path=args.path)
4716
4717
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unmark ``args.path``."""
    unset_suppress(path=args.path)
4720
4721
def main_zap(args):
    """Entry point of ``zap``: zap every device of ``args.dev`` in order."""
    for target in args.dev:
        zap(target)
4725
4726
def main_trigger(args):
    """Activate ``args.dev`` according to its partition type.

    Unless ``args.sync`` is set, the activation is handed over to
    systemd (restart of the ``ceph-disk@<dev>.service`` unit) or to
    upstart (``ceph-disk`` event) when one of them is detected, and the
    function returns right away.

    Otherwise the partition type of the device selects the ``ceph-disk``
    subcommand to run (activate, activate-journal, activate-block or
    activate-lockbox, with ``--dmcrypt`` for the plain and luks types).

    Raises:
        Error: if the partition type is not recognized or if the
            ``ceph-disk`` subcommand returns a non-zero code.
    """
    LOG.debug("main_trigger: " + str(args))
    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        unit_instance = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=unit_instance)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return
    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command([
            'initctl',
            'emit',
            'ceph-disk',
            'dev={dev}'.format(dev=args.dev),
            'pid={pid}'.format(pid=os.getpid()),
        ])
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    unencrypted = ('regular', 'mpath')
    encrypted = ('plain', 'luks')
    block_spaces = ('block', 'block.db', 'block.wal')
    # (PTYPE families, PTYPE names, ceph-disk subcommand), tried in order.
    # The partition types are looked up entry by entry, only when needed.
    dispatch = (
        (unencrypted, ('osd',), ['activate']),
        (encrypted, ('osd',), ['activate', '--dmcrypt']),
        (unencrypted, ('journal',), ['activate-journal']),
        (encrypted, ('journal',), ['activate-journal', '--dmcrypt']),
        (unencrypted, block_spaces, ['activate-block']),
        (encrypted, block_spaces, ['activate-block', '--dmcrypt']),
        (unencrypted, ('lockbox',), ['activate-lockbox']),
    )
    for families, names, subcommand in dispatch:
        ready_types = [PTYPE[family][name]['ready']
                       for family in families
                       for name in names]
        if parttype in ready_types:
            out, err, ret = command(ceph_disk + subcommand + [args.dev])
            break
    else:
        raise Error('unrecognized partition type %s' % parttype)

    if ret != 0:
        LOG.info(out)
        LOG.error(err)
        raise Error('return code ' + str(ret))
    LOG.debug(out)
    LOG.debug(err)
4862
4863
def main_fix(args):
    """Fix the ownership and/or the SELinux labels of the ceph files.

    The paths to fix are gathered in a table whose entries tell, for
    each path, the owner, the group, whether the fix must complete
    before going on (blocking) and whether it applies recursively.
    Non-blocking fixes are started in the background and waited for at
    the end of each step. Paths that do not exist are skipped.

    The steps are selected by the ``system``, ``all``, ``permissions``
    and ``selinux`` attributes of *args*. The function refuses to run
    while a ceph daemon is found by ``pgrep``.

    Raises:
        Error: if SELinux is required but not enabled, if a ceph daemon
            is running or if one of the commands fails.
    """
    # Each entry is (path, user, group, blocking, recursive)
    fix_table = [
        ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
        ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
        ('/etc/ceph', 'root', ROOTGROUP, True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/radosgw', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        recursive = directory != '/var/lib/ceph/osd'
        fix_table.append((directory, 'ceph', 'ceph', True, recursive))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # The lists of background processes
    all_processes = []
    permissions_processes = []
    selinux_processes = []

    # Preliminary checks
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for directory, _, _, _, _ in fix_table:
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            c.append('-e')
            c.append(directory)

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # Use find to relabel + chown ~simultaneously
    if args.all:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            c = ['find', directory]

            # Just pass -maxdepth 0 for non-recursive calls. It is a
            # global option of find and must come before the actions,
            # otherwise GNU find complains about its position.
            if not recursive:
                c += ['-maxdepth', '0']

            c += [
                '-exec',
                'chown',
                ':'.join((uid, gid)),
                '{}',
                '+',
                '-exec',
                'restorecon',
                '{}',
                '+',
            ]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to fix " + directory)
                    LOG.error(err)
                    raise Error(directory + " fix failed")
            else:
                all_processes.append(command_init(c))

    LOG.debug("all_processes: " + str(all_processes))
    for process in all_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background find process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix permissions
    if args.permissions:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            if recursive:
                c = [
                    'chown',
                    '-R',
                    ':'.join((uid, gid)),
                    directory
                ]
            else:
                c = [
                    'chown',
                    ':'.join((uid, gid)),
                    directory
                ]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to chown " + directory)
                    LOG.error(err)
                    raise Error(directory + " chown failed")
            else:
                permissions_processes.append(command_init(c))

    LOG.debug("permissions_processes: " + str(permissions_processes))
    for process in permissions_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background permissions process failed")
            LOG.error(err)
            raise Error("background failed")

    # Fix SELinux labels
    if args.selinux:
        for directory, uid, gid, blocking, recursive in fix_table:
            # Skip directories/files that are not installed
            if not os.access(directory, os.F_OK):
                continue

            if recursive:
                c = [
                    'restorecon',
                    '-R',
                    directory
                ]
            else:
                c = [
                    'restorecon',
                    directory
                ]

            if blocking:
                out, err, ret = command(c)

                if ret:
                    LOG.error("Failed to restore labels for " + directory)
                    LOG.error(err)
                    raise Error(directory + " relabel failed")
            else:
                selinux_processes.append(command_init(c))

    LOG.debug("selinux_processes: " + str(selinux_processes))
    for process in selinux_processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background selinux process failed")
            LOG.error(err)
            raise Error("background failed")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5049
5050
def setup_statedir(dir):
    """Set the state directory and the module globals derived from it.

    Creates *dir* and its ``tmp`` subdirectory when they are missing,
    then sets the globals STATEDIR, prepare_lock, activate_lock and
    SUPPRESS_PREFIX.

    Raises:
        OSError: if a directory cannot be created and does not exist.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR
    global prepare_lock
    global activate_lock
    global SUPPRESS_PREFIX

    STATEDIR = dir

    for path in (STATEDIR, STATEDIR + "/tmp"):
        if os.path.exists(path):
            continue
        try:
            os.mkdir(path)
        except OSError:
            # Another process may have created the path between the
            # check and the mkdir: that is not an error.
            if not os.path.exists(path):
                raise

    prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
    activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5071
5072
def setup_sysconfdir(dir):
    """Store *dir* in the module global SYSCONFDIR."""
    global SYSCONFDIR
    SYSCONFDIR = dir
5076
5077
def parse_args(argv):
    """Build the ``ceph-disk`` command line parser and parse *argv*.

    The global options are declared first, then every subcommand
    registers its own sub-parser. Returns the namespace produced by
    argparse; its ``prog`` attribute holds the program name.
    """
    parser = argparse.ArgumentParser('ceph-disk')

    # (flags, add_argument keyword arguments), in declaration order
    global_options = (
        (('-v', '--verbose'), dict(
            action='store_true', default=None,
            help='be more verbose',
        )),
        (('--log-stdout',), dict(
            action='store_true', default=None,
            help='log to stdout',
        )),
        (('--prepend-to-path',), dict(
            metavar='PATH',
            default='/usr/bin',
            help=('prepend PATH to $PATH for backward compatibility '
                  '(default /usr/bin)'),
        )),
        (('--statedir',), dict(
            metavar='PATH',
            default='/var/lib/ceph',
            help=('directory in which ceph state is preserved '
                  '(default /var/lib/ceph)'),
        )),
        (('--sysconfdir',), dict(
            metavar='PATH',
            default='/etc/ceph',
            help=('directory in which ceph configuration files are found '
                  '(default /etc/ceph)'),
        )),
        (('--setuser',), dict(
            metavar='USER',
            default=None,
            help=('use the given user for subprocesses, '
                  'rather than ceph or root'),
        )),
        (('--setgroup',), dict(
            metavar='GROUP',
            default=None,
            help=('use the given group for subprocesses, '
                  'rather than ceph or root'),
        )),
    )
    for flags, settings in global_options:
        parser.add_argument(*flags, **settings)

    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    Prepare.set_subparser(subparsers)
    # The registration order is the order shown in the help output.
    for register in (
            make_activate_parser,
            make_activate_lockbox_parser,
            make_activate_block_parser,
            make_activate_journal_parser,
            make_activate_all_parser,
            make_list_parser,
            make_suppress_parser,
            make_deactivate_parser,
            make_destroy_parser,
            make_zap_parser,
            make_trigger_parser,
            make_fix_parser,
    ):
        register(subparsers)

    return parser.parse_args(argv)
5152
5153
def make_fix_parser(subparsers):
    """Register the ``fix`` subcommand and return its parser.

    The subcommand has four boolean switches, all off by default, and
    is handled by main_fix().
    """
    fix_parser = subparsers.add_parser(
        'fix',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # the description is intentionally kept empty, as it always was
        description=textwrap.fill(textwrap.dedent("")),
        help='fix SELinux labels and/or file permissions')

    switches = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, description in switches:
        fix_parser.add_argument(
            flag,
            action='store_true',
            default=False,
            help=description,
        )

    fix_parser.set_defaults(func=main_fix)
    return fix_parser
5190
5191
def make_trigger_parser(subparsers):
    """Register the ``trigger`` subcommand and return its parser.

    The subcommand is handled by main_trigger().
    """
    description = textwrap.fill(textwrap.dedent("""\
    The partition given in argument is activated. The type of the
    partition (data, lockbox, journal etc.) is detected by its
    type. If the init system is upstart or systemd, the activation is
    delegated to it and runs asynchronously, which
    helps reduce the execution time of udev actions.
    """))
    trigger_parser = subparsers.add_parser(
        'trigger',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='activate any device (called by udev)')

    options = (
        ('dev', dict(
            help=('device'),
        )),
        ('--cluster', dict(
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )),
        ('--dmcrypt', dict(
            action='store_true', default=None,
            help='map devices with dm-crypt',
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('--sync', dict(
            action='store_true', default=None,
            help='do operation synchronously; do not trigger systemd',
        )),
    )
    for name, settings in options:
        trigger_parser.add_argument(name, **settings)

    trigger_parser.set_defaults(func=main_trigger)
    return trigger_parser
5234
5235
def make_activate_parser(subparsers):
    """Register the ``activate`` subcommand and return its parser.

    The description shown to the user mentions the current value of
    STATEDIR. The subcommand is handled by main_activate().
    """
    description = textwrap.fill(textwrap.dedent("""\
    Activate the OSD found at PATH (can be a directory
    or a device partition, possibly encrypted). When
    activated for the first time, a unique OSD id is obtained
    from the cluster. If PATH is a directory, a symbolic
    link is added in {statedir}/osd/ceph-$id. If PATH is
    a partition, it is mounted on {statedir}/osd/ceph-$id.
    Finally, the OSD daemon is run.

    If the OSD depends on auxiliary partitions (journal, block, ...)
    they need to be available otherwise activation will fail. It
    may happen if a journal is encrypted and cryptsetup was not
    run yet.
    """.format(statedir=STATEDIR)))
    activate_parser = subparsers.add_parser(
        'activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph OSD')

    options = (
        ('--mount', dict(
            action='store_true', default=None,
            help='mount a block device [deprecated, ignored]',
        )),
        ('--activate-key', dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--mark-init', dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=INIT_SYSTEMS,
        )),
        ('--no-start-daemon', dict(
            action='store_true', default=None,
            help='do not start the daemon',
        )),
        ('path', dict(
            metavar='PATH',
            help='path to block device or directory',
        )),
        ('--dmcrypt', dict(
            action='store_true', default=None,
            help='map DATA and/or JOURNAL devices with dm-crypt',
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('--reactivate', dict(
            action='store_true', default=False,
            help='activate the deactived OSD',
        )),
    )
    for name, settings in options:
        activate_parser.add_argument(name, **settings)

    activate_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate,
    )
    return activate_parser
5304
5305
def make_activate_lockbox_parser(subparsers):
    """Register the ``activate-lockbox`` subcommand and return its parser.

    The description shown to the user mentions the current value of
    STATEDIR. The subcommand is handled by main_activate_lockbox().
    """
    description = textwrap.fill(textwrap.dedent("""\
    Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
    where $uuid uniquely identifies the OSD that needs this lockbox
    to retrieve keys from the monitor and unlock its partitions.

    If the OSD has one or more auxiliary devices (journal, block, ...)
    symbolic links are created at {statedir}/osd-lockbox/$other_uuid
    and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
    allow a journal encrypted in a partition identified by $other_uuid to
    fetch the keys it needs from the monitor.

    Finally the OSD is activated, as it would be with ceph-disk activate.
    """.format(statedir=STATEDIR)))
    lockbox_parser = subparsers.add_parser(
        'activate-lockbox',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph lockbox')

    options = (
        ('--activate-key', dict(
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('path', dict(
            metavar='PATH',
            help='path to block device',
        )),
    )
    for name, settings in options:
        lockbox_parser.add_argument(name, **settings)

    lockbox_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_lockbox,
    )
    return lockbox_parser
5345
5346
def make_activate_block_parser(subparsers):
    """Register ``activate-block`` and return its parser."""
    return make_activate_space_parser(name='block', subparsers=subparsers)
5349
5350
def make_activate_journal_parser(subparsers):
    """Register ``activate-journal`` and return its parser."""
    return make_activate_space_parser(name='journal', subparsers=subparsers)
5353
5354
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` subcommand and return its parser.

    *name* is the kind of auxiliary device (the callers in this file
    use 'block' and 'journal'); it shows up in the subcommand name and
    in the help texts. The subcommand is handled by
    main_activate_space(), which is given *name* and the parsed
    arguments.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Activating a {name} partition is only meaningfull
    if it is encrypted and it will map it using
    cryptsetup.

    Finally the corresponding OSD is activated,
    as it would be with ceph-disk activate.
    """.format(name=name)))
    activate_space_parser = subparsers.add_parser(
        'activate-%s' % name,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate an OSD via its %s device' % name)

    options = (
        ('dev', dict(
            metavar='DEV',
            help='path to %s block device' % name,
        )),
        ('--activate-key', dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--mark-init', dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=INIT_SYSTEMS,
        )),
        ('--dmcrypt', dict(
            action='store_true', default=None,
            help=('map data and/or auxiliariy (journal, etc.) '
                  'devices with dm-crypt'),
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('--reactivate', dict(
            action='store_true', default=False,
            help='activate the deactived OSD',
        )),
    )
    for option, settings in options:
        activate_space_parser.add_argument(option, **settings)

    activate_space_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        # Must stay a plain function: main() reads args.func.__name__
        func=lambda args: main_activate_space(name, args),
    )
    return activate_space_parser
5408
5409
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` subcommand and return its parser.

    The subcommand is handled by main_activate_all().
    """
    description = textwrap.fill(textwrap.dedent("""\
    Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
    The partitions containing auxiliary devices (journal, block, ...)
    are not activated.
    """))
    activate_all_parser = subparsers.add_parser(
        'activate-all',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate all tagged OSD partitions')

    options = (
        ('--activate-key', dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--mark-init', dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=INIT_SYSTEMS,
        )),
    )
    for option, settings in options:
        activate_all_parser.add_argument(option, **settings)

    activate_all_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_all,
    )
    return activate_all_parser
5438
5439
def make_list_parser(subparsers):
    """Register the ``list`` subcommand and return its parser.

    The subcommand accepts an output format and any number of paths,
    and is handled by main_list().
    """
    description = textwrap.fill(textwrap.dedent("""\
    Display all partitions on the system and their
    associated Ceph information, if any.
    """))
    list_parser = subparsers.add_parser(
        'list',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='List disks, partitions, and Ceph OSDs')

    options = (
        ('--format', dict(
            help='output format',
            default='plain',
            choices=['json', 'plain'],
        )),
        ('path', dict(
            metavar='PATH',
            nargs='*',
            help='path to block devices, relative to /sys/block',
        )),
    )
    for option, settings in options:
        list_parser.add_argument(option, **settings)

    list_parser.set_defaults(func=main_list)
    return list_parser
5465
5466
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    Both subcommands take a single PATH argument; they are handled by
    main_suppress() and main_unsuppress() respectively. Only the parser
    of ``suppress-activate`` is returned.
    """
    suppress_description = textwrap.fill(textwrap.dedent("""\
    Add a prefix to the list of suppressed device names
    so that they are ignored by all activate* subcommands.
    """))
    suppress_parser = subparsers.add_parser(
        'suppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=suppress_description,
        help='Suppress activate on a device (prefix)')
    suppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    suppress_parser.set_defaults(func=main_suppress)

    unsuppress_description = textwrap.fill(textwrap.dedent("""\
    Remove a prefix from the list of suppressed device names
    so that they are no longer ignored by all
    activate* subcommands.
    """))
    unsuppress_parser = subparsers.add_parser(
        'unsuppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=unsuppress_description,
        help='Stop suppressing activate on a device (prefix)')
    unsuppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    unsuppress_parser.set_defaults(func=main_unsuppress)

    return suppress_parser
5503
5504
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` subcommand and return its parser.

    The OSD to deactivate is designated either by the optional PATH
    argument or by ``--deactivate-by-id``. The subcommand is handled by
    main_deactivate().

    The parser is returned, like the other make_*_parser functions do.
    """
    deactivate_parser = subparsers.add_parser(
        'deactivate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Deactivate the OSD located at PATH. It stops the OSD daemon
        and optionally marks it out (with --mark-out). The content of
        the OSD is left untouched.

        By default, the, ready, active, INIT-specific files are
        removed (so that it is not automatically re-activated by the
        udev rules or ceph-disk trigger) and the file deactive is
        created to remember the OSD is deactivated.

        If the --once option is given, the ready, active, INIT-specific
        files are not removed and the OSD will reactivate whenever
        ceph-disk trigger is run on one of the devices (journal, data,
        block, lockbox, ...).

        If the OSD is dmcrypt, remove the data dmcrypt map. When
        deactivate finishes, the OSD is down.
        """)),
        help='Deactivate a Ceph OSD')
    deactivate_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    deactivate_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    deactivate_parser.add_argument(
        '--deactivate-by-id',
        metavar='<id>',
        help='ID of OSD to deactive'
    )
    deactivate_parser.add_argument(
        '--mark-out',
        action='store_true', default=False,
        help='option to mark the osd out',
    )
    deactivate_parser.add_argument(
        '--once',
        action='store_true', default=False,
        help='does not need --reactivate to activate again',
    )
    deactivate_parser.set_defaults(
        func=main_deactivate,
    )
    return deactivate_parser
5558
5559
def make_destroy_parser(subparsers):
    """Register the ``destroy`` subcommand and return its parser.

    The OSD to destroy is designated either by the optional PATH
    argument or by ``--destroy-by-id``. The subcommand is handled by
    main_destroy().

    The parser is returned, like the other make_*_parser functions do.
    """
    destroy_parser = subparsers.add_parser(
        'destroy',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.fill(textwrap.dedent("""\
        Destroy the OSD located at PATH. It removes the OSD from the
        cluster and marks it destroyed. An OSD must be down before it
        can be destroyed. Once it is destroyed, a new OSD can be created
        in its place, reusing the same OSD id and position (e.g. after
        a failed HDD or SSD is replaced). Alternatively, if the
        --purge option is also specified, the OSD is removed from the
        CRUSH map and the OSD id is deallocated.""")),
        help='Destroy a Ceph OSD')
    destroy_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    destroy_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    destroy_parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy'
    )
    destroy_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    destroy_parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    destroy_parser.add_argument(
        '--purge',
        action='store_true', default=False,
        help='option to remove OSD from CRUSH map and deallocate the id',
    )
    destroy_parser.set_defaults(
        func=main_destroy,
    )
    return destroy_parser
5610
5611
def make_zap_parser(subparsers):
    """Register the ``zap`` subcommand and return its parser.

    The subcommand takes one or more devices and is handled by
    main_zap().
    """
    description = textwrap.fill(textwrap.dedent("""\
    Zap/erase/destroy a device's partition table and contents. It
    actually uses sgdisk and it's option --zap-all to
    destroy both GPT and MBR data structures so that the disk
    becomes suitable for repartitioning.
    """))
    zap_parser = subparsers.add_parser(
        'zap',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Zap/erase/destroy a device\'s partition table (and contents)')
    zap_parser.add_argument(
        'dev',
        metavar='DEV',
        nargs='+',
        help='path to block device',
    )
    zap_parser.set_defaults(func=main_zap)
    return zap_parser
5633
5634
def main(argv):
    """Parse *argv*, set up the module state and run the subcommand.

    With ``--verbose`` the subcommand is called directly, so that its
    exceptions propagate; otherwise it runs under main_catch().
    """
    global CEPH_PREF_USER
    global CEPH_PREF_GROUP

    args = parse_args(argv)

    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current_path = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = args.prepend_to_path + ":" + current_path

    is_trigger = args.func.__name__ == 'main_trigger'
    if not is_trigger:
        # trigger may run when statedir is unavailable and does not use it
        setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if args.verbose:
        args.func(args)
        return
    main_catch(args.func, args)
5658
5659
def setup_logging(verbose, log_stdout):
    """Configure logging for the command line tool.

    The level is DEBUG when *verbose* is true and WARNING otherwise.
    When *log_stdout* is true, a handler writing to sys.stdout is added
    to LOG; otherwise logging.basicConfig() is used. Both use the same
    message format.
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format='%(funcName)s: %(message)s',
        )
        return

    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setLevel(loglevel)
    stdout_handler.setFormatter(
        logging.Formatter('%(funcName)s: %(message)s'))
    LOG.addHandler(stdout_handler)
    LOG.setLevel(loglevel)
5677
5678
def main_catch(func, args):
    """Call ``func(args)`` and turn the known errors into SystemExit.

    An Error exits with ``<prog>: <message>``; a CephDiskException
    exits with ``<prog> <exception class name>: <message>``. Any other
    exception propagates unchanged.
    """
    try:
        func(args)

    except Error as e:
        message = '{prog}: {msg}'.format(
            prog=args.prog,
            msg=e,
        )
        raise SystemExit(message)

    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
5701
5702
def run():
    """Run main() with the command line arguments of the process."""
    argv = sys.argv[1:]
    main(argv)
5705
5706
# Script entry point: run the command line tool with the arguments of
# the process (the program name is left out).
if __name__ == '__main__':
    main(sys.argv[1:])
    # NOTE(review): nothing in the visible part of the file reads
    # warned_about, and the original indentation of this line could not
    # be recovered -- confirm where it belongs before relying on it.
    warned_about = {}