]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-disk/ceph_disk/main.py
5ae5840b6ac894b5962f827d37fbce8cdf3794e0
[ceph.git] / ceph / src / ceph-disk / ceph_disk / main.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
7 #
8 # Author: Loic Dachary <loic@dachary.org>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
13 # any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
19 #
20
21 from __future__ import print_function
22
23 import argparse
24 import base64
25 import errno
26 import fcntl
27 import json
28 import logging
29 import os
30 import platform
31 import re
32 import subprocess
33 import stat
34 import sys
35 import tempfile
36 import uuid
37 import time
38 import shlex
39 import shutil
40 import pwd
41 import grp
42 import textwrap
43 import glob
44
# Expected content of the 'magic' file of an OSD data directory
# (compared against in check_osd_magic).
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'

KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'

# Partition type identifiers, indexed as PTYPE[scheme][name][state] where
# scheme is 'regular', 'luks', 'plain' or 'mpath', name is the kind of
# partition ('journal', 'block', 'block.db', 'block.wal', 'osd',
# 'lockbox') and state is 'ready' or 'tobe'.
# NOTE(review): only 'regular' and 'mpath' define a 'lockbox' entry.
PTYPE = {
    'regular': {
        'journal': {
            # identical because creating a journal is atomic
            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block': {
            # identical because creating a block is atomic
            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block.db': {
            # NOTE(review): unlike 'journal' and 'block' above, 'ready'
            # and 'tobe' are NOT identical here (they differ in the tail)
            'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
            'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
        },
        'block.wal': {
            # NOTE(review): 'ready' and 'tobe' are NOT identical here
            # either (they differ in the tail)
            'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
            'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
        },
        'lockbox': {
            'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
            'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
        },
    },
    'luks': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '166418da-c469-4022-adf4-b30afd37f176',
            'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
        },
        'block.wal': {
            'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
            'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'plain': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block.db': {
            'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
            'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
        },
        'block.wal': {
            'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
            'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'mpath': {
        'journal': {
            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
        },
        'block': {
            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
        },
        'block.db': {
            'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
            'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
        },
        'block.wal': {
            'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
            'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
        },
        'osd': {
            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
        },
        'lockbox': {
            'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
            'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
        },
    },
}
152
153
class Ptype(object):
    """
    Static lookup helpers over the PTYPE table.

    All comparisons are made against the 'ready' identifiers only.
    """

    @staticmethod
    def get_ready_by_type(what):
        """Return the 'ready' identifier of every name under scheme *what*."""
        ready = []
        for entry in PTYPE[what].values():
            ready.append(entry['ready'])
        return ready

    @staticmethod
    def get_ready_by_name(name):
        """Return the 'ready' identifier of *name* in every scheme defining it."""
        ready = []
        for scheme in PTYPE.values():
            if name in scheme:
                ready.append(scheme[name]['ready'])
        return ready

    @staticmethod
    def is_regular_space(ptype):
        """True if *ptype* is a 'ready' space identifier of the 'regular' scheme."""
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        """True if *ptype* is a 'ready' space identifier of the 'mpath' scheme."""
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        """True if *ptype* is a 'ready' space identifier of the 'plain' scheme."""
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        """True if *ptype* is a 'ready' space identifier of the 'luks' scheme."""
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """
        True if *ptype* equals the 'ready' identifier of one of
        Space.NAMES within scheme *what*.
        """
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' space identifier back to its name.

        :raises: ValueError if no scheme has *ptype* as the 'ready'
                 identifier of one of Space.NAMES.
        """
        for scheme in PTYPE.values():
            for candidate in Space.NAMES:
                if scheme[candidate]['ready'] == ptype:
                    return candidate
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """True if *ptype* is a dmcrypt identifier for one of Space.NAMES."""
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """
        True if *ptype* is the 'ready' identifier of *name* in the
        'plain' or the 'luks' scheme (checked in that order).
        """
        return any(ptype == PTYPE[scheme][name]['ready']
                   for scheme in ('plain', 'luks'))
208
209
SYSFS = '/sys'

# Platform dependent defaults.
FREEBSD = platform.system() == 'FreeBSD'
if FREEBSD:
    DEFAULT_FS_TYPE = 'zfs'
    PROCDIR = '/compat/linux/proc'
    # FreeBSD does not have blockdevices any more
    BLOCKDIR = '/dev'
    ROOTGROUP = 'wheel'
else:
    DEFAULT_FS_TYPE = 'xfs'
    PROCDIR = '/proc'
    BLOCKDIR = '/sys/block'
    ROOTGROUP = 'root'

# OSD STATUS Definition
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3

# Mount options, keyed by filesystem type.
MOUNT_OPTIONS = {
    'btrfs': 'noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xatts-in-leveldb work, and it seemed
    # that user_xattr helped
    'ext4': 'noatime,user_xattr',
    'xfs': 'noatime,inode64',
}

# Extra mkfs arguments, keyed by filesystem type.
MKFS_ARGS = {
    'btrfs': [
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    'xfs': [
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
    'zfs': [
        '-o', 'atime=off',
    ],
}

INIT_SYSTEMS = [
    'upstart',
    'sysvinit',
    'systemd',
    'openrc',
    'bsdrc',
    'auto',
    'none',
]

STATEDIR = '/var/lib/ceph'

SYSCONFDIR = '/etc/ceph'

prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}

# Nuke the TERM variable to avoid confusing any subprocesses we call.
# For example, libreadline will print weird control sequences for some
# TERM values.
os.environ.pop('TERM', None)

# When run as a script, name the logger after the program instead of
# '__main__'.
if __name__ == '__main__':
    LOG_NAME = os.path.basename(sys.argv[0])
else:
    LOG_NAME = __name__
LOG = logging.getLogger(LOG_NAME)

# Allow user-preferred values for subprocess user and group
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
299
300
class FileLock(object):
    """
    Exclusive ``fcntl.lockf`` lock on a file, usable as a context manager.

    The lock file is created if it does not exist; this class never
    removes it.
    """

    def __init__(self, fn):
        """
        :param fn: path of the lock file
        """
        self.fn = fn
        # descriptor of the lock file while the lock is held, else None
        self.fd = None

    def __enter__(self):
        """
        Open the lock file and take the exclusive lock.

        :return: this FileLock instance
        """
        # compare with None: 0 is a valid file descriptor
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except BaseException:
            # do not leak the descriptor when locking fails or is
            # interrupted
            os.close(fd)
            raise
        self.fd = fd
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Release the lock and close the lock file."""
        assert self.fd is not None
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # always close and reset, even if unlocking failed
            os.close(self.fd)
            self.fd = None
316
317
class Error(Exception):
    """
    Error
    """

    def __str__(self):
        # The message is the (stripped) class docstring followed by every
        # constructor argument, all joined with ': '.
        parts = [_bytes2str(self.__doc__.strip())]
        try:
            text_type = basestring
        except NameError:
            # no basestring: fall back to str
            text_type = str
        for arg in self.args:
            if not isinstance(arg, text_type):
                arg = str(arg)
            parts.append(_bytes2str(arg))
        return ': '.join(parts)
331
332
class MountError(Error):
    """
    Mounting filesystem failed
    """
    # The docstring above is runtime text: Error.__str__ reads
    # self.__doc__ and uses it as the message prefix.
337
338
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
    # The docstring above is runtime text: Error.__str__ reads
    # self.__doc__ and uses it as the message prefix.
343
344
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
    # The docstring above is runtime text: Error.__str__ reads
    # self.__doc__ and uses it as the message prefix.
349
350
class TruncatedLineError(Error):
    """
    Line is truncated
    """
    # The docstring above is runtime text: Error.__str__ reads
    # self.__doc__ and uses it as the message prefix.
355
356
class TooManyLinesError(Error):
    """
    Too many lines
    """
    # The docstring above is runtime text: Error.__str__ reads
    # self.__doc__ and uses it as the message prefix.
361
362
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
    # The docstring above is runtime text: Error.__str__ reads
    # self.__doc__ and uses it as the message prefix.
367
368
class CephDiskException(Exception):
    """
    Root of the ceph-disk exceptions that carry a custom (ad-hoc) message.

    These are meant to be caught and dealt with when main() is executed.
    """
375
376
class ExecutableNotFound(CephDiskException):
    """
    Raised when an executable cannot be located in PATH.
    """
382
383
def is_systemd():
    """
    Tell whether the name of process 1 (read from PROCDIR/1/comm)
    contains 'systemd'.
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as comm:
        init_name = comm.read()
    return 'systemd' in init_name
390
391
def is_upstart():
    """
    Tell whether the standard output of ``init --version`` mentions
    'upstart'.
    """
    version_output = command(['init', '--version'])[0]
    return 'upstart' in version_output
398
399
def maybe_mkdir(*a, **kw):
    """
    Create a directory unless it already exists.

    A symlink found at the target path (dangling or not) is removed
    first, so that the path ends up being a real directory.

    All arguments are forwarded unchanged to ``os.mkdir``; the first
    positional argument is the path.

    :raises: OSError for any ``os.mkdir`` failure other than EEXIST.
    """
    # Only the path may be handed to the os.path helpers: passing all of
    # *a broke as soon as a mode was supplied.
    path = a[0]
    # islink() relies on lstat(), so a dangling symlink is detected too
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
416
417
def which(executable):
    """
    Locate an executable.

    The directories of $PATH (or ``os.defpath`` when PATH is unset or
    empty) are searched first, then a fixed list of system directories.

    :return: path of the first regular file that is executable, or None.
    """
    search_dirs = (os.environ.get('PATH') or os.defpath).split(os.pathsep)
    search_dirs += [
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ]
    for directory in search_dirs:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
437
438
def _get_command_executable(arguments):
    """
    Resolve the executable of a command line to a full path.

    When ``arguments[0]`` is already absolute nothing is checked.
    Otherwise it is looked up with ``which`` and replaced in place.

    :return: the (possibly modified) *arguments* list itself
    :raises: ExecutableNotFound if the executable cannot be located
    """
    program = arguments[0]
    if os.path.isabs(program):  # an absolute path
        return arguments
    resolved = which(program)
    if resolved:
        # swap the old executable for the new one
        arguments[0] = resolved
        return arguments
    command_msg = 'Could not run command: %s' % ' '.join(arguments)
    executable_msg = '%s not in path.' % program
    raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
455
456
def command(arguments, **kwargs):
    """
    Run a command through ``subprocess.Popen`` after making sure its
    executable exists, raising a helpful error message if it does not.

    .. note:: This should be the preferred way of calling
      ``subprocess.Popen`` since executables *will* be found, or the
      failure is reported nicely.

    Extra keyword arguments are handed to ``subprocess.Popen``.

    :return: a tuple (stdout, stderr, returncode)
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    child = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout_data, stderr_data = child.communicate()

    return (_bytes2str(stdout_data),
            _bytes2str(stderr_data),
            child.returncode)
482
483
def command_with_stdin(arguments, stdin):
    """
    Run a command, feeding *stdin* to its standard input.

    Unlike ``command``, the executable is not resolved beforehand and
    the output is returned as delivered by ``communicate``.

    :return: the standard output of the command
    :raises: SystemExit if the command exits with a non-zero status
    """
    LOG.info("Running command with stdin: " + " ".join(arguments))
    child = subprocess.Popen(
        arguments,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out, err = child.communicate(stdin)
    LOG.debug(out)
    status = child.returncode
    if status == 0:
        return out
    LOG.error(err)
    raise SystemExit(
        "'{cmd}' failed with status code {returncode}".format(
            cmd=arguments,
            returncode=status,
        )
    )
502
503
504 def _bytes2str(string):
505 return string.decode('utf-8') if isinstance(string, bytes) else string
506
507
def command_init(arguments, **kwargs):
    """
    Start a command through ``subprocess.Popen`` without waiting for it,
    after making sure its executable exists and raising a helpful error
    message if it does not.

    .. note:: This should be the preferred way of calling
      ``subprocess.Popen`` since executables *will* be found, or the
      failure is reported nicely.

    Extra keyword arguments are handed to ``subprocess.Popen``.

    :return: the started process
    """
    argv = [_bytes2str(arg) for arg in _get_command_executable(arguments)]

    LOG.info('Running command: %s' % ' '.join(argv))
    return subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
530
531
def command_wait(process):
    """
    Wait for *process* to finish and collect its output.

    :return: a tuple (stdout, stderr, returncode)
    """
    stdout_data, stderr_data = process.communicate()
    return (_bytes2str(stdout_data),
            _bytes2str(stderr_data),
            process.returncode)
540
541
def command_check_call(arguments, exit=False):
    """
    Run a command through ``subprocess.check_call`` after making sure
    its executable exists, raising a helpful error message if it does
    not.

    When ``exit`` is set to ``True`` a failing command results in a
    clean (sans traceback) system exit instead of a
    ``subprocess.CalledProcessError``.

    .. note:: This should be the preferred way of calling
      ``subprocess.check_call`` since executables *will* be found, or
      the failure is reported nicely.
    """
    arguments = _get_command_executable(arguments)
    cmdline = ' '.join(arguments)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(arguments)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=cmdline,
                returncode=error.returncode,
            )
        )
570
571
572 #
573 # An alternative block_path implementation would be
574 #
575 # name = basename(dev)
576 # return /sys/devices/virtual/block/$name
577 #
578 # It is however more fragile because it relies on the fact
579 # that the basename of the device the user will use always
580 # matches the one the driver will use. On Ubuntu 14.04, for
581 # instance, when multipath creates a partition table on
582 #
583 # /dev/mapper/353333330000007d0 -> ../dm-0
584 #
585 # it will create partition devices named
586 #
587 # /dev/mapper/353333330000007d0-part1
588 #
589 # which is the same device as /dev/dm-1 but not a symbolic
590 # link to it:
591 #
592 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
593 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
594 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
595 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
596 #
597 # Using the basename in this case fails.
598 #
599
600
def block_path(dev):
    """
    Return the SYSFS 'dev/block/<major>:<minor>' directory of a device.

    On FreeBSD *dev* is returned unchanged.
    """
    if FREEBSD:
        return dev
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS,
        M=os.major(rdev),
        m=os.minor(rdev),
    )
608
609
def get_dm_uuid(dev):
    """
    Read the 'dm/uuid' attribute found under the sysfs block path of
    *dev*.

    :return: the raw content of the file (not stripped), or False if
             the file does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # use a context manager so the file is closed deterministically
    with open(uuid_path, 'r') as uuid_file:
        uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
    return uuid
618
619
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The decision is based on the dm uuid of *dev* starting with either
    'mpath-' or 'part<N>-mpath-'.  The result is only meant to be used
    as a truth value: it is a match object on success and a falsy
    value otherwise.  Always False on FreeBSD.
    """
    if FREEBSD:
        return False
    uuid = get_dm_uuid(dev)
    # raw strings: '\d' in a plain literal is an invalid escape sequence
    return (uuid and
            (re.match(r'part\d+-mpath-', uuid) or
             re.match(r'mpath-', uuid)))
630
631
def get_dev_name(path):
    """
    Convert a device path into a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The '/dev/' prefix is dropped and every remaining '/' becomes '!'.
    *path* must start with '/dev/'.
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    return path[len(prefix):].replace('/', '!')
647
648
def get_dev_path(name):
    """
    Convert a device name into a device path, e.g.::

        sdb -> /dev/sdb, cciss!c0d1 -> /dev/cciss/c0d1
    """
    relative = name.replace('!', '/')
    return '/dev/' + relative
659
660
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) into a path relative to /dev
    (cciss/c0d1).
    """
    return '/'.join(name.split('!'))
666
667
def get_dev_size(dev, size='megabytes'):
    """
    Get the size of a device so that actions on devices that are too
    small can be prevented and error reporting improved.

    The size is obtained by seeking to the end of the device.  A failure
    to do so is only logged as a warning, in which case None is
    returned.

    :param dev: the device to calculate the size
    :param size: 'bytes' or 'megabytes'; anything else means megabytes
    :return: the size, rounded down to the requested unit
    """
    megabyte = 1024 * 1024
    units = {'bytes': 1, 'megabytes': megabyte}
    fd = os.open(dev, os.O_RDONLY)
    try:
        end = os.lseek(fd, 0, os.SEEK_END)
        return end // units.get(size, megabyte)  # default to megabytes
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(fd)
689
690
def stmode_is_diskdevice(dmode):
    """
    Tell whether a ``st_mode`` value denotes a disk device: a block
    device, or a character device when running on FreeBSD.
    """
    if not stat.S_ISBLK(dmode):
        # FreeBSD does not have block devices
        # All disks are character devices
        return FREEBSD and stat.S_ISCHR(dmode)
    return True
698
699
def dev_is_diskdevice(dev):
    """Tell whether *dev* is a disk device, following symlinks (stat)."""
    return stmode_is_diskdevice(os.stat(dev).st_mode)
703
704
def ldev_is_diskdevice(dev):
    """Tell whether *dev* itself is a disk device, not following symlinks (lstat)."""
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
708
709
def path_is_diskdevice(path):
    """Tell whether the real path behind *path* is a disk device."""
    return dev_is_diskdevice(os.path.realpath(path))
713
714
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of *dev* whose dm uuid starts with
    'part<pnum>-mpath-', or None when there is none.
    """
    matches = list_partitions_mpath(
        dev, "part{pnum}-mpath-".format(pnum=pnum))
    return matches[0] if matches else None
722
723
def get_partition_dev(dev, pnum):
    """
    get the device path for a partition

    assume that partitions are named like the base dev,
    with a number, and optionally
    some intervening characters (like 'p').  e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    The lookup is retried up to ten times, 0.2 second apart, before
    giving up.

    :raises: Error if the partition cannot be found
    """
    max_retry = 10
    suffix = str(pnum)
    for attempt in range(max_retry + 1):
        found = None
        where = ""
        if is_mpath(dev):
            found = get_partition_mpath(dev, pnum)
        else:
            name = get_dev_name(os.path.realpath(dev))
            sys_entry = os.path.join(BLOCKDIR, name)
            where = " in %s" % sys_entry
            for entry in os.listdir(sys_entry):
                if not (entry.startswith(name) and entry.endswith(suffix)):
                    continue
                # we want the shortest name that starts with the base name
                # and ends with the partition number
                if not found or len(entry) < len(found):
                    found = entry
        if found:
            if attempt:
                LOG.info('Found partition %d for %s after %d tries' %
                         (pnum, dev, attempt))
            return get_dev_path(found)
        if attempt >= max_retry:
            raise Error('partition %d for %s does not appear to exist%s' %
                        (pnum, dev, where))
        LOG.info('Try %d/%d : partition %d for %s does not exist%s' %
                 (attempt + 1, max_retry, pnum, dev, where))
        time.sleep(.2)
765
766
def list_all_partitions():
    """
    Return a dict mapping each device name to the list of its
    partitions.

    Device names come from the entries of BLOCKDIR or, on FreeBSD, from
    the fourth column of PROCDIR/partitions.
    """
    # initialised up front: the FreeBSD branch needs it too
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(
                        get_dev_path(name))
    return dev_part_list
787
788
def list_partitions(dev):
    """
    Return the partitions of *dev*, using the multipath specific lookup
    when the device is managed by multipath.
    """
    real = os.path.realpath(dev)
    if is_mpath(real):
        lister = list_partitions_mpath
    else:
        lister = list_partitions_device
    return lister(real)
795
796
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    Return the holders of *dev* whose dm uuid matches *part_re*.

    :param dev: the device whose 'holders' sysfs directory is scanned
    :param part_re: regular expression matched (``re.match``) against
                    the dm uuid of each holder
    :return: list of holder names
    """
    holders = os.path.join(block_path(dev), 'holders')
    partitions = []
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # context manager: do not leave one open file per holder behind
        with open(uuid_path, 'r') as uuid_file:
            uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
        if re.match(part_re, uuid):
            partitions.append(holder)
    return partitions
808
809
def list_partitions_device(dev):
    """
    Return the entries of the sysfs block directory of *dev* whose name
    starts with the device name.
    """
    prefix = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(prefix)]
820
821
def get_partition_base(dev):
    """
    Return the path of the device a partition belongs to.

    :raises: Error if *dev* is not a disk device, is not a partition,
             or has no parent under /sys/block
    """
    dev = os.path.realpath(dev)
    if not ldev_is_diskdevice(dev):
        raise Error('not a block device', dev)

    sys_block = '/sys/block'
    name = get_dev_name(dev)
    # an entry of its own in /sys/block means a whole device
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', dev)

    # find the base
    for candidate in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, candidate, name)):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', dev)
839
840
def is_partition_mpath(dev):
    """
    True if the dm uuid of *dev* starts with 'part<N>-mpath-'.

    False when the device has no dm uuid at all.
    """
    uuid = get_dm_uuid(dev)
    # get_dm_uuid() returns False when there is no uuid file and
    # re.match() would raise TypeError on it
    return bool(uuid and re.match(r'part\d+-mpath-', uuid))
844
845
def partnum_mpath(dev):
    """
    Return, as a string, the partition number embedded in the dm uuid
    ('part<N>-mpath-...') of *dev*.

    :raises: IndexError if the uuid does not contain such a pattern
    """
    uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', uuid)[0]
849
850
def get_partition_base_mpath(dev):
    """
    Return the /dev/mapper path of the first slave of *dev*, built from
    the 'dm/name' attribute of that slave.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # context manager so the file is closed deterministically
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
858
859
def is_partition(dev):
    """
    Check whether a given device path is a partition or a full disk.

    :return: True for a partition, False for a full disk
    :raises: Error if *dev* is neither
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    st = os.lstat(dev)
    if not stmode_is_diskdevice(st.st_mode):
        raise Error('not a block device', dev)

    # a whole disk has an entry of its own in BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    devno = (os.major(st.st_rdev), os.minor(st.st_rdev))
    if not os.path.exists('/sys/dev/block/%d:%d/partition' % devno):
        raise Error('not a disk or partition', dev)
    return True
883
884
def is_mounted(dev):
    """
    Check if the given device is mounted.

    PROCDIR/mounts is scanned for an entry whose device resolves to the
    same real path as *dev*.

    :return: the mount point of the device, or None if it is not mounted
    """
    # PROCDIR/mounts is read as bytes: bring both sides of the
    # comparison to text, otherwise bytes never equal str on Python 3
    dev = _bytes2str(os.path.realpath(dev))
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = _bytes2str(os.path.realpath(mounts_dev))
                if mounts_dev == dev:
                    return _bytes2str(path)
    return None
902
903
def is_held(dev):
    """
    Check if a device is held by another device (e.g., a dm-crypt mapping)

    :return: the list of holders; always empty for multipath devices
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    dev = os.path.realpath(dev)
    part = get_dev_name(dev)

    # full disk?
    whole_disk = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(whole_disk):
        return os.listdir(whole_disk)

    # partition? try ever shorter prefixes of the name as the base device
    for end in range(len(part), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
929
930
def verify_not_in_use(dev, check_partitions=False):
    """
    Verify if a given device (path) is in use (e.g. mounted or
    in use by device-mapper).

    :param check_partitions: when *dev* is not a partition, also verify
                             each of its partitions
    :raises: Error if device is in use.
    """
    def ensure_free(path):
        # a device is in use when it is mounted or has holders
        if is_mounted(path):
            raise Error('Device is mounted', path)
        holders = is_held(path)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % path, ','.join(holders))

    assert os.path.exists(dev)
    ensure_free(dev)

    if not check_partitions or is_partition(dev):
        return
    for partname in list_partitions(dev):
        ensure_free(get_dev_path(partname))
956
957
def must_be_one_line(line):
    """
    Check that the given text is exactly one newline-terminated line.

    :raises: TruncatedLineError if the text does not end with a newline,
             TooManyLinesError if it contains more than one line
    :return: Content of the line, without the trailing newline.
    """
    text = _bytes2str(line)
    body, last = text[:-1], text[-1:]
    if last != '\n':
        raise TruncatedLineError(text)
    if '\n' in body:
        raise TooManyLinesError(body)
    return body
973
974
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :param parent: directory holding the file
    :param name: name of the file
    :return: Contents of the line, or None if file did not exist.
    :raises: Error if the file does not hold exactly one line
    """
    path = os.path.join(parent, name)
    try:
        # context manager so the file is closed deterministically
        with open(path, 'rb') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
1002
1003
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline.

    The content goes to a temporary file next to the destination, which
    is then renamed over the destination.

    :param parent: directory holding the file
    :param name: name of the file
    :param text: the line to write, without newline
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # flush Python's buffer first, otherwise fsync() has nothing to
        # push to disk yet
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1017
1018
def init_get():
    """
    Return the init system reported by ``ceph-detect-init --default
    sysvinit``.
    """
    detected = _check_output(
        args=['ceph-detect-init', '--default', 'sysvinit'],
    )
    return must_be_one_line(detected)
1031
1032
def check_osd_magic(path):
    """
    Check that the 'magic' file in *path* holds the Ceph OSD magic.

    :raises: BadMagicError if the file is missing (probably not mkfs'ed
             yet) or does not hold CEPH_OSD_ONDISK_MAGIC.
    """
    magic = read_one_line(path, 'magic')
    if magic is None or magic != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1046
1047
def check_osd_id(osd_id):
    """
    Make sure an osd id is made of digits only.

    :raises: Error if it is not
    """
    if re.match(r'^[0-9]+$', osd_id) is None:
        raise Error('osd id is not numeric', osd_id)
1054
1055
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
    path,
):
    """
    Allocates an OSD id on the given cluster.

    When the lockbox directory of *fsid* already holds a 'whoami' file,
    that id is reused and the lockbox 'osd_keyring' is moved into
    *path*.  Otherwise ``ceph osd new`` is run, honouring the id found
    in the 'wanttobe' file of *path*, if any.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """
    lockbox_path = os.path.join(STATEDIR, 'osd-lockbox', fsid)
    lockbox_osd_id = read_one_line(lockbox_path, 'whoami')
    osd_keyring = os.path.join(path, 'keyring')
    if lockbox_osd_id:
        LOG.debug('Getting OSD id from Lockbox...')
        shutil.move(os.path.join(lockbox_path, 'osd_keyring'),
                    osd_keyring)
        path_set_context(osd_keyring)
        os.unlink(os.path.join(lockbox_path, 'whoami'))
        return lockbox_osd_id

    LOG.debug('Allocating OSD id...')
    secrets = Secrets()
    try:
        wanttobe = read_one_line(path, 'wanttobe')
        wanttobe_path = os.path.join(path, 'wanttobe')
        if os.path.exists(wanttobe_path):
            os.unlink(wanttobe_path)
        if wanttobe:
            id_arg = [wanttobe]
        else:
            id_arg = []
        ceph_cmd = [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            '-i', '-',
            'osd', 'new',
            fsid,
        ] + id_arg
        osd_id = command_with_stdin(ceph_cmd, secrets.get_json())
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(osd_id)
    check_osd_id(osd_id)
    secrets.write_osd_keyring(osd_keyring, osd_id)
    return osd_id
1105
1106
def get_osd_id(path):
    """
    Return the OSD id stored in the 'whoami' file of *path*, or None if
    there is no such file.  An id that is present is validated.
    """
    osd_id = read_one_line(path, 'whoami')
    if osd_id is None:
        return None
    check_osd_id(osd_id)
    return osd_id
1115
1116
def get_ceph_user():
    """
    Return the user to use for ceph.

    That is CEPH_PREF_USER when set (the program exits with status 2 if
    that user does not exist), else 'ceph' if such a user exists, else
    'root'.
    """
    if CEPH_PREF_USER is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(CEPH_PREF_USER)
    except KeyError:
        print("No such user:", CEPH_PREF_USER)
        sys.exit(2)
    return CEPH_PREF_USER
1133
1134
def get_ceph_group():
    """
    Return the group to use for ceph.

    That is CEPH_PREF_GROUP when set (the program exits with status 2
    if that group does not exist), else 'ceph' if such a group exists,
    else 'root'.
    """
    if CEPH_PREF_GROUP is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(CEPH_PREF_GROUP)
    except KeyError:
        print("No such group:", CEPH_PREF_GROUP)
        sys.exit(2)
    return CEPH_PREF_GROUP
1151
1152
def path_set_context(path):
    """
    Recursively reset the SELinux context of *path* (when restorecon is
    available) and hand it over to ceph:ceph (when the ceph user is
    'ceph').
    """
    # restore selinux context to default policy values
    restorecon = which('restorecon')
    if restorecon:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
1161
1162
def _check_output(args=None, **kwargs):
    """
    Run a command with ``command`` and return its standard output.

    :raises: subprocess.CalledProcessError, with stdout followed by
             stderr as ``output``, if the command exits with a non-zero
             status
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return _bytes2str(out)
    error = subprocess.CalledProcessError(ret, args[0])
    error.output = out + err
    raise error
1171
1172
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    The lookup is done with ``ceph-conf --name=osd. --lookup``.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # 'err' is not bound when command() itself raised, so only the
        # exception can be reported here
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1206
1207
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code.

    The value is obtained from ``ceph-osd --show-config-value`` and only
    the first line of its output is returned.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if the ceph-osd invocation fails.
    """
    show_value_cmd = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        out = _check_output(args=show_value_cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error('getting variable from configuration failed', e)

    first_line, _, _ = str(out).partition('\n')
    return first_line
1236
1237
def get_fsid(cluster):
    """
    Get the fsid of the cluster.

    :param cluster: cluster name used to look up the configuration.
    :return: The fsid (lower-cased) or raises Error.
    :raises: Error if no usable fsid is configured.
    """
    fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    # get_conf_with_default() always returns a string, so a missing fsid
    # shows up as an empty value or as the all-zero uuid rather than as
    # None.
    # NOTE(review): assumes ceph-osd prints the all-zero uuid for an
    # unset fsid -- confirm against the ceph-osd in use.
    if not fsid or fsid == '00000000-0000-0000-0000-000000000000':
        raise Error('getting cluster uuid from configuration failed')
    return fsid.lower()
1248
1249
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Get path to dmcrypt key file.

    LUKS keys are stored as "<uuid>.luks.key", other keys as "<uuid>".

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    key_name = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, key_name)
1266
1267
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Find the dm-crypt key for the partition identified by _uuid.

    A legacy key file (see get_dmcrypt_key_path) takes precedence. When
    there is none, the lockbox directory STATEDIR/osd-lockbox/<_uuid> is
    used to fetch the key with ``ceph config-key get``.

    :return: a one-element tuple holding the legacy key file path, or
             the base64-decoded key fetched through the lockbox.
    :raises: Error if no key source exists, the cluster cannot be
             determined, the key-management-mode is unknown, or the
             ceph command fails.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        return (legacy_path,)

    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if not os.path.exists(path):
        raise Error('unable to read dm-crypt key', path, legacy_path)

    mode = get_oneliner(path, 'key-management-mode')
    osd_uuid = get_oneliner(path, 'osd-uuid')
    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)

    if mode != KEY_MANAGEMENT_MODE_V1:
        raise Error('unknown key-management-mode ' + str(mode))

    key, stderr, ret = command(
        [
            'ceph',
            '--cluster', cluster,
            '--name',
            'client.osd-lockbox.' + osd_uuid,
            '--keyring',
            os.path.join(path, 'keyring'),
            'config-key',
            'get',
            'dm-crypt/osd/' + osd_uuid + '/luks',
        ],
    )
    LOG.debug('stderr %s', stderr)
    # An explicit check instead of an assert: asserts are removed when
    # running with -O, which would let a failed lookup return a bogus key.
    if ret != 0:
        raise Error('ceph config-key get failed for dm-crypt/osd/' +
                    osd_uuid + '/luks', stderr)
    return base64.b64decode(key)
1308
1309
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map rawdev through cryptsetup as /dev/mapper/<_uuid>.

    If the device is already mapped, the existing mapping is returned.
    A key given as a tuple is a legacy key file path; any other key is
    fed to cryptsetup on stdin. With luks the device is optionally
    formatted (format_dev) and then opened; otherwise a plain mapping is
    created. The mapped device is chowned to ceph:ceph.

    :return: the path of the mapped device.
    :raises: Error if one of the commands fails.
    """
    mapped = dmcrypt_is_mapped(_uuid)
    if mapped:
        return mapped

    if isinstance(key, tuple):
        # legacy, before lockbox
        assert os.path.exists(key[0])
        keypath = key[0]
        key = None
    else:
        keypath = '-'

    dev = '/dev/mapper/' + _uuid
    key_file_opt = ['--key-file', keypath]

    luksFormat_args = (['cryptsetup', '--batch-mode'] + key_file_opt +
                       ['luksFormat', rawdev] + cryptsetup_parameters)
    luksOpen_args = ['cryptsetup'] + key_file_opt + ['luksOpen', rawdev, _uuid]
    create_args = (['cryptsetup'] + key_file_opt +
                   ['create', _uuid, rawdev] + cryptsetup_parameters)

    try:
        if not luks:
            # Plain mode has no format function, nor any validation
            # that the key is correct.
            command_with_stdin(create_args, key)
        else:
            if format_dev:
                command_with_stdin(luksFormat_args, key)
            command_with_stdin(luksOpen_args, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', dev])
    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)

    return dev
1372
1373
def dmcrypt_unmap(
    _uuid
):
    """
    Remove the /dev/mapper/<_uuid> mapping if it exists.

    ``cryptsetup remove`` is retried up to ten times after the first
    failure, sleeping a little longer before each new attempt.

    :raises: Error if the last attempt fails too.
    """
    if not os.path.exists('/dev/mapper/' + _uuid):
        return

    max_retries = 10
    for retries in range(max_retries + 1):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
        except subprocess.CalledProcessError as e:
            if retries == max_retries:
                raise Error('unable to unmap device', _uuid, e)
            time.sleep(0.5 + retries * 1.0)
        else:
            return
1390
1391
def mount(
    dev,
    fstype,
    options,
):
    """
    Mount a device with the given filesystem type and mount options on
    a freshly created temporary directory under STATEDIR/tmp.

    When options is None, the defaults for fstype are taken from
    MOUNT_OPTIONS.

    :return: the path of the mount point.
    :raises: ValueError if dev or fstype is None, MountError if the
             mount command fails.
    """
    # sanity check: none of the arguments are None
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp expects 'dir' to exist on the system,
    # so make sure that is always the case
    tmp_root = STATEDIR + '/tmp'
    if not os.path.exists(tmp_root):
        os.makedirs(tmp_root)

    # mount
    path = tempfile.mkdtemp(prefix='mnt.', dir=tmp_root)
    mount_cmd = ['mount', '-t', fstype, '-o', options, '--', dev, path]
    try:
        LOG.debug('Mounting %s on %s with options %s', dev, path, options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', path])
    except subprocess.CalledProcessError as e:
        # best effort removal of the now useless mount point
        try:
            os.rmdir(path)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return path
1449
1450
def unmount(
    path,
):
    """
    Unmount the given mount point and remove its directory.

    :raises: UnmountError if umount still fails after the retries.
    """
    max_retries = 3
    for retries in range(max_retries + 1):
        try:
            LOG.debug('Unmounting %s', path)
            command_check_call(['/bin/umount', '--', path])
        except subprocess.CalledProcessError as e:
            # on failure, retry 3 times with incremental backoff
            if retries == max_retries:
                raise UnmountError(e)
            time.sleep(0.5 + retries * 1.0)
        else:
            break

    os.rmdir(path)
1478
1479
1480 ###########################################
1481
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from machine readable parted output.

    Every line that starts with digits contributes one number.

    :param partitions: the partition lines printed by parted.
    :return: a list of int, empty when no line starts with a number.
    """
    numbers_as_strings = re.findall(r'^\d+', partitions, re.MULTILINE)
    # Return a real list: on Python 3 map() yields an iterator, which is
    # always true and so breaks "is there any partition" checks.
    return [int(number) for number in numbers_as_strings]
1485
1486
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    :param dev: path of the device handed to ``parted --machine``.
    :return: Index number (> 1 if there is already a partition on the device)
    or 1 if there is no partition table.
    :raises: Error if the parted output is empty or does not look as
             expected.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if not any(unit in lines for unit in ('CHS;', 'CYL;', 'BYT;')):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # split only once so that a second occurrence of the device path in
    # the output cannot break the unpacking
    _, partitions = lines.split(real_dev, 1)
    # force a list: an iterator would always be true, and max() of an
    # empty one raises ValueError
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    return 1
1525
1526
def check_journal_reqs(args):
    """
    Ask ceph-osd whether the osd backend allows, wants and needs a journal.

    ceph-osd is run once per question; an exit status of zero is
    reported as True.

    :return: the tuple (allows_journal, wants_journal, needs_journal).
    """
    def _exits_zero(check_flag):
        # run a single ceph-osd --check-* query and report its success
        _, _, status = command([
            'ceph-osd', check_flag,
            '-i', '0',
            '--log-file', '$run_dir/$cluster-osd-check.log',
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        return not status

    allows_journal = _exits_zero('--check-allows-journal')
    wants_journal = _exits_zero('--check-wants-journal')
    needs_journal = _exits_zero('--check-needs-journal')
    return (allows_journal, wants_journal, needs_journal)
1553
1554
def update_partition(dev, description):
    """
    Must be called after modifying a partition table so the kernel
    knows about the change and fires udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The first udevadm settle waits for ongoing udev events to
    complete, just in case one of them relies on an existing partition
    on dev. The second udevadm settle guarantees to the caller that
    all udev events related to the partition table change have been
    processed, i.e. the 95-ceph-osd.rules actions and mode changes,
    group changes etc. are complete.

    partprobe is attempted up to five times; only "unable to inform the
    kernel" and "Device or resource busy" failures are retried.
    """
    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    partprobe = _get_command_executable(['partprobe'])[0]
    settle_cmd = ['udevadm', 'settle', '--timeout=600']

    for _ in range(5):
        command_check_call(settle_cmd)
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            retryable = ('unable to inform the kernel' in error or
                         'Device or resource busy' in error)
            if not retryable:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed with a retryable error
        raise Error('partprobe %s failed : %s' % (dev, error))

    command_check_call(settle_cmd)
1588
1589
def zap_linux(dev):
    """
    Wipe every partition of dev and then its partition table.

    :raises: Error if one of the external commands fails.
    """
    try:
        # Thoroughly wipe all partitions of any traces of
        # Filesystems or OSD Journals
        #
        # In addition we need to write 10M of data to each partition
        # to make sure that after re-creating the same partition
        # there is no trace left of any previous Filesystem or OSD
        # Journal

        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for partname in list_partitions(dev):
            partition = get_dev_path(partname)
            command_check_call(['wipefs', '--all', partition])
            command_check_call([
                'dd',
                'if=/dev/zero',
                'of={path}'.format(path=partition),
                'bs=1M',
                'count=10',
            ])

        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups. sgdisk
        # isn't too thorough.
        lba_size = 4096
        backup_size = 33 * lba_size
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-backup_size, os.SEEK_END)
            dev_file.write(backup_size * b'\0')

        command_check_call(['sgdisk', '--zap-all', '--', dev])
        command_check_call(['sgdisk', '--clear', '--mbrtogpt', '--', dev])
        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1653
1654
def zap_freebsd(dev):
    """
    Destroy the partitioning of dev with ``gpart destroy -F``.

    :raises: Error if gpart fails.
    """
    # For FreeBSD we just need to zap the partition.
    destroy_cmd = ['gpart', 'destroy', '-F', dev]
    try:
        command_check_call(destroy_cmd)
    except subprocess.CalledProcessError as e:
        raise Error(e)
1669
1670
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    :raises: Error if dev is not a whole block device.
    """
    dev = os.path.realpath(dev)
    dmode = os.stat(dev).st_mode
    is_full_disk = stat.S_ISBLK(dmode) and not is_partition(dev)
    if not is_full_disk:
        raise Error('not full block device; cannot zap', dev)

    zapper = zap_freebsd if FREEBSD else zap_linux
    zapper(dev)
1683
1684
def adjust_symlink(target, path):
    """
    Make sure path is a symlink pointing to target.

    A regular file or a symlink with a different target found at path
    is removed first; a symlink that already points to target is left
    untouched.

    :raises: Error if the old entry cannot be inspected or removed, or
             if the symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        except Exception as e:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt and SystemExit are not turned into Error;
            # the cause is kept for the error report.
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path, e)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except Exception as e:
            raise Error('unable to create symlink %s -> %s' % (path, target),
                        e)
1709
1710
def get_mount_options(cluster, fs_type):
    """
    Look up the configured mount options for fs_type.

    osd_mount_options_<fs_type> is preferred and returned with all
    whitespace removed. When it is not set, the value of
    osd_fs_mount_options_<fs_type> is returned as is (possibly None).
    """
    preferred = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(fstype=fs_type),
    )
    if preferred is not None:
        # remove whitespaces
        return "".join(preferred.split())

    return get_conf(
        cluster,
        variable='osd_fs_mount_options_{fstype}'.format(fstype=fs_type),
    )
1729
1730
class Device(object):
    """
    A whole disk (not a partition) on which partitions can be created.
    """

    def __init__(self, path, args):
        self.args = args
        self.path = path
        # size of the device, looked up on first use
        self.dev_size = None
        # DevicePartition objects indexed by partition number
        self.partitions = {}
        self.ptype_map = None
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition with sgdisk and return its number.

        With num == 0 the next free partition index is used. A positive
        size (in M) must fit on the device; otherwise the largest
        available space is used.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size <= 0:
            new = '--largest-new={num}'.format(num=num)
        else:
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
            if size > self.get_dev_size():
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, self.get_dev_size()))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, self.get_dev_size(), name))

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """Return the 'tobe' partition type uuid for the given name."""
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            kind = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[kind]['lockbox']['tobe']
        if self.ptype_map is None:
            partition = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = partition.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """Return the (cached) DevicePartition for partition num."""
        if num in self.partitions:
            return self.partitions[num]

        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return partition

    def get_dev_size(self):
        """Return the size of the device, looking it up only once."""
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        return Device(path, args)
1806
1807
class DevicePartition(object):
    """
    A partition of a device, with lazily read uuid and partition type.

    Subclasses override set_variables_ptype() to select the PTYPE table
    matching their flavor (multipath, dm-crypt plain, dm-crypt LUKS).
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        self.rawdev = None
        self.dev = None
        self.uuid = None
        self.ptype_map = None
        self.ptype = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the partition uuid, reading it from rawdev once."""
        if self.uuid is not None:
            return self.uuid
        self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type, reading it from rawdev once."""
        if self.ptype is not None:
            return self.ptype
        self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        # the device to use and the raw device start out identical
        self.dev = self.rawdev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type uuid for the given name."""
        return self.ptype_map[name]['ready']

    @staticmethod
    def factory(path, dev, args):
        """
        Build the DevicePartition flavor matching path/dev and args.

        Multipath takes precedence over the dm-crypt flavors.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_multipath = any(
            candidate is not None and is_mpath(candidate)
            for candidate in (path, dev)
        )
        if on_multipath:
            partition_class = DevicePartitionMultipath
        elif dmcrypt_type == 'luks':
            partition_class = DevicePartitionCryptLuks
        elif dmcrypt_type == 'plain':
            partition_class = DevicePartitionCryptPlain
        else:
            partition_class = DevicePartition
        partition = partition_class(args)
        partition.set_dev(dev)
        return partition
1866
1867
class DevicePartitionMultipath(DevicePartition):
    """A partition that uses the 'mpath' partition type table."""

    def set_variables_ptype(self):
        # select the multipath flavor of the partition type uuids
        self.ptype_map = PTYPE['mpath']
1872
1873
class DevicePartitionCrypt(DevicePartition):
    """
    Common part of the dm-crypt partitions.

    Subclasses provide luks() and a setup_crypt() that loads the key.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        # the key is loaded by setup_crypt() in the subclasses
        self.osd_dm_key = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        pass

    def map(self):
        """Map the raw device and use the mapping as the device."""
        self.setup_crypt()
        mapping = dict(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = _dmcrypt_map(**mapping)

    def unmap(self):
        """Remove the mapping and fall back to the raw device."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        self.setup_crypt()
        self.map()
1906
1907
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """A dm-crypt partition in plain (non LUKS) mode."""

    def luks(self):
        return False

    def setup_crypt(self):
        """Load the key and add the key size option, only once."""
        already_set_up = self.osd_dm_key is not None
        if already_set_up:
            return

        keysize_opt = ['--key-size', str(self.dmcrypt_keysize)]
        self.cryptsetup_parameters += keysize_opt

        self.osd_dm_key = get_dmcrypt_key(
            self.get_uuid(),
            self.args.dmcrypt_key_dir,
            False,
        )

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1925
1926
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """A dm-crypt partition in LUKS mode."""

    def luks(self):
        return True

    def setup_crypt(self):
        """Load the key and, if needed, the key size option, only once."""
        already_set_up = self.osd_dm_key is not None
        if already_set_up:
            return

        # A key size of 1024 is not forced into the
        # cryptsetup_parameters, as we want the cryptsetup defaults
        # to prevail for the actual LUKS key lengths.
        if self.dmcrypt_keysize != 1024:
            self.cryptsetup_parameters += ['--key-size',
                                           str(self.dmcrypt_keysize)]

        self.osd_dm_key = get_dmcrypt_key(
            self.get_uuid(),
            self.args.dmcrypt_key_dir,
            True,
        )

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']
1951
1952
class Prepare(object):
    """
    Entry point of the 'prepare' subcommand.

    factory() picks the bluestore or filestore implementation, whose
    _prepare() is run by prepare(), under prepare_lock unless
    --no-locking was given.
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """Return the parser holding the options common to all prepares."""
        parser = argparse.ArgumentParser(add_help=False)
        add = parser.add_argument
        add(
            '--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )
        add(
            '--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to',
        )
        add(
            '--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to',
        )
        add(
            '--osd-id',
            metavar='ID',
            help='unique OSD id to assign this disk to',
        )
        add(
            '--crush-device-class',
            help='crush device class to assign this disk to',
        )
        add(
            '--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt',
        )
        add(
            '--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )
        add(
            '--prepare-key',
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            default='{statedir}/bootstrap-osd/{cluster}.keyring',
            dest='prepare_key_template',
        )
        add(
            '--no-locking',
            action='store_true', default=None,
            help='let many prepare\'s run in parallel',
        )
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """Register the 'prepare' subcommand and return its parser."""
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
            Lockbox.parser(),
        ]
        parents = parents + PrepareFilestore.parent_parsers()
        parents = parents + PrepareBluestore.parent_parsers()
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=textwrap.fill(textwrap.dedent("""\
            If the --bluestore argument is given, a bluestore objectstore
            will be created. If --filestore is provided, a legacy FileStore
            objectstore will be created. If neither is specified, we default
            to BlueStore.

            When an entire device is prepared for bluestore, two
            partitions are created. The first partition is for metadata,
            the second partition is for blocks that contain data.

            Unless explicitly specified with --block.db or
            --block.wal, the bluestore DB and WAL data is stored on
            the main block device. For instance:

            ceph-disk prepare --bluestore /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block, db, and wal data (the rest of the disk)


            If either --block.db or --block.wal are specified to be
            the same whole device, they will be created as partition
            three and four respectively. For instance:

            ceph-disk prepare --bluestore \\
            --block.db /dev/sdc \\
            --block.wal /dev/sdc \\
            /dev/sdc

            Will create

            /dev/sdc1 for osd metadata
            /dev/sdc2 for block (the rest of the disk)
            /dev/sdc3 for db
            /dev/sdc4 for wal

            """)),
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(func=Prepare.main)
        return parser

    def prepare(self):
        """Run _prepare(), serialized by prepare_lock unless disabled."""
        if not self.args.no_locking:
            with prepare_lock:
                self._prepare()
        else:
            self._prepare()

    @staticmethod
    def factory(args):
        """Return the Prepare flavor selected by args.bluestore."""
        prepare_class = PrepareBluestore if args.bluestore else PrepareFilestore
        return prepare_class(args)

    @staticmethod
    def main(args):
        Prepare.factory(args).prepare()
2087
2088
class PrepareFilestore(Prepare):
    """Prepare an OSD made of a filestore data space and a journal."""

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # the lockbox only exists when dm-crypt is requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        wants_dmcrypt = self.data.args.dmcrypt
        if wants_dmcrypt:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2108
2109
class PrepareBluestore(Prepare):
    """Prepare an OSD made of bluestore data, block, db and wal spaces."""

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # the lockbox only exists when dm-crypt is requested
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """Return the parser for the objectstore selection options."""
        parser = argparse.ArgumentParser(add_help=False)
        # both options share the same destination; bluestore is the default
        parser.add_argument(
            '--bluestore',
            dest='bluestore',
            action='store_true', default=True,
            help='bluestore objectstore',
        )
        parser.add_argument(
            '--filestore',
            dest='bluestore',
            action='store_false',
            help='filestore objectstore',
        )
        return parser

    @staticmethod
    def parent_parsers():
        parser_providers = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [provider.parser() for provider in parser_providers]

    def _prepare(self):
        if self.data.args.dmcrypt:
            self.lockbox.prepare()
        # db and wal are only prepared when requested; block always is
        optional_spaces = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        to_prepare_list = [
            space for attribute, space in optional_spaces
            if getattr(self.data.args, attribute)
        ]
        to_prepare_list.append(self.block)
        self.data.prepare(*to_prepare_list)
2157
2158
class Space(object):
    """Holder for the names of the known auxiliary spaces."""

    NAMES = (
        'block',
        'journal',
        'block.db',
        'block.wal',
    )
2162
2163
class PrepareSpace(object):
    """
    Base class preparing an auxiliary space (journal, block, ...).

    Subclasses must set self.name before calling __init__ and provide
    get_space_size() and desired_partition_number(). The space is
    described by the args attributes <name>, <name>_uuid, <name>_file
    and <name>_dev, and is either absent (NONE), a regular file (FILE)
    or a block device (DEVICE).
    """

    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        if getattr(self.args, self.name + '_uuid') is None:
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        self.space_symlink = None
        self.space_dmcrypt = None

    def set_type(self):
        """
        Set self.type to NONE, FILE or DEVICE from the arguments.

        When the space is wanted but unspecified and the data is a whole
        disk, the space is colocated with the data.

        :raises: Error if the path contradicts <name>_file / <name>_dev
                 or is neither a block device nor a regular file.
        """
        name = self.name
        args = self.args
        if (self.wants_space() and
                dev_is_diskdevice(args.data) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        if getattr(args, name) is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.NONE
            return

        if not os.path.exists(getattr(args, name)):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.FILE
            return

        mode = os.stat(getattr(args, name)).st_mode
        if stmode_is_diskdevice(mode):
            if getattr(args, name + '_file'):
                # capitalize() must be called; the bare method would
                # put its repr in the message
                raise Error('%s is not a regular file' % name.capitalize(),
                            getattr(args, name))
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            getattr(args, name))
            self.type = self.FILE
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), getattr(args, name)))

    def is_none(self):
        return self.type == self.NONE

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser(name, positional=True):
        """
        Return a parser with the --<name>-uuid, --<name>-file and
        --<name>-dev options and, if positional, the optional <name>
        positional argument.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )

        if positional:
            parser.add_argument(
                name,
                metavar=name.upper(),
                nargs='?',
                help=('path to OSD %s disk block device;' % name +
                      ' leave out to store %s in file' % name),
            )
        return parser

    def wants_space(self):
        return True

    def populate_data_path(self, path):
        """Record the space in the data directory path, per type."""
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """Write the <name>_uuid file and the <name> symlink in path."""
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

    def populate_data_path_device(self, path):
        """Like populate_data_path_file, plus the <name>_dmcrypt link."""
        self.populate_data_path_file(path)

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            # no dm-crypt: remove a leftover link, if any
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                pass

    def prepare(self):
        """Prepare the space according to its type."""
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """Create the (empty) space file if needed and symlink to it."""
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            # the context manager closes the file even on errors
            with open(space_filename, 'wb'):
                pass
            path_set_context(space_filename)

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        self.space_symlink = space_filename

    def prepare_device(self):
        """
        Prepare a space located on a block device.

        An existing partition is either reused (when its type shows it
        was prepared by ceph-disk) or symlinked directly. Otherwise a
        new partition is created on the device, mapped through dm-crypt
        if requested, and its type code is set to the 'ready' value.
        """
        space_path = getattr(self.args, self.name)
        uuid_attr = self.name + '_uuid'
        reusing_partition = False

        if is_partition(space_path):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), space_path)
            partition = DevicePartition.factory(
                path=None, dev=space_path, args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(space_path +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          space_path)
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID. See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, uuid_attr, partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, uuid_attr))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            space_path)
                self.space_symlink = space_path
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, uuid_attr))

        if self.args.dmcrypt:
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, uuid_attr))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(space_path, self.args)
        num = device.create_partition(
            uuid=getattr(self.args, uuid_attr),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()
            partition.map()

        # switch the partition type from 'tobe' to 'ready'
        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                space_path,
            ],
        )
        update_partition(space_path, 'prepared')

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2417
2418
class PrepareJournal(PrepareSpace):
    """Prepare the journal space of an OSD."""

    def __init__(self, args):
        """Record the journal requirements and initialise the space.

        Raises Error when a journal was given on the command line but
        check_journal_reqs() reports that it is not allowed.
        """
        self.name = 'journal'
        allows, wants, needs = check_journal_reqs(args)
        self.allows_journal = allows
        self.wants_journal = wants
        self.needs_journal = needs

        if args.journal and not allows:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """Return the "wants journal" flag computed in __init__."""
        return self.wants_journal

    def get_space_size(self):
        """Return the osd_journal_size setting as an integer."""
        size = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(size)

    def desired_partition_number(self):
        """Return 2 when journal and data share a device, 0 otherwise."""
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        shares_data_device = self.args.journal == self.args.data
        return 2 if shares_data_device else 0

    @staticmethod
    def parser():
        """Return the argument parser of the 'journal' space."""
        return PrepareSpace.parser('journal')
2453
2454
class PrepareBluestoreBlock(PrepareSpace):
    """Prepare the bluestore 'block' space of an OSD."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block space in MB.

        The size comes from the bluestore_block_size setting (bytes).
        0 is returned when the setting is absent, which asks for as much
        space as possible.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        # Floor division: "/" yields a float under Python 3 and the size
        # must stay an integer number of MB.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block and data share a device, 0 otherwise."""
        if self.args.block == self.args.data:
            num = 2
        else:
            num = 0
        return num

    @staticmethod
    def parser():
        """Return the argument parser of the 'block' space."""
        return PrepareSpace.parser('block')
2482
2483
class PrepareBluestoreBlockDB(PrepareSpace):
    """Prepare the bluestore 'block.db' space of an OSD."""

    def __init__(self, args):
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block.db space in MB.

        bluestore_block_db_size (bytes) is used when it is set and not
        zero. Otherwise the size is one hundredth of
        bluestore_block_size, with a minimum of 1024 MB; 1024 MB is also
        used when bluestore_block_size is not set.
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is None or int(block_db_size) == 0:
            block_size = get_conf(
                cluster=self.args.cluster,
                variable='bluestore_block_size',
            )
            if block_size is None:
                return 1024  # MB
            # Floor division keeps the result an int under Python 3.
            size = int(block_size) // 100 // 1048576
            return max(size, 1024)  # MB
        else:
            return int(block_db_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 3 when block.db and data share a device, 0 otherwise."""
        if getattr(self.args, 'block.db') == self.args.data:
            num = 3
        else:
            num = 0
        return num

    def wants_space(self):
        """block.db is never wanted by default."""
        return False

    @staticmethod
    def parser():
        """Return a parser exposing block.db as the --block.db option."""
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2527
2528
class PrepareBluestoreBlockWAL(PrepareSpace):
    """Prepare the bluestore 'block.wal' space of an OSD."""

    def __init__(self, args):
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the size of the block.wal space in MB.

        The size comes from bluestore_block_wal_size (bytes); 576 MB is
        used when the setting is absent.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        # Floor division keeps the result an int under Python 3.
        return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal and data share a device, 0 otherwise."""
        if getattr(self.args, 'block.wal') == self.args.data:
            num = 4
        else:
            num = 0
        return num

    def wants_space(self):
        """block.wal is never wanted by default."""
        return False

    @staticmethod
    def parser():
        """Return a parser exposing block.wal as the --block.wal option."""
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2565
2566
class CryptHelpers(object):
    """Static helpers reading the dmcrypt settings of a cluster."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return osd_cryptsetup_parameters split into a list of words.

        An empty list is returned when the setting is absent.
        """
        raw = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        if raw is not None:
            return shlex.split(raw)
        return []

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dmcrypt key size to use for the current dmcrypt type.

        osd_dmcrypt_key_size wins when set; otherwise the default is
        1024 for luks and 256 for plain. 0 is returned when dmcrypt is
        not in use.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        kind = CryptHelpers.get_dmcrypt_type(args)

        if kind == 'luks':
            if configured is not None:
                return int(configured)
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024

        if kind == 'plain':
            if configured is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured)
            return int(configured)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain' or None when dmcrypt is not requested.

        Raises Error when osd_dmcrypt_type holds any other value.
        """
        if not (hasattr(args, 'dmcrypt') and args.dmcrypt):
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        # luks is the default when nothing is configured
        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2627
2628
class Secrets(object):
    """Hold the secrets of an OSD and write them where they are needed."""

    def __init__(self):
        """Generate the cephx secret with ceph-authtool.

        Raises Error when ceph-authtool exits with a non-zero status.
        """
        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        # An explicit check rather than an assert: asserts are removed
        # under "python -O" and a failed key generation must not be
        # silently accepted.
        if ret != 0:
            raise Error('ceph-authtool --gen-print-key failed',
                        'exit status ' + str(ret))
        self.keys = {
            'cephx_secret': secret.strip(),
        }

    def write_osd_keyring(self, keyring, osd_id):
        """Create the keyring file holding the cephx secret of osd_id."""
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'osd.' + str(osd_id),
                '--add-key', self.keys['cephx_secret'],
            ])
        path_set_context(keyring)

    def get_json(self):
        """Return the secrets as an ASCII encoded JSON bytearray."""
        return bytearray(json.dumps(self.keys), 'ascii')
2651
2652
class LockboxSecrets(Secrets):
    """Secrets of an OSD extended with the dmcrypt and lockbox keys."""

    def __init__(self, args):
        """Generate the dmcrypt key and the cephx lockbox secret.

        The dmcrypt key is read from /dev/urandom, its size in bits is
        given by CryptHelpers.get_dmcrypt_keysize(args).

        Raises Error when ceph-authtool exits with a non-zero status.
        """
        super(LockboxSecrets, self).__init__()

        key_size = CryptHelpers.get_dmcrypt_keysize(args)
        # Floor division: read() rejects the float that "/" produces
        # under Python 3. The with statement closes the file.
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key).decode('ascii')

        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        # An explicit check rather than an assert, which would be
        # removed under "python -O".
        if ret != 0:
            raise Error('ceph-authtool --gen-print-key failed',
                        'exit status ' + str(ret))

        self.keys.update({
            'dmcrypt_key': base64_key,
            'cephx_lockbox_secret': secret.strip(),
        })

    def write_lockbox_keyring(self, path, osd_uuid):
        """Create path/keyring holding the lockbox secret of osd_uuid."""
        keyring = os.path.join(path, 'keyring')
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'client.osd-lockbox.' + osd_uuid,
                '--add-key', self.keys['cephx_lockbox_secret'],
            ])
        path_set_context(keyring)
2681
2682
class Lockbox(object):
    """Create, populate and activate the lockbox partition of an OSD."""

    def __init__(self, args):
        """Keep args; the lockbox device defaults to the data device."""
        self.args = args
        self.partition = None
        self.device = None

        if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
            self.args.lockbox = self.args.data

    def set_partition(self, partition):
        """Use an already existing partition as the lockbox."""
        self.partition = partition

    @staticmethod
    def parser():
        """Return the parser of the --lockbox and --lockbox-uuid options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--lockbox',
            help='path to the device to store the lockbox',
        )
        parser.add_argument(
            '--lockbox-uuid',
            metavar='UUID',
            help='unique lockbox uuid',
        )
        return parser

    def create_partition(self):
        """Create partition number 5 (10 MB) on the lockbox device."""
        self.device = Device.factory(self.args.lockbox, argparse.Namespace())
        partition_number = 5
        self.device.create_partition(uuid=self.args.lockbox_uuid,
                                     name='lockbox',
                                     num=partition_number,
                                     size=10)  # MB
        return self.device.get_partition(partition_number)

    def set_or_create_partition(self):
        """Reuse args.lockbox when it is a partition, create one otherwise.

        A warning is logged when a reused partition does not have one of
        the lockbox "ready" partition types.
        """
        if is_partition(self.args.lockbox):
            LOG.debug('OSD lockbox device %s is a partition',
                      self.args.lockbox)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.lockbox, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('lockbox')
            if ptype not in ready:
                LOG.warning('incorrect partition UUID: %s, expected %s',
                            ptype, ready)
        else:
            LOG.debug('Creating lockbox partition on %s',
                      self.args.lockbox)
            self.partition = self.create_partition()

    def create_key(self):
        """Register the OSD with "ceph osd new" and store its secrets.

        The secrets are sent on the standard input of the ceph command.
        The osd id it prints is written to the whoami file of the
        lockbox, next to the lockbox keyring, the osd keyring and the
        key management mode.
        """
        cluster = self.args.cluster
        bootstrap = self.args.prepare_key_template.format(cluster=cluster,
                                                          statedir=STATEDIR)
        path = self.get_mount_point()
        secrets = LockboxSecrets(self.args)
        # only pass an osd id to "osd new" when one was requested
        id_arg = [self.args.osd_id] if self.args.osd_id else []
        osd_id = command_with_stdin(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', bootstrap,
                '-i', '-',
                'osd', 'new', self.args.osd_uuid,
            ] + id_arg,
            secrets.get_json()
        )
        secrets.write_lockbox_keyring(path, self.args.osd_uuid)
        osd_id = must_be_one_line(osd_id)
        check_osd_id(osd_id)
        write_one_line(path, 'whoami', osd_id)
        secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id)
        write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)

    def symlink_spaces(self, path):
        """Link every space uuid to the lockbox and record it in path.

        For each name of Space.NAMES with a non empty <name>_uuid
        argument, STATEDIR/osd-lockbox/<uuid> is made a symlink to the
        lockbox mount point and <name>-uuid is written in path.
        """
        target = self.get_mount_point()
        for name in Space.NAMES:
            space_uuid = getattr(self.args, name + '_uuid', None)
            if space_uuid:
                symlink = os.path.join(STATEDIR, 'osd-lockbox', space_uuid)
                adjust_symlink(target, symlink)
                write_one_line(path, name + '-uuid', space_uuid)

    def populate(self):
        """Create the lockbox file system and fill it.

        The partition is formatted as ext4 and mounted on the lockbox
        mount point, then the osd uuid, cluster fsid, keys, space uuids
        and magic are written. When the partition was created by
        create_partition(), its type code is finally set to the lockbox
        partition type.
        """
        maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
        dev = self.partition.get_dev()
        args = ['mkfs', '-t', 'ext4', dev]
        LOG.debug('Creating lockbox fs on %s: %s', dev, " ".join(args))
        command_check_call(args)
        path = self.get_mount_point()
        maybe_mkdir(path)
        args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
        LOG.debug('Mounting lockbox ' + str(" ".join(args)))
        command_check_call(args)
        write_one_line(path, 'osd-uuid', self.args.osd_uuid)
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        self.create_key()
        self.symlink_spaces(path)
        write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
        if self.device is not None:
            command_check_call(
                [
                    'sgdisk',
                    '--typecode={num}:{uuid}'.format(
                        num=self.partition.get_partition_number(),
                        uuid=self.partition.ptype_for_name('lockbox'),
                    ),
                    '--',
                    get_partition_base(self.partition.get_dev()),
                ],
            )

    def get_mount_point(self):
        """Return STATEDIR/osd-lockbox/<osd uuid>."""
        return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)

    def get_osd_uuid(self):
        """Return the uuid of the OSD this lockbox belongs to."""
        return self.args.osd_uuid

    def activate(self):
        """Mount the lockbox read-only and trigger its partitions.

        Nothing is done when the lockbox is already mounted. Otherwise
        it is first mounted on a temporary directory to read the osd
        uuid, which is needed to compute the final mount point.
        """
        path = is_mounted(self.partition.get_dev())
        if path:
            LOG.info("Lockbox already mounted at " + path)
            return

        path = tempfile.mkdtemp(
            prefix='mnt.',
            dir=STATEDIR + '/tmp',
        )
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                path]
        LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
        command_check_call(args)
        self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
        command_check_call(['umount', path])
        # the temporary mount point is no longer needed
        os.rmdir(path)
        args = ['mount', '-t', 'ext4', '-o', 'ro',
                self.partition.get_dev(),
                self.get_mount_point()]
        # log after args is rebuilt so the command shown is the one run
        LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
        command_check_call(args)
        for name in Space.NAMES + ('osd',):
            uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
            if os.path.exists(uuid_path):
                uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
                dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
                args = ['ceph-disk', 'trigger', dev]
                command_check_call(args)

    def prepare(self):
        """Check the device is not in use, then create and fill the lockbox."""
        verify_not_in_use(self.args.lockbox, check_partitions=True)
        self.set_or_create_partition()
        self.populate()
2840
2841
class PrepareData(object):
    """Base class preparing the data part of an OSD.

    The data is either a directory (FILE) or a block device (DEVICE).
    Subclasses are expected to provide get_space_size().
    """

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        """Detect the kind of data and fill in missing cluster/osd uuids."""
        self.args = args
        self.partition = None
        self.set_type()

        if args.cluster_uuid is None:
            args.cluster_uuid = get_fsid(cluster=args.cluster)

        if args.osd_uuid is None:
            args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Set self.type to FILE or DEVICE from the mode of args.data.

        Raises Error when args.data is neither a directory nor a disk
        device.
        """
        mode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(mode):
            self.type = self.FILE
            return
        if stmode_is_diskdevice(mode):
            self.type = self.DEVICE
            return
        raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        """Return True when the data is a directory."""
        return self.type == self.FILE

    def is_device(self):
        """Return True when the data is a block device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return the parser of the data related options and of DATA."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        # NOTE(review): --data-dir does not appear to be checked anywhere
        # in this class, while --data-dev is checked by prepare_file();
        # confirm whether a check is missing on the device path.
        flags = (
            ('--zap-disk',
             'destroy the partition table (and content) of a disk'),
            ('--data-dir',
             'verify that DATA is a dir'),
            ('--data-dev',
             'verify that DATA is a block device'),
        )
        for flag, description in flags:
            parser.add_argument(
                flag,
                action='store_true', default=None,
                help=description,
            )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a data directory, same as populate_data_path()."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Write the OSD identification files in path.

        Nothing is done when path already contains a magic file. The
        magic file is written after the other identification files, then
        each element of to_prepare_list populates path in turn.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return

        LOG.debug('Preparing osd data dir %s', path)

        args = self.args
        if args.osd_uuid is None:
            args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', args.cluster_uuid)
        write_one_line(path, 'fsid', args.osd_uuid)
        if args.osd_id:
            write_one_line(path, 'wanttobe', args.osd_id)
        if args.crush_device_class:
            write_one_line(path, 'crush_device_class',
                           args.crush_device_class)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for space in to_prepare_list:
            space.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Dispatch to prepare_device() or prepare_file()."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
            return
        if self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
            return
        raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare every space, then populate the data directory.

        Raises Error when the directory does not exist or when
        --data-dev was given.
        """
        data = self.args.data

        if not os.path.exists(data):
            raise Error('data path for directory does not exist',
                        data)

        if self.args.data_dev:
            raise Error('data path is not a block device', data)

        for space in to_prepare_list:
            space.prepare()

        self.populate_data_path_file(data, *to_prepare_list)

    def sanity_checks(self):
        """Raise when the data device is missing or already in use."""
        data = self.args.data
        if not os.path.exists(data):
            raise Error('data path for device does not exist',
                        data)
        verify_not_in_use(data,
                          check_partitions=not self.args.dmcrypt)

    def set_variables(self):
        """Resolve the fs type, mkfs arguments and mount options.

        The fs type comes from the command line, then osd_mkfs_type,
        then osd_fs_type, then DEFAULT_FS_TYPE. The mkfs arguments come
        from osd_mkfs_options_<fstype>, then osd_fs_mkfs_options_<fstype>.
        """
        args = self.args

        for variable in ('osd_mkfs_type', 'osd_fs_type'):
            if args.fs_type is not None:
                break
            args.fs_type = get_conf(
                cluster=args.cluster,
                variable=variable,
            )
        if args.fs_type is None:
            args.fs_type = DEFAULT_FS_TYPE

        for template in ('osd_mkfs_options_{fstype}',
                         'osd_fs_mkfs_options_{fstype}'):
            self.mkfs_args = get_conf(
                cluster=args.cluster,
                variable=template.format(fstype=args.fs_type),
            )
            if self.mkfs_args is not None:
                break

        self.mount_options = get_mount_options(cluster=args.cluster,
                                               fs_type=args.fs_type)

        if args.osd_uuid is None:
            args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Run the checks, resolve the variables, zap the disk if asked."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition number 1 on the data device and return it."""
        partition_number = 1
        device = Device.factory(self.args.data, self.args)
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """Reuse args.data when it is a partition, create one otherwise.

        A warning is logged when a reused partition does not have one of
        the osd "ready" partition types.
        """
        data = self.args.data
        if not is_partition(data):
            LOG.debug('Creating osd partition on %s',
                      data)
            self.partition = self.create_data_partition()
            return

        LOG.debug('OSD data device %s is a partition',
                  data)
        self.partition = DevicePartition.factory(
            path=None, dev=data, args=self.args)
        ptype = self.partition.get_ptype()
        ready = Ptype.get_ready_by_name('osd')
        if ptype not in ready:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, str(ready)))

    def populate_data_path_device(self, *to_prepare_list):
        """Create the file system of the data partition and populate it.

        An encrypted partition is mapped for the duration of the
        operation. When args.data is a whole device, the partition type
        code is set afterwards and udev is told about the new partition.
        """
        partition = self.partition
        encrypted = isinstance(partition, DevicePartitionCrypt)

        if encrypted:
            partition.map()

        try:
            fs_type = self.args.fs_type
            mkfs_command = ['mkfs', '-t', fs_type]
            if self.mkfs_args is not None:
                mkfs_command.extend(self.mkfs_args.split())
                if fs_type == 'xfs':
                    mkfs_command.append('-f')  # always force
            else:
                mkfs_command.extend(MKFS_ARGS.get(fs_type, []))
            mkfs_command.extend(['--', partition.get_dev()])
            LOG.debug('Creating %s fs on %s',
                      fs_type, partition.get_dev())
            command_check_call(mkfs_command, exit=True)

            path = mount(dev=partition.get_dev(),
                         fstype=fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if encrypted:
                partition.unmap()

        if is_partition(self.args.data):
            return

        typecode = '--typecode=%d:%s' % (partition.get_partition_number(),
                                         partition.ptype_for_name('osd'))
        command_check_call(
            ['sgdisk', typecode, '--', self.args.data],
            exit=True,
        )
        update_partition(self.args.data, 'prepared')
        command_check_call(['udevadm', 'trigger',
                            '--action=add',
                            '--sysname-match',
                            os.path.basename(partition.rawdev)])
3077
3078
class PrepareFilestoreData(PrepareData):
    """Prepare the data of a filestore OSD."""

    def get_space_size(self):
        """Return the size of the data partition."""
        return 0  # get as much space as possible

    def prepare_device(self, *to_prepare_list):
        """Prepare the spaces first, then the data partition."""
        parent = super(PrepareFilestoreData, self)
        parent.prepare_device(*to_prepare_list)
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path and tag it with the filestore type."""
        parent = super(PrepareFilestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3095
3096
class PrepareBluestoreData(PrepareData):
    """Prepare the data of a bluestore OSD."""

    def get_space_size(self):
        """Return the size of the data partition."""
        return 100  # MB

    def prepare_device(self, *to_prepare_list):
        """Prepare the data partition first, then the spaces."""
        parent = super(PrepareBluestoreData, self)
        parent.prepare_device(*to_prepare_list)
        self.set_data_partition()
        for space in to_prepare_list:
            space.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path and tag it with the bluestore type."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3113
3114
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap and run "ceph-osd --mkfs" on the data dir path.

    The object store type is read from the 'type' file of path; a
    journal argument is only passed for filestore.

    Raises Error when the type is neither bluestore nor filestore.
    """
    monmap = os.path.join(path, 'activate.monmap')
    getmap = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'mon', 'getmap', '-o', monmap,
    ]
    command_check_call(getmap)

    osd_type = read_one_line(path, 'type')
    if osd_type not in ('bluestore', 'filestore'):
        raise Error('unrecognized objectstore type %s' % osd_type)

    osd_mkfs = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type == 'filestore':
        osd_mkfs += ['--osd-journal', os.path.join(path, 'journal')]
    osd_mkfs += [
        '--osd-uuid', fsid,
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    command_check_call(osd_mkfs)
3166
3167
def get_mount_point(cluster, osd_id):
    """Return STATEDIR/osd/<cluster>-<osd_id>."""
    name = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', name)
3174
3175
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """Mount dev on the final OSD location and lazily unmount path.

    When mount_options is None the options are looked up in
    MOUNT_OPTIONS for fstype, defaulting to an empty string.
    """
    LOG.debug('Moving mount to final location...')
    final_location = get_mount_point(cluster, osd_id)
    maybe_mkdir(final_location)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others.  Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013.  Instead, mount the original device at the final
    # location.
    mount_command = [
        '/bin/mount', '-o', mount_options, '--', dev, final_location,
    ]
    command_check_call(mount_command)

    # lazy, in case someone else is peeking at the wrong moment
    umount_command = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_command)
3217
3218
3219 #
3220 # For upgrade purposes, to make sure there are no competing units,
3221 # both --runtime unit and the default should be disabled. There can be
3222 # two units at the same time: one with --runtime and another without
3223 # it. If, for any reason (manual or ceph-disk) the two units co-exist
3224 # they will compete with each other.
3225 #
def systemd_disable(
    path,
    osd_id,
):
    """Disable ceph-osd@<osd_id>, both without and with --runtime.

    path is not used.
    """
    # ensure there is no duplicate ceph-osd@.service
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    for extra in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra)
3239
3240
def systemd_start(
    path,
    osd_id,
):
    """Disable, then enable and start ceph-osd@<osd_id>.

    The unit is enabled with --runtime when path is mounted.
    """
    systemd_disable(path, osd_id)
    extra = ['--runtime'] if is_mounted(path) else []
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'enable', unit] + extra)
    command_check_call(['systemctl', 'start', unit])
3264
3265
def systemd_stop(
    path,
    osd_id,
):
    """Disable, then stop ceph-osd@<osd_id>."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3278
3279
def start_daemon(
    cluster,
    osd_id,
):
    """Start the OSD with the init system its data dir is tagged with.

    The tag is a file named upstart, sysvinit, systemd, openrc or bsdrc
    in the OSD directory; they are looked up in that order.

    Raises Error when there is no tag or when the start command fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged(init_system):
        return os.path.exists(os.path.join(path, init_system))

    osd_name = 'osd.{osd_id}'.format(osd_id=osd_id)

    try:
        if tagged('upstart'):
            command_check_call(
                [
                    '/sbin/initctl',
                    # use emit, not start, because start would fail if the
                    # instance was already running
                    'emit',
                    # since the daemon starting doesn't guarantee much about
                    # the service being operational anyway, don't bother
                    # waiting for it
                    '--no-wait',
                    '--',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    osd_name,
                ],
            )
        elif tagged('systemd'):
            systemd_start(path, osd_id)
        elif tagged('openrc'):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            # the per OSD script is a symlink to the generic one
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call([osd_script, 'start'])
        elif tagged('bsdrc'):
            command_check_call(
                ['/usr/sbin/service', 'ceph', 'start', osd_name],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3354
3355
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the OSD with the init system its data dir is tagged with.

    The tag is a file named upstart, sysvinit, systemd, openrc or bsdrc
    in the OSD directory; they are looked up in that order.

    Raises Error when there is no tag or when the stop command fails.
    """
    LOG.debug('Stopping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # One list element per argument: a single string holding
            # the whole command line would be taken as the name of the
            # program to execute.
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3410
3411
def detect_fstype(dev):
    """Return the file system type of dev.

    The type is probed with fstyp when FREEBSD is set, with blkid
    otherwise; the output of the command must be a single line.
    """
    if FREEBSD:
        probe = ['fstyp', '-u', dev]
    else:
        probe = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    output = _check_output(args=probe)
    return must_be_one_line(output)
3435
3436
def dmcrypt_is_mapped(uuid):
    """Return /dev/mapper/<uuid> when that path exists, None otherwise."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3443
3444
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the encrypted partition dev and return what _dmcrypt_map returns.

    The partition type tells whether the partition is plain (mapped
    with a key size of 256) or luks.

    Raises Error when the partition type is neither.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        luks, parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        luks, parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=parameters,
        luks=luks,
        format_dev=False,
    )
3466
3467
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """Mount dev on a temporary directory, activate it and move the mount.

    Returns the (cluster, osd_id) pair of the activated OSD.

    Raises FilesystemTypeError when the file system type cannot be
    detected, and Error when the OSD is deactivated and reactivate is
    not set or when something else occupies the final OSD location.
    """
    if dmcrypt:
        # the uuid itself is not needed here; the call is kept so a
        # failure to read it still surfaces before mapping
        get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # check if the disk is deactive, change the journal owner, group
    # mode for correct user and group.
    # deactive is the flag to activate a deactive osd.
    deactive = os.path.exists(os.path.join(path, 'deactive'))
    if deactive and not reactivate:
        # logging to syslog will help us easy to know udev triggered failure
        unmount(path)
        # we need to unmap again because dmcrypt map will create again
        # on bootup stage (due to deactivate)
        if '/dev/mapper/' in dev:
            dmcrypt_unmap(dev.replace('/dev/mapper/', ''))
        LOG.info('OSD deactivated! reactivate with: --reactivate')
        raise Error('OSD deactivated! reactivate with: --reactivate')

    osd_id = None
    cluster = None
    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        active = False
        other = False
        src_dev = os.stat(path).st_dev
        final_location = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        try:
            dst_dev = os.stat(final_location).st_dev
            if src_dev == dst_dev:
                active = True
            elif dst_dev != os.stat(STATEDIR + '/osd').st_dev:
                other = True
            elif os.listdir(get_mount_point(cluster, osd_id)):
                LOG.info(get_mount_point(cluster, osd_id) +
                         " is not empty, won't override")
                other = True
        except OSError:
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:  # deliberately broad: clean up, then re-raise unchanged
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3575
3576
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate an OSD whose data lives in a plain directory.

    The directory is handed to activate(); when an init system is
    requested, a symlink to it is kept at the canonical
    STATEDIR/osd/{cluster}-{osd_id} location.

    :param path: directory holding the OSD data
    :param activate_key_template: forwarded unchanged to activate()
    :param init: init system name; None or 'none' skips the symlink step
    :return: a (cluster, osd_id) tuple
    :raises Error: if path does not exist or the symlink cannot be
                   removed or created
    """
    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError:
                        # only filesystem failures are reported as Error;
                        # KeyboardInterrupt/SystemExit must propagate
                        raise Error('unable to remove old symlink', canonical)
                else:
                    # already pointing at the right place
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path))

    return cluster, osd_id
3616
3617
def find_cluster_by_uuid(_uuid):
    """
    Return the name of the cluster whose conf file in SYSCONFDIR
    declares the given fsid, or None when there is no match.

    A conf file named ceph.conf that has no fsid is accepted as a
    fallback, provided it is the only conf file lacking one.
    """
    wanted = _uuid.lower()
    if not os.path.exists(SYSCONFDIR):
        return None

    suffix = '.conf'
    without_fsid = []
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith(suffix):
            continue
        name = entry[:-len(suffix)]
        try:
            fsid = get_fsid(name)
        except Error as e:
            # only a missing fsid is tolerated, anything else is fatal
            if 'getting cluster uuid from configuration failed' not in str(e):
                raise e
            without_fsid.append(name)
            continue
        if fsid == wanted:
            return name

    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if without_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3646
3647
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Bring the OSD data directory at path into the active state.

    Reads the cluster and OSD uuids from the directory, allocates an
    OSD id if there is none yet, runs mkfs while the 'ready' marker is
    missing, records the init system marker and writes the 'active'
    marker.

    :param path: OSD data directory
    :param activate_key_template: format string for the keyring path,
                                  given cluster and statedir
    :param init: init system name, 'auto', 'none' or None
    :return: a (osd_id, cluster) tuple
    :raises Error: if the cluster uuid, the cluster conf or the OSD
                   uuid cannot be found
    """
    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
            path=path,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    ready_marker = os.path.join(path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            # the cluster configuration wins over detection
            init = get_conf(
                cluster=cluster,
                variable='init'
            )
            if init is None:
                init = init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for candidate in INIT_SYSTEMS:
        if candidate == init:
            continue
        try:
            os.unlink(os.path.join(path, candidate))
        except OSError:
            pass

    active_marker = os.path.join(path, 'active')
    if not os.path.exists(active_marker):
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3725
3726
def main_activate(args):
    """
    Entry point of the activate subcommand.

    args.path may be a block device, which is mounted and activated,
    or a directory, which is activated in place. Unless
    args.no_start_daemon is set, the OSD daemon is then started:
    directly through ceph-osd when args.mark_init is 'none', through
    start_daemon() for any other non-None value.

    :raises Error: if args.path does not exist, is neither a directory
                   nor a block device, or carries a multipath partition
                   type without being a multipath device
    """
    cluster = None
    osd_id = None

    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stmode_is_diskdevice(mode):
            tagged_mpath = (
                is_partition(args.path) and
                (get_partition_type(args.path) ==
                 PTYPE['mpath']['osd']['ready'])
            )
            if tagged_mpath and not is_mpath(args.path):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        journal_dangling = (os.path.islink(osd_journal) and
                            not os.access(osd_journal, os.F_OK))
        if journal_dangling:
            LOG.info("activate: Journal not present, not starting, yet")
            return

        if args.no_start_daemon:
            return

        if args.mark_init == 'none':
            # no init system: run the daemon ourselves
            command_check_call(
                [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={journal}'.format(journal=osd_journal),
                ],
            )
        elif args.mark_init is not None:
            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3794
3795
def main_activate_lockbox(args):
    """
    Entry point of the lockbox activation: runs
    main_activate_lockbox_protected() while holding activate_lock.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3799
3800
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox found on the partition args.path.

    The caller is expected to already hold activate_lock.
    """
    target = DevicePartition.factory(
        path=None, dev=args.path, args=args)

    box = Lockbox(args)
    box.set_partition(target)
    box.activate()
3808
3809
3810 ###########################
3811
def _mark_osd_out(cluster, osd_id):
    """
    Run `ceph osd out` for the given numeric osd id.

    NOTE(review): the cluster argument is accepted but not passed to
    the ceph command.
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    osd_name = 'osd.%d' % osd_id
    command(['ceph', 'osd', 'out', osd_name])
3820
3821
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is read from the JSON output of `ceph osd dump` for the
    given cluster.

    :param cluster: cluster name passed as --cluster
    :param osd_id: osd id, anything int() accepts
    :return: the status code described above
    :raises Error: if the osd is not listed in the dump
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    found = False
    status_code = 0
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # compare by value: identity checks against int literals
            # depend on interpreter caching and warn on recent Pythons
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not osd.%s in osd tree!' % osd_id)
    return status_code
3854
3855
def _remove_osd_directory_files(mounted_path, cluster):
    """
    To remove the 'ready', 'active', INIT-specific files.

    The init marker name comes from the `init` variable of the cluster
    configuration, falling back to init_get(). A marker that does not
    exist is only logged, it is not an error.

    :param mounted_path: directory where the OSD data is mounted
    :param cluster: cluster name used to look up the init variable
    """
    for marker in ('ready', 'active'):
        marker_path = os.path.join(mounted_path, marker)
        if os.path.exists(marker_path):
            os.remove(marker_path)
            LOG.info('Remove `%s` file.', marker)
        else:
            LOG.info('`%s` file is already removed.', marker)

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    init = get_conf(
        cluster=cluster,
        variable='init'
    )
    if init is None:
        init = init_get()

    # The init marker may legitimately be absent; treat it like the
    # other markers instead of failing with OSError.
    init_path = os.path.join(mounted_path, init)
    if os.path.exists(init_path):
        os.remove(init_path)
        LOG.info('Remove `%s` file.', init)
    else:
        LOG.info('`%s` file is already removed.', init)
3884
3885
def main_deactivate(args):
    """
    Entry point of the deactivate subcommand: runs
    main_deactivate_locked() while holding activate_lock.
    """
    with activate_lock:
        main_deactivate_locked(args)
3889
3890
def main_deactivate_locked(args):
    """
    Deactivate the OSD selected by args.deactivate_by_id or args.path.

    Depending on the status reported by _check_osd_status() the OSD is
    optionally marked out and its daemon stopped; nothing more is done
    when it is already down. Otherwise the data directory is flagged
    as deactivated (unless args.once), unmounted and, for dmcrypt
    OSDs, the lockbox is unmounted and the mappings removed.

    :raises Error: if no partition matches the request
    """
    wanted_id = args.deactivate_by_id
    wanted_path = args.path
    target_dev = None
    devices = list_devices()

    # list all devices and found we need; the last match wins
    for device in devices:
        if 'partitions' not in device:
            continue
        for candidate in device.get('partitions'):
            id_matches = (wanted_id and
                          'whoami' in candidate and
                          candidate['whoami'] == wanted_id)
            if id_matches or (wanted_path and
                              'path' in candidate and
                              candidate['path'] == wanted_path):
                target_dev = candidate
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # set up all we need variable
    osd_id = target_dev['whoami']
    mounted_path = target_dev['mount']
    dmcrypt = bool(Ptype.is_dmcrypt(target_dev['ptype'], 'osd'))

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code == OSD_STATUS_IN_UP:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_IN_DOWN:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        LOG.info("OSD already out/down. Do not do anything now.")
        return
    elif status_code == OSD_STATUS_OUT_UP:
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_OUT_DOWN:
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    if not args.once:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Write deactivate to osd directory!
        deactive_marker = os.path.join(mounted_path, 'deactive')
        with open(deactive_marker, 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return

    lockbox = os.path.join(STATEDIR, 'osd-lockbox')
    command(['umount', os.path.join(lockbox, target_dev['uuid'])])

    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        uuid_key = name + '_uuid'
        if uuid_key in target_dev:
            dmcrypt_unmap(target_dev[uuid_key])
3956
3957 ###########################
3958
3959
def _remove_lockbox(uuid, cluster):
    """
    Unmount the lockbox of the given uuid and drop the symlinks in the
    lockbox directory that point at it.

    Does nothing when STATEDIR/osd-lockbox does not exist.
    """
    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox')
    if not os.path.exists(lockbox_dir):
        return

    canonical = os.path.join(lockbox_dir, uuid)
    command(['umount', canonical])

    for entry in os.listdir(lockbox_dir):
        candidate = os.path.join(lockbox_dir, entry)
        if not os.path.islink(candidate):
            continue
        if os.readlink(candidate) == canonical:
            os.unlink(candidate)
3970
3971
def destroy_lookup_device(args, predicate, description):
    """
    Find the first partition accepted by predicate.

    Unmounted lockbox partitions are activated first. Each dmcrypt
    partition is then mapped if it is not mapped already, has its OSD
    information filled in by list_dev_osd(), and is unmapped again if
    it was mapped here.

    :param args: namespace providing verbose and dmcrypt_key_dir
    :param predicate: callable taking a partition description
    :param description: text used in the error when nothing matches
    :return: a (dmcrypt, partition) tuple, dmcrypt being a boolean
    :raises Error: if no partition satisfies predicate
    """
    devices = list_devices()

    # first pass: make sure every lockbox is mounted
    for device in devices:
        for part in device.get('partitions', []):
            if part['type'] != 'lockbox':
                continue
            if is_mounted(part['path']):
                continue
            main_activate_lockbox_protected(
                argparse.Namespace(verbose=args.verbose,
                                   path=part['path']))

    # second pass: inspect the partitions and evaluate the predicate
    for device in devices:
        for part in device.get('partitions', []):
            encrypted = bool(part['dmcrypt'])
            if encrypted:
                mapped_path = dmcrypt_is_mapped(part['uuid'])
                mapped_here = not mapped_path
                if mapped_here:
                    mapped_path = dmcrypt_map(part['path'],
                                              args.dmcrypt_key_dir)
                list_dev_osd(mapped_path, {}, part)
                if mapped_here:
                    dmcrypt_unmap(part['uuid'])
            if predicate(part):
                return encrypted, part

    raise Error('found no device matching ', description)
4000
4001
def main_destroy(args):
    """
    Entry point of the destroy subcommand: runs main_destroy_locked()
    while holding activate_lock.
    """
    with activate_lock:
        main_destroy_locked(args)
4005
4006
def main_destroy_locked(args):
    """
    Destroy (or purge, with args.purge) the OSD selected by args.path
    or args.destroy_by_id.

    The OSD must be down. After `ceph osd destroy|purge`, the dmcrypt
    mappings and the lockbox are removed for encrypted OSDs and, when
    args.zap is set, the base device is zapped.

    :raises Error: if neither a path nor an osd id is given, if path is
                   not a partition, if no matching device is found, or
                   if the OSD is still up
    """
    osd_id = args.destroy_by_id
    path = args.path
    target_dev = None

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)

    if path:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # without a selector there is nothing to look up; fail with a
        # proper Error instead of a TypeError on target_dev below
        raise Error('destroy requires either a partition path or an osd id')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
            status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    if args.purge:
        action = 'purge'
    else:
        action = 'destroy'
    LOG.info("Prepare to %s osd.%s" % (action, osd_id))
    command([
        'ceph',
        'osd',
        action,
        'osd.%s' % osd_id,
        '--yes-i-really-mean-it',
    ])

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4066
4067
def get_space_osd_uuid(name, path):
    """
    Return the OSD uuid recorded on the space device at path, as
    printed on the first output line of
    `ceph-osd --get-device-fsid`.

    :param name: space name, only used in messages
    :param path: block device to query
    :raises Error: if path does not exist, is not a block device, has a
                   multipath journal/block partition type without being
                   a multipath device, or the ceph-osd call fails
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not path_is_diskdevice(path):
        raise Error('%s is not a block device' % path)

    mpath_ptypes = (PTYPE['mpath']['journal']['ready'],
                    PTYPE['mpath']['block']['ready'])
    if (is_partition(path) and
            get_partition_type(path) in mpath_ptypes and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    query = ['ceph-osd', '--get-device-fsid', path]
    try:
        out = _check_output(args=query, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )

    # only the first line of the output is the uuid
    value = str(out).partition('\n')[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
4099
4100
def main_activate_space(name, args):
    """
    Activate the OSD that owns the space device (journal, block, ...)
    args.dev.

    The OSD uuid is read from the space device, the matching data
    partition is looked up under /dev/disk/by-partuuid, activated and
    its daemon started. Nothing happens when the request is suppressed
    or when the data partition is not present yet.

    :param name: space name, forwarded to get_space_osd_uuid()
    :raises Error: if args.dev does not exist
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    if is_suppressed(args.dev):
        LOG.info('suppressed activate request on space %s', args.dev)
        return

    with activate_lock:
        dev = (dmcrypt_map(args.dev, args.dmcrypt_key_dir)
               if args.dmcrypt else args.dev)
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        data_path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(data_path):
            LOG.info('suppressed activate request on %s', data_path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(data_path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        (cluster, osd_id) = mount_activate(
            dev=data_path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
        )
4148
4149
4150 ###########################
4151
4152
def main_activate_all(args):
    """
    Activate every OSD data partition listed in
    /dev/disk/by-parttypeuuid.

    Entries are named <partition type>.<partition uuid>. A failure on
    one partition is printed to stderr and does not stop the scan.

    :raises Error: after the scan, if any partition failed to activate
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return

    failed = False
    for entry in os.listdir(scan_dir):
        if '.' not in entry:
            continue
        (tag, part_uuid) = entry.split('.')

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, entry)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )

            except Exception as e:
                print(
                    '{prog}: {msg}'.format(prog=args.prog, msg=e),
                    file=sys.stderr
                )

                failed = True

    if failed:
        raise Error('One or more partitions failed to activate')
4201
4202
4203 ###########################
4204
def is_swap(dev):
    """
    Tell whether dev is listed as a swap device in PROCDIR/swaps.

    Both dev and the listed devices are resolved with
    os.path.realpath() before being compared.

    :param dev: path of the device to look for
    :return: True if dev is an active swap device, False otherwise
    """
    dev = os.path.realpath(dev)
    # Read as text: in binary mode the entries are bytes on Python 3
    # and would never compare equal to the str path in dev.
    with open(PROCDIR + '/swaps', 'r') as proc_swaps:
        # the first line is the column header
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4218
4219
def get_oneliner(base, name):
    """
    Return the first line of the file base/name as a string, without
    trailing whitespace, or None if it is not a regular file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'rb') as _file:
        first_line = _file.readline()
        return _bytes2str(first_line.rstrip())
4226
4227
def get_dev_fs(dev):
    """
    Return the file system type found on dev, or None if it cannot be
    determined.

    `fstyp -u` is used on FreeBSD, `blkid -s TYPE` everywhere else.
    """
    if FREEBSD:
        fstype, _, ret = command(['fstyp', '-u', dev])
        if ret == 0:
            return fstype
        return None

    fscheck, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in fscheck:
        return None
    # second word of the output is TYPE="<fstype>"
    type_field = fscheck.split()[1]
    return type_field.split('"')[1]
4252
4253
def split_dev_base_partnum(dev):
    """
    Split a partition device into its base device and partition
    number.

    For non multipath devices the number is read from the `partition`
    file under block_path(dev).

    :param dev: path of the partition device
    :return: a (base, partnum) tuple
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # use a context manager so the file is closed right away
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4263
4264
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for part, or
    None if blkid does not report one.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_TYPE')
4267
4268
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for part, or
    None if blkid does not report one.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_UUID')
4271
4272
def get_blkid_partition_info(dev, what=None):
    """
    Parse the KEY=VALUE output of `blkid -o udev -p dev`.

    :param dev: device to probe
    :param what: optional key to look up
    :return: the value stored under what (None if absent) when what is
             given, otherwise the dict of all key/value pairs
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # split on the first '=' only: a value (a label for instance)
        # may itself contain '='; lines without '=' carry no pair
        key, sep, value = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
4291
4292
def more_osd_info(path, uuid_map, desc):
    """
    Fill desc with what the OSD data directory at path says about
    itself: ceph_fsid, cluster, whoami and, for each name in
    Space.NAMES, <name>_uuid plus <name>_dev when the uuid is a key of
    uuid_map.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')

    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4304
4305
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the OSD data device dev in desc.

    desc['state'] is 'active' when the device is mounted, 'prepared'
    when it can be mounted temporarily and holds a magic file, and
    'unprepared' otherwise. A MountError during the temporary mount is
    ignored.
    """
    mount_point = is_mounted(dev)
    fs_type = get_dev_fs(dev)
    desc['mount'] = mount_point
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mount_point:
        desc['state'] = 'active'
        more_osd_info(mount_point, uuid_map, desc)
        return

    if not fs_type:
        return

    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if not tpath:
            return
        try:
            magic = get_oneliner(tpath, 'magic')
            if magic is not None:
                desc['magic'] = magic
                desc['state'] = 'prepared'
                more_osd_info(tpath, uuid_map, desc)
        finally:
            unmount(tpath)
    except MountError:
        pass
4327
4328
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device dev in desc.

    desc['state'] is 'active' when the device is mounted, 'prepared'
    when it can be mounted temporarily and holds a magic file, and
    'unprepared' otherwise. desc['lockbox_for'] is set when the
    osd-uuid found in the lockbox is a key of uuid_map.

    A failure of the temporary mount is ignored.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        try:
            tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
            args = ['mount', '-t', 'ext4', dev, tpath]
            LOG.debug('Mounting lockbox ' + str(" ".join(args)))
            try:
                command_check_call(args)
            except subprocess.CalledProcessError:
                # Nothing got mounted, so unmount() will never run for
                # tpath: remove the temporary mount point here instead
                # of leaving it behind.
                try:
                    os.rmdir(tpath)
                except OSError:
                    pass
                raise
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # always release the temporary mount, even if reading
                # the markers raised
                unmount(tpath)
        except subprocess.CalledProcessError:
            pass
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4352
4353
def list_format_lockbox_plain(dev):
    """
    Return the plain text fragments telling which OSD a lockbox
    belongs to: the device when known, else the OSD uuid, else
    nothing.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4361
4362
def list_format_more_osd_info_plain(dev):
    """
    Return the plain text fragments describing an OSD: its cluster
    (or the unknown cluster fsid), its osd id and the device of each
    space listed in Space.NAMES.
    """
    fragments = []
    ceph_fsid = dev.get('ceph_fsid')
    if ceph_fsid:
        cluster = dev.get('cluster')
        if cluster:
            fragments.append('cluster ' + cluster)
        else:
            fragments.append('unknown cluster ' + ceph_fsid)

    whoami = dev.get('whoami')
    if whoami:
        fragments.append('osd.%s' % whoami)

    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            fragments.append(name + ' %s' % space_dev)
    return fragments
4376
4377
def list_format_dev_plain(dev, prefix=''):
    """
    Format the description of one device or partition as a single
    line: prefix, path and a comma separated list of properties that
    depends on the partition type.
    """
    ptype = dev['ptype']
    lockbox_ptypes = (PTYPE['regular']['lockbox']['ready'],
                      PTYPE['mpath']['lockbox']['ready'])
    desc = []

    if ptype == PTYPE['regular']['osd']['ready']:
        desc = ['ceph data', dev['state']]
        desc = desc + list_format_more_osd_info_plain(dev)
    elif ptype in lockbox_ptypes:
        desc = ['ceph lockbox', dev['state']]
        desc = desc + list_format_lockbox_plain(dev)
    elif Ptype.is_dmcrypt(ptype, 'osd'):
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if not holders:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(holders)]
    elif Ptype.is_regular_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        desc.append('ceph ' + name)
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if holders and len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # anything that is not a ceph partition
        desc.append(dev['type'])
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])

    if dev.get('mount'):
        desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4425
4426
def list_format_plain(devices):
    """
    Format the device descriptions as plain text, one line per device
    or partition. The partitions of a disk are listed under the disk,
    sorted by path.
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if not partitions:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
            continue
        lines.append('%s :' % device['path'])
        lines.extend(
            list_format_dev_plain(dev=part, prefix=' ')
            for part in sorted(partitions, key=lambda x: x['path'])
        )
    return "\n".join(lines)
4439
4440
def list_dev(dev, uuid_map, space_map):
    """
    Build the description of the device or partition dev.

    The result always has the keys path, dmcrypt, is_partition, ptype
    and type; the other keys depend on the partition type.

    :param dev: path of the device
    :param uuid_map: partition uuid to device path map, forwarded to
                     list_dev_osd() and list_dev_lockbox()
    :param space_map: space partition uuid to the device it is used by
    :return: a dict describing dev
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # compare with the multipath *lockbox* type: the osd type can
        # never match inside this branch
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # only inspect the content when exactly one device holds it
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        # not a ceph partition: report swap or a generic device
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4517
4518
def list_devices():
    """
    Return the description of every disk known to
    list_all_partitions().

    A first pass over the partitions collects the partition uuid to
    device map and, by temporarily mounting each OSD data partition,
    the space uuid to data device map. A second pass builds the
    descriptions with list_dev(); a disk with partitions is returned
    as {'path': ..., 'partitions': [...]}.
    """
    partmap = list_all_partitions()

    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for part in parts:
            dev = get_dev_path(part)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype not in Ptype.get_ready_by_name('osd'):
                continue

            if Ptype.is_dmcrypt(ptype, 'osd'):
                holders = is_held(dev)
                # the content is readable only through a single holder
                if len(holders) != 1:
                    continue
                dev_to_mount = get_dev_path(holders[0])
            else:
                dev_to_mount = dev

            fs_type = get_dev_fs(dev_to_mount)
            if fs_type is None:
                continue

            mount_options = get_mount_options(cluster='ceph',
                                              fs_type=fs_type)
            try:
                tpath = mount(dev=dev_to_mount,
                              fstype=fs_type, options=mount_options)
                try:
                    for name in Space.NAMES:
                        space_uuid = get_oneliner(tpath,
                                                  name + '_uuid')
                        if space_uuid:
                            space_map[space_uuid.lower()] = dev
                finally:
                    unmount(tpath)
            except MountError:
                pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        base_path = get_dev_path(base)
        if parts:
            disk = {'path': base_path}
            disk['partitions'] = [
                list_dev(get_dev_path(part), uuid_map, space_map)
                for part in sorted(parts)
            ]
            devices.append(disk)
        else:
            device = list_dev(base_path, uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
    LOG.debug("list_devices: " + str(devices))
    return devices
4581
4582
def list_zfs():
    """
    Print one line per dataset reported by
    `zfs list -o name,mountpoint`, flagging the ones whose mount point
    contains an `active` file as ceph data.

    A subprocess.CalledProcessError from the command is logged and
    re-raised.
    """
    try:
        out, err, ret = command(['zfs', 'list', '-o', 'name,mountpoint'])
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise

    # the first line is the column header
    for line in out.splitlines()[1:]:
        fields = line.split()
        vdev = fields[0]
        mountpoint = fields[1]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            leaf = os.path.split(mountpoint)[1]
            print(vdev, "ceph data, active, cluster ceph,", leaf,
                  "mounted on:", mountpoint)
        else:
            print(vdev + " other, zfs, mounted on: " + mountpoint)
4605
4606
def main_list(args):
    """
    Entry point of the list subcommand: dispatches to the FreeBSD or
    the generic implementation while holding activate_lock.
    """
    with activate_lock:
        lister = main_list_freebsd if FREEBSD else main_list_protected
        lister(args)
4613
4614
def main_list_protected(args):
    """
    Print the devices found by list_devices(), as JSON when
    args.format is 'json' and as plain text otherwise.

    When args.path is given, only the devices whose path ends with one
    of the paths are printed; existing paths are resolved with
    os.path.realpath() first and each path is used as a regular
    expression.
    """
    devices = list_devices()
    if args.path:
        paths = [
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        ]
        selected_devices = [
            device
            for device in devices
            for path in paths
            if re.search(path + '$', device['path'])
        ]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
        return

    output = list_format_plain(selected_devices)
    if output:
        print(output)
4637
4638
def main_list_freebsd(args):
    """
    FreeBSD implementation of the list subcommand; args is unused.
    """
    # Only ZFS Filestore partitions are handled for now: list_zfs()
    # reports the VDEVs and their mount points, as found in
    # > zfs list
    # NAME USED AVAIL REFER MOUNTPOINT
    # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
    # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
    list_zfs()
4647
4648
###########################
#
# Mark devices that we want to suppress activates on with a
# file like
#
#  /var/lib/ceph/tmp/suppress-activate.sdb
#
# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix. That
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
#
4660
def is_suppressed(path):
    """Tell whether activation is suppressed for the device at ``path``.

    ``path`` is resolved with os.path.realpath(). The device name and
    then each of its shorter prefixes are looked up as
    ``SUPPRESS_PREFIX + name``, so a marker for ``sdb`` also covers
    ``sdb1``, ``sdb2``, etc.

    Returns True when a marker file is found and False otherwise,
    including when the resolved path is not under /dev/, is not a disk
    device, or when the lookup itself fails.
    """
    disk = os.path.realpath(path)
    try:
        if not disk.startswith('/dev/') or not ldev_is_diskdevice(disk):
            return False
        base = get_dev_name(disk)
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            # drop the last character and try the shorter prefix
            base = base[:-1]
    except Exception:
        # Best effort: a device that cannot be inspected is reported as
        # not suppressed. KeyboardInterrupt and SystemExit are no longer
        # swallowed here.
        return False
    # No marker matched: answer with an explicit False rather than None.
    return False
4674
4675
def set_suppress(path):
    """Create the suppress marker file for the device at ``path``.

    Raises Error when the resolved path does not exist or when ``path``
    is not reported as a disk device by ldev_is_diskdevice().
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)

    name = get_dev_name(resolved)
    marker = SUPPRESS_PREFIX + name  # noqa
    # the marker is an empty file: create (or truncate) it and close it
    open(marker, 'w').close()
    LOG.info('set suppress flag on %s', name)
4687
4688
def unset_suppress(path):
    """Remove the suppress marker file of the device at ``path``.

    Raises Error when the resolved path does not exist, when ``path`` is
    not reported as a disk device, when no marker file exists for the
    device or when removing the marker fails.
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    assert resolved.startswith('/dev/')

    name = get_dev_name(resolved)
    marker = SUPPRESS_PREFIX + name  # noqa
    if not os.path.exists(marker):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(marker)
        LOG.info('unset suppress flag on %s', name)
    except OSError as e:
        raise Error('failed to unsuppress', e)
4707
4708
def main_suppress(args):
    """Handle ``suppress-activate``: mark ``args.path`` as suppressed."""
    target = args.path
    set_suppress(target)
4711
4712
def main_unsuppress(args):
    """Handle ``unsuppress-activate``: clear the mark on ``args.path``."""
    target = args.path
    unset_suppress(target)
4715
4716
def main_zap(args):
    """Handle ``zap``: call zap() on each device of ``args.dev`` in order."""
    for device in args.dev:
        zap(device)
4720
4721
def main_trigger(args):
    """Activate the partition ``args.dev`` according to its partition type.

    Unless ``args.sync`` is set, the work is handed over to systemd
    (``systemctl --no-block restart ceph-disk@<dev>.service``) or to
    upstart (``initctl emit ceph-disk``) when one of them is detected,
    and the function returns immediately.

    Otherwise the partition type of ``args.dev`` is compared with the
    ``ready`` GUIDs of PTYPE and the matching ``ceph-disk`` sub-command
    is run. Raises Error when the partition type is not recognized or
    when the sub-command returns a non zero code.
    """
    LOG.debug("main_trigger: " + str(args))

    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        escaped_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return

    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command([
            'initctl',
            'emit',
            'ceph-disk',
            'dev={dev}'.format(dev=args.dev),
            'pid={pid}'.format(pid=os.getpid()),
        ])
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    # Ordered rules, first match wins:
    # (PTYPE families, partition roles, sub-command, extra options)
    unencrypted = ('regular', 'mpath')
    encrypted = ('plain', 'luks')
    block_roles = ('block', 'block.db', 'block.wal')
    rules = (
        (unencrypted, ('osd',), 'activate', []),
        (encrypted, ('osd',), 'activate', ['--dmcrypt']),
        (unencrypted, ('journal',), 'activate-journal', []),
        (encrypted, ('journal',), 'activate-journal', ['--dmcrypt']),
        (unencrypted, block_roles, 'activate-block', []),
        (encrypted, block_roles, 'activate-block', ['--dmcrypt']),
        (unencrypted, ('lockbox',), 'activate-lockbox', []),
    )
    for families, roles, subcommand, options in rules:
        ready = [PTYPE[family][role]['ready']
                 for family in families
                 for role in roles]
        if parttype in ready:
            out, err, ret = command(
                ceph_disk + [subcommand] + options + [args.dev]
            )
            break
    else:
        raise Error('unrecognized partition type %s' % parttype)

    if ret == 0:
        LOG.debug(out)
        LOG.debug(err)
        return

    LOG.info(out)
    LOG.error(err)
    raise Error('return code ' + str(ret))
4857
4858
def _fix_find_command(directory, uid, gid, recursive):
    """Build the find command that chowns and relabels ``directory``."""
    c = [
        'find',
        directory,
        '-exec',
        'chown',
        ':'.join((uid, gid)),
        '{}',
        '+',
        '-exec',
        'restorecon',
        '{}',
        '+',
    ]
    # Just pass -maxdepth 0 for non-recursive calls
    if not recursive:
        c += ['-maxdepth', '0']
    return c


def _fix_chown_command(directory, uid, gid, recursive):
    """Build the chown command that fixes the owner of ``directory``."""
    c = ['chown']
    if recursive:
        c.append('-R')
    c += [':'.join((uid, gid)), directory]
    return c


def _fix_restorecon_command(directory, uid, gid, recursive):
    """Build the restorecon command that relabels ``directory``.

    ``uid`` and ``gid`` are accepted (and ignored) so that the three
    command builders share the same signature.
    """
    c = ['restorecon']
    if recursive:
        c.append('-R')
    c.append(directory)
    return c


def main_fix(args):
    """Handle ``ceph-disk fix``: repair ownership and/or SELinux labels.

    The flags read from ``args`` are:

    * ``system``: relabel the whole system except the ceph paths
    * ``all``: implies ``system`` and runs find with chown + restorecon
      on every ceph path
    * ``permissions``: chown every ceph path
    * ``selinux``: restorecon every ceph path

    Raises Error when SELinux is required but not enabled, when one of
    the ceph daemons is running or when one of the commands fails.
    """
    # A list of (path, uid, gid, blocking, recursive) entries. Blocking
    # entries are run one after the other, the others are started in
    # the background and waited for once all of them are started.
    fix_table = [
        ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
        ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
        ('/etc/ceph', 'root', ROOTGROUP, True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/radosgw', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        if directory == '/var/lib/ceph/osd':
            fix_table.append((directory, 'ceph', 'ceph', True, False))
        else:
            fix_table.append((directory, 'ceph', 'ceph', True, True))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # Preliminary checks
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for directory, _, _, _, _ in fix_table:
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            c.append('-e')
            c.append(directory)

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # The three phases share the same structure and only differ by the
    # command they run and by their messages. They are run in this
    # order: find (relabel + chown ~simultaneously), permissions, labels.
    # The logging stays in this function so that the funcName shown in
    # the log lines is unchanged.
    phases = (
        (args.all, _fix_find_command, "all_processes",
         "Failed to fix ", " fix failed",
         "A background find process failed"),
        (args.permissions, _fix_chown_command, "permissions_processes",
         "Failed to chown ", " chown failed",
         "A background permissions process failed"),
        (args.selinux, _fix_restorecon_command, "selinux_processes",
         "Failed to restore labels for ", " relabel failed",
         "A background selinux process failed"),
    )
    for (enabled, build_command, label,
         failure_prefix, failure_suffix, background_failure) in phases:
        # The list of background processes of this phase
        processes = []
        if enabled:
            for directory, uid, gid, blocking, recursive in fix_table:
                # Skip directories/files that are not installed
                if not os.access(directory, os.F_OK):
                    continue

                c = build_command(directory, uid, gid, recursive)
                if blocking:
                    out, err, ret = command(c)

                    if ret:
                        LOG.error(failure_prefix + directory)
                        LOG.error(err)
                        raise Error(directory + failure_suffix)
                else:
                    processes.append(command_init(c))

        # NOTE(review): the debug line and the wait are done for every
        # phase, including the disabled ones (with an empty list) --
        # confirm this matches the intended nesting.
        LOG.debug(label + ": " + str(processes))
        for process in processes:
            out, err, ret = command_wait(process)
            if ret:
                LOG.error(background_failure)
                LOG.error(err)
                raise Error("background failed")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5044
5045
def setup_statedir(dir):
    """Use ``dir`` as the state directory and derive the related globals.

    ``dir`` and ``dir``/tmp are created when they do not exist yet (their
    parents are not created). The module globals STATEDIR, prepare_lock,
    activate_lock and SUPPRESS_PREFIX are then (re)defined from ``dir``.

    The parameter shadows the ``dir`` builtin; the name is kept so that
    callers passing it by keyword keep working.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR
    global prepare_lock
    global activate_lock
    global SUPPRESS_PREFIX

    STATEDIR = dir

    for directory in (STATEDIR, STATEDIR + "/tmp"):
        if os.path.exists(directory):
            continue
        try:
            os.mkdir(directory)
        except OSError as e:
            # Several ceph-disk processes may run at the same time:
            # losing the race to create the directory is not an error.
            if e.errno != errno.EEXIST or not os.path.isdir(directory):
                raise

    prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
    activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5066
5067
def setup_sysconfdir(dir):
    """Record ``dir`` in the module global SYSCONFDIR."""
    global SYSCONFDIR
    SYSCONFDIR = dir
5071
5072
def parse_args(argv):
    """Parse the ``ceph-disk`` command line ``argv`` (program name excluded).

    Returns the argparse namespace holding the global options, the
    options of the selected sub-command, ``prog`` (the program name) and
    ``func`` (the function implementing the sub-command).

    Exits with a usage error when no sub-command is given.
    """
    parser = argparse.ArgumentParser(
        'ceph-disk',
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=None,
        help='be more verbose',
    )
    parser.add_argument(
        '--log-stdout',
        action='store_true', default=None,
        help='log to stdout',
    )
    parser.add_argument(
        '--prepend-to-path',
        metavar='PATH',
        default='/usr/bin',
        help=('prepend PATH to $PATH for backward compatibility '
              '(default /usr/bin)'),
    )
    parser.add_argument(
        '--statedir',
        metavar='PATH',
        default='/var/lib/ceph',
        help=('directory in which ceph state is preserved '
              '(default /var/lib/ceph)'),
    )
    parser.add_argument(
        '--sysconfdir',
        metavar='PATH',
        default='/etc/ceph',
        help=('directory in which ceph configuration files are found '
              '(default /etc/ceph)'),
    )
    parser.add_argument(
        '--setuser',
        metavar='USER',
        default=None,
        help='use the given user for subprocesses, rather than ceph or root'
    )
    parser.add_argument(
        '--setgroup',
        metavar='GROUP',
        default=None,
        help='use the given group for subprocesses, rather than ceph or root'
    )
    parser.set_defaults(
        # we want to hold on to this, for later
        prog=parser.prog,
    )

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    # The registration order is the order shown in the help output.
    Prepare.set_subparser(subparsers)
    for make_parser in (
        make_activate_parser,
        make_activate_lockbox_parser,
        make_activate_block_parser,
        make_activate_journal_parser,
        make_activate_all_parser,
        make_list_parser,
        make_suppress_parser,
        make_deactivate_parser,
        make_destroy_parser,
        make_zap_parser,
        make_trigger_parser,
        make_fix_parser,
    ):
        make_parser(subparsers)

    args = parser.parse_args(argv)
    if not hasattr(args, 'func'):
        # With Python 3 a missing sub-command is not an error for
        # argparse and the namespace comes back without ``func``: report
        # it the way Python 2 does instead of failing later on with an
        # AttributeError.
        # NOTE(review): assumes every sub-command, including the ones
        # registered by Prepare.set_subparser, sets ``func`` -- confirm.
        parser.error('too few arguments')
    return args
5147
5148
def make_fix_parser(subparsers):
    """Register the ``fix`` sub-command, handled by main_fix().

    Returns the new parser.
    """
    parser = subparsers.add_parser(
        'fix',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # the description of this sub-command is an empty string
        description=textwrap.fill(textwrap.dedent("")),
        help='fix SELinux labels and/or file permissions',
    )
    # all the options are flags that default to False
    flags = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, text in flags:
        parser.add_argument(
            flag,
            action='store_true',
            default=False,
            help=text,
        )
    parser.set_defaults(func=main_fix)
    return parser
5185
5186
def make_trigger_parser(subparsers):
    """Register the ``trigger`` sub-command, handled by main_trigger().

    Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
    The partition given in argument is activated. The type of the
    partition (data, lockbox, journal etc.) is detected by its
    type. If the init system is upstart or systemd, the activation is
    delegated to it and runs asynchronously, which
    helps reduce the execution time of udev actions.
    """))
    parser = subparsers.add_parser(
        'trigger',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='activate any device (called by udev)',
    )
    add = parser.add_argument
    add('dev', help=('device'))
    add('--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to')
    add('--dmcrypt',
        action='store_true', default=None,
        help='map devices with dm-crypt')
    add('--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    add('--sync',
        action='store_true', default=None,
        help='do operation synchronously; do not trigger systemd')
    parser.set_defaults(func=main_trigger)
    return parser
5229
5230
def make_activate_parser(subparsers):
    """Register the ``activate`` sub-command, handled by main_activate().

    The description embeds the value STATEDIR has when the parser is
    built. Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Activate the OSD found at PATH (can be a directory
    or a device partition, possibly encrypted). When
    activated for the first time, a unique OSD id is obtained
    from the cluster. If PATH is a directory, a symbolic
    link is added in {statedir}/osd/ceph-$id. If PATH is
    a partition, it is mounted on {statedir}/osd/ceph-$id.
    Finally, the OSD daemon is run.

    If the OSD depends on auxiliary partitions (journal, block, ...)
    they need to be available otherwise activation will fail. It
    may happen if a journal is encrypted and cryptsetup was not
    run yet.
    """.format(statedir=STATEDIR)))
    parser = subparsers.add_parser(
        'activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph OSD',
    )
    add = parser.add_argument
    add('--mount',
        action='store_true', default=None,
        help='mount a block device [deprecated, ignored]')
    add('--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template')
    add('--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS)
    add('--no-start-daemon',
        action='store_true', default=None,
        help='do not start the daemon')
    add('path',
        metavar='PATH',
        help='path to block device or directory')
    add('--dmcrypt',
        action='store_true', default=None,
        help='map DATA and/or JOURNAL devices with dm-crypt')
    add('--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    add('--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD')
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate,
    )
    return parser
5299
5300
def make_activate_lockbox_parser(subparsers):
    """Register ``activate-lockbox``, handled by main_activate_lockbox().

    The description embeds the value STATEDIR has when the parser is
    built. Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
    where $uuid uniquely identifies the OSD that needs this lockbox
    to retrieve keys from the monitor and unlock its partitions.

    If the OSD has one or more auxiliary devices (journal, block, ...)
    symbolic links are created at {statedir}/osd-lockbox/$other_uuid
    and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
    allow a journal encrypted in a partition identified by $other_uuid to
    fetch the keys it needs from the monitor.

    Finally the OSD is activated, as it would be with ceph-disk activate.
    """.format(statedir=STATEDIR)))
    lockbox_parser = subparsers.add_parser(
        'activate-lockbox',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph lockbox',
    )
    add = lockbox_parser.add_argument
    add('--activate-key',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template')
    add('--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    add('path',
        metavar='PATH',
        help='path to block device')
    lockbox_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_lockbox,
    )
    return lockbox_parser
5340
5341
def make_activate_block_parser(subparsers):
    """Register ``activate-block`` and return its parser."""
    return make_activate_space_parser(name='block', subparsers=subparsers)
5344
5345
def make_activate_journal_parser(subparsers):
    """Register ``activate-journal`` and return its parser."""
    return make_activate_space_parser(name='journal', subparsers=subparsers)
5348
5349
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` sub-command and return its parser.

    ``name`` (``'block'`` or ``'journal'`` for the callers in this file)
    is used in the sub-command name, in the help texts and is passed to
    main_activate_space() together with the parsed arguments.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Activating a {name} partition is only meaningfull
    if it is encrypted and it will map it using
    cryptsetup.

    Finally the corresponding OSD is activated,
    as it would be with ceph-disk activate.
    """.format(name=name)))
    parser = subparsers.add_parser(
        'activate-%s' % name,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate an OSD via its %s device' % name,
    )
    add = parser.add_argument
    add('dev',
        metavar='DEV',
        help='path to %s block device' % name)
    add('--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template')
    add('--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS)
    add('--dmcrypt',
        action='store_true', default=None,
        help=('map data and/or auxiliariy (journal, etc.) '
              'devices with dm-crypt'))
    add('--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored')
    add('--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD')
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        # kept as a lambda: main() reads the __name__ of this callable
        func=lambda args: main_activate_space(name, args),
    )
    return parser
5403
5404
def make_activate_all_parser(subparsers):
    """Register ``activate-all``, handled by main_activate_all().

    Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
    The partitions containing auxiliary devices (journal, block, ...)
    are not activated.
    """))
    parser = subparsers.add_parser(
        'activate-all',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate all tagged OSD partitions',
    )
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_all,
    )
    return parser
5433
5434
def make_list_parser(subparsers):
    """Register the ``list`` sub-command, handled by main_list().

    Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Display all partitions on the system and their
    associated Ceph information, if any.
    """))
    parser = subparsers.add_parser(
        'list',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='List disks, partitions, and Ceph OSDs',
    )
    parser.add_argument(
        '--format',
        help='output format',
        default='plain',
        choices=['json', 'plain'],
    )
    # zero or more paths: an empty list means that nothing is filtered
    parser.add_argument(
        'path',
        metavar='PATH',
        nargs='*',
        help='path to block devices, relative to /sys/block',
    )
    parser.set_defaults(func=main_list)
    return parser
5460
5461
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    They are handled by main_suppress() and main_unsuppress(). Only the
    parser of ``suppress-activate`` is returned.
    """
    suppress_description = textwrap.fill(textwrap.dedent("""\
    Add a prefix to the list of suppressed device names
    so that they are ignored by all activate* subcommands.
    """))
    unsuppress_description = textwrap.fill(textwrap.dedent("""\
    Remove a prefix from the list of suppressed device names
    so that they are no longer ignored by all
    activate* subcommands.
    """))

    suppress_parser = subparsers.add_parser(
        'suppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=suppress_description,
        help='Suppress activate on a device (prefix)',
    )
    suppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    suppress_parser.set_defaults(func=main_suppress)

    unsuppress_parser = subparsers.add_parser(
        'unsuppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=unsuppress_description,
        help='Stop suppressing activate on a device (prefix)',
    )
    unsuppress_parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    unsuppress_parser.set_defaults(func=main_unsuppress)

    return suppress_parser
5498
5499
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` sub-command, handled by main_deactivate().

    Nothing is returned.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Deactivate the OSD located at PATH. It stops the OSD daemon
    and optionally marks it out (with --mark-out). The content of
    the OSD is left untouched.

    By default, the, ready, active, INIT-specific files are
    removed (so that it is not automatically re-activated by the
    udev rules or ceph-disk trigger) and the file deactive is
    created to remember the OSD is deactivated.

    If the --once option is given, the ready, active, INIT-specific
    files are not removed and the OSD will reactivate whenever
    ceph-disk trigger is run on one of the devices (journal, data,
    block, lockbox, ...).

    If the OSD is dmcrypt, remove the data dmcrypt map. When
    deactivate finishes, the OSD is down.
    """))
    parser = subparsers.add_parser(
        'deactivate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Deactivate a Ceph OSD',
    )
    add = parser.add_argument
    add('--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to')
    # PATH is optional on the command line
    add('path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory')
    add('--deactivate-by-id',
        metavar='<id>',
        help='ID of OSD to deactive')
    add('--mark-out',
        action='store_true', default=False,
        help='option to mark the osd out')
    add('--once',
        action='store_true', default=False,
        help='does not need --reactivate to activate again')
    parser.set_defaults(func=main_deactivate)
5553
5554
def make_destroy_parser(subparsers):
    """Register the ``destroy`` sub-command, handled by main_destroy().

    Nothing is returned.
    """
    # The text starts on the line following the escaped newline: when it
    # starts on the same line, the backslash is followed by a space (an
    # invalid escape sequence which ends up in the help) and dedent()
    # cannot remove the indentation of the other lines.
    description = textwrap.fill(textwrap.dedent("""\
    Destroy the OSD located at PATH. It removes the OSD from the
    cluster and marks it destroyed. An OSD must be down before it
    can be destroyed. Once it is destroyed, a new OSD can be created
    in its place, reusing the same OSD id and position (e.g. after
    a failed HDD or SSD is replaced). Alternatively, if the
    --purge option is also specified, the OSD is removed from the
    CRUSH map and the OSD id is deallocated."""))
    destroy_parser = subparsers.add_parser(
        'destroy',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Destroy a Ceph OSD')
    destroy_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    # PATH is optional on the command line
    destroy_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    destroy_parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy'
    )
    destroy_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    destroy_parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    destroy_parser.add_argument(
        '--purge',
        action='store_true', default=False,
        help='option to remove OSD from CRUSH map and deallocate the id',
    )
    destroy_parser.set_defaults(
        func=main_destroy,
    )
5605
5606
def make_zap_parser(subparsers):
    """Register the ``zap`` sub-command, handled by main_zap().

    Returns the new parser.
    """
    description = textwrap.fill(textwrap.dedent("""\
    Zap/erase/destroy a device's partition table and contents. It
    actually uses sgdisk and it's option --zap-all to
    destroy both GPT and MBR data structures so that the disk
    becomes suitable for repartitioning.
    """))
    parser = subparsers.add_parser(
        'zap',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Zap/erase/destroy a device\'s partition table (and contents)',
    )
    # one or more devices are required
    parser.add_argument(
        'dev',
        metavar='DEV',
        nargs='+',
        help='path to block device',
    )
    parser.set_defaults(func=main_zap)
    return parser
5628
5629
def main(argv):
    """Run ceph-disk with the command line arguments ``argv``.

    The arguments are parsed, logging, $PATH and the module globals are
    set up and the function of the selected sub-command is called. With
    --verbose it is called directly, otherwise it goes through
    main_catch() which converts the known errors into an exit message.
    """
    global CEPH_PREF_USER
    global CEPH_PREF_GROUP

    args = parse_args(argv)

    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current_path = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = ":".join((args.prepend_to_path, current_path))

    handler = args.func
    if handler.__name__ != 'main_trigger':
        # trigger may run when statedir is unavailable and does not use it
        setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if args.verbose:
        handler(args)
    else:
        main_catch(handler, args)
5653
5654
def setup_logging(verbose, log_stdout):
    """Configure logging for the command line.

    The level is DEBUG when ``verbose`` is true and WARNING otherwise.
    When ``log_stdout`` is true, a handler writing to sys.stdout is
    added to LOG; otherwise logging.basicConfig() is used. Both use the
    same ``funcName: message`` format.
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format='%(funcName)s: %(message)s',
        )
        return

    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setLevel(loglevel)
    stdout_handler.setFormatter(
        logging.Formatter('%(funcName)s: %(message)s'))
    LOG.addHandler(stdout_handler)
    LOG.setLevel(loglevel)
5672
5673
def main_catch(func, args):
    """Call ``func(args)`` and turn the known errors into an exit message.

    Error and CephDiskException are converted into a SystemExit whose
    message starts with ``args.prog``; the name of the exception class
    is added for CephDiskException. Other exceptions propagate.
    """
    try:
        func(args)

    except Error as e:
        message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
        raise SystemExit(message)

    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
5696
5697
def run():
    """Run main() with the arguments of the process (program name excluded)."""
    argv = sys.argv[1:]
    main(argv)
5700
5701
# Defined before the script entry point so that the name is bound while
# main() runs (it used to be bound only once main() had returned).
# NOTE(review): no user of warned_about is visible in this part of the
# file -- confirm whether it is still needed.
warned_about = {}

if __name__ == '__main__':
    main(sys.argv[1:])