]> git.proxmox.com Git - ceph.git/blame - ceph/qa/workunits/ceph-disk/ceph-disk-no-lockbox
update sources to v12.1.0
[ceph.git] / ceph / qa / workunits / ceph-disk / ceph-disk-no-lockbox
CommitLineData
7c673cae
FG
1#!/usr/bin/env python
2#
3# Copyright (C) 2015 Red Hat <contact@redhat.com>
4# Copyright (C) 2014 Inktank <info@inktank.com>
5# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6# Copyright (C) 2014 Catalyst.net Ltd
7#
8# Author: Loic Dachary <loic@dachary.org>
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU Library Public License as published by
12# the Free Software Foundation; either version 2, or (at your option)
13# any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU Library Public License for more details.
19#
20# THIS IS ceph-disk AS OF dc5a9053ce69c0630091774f16ce421da67d26fb v10.0.3-2247-gdc5a905
21# PRIOR TO THE INTRODUCTION OF THE LOCKBOX VOLUME TO STORE KEY FETCHING
22# STRATEGIES
23#
24import argparse
25import errno
26import fcntl
27import json
28import logging
29import os
30import platform
31import re
32import subprocess
33import stat
34import sys
35import tempfile
36import uuid
37import time
38import shlex
39import pwd
40import grp
41
# Expected content of the 'magic' file of an OSD data directory
# (compared by check_osd_magic).
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'

# Partition type ("ptype") UUIDs.
#
# First level: kind of device ('regular', 'luks', 'plain', 'mpath').
# Second level: what the partition holds ('journal', 'block', 'osd').
# Third level: 'ready' and 'tobe' UUIDs -- presumably 'tobe' marks a
# partition still being prepared; confirm against the prepare code.
PTYPE = {
    'regular': {
        'journal': {
            # identical because creating a journal is atomic
            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block': {
            # identical because creating a block is atomic
            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
        },
    },
    'luks': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'plain': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'mpath': {
        'journal': {
            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
        },
        'block': {
            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
        },
        'osd': {
            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
        },
    },
}
104
105
class Ptype(object):
    """
    Lookup helpers over the PTYPE table. Only the 'ready' UUIDs are
    consulted; the names iterated come from Space.NAMES.
    """

    @staticmethod
    def get_ready_by_type(what):
        """Return the 'ready' UUID of every entry under PTYPE[what]."""
        ready = []
        for entry in PTYPE[what].values():
            ready.append(entry['ready'])
        return ready

    @staticmethod
    def get_ready_by_name(name):
        """Return the 'ready' UUID of ``name`` for every kind in PTYPE."""
        ready = []
        for by_name in PTYPE.values():
            ready.append(by_name[name]['ready'])
        return ready

    @staticmethod
    def is_regular_space(ptype):
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """True if ptype is the 'ready' UUID of any space of kind ``what``."""
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' UUID back to its space name.

        :raises: ValueError if no entry of PTYPE has this 'ready' UUID.
        """
        for by_name in PTYPE.values():
            found = next((name for name in Space.NAMES
                          if ptype == by_name[name]['ready']), None)
            if found is not None:
                return found
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """True if ptype is a 'plain' or 'luks' 'ready' UUID of any space."""
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """True if ptype is the 'plain' or 'luks' 'ready' UUID of ``name``."""
        return any(ptype == PTYPE[what][name]['ready']
                   for what in ('plain', 'luks'))
160
DEFAULT_FS_TYPE = 'xfs'
SYSFS = '/sys'

# OSD STATUS Definition
(OSD_STATUS_OUT_DOWN,
 OSD_STATUS_OUT_UP,
 OSD_STATUS_IN_DOWN,
 OSD_STATUS_IN_UP) = range(4)

# Default mount options, keyed by filesystem type.
MOUNT_OPTIONS = {
    'btrfs': 'noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xatts-in-leveldb work, and it seemed
    # that user_xattr helped
    'ext4': 'noatime,user_xattr',
    'xfs': 'noatime,inode64',
}

# Extra mkfs arguments, keyed by filesystem type.
MKFS_ARGS = {
    'btrfs': [
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    'xfs': [
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
}

INIT_SYSTEMS = ['upstart', 'sysvinit', 'systemd', 'auto', 'none']

STATEDIR = '/var/lib/ceph'

SYSCONFDIR = '/etc/ceph'

prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}

# Nuke the TERM variable to avoid confusing any subprocesses we call.
# For example, libreadline will print weird control sequences for some
# TERM values.
os.environ.pop('TERM', None)

# Log under the script's file name when run directly, else the module name.
LOG_NAME = os.path.basename(sys.argv[0]) if __name__ == '__main__' else __name__
LOG = logging.getLogger(LOG_NAME)

# Allow user-preferred values for subprocess user and group
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
232
233
class filelock(object):
    """
    Exclusive lock on a file, taken with fcntl.lockf.

    ``fn`` is the path of the lock file; it is opened for writing
    (and therefore created/truncated) each time the lock is acquired.
    """

    def __init__(self, fn):
        self.fn = fn
        self.fd = None

    def acquire(self):
        """Open the lock file and block until the exclusive lock is held."""
        assert not self.fd
        # open() rather than the Python-2-only file() builtin
        self.fd = open(self.fn, 'w')
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_EX)
        except Exception:
            # do not keep a half-acquired lock around
            self.fd.close()
            self.fd = None
            raise

    def release(self):
        """Drop the lock and close the lock file."""
        assert self.fd
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # close explicitly instead of relying on garbage collection
            self.fd.close()
            self.fd = None
248
249
# Base class of the ceph-disk errors.  The class docstring is runtime
# text: __str__ uses it as the prefix of the rendered message, so a
# subclass changes the message simply by providing its own docstring.
class Error(Exception):
    """
    Error
    """

    def __str__(self):
        # __doc__ is None for a subclass without a docstring (and under
        # python -OO); fall back to the class name instead of crashing.
        doc = (self.__doc__ or type(self).__name__).strip()
        return ': '.join([doc] + [str(a) for a in self.args])
258
259
# The docstring is runtime text: Error.__str__ uses it as message prefix.
class MountError(Error):
    """
    Mounting filesystem failed
    """
264
265
# The docstring is runtime text: Error.__str__ uses it as message prefix.
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
270
271
# The docstring is runtime text: Error.__str__ uses it as message prefix.
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
276
277
# The docstring is runtime text: Error.__str__ uses it as message prefix.
class TruncatedLineError(Error):
    """
    Line is truncated
    """
282
283
# The docstring is runtime text: Error.__str__ uses it as message prefix.
class TooManyLinesError(Error):
    """
    Too many lines
    """
288
289
# The docstring is runtime text: Error.__str__ uses it as message prefix.
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
294
295
class CephDiskException(Exception):
    """
    Base class for ceph-disk exceptions that carry a custom (ad-hoc)
    message; meant to be caught and dealt with when main() is executed.
    """
302
303
class ExecutableNotFound(CephDiskException):
    """
    Raised to report an executable that is not available in PATH.
    """
309
310
def is_systemd():
    """
    Detect whether systemd is running

    :return: True if any line of /proc/1/comm contains 'systemd'.
    """
    # open() instead of the Python-2-only file() builtin; text mode so
    # that the str membership test is valid on every Python version.
    with open('/proc/1/comm', 'r') as comm:
        return any('systemd' in line for line in comm)
320
321
def is_upstart():
    """
    Detect whether upstart is running

    :return: True if the output of ``init --version`` mentions 'upstart'.
    """
    version_text, _err, _ret = command(['init', '--version'])
    return 'upstart' in version_text
330
331
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    Arguments are passed to os.mkdir unchanged; the first positional
    argument is the path.  An already existing directory is not an error.
    """
    path = a[0]
    # remove any symlink, if it is there..  islink() does not follow the
    # link, so a dangling symlink is removed as well.
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
348
349
def which(executable):
    """
    find the location of an executable

    Searches $PATH (os.defpath when PATH is unset) followed by a fixed
    list of standard bin/sbin directories.

    :return: path of the first executable regular file found, else None.
    """
    search_dirs = os.environ.get('PATH', os.defpath).split(os.pathsep)
    search_dirs = search_dirs + [
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ]
    for directory in search_dirs:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
372
373
374def _get_command_executable(arguments):
375 """
376 Return the full path for an executable, raise if the executable is not
377 found. If the executable has already a full path do not perform any checks.
378 """
379 if arguments[0].startswith('/'): # an absolute path
380 return arguments
381 executable = which(arguments[0])
382 if not executable:
383 command_msg = 'Could not run command: %s' % ' '.join(arguments)
384 executable_msg = '%s not in path.' % arguments[0]
385 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
386
387 # swap the old executable for the new one
388 arguments[0] = executable
389 return arguments
390
391
def command(arguments, **kwargs):
    """
    Safely execute a ``subprocess.Popen`` call making sure that the
    executable exists and raising a helpful error message
    if it does not.

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    Extra keyword arguments are passed on to ``subprocess.Popen``.

    :return: a tuple ``(stdout, stderr, returncode)``; a non-zero return
             code is not treated as an error here.
    """
    arguments = _get_command_executable(arguments)
    # lazy formatting, consistent with command_check_call
    LOG.info('Running command: %s', ' '.join(arguments))
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = process.communicate()
    return out, err, process.returncode
414
415
def command_check_call(arguments):
    """
    Run a command through ``subprocess.check_call`` after resolving its
    executable, so that a missing executable produces a helpful error.

    .. note:: Prefer this over calling ``subprocess.check_call`` directly.

    :return: whatever ``subprocess.check_call`` returns.
    """
    resolved = _get_command_executable(arguments)
    LOG.info('Running command: %s', ' '.join(resolved))
    return subprocess.check_call(resolved)
429
430
def platform_distro():
    """
    Return the distribution name reported by platform_information(),
    lower-cased and stripped of surrounding whitespace ('' if unknown).
    """
    name = platform_information()[0]
    if not name:
        name = ''
    return name.strip().lower()
438
439
def platform_information():
    """
    Return ``(distro, release, codename)`` from
    ``platform.linux_distribution()``, each converted to str and stripped.

    When the codename is empty on a Debian system it is derived from the
    release string.
    """
    distro, release, codename = platform.linux_distribution()
    # this could be an empty string in Debian
    if not codename and 'debian' in distro.lower():
        codename = {
            '8': 'jessie',
            '7': 'wheezy',
            '6': 'squeeze',
        }.get(release.split('.')[0], '')

        # In order to support newer jessie/sid or wheezy/sid strings we test
        # this if sid is buried in the minor, we should use sid anyway.
        if not codename and '/' in release:
            major, minor = release.split('/')
            codename = minor if minor == 'sid' else major

    return tuple(str(field).strip() for field in (distro, release, codename))
466
467#
468# An alternative block_path implementation would be
469#
470# name = basename(dev)
471# return /sys/devices/virtual/block/$name
472#
473# It is however more fragile because it relies on the fact
474# that the basename of the device the user will use always
475# matches the one the driver will use. On Ubuntu 14.04, for
476# instance, when multipath creates a partition table on
477#
478# /dev/mapper/353333330000007d0 -> ../dm-0
479#
480# it will create partition devices named
481#
482# /dev/mapper/353333330000007d0-part1
483#
484# which is the same device as /dev/dm-1 but not a symbolic
485# link to it:
486#
487# ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
488# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
489# lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
490# brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
491#
492# Using the basename in this case fails.
493#
494
495
def block_path(dev):
    """
    Return the ``<SYSFS>/dev/block/<major>:<minor>`` path of a device,
    using the device numbers of the resolved (realpath) device node.
    """
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS,
        M=os.major(rdev),
        m=os.minor(rdev),
    )
501
502
def get_dm_uuid(dev):
    """
    Read the ``dm/uuid`` file found under block_path(dev).

    :return: the raw content of the file, or False if it does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # context manager so the file is closed deterministically
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
511
512
def is_mpath(dev):
    """
    True if the path is managed by multipath

    Decided from the device-mapper uuid: it must start with ``mpath-``
    or ``part<N>-mpath-``.

    :return: a bool (False as well when the device has no dm uuid).
    """
    uuid = get_dm_uuid(dev)
    # raw string: '\d' is not a valid escape in a plain literal
    return bool(uuid and re.match(r'(part\d+-)?mpath-', uuid))
521
522
def get_dev_name(path):
    """
    Convert a device path into a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The leading ``/dev/`` is dropped and every remaining ``/`` becomes
    ``!``.  The path must start with ``/dev/`` (asserted).
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    return '!'.join(path[len(prefix):].split('/'))
538
539
def get_dev_path(name):
    """
    Convert a device name into a path under /dev, e.g.::

        sdb -> /dev/sdb, cciss!c0d1 -> /dev/cciss/c0d1
    """
    return '/dev/' + '/'.join(name.split('!'))
550
551
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) into its path relative to /dev
    (cciss/c0d1).
    """
    return '/'.join(name.split('!'))
557
558
def get_dev_size(dev, size='megabytes'):
    """
    Attempt to get the size of a device so that we can prevent errors
    from actions to devices that are smaller, and improve error reporting.

    Because we want to avoid breakage in case this approach is not robust, we
    will issue a warning if we failed to get the size.

    :param size: bytes or megabytes (anything else is treated as megabytes)
    :param dev: the device to calculate the size
    :return: the size as an integer, rounded down, or None if seeking
             to the end of the device failed.
    """
    fd = os.open(dev, os.O_RDONLY)
    dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
    try:
        device_size = os.lseek(fd, 0, os.SEEK_END)
        divider = dividers.get(size, 1024 * 1024)  # default to megabytes
        # floor division: '/' would produce a float on Python 3
        return device_size // divider
    except Exception as error:
        LOG.warning('failed to get size of %s: %s', dev, error)
        return None
    finally:
        os.close(fd)
580
581
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of a multipath device whose dm uuid starts
    with ``part<pnum>-mpath-``, or None if there is none.
    """
    matches = list_partitions_mpath(
        dev, "part{pnum}-mpath-".format(pnum=pnum))
    return matches[0] if matches else None
589
590
def get_partition_dev(dev, pnum):
    """
    get the device name for a partition

    assume that partitions are named like the base dev,
    with a number, and optionally
    some intervening characters (like 'p'). e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    :raises: Error if no matching partition is found.
    :return: the /dev path of the partition.
    """
    if is_mpath(dev):
        partname = get_partition_mpath(dev, pnum)
    else:
        name = get_dev_name(os.path.realpath(dev))
        suffix = str(pnum)
        # we want the shortest name that starts with the base name
        # and ends with the partition number
        candidates = [
            entry for entry in os.listdir(os.path.join('/sys/block', name))
            if entry.startswith(name) and entry.endswith(suffix)
        ]
        partname = min(candidates, key=len) if candidates else None

    if not partname:
        raise Error('partition %d for %s does not appear to exist' %
                    (pnum, dev))
    return get_dev_path(partname)
618
619
def list_all_partitions():
    """
    Return a dict mapping every device name found in /sys/block
    (floppy drives excluded) to the list of its partitions.
    """
    # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
    return dict(
        (name, list_partitions(get_dev_path(name)))
        for name in os.listdir('/sys/block')
        if not re.match(r'^fd\d$', name)
    )
632
633
def list_partitions(dev):
    """
    Return the partition names of a device, using the multipath lookup
    when the resolved device is managed by multipath.
    """
    dev = os.path.realpath(dev)
    lister = list_partitions_mpath if is_mpath(dev) else list_partitions_device
    return lister(dev)
640
641
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    Return the holders of a device whose dm uuid matches ``part_re``.

    :param dev: the device whose ``holders`` directory is scanned
    :param part_re: regular expression matched (re.match) against the
                    content of each holder's ``dm/uuid`` file
    :return: list of holder names
    """
    p = block_path(dev)
    partitions = []
    holders = os.path.join(p, 'holders')
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # context manager so the file is closed deterministically
        with open(uuid_path, 'r') as uuid_file:
            dm_uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path +
                  " uuid = " + dm_uuid)
        if re.match(part_re, dm_uuid):
            partitions.append(holder)
    return partitions
653
654
def list_partitions_device(dev):
    """
    Return the entries of block_path(dev) whose name starts with the
    device name, i.e. the partitions of the given device.
    """
    prefix = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(prefix)]
665
666
def get_partition_base(dev):
    """
    Return the /dev path of the device a partition belongs to.

    :raises: Error if dev is not a block device, is itself listed in
             /sys/block, or is not found under any /sys/block device.
    """
    sys_block = '/sys/block'
    dev = os.path.realpath(dev)
    if not stat.S_ISBLK(os.lstat(dev).st_mode):
        raise Error('not a block device', dev)

    name = get_dev_name(dev)
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', dev)

    # find the base
    for candidate in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, candidate, name)):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', dev)
684
685
def is_partition_mpath(dev):
    """
    True if the dm uuid of the device starts with ``part<N>-mpath-``.

    A device without a dm uuid (get_dm_uuid returns False) is not a
    multipath partition.
    """
    uuid = get_dm_uuid(dev)
    # guard: re.match() raises TypeError when handed False
    return bool(uuid and re.match(r'part\d+-mpath-', uuid))
689
690
def partnum_mpath(dev):
    """
    Return the partition number (as a string) embedded in the dm uuid
    of a multipath partition (``part<N>-mpath-...``).

    :raises: Error if the device has no dm uuid or the uuid does not
             carry a partition number.
    """
    uuid = get_dm_uuid(dev)
    numbers = re.findall(r'part(\d+)-mpath-', uuid) if uuid else []
    if not numbers:
        # previously surfaced as a bare TypeError / IndexError
        raise Error('not a multipath partition', dev)
    return numbers[0]
694
695
def get_partition_base_mpath(dev):
    """
    Return the ``/dev/mapper/<name>`` path of the first slave of a
    device, where <name> is read from that slave's ``dm/name`` file.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # context manager so the file is closed deterministically
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
703
704
def is_partition(dev):
    """
    Tell whether a device path is a partition (True) or a full disk
    (False).

    :raises: Error if the path is not a block device, or is neither
             listed in /sys/block nor flagged as a partition in sysfs.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    st = os.lstat(dev)
    if not stat.S_ISBLK(st.st_mode):
        raise Error('not a block device', dev)

    if os.path.exists(os.path.join('/sys/block', get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    marker = '/sys/dev/block/%d:%d/partition' % (
        os.major(st.st_rdev), os.minor(st.st_rdev))
    if not os.path.exists(marker):
        raise Error('not a disk or partition', dev)
    return True
728
729
def is_mounted(dev):
    """
    Check if the given device is mounted.

    Both the given path and the device column of /proc/mounts are
    resolved with realpath before being compared.

    :return: the mount point of the first matching entry, else None.
    """
    dev = os.path.realpath(dev)
    # open() instead of the Python-2-only file() builtin; text mode so
    # that the str operations below are valid on every Python version.
    with open('/proc/mounts', 'r') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            if mounts_dev.startswith('/') and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return path
    return None
747
748
def is_held(dev):
    """
    List the holders of a device (e.g., a dm-crypt mapping).

    :return: the entries of the device's sysfs ``holders`` directory;
             an empty list for multipath devices or when no holders
             directory is found.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    part = get_dev_name(os.path.realpath(dev))

    # full disk?
    directory = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(directory):
        return os.listdir(directory)

    # partition? try ever shorter prefixes of the name as the base device
    for end in range(len(part), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
774
775
def verify_not_in_use(dev, check_partitions=False):
    """
    Make sure a device (path) is neither mounted nor held by a
    device-mapper mapping.

    :param check_partitions: when true and dev is not itself a
                             partition, apply the same checks to each of
                             its partitions.
    :raises: Error if device is in use.
    """
    def _verify(path):
        if is_mounted(path):
            raise Error('Device is mounted', path)
        holders = is_held(path)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % path, ','.join(holders))

    assert os.path.exists(dev)
    _verify(dev)

    if not check_partitions or is_partition(dev):
        return
    for partname in list_partitions(dev):
        _verify(get_dev_path(partname))
801
802
def must_be_one_line(line):
    """
    Check that the given text is exactly one newline-terminated line.

    :raises: TruncatedLineError if the text does not end with a newline,
             TooManyLinesError if it contains more than one line.
    :return: the line without its trailing newline.
    """
    if not line.endswith('\n'):
        raise TruncatedLineError(line)
    content = line[:-1]
    if '\n' in content:
        raise TooManyLinesError(content)
    return content
816
817
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :raises: Error if the file does not hold exactly one
             newline-terminated line.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # open() instead of the Python-2-only file() builtin, inside a
        # context manager so the handle is closed deterministically
        with open(path, 'r') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
845
846
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline.

    The text is written to ``<path>.<pid>.tmp``, synced, passed to
    path_set_context() and then renamed over the final path.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    # open() instead of the Python-2-only file() builtin
    with open(tmp, 'w') as tmp_file:
        tmp_file.write(text + '\n')
        # flush Python's buffer first, otherwise fsync() has nothing to
        # push to disk and the rename may publish an empty file
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
860
861
def init_get():
    """
    Return the init system reported by ``ceph-detect-init`` (run with
    ``--default sysvinit``); its output must be a single line.
    """
    detected = _check_output(
        args=['ceph-detect-init', '--default', 'sysvinit'],
    )
    return must_be_one_line(detected)
874
875
def check_osd_magic(path):
    """
    Verify that the directory holds a 'magic' file containing the Ceph
    OSD magic.

    :raises: BadMagicError if the file is missing (probably not mkfs'ed
             yet) or its content differs from CEPH_OSD_ONDISK_MAGIC.
    """
    magic = read_one_line(path, 'magic')
    # a missing file yields None, which can never equal the magic string
    if magic is None or magic != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
889
890
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :raises: Error if osd_id is not made of ASCII digits only.
    """
    # \Z instead of $: '$' also matches just before a trailing newline,
    # which let values such as '12\n' through.
    if not re.match(r'\A[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
897
898
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
):
    """
    Allocates an OSD id on the given cluster by running
    ``ceph osd create`` as client.bootstrap-osd.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """
    LOG.debug('Allocating OSD id...')
    create_cmd = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'osd', 'create', '--concise',
        fsid,
    ]
    try:
        output = _check_output(args=create_cmd)
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(output)
    check_osd_id(osd_id)
    return osd_id
928
929
def get_osd_id(path):
    """
    Return the OSD id stored in the 'whoami' file of the given path, or
    None if that file does not exist.  The id is validated as numeric.
    """
    osd_id = read_one_line(path, 'whoami')
    if osd_id is None:
        return None
    check_osd_id(osd_id)
    return osd_id
938
939
def get_ceph_user():
    """
    Return the user name to use for ceph files and processes.

    When CEPH_PREF_USER is set it must exist, otherwise a message is
    printed and the program exits with status 2.  When it is not set,
    'ceph' is returned if that user exists, else 'root'.
    """
    if CEPH_PREF_USER is not None:
        try:
            pwd.getpwnam(CEPH_PREF_USER)
        except KeyError:
            # call form of print: valid on Python 2 and Python 3
            print("No such user: " + CEPH_PREF_USER)
            sys.exit(2)
        return CEPH_PREF_USER

    try:
        pwd.getpwnam('ceph')
    except KeyError:
        return 'root'
    return 'ceph'
956
957
def get_ceph_group():
    """
    Return the group name to use for ceph files and processes.

    When CEPH_PREF_GROUP is set it must exist, otherwise a message is
    printed and the program exits with status 2.  When it is not set,
    'ceph' is returned if that group exists, else 'root'.
    """
    if CEPH_PREF_GROUP is not None:
        try:
            grp.getgrnam(CEPH_PREF_GROUP)
        except KeyError:
            # call form of print: valid on Python 2 and Python 3
            print("No such group: " + CEPH_PREF_GROUP)
            sys.exit(2)
        return CEPH_PREF_GROUP

    try:
        grp.getgrnam('ceph')
    except KeyError:
        return 'root'
    return 'ceph'
974
975
def path_set_context(path):
    """
    Recursively restore the SELinux context of a path (when restorecon
    is available) and hand it over to ceph:ceph (when get_ceph_user()
    returns 'ceph').
    """
    # restore selinux context to default policy values
    restorecon_found = which('restorecon')
    if restorecon_found:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    if get_ceph_user() != 'ceph':
        return
    command(['chown', '-R', 'ceph:ceph', path])
984
985
def _check_output(args=None, **kwargs):
    """
    Run a command through command() and return its stdout.

    :raises: subprocess.CalledProcessError on a non-zero exit status;
             its ``output`` attribute holds stdout followed by stderr.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return out
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
994
995
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, _err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # stderr is not available here: nothing was assigned when
        # command() itself raised, so only the OSError is reported
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1029
1030
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code, by asking
    ``ceph-osd --show-config-value``.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if the ceph-osd call fails.
    :return: the first line of the command output.
    """
    show_cmd = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        out = _check_output(args=show_cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line = str(out).split('\n', 1)[0]
    return first_line
1059
1060
def get_fsid(cluster):
    """
    Get the fsid of the cluster.

    :return: The fsid (lower-cased) or raises Error.
    """
    fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    # get_conf_with_default() returns a string, never None: an unset
    # fsid shows up as an empty value, so test for emptiness.
    if not fsid:
        raise Error('getting cluster uuid from configuration failed')
    return fsid.lower()
1071
1072
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Build the path of the dmcrypt key file of a partition uuid: the
    file is named after the uuid, with a ``.luks.key`` suffix for LUKS.

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    filename = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, filename)
1089
1090
def get_or_create_dmcrypt_key(
    _uuid,
    key_dir,
    key_size,
    luks
):
    """
    Get path to existing dmcrypt key or create a new key file.

    A new key is ``key_size`` bits read from /dev/urandom, stored in a
    file readable and writable by its owner only; ``key_dir`` is created
    (owner access only) when missing.

    :raises: Error if the key cannot be read or created.
    :return: Path to the dmcrypt key file.
    """
    path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(path):
        return path

    # make a new key
    try:
        if not os.path.exists(key_dir):
            os.makedirs(key_dir, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
        # open() instead of the Python-2-only file() builtin
        with open('/dev/urandom', 'rb') as i:
            # floor division: read() needs an integer byte count
            key = i.read(key_size // 8)
        fd = os.open(path, os.O_WRONLY | os.O_CREAT,
                     stat.S_IRUSR | stat.S_IWUSR)
        try:
            # the write must not live inside an assert: asserts are
            # stripped under python -O and the key would never be stored
            if os.write(fd, key) != len(key):
                raise IOError(errno.EIO, 'short write', path)
        finally:
            os.close(fd)
        return path
    except Exception:
        # every failure is reported as Error, as before, but
        # KeyboardInterrupt / SystemExit are no longer swallowed
        raise Error('unable to read or create dm-crypt key', path)
1119
1120
def _dmcrypt_map(
    rawdev,
    keypath,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Maps a device to a dmcrypt device with cryptsetup and chowns the
    mapped device to ceph:ceph.

    :param luks: use luksOpen (preceded by luksFormat when format_dev is
                 true) instead of a plain ``create``
    :raises: Error if one of the commands fails.
    :return: Path to the dmcrypt device.
    """
    dev = '/dev/mapper/' + _uuid
    key_opts = ['--key-file', keypath]

    luksFormat_args = (['cryptsetup', '--batch-mode'] + key_opts +
                       ['luksFormat', rawdev] + cryptsetup_parameters)
    luksOpen_args = ['cryptsetup'] + key_opts + ['luksOpen', rawdev, _uuid]
    create_args = (['cryptsetup'] + key_opts +
                   ['create', _uuid, rawdev] + cryptsetup_parameters)

    if luks:
        steps = [luksOpen_args]
        if format_dev:
            steps.insert(0, luksFormat_args)
    else:
        # Plain mode has no format function, nor any validation
        # that the key is correct.
        steps = [create_args]
    # set proper ownership of mapped device
    steps.append(['chown', 'ceph:ceph', dev])

    try:
        for argv in steps:
            command_check_call(argv)
    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
    return dev
1177
1178
def dmcrypt_unmap(
    _uuid
):
    """
    Removes the dmcrypt device with the given UUID.

    ``cryptsetup remove`` is retried up to 10 times after the first
    failure, sleeping a little longer before each retry.

    :raises: Error once the last retry has failed.
    """
    max_retries = 10
    for attempt in range(max_retries + 1):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
            return
        except subprocess.CalledProcessError as e:
            if attempt == max_retries:
                raise Error('unable to unmap device', _uuid, e)
            time.sleep(0.5 + attempt * 1.0)
1196
1197
def mount(
    dev,
    fstype,
    options,
):
    """
    Mounts a device with the given filesystem type and mount options on
    a fresh temporary directory created under STATEDIR + '/tmp'.

    :param options: mount options; None selects the MOUNT_OPTIONS entry
                    of the filesystem type ('' when there is none)
    :raises: ValueError if dev or fstype is None, MountError if a
             command fails (the temporary directory is then removed).
    :return: the mount point.
    """
    # sanity check: none of the arguments are None
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mount
    path = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
    mount_cmd = ['mount', '-t', fstype, '-o', options, '--', dev, path]
    try:
        LOG.debug('Mounting %s on %s with options %s', dev, path, options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', path])
    except subprocess.CalledProcessError as e:
        try:
            os.rmdir(path)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return path
1249
1250
def unmount(
    path,
):
    """
    Unmounts the given mount point and removes its directory.

    On failure /bin/umount is retried 3 times with incremental backoff.

    :raises: UnmountError once the last retry has failed.
    """
    max_retries = 3
    for attempt in range(max_retries + 1):
        try:
            LOG.debug('Unmounting %s', path)
            command_check_call(['/bin/umount', '--', path])
        except subprocess.CalledProcessError as e:
            # on failure, retry 3 times with incremental backoff
            if attempt == max_retries:
                raise UnmountError(e)
            time.sleep(0.5 + attempt * 1.0)
        else:
            break

    os.rmdir(path)
1278
1279
1280###########################################
1281
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from machine readable parted output.

    Every line of ``partitions`` that begins with digits contributes
    one number.

    :param partitions: text containing one partition description per line
    :return: list of integers, in order of appearance (possibly empty)
    """
    # A real list (not a lazy map object) so that callers can test it
    # for emptiness and iterate it more than once on any Python version.
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1285
1286
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    The device is inspected with ``parted --machine -- <dev> print``.

    :param dev: path of the device to inspect
    :return: Index number (> 1 if there is already a partition on the device)
    or 1 if there is no partition table.
    :raises Error: if parted prints nothing or prints something that
                   lacks the expected unit marker or device path
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)', e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if not any(marker in lines for marker in ('CHS;', 'CYL;', 'BYT;')):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # Only split on the first occurrence: the partition list follows the
    # device line and a second occurrence must not break the unpacking.
    _, partitions = lines.split(real_dev, 1)
    # Materialise the result so the emptiness test below is meaningful
    # even if the helper hands back a lazy iterable.
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    return 1
1325
1326
def check_journal_reqs(args):
    """
    Query ceph-osd about the journal requirements of the OSD backend.

    ``ceph-osd`` is run three times, with ``--check-allows-journal``,
    ``--check-wants-journal`` and ``--check-needs-journal``.  For each
    run the third element returned by command() (presumably the exit
    status -- verify against command()) is negated.

    :param args: parsed arguments; only ``args.cluster`` is read
    :return: tuple (allows_journal, wants_journal, needs_journal) of bools
    """
    def probe(check):
        _, _, status = command([
            'ceph-osd', check,
            '-i', '0',
            '--cluster', args.cluster,
        ])
        return not status

    allows_journal = probe('--check-allows-journal')
    wants_journal = probe('--check-wants-journal')
    needs_journal = probe('--check-needs-journal')
    return (allows_journal, wants_journal, needs_journal)
1344
1345
def update_partition(dev, description):
    """
    Must be called after modifying a partition table so the kernel
    knows about the change and fires udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The first udevadm settle waits for ongoing udev events to
    complete, just in case one of them relies on an existing partition
    on dev. The second udevadm settle guarantees to the caller that
    all udev events related to the partition table change have been
    processed, i.e. the 95-ceph-osd.rules actions and mode changes,
    group changes etc. are complete.

    partprobe is tried at most five times; a failure whose output
    mentions 'unable to inform the kernel' or 'Device or resource busy'
    is followed by a 60 second pause, any other failure is re-raised.

    :param dev: device whose partition table changed
    :param description: word used in the debug message only
    :raises Error: if partprobe still fails after the fifth attempt
    """
    def settle():
        command_check_call(['udevadm', 'settle', '--timeout=600'])

    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    for _attempt in range(5):
        settle()
        try:
            _check_output(['partprobe', dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            transient = ('unable to inform the kernel' in error or
                         'Device or resource busy' in error)
            if not transient:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed with a transient error
        raise Error('partprobe %s failed : %s' % (dev, error))
    settle()
1378
1379
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    The last 33 * 4096 bytes of the device are overwritten with zeroes,
    then ``sgdisk --zap-all`` and ``sgdisk --clear --mbrtogpt`` are run
    and the kernel is told about the change via update_partition().

    :param dev: path of a whole block device (not a partition)
    :raises Error: if dev is not a full block device or if one of the
                   external commands fails
    """
    dev = os.path.realpath(dev)
    dmode = os.stat(dev).st_mode
    if not stat.S_ISBLK(dmode) or is_partition(dev):
        raise Error('not full block device; cannot zap', dev)
    try:
        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups. sgdisk
        # isn't too thorough.
        lba_size = 4096
        size = 33 * lba_size
        # open() rather than the Python-2-only file() builtin, and a
        # bytes payload since the device is opened in binary mode.
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-size, os.SEEK_END)
            dev_file.write(size * b'\0')

        command_check_call(
            [
                'sgdisk',
                '--zap-all',
                '--',
                dev,
            ],
        )
        command_check_call(
            [
                'sgdisk',
                '--clear',
                '--mbrtogpt',
                '--',
                dev,
            ],
        )

        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1421
1422
def adjust_symlink(target, path):
    """
    Make ``path`` a symlink pointing at ``target``.

    An existing regular file at ``path`` is removed, as is an existing
    symlink pointing somewhere else.  A symlink that already points at
    ``target`` is left alone.  Anything else found at ``path`` is not
    removed before the symlink creation is attempted.

    :param target: what the symlink must point to
    :param path: location of the symlink
    :raises Error: if the old entry cannot be inspected or removed, or
                   if the new symlink cannot be created
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        # Exception rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not converted into an Error.
        except Exception as e:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path, e)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except Exception as e:
            raise Error('unable to create symlink %s -> %s' % (path, target),
                        e)
1447
1448
class Device(object):
    """
    A whole block device (never a partition) on which partitions are
    created with sgdisk and looked up by number.
    """

    def __init__(self, path, args):
        self.path = path
        self.args = args
        # lazily filled caches
        self.dev_size = None
        self.ptype_map = None
        self.partitions = {}
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition with sgdisk and notify the kernel.

        :param uuid: GUID given to the new partition
        :param name: 'data', 'journal', 'block', ...; selects the
                     partition type and is part of the partition label
        :param size: size in megabytes; 0 (or less) means the largest
                     possible partition
        :param num: partition number; 0 means the next free index
        :return: the number of the partition that was created
        :raises Error: if size exceeds the size of the device
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size <= 0:
            new = '--largest-new={num}'.format(num=num)
        else:
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
            dev_size = self.get_dev_size()
            if size > dev_size:
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, dev_size))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, dev_size, name))

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type registered for ``name``;
        'data' is looked up under the key 'osd'.
        """
        key = 'osd' if name == 'data' else name
        if self.ptype_map is None:
            # borrow the type map of a partition object built for
            # this device
            probe = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = probe.ptype_map
        return self.ptype_map[key]['tobe']

    def get_partition(self, num):
        """
        Return the partition object for partition ``num``, creating
        and caching it on first use.
        """
        if num in self.partitions:
            return self.partitions[num]
        partition = DevicePartition.factory(
            path=self.path,
            dev=get_partition_dev(self.path, num),
            args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return partition

    def get_dev_size(self):
        """Return the device size, computed once via get_dev_size()."""
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        """Build a Device for ``path``."""
        return Device(path, args)
1517
1518
class DevicePartition(object):
    """
    A plain partition.  Subclasses override set_variables_ptype() to
    select another family of partition types.
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        self.dev = None
        self.rawdev = None
        self.uuid = None
        self.ptype = None
        self.ptype_map = None
        self.set_variables_ptype()

    def set_variables_ptype(self):
        """Select the partition type map used by this kind of partition."""
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type registered for ``name``."""
        return self.ptype_map[name]['ready']

    def get_uuid(self):
        """Return the partition uuid, read from rawdev on first use."""
        if self.uuid is not None:
            return self.uuid
        self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type, read from rawdev on first use."""
        if self.ptype is not None:
            return self.ptype
        self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        """Record ``dev`` as both the usable and the raw device."""
        self.rawdev = dev
        self.dev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    @staticmethod
    def factory(path, dev, args):
        """
        Build the partition object matching the device and arguments.

        Multipath wins over dmcrypt: if ``path`` or ``dev`` is a
        multipath device a DevicePartitionMultipath is returned,
        otherwise the dmcrypt type picks the LUKS, plain or regular
        class.  The returned object has ``dev`` set as its device.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_mpath = ((path is not None and is_mpath(path)) or
                    (dev is not None and is_mpath(dev)))
        if on_mpath:
            partition = DevicePartitionMultipath(args)
        elif dmcrypt_type == 'luks':
            partition = DevicePartitionCryptLuks(args)
        elif dmcrypt_type == 'plain':
            partition = DevicePartitionCryptPlain(args)
        else:
            partition = DevicePartition(args)
        partition.set_dev(dev)
        return partition
1577
1578
class DevicePartitionMultipath(DevicePartition):
    """Partition whose types come from the 'mpath' entry of PTYPE."""

    def set_variables_ptype(self):
        mpath_types = PTYPE['mpath']
        self.ptype_map = mpath_types
1583
1584
class DevicePartitionCrypt(DevicePartition):
    """
    Common behaviour of dmcrypt partitions.  The luks() method used by
    map() is not defined here; the Plain and Luks subclasses provide it.
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        # set by setup_crypt() in the subclasses
        self.osd_dm_keypath = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        """Hook for subclasses; does nothing here."""
        pass

    def map(self):
        """Map the raw device with _dmcrypt_map() and use the result as dev."""
        self.setup_crypt()
        rawdev = self.rawdev
        keypath = self.osd_dm_keypath
        part_uuid = self.get_uuid()
        parameters = self.cryptsetup_parameters
        is_luks = self.luks()
        self.dev = _dmcrypt_map(
            rawdev=rawdev,
            keypath=keypath,
            _uuid=part_uuid,
            cryptsetup_parameters=parameters,
            luks=is_luks,
            format_dev=True,
        )

    def unmap(self):
        """Remove the mapping and fall back to the raw device."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        """Map then immediately unmap the partition."""
        self.setup_crypt()
        self.map()
        self.unmap()
1618
1619
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """dmcrypt partition in plain mode."""

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']

    def luks(self):
        return False

    def setup_crypt(self):
        """
        On first call, add --key-size to the cryptsetup parameters and
        obtain the key path; later calls do nothing.
        """
        already_set_up = self.osd_dm_keypath is not None
        if already_set_up:
            return

        keysize = self.dmcrypt_keysize
        self.cryptsetup_parameters += ['--key-size', str(keysize)]

        self.osd_dm_keypath = get_or_create_dmcrypt_key(
            self.get_uuid(),
            self.args.dmcrypt_key_dir,
            self.dmcrypt_keysize,
            False)
1637
1638
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """dmcrypt partition in LUKS mode."""

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['luks']

    def luks(self):
        return True

    def setup_crypt(self):
        """
        On first call, adjust the cryptsetup parameters and obtain the
        key path; later calls do nothing.
        """
        if self.osd_dm_keypath is not None:
            return

        # A key size of 1024 is not forced into the
        # cryptsetup_parameters, as we want the cryptsetup defaults
        # to prevail for the actual LUKS key lengths.
        if self.dmcrypt_keysize != 1024:
            self.cryptsetup_parameters += ['--key-size',
                                           str(self.dmcrypt_keysize)]

        self.osd_dm_keypath = get_or_create_dmcrypt_key(
            self.get_uuid(),
            self.args.dmcrypt_key_dir,
            self.dmcrypt_keysize,
            True)
1663
1664
class Prepare(object):
    """
    Entry point of the ``prepare`` subcommand.  Subclasses implement
    prepare_locked(), which is run while prepare_lock is held.
    """

    @staticmethod
    def parser():
        """Return the parser holding the options common to all prepares."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )
        parser.add_argument(
            '--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to',
        )
        parser.add_argument(
            '--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to',
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt',
        )
        parser.add_argument(
            '--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """
        Register the ``prepare`` subcommand on ``subparsers``, built
        from the Prepare, PrepareData, filestore and bluestore parsers,
        and return the new parser.
        """
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
        ]
        parents.extend(PrepareFilestore.parent_parsers())
        parents.extend(PrepareBluestore.parent_parsers())
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(
            func=Prepare.main,
        )
        return parser

    def prepare(self):
        """Run prepare_locked() while holding prepare_lock."""
        prepare_lock.acquire()
        try:
            self.prepare_locked()
        finally:
            # release even when prepare_locked() raises, otherwise the
            # lock stays held
            prepare_lock.release()

    @staticmethod
    def factory(args):
        """Return a PrepareBluestore if args.bluestore, else PrepareFilestore."""
        if args.bluestore:
            return PrepareBluestore(args)
        else:
            return PrepareFilestore(args)

    @staticmethod
    def main(args):
        """Function attached to the ``prepare`` subcommand."""
        Prepare.factory(args).prepare()
1732
1733
class PrepareFilestore(Prepare):
    """Prepare made of a filestore data part and a journal."""

    def __init__(self, args):
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """Parsers contributing the filestore specific options."""
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def prepare_locked(self):
        """Prepare the data, handing it the journal to prepare along."""
        journal = self.journal
        self.data.prepare(journal)
1748
1749
class PrepareBluestore(Prepare):
    """Prepare made of a bluestore data part and a block space."""

    def __init__(self, args):
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)

    @staticmethod
    def parser():
        """Return the parser for --bluestore and the ignored --filestore."""
        bluestore_parser = argparse.ArgumentParser(add_help=False)
        flags = (
            ('--bluestore', None, 'bluestore objectstore'),
            ('--filestore', True, 'IGNORED FORWARD COMPATIBIILTY HACK'),
        )
        for flag, default, text in flags:
            bluestore_parser.add_argument(
                flag,
                action='store_true', default=default,
                help=text,
            )
        return bluestore_parser

    @staticmethod
    def parent_parsers():
        """Parsers contributing the bluestore specific options."""
        parsers = [PrepareBluestore.parser()]
        parsers.append(PrepareBluestoreBlock.parser())
        return parsers

    def prepare_locked(self):
        """Prepare the data, handing it the block space to prepare along."""
        block = self.block
        self.data.prepare(block)
1780
1781
class Space(object):
    """Holder for the names of the spaces handled besides the OSD data."""

    # the two space names: 'block' and 'journal'
    NAMES = (
        'block',
        'journal',
    )
1785
1786
class PrepareSpace(object):
    """
    Base class preparing an additional space (e.g. journal or block)
    of an OSD.

    Subclasses must set ``self.name`` before calling __init__() and
    must provide get_space_size() and desired_partition_number().
    The space is named by the ``<name>`` argument and modified by the
    ``<name>_uuid``, ``<name>_file`` and ``<name>_dev`` arguments.
    """

    # possible values of self.type
    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        # a space that is specified needs a uuid: generate one if the
        # caller did not provide it
        if (getattr(self.args, self.name) and
                getattr(self.args, self.name + '_uuid') is None):
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        self.space_symlink = None
        self.space_dmcrypt = None

    def set_type(self):
        """
        Set self.type to NONE, FILE or DEVICE.

        When the space is wanted but unspecified and the data is a
        whole block device, the space is colocated on the data device.

        :raises Error: if the space contradicts --<name>-dev or
                       --<name>-file, or is neither a block device nor
                       a regular file
        """
        name = self.name
        args = self.args
        dmode = os.stat(args.data).st_mode
        if (self.wants_space() and
                stat.S_ISBLK(dmode) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        space = getattr(args, name)

        if space is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), space)
            self.type = self.NONE
            return

        if not os.path.exists(space):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), space)
            self.type = self.FILE
            return

        mode = os.stat(space).st_mode
        if stat.S_ISBLK(mode):
            if getattr(args, name + '_file'):
                raise Error('%s is not a regular file' % name.capitalize(),
                            space)
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            space)
            self.type = self.FILE
            # an existing regular file is a valid space: do not fall
            # through to the error below
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), space))

    def is_none(self):
        return self.type == self.NONE

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser(name):
        """
        Return a parser for the --<name>-uuid, --<name>-file and
        --<name>-dev options and the optional <name> positional.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )
        parser.add_argument(
            name,
            metavar=name.upper(),
            nargs='?',
            help=('path to OSD %s disk block device;' % name +
                  ' leave out to store %s in file' % name),
        )
        return parser

    def wants_space(self):
        """Whether the space is wanted at all; True unless overridden."""
        return True

    def populate_data_path(self, path):
        """Record the space in the OSD data directory ``path``."""
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """Write the <name>_uuid file in ``path`` if a uuid is set."""
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))

    def populate_data_path_device(self, path):
        """
        Write the uuid file and maintain the <name> and <name>_dmcrypt
        symlinks in ``path``.
        """
        self.populate_data_path_file(path)
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            # no dmcrypt: make sure no stale symlink is left behind
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                pass

    def prepare(self):
        """Prepare the space according to self.type."""
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """
        Create the space file if it does not exist yet and remember
        its path as the symlink target.
        """
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            # open() rather than the Python-2-only file() builtin
            with open(space_filename, 'wb'):
                pass

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        # The symlink target is the path of the file, whether it was
        # just created or already existed.
        self.space_symlink = space_filename

    def prepare_device(self):
        """
        Prepare a block device (or partition) as the space.

        An existing partition previously prepared by ceph-disk is
        reused, any other existing partition is symlinked directly,
        and a whole device gets a new partition created on it.
        """
        reusing_partition = False

        if is_partition(getattr(self.args, self.name)):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), getattr(self.args, self.name))
            partition = DevicePartition.factory(
                path=None, dev=getattr(self.args, self.name), args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(getattr(self.args, self.name) +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          getattr(self.args, self.name))
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID. See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, self.name + '_uuid', partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, self.name + '_uuid'))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            getattr(self.args, self.name))
                self.space_symlink = getattr(self.args, self.name)
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, self.name + '_uuid'))

        if self.args.dmcrypt:
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, self.name + '_uuid'))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(getattr(self.args, self.name), self.args)
        num = device.create_partition(
            uuid=getattr(self.args, self.name + '_uuid'),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()

        # switch the partition from its 'tobe' type to its 'ready' type
        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                getattr(self.args, self.name),
            ],
        )

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2035
2036
class PrepareJournal(PrepareSpace):
    """The journal space of a filestore OSD."""

    def __init__(self, args):
        self.name = 'journal'
        requirements = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = requirements

        journal_forbidden = not self.allows_journal
        if args.journal and journal_forbidden:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    @staticmethod
    def parser():
        """Return the parser for the journal options."""
        return PrepareSpace.parser('journal')

    def wants_space(self):
        """Whether check_journal_reqs() reported that a journal is wanted."""
        return self.wants_journal

    def get_space_size(self):
        """Return the osd_journal_size configuration value as an int."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """
        Return 2 when the journal shares the disk with the osd data,
        0 (no preference) otherwise.
        """
        shares_data_disk = self.args.journal == self.args.data
        return 2 if shares_data_disk else 0
2071
2072
class PrepareBluestoreBlock(PrepareSpace):
    """The block space of a bluestore OSD."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    @staticmethod
    def parser():
        """Return the parser for the block options."""
        return PrepareSpace.parser('block')

    def get_space_size(self):
        """Return 0, meaning: get as much space as possible."""
        return 0

    def desired_partition_number(self):
        """
        Return 2 when the block shares the disk with the osd data,
        0 (no preference) otherwise.
        """
        shares_data_disk = self.args.block == self.args.data
        return 2 if shares_data_disk else 0
2092
2093
class CryptHelpers(object):
    """Static helpers reading the dmcrypt related configuration."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """
        Return osd_cryptsetup_parameters split into a list of words,
        or an empty list when it is not configured.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        if configured is not None:
            return shlex.split(configured)
        return []

    @staticmethod
    def get_dmcrypt_keysize(args):
        """
        Return the key size for the dmcrypt type in use: the configured
        osd_dmcrypt_key_size if any, else 1024 for luks and 256 for
        plain.  Without dmcrypt the result is 0.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)

        if dmcrypt_type == 'luks':
            if configured is not None:
                return int(configured)
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024

        if dmcrypt_type == 'plain':
            if configured is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured)
            return int(configured)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """
        Return 'luks' or 'plain' when args.dmcrypt is set (luks being
        the default), None otherwise.

        :raises Error: if osd_dmcrypt_type holds any other value
        """
        if not args.dmcrypt:
            return None

        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        if configured is None or configured == 'luks':
            return 'luks'
        if configured == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured)
2154
2155
class PrepareData(object):
    """
    Base class preparing the data part of an OSD, which is either a
    directory (FILE) or a block device (DEVICE).

    Subclasses provide get_space_size() and drive the device
    preparation from their own prepare_device().
    """

    # possible values of self.type
    FILE = 1
    DEVICE = 2

    def __init__(self, args):

        self.args = args
        self.partition = None
        self.set_type()
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """
        Set self.type from what args.data is on disk.

        :raises Error: if args.data is neither a directory nor a
                       block device
        """
        dmode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(dmode):
            self.type = self.FILE
        elif stat.S_ISBLK(dmode):
            self.type = self.DEVICE
        else:
            raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        return self.type == self.FILE

    def is_device(self):
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return the parser for the data related options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        parser.add_argument(
            '--zap-disk',
            action='store_true', default=None,
            help='destroy the partition table (and content) of a disk',
        )
        parser.add_argument(
            '--data-dir',
            action='store_true', default=None,
            help='verify that DATA is a dir',
        )
        parser.add_argument(
            '--data-dev',
            action='store_true', default=None,
            help='verify that DATA is a block device',
        )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """
        Write the ceph_fsid, fsid and magic files in ``path`` and let
        every element of to_prepare_list add its own content.  Nothing
        is done if the magic file already exists.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return
        else:
            LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for to_prepare in to_prepare_list:
            to_prepare.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Prepare the data according to self.type."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
        elif self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare a directory as OSD data."""

        if not os.path.exists(self.args.data):
            raise Error('data path for directory does not exist',
                        self.args.data)

        if self.args.data_dev:
            raise Error('data path is not a block device', self.args.data)

        for to_prepare in to_prepare_list:
            to_prepare.prepare()

        self.populate_data_path_file(self.args.data, *to_prepare_list)

    def sanity_checks(self):
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        verify_not_in_use(self.args.data, True)

    def set_variables(self):
        """
        Resolve the filesystem type, the mkfs arguments and the mount
        options from the arguments and the configuration, each with a
        fallback configuration variable.
        """
        if self.args.fs_type is None:
            self.args.fs_type = get_conf(
                cluster=self.args.cluster,
                variable='osd_mkfs_type',
            )
            if self.args.fs_type is None:
                self.args.fs_type = get_conf(
                    cluster=self.args.cluster,
                    variable='osd_fs_type',
                )
            if self.args.fs_type is None:
                self.args.fs_type = DEFAULT_FS_TYPE

        self.mkfs_args = get_conf(
            cluster=self.args.cluster,
            variable='osd_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mkfs_args is None:
            self.mkfs_args = get_conf(
                cluster=self.args.cluster,
                variable='osd_fs_mkfs_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )

        self.mount_options = get_conf(
            cluster=self.args.cluster,
            variable='osd_mount_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mount_options is None:
            self.mount_options = get_conf(
                cluster=self.args.cluster,
                variable='osd_fs_mount_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )
        else:
            # remove whitespaces
            self.mount_options = "".join(self.mount_options.split())

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """Common first steps of a device preparation."""
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition number 1 for the data and return it."""
        device = Device.factory(self.args.data, self.args)
        partition_number = 1
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """
        Set self.partition to the existing partition args.data names
        or, if args.data is a whole device, to a newly created one.
        """
        if is_partition(self.args.data):
            LOG.debug('OSD data device %s is a partition',
                      self.args.data)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.data, args=self.args)
            # warn (only) if the partition type is not the one of a
            # ready osd partition
            ptype = self.partition.get_ptype()
            ptype_osd = self.partition.ptype_for_name('osd')
            if ptype != ptype_osd:
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, ptype_osd))
        else:
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()

    def populate_data_path_device(self, *to_prepare_list):
        """
        Create the filesystem on the data partition, populate it while
        it is temporarily mounted and, when args.data is a whole
        device, mark the partition as a ready osd.
        """
        partition = self.partition

        if isinstance(partition, DevicePartitionCrypt):
            partition.map()

        try:
            args = [
                'mkfs',
                '-t',
                self.args.fs_type,
            ]
            if self.mkfs_args is not None:
                args.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    args.extend(['-f'])  # always force
            else:
                args.extend(MKFS_ARGS.get(self.args.fs_type, []))
            args.extend([
                '--',
                partition.get_dev(),
            ])
            try:
                LOG.debug('Creating %s fs on %s',
                          self.args.fs_type, partition.get_dev())
                command_check_call(args)
            except subprocess.CalledProcessError as e:
                raise Error(e)

            path = mount(dev=partition.get_dev(),
                         fstype=self.args.fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            # always undo the mapping done above
            if isinstance(partition, DevicePartitionCrypt):
                partition.unmap()

        if not is_partition(self.args.data):
            try:
                command_check_call(
                    [
                        'sgdisk',
                        '--typecode=%d:%s' % (partition.get_partition_number(),
                                              partition.ptype_for_name('osd')),
                        '--',
                        self.args.data,
                    ],
                )
            except subprocess.CalledProcessError as e:
                raise Error(e)
            update_partition(self.args.data, 'prepared')
            command_check_call(['udevadm', 'trigger',
                                '--action=add',
                                '--sysname-match',
                                os.path.basename(partition.rawdev)])
2403
2404
class PrepareFilestoreData(PrepareData):
    """Data part of a filestore OSD."""

    def get_space_size(self):
        """Return 0, meaning: get as much space as possible."""
        return 0

    def prepare_device(self, *to_prepare_list):
        """
        Prepare the additional spaces first, then the data partition,
        then populate the data partition.
        """
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate ``path`` and record 'filestore' in its type file."""
        base = super(PrepareFilestoreData, self)
        base.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
2421
7c673cae
FG
2422
class PrepareBluestoreData(PrepareData):
    """PrepareData variant that tags the data path as 'bluestore'."""

    def get_space_size(self):
        """Return the size of the data partition, in MB."""
        return 100

    def prepare_device(self, *to_prepare_list):
        """Set up the data partition first, then every companion object."""
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        self.set_data_partition()
        for companion in to_prepare_list:
            companion.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate ``path`` via the parent class and write the type file."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
2439
2440
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """Fetch the monmap and run ``ceph-osd --mkfs --mkkey`` on ``path``.

    The monmap is stored as ``activate.monmap`` inside ``path``. The
    ``type`` file in ``path`` decides whether ``--osd-journal`` is
    passed: it is omitted for 'bluestore' and set to ``<path>/journal``
    otherwise.

    :param path: OSD data directory.
    :param cluster: cluster name passed to ``--cluster``.
    :param osd_id: OSD id passed to ``-i``.
    :param fsid: OSD uuid passed to ``--osd-uuid``.
    :param keyring: bootstrap-osd keyring used to fetch the monmap.
    """
    monmap = os.path.join(path, 'activate.monmap')
    command_check_call(
        [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            'mon', 'getmap', '-o', monmap,
        ],
    )

    osd_type = read_one_line(path, 'type')

    mkfs_command = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '--mkkey',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type != 'bluestore':
        mkfs_command.extend(['--osd-journal', os.path.join(path, 'journal')])
    mkfs_command.extend([
        '--osd-uuid', fsid,
        '--keyring', os.path.join(path, 'keyring'),
        '--setuser', get_ceph_user(),
        # the bluestore branch used to pass the *user* name here
        '--setgroup', get_ceph_group(),
    ])
    command_check_call(mkfs_command)
2494
2495
def auth_key(path, cluster, osd_id, keyring):
    """Register the OSD key found in ``<path>/keyring`` with the cluster.

    The dumpling+ capability scheme ('allow profile osd') is tried
    first. When the command exits with EINVAL, the old scheme
    ('allow rwx') is tried instead; any other failure is re-raised.
    """
    def add_key(mon_cap):
        command_check_call(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', keyring,
                'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
                '-i', os.path.join(path, 'keyring'),
                'osd', 'allow *',
                'mon', mon_cap,
            ],
        )

    try:
        # try dumpling+ cap scheme
        add_key('allow profile osd')
    except subprocess.CalledProcessError as err:
        if err.returncode != errno.EINVAL:
            raise
        # try old cap scheme
        add_key('allow rwx')
2533
2534
def get_mount_point(cluster, osd_id):
    """Return ``<STATEDIR>/osd/<cluster>-<osd_id>``."""
    leaf = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', leaf)
2541
2542
def move_mount(dev, path, cluster, osd_id, fstype, mount_options):
    """Mount ``dev`` at the OSD's final location and detach ``path``.

    When ``mount_options`` is None, the MOUNT_OPTIONS entry for
    ``fstype`` is used (empty string when there is none).
    """
    LOG.debug('Moving mount to final location...')
    final_dir = get_mount_point(cluster, osd_id)
    maybe_mkdir(final_dir)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others. Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013. Instead, mount the original device at the final
    # location.
    mount_cmd = ['/bin/mount', '-o', mount_options, '--', dev, final_dir]
    command_check_call(mount_cmd)

    # lazy umount, in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
2584
2585
def start_daemon(cluster, osd_id):
    """Start the OSD through the init system its data dir is tagged with.

    The tag is a file named 'upstart', 'sysvinit' or 'systemd' in
    ``<STATEDIR>/osd/<cluster>-<osd_id>``, looked up in that order.

    :raises Error: when no tag is found or a start command fails.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    osd_dir = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged_with(init_system):
        return os.path.exists(os.path.join(osd_dir, init_system))

    try:
        if tagged_with('upstart'):
            # use emit, not start, because start would fail if the
            # instance was already running; --no-wait because the
            # daemon starting doesn't guarantee much about the service
            # being operational anyway, so don't bother waiting for it
            command_check_call(
                [
                    '/sbin/initctl',
                    'emit',
                    '--no-wait',
                    '--',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged_with('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'start',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif tagged_with('systemd'):
            unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
            for action in ('enable', 'start'):
                command_check_call(['systemctl', action, unit])
        else:
            raise Error('{cluster} osd.{osd_id} is not tagged '
                        'with an init system'.format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
2651
2652
def stop_daemon(
    cluster,
    osd_id,
):
    """Stop the OSD through the init system its data dir is tagged with.

    The tag is a file named 'upstart', 'sysvinit' or 'systemd' in
    ``<STATEDIR>/osd/<cluster>-<osd_id>``, looked up in that order. For
    systemd the unit is disabled before it is stopped.

    :raises Error: when no tag is found or a stop command fails.
    """
    LOG.debug('Stopping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            command_check_call(
                [
                    'systemctl',
                    'disable',
                    'ceph-osd@{osd_id}'.format(osd_id=osd_id),
                ],
            )
            command_check_call(
                [
                    'systemctl',
                    'stop',
                    'ceph-osd@{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            # same wording as start_daemon (the message used to contain
            # a doubled space)
            raise Error('{cluster} osd.{osd_id} is not tagged '
                        'with an init system'.format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
2705
2706
def detect_fstype(dev):
    """Return the filesystem type ``blkid`` probes on ``dev``.

    ``-p`` is passed because we don't want stale cached results. The
    output is validated by ``must_be_one_line``.
    """
    blkid_cmd = [
        '/sbin/blkid',
        '-p',
        '-s', 'TYPE',
        '-o', 'value',
        '--',
        dev,
    ]
    return must_be_one_line(_check_output(args=blkid_cmd))
2723
2724
def dmcrypt_map(dev, dmcrypt_key_dir):
    """Map the dmcrypt partition ``dev`` without formatting it.

    The partition type selects plain mode (with a 256 bit key size) or
    luks mode.

    :returns: whatever ``_dmcrypt_map`` returns.
    :raises Error: when the partition type is neither a ready 'plain'
        nor a ready 'luks' type.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        is_luks, extra_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        is_luks, extra_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key_path = get_dmcrypt_key_path(part_uuid, dmcrypt_key_dir, is_luks)
    return _dmcrypt_map(
        rawdev=dev,
        keypath=key_path,
        _uuid=part_uuid,
        cryptsetup_parameters=extra_parameters,
        luks=is_luks,
        format_dev=False,
    )
2746
2747
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """Mount ``dev`` on a temporary path, activate it and move the mount.

    :param dev: data device; mapped through dmcrypt first when
        ``dmcrypt`` is true.
    :param reactivate: allow activating an OSD whose data dir holds a
        'deactive' file; the file is removed once activation succeeds.
    :returns: ``(cluster, osd_id)``.
    :raises FilesystemTypeError: when the filesystem type cannot be
        detected.
    :raises Error: when the OSD is deactivated and ``reactivate`` is
        false, or when something else occupies the final mount point.
    """
    if dmcrypt:
        # the uuid is not used below; the lookup is kept as is
        part_uuid = get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)

    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = None
    for template in ('osd_mount_options_{fstype}',
                     'osd_fs_mount_options_{fstype}'):
        mount_options = get_conf(
            cluster='ceph',
            variable=template.format(fstype=fstype),
        )
        if mount_options is not None:
            break

    # remove whitespaces from mount_options
    if mount_options is not None:
        mount_options = "".join(mount_options.split())

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # check if the disk is deactive, change the journal owner, group
    # mode for correct user and group.
    deactive = os.path.exists(os.path.join(path, 'deactive'))
    if deactive and not reactivate:
        # logging to syslog will help us easy to know udev triggered failure
        unmount(path)
        # we need to unmap again because dmcrypt map will create again
        # on bootup stage (due to deactivate)
        if '/dev/mapper/' in dev:
            part_uuid = dev.replace('/dev/mapper/', '')
            dmcrypt_unmap(part_uuid)
        LOG.info('OSD deactivated! reactivate with: --reactivate')
        raise Error('OSD deactivated! reactivate with: --reactivate')

    osd_id = None
    cluster = None
    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        already_active = False
        occupied = False
        src_dev = os.stat(path).st_dev
        try:
            final_dir = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)
            dst_dev = os.stat(final_dir).st_dev
            if src_dev == dst_dev:
                already_active = True
            else:
                parent_dev = os.stat(STATEDIR + '/osd').st_dev
                if dst_dev != parent_dev:
                    occupied = True
                elif os.listdir(get_mount_point(cluster, osd_id)):
                    LOG.info(get_mount_point(cluster, osd_id) +
                             " is not empty, won't override")
                    occupied = True
        except OSError:
            pass

        if occupied:
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        if already_active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return (cluster, osd_id)

    except:
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
2872
2873
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """Activate the OSD stored in directory ``path``.

    Unless ``init`` is None or 'none', a symlink from the canonical
    location ``<STATEDIR>/osd/<cluster>-<osd_id>`` to ``path`` is
    created, replacing an existing symlink that points elsewhere.

    :returns: ``(cluster, osd_id)``.
    :raises Error: when ``path`` does not exist or the symlink cannot be
        removed or created.
    """
    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError as e:
                        # only filesystem errors are translated, so that
                        # KeyboardInterrupt/SystemExit are not masked
                        raise Error('unable to remove old symlink',
                                    canonical, e)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError as e:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path), e)

    return (cluster, osd_id)
2913
2914
def find_cluster_by_uuid(_uuid):
    """
    Find a cluster name by searching SYSCONFDIR/*.conf for a conf file
    with the right uuid.

    Returns the cluster name, or None when SYSCONFDIR does not exist or
    no conf file matches. When the only conf file without an fsid is
    ceph.conf, 'ceph' is returned anyway.
    """
    _uuid = _uuid.lower()
    no_fsid = []
    if not os.path.exists(SYSCONFDIR):
        return None
    for conf_file in os.listdir(SYSCONFDIR):
        if not conf_file.endswith('.conf'):
            continue
        cluster = conf_file[:-5]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            # Same value BaseException.message had on Python 2 (the sole
            # argument, else ''), without the deprecated attribute.
            message = e.args[0] if len(e.args) == 1 else ''
            if message != 'getting cluster uuid from configuration failed':
                raise  # bare raise keeps the original traceback
            no_fsid.append(cluster)
        else:
            if fsid == _uuid:
                return cluster
    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
2943
2944
def activate(
    path,
    activate_key_template,
    init,
):
    """Bring the OSD data directory ``path`` to the 'active' state.

    Steps, each skipped when already done: allocate an OSD id (stored
    in 'whoami'), run mkfs unless 'ready' exists, tag the directory
    with the init system, authorize the OSD key and write 'active'.

    :param activate_key_template: format string for the keyring path;
        ``cluster`` and ``statedir`` are substituted.
    :param init: init system name, 'auto' to read it from the 'init'
        configuration variable or fall back to ``init_get()``, or
        None/'none' to write no tag.
    :returns: ``(osd_id, cluster)``.
    :raises Error: when the cluster uuid, the cluster conf or the OSD
        uuid is missing.
    """
    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    if not os.path.exists(os.path.join(path, 'ready')):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            conf_val = get_conf(
                cluster=cluster,
                variable='init'
            )
            if conf_val is not None:
                init = conf_val
            else:
                init = init_get()

        LOG.debug('Marking with init system %s', init)
        # open() instead of the Python 2-only file() builtin; the file
        # is only created (or truncated), nothing is written to it
        with open(os.path.join(path, init), 'w'):
            pass

    # remove markers for others, just in case.
    for other in INIT_SYSTEMS:
        if other != init:
            try:
                os.unlink(os.path.join(path, other))
            except OSError:
                pass

    if not os.path.exists(os.path.join(path, 'active')):
        LOG.debug('Authorizing OSD key...')
        auth_key(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            keyring=keyring,
        )
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3027
3028
def main_activate(args):
    """Entry point of the activate sub-command.

    ``args.path`` may be a block device (mounted and activated) or a
    directory (activated in place). Unless ``args.no_start_daemon`` is
    set, the OSD is then started: directly through ``ceph-osd`` when
    ``args.mark_init`` is 'none', through ``start_daemon`` for any
    other non-None value.

    :raises Error: when the path does not exist, is neither a directory
        nor a block device, or is a multipath OSD partition that is not
        a multipath device.
    """
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    activate_lock.acquire()  # noqa
    try:
        mode = os.stat(args.path).st_mode
        if stat.S_ISBLK(mode):
            mpath_ready = PTYPE['mpath']['osd']['ready']
            if (is_partition(args.path) and
                    get_partition_type(args.path) == mpath_ready and
                    not is_mpath(args.path)):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)
        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path
        else:
            raise Error('%s is not a directory or block device' % args.path)

        if not args.no_start_daemon:
            if args.mark_init == 'none':
                # no init system involved: run the daemon ourselves
                command_check_call(
                    [
                        'ceph-osd',
                        '--cluster={cluster}'.format(cluster=cluster),
                        '--id={osd_id}'.format(osd_id=osd_id),
                        '--osd-data={path}'.format(path=osd_data),
                        '--osd-journal={path}/journal'.format(path=osd_data),
                    ],
                )
            elif args.mark_init is not None:
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )
    finally:
        activate_lock.release()  # noqa
3092
3093
3094###########################
3095
def _mark_osd_out(cluster, osd_id):
    """Run ``ceph osd out`` for ``osd_id`` (an integer) on ``cluster``."""
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    command([
        'ceph',
        # target the requested cluster; the parameter used to be ignored
        '--cluster', cluster,
        'osd',
        'out',
        'osd.%d' % osd_id,
    ])
3104
3105
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is read from the JSON output of ``ceph osd dump``.
    Raises Error when ``osd_id`` is not listed there.
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    found = False
    status_code = 0
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    wanted = int(osd_id)
    for item in out_json[u'osds']:
        if item.get(u'osd') == wanted:
            found = True
            # compare by value: `is 1` only worked through CPython's
            # small integer cache
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not find osd.%s in osd tree!' % osd_id)
    return status_code
3138
3139
def _remove_osd_directory_files(mounted_path, cluster):
    """
    Remove the 'ready', 'active' and INIT-specific files from
    ``mounted_path``.

    'ready' and 'active' are removed only when present. The
    INIT-specific file, named after the 'init' configuration variable
    or ``init_get()`` when that is unset, is removed unconditionally.
    """
    markers = (
        ('ready', 'Remove `ready` file.', '`ready` file is already removed.'),
        ('active', 'Remove `active` file.',
         '`active` file is already removed.'),
    )
    for marker, removed_msg, missing_msg in markers:
        marker_path = os.path.join(mounted_path, marker)
        if os.path.exists(marker_path):
            os.remove(marker_path)
            LOG.info(removed_msg)
        else:
            LOG.info(missing_msg)

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    init = get_conf(
        cluster=cluster,
        variable='init'
    )
    if init is None:
        init = init_get()
    os.remove(os.path.join(mounted_path, init))
    LOG.info('Remove `%s` file.', init)
    return
3168
3169
def main_deactivate(args):
    """Run ``main_deactivate_locked`` while holding ``activate_lock``."""
    activate_lock.acquire()  # noqa
    try:
        return main_deactivate_locked(args)
    finally:
        # released even when deactivation raised
        activate_lock.release()  # noqa
3176
3177
def main_deactivate_locked(args):
    """Deactivate the OSD selected by ``args.deactivate_by_id`` or
    ``args.path``.

    Depending on the OSD status the daemon is stopped and, when
    ``args.mark_out`` is True, the OSD is marked out. Unless the OSD
    was already down, its 'ready', 'active' and init files are removed,
    a 'deactive' file is written, the data dir is unmounted and dmcrypt
    mappings are removed.

    :raises Error: when no partition matches.
    """
    wanted_id = args.deactivate_by_id
    wanted_path = args.path
    target_dev = None

    # look through all partitions for the one we need; the last match wins
    for device in list_devices():
        if 'partitions' not in device:
            continue
        for candidate in device.get('partitions'):
            if (wanted_id and
                    'whoami' in candidate and
                    candidate['whoami'] == wanted_id):
                target_dev = candidate
            elif (wanted_path and
                    'path' in candidate and
                    candidate['path'] == wanted_path):
                target_dev = candidate
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # collect everything needed below
    osd_id = target_dev['whoami']
    part_type = target_dev['ptype']
    mounted_path = target_dev['mount']
    dmcrypt = False
    if Ptype.is_dmcrypt(part_type, 'osd'):
        dmcrypt = True

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code == OSD_STATUS_IN_UP:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_IN_DOWN:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        LOG.info("OSD already out/down. Do not do anything now.")
        return
    elif status_code == OSD_STATUS_OUT_UP:
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_OUT_DOWN:
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    # remove 'ready', 'active', and INIT-specific files.
    _remove_osd_directory_files(mounted_path, args.cluster)

    # Write deactivate to osd directory!
    deactive_path = os.path.join(mounted_path, 'deactive')
    with open(deactive_path, 'w'):
        path_set_context(deactive_path)

    unmount(mounted_path)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return
    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        uuid_key = name + '_uuid'
        if uuid_key in target_dev:
            dmcrypt_unmap(target_dev[uuid_key])
3239
3240###########################
3241
3242
def _remove_from_crush_map(cluster, osd_id):
    """Run ``ceph osd crush remove`` for ``osd_id`` on ``cluster``."""
    LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
    command([
        'ceph',
        # target the requested cluster; the parameter used to be ignored
        '--cluster', cluster,
        'osd',
        'crush',
        'remove',
        'osd.%s' % osd_id,
    ])
3252
3253
def _delete_osd_auth_key(cluster, osd_id):
    """Run ``ceph auth del`` for the key of ``osd_id`` on ``cluster``."""
    LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
    command([
        'ceph',
        # target the requested cluster; the parameter used to be ignored
        '--cluster', cluster,
        'auth',
        'del',
        'osd.%s' % osd_id,
    ])
3262
3263
def _deallocate_osd_id(cluster, osd_id):
    """Run ``ceph osd rm`` for ``osd_id`` on ``cluster``."""
    LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
    command([
        'ceph',
        # target the requested cluster; the parameter used to be ignored
        '--cluster', cluster,
        'osd',
        'rm',
        '%s' % osd_id,
    ])
3272
3273
def destroy_lookup_device(args, predicate, description):
    """Return the first partition for which ``predicate`` is true.

    dmcrypt partitions are mapped temporarily so that ``list_dev_osd``
    can fill in their description before the predicate is evaluated.

    :param description: text appended to the error when nothing matches.
    :raises Error: when no partition satisfies ``predicate``.
    """
    for device in list_devices():
        for part in device.get('partitions', []):
            if part['dmcrypt']:
                mapped = dmcrypt_map(part['path'], args.dmcrypt_key_dir)
                list_dev_osd(mapped, {}, part)
                dmcrypt_unmap(part['uuid'])
            if predicate(part):
                return part
    raise Error('found no device matching ', description)
3286
3287
def main_destroy(args):
    """Entry point of the destroy sub-command.

    The OSD is selected by ``args.path`` (must be a partition) or by
    ``args.destroy_by_id``. It is removed from the crush map, its cephx
    key is deleted and its id deallocated. With ``args.zap`` the base
    device is zapped as well.

    :raises Error: when neither selector is given, no device matches,
        the path is not a partition, or the OSD is still up.
    """
    osd_id = args.destroy_by_id
    path = args.path
    # NOTE(review): this flag is never set to True in this function, so
    # the dmcrypt unmap branch below never runs -- confirm whether it
    # should be derived from the partition type.
    dmcrypt = False
    target_dev = None

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)

    if path:
        target_dev = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        target_dev = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))

    if target_dev is None:
        # neither selector was given; fail with a readable error instead
        # of a TypeError on the subscript below
        raise Error('a partition path or an osd id is required '
                    'to destroy an osd')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code != OSD_STATUS_OUT_DOWN and \
            status_code != OSD_STATUS_IN_DOWN:
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    # Remove OSD from crush map
    _remove_from_crush_map(args.cluster, osd_id)

    # Remove OSD cephx key
    _delete_osd_auth_key(args.cluster, osd_id)

    # Deallocate OSD ID
    _deallocate_osd_id(args.cluster, osd_id)

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
3343
3344
def get_space_osd_uuid(name, path):
    """Return the first line ``ceph-osd --get-device-fsid`` prints for
    ``path``.

    :param name: label used in log and error messages.
    :param path: block device to query.
    :raises Error: when ``path`` does not exist, is not a block device,
        is a multipath journal/block partition that is not a multipath
        device, or when ``ceph-osd`` fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not stat.S_ISBLK(os.stat(path).st_mode):
        raise Error('%s is not a block device' % path)

    mpath_space_types = (PTYPE['mpath']['journal']['ready'],
                         PTYPE['mpath']['block']['ready'])
    if (is_partition(path) and
            get_partition_type(path) in mpath_space_types and
            not is_mpath(path)):
        raise Error('%s is not a multipath block device' %
                    path)

    try:
        out = _check_output(
            args=['ceph-osd', '--get-device-fsid', path],
            close_fds=True,
        )
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    # keep only what precedes the first newline
    value = str(out).partition('\n')[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
3377
3378
def main_activate_space(name, args):
    """Activate the OSD that the space device ``args.dev`` belongs to.

    The OSD uuid read from the device gives the data partition as
    ``/dev/disk/by-partuuid/<uuid>``; that partition is mounted and
    activated and the daemon is started.

    :param name: label forwarded to ``get_space_osd_uuid``.
    :raises Error: when ``args.dev`` does not exist.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    activate_lock.acquire()  # noqa
    try:
        dev = args.dev
        if args.dmcrypt:
            dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        data_path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(data_path):
            LOG.info('suppressed activate request on %s', data_path)
            return

        (cluster, osd_id) = mount_activate(
            dev=data_path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
        )
    finally:
        activate_lock.release()  # noqa
3420
3421
3422###########################
3423
3424
def main_activate_all(args):
    """Activate every ready OSD partition in /dev/disk/by-parttypeuuid.

    Entries are named ``<partition type>.<partition uuid>``. A failure
    on one partition is reported on stderr and the scan continues.

    :raises Error: after the scan, when at least one partition failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return
    err = False
    for name in os.listdir(scan_dir):
        if '.' not in name:
            continue
        # split on the first dot only, so an unexpected extra dot
        # cannot abort the whole scan with a ValueError
        (tag, part_uuid) = name.split('.', 1)

        if tag in Ptype.get_ready_by_name('osd'):

            if Ptype.is_dmcrypt(tag, 'osd'):
                path = os.path.join('/dev/mapper', part_uuid)
            else:
                path = os.path.join(scan_dir, name)

            if is_suppressed(path):
                LOG.info('suppressed activate request on %s', path)
                continue

            LOG.info('Activating %s', path)
            activate_lock.acquire()  # noqa
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )

            except Exception as e:
                # sys.stderr.write works on Python 2 and 3, unlike the
                # `print >>` statement
                sys.stderr.write('{prog}: {msg}\n'.format(
                    prog=args.prog,
                    msg=e,
                ))
                err = True

            finally:
                activate_lock.release()  # noqa
    if err:
        raise Error('One or more partitions failed to activate')
3474
3475
3476###########################
3477
def is_swap(dev):
    """Return True when ``dev`` is listed in /proc/swaps.

    Both ``dev`` and the listed entries are resolved with
    ``os.path.realpath`` before being compared.
    """
    dev = os.path.realpath(dev)
    # open() in text mode: the Python 2-only file() builtin is gone in
    # Python 3, where binary mode would also yield bytes that cannot be
    # compared with the str paths below
    with open('/proc/swaps', 'r') as proc_swaps:
        # the first line is the column header
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if swaps_dev.startswith('/') and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
3491
3492
def get_oneliner(base, name):
    """Return the first line of file ``name`` in directory ``base``.

    Trailing whitespace is stripped. None is returned when the path is
    not a regular file.
    """
    target = os.path.join(base, name)
    if not os.path.isfile(target):
        return None
    with open(target, 'r') as handle:
        first_line = handle.readline()
    return first_line.rstrip()
3499
3500
def get_dev_fs(dev):
    """Return the filesystem type ``blkid -s TYPE`` reports for ``dev``.

    None is returned when the output does not contain 'TYPE'.
    """
    blkid_out, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in blkid_out:
        return None
    # second whitespace separated word, text between its double quotes
    type_field = blkid_out.split()[1]
    return type_field.split('"')[1]
3515
3516
def split_dev_base_partnum(dev):
    """Return ``(base, partnum)`` for the partition ``dev``.

    For multipath devices both values come from the mpath helpers;
    otherwise the partition number is read from the ``partition`` file
    under ``block_path(dev)``.
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # context manager so the file handle is closed right away
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return (base, partnum)
3526
3527
def get_partition_type(part):
    """Return the ID_PART_ENTRY_TYPE value blkid reports for ``part``."""
    wanted = 'ID_PART_ENTRY_TYPE'
    return get_blkid_partition_info(part, wanted)
3530
3531
def get_partition_uuid(part):
    """Return the ID_PART_ENTRY_UUID value blkid reports for ``part``."""
    wanted = 'ID_PART_ENTRY_UUID'
    return get_blkid_partition_info(part, wanted)
3534
3535
def get_blkid_partition_info(dev, what=None):
    """Return the KEY=VALUE pairs ``blkid -o udev -p`` prints for ``dev``.

    :param what: when given (and non-empty), return only the value for
        that key, or None when it is absent.
    :returns: a dict of all pairs, or a single value when ``what`` is
        set.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # split on the first '=' only: a value may contain '=' itself,
        # and a line without any '=' carries no pair at all
        (key, sep, value) = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
3554
3555
def more_osd_info(path, uuid_map, desc):
    """Fill ``desc`` with what the OSD data directory ``path`` records.

    Sets 'ceph_fsid', 'cluster' (only when an fsid was read), 'whoami'
    and, for every name in Space.NAMES with a uuid file, '<name>_uuid'
    (lowercased) plus '<name>_dev' when ``uuid_map`` knows that uuid.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        space_uuid = get_oneliner(path, name + '_uuid')
        if not space_uuid:
            continue
        space_uuid = space_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
3567
3568
def list_dev_osd(dev, uuid_map, desc):
    """Describe the OSD data device ``dev`` in ``desc``.

    'state' becomes 'active' when the device is mounted, 'prepared'
    when a temporary mount shows a 'magic' file, and stays 'unprepared'
    otherwise. A MountError from the temporary mount is ignored.
    """
    mounted_on = is_mounted(dev)
    fs_type = get_dev_fs(dev)
    desc['mount'] = mounted_on
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mounted_on:
        desc['state'] = 'active'
        more_osd_info(mounted_on, uuid_map, desc)
        return
    if not fs_type:
        return

    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if tpath:
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    more_osd_info(tpath, uuid_map, desc)
            finally:
                # always release the temporary mount
                unmount(tpath)
    except MountError:
        pass
3590
3591
def list_format_more_osd_info_plain(dev):
    """Return the list of text fragments describing the OSD on ``dev``.

    Fragments cover the cluster (name, or fsid when the name is
    unknown), the osd id and the device of every space in Space.NAMES.
    """
    fragments = []
    fsid = dev.get('ceph_fsid')
    if fsid:
        if dev.get('cluster'):
            fragments.append('cluster ' + dev['cluster'])
        else:
            fragments.append('unknown cluster ' + fsid)
    if dev.get('whoami'):
        fragments.append('osd.%s' % dev['whoami'])
    fragments.extend(
        name + ' %s' % dev[name + '_dev']
        for name in Space.NAMES
        if dev.get(name + '_dev')
    )
    return fragments
3605
3606
def list_format_dev_plain(dev, prefix=''):
    """Return the one line plain text description of ``dev``.

    The line is ``<prefix><path> <comma separated fragments>``; the
    fragments depend on the partition type of the device.
    """
    ptype = dev['ptype']
    desc = []
    if ptype == PTYPE['regular']['osd']['ready']:
        desc = ['ceph data', dev['state']]
        desc = desc + list_format_more_osd_info_plain(dev)
    elif Ptype.is_dmcrypt(ptype, 'osd'):
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if not holders:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(holders)]
    elif Ptype.is_regular_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        desc.append('ceph ' + name)
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        dmcrypt = dev['dmcrypt']
        if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            detail = ' (dmcrypt %s %s)' % (dmcrypt['type'], holder)
        else:
            detail = ' (dmcrypt %s)' % dmcrypt['type']
        desc = ['ceph ' + name + detail]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # not a ceph partition: generic description
        desc.append(dev['type'])
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
3650
3651
def list_format_plain(devices):
    """Render the device list as plain text, one line per disk/partition.

    A device with a non-empty ``partitions`` list produces a
    '<path> :' header line followed by one line per partition; any other
    device produces a single line.

    :param devices: list of device dictionaries
    :return: the lines joined with newlines ('' for an empty list)
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if partitions:
            lines.append('%s :' % device['path'])
            # Partitions are dictionaries: sort on the path explicitly.
            # Comparing the dictionaries themselves gives an arbitrary
            # order on Python 2 and raises TypeError on Python 3.
            for p in sorted(partitions, key=lambda part: part['path']):
                lines.append(list_format_dev_plain(dev=p,
                                                   prefix=' '))
        else:
            lines.append(list_format_dev_plain(dev=device,
                                               prefix=''))
    return "\n".join(lines)
3664
3665
def list_dev(dev, uuid_map, space_map):
    """Build the description dictionary of the disk or partition ``dev``.

    :param dev: path of the device
    :param uuid_map: passed through to list_dev_osd() for data partitions
    :param space_map: maps the uuid of a space (journal, block...)
                      partition to the device it is used by
    :return: a dictionary that always has ``path``, ``dmcrypt``,
             ``is_partition``, ``ptype`` and ``type``; other keys depend
             on the partition type
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    def describe_dmcrypt_osd(kind):
        # encrypted data partition: only look inside when exactly one
        # holder is reported for it
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = kind
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)

    def note_owner(name):
        # remember which device this space partition is used by
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]

    def describe_dmcrypt_space(kind):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = kind
        info['dmcrypt']['holders'] = holders
        note_owner(name)

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")

    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        describe_dmcrypt_osd('plain')
    elif ptype == PTYPE['luks']['osd']['ready']:
        describe_dmcrypt_osd('LUKS')
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        note_owner(name)
    elif Ptype.is_plain_space(ptype):
        describe_dmcrypt_space('plain')
    elif Ptype.is_luks_space(ptype):
        describe_dmcrypt_space('LUKS')
    else:
        mountpoint = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        info['type'] = 'swap' if is_swap(dev) else 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if mountpoint:
            info['mount'] = mountpoint

    return info
3736
3737
def list_devices():
    """Return the description of every disk found by list_all_partitions().

    A first pass over all partitions builds two lookup tables:

    * ``uuid_map``: partition uuid -> device path
    * ``space_map``: lower-cased ``<name>_uuid`` one-liner found in a
      'ready' OSD data partition -> path of that data partition

    A second pass calls list_dev() on each partition (or on the disk
    itself when it has no partitions).

    :return: list of device dictionaries; a partitioned disk is
             ``{'path': ..., 'partitions': [...]}``
    """
    partmap = list_all_partitions()

    uuid_map = {}
    space_map = {}
    for base, parts in sorted(partmap.items()):
        for p in parts:
            dev = get_dev_path(p)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype not in Ptype.get_ready_by_name('osd'):
                continue

            if Ptype.is_dmcrypt(ptype, 'osd'):
                holders = is_held(dev)
                if len(holders) != 1:
                    continue
                dev_to_mount = get_dev_path(holders[0])
            else:
                dev_to_mount = dev

            fs_type = get_dev_fs(dev_to_mount)
            if fs_type is None:
                continue
            try:
                tpath = mount(dev=dev_to_mount,
                              fstype=fs_type, options='')
                try:
                    for name in Space.NAMES:
                        space_uuid = get_oneliner(tpath,
                                                  name + '_uuid')
                        if space_uuid:
                            space_map[space_uuid.lower()] = dev
                finally:
                    unmount(tpath)
            except MountError:
                pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        if not parts:
            device = list_dev(get_dev_path(base), uuid_map, space_map)
            device['path'] = get_dev_path(base)
            devices.append(device)
            continue
        disk = {'path': get_dev_path(base)}
        disk['partitions'] = [
            list_dev(get_dev_path(p), uuid_map, space_map)
            for p in sorted(parts)
        ]
        devices.append(disk)
    LOG.debug("list_devices: " + str(devices))
    return devices
3798
3799
def main_list(args):
    """Entry point of ``ceph-disk list``: print the known devices.

    :param args: parsed arguments; ``args.path`` optionally restricts the
                 output and ``args.format`` is 'json' or 'plain'.

    Each PATH argument that exists is resolved with os.path.realpath();
    a device is selected when one of the paths matches the end of its
    ``path`` (the argument is used as a regular expression, anchored
    with '$').
    """
    devices = list_devices()
    if args.path:
        paths = [
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        ]
        # any() stops at the first match, so a device matched by several
        # PATH arguments is listed once instead of once per match
        selected_devices = [
            device for device in devices
            if any(re.search(path + '$', device['path']) for path in paths)
        ]
    else:
        selected_devices = devices
    # print(x) with a single argument behaves the same on Python 2 and 3
    if args.format == 'json':
        print(json.dumps(selected_devices))
    else:
        output = list_format_plain(selected_devices)
        if output:
            print(output)
3822
3823
3824###########################
3825#
3826# Mark devices that we want to suppress activates on with a
3827# file like
3828#
3829# /var/lib/ceph/tmp/suppress-activate.sdb
3830#
3831# where the last bit is the sanitized device name (/dev/X without the
3832# /dev/ prefix) and the is_suppressed() check matches a prefix. That
3833# means suppressing sdb will stop activate on sdb1, sdb2, etc.
3834#
3835
def is_suppressed(path):
    """Tell whether activation is suppressed for the device ``path``.

    The path is resolved with os.path.realpath().  Anything that is not
    a block device under /dev/ is never suppressed.  Otherwise the
    device name and each of its shorter prefixes are looked up as
    ``SUPPRESS_PREFIX + <prefix>`` marker files.

    :return: True when a marker file exists, False otherwise (including
             when the check itself fails)
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not stat.S_ISBLK(os.lstat(disk).st_mode)):
            return False
        base = get_dev_name(disk)
        while len(base):
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # best effort: a failing check means "not suppressed", but do not
        # swallow KeyboardInterrupt / SystemExit like a bare except would
        return False
    # no marker found: answer False explicitly rather than None
    return False
3849
3850
def set_suppress(path):
    """Create the marker file that suppresses activation of ``path``.

    :param path: path to a block device (symbolic links are resolved)
    :raises Error: if the resolved path does not exist or is not a
                   block device
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Check the resolved device, as is_suppressed() does: lstat() on the
    # path as given would reject a symbolic link to a block device.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    base = get_dev_name(disk)

    # an empty file is enough, only its existence matters
    with open(SUPPRESS_PREFIX + base, 'w'):  # noqa
        pass
    LOG.info('set suppress flag on %s', base)
3862
3863
def unset_suppress(path):
    """Remove the marker file that suppresses activation of ``path``.

    :param path: path to a block device (symbolic links are resolved)
    :raises Error: if the resolved path does not exist, is not a block
                   device, is not marked as suppressed, or if the marker
                   file cannot be removed
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Check the resolved device, as is_suppressed() does: lstat() on the
    # path as given would reject a symbolic link to a block device.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    assert disk.startswith('/dev/')
    base = get_dev_name(disk)

    fn = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(fn):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(fn)
        LOG.info('unset suppress flag on %s', base)
    except OSError as e:
        raise Error('failed to unsuppress', e)
3882
3883
def main_suppress(args):
    """Entry point of ``suppress-activate``: flag ``args.path``."""
    target = args.path
    set_suppress(target)
3886
3887
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unflag ``args.path``."""
    target = args.path
    unset_suppress(target)
3890
3891
def main_zap(args):
    """Entry point of ``zap``: call zap() on each of ``args.dev`` in turn."""
    for target in args.dev:
        zap(target)
3895
3896
def _trigger_dmcrypt_activate(dev, partid, luks, action, chown):
    """Map the dmcrypt partition ``dev`` then run ``ceph-disk <action>``.

    :param dev: path of the encrypted partition
    :param partid: partition uuid; names both the key file and the
                   /dev/mapper entry
    :param luks: True to ``luksOpen`` with the ``.luks.key`` key file,
                 False to ``create`` a plain mapping with a 256 bits key
    :param action: ceph-disk subcommand run on the mapped device
    :param chown: when True, chown the mapped device to ceph:ceph first
    """
    if luks:
        command(
            [
                '/sbin/cryptsetup',
                '--key-file',
                '/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
                    partid=partid),
                'luksOpen',
                dev,
                partid,
            ]
        )
    else:
        command(
            [
                '/sbin/cryptsetup',
                '--key-file',
                '/etc/ceph/dmcrypt-keys/{partid}'.format(partid=partid),
                '--key-size',
                '256',
                'create',
                partid,
                dev,
            ]
        )
    newdev = '/dev/mapper/' + partid
    # give the mapped device a few seconds to show up; carry on
    # regardless once the retries are exhausted
    count = 0
    while not os.path.exists(newdev) and count <= 10:
        time.sleep(1)
        count += 1
    if chown:
        command(
            [
                '/bin/chown',
                'ceph:ceph',
                newdev,
            ]
        )
    command(
        [
            '/usr/sbin/ceph-disk',
            action,
            newdev,
        ]
    )


def main_trigger(args):
    """Entry point of ``ceph-disk trigger``: activate what ``args.dev`` is.

    Unless ``args.sync`` is set, the work is delegated to the init system
    when is_systemd() or is_upstart() is true.  Otherwise the partition
    type of ``args.dev`` selects which activation is run; dmcrypt
    partitions are mapped first.

    :raises Error: if the partition type is not recognized
    """
    LOG.debug("main_trigger: " + str(args))
    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        escaped_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(
            [
                'systemctl',
                '--no-block',
                'restart',
                service,
            ]
        )
        return
    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command(
            [
                'initctl',
                'emit',
                'ceph-disk',
                'dev={dev}'.format(dev=args.dev),
                'pid={pid}'.format(pid=os.getpid()),
            ]
        )
        return

    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    if parttype in (PTYPE['regular']['osd']['ready'],
                    PTYPE['mpath']['osd']['ready']):
        command(
            [
                'ceph-disk',
                'activate',
                args.dev,
            ]
        )
    elif parttype in (PTYPE['regular']['journal']['ready'],
                      PTYPE['mpath']['journal']['ready']):
        command(
            [
                'ceph-disk',
                'activate-journal',
                args.dev,
            ]
        )

    # journals are easy: map, chown, activate-journal
    elif parttype == PTYPE['plain']['journal']['ready']:
        _trigger_dmcrypt_activate(args.dev, partid, luks=False,
                                  action='activate-journal', chown=True)
    elif parttype == PTYPE['luks']['journal']['ready']:
        _trigger_dmcrypt_activate(args.dev, partid, luks=True,
                                  action='activate-journal', chown=True)

    elif parttype in (PTYPE['regular']['block']['ready'],
                      PTYPE['mpath']['block']['ready']):
        command(
            [
                'ceph-disk',
                'activate-block',
                args.dev,
            ]
        )

    # blocks are easy: map, chown, activate-block
    elif parttype == PTYPE['plain']['block']['ready']:
        _trigger_dmcrypt_activate(args.dev, partid, luks=False,
                                  action='activate-block', chown=True)
    elif parttype == PTYPE['luks']['block']['ready']:
        _trigger_dmcrypt_activate(args.dev, partid, luks=True,
                                  action='activate-block', chown=True)

    # osd data: map, activate
    elif parttype == PTYPE['plain']['osd']['ready']:
        _trigger_dmcrypt_activate(args.dev, partid, luks=False,
                                  action='activate', chown=False)
    elif parttype == PTYPE['luks']['osd']['ready']:
        _trigger_dmcrypt_activate(args.dev, partid, luks=True,
                                  action='activate', chown=False)

    else:
        raise Error('unrecognized partition type %s' % parttype)
4148
4149
def setup_statedir(dir):
    """Set the state directory and the module globals derived from it.

    Creates ``dir`` and ``dir``/tmp when they do not exist, then sets
    STATEDIR, prepare_lock, activate_lock and SUPPRESS_PREFIX.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX

    STATEDIR = dir
    tmpdir = STATEDIR + "/tmp"

    for needed in (STATEDIR, tmpdir):
        if not os.path.exists(needed):
            os.mkdir(needed)

    prepare_lock = filelock(tmpdir + '/ceph-disk.prepare.lock')
    activate_lock = filelock(tmpdir + '/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = tmpdir + '/suppress-activate.'
4170
4171
def setup_sysconfdir(dir):
    """Store ``dir`` in the module level SYSCONFDIR."""
    global SYSCONFDIR
    SYSCONFDIR = dir
4175
4176
def parse_args(argv):
    """Parse the ceph-disk command line.

    :param argv: list of arguments, without the program name
    :return: the argparse namespace; it also carries ``prog`` and the
             defaults set by the selected subcommand parser
    """
    parser = argparse.ArgumentParser('ceph-disk')
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=None,
        help='be more verbose',
    )
    parser.add_argument(
        '--log-stdout',
        action='store_true', default=None,
        help='log to stdout',
    )
    parser.add_argument(
        '--prepend-to-path',
        metavar='PATH',
        default='/usr/bin',
        help=('prepend PATH to $PATH for backward compatibility '
              '(default /usr/bin)'),
    )
    parser.add_argument(
        '--statedir',
        metavar='PATH',
        default='/var/lib/ceph',
        help=('directory in which ceph state is preserved '
              '(default /var/lib/ceph)'),
    )
    parser.add_argument(
        '--sysconfdir',
        metavar='PATH',
        default='/etc/ceph',
        help=('directory in which ceph configuration files are found '
              '(default /etc/ceph)'),
    )
    parser.add_argument(
        '--setuser',
        metavar='USER',
        default=None,
        help='use the given user for subprocesses, rather than ceph or root'
    )
    parser.add_argument(
        '--setgroup',
        metavar='GROUP',
        default=None,
        help='use the given group for subprocesses, rather than ceph or root'
    )
    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    # registration order is kept: it is the order shown in the help
    for register in (
            Prepare.set_subparser,
            make_activate_parser,
            make_activate_block_parser,
            make_activate_journal_parser,
            make_activate_all_parser,
            make_list_parser,
            make_suppress_parser,
            make_deactivate_parser,
            make_destroy_parser,
            make_zap_parser,
            make_trigger_parser,
    ):
        register(subparsers)

    return parser.parse_args(argv)
4249
4250
def make_trigger_parser(subparsers):
    """Register the ``trigger`` subcommand and return its parser."""
    parser = subparsers.add_parser(
        'trigger',
        help='Trigger an event (caled by udev)')
    parser.add_argument('dev', help='device')
    parser.add_argument(
        '--sync',
        action='store_true', default=None,
        help='do operation synchronously; do not trigger systemd',
    )
    parser.set_defaults(func=main_trigger)
    return parser
4268
4269
def make_activate_parser(subparsers):
    """Register the ``activate`` subcommand and return its parser."""
    parser = subparsers.add_parser('activate', help='Activate a Ceph OSD')
    parser.add_argument(
        '--mount',
        action='store_true', default=None,
        help='mount a block device [deprecated, ignored]',
    )
    # the default is provided by set_defaults() below
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.add_argument(
        '--no-start-daemon',
        action='store_true', default=None,
        help='do not start the daemon',
    )
    parser.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help='map DATA and/or JOURNAL devices with dm-crypt',
    )
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD',
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate,
    )
    return parser
4323
4324
def make_activate_block_parser(subparsers):
    """Register ``activate-block`` and return its parser."""
    return make_activate_space_parser(name='block', subparsers=subparsers)
4327
4328
def make_activate_journal_parser(subparsers):
    """Register ``activate-journal`` and return its parser."""
    return make_activate_space_parser(name='journal', subparsers=subparsers)
4331
4332
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` subcommand and return its parser.

    :param name: name of the space, used in the subcommand name and in
                 the help texts
    :param subparsers: the argparse subparsers object to register with
    """
    def activate(args):
        # binds the space name so the handler takes args only
        return main_activate_space(name, args)

    parser = subparsers.add_parser(
        'activate-%s' % name,
        help='Activate an OSD via its %s device' % name)
    parser.add_argument(
        'dev',
        metavar='DEV',
        help='path to %s block device' % name,
    )
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.add_argument(
        '--dmcrypt',
        action='store_true', default=None,
        help=('map data and/or auxiliariy (journal, etc.) '
              'devices with dm-crypt'),
    )
    parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help='directory where dm-crypt keys are stored',
    )
    parser.add_argument(
        '--reactivate',
        action='store_true', default=False,
        help='activate the deactived OSD',
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=activate,
    )
    return parser
4377
4378
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` subcommand and return its parser."""
    parser = subparsers.add_parser(
        'activate-all',
        help='Activate all tagged OSD partitions')
    parser.add_argument(
        '--activate-key',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
        dest='activate_key_template',
    )
    parser.add_argument(
        '--mark-init',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
        default='auto',
        choices=INIT_SYSTEMS,
    )
    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_all,
    )
    return parser
4401
4402
def make_list_parser(subparsers):
    """Register the ``list`` subcommand and return its parser."""
    parser = subparsers.add_parser(
        'list',
        help='List disks, partitions, and Ceph OSDs')
    parser.add_argument(
        '--format',
        help='output format',
        default='plain',
        choices=['json', 'plain'],
    )
    parser.add_argument(
        'path',
        metavar='PATH',
        nargs='*',
        help='path to block devices, relative to /sys/block',
    )
    parser.set_defaults(func=main_list)
    return parser
4423
4424
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    :return: the parser of ``suppress-activate`` only
    """
    suppress = subparsers.add_parser(
        'suppress-activate',
        help='Suppress activate on a device (prefix)')
    suppress.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    suppress.set_defaults(func=main_suppress)

    unsuppress = subparsers.add_parser(
        'unsuppress-activate',
        help='Stop suppressing activate on a device (prefix)')
    unsuppress.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    unsuppress.set_defaults(func=main_unsuppress)
    return suppress
4450
4451
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` subcommand and return its parser.

    The parser is returned for consistency with the other
    ``make_*_parser`` helpers; callers that ignore the result are not
    affected.
    """
    deactivate_parser = subparsers.add_parser(
        'deactivate',
        help='Deactivate a Ceph OSD')
    deactivate_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    deactivate_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    deactivate_parser.add_argument(
        '--deactivate-by-id',
        metavar='<id>',
        help='ID of OSD to deactive'
    )
    deactivate_parser.add_argument(
        '--mark-out',
        action='store_true', default=False,
        help='option to mark the osd out',
    )
    deactivate_parser.set_defaults(
        func=main_deactivate,
    )
    return deactivate_parser
4481
4482
def make_destroy_parser(subparsers):
    """Register the ``destroy`` subcommand and return its parser.

    The parser is returned for consistency with the other
    ``make_*_parser`` helpers; callers that ignore the result are not
    affected.
    """
    destroy_parser = subparsers.add_parser(
        'destroy',
        help='Destroy a Ceph OSD')
    destroy_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    destroy_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    destroy_parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy'
    )
    destroy_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    destroy_parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    destroy_parser.set_defaults(
        func=main_destroy,
    )
    return destroy_parser
4520
4521
def make_zap_parser(subparsers):
    """Register the ``zap`` subcommand and return its parser."""
    parser = subparsers.add_parser(
        'zap',
        help='Zap/erase/destroy a device\'s partition table (and contents)')
    parser.add_argument(
        'dev',
        metavar='DEV',
        nargs='+',
        help='path to block device',
    )
    parser.set_defaults(func=main_zap)
    return parser
4536
4537
def main(argv):
    """Parse ``argv``, set up the global state and run the subcommand.

    With --verbose the subcommand is called directly, so exceptions
    propagate; otherwise it goes through main_catch().
    """
    global CEPH_PREF_USER, CEPH_PREF_GROUP

    args = parse_args(argv)

    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = ":".join([args.prepend_to_path, current])

    setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if args.verbose:
        args.func(args)
    else:
        main_catch(args.func, args)
4559
4560
def setup_logging(verbose, log_stdout):
    """Configure logging at DEBUG (``verbose``) or WARNING level.

    With ``log_stdout`` a stdout handler formatting records as
    '<filename>: <message>' is added to LOG; otherwise
    logging.basicConfig() is used.
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(level=loglevel)
        return

    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(loglevel)
    handler.setFormatter(logging.Formatter('%(filename)s: %(message)s'))
    LOG.addHandler(handler)
    LOG.setLevel(loglevel)
4577
4578
def main_catch(func, args):
    """Call ``func(args)`` and turn known errors into a SystemExit.

    Error becomes '<prog>: <message>' and CephDiskException becomes
    '<prog> <exception class name>: <message>'; any other exception
    propagates unchanged.
    """
    try:
        func(args)
    except Error as e:
        message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
        raise SystemExit(message)
    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
4601
4602
def run():
    """Run main() with the command line arguments of the process."""
    arguments = sys.argv[1:]
    main(arguments)
4605
if __name__ == '__main__':
    # Executed as a script: hand everything after the program name to main().
    main(sys.argv[1:])
    # NOTE(review): only assigned here, after main() returned; nothing in
    # the visible part of the file reads it -- confirm before removing.
    warned_about = {}