]> git.proxmox.com Git - ceph.git/blob - ceph/qa/workunits/ceph-disk/ceph-disk-no-lockbox
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / qa / workunits / ceph-disk / ceph-disk-no-lockbox
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2015 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
7 #
8 # Author: Loic Dachary <loic@dachary.org>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
13 # any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
19 #
20 # THIS IS ceph-disk AS OF dc5a9053ce69c0630091774f16ce421da67d26fb v10.0.3-2247-gdc5a905
21 # PRIOR TO THE INTRODUCTION OF THE LOCKBOX VOLUME TO STORE KEY FETCHING
22 # STRATEGIES
23 #
24 import argparse
25 import errno
26 import fcntl
27 import json
28 import logging
29 import os
30 import platform
31 import re
32 import subprocess
33 import stat
34 import sys
35 import tempfile
36 import uuid
37 import time
38 import shlex
39 import pwd
40 import grp
41
# Expected content of the 'magic' file in an OSD data directory
# (compared against by check_osd_magic).
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'

# Partition type GUID table, indexed as PTYPE[what][name][state] where
#   what  is one of 'regular', 'luks', 'plain', 'mpath'
#   name  is one of 'journal', 'block', 'osd'
#   state is 'ready' or 'tobe'
# NOTE(review): 'tobe' presumably marks a partition that is still being
# prepared and 'ready' one that is usable -- confirm against the callers.
PTYPE = {
    'regular': {
        'journal': {
            # identical because creating a journal is atomic
            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
        },
        'block': {
            # identical because creating a block is atomic
            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
        },
    },
    # 'luks' and 'plain' are the two types Ptype.is_dmcrypt() treats as
    # dm-crypt.
    'luks': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    'plain': {
        'journal': {
            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'block': {
            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
        },
        'osd': {
            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
        },
    },
    # multipath devices; journal and block use the same GUID for both
    # states, like the 'regular' entries above
    'mpath': {
        'journal': {
            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
        },
        'block': {
            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
        },
        'osd': {
            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
        },
    },
}
104
105
class Ptype(object):
    """
    Stateless lookup helpers over the PTYPE partition type GUID table.

    Only the 'ready' GUIDs are consulted by these helpers.
    """

    @staticmethod
    def get_ready_by_type(what):
        """
        Return the 'ready' GUID of every space listed under PTYPE[what].
        """
        ready = []
        for space in PTYPE[what].values():
            ready.append(space['ready'])
        return ready

    @staticmethod
    def get_ready_by_name(name):
        """
        Return the 'ready' GUID of the space `name` for every type in PTYPE.
        """
        ready = []
        for spaces in PTYPE.values():
            ready.append(spaces[name]['ready'])
        return ready

    @staticmethod
    def is_regular_space(ptype):
        """True if ptype is a 'ready' GUID of the 'regular' type."""
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        """True if ptype is a 'ready' GUID of the 'mpath' type."""
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        """True if ptype is a 'ready' GUID of the 'plain' type."""
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        """True if ptype is a 'ready' GUID of the 'luks' type."""
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """
        True if ptype equals the 'ready' GUID of any space named in
        Space.NAMES for the type `what`.
        """
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """
        Map a 'ready' GUID back to the name of the space it belongs to.

        :raises: ValueError if no type in PTYPE has that GUID.
        """
        matches = (name
                   for what in PTYPE.values()
                   for name in Space.NAMES
                   if ptype == what[name]['ready'])
        for name in matches:
            return name
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """
        True if ptype is a dm-crypt 'ready' GUID of any space in
        Space.NAMES.
        """
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """
        True if ptype is the 'plain' or 'luks' 'ready' GUID of space `name`.
        """
        return any(ptype == PTYPE[what][name]['ready']
                   for what in ('plain', 'luks'))
160
# Default filesystem type name.
DEFAULT_FS_TYPE = 'xfs'
# Root of the sysfs tree; block_path() builds its result from it.
SYSFS = '/sys'

"""
OSD STATUS Definition
"""
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3

# Default mount options per filesystem type; mount() falls back to these
# when it is called with options=None.
MOUNT_OPTIONS = dict(
    btrfs='noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    ext4='noatime,user_xattr',
    xfs='noatime,inode64',
)

# Extra mkfs arguments per filesystem type.
MKFS_ARGS = dict(
    btrfs=[
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    xfs=[
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
)

# Names of the recognised init systems.
INIT_SYSTEMS = [
    'upstart',
    'sysvinit',
    'systemd',
    'auto',
    'none',
]

# Ceph state directory; mount() creates its temporary mount points under
# STATEDIR + '/tmp'.
STATEDIR = '/var/lib/ceph'

SYSCONFDIR = '/etc/ceph'

# Module-level placeholders, initialised elsewhere.
prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}
217
# Drop TERM from the environment so that subprocesses are not confused
# by it: libreadline, for example, prints odd control sequences for some
# TERM values.
os.environ.pop('TERM', None)

# Log under the script name when run directly, under the module name
# when imported.
if __name__ == '__main__':
    LOG_NAME = os.path.basename(sys.argv[0])
else:
    LOG_NAME = __name__
LOG = logging.getLogger(LOG_NAME)

# User-preferred values for the subprocess user and group
# (None means "not set").
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
232
233
class filelock(object):
    """
    Exclusive advisory lock (fcntl.lockf) held on a file.

    The file `fn` is opened for writing, and so created or truncated, on
    every acquire(). acquire() blocks until the lock is granted and
    release() unlocks and closes the file.
    """

    def __init__(self, fn):
        self.fn = fn
        # open file object while the lock is held, None otherwise
        self.fd = None

    def acquire(self):
        """Open the lock file and take an exclusive lock on it."""
        assert not self.fd
        # open() instead of the Python 2 only file() builtin
        handle = open(self.fn, 'w')
        try:
            fcntl.lockf(handle, fcntl.LOCK_EX)
        except Exception:
            # do not leak the descriptor when locking fails
            handle.close()
            raise
        self.fd = handle

    def release(self):
        """Drop the lock and close the lock file."""
        assert self.fd
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # close explicitly rather than relying on garbage collection
            self.fd.close()
            self.fd = None
248
249
# Base class of the ceph-disk errors. The class docstring is not only
# documentation: __str__ uses it as the message prefix, followed by the
# exception arguments joined with ': '.
class Error(Exception):
    """
    Error
    """

    def __str__(self):
        # __doc__ is not inherited, so a subclass without its own
        # docstring has __doc__ set to None; use the nearest documented
        # ancestor instead of failing on None.strip().
        doc = ''
        for klass in type(self).__mro__:
            if klass.__doc__:
                doc = klass.__doc__.strip()
                break
        return ': '.join([doc] + [str(a) for a in self.args])
258
259
# The docstring of each class below is the message prefix printed by
# Error.__str__: changing it changes the error messages.

# Raised by mount() when the mount command fails.
class MountError(Error):
    """
    Mounting filesystem failed
    """


# Raised by unmount() once its retries are exhausted.
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """


# Raised by check_osd_magic() when the 'magic' file is missing or
# does not match CEPH_OSD_ONDISK_MAGIC.
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """


# Raised by must_be_one_line() when the text has no trailing newline.
class TruncatedLineError(Error):
    """
    Line is truncated
    """


# Raised by must_be_one_line() when the text holds more than one line.
class TooManyLinesError(Error):
    """
    Too many lines
    """


class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
294
295
class CephDiskException(Exception):
    """
    Root of the ceph-disk exceptions that carry a custom (ad-hoc)
    message; they are meant to be caught and dealt with when main()
    is executed.
    """
302
303
class ExecutableNotFound(CephDiskException):
    """
    Reports that an executable is not available in PATH.
    """
309
310
def is_systemd():
    """
    Detect whether systemd is running.

    :return: True if any line of /proc/1/comm contains 'systemd',
             False otherwise.
    """
    # open() in text mode: the file() builtin exists only in Python 2,
    # and a str cannot be searched for in bytes lines outside of it.
    with open('/proc/1/comm', 'r') as comm:
        return any('systemd' in line for line in comm)
320
321
def is_upstart():
    """
    Detect whether upstart is running.

    :return: True if the standard output of ``init --version``
             mentions 'upstart', False otherwise.
    """
    stdout = command(['init', '--version'])[0]
    return 'upstart' in stdout
330
331
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    The arguments are handed to os.mkdir(); the first positional
    argument is the path. An already existing path is not an error.

    :raises: OSError if os.mkdir() fails for a reason other than EEXIST.
    """
    path = a[0]
    # os.path.islink() does not follow the link, so a dangling symlink
    # is removed too (os.path.exists() reports False for those, which
    # used to leave the link in place and no directory created).
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
348
349
def which(executable):
    """
    Find the location of an executable.

    The directories of $PATH (os.defpath when PATH is unset) are
    searched first, then a fixed list of usual bin/sbin directories.

    :return: Path of the first executable regular file found, or None.
    """
    search_path = os.environ.get('PATH', os.defpath).split(os.pathsep)
    search_path.extend([
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ])

    for directory in search_path:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
372
373
374 def _get_command_executable(arguments):
375 """
376 Return the full path for an executable, raise if the executable is not
377 found. If the executable has already a full path do not perform any checks.
378 """
379 if arguments[0].startswith('/'): # an absolute path
380 return arguments
381 executable = which(arguments[0])
382 if not executable:
383 command_msg = 'Could not run command: %s' % ' '.join(arguments)
384 executable_msg = '%s not in path.' % arguments[0]
385 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
386
387 # swap the old executable for the new one
388 arguments[0] = executable
389 return arguments
390
391
def command(arguments, **kwargs):
    """
    Safely execute a ``subprocess.Popen`` call making sure that the
    executable exists and raising a helpful error message
    if it does not.

    .. note:: This should be the preferred way of calling ``subprocess.Popen``
    since it provides the caller with the safety net of making sure that
    executables *will* be found and will error nicely otherwise.

    :param arguments: the command line as a list; the executable is
                      resolved by _get_command_executable().
    :param kwargs: passed through to ``subprocess.Popen``.
    :return: A tuple (stdout, stderr, returncode); both streams are
             captured in full.
    """
    arguments = _get_command_executable(arguments)
    # let logging do the formatting, only when the record is emitted
    LOG.info('Running command: %s', ' '.join(arguments))
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = process.communicate()
    return out, err, process.returncode
414
415
def command_check_call(arguments):
    """
    Run a command with ``subprocess.check_call`` after making sure that
    its executable can be found.

    .. note:: This should be the preferred way of calling
    ``subprocess.check_call``: a missing executable is reported with a
    helpful error instead of an obscure failure.

    :return: Whatever ``subprocess.check_call`` returns.
    """
    resolved = _get_command_executable(arguments)
    LOG.info('Running command: %s', ' '.join(resolved))
    return subprocess.check_call(resolved)
429
430
def platform_distro():
    """
    Return the distribution name of the current machine, lower cased
    and stripped of leading and trailing whitespace.
    """
    name = platform_information()[0]
    if not name:
        name = ''
    return name.strip().lower()
438
439
def platform_information():
    """
    Return (distro, release, codename) as stripped strings, taken from
    platform.linux_distribution().

    When a Debian system reports no codename, one is derived from the
    release string.
    """
    distro, release, codename = platform.linux_distribution()
    # the codename could be an empty string in Debian
    if not codename and 'debian' in distro.lower():
        codename_by_major = {
            '8': 'jessie',
            '7': 'wheezy',
            '6': 'squeeze',
        }
        codename = codename_by_major.get(release.split('.')[0], '')

        # Newer releases look like jessie/sid or wheezy/sid: when sid is
        # buried in the minor part use sid, otherwise the major part.
        if not codename and '/' in release:
            major, minor = release.split('/')
            codename = minor if minor == 'sid' else major

    return tuple(str(field).strip()
                 for field in (distro, release, codename))
466
467 #
468 # An alternative block_path implementation would be
469 #
470 # name = basename(dev)
471 # return /sys/devices/virtual/block/$name
472 #
473 # It is however more fragile because it relies on the fact
474 # that the basename of the device the user will use always
475 # matches the one the driver will use. On Ubuntu 14.04, for
476 # instance, when multipath creates a partition table on
477 #
478 # /dev/mapper/353333330000007d0 -> ../dm-0
479 #
480 # it will create partition devices named
481 #
482 # /dev/mapper/353333330000007d0-part1
483 #
484 # which is the same device as /dev/dm-1 but not a symbolic
485 # link to it:
486 #
487 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
488 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
489 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
490 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
491 #
492 # Using the basename in this case fails.
493 #
494
495
def block_path(dev):
    """
    Return the {SYSFS}/dev/block/MAJOR:MINOR path of a device.

    Symbolic links are resolved first; the numbers come from st_rdev
    of the resulting path.
    """
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS,
        M=os.major(rdev),
        m=os.minor(rdev),
    )
501
502
def get_dm_uuid(dev):
    """
    Read the device-mapper uuid of a device from sysfs.

    :return: The raw content of <block_path(dev)>/dm/uuid (not
             stripped), or False if that file does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # close the file deterministically; the local is not named `uuid`
    # to avoid shadowing the uuid module
    with open(uuid_path, 'r') as uuid_file:
        dm_uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + dm_uuid)
    return dm_uuid
511
512
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The device-mapper uuid of a multipath device starts with 'mpath-',
    the one of a partition on it with 'part<N>-mpath-'.

    :return: A real boolean (not a match object, False, '' or None).
    """
    dm_uuid = get_dm_uuid(dev)
    if not dm_uuid:
        return False
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return bool(re.match(r'(part\d+-)?mpath-', dm_uuid))
521
522
def get_dev_name(path):
    """
    Convert a device path to a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The leading '/dev/' is removed and each remaining '/' becomes '!'.
    The path must start with '/dev/'.
    """
    prefix = '/dev/'
    assert path.startswith(prefix)
    relative = path[len(prefix):]
    return '!'.join(relative.split('/'))
538
539
def get_dev_path(name):
    """
    Convert a device name to a device path, e.g.::

        sdb -> /dev/sdb, cciss!c0d1 -> /dev/cciss/c0d1

    Each '!' becomes '/' and '/dev/' is prepended.
    """
    relative = '/'.join(name.split('!'))
    return '/dev/' + relative
550
551
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) to a path relative to /dev
    (cciss/c0d1).
    """
    return '/'.join(name.split('!'))
557
558
def get_dev_size(dev, size='megabytes'):
    """
    Attempt to get the size of a device so that we can prevent errors
    from actions to devices that are smaller, and improve error reporting.

    Because we want to avoid breakage in case this approach is not robust, we
    will issue a warning if we failed to get the size.

    :param size: bytes or megabytes; any other value means megabytes
    :param dev: the device to calculate the size
    :return: The size as an integer, rounded down, or None if it could
             not be determined.
    """
    fd = os.open(dev, os.O_RDONLY)
    dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
    try:
        # seeking to the end yields the size in bytes
        device_size = os.lseek(fd, 0, os.SEEK_END)
        divider = dividers.get(size, 1024 * 1024)  # default to megabytes
        # floor division keeps the result an integer under true division
        return device_size // divider
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(fd)
580
581
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of the multipath device `dev` whose
    device-mapper uuid starts with 'part<pnum>-mpath-', or None.
    """
    pattern = "part{pnum}-mpath-".format(pnum=pnum)
    found = list_partitions_mpath(dev, pattern)
    return found[0] if found else None
589
590
def get_partition_dev(dev, pnum):
    """
    Get the device path of partition number `pnum` of `dev`.

    Partitions are assumed to be named like the base device followed by
    optional intervening characters (like 'p') and the number, e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    :raises: Error if no such partition is found.
    """
    if is_mpath(dev):
        partname = get_partition_mpath(dev, pnum)
    else:
        partname = None
        name = get_dev_name(os.path.realpath(dev))
        suffix = str(pnum)
        for entry in os.listdir(os.path.join('/sys/block', name)):
            if not (entry.startswith(name) and entry.endswith(suffix)):
                continue
            # keep the shortest name that starts with the base name
            # and ends with the partition number
            if not partname or len(entry) < len(partname):
                partname = entry

    if not partname:
        raise Error('partition %d for %s does not appear to exist' %
                    (pnum, dev))
    return get_dev_path(partname)
618
619
def list_all_partitions():
    """
    Return a dict mapping the name of each device found in /sys/block
    to the list of its partitions.
    """
    partitions_by_device = {}
    for name in os.listdir('/sys/block'):
        # floppy drives are left out:
        # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
        if re.match(r'^fd\d$', name) is None:
            partitions_by_device[name] = list_partitions(get_dev_path(name))
    return partitions_by_device
632
633
def list_partitions(dev):
    """
    Return the partitions of `dev`, using the multipath or the regular
    lookup depending on what the (symlink resolved) device is.
    """
    real_dev = os.path.realpath(dev)
    if is_mpath(real_dev):
        lister = list_partitions_mpath
    else:
        lister = list_partitions_device
    return lister(real_dev)
640
641
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    Return the holders of the multipath device `dev` whose
    device-mapper uuid matches `part_re`.

    :param part_re: regular expression matched against the beginning
                    of the dm uuid of each holder.
    :return: List of holder names, as found in the 'holders' directory.
    """
    holders = os.path.join(block_path(dev), 'holders')
    partitions = []
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # close each file instead of leaking one descriptor per holder
        with open(uuid_path, 'r') as uuid_file:
            dm_uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path +
                  " uuid = " + dm_uuid)
        if re.match(part_re, dm_uuid):
            partitions.append(holder)
    return partitions
653
654
def list_partitions_device(dev):
    """
    Return the partitions of the given device: the entries of its sysfs
    directory whose name starts with the device name.
    """
    prefix = get_dev_name(dev)
    return [entry
            for entry in os.listdir(block_path(dev))
            if entry.startswith(prefix)]
665
666
def get_partition_base(dev):
    """
    Get the path of the base device of a partition.

    :raises: Error if `dev` is not a block device, is a whole device
             rather than a partition, or has no parent in /sys/block.
    """
    sys_block = '/sys/block'
    real_dev = os.path.realpath(dev)
    if not stat.S_ISBLK(os.lstat(real_dev).st_mode):
        raise Error('not a block device', real_dev)

    name = get_dev_name(real_dev)
    # whole devices have their own entry directly under /sys/block
    if os.path.exists(os.path.join(sys_block, name)):
        raise Error('not a partition', real_dev)

    # the base is the device whose sysfs directory contains the partition
    for candidate in os.listdir(sys_block):
        if os.path.exists(os.path.join(sys_block, candidate, name)):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', real_dev)
684
685
def is_partition_mpath(dev):
    """
    True if `dev` is a partition of a multipath device, i.e. its
    device-mapper uuid starts with 'part<N>-mpath-'.
    """
    dm_uuid = get_dm_uuid(dev)
    # get_dm_uuid() returns False when the device has no dm uuid;
    # re.match() would raise TypeError on it
    if not dm_uuid:
        return False
    return bool(re.match(r'part\d+-mpath-', dm_uuid))
689
690
def partnum_mpath(dev):
    """
    Return, as a string, the partition number found in the
    device-mapper uuid ('part<N>-mpath-...') of a multipath partition.
    """
    dm_uuid = get_dm_uuid(dev)
    numbers = re.findall(r'part(\d+)-mpath-', dm_uuid)
    return numbers[0]
694
695
def get_partition_base_mpath(dev):
    """
    Get the /dev/mapper path of the multipath device a partition is on.

    The name is read from the dm/name file of the first entry of the
    'slaves' directory of the partition.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # close the file deterministically instead of leaking the handle
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
703
704
def is_partition(dev):
    """
    Tell a partition (True) from a full disk (False).

    :raises: Error if `dev` is not a block device, or is neither a
             disk nor a partition.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    real_dev = os.path.realpath(dev)
    info = os.lstat(real_dev)
    if not stat.S_ISBLK(info.st_mode):
        raise Error('not a block device', real_dev)

    # full disks have their own entry directly under /sys/block
    if os.path.exists(os.path.join('/sys/block', get_dev_name(real_dev))):
        return False

    # make sure it is a partition of something else
    numbers = (os.major(info.st_rdev), os.minor(info.st_rdev))
    if not os.path.exists('/sys/dev/block/%d:%d/partition' % numbers):
        raise Error('not a disk or partition', real_dev)
    return True
728
729
def is_mounted(dev):
    """
    Check if the given device is mounted.

    :return: The mount point of the first /proc/mounts entry whose
             device resolves to the same path as `dev`, or None.
    """
    dev = os.path.realpath(dev)
    # open() in text mode: the file() builtin exists only in Python 2,
    # and bytes fields never compare equal to str paths outside of it.
    with open('/proc/mounts', 'r') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            # only entries backed by an existing device path can match
            if mounts_dev.startswith('/') and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return path
    return None
747
748
def is_held(dev):
    """
    Check if a device is held by another device (e.g., a dm-crypt mapping)

    :return: The content of the sysfs 'holders' directory of the
             device; an empty list for multipath devices or when no
             such directory is found.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    name = get_dev_name(os.path.realpath(dev))

    # full disk?
    disk_holders = '/sys/block/{base}/holders'.format(base=name)
    if os.path.exists(disk_holders):
        return os.listdir(disk_holders)

    # partition? try ever shorter prefixes of the name as base device
    for length in range(len(name), 0, -1):
        part_holders = '/sys/block/{base}/{part}/holders'.format(
            part=name, base=name[:length])
        if os.path.exists(part_holders):
            return os.listdir(part_holders)
    return []
774
775
def verify_not_in_use(dev, check_partitions=False):
    """
    Make sure a device (path) is not in use, i.e. neither mounted nor
    held by a device-mapper mapping.

    :param check_partitions: when `dev` is not a partition, also check
                             each of its partitions.
    :raises: Error if device is in use.
    """
    def fail_if_busy(path):
        # mounted is checked before held, for each device
        if is_mounted(path):
            raise Error('Device is mounted', path)
        holders = is_held(path)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % path, ','.join(holders))

    assert os.path.exists(dev)
    fail_if_busy(dev)

    if check_partitions and not is_partition(dev):
        for partname in list_partitions(dev):
            fail_if_busy(get_dev_path(partname))
801
802
def must_be_one_line(line):
    """
    Check that the given text is exactly one newline terminated line.

    :raises: TruncatedLineError if the text does not end with a
             newline, TooManyLinesError if it holds another newline.
    :return: The line without its trailing newline.
    """
    if not line.endswith('\n'):
        raise TruncatedLineError(line)
    content = line[:-1]
    if content.count('\n'):
        raise TooManyLinesError(content)
    return content
816
817
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :param parent: directory of the file
    :param name: name of the file in that directory
    :raises: Error if the content is not exactly one newline terminated
             line; IOError if reading fails for a reason other than the
             file being absent.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # open() instead of the Python 2 only file() builtin; the with
        # block closes the handle, which used to be left to the
        # garbage collector
        with open(path, 'r') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
845
846
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline.

    The line is written to a temporary file next to the target, synced
    to disk, handed to path_set_context() and then renamed over the
    target.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    # open() instead of the Python 2 only file() builtin
    with open(tmp, 'w') as tmp_file:
        tmp_file.write(text + '\n')
        # fsync() only covers what reached the file descriptor: the
        # buffer of the file object must be flushed first, otherwise
        # nothing is guaranteed to be on disk before the rename
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
860
861
def init_get():
    """
    Ask 'ceph-detect-init' for the init system, with 'sysvinit' passed
    as its --default.

    :return: The single line printed by the command, without newline.
    """
    detect_cmd = ['ceph-detect-init', '--default', 'sysvinit']
    return must_be_one_line(_check_output(args=detect_cmd))
874
875
def check_osd_magic(path):
    """
    Check that the 'magic' file in `path` holds the Ceph OSD magic.

    :raises: BadMagicError if this does not look like a Ceph OSD data
             dir.
    """
    magic = read_one_line(path, 'magic')
    # a missing file (None) probably means not mkfs'ed yet
    if magic is None or magic != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
889
890
def check_osd_id(osd_id):
    """
    Make sure an osd id is numeric.

    :raises: Error if `osd_id` is not a string of decimal digits.
    """
    if re.match(r'^[0-9]+$', osd_id):
        return
    raise Error('osd id is not numeric', osd_id)
897
898
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
):
    """
    Allocates an OSD id on the given cluster.

    Runs ``ceph osd create --concise <fsid>`` as client.bootstrap-osd
    with the given keyring.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id, as a string of digits.
    """

    LOG.debug('Allocating OSD id...')
    create_cmd = [
        'ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'osd', 'create', '--concise',
        fsid,
    ]
    try:
        output = _check_output(args=create_cmd)
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    allocated = must_be_one_line(output)
    check_osd_id(allocated)
    return allocated
928
929
def get_osd_id(path):
    """
    Gets the OSD id of the OSD at the given path, from its 'whoami'
    file.

    :raises: Error if the id is not numeric.
    :return: The id, or None if there is no 'whoami' file.
    """
    osd_id = read_one_line(path, 'whoami')
    if osd_id is None:
        return None
    check_osd_id(osd_id)
    return osd_id
938
939
def get_ceph_user():
    """
    Return the name of the user to run as.

    With CEPH_PREF_USER set, that user is returned; if it does not
    exist a message is printed and the program exits with status 2.
    Otherwise 'ceph' is returned when that user exists, 'root' when it
    does not.
    """
    if CEPH_PREF_USER is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(CEPH_PREF_USER)
    except KeyError:
        print("No such user: " + CEPH_PREF_USER)
        sys.exit(2)
    return CEPH_PREF_USER
956
957
def get_ceph_group():
    """
    Return the name of the group to run as.

    With CEPH_PREF_GROUP set, that group is returned; if it does not
    exist a message is printed and the program exits with status 2.
    Otherwise 'ceph' is returned when that group exists, 'root' when it
    does not.
    """
    if CEPH_PREF_GROUP is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(CEPH_PREF_GROUP)
    except KeyError:
        print("No such group: " + CEPH_PREF_GROUP)
        sys.exit(2)
    return CEPH_PREF_GROUP
974
975
def path_set_context(path):
    """
    Reset the SELinux context and the ownership of `path`, recursively.

    Both steps are best effort: the exit status of the commands is
    ignored.
    """
    # restore selinux context to default policy values
    restorecon_available = which('restorecon')
    if restorecon_available:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    run_as = get_ceph_user()
    if run_as == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
984
985
def _check_output(args=None, **kwargs):
    """
    Run a command with command() and return its standard output.

    :raises: subprocess.CalledProcessError if the command exits with a
             non zero status; its `output` attribute holds stdout
             followed by stderr.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return out
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
994
995
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    The lookup is done by ``ceph-conf --name=osd. --lookup``.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None (not found, or set to an empty
             value).
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # `err` is not bound when command() itself raised: referring to
        # it here used to hide the real failure behind a NameError
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1029
1030
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code, by way of
    ``ceph-osd --show-config-value``.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if the command fails.
    :return: The first line of the command output.
    """
    show_cmd = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        out = _check_output(args=show_cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line = str(out).split('\n', 1)[0]
    return first_line
1059
1060
def get_fsid(cluster):
    """
    Get the fsid of the cluster.

    :raises: Error if the configuration yields no fsid.
    :return: The fsid, lower cased.
    """
    fsid = get_conf_with_default(cluster=cluster, variable='fsid')
    # get_conf_with_default() always returns a string, never None: an
    # empty one is what a missing fsid looks like
    if not fsid:
        raise Error('getting cluster uuid from configuration failed')
    return fsid.lower()
1071
1072
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Build the path of a dmcrypt key file: <key_dir>/<_uuid>, with a
    '.luks.key' suffix when `luks` is true.

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    filename = _uuid + ".luks.key" if luks else _uuid
    return os.path.join(key_dir, filename)
1089
1090
def get_or_create_dmcrypt_key(
    _uuid,
    key_dir,
    key_size,
    luks
):
    """
    Get path to existing dmcrypt key or create a new key file.

    A new key is `key_size` bits read from /dev/urandom, stored in a
    file readable and writable by its owner only. `key_dir` is created
    (owner access only) if it does not exist.

    :param key_size: size of the key in bits
    :raises: Error if the key cannot be read or created.
    :return: Path to the dmcrypt key file.
    """
    path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(path):
        return path

    # make a new key
    try:
        if not os.path.exists(key_dir):
            os.makedirs(key_dir, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
        with open('/dev/urandom', 'rb') as urandom:
            # floor division: read() needs an integer number of bytes
            key = urandom.read(key_size // 8)
        fd = os.open(path, os.O_WRONLY | os.O_CREAT,
                     stat.S_IRUSR | stat.S_IWUSR)
        try:
            # the write must not live in an assert statement: those are
            # removed by python -O and the key would never be written
            written = os.write(fd, key)
        finally:
            os.close(fd)
        if written != len(key):
            raise IOError('short write to ' + path)
        return path
    except Exception:
        # KeyboardInterrupt and SystemExit are left alone
        raise Error('unable to read or create dm-crypt key', path)
1119
1120
def _dmcrypt_map(
    rawdev,
    keypath,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Maps a device to a dmcrypt device named after `_uuid` and gives the
    mapped device to ceph:ceph.

    :param cryptsetup_parameters: list of extra cryptsetup arguments,
                                  used by luksFormat and create.
    :param luks: map with LUKS (luksOpen) rather than plain mode
                 (create).
    :param format_dev: LUKS only, run luksFormat before opening.
    :raises: Error if one of the commands fails.
    :return: Path to the dmcrypt device.
    """
    dev = '/dev/mapper/' + _uuid
    key_args = ['--key-file', keypath]

    luksFormat_args = (['cryptsetup', '--batch-mode'] + key_args +
                       ['luksFormat', rawdev] + cryptsetup_parameters)
    luksOpen_args = ['cryptsetup'] + key_args + ['luksOpen', rawdev, _uuid]
    create_args = (['cryptsetup'] + key_args +
                   ['create', _uuid, rawdev] + cryptsetup_parameters)

    if luks:
        steps = [luksFormat_args] if format_dev else []
        steps.append(luksOpen_args)
    else:
        # Plain mode has no format function, nor any validation
        # that the key is correct.
        steps = [create_args]
    # set proper ownership of mapped device
    steps.append(['chown', 'ceph:ceph', dev])

    try:
        for step in steps:
            command_check_call(step)
    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
    return dev
1177
1178
def dmcrypt_unmap(
    _uuid
):
    """
    Removes the dmcrypt device with the given UUID.

    A failing ``cryptsetup remove`` is retried up to 10 times, sleeping
    a bit longer before each retry.

    :raises: Error if the last attempt fails too.
    """
    max_retries = 10
    for attempt in range(max_retries + 1):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
            return
        except subprocess.CalledProcessError as e:
            if attempt == max_retries:
                raise Error('unable to unmap device', _uuid, e)
            time.sleep(0.5 + attempt * 1.0)
1196
1197
def mount(
    dev,
    fstype,
    options,
):
    """
    Mounts a device with the given filesystem type and mount options on
    a fresh temporary directory under STATEDIR/tmp.

    :param options: mount options; None selects the MOUNT_OPTIONS entry
                    of the filesystem type (empty if there is none).
    :raises: ValueError if dev or fstype is None, MountError if
             mounting fails (the temporary directory is removed).
    :return: The mount point.
    """
    # sanity check: neither dev nor fstype may be None
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    mount_options = options
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    path = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
    mount_cmd = ['mount', '-t', fstype, '-o', mount_options, '--', dev, path]
    try:
        LOG.debug('Mounting %s on %s with options %s',
                  dev, path, mount_options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', path])
    except subprocess.CalledProcessError as e:
        # best effort cleanup of the unused mount point
        try:
            os.rmdir(path)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return path
1249
1250
def unmount(
    path,
):
    """
    Unmounts and removes the given mount point.

    :raises: UnmountError if /bin/umount still fails after the retries.
    """
    max_retries = 3
    for attempt in range(max_retries + 1):
        try:
            LOG.debug('Unmounting %s', path)
            command_check_call(['/bin/umount', '--', path])
        except subprocess.CalledProcessError as e:
            # on failure, retry 3 times with incremental backoff
            if attempt == max_retries:
                raise UnmountError(e)
            time.sleep(0.5 + attempt * 1.0)
        else:
            break

    os.rmdir(path)
1278
1279
1280 ###########################################
1281
def extract_parted_partition_numbers(partitions):
    """
    Return the integers found at the start of each line of ``partitions``.

    :param partitions: multi-line text; every line that begins with one
                       or more digits contributes one number.
    :return: a list of ints in order of appearance (empty if no line
             starts with a digit). A real list is returned, rather than
             the result of ``map()``, so that truth-testing and ``max()``
             behave the same on every Python version.
    """
    # raw string: '\d' must reach the regex engine untouched
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1285
1286
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    The output of ``parted --machine -- <dev> print`` is parsed: everything
    following the first occurrence of the resolved device path is scanned
    for lines starting with a partition number.

    :param dev: path of the device to inspect.
    :return: Index number (> 1 if there is already a partition on the device)
             or 1 if there is no partition table (parted failed) or no
             partition was listed.
    :raises Error: if parted prints nothing, or its output lacks a unit
                   marker (CHS; CYL; or BYT;) or the resolved device path.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        # the message names the same three markers that are tested above
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # split once only: a second occurrence of the path further down must
    # not break the two-value unpacking
    _, partitions = lines.split(real_dev, 1)
    # materialize, so the emptiness test below is meaningful even if the
    # helper hands back a lazy iterator
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    else:
        return 1
1325
1326
def check_journal_reqs(args):
    """
    Ask ceph-osd whether the backend allows, wants and needs a journal.

    ``ceph-osd`` is run three times (``--check-allows-journal``,
    ``--check-wants-journal``, ``--check-needs-journal``) for osd id 0 of
    cluster ``args.cluster``.

    :param args: object providing the ``cluster`` attribute.
    :return: tuple ``(allows_journal, wants_journal, needs_journal)``; each
             entry is the boolean negation of the third value returned by
             ``command()`` (presumably the exit status, so True would mean
             the check exited with 0 -- confirm against ``command``).
    """
    def _osd_check(flag):
        _, _, ret = command([
            'ceph-osd', flag,
            '-i', '0',
            '--cluster', args.cluster,
        ])
        return not ret

    allows_journal = _osd_check('--check-allows-journal')
    wants_journal = _osd_check('--check-wants-journal')
    needs_journal = _osd_check('--check-needs-journal')
    return (allows_journal, wants_journal, needs_journal)
1344
1345
def update_partition(dev, description):
    """
    Make the kernel and udev catch up with a modified partition table.

    Must be called after modifying a partition table so the kernel
    knows about the change and fires udev events accordingly. A side
    effect of partprobe is to remove partitions and add them again.
    The udevadm settle issued before each partprobe waits for ongoing
    udev events to complete, just in case one of them relies on an
    existing partition on dev. The final udevadm settle guarantees to
    the caller that all udev events related to the partition table
    change have been processed, i.e. the 95-ceph-osd.rules actions and
    mode changes, group changes etc. are complete.

    partprobe is tried up to five times; a failure whose output mentions
    'unable to inform the kernel' or 'Device or resource busy' is followed
    by a 60 second pause, any other failure is re-raised immediately.

    :param dev: device whose partition table changed.
    :param description: word used in the debug message only.
    :raises Error: if partprobe failed on all five attempts.
    """
    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    for _attempt in range(5):
        command_check_call(['udevadm', 'settle', '--timeout=600'])
        try:
            _check_output(['partprobe', dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            retryable = ('unable to inform the kernel' in error or
                         'Device or resource busy' in error)
            if not retryable:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed with a retryable error
        raise Error('partprobe %s failed : %s' % (dev, error))
    command_check_call(['udevadm', 'settle', '--timeout=600'])
1378
1379
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    The last 33 * 4096 bytes of the device are overwritten with zeros,
    then ``sgdisk --zap-all`` and ``sgdisk --clear --mbrtogpt`` are run
    and the kernel/udev are told about the change via update_partition().

    :param dev: path to a whole block device (symlinks are resolved).
    :raises Error: if ``dev`` is not a block device, is a partition, or
                   one of the commands fails with CalledProcessError.
    """
    dev = os.path.realpath(dev)
    dmode = os.stat(dev).st_mode
    if not stat.S_ISBLK(dmode) or is_partition(dev):
        raise Error('not full block device; cannot zap', dev)
    try:
        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups. sgdisk
        # isn't too thorough.
        lba_size = 4096
        size = 33 * lba_size
        # open() instead of the Python 2-only file() builtin, and a bytes
        # literal because the device is opened in binary mode
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-size, os.SEEK_END)
            dev_file.write(size * b'\0')

        command_check_call(
            [
                'sgdisk',
                '--zap-all',
                '--',
                dev,
            ],
        )
        command_check_call(
            [
                'sgdisk',
                '--clear',
                '--mbrtogpt',
                '--',
                dev,
            ],
        )

        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1421
1422
def adjust_symlink(target, path):
    """
    Make ``path`` a symlink pointing at ``target``.

    An existing regular file at ``path`` is removed; an existing symlink
    is removed only when it points somewhere else, and left alone when it
    already points at ``target``. Anything else found at ``path`` is not
    removed, so the symlink creation is simply attempted.

    :param target: what the symlink must point to.
    :param path: location of the symlink.
    :raises Error: if the old entry cannot be inspected/removed or the
                   new symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into an Error
        except Exception:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except Exception:
            raise Error('unable to create symlink %s -> %s' % (path, target))
1447
1448
class Device(object):
    """
    A whole (non-partition) device on which partitions are created.

    Caches the device size, the partition type map and the
    DevicePartition objects it handed out.
    """

    def __init__(self, path, args):
        """
        :param path: path of the device; must not be a partition.
        :param args: parsed command line arguments, forwarded to the
                     DevicePartition factory.
        """
        self.args = args
        self.path = path
        self.dev_size = None
        self.partitions = {}
        self.ptype_map = None
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition with sgdisk and refresh the partition table.

        :param uuid: partition GUID to assign.
        :param name: partition role, used for the label 'ceph <name>' and
                     to pick the 'tobe' type code.
        :param size: size in MB; 0 (or less) means the largest free block.
        :param num: partition number; 0 means the next free index.
        :return: the partition number that was used.
        :raises Error: if ``size`` exceeds the device size.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size <= 0:
            new = '--largest-new={num}'.format(num=num)
        else:
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
            if size > self.get_dev_size():
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, self.get_dev_size()))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, self.get_dev_size(), name))

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type for ``name`` ('data' is an
        alias of 'osd'); the type map is looked up once and cached.
        """
        key = 'osd' if name == 'data' else name
        if self.ptype_map is None:
            probe = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = probe.ptype_map
        return self.ptype_map[key]['tobe']

    def get_partition(self, num):
        """
        Return the DevicePartition for partition ``num``, creating and
        caching it on first use.
        """
        if num in self.partitions:
            return self.partitions[num]
        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return self.partitions[num]

    def get_dev_size(self):
        """Return the device size, querying it only while not cached."""
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        """Build a Device for ``path``."""
        return Device(path, args)
1517
1518
class DevicePartition(object):
    """
    A partition of a device, using the 'regular' partition type map.

    Subclasses override set_variables_ptype() to select another map.
    """

    def __init__(self, args):
        """
        :param args: parsed command line arguments.
        """
        self.args = args
        self.num = None
        self.rawdev = None
        self.dev = None
        self.uuid = None
        self.ptype_map = None
        self.ptype = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the partition uuid of rawdev, looked up while not cached."""
        if self.uuid is None:
            self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type of rawdev, looked up while not cached."""
        if self.ptype is None:
            self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        """Record the partition number."""
        self.num = num

    def get_partition_number(self):
        """Return the recorded partition number (None if never set)."""
        return self.num

    def set_dev(self, dev):
        """Use ``dev`` both as the device and as the raw device."""
        self.dev = self.rawdev = dev

    def get_dev(self):
        """Return the device to operate on."""
        return self.dev

    def get_rawdev(self):
        """Return the raw device."""
        return self.rawdev

    def set_variables_ptype(self):
        """Select the partition type map of this kind of partition."""
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type registered for ``name``."""
        return self.ptype_map[name]['ready']

    @staticmethod
    def factory(path, dev, args):
        """
        Build the DevicePartition flavour matching the device and args.

        Multipath wins over dmcrypt: if ``path`` or ``dev`` is a multipath
        device a DevicePartitionMultipath is returned, otherwise the
        dmcrypt type derived from ``args`` ('luks', 'plain' or none)
        selects the class.

        :param path: path of the whole device, or None.
        :param dev: path of the partition, or None.
        :param args: parsed command line arguments.
        :return: the new partition object, with ``dev`` set on it.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_multipath = ((path is not None and is_mpath(path)) or
                        (dev is not None and is_mpath(dev)))
        if on_multipath:
            partition_class = DevicePartitionMultipath
        elif dmcrypt_type == 'luks':
            partition_class = DevicePartitionCryptLuks
        elif dmcrypt_type == 'plain':
            partition_class = DevicePartitionCryptPlain
        else:
            partition_class = DevicePartition
        partition = partition_class(args)
        partition.set_dev(dev)
        return partition
1577
1578
class DevicePartitionMultipath(DevicePartition):
    """Partition that uses the 'mpath' partition type map."""

    def set_variables_ptype(self):
        """Select the 'mpath' partition type map."""
        self.ptype_map = PTYPE['mpath']
1583
1584
class DevicePartitionCrypt(DevicePartition):
    """
    Base class of the dmcrypt partitions.

    Subclasses provide luks() and a setup_crypt() that fills in
    osd_dm_keypath (and possibly extends cryptsetup_parameters).
    """

    def __init__(self, args):
        """
        :param args: parsed command line arguments; also used to read
                     the cryptsetup parameters, dmcrypt type and key size.
        """
        super(DevicePartitionCrypt, self).__init__(args)
        self.osd_dm_keypath = None
        helpers = CryptHelpers
        self.cryptsetup_parameters = helpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = helpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = helpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        """Hook for subclasses; does nothing here."""
        pass

    def map(self):
        """
        Map the raw device through dmcrypt (formatting it) and make the
        resulting mapping the device to operate on.
        """
        self.setup_crypt()
        mapped = _dmcrypt_map(
            rawdev=self.rawdev,
            keypath=self.osd_dm_keypath,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )
        self.dev = mapped

    def unmap(self):
        """Remove the dmcrypt mapping and fall back to the raw device."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        """Format the partition by mapping it and unmapping it again."""
        self.setup_crypt()
        self.map()
        self.unmap()
1618
1619
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """dmcrypt partition in plain mode ('plain' partition type map)."""

    def luks(self):
        """Plain dmcrypt is not LUKS."""
        return False

    def setup_crypt(self):
        """
        On first call, append --key-size to the cryptsetup parameters and
        get or create the key file; later calls do nothing.
        """
        if self.osd_dm_keypath is None:
            self.cryptsetup_parameters.extend(
                ['--key-size', str(self.dmcrypt_keysize)])

            self.osd_dm_keypath = get_or_create_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir,
                self.dmcrypt_keysize, False)

    def set_variables_ptype(self):
        """Select the 'plain' partition type map."""
        self.ptype_map = PTYPE['plain']
1637
1638
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """dmcrypt partition in LUKS mode ('luks' partition type map)."""

    def luks(self):
        """This flavour is LUKS."""
        return True

    def setup_crypt(self):
        """
        On first call, adjust the cryptsetup parameters and get or create
        the key file; later calls do nothing.
        """
        if self.osd_dm_keypath is None:
            # A key size of 1024 is deliberately not forced into the
            # cryptsetup_parameters, as we want the cryptsetup defaults
            # to prevail for the actual LUKS key lengths.
            if self.dmcrypt_keysize != 1024:
                self.cryptsetup_parameters.extend(
                    ['--key-size', str(self.dmcrypt_keysize)])

            self.osd_dm_keypath = get_or_create_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir,
                self.dmcrypt_keysize, True)

    def set_variables_ptype(self):
        """Select the 'luks' partition type map."""
        self.ptype_map = PTYPE['luks']
1663
1664
class Prepare(object):
    """
    Entry point of the 'prepare' subcommand.

    Subclasses implement prepare_locked(), which prepare() runs while
    holding prepare_lock.
    """

    @staticmethod
    def parser():
        """
        Return the argparse parent parser holding the options common to
        every kind of prepare (cluster, uuids, dmcrypt).
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )
        parser.add_argument(
            '--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to',
        )
        parser.add_argument(
            '--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to',
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt',
        )
        parser.add_argument(
            '--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """
        Register the 'prepare' subcommand on ``subparsers``.

        :param subparsers: object returned by ``add_subparsers()``.
        :return: the parser created for the subcommand; its ``func``
                 default is Prepare.main.
        """
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
        ]
        parents.extend(PrepareFilestore.parent_parsers())
        parents.extend(PrepareBluestore.parent_parsers())
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(
            func=Prepare.main,
        )
        return parser

    def prepare(self):
        """Run prepare_locked() while holding prepare_lock."""
        prepare_lock.acquire()
        try:
            self.prepare_locked()
        finally:
            # release even when the preparation fails, so the lock is
            # never left held by an exception
            prepare_lock.release()

    @staticmethod
    def factory(args):
        """
        Return PrepareBluestore when ``args.bluestore`` is set,
        PrepareFilestore otherwise.
        """
        if args.bluestore:
            return PrepareBluestore(args)
        else:
            return PrepareFilestore(args)

    @staticmethod
    def main(args):
        """Build the right Prepare object for ``args`` and run it."""
        Prepare.factory(args).prepare()
1732
1733
class PrepareFilestore(Prepare):
    """Prepare an OSD made of a data store plus a journal."""

    def __init__(self, args):
        """
        :param args: parsed command line arguments.
        """
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """Return the list of parent parsers specific to filestore."""
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def prepare_locked(self):
        """Prepare the data store, handing it the journal to prepare."""
        self.data.prepare(self.journal)
1748
1749
class PrepareBluestore(Prepare):
    """Prepare an OSD made of a data store plus a bluestore block."""

    def __init__(self, args):
        """
        :param args: parsed command line arguments.
        """
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)

    @staticmethod
    def parser():
        """Return the parent parser defining the --bluestore flag."""
        bluestore_parser = argparse.ArgumentParser(add_help=False)
        bluestore_parser.add_argument(
            '--bluestore',
            action='store_true', default=None,
            help='bluestore objectstore',
        )
        return bluestore_parser

    @staticmethod
    def parent_parsers():
        """Return the list of parent parsers specific to bluestore."""
        own_parser = PrepareBluestore.parser()
        block_parser = PrepareBluestoreBlock.parser()
        return [own_parser, block_parser]

    def prepare_locked(self):
        """Prepare the data store, handing it the block to prepare."""
        self.data.prepare(self.block)
1775
1776
class Space(object):
    """Holds the names of the known kinds of space."""

    # names of the spaces: bluestore block and filestore journal
    NAMES = ('block', 'journal')
1780
1781
class PrepareSpace(object):
    """
    Prepare the auxiliary space (journal, block, ...) of an OSD.

    The space is either absent (NONE), a regular file (FILE) or a block
    device / partition (DEVICE). Subclasses must set ``self.name`` before
    calling ``__init__`` and provide ``get_space_size()`` and
    ``desired_partition_number()``; the command line attributes read from
    ``args`` are ``<name>``, ``<name>_uuid``, ``<name>_file`` and
    ``<name>_dev``.
    """

    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        """
        :param args: parsed command line arguments; ``<name>`` may be
                     set to ``args.data`` (colocation) and ``<name>_uuid``
                     is filled with a fresh uuid when a space is given
                     without one.
        """
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        if (getattr(self.args, self.name) and
                getattr(self.args, self.name + '_uuid') is None):
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        self.space_symlink = None
        self.space_dmcrypt = None

    def set_type(self):
        """
        Work out whether the space is NONE, FILE or DEVICE.

        When the space is wanted, the data is a whole block device and no
        space (nor ``--<name>-file``) was given, the space is colocated
        with the data. A path that does not exist yet is treated as a
        file to be created.

        :raises Error: if ``--<name>-dev`` / ``--<name>-file`` contradicts
                       what is found, or the path is neither a block
                       device nor a regular file.
        """
        name = self.name
        args = self.args
        dmode = os.stat(args.data).st_mode
        if (self.wants_space() and
                stat.S_ISBLK(dmode) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        if getattr(args, name) is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.NONE
            return

        if not os.path.exists(getattr(args, name)):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.FILE
            return

        mode = os.stat(getattr(args, name)).st_mode
        if stat.S_ISBLK(mode):
            if getattr(args, name + '_file'):
                raise Error('%s is not a regular file' % name.capitalize(),
                            getattr(args, name))
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            getattr(args, name))
            self.type = self.FILE
            # a regular file is a valid space: do not fall through to
            # the error below
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), getattr(args, name)))

    def is_none(self):
        """Return True if there is no space."""
        return self.type == self.NONE

    def is_file(self):
        """Return True if the space is a regular file."""
        return self.type == self.FILE

    def is_device(self):
        """Return True if the space is a block device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser(name):
        """
        Return the argparse parent parser for the space called ``name``:
        ``--<name>-uuid``, ``--<name>-file``, ``--<name>-dev`` and the
        optional positional ``<name>``.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )
        parser.add_argument(
            name,
            metavar=name.upper(),
            nargs='?',
            help=('path to OSD %s disk block device;' % name +
                  ' leave out to store %s in file' % name),
        )
        return parser

    def wants_space(self):
        """Return True if a space should be colocated when none is given."""
        return True

    def populate_data_path(self, path):
        """
        Record the space in the OSD data directory ``path``, according
        to its type.

        :raises Error: if the type is none of NONE, FILE, DEVICE.
        """
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """Write the ``<name>_uuid`` file in ``path`` if a uuid is set."""
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))

    def populate_data_path_device(self, path):
        """
        Write the uuid file and maintain the ``<name>`` and
        ``<name>_dmcrypt`` symlinks in ``path``; a ``<name>_dmcrypt``
        entry is removed (best effort) when dmcrypt is not in use.
        """
        self.populate_data_path_file(path)
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                pass

    def prepare(self):
        """
        Prepare the space according to its type.

        :raises Error: if the type is none of NONE, FILE, DEVICE.
        """
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """
        Create the space file (empty) if it does not exist yet and make
        its path the target of the space symlink.
        """
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            # open() instead of the Python 2-only file() builtin
            with open(space_filename, 'wb'):
                pass

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        # the symlink must point at the path, whether or not the file
        # had to be created above
        self.space_symlink = space_filename

    def prepare_device(self):
        """
        Prepare a space living on a block device.

        A partition that does not carry the ceph type for this space is
        symlinked directly; one that does is reused with its existing
        uuid; on a whole device a new partition is created (and formatted
        for dmcrypt if needed) and its type code set to 'ready'.

        :raises Error: if an existing partition is given together with
                       --dmcrypt.
        """
        reusing_partition = False

        if is_partition(getattr(self.args, self.name)):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), getattr(self.args, self.name))
            partition = DevicePartition.factory(
                path=None, dev=getattr(self.args, self.name), args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(getattr(self.args, self.name) +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          getattr(self.args, self.name))
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID. See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, self.name + '_uuid', partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, self.name + '_uuid'))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            getattr(self.args, self.name))
                self.space_symlink = getattr(self.args, self.name)
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, self.name + '_uuid'))

        if self.args.dmcrypt:
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, self.name + '_uuid'))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(getattr(self.args, self.name), self.args)
        num = device.create_partition(
            uuid=getattr(self.args, self.name + '_uuid'),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()

        # switch the partition from its 'tobe' type to the 'ready' one
        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                getattr(self.args, self.name),
            ],
        )

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2030
2031
class PrepareJournal(PrepareSpace):
    """The 'journal' space of an OSD."""

    def __init__(self, args):
        """
        :param args: parsed command line arguments.
        :raises Error: if a journal is given although check_journal_reqs()
                       reports that the osd backend does not allow one.
        """
        self.name = 'journal'
        allows, wants, needs = check_journal_reqs(args)
        self.allows_journal = allows
        self.wants_journal = wants
        self.needs_journal = needs

        if args.journal and not self.allows_journal:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """A journal is colocated only if the osd backend wants one."""
        return self.wants_journal

    def get_space_size(self):
        """Return the configured osd_journal_size as an int."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """
        Return 2 when the journal shares the disk with the osd data,
        0 (no preference) otherwise.
        """
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        return 2 if self.args.journal == self.args.data else 0

    @staticmethod
    def parser():
        """Return the command line parser of the journal space."""
        return PrepareSpace.parser('journal')
2066
2067
class PrepareBluestoreBlock(PrepareSpace):
    """The 'block' space of a bluestore OSD."""

    def __init__(self, args):
        """
        :param args: parsed command line arguments.
        """
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return 0, meaning: get as much space as possible."""
        return 0

    def desired_partition_number(self):
        """
        Return 2 when the block shares the disk with the osd data,
        0 (no preference) otherwise.
        """
        return 2 if self.args.block == self.args.data else 0

    @staticmethod
    def parser():
        """Return the command line parser of the block space."""
        return PrepareSpace.parser('block')
2087
2088
class CryptHelpers(object):
    """Helpers reading the dmcrypt related settings of a cluster."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """
        Return osd_cryptsetup_parameters split into a list of arguments
        (an empty list when the setting is absent).
        """
        raw_parameters = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        if raw_parameters is None:
            return []
        return shlex.split(raw_parameters)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """
        Return the dmcrypt key size as an int.

        osd_dmcrypt_key_size wins when set; otherwise the default is 1024
        for luks and 256 for plain. Without dmcrypt the result is 0.
        """
        keysize_str = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        if dmcrypt_type == 'luks':
            if keysize_str is not None:
                return int(keysize_str)
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024
        if dmcrypt_type == 'plain':
            if keysize_str is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % keysize_str)
            return int(keysize_str)
        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """
        Return 'luks', 'plain', or None when ``args.dmcrypt`` is not set.

        luks is the default when osd_dmcrypt_type is absent.

        :raises Error: if osd_dmcrypt_type is neither luks nor plain.
        """
        if not args.dmcrypt:
            return None

        dmcrypt_type = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        if dmcrypt_type is None or dmcrypt_type == 'luks':
            return 'luks'
        if dmcrypt_type == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', dmcrypt_type)
2149
2150
class PrepareData(object):
    """
    Prepare the data store of an OSD, either a directory (FILE) or a
    block device (DEVICE).

    Subclasses provide ``get_space_size()`` and extend
    ``prepare_device()``.
    """

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        """
        :param args: parsed command line arguments; ``cluster_uuid`` and
                     ``osd_uuid`` are filled in when they are None.
        """
        self.args = args
        self.partition = None
        self.set_type()
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """
        Set the type to FILE for a directory, DEVICE for a block device.

        :raises Error: if ``args.data`` is neither.
        """
        dmode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(dmode):
            self.type = self.FILE
        elif stat.S_ISBLK(dmode):
            self.type = self.DEVICE
        else:
            raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        """Return True if the data store is a directory."""
        return self.type == self.FILE

    def is_device(self):
        """Return True if the data store is a block device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """
        Return the argparse parent parser for the data options and the
        mandatory positional ``data``.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        parser.add_argument(
            '--zap-disk',
            action='store_true', default=None,
            help='destroy the partition table (and content) of a disk',
        )
        parser.add_argument(
            '--data-dir',
            action='store_true', default=None,
            help='verify that DATA is a dir',
        )
        parser.add_argument(
            '--data-dev',
            action='store_true', default=None,
            help='verify that DATA is a block device',
        )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a data store that is a directory."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """
        Write ceph_fsid, fsid and magic into ``path`` and let every
        element of ``to_prepare_list`` add its own entries. Nothing is
        done when ``path`` already contains a 'magic' file.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return
        else:
            LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for to_prepare in to_prepare_list:
            to_prepare.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """
        Prepare the data store and the given spaces, according to the
        type of the data store.

        :raises Error: if the type is neither DEVICE nor FILE.
        """
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
        elif self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """
        Prepare a data store that is a directory.

        :raises Error: if the directory does not exist or --data-dev
                       was given.
        """
        if not os.path.exists(self.args.data):
            raise Error('data path for directory does not exist',
                        self.args.data)

        if self.args.data_dev:
            raise Error('data path is not a block device', self.args.data)

        for to_prepare in to_prepare_list:
            to_prepare.prepare()

        self.populate_data_path_file(self.args.data, *to_prepare_list)

    def sanity_checks(self):
        """
        Check the data device exists and is not in use.

        :raises Error: if the path does not exist.
        """
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        verify_not_in_use(self.args.data, True)

    def set_variables(self):
        """
        Resolve the filesystem type, mkfs arguments and mount options.

        The command line wins for the type, then osd_mkfs_type, then
        osd_fs_type, then DEFAULT_FS_TYPE. mkfs arguments and mount
        options are read from the osd_* setting first and the osd_fs_*
        one as a fallback.
        """
        if self.args.fs_type is None:
            self.args.fs_type = get_conf(
                cluster=self.args.cluster,
                variable='osd_mkfs_type',
            )
            if self.args.fs_type is None:
                self.args.fs_type = get_conf(
                    cluster=self.args.cluster,
                    variable='osd_fs_type',
                )
            if self.args.fs_type is None:
                self.args.fs_type = DEFAULT_FS_TYPE

        self.mkfs_args = get_conf(
            cluster=self.args.cluster,
            variable='osd_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mkfs_args is None:
            self.mkfs_args = get_conf(
                cluster=self.args.cluster,
                variable='osd_fs_mkfs_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )

        self.mount_options = get_conf(
            cluster=self.args.cluster,
            variable='osd_mount_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mount_options is None:
            self.mount_options = get_conf(
                cluster=self.args.cluster,
                variable='osd_fs_mount_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )
        else:
            # remove whitespaces
            self.mount_options = "".join(self.mount_options.split())

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """
        Common first steps for a block device: sanity checks, variable
        resolution and, if --zap-disk was given, zapping the disk.
        """
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """
        Create partition number 1 for the data on ``args.data`` and
        return the matching DevicePartition.
        """
        device = Device.factory(self.args.data, self.args)
        partition_number = 1
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """
        Set ``self.partition``: the given partition when ``args.data`` is
        one (a warning is logged if its type is not the 'ready' osd
        type), a newly created partition otherwise.
        """
        if is_partition(self.args.data):
            LOG.debug('OSD data device %s is a partition',
                      self.args.data)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.data, args=self.args)
            ptype = self.partition.get_ptype()
            # the type an osd data partition of this flavour should have
            ptype_osd = self.partition.ptype_for_name('osd')
            if ptype != ptype_osd:
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, ptype_osd))
        else:
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()

    def populate_data_path_device(self, *to_prepare_list):
        """
        Make a filesystem on the data partition, mount it temporarily to
        populate it, then mark the partition as a ready osd.

        A dmcrypt partition is mapped for the duration of the operation.
        When ``args.data`` is a whole device, the partition type code is
        switched to the 'ready' osd type and a udev 'add' event is
        triggered for the partition.

        :raises Error: if mkfs or sgdisk fails with CalledProcessError.
        """
        partition = self.partition

        if isinstance(partition, DevicePartitionCrypt):
            partition.map()

        try:
            args = [
                'mkfs',
                '-t',
                self.args.fs_type,
            ]
            if self.mkfs_args is not None:
                args.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    args.extend(['-f'])  # always force
            else:
                args.extend(MKFS_ARGS.get(self.args.fs_type, []))
            args.extend([
                '--',
                partition.get_dev(),
            ])
            try:
                LOG.debug('Creating %s fs on %s',
                          self.args.fs_type, partition.get_dev())
                command_check_call(args)
            except subprocess.CalledProcessError as e:
                raise Error(e)

            path = mount(dev=partition.get_dev(),
                         fstype=self.args.fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if isinstance(partition, DevicePartitionCrypt):
                partition.unmap()

        if not is_partition(self.args.data):
            try:
                command_check_call(
                    [
                        'sgdisk',
                        '--typecode=%d:%s' % (partition.get_partition_number(),
                                              partition.ptype_for_name('osd')),
                        '--',
                        self.args.data,
                    ],
                )
            except subprocess.CalledProcessError as e:
                raise Error(e)
            update_partition(self.args.data, 'prepared')
            command_check_call(['udevadm', 'trigger',
                                '--action=add',
                                '--sysname-match',
                                os.path.basename(partition.rawdev)])
2398
2399
class PrepareFilestoreData(PrepareData):
    """Data store of a filestore OSD."""

    def get_space_size(self):
        """Return 0, meaning: get as much space as possible."""
        return 0

    def prepare_device(self, *to_prepare_list):
        """
        Prepare a block device: the given spaces are prepared first, then
        the data partition is set up and populated.
        """
        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)
2411
2412
class PrepareBluestoreData(PrepareData):
    """
    Data preparation that sets up the data partition before the extra
    spaces and tags the data path with the 'bluestore' type.
    """

    def get_space_size(self):
        """
        Return the requested size, in MB.
        """
        size_mb = 100
        return size_mb

    def prepare_device(self, *to_prepare_list):
        """
        Set up the data partition, prepare every item of
        to_prepare_list, then populate the data partition.
        """
        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
        self.set_data_partition()
        for item in to_prepare_list:
            item.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """
        Populate path as the parent class does and record the OSD type
        in its 'type' file.
        """
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
2429
2430
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """
    Fetch the monmap and run ``ceph-osd --mkfs --mkkey`` on an OSD
    data directory.

    :param path: OSD data directory
    :param cluster: cluster name passed to the ceph commands
    :param osd_id: id of the OSD to initialize
    :param fsid: uuid of the OSD
    :param keyring: bootstrap-osd keyring used to fetch the monmap
    """
    monmap = os.path.join(path, 'activate.monmap')
    command_check_call(
        [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            'mon', 'getmap', '-o', monmap,
        ],
    )

    osd_type = read_one_line(path, 'type')

    args = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '--mkkey',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    # only non-bluestore OSDs are given a journal inside the data dir
    if osd_type != 'bluestore':
        args.extend(['--osd-journal', os.path.join(path, 'journal')])
    args.extend([
        '--osd-uuid', fsid,
        '--keyring', os.path.join(path, 'keyring'),
        '--setuser', get_ceph_user(),
        # the group must come from get_ceph_group() for every OSD type;
        # the bluestore branch used to pass the user name here
        '--setgroup', get_ceph_group(),
    ])
    command_check_call(args)
2484
2485
def auth_key(
    path,
    cluster,
    osd_id,
    keyring,
):
    """
    Register the OSD key stored in path/keyring with the cluster.

    The 'allow profile osd' mon cap is tried first; if the command
    exits with EINVAL the registration is retried with 'allow rwx'.
    """
    entity = 'osd.{osd_id}'.format(osd_id=osd_id)
    osd_keyring = os.path.join(path, 'keyring')

    def add_key(mon_caps):
        command_check_call(
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-osd',
                '--keyring', keyring,
                'auth', 'add', entity,
                '-i', osd_keyring,
                'osd', 'allow *',
                'mon', mon_caps,
            ],
        )

    try:
        # try dumpling+ cap scheme
        add_key('allow profile osd')
    except subprocess.CalledProcessError as err:
        if err.returncode != errno.EINVAL:
            raise
        # try old cap scheme
        add_key('allow rwx')
2523
2524
def get_mount_point(cluster, osd_id):
    """
    Return the path STATEDIR/osd/<cluster>-<osd_id>.
    """
    dirname = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', dirname)
2531
2532
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """
    Mount dev on the final OSD mount point and lazily unmount path.

    When mount_options is None the MOUNT_OPTIONS entry for fstype is
    used, falling back to an empty string.
    """
    LOG.debug('Moving mount to final location...')
    osd_data = get_mount_point(cluster, osd_id)
    maybe_mkdir(osd_data)

    # pick best-of-breed mount options based on fs type
    if mount_options is None:
        mount_options = MOUNT_OPTIONS.get(fstype, '')

    # we really want to mount --move, but that is not supported when
    # the parent mount is shared, as it is by default on RH, Fedora,
    # and probably others.  Also, --bind doesn't properly manipulate
    # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
    # this being 2013.  Instead, mount the original device at the final
    # location.
    mount_cmd = ['/bin/mount', '-o', mount_options, '--', dev, osd_data]
    command_check_call(mount_cmd)

    # lazy, in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
2574
2575
def start_daemon(
    cluster,
    osd_id,
):
    """
    Start the OSD daemon with the init system its data dir is tagged for.

    The init system is selected by the first marker file found in the
    OSD data directory, checked in the order 'upstart', 'sysvinit',
    'systemd'.

    :raises Error: if no marker is found or a start command fails
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    osd_dir = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged_with(init_system):
        return os.path.exists(os.path.join(osd_dir, init_system))

    try:
        if tagged_with('upstart'):
            upstart_cmd = [
                '/sbin/initctl',
                # use emit, not start, because start would fail if the
                # instance was already running
                'emit',
                # since the daemon starting doesn't guarantee much about
                # the service being operational anyway, don't bother
                # waiting for it
                '--no-wait',
                '--',
                'ceph-osd',
                'cluster={cluster}'.format(cluster=cluster),
                'id={osd_id}'.format(osd_id=osd_id),
            ]
            command_check_call(upstart_cmd)
        elif tagged_with('sysvinit'):
            svc = ('/usr/sbin/service'
                   if os.path.exists('/usr/sbin/service')
                   else '/sbin/service')
            sysvinit_cmd = [
                svc,
                'ceph',
                '--cluster',
                '{cluster}'.format(cluster=cluster),
                'start',
                'osd.{osd_id}'.format(osd_id=osd_id),
            ]
            command_check_call(sysvinit_cmd)
        elif tagged_with('systemd'):
            unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
            for action in ('enable', 'start'):
                command_check_call(['systemctl', action, unit])
        else:
            raise Error('{cluster} osd.{osd_id} is not tagged '
                        'with an init system'.format(
                            cluster=cluster,
                            osd_id=osd_id,
                        ))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
2641
2642
def stop_daemon(
    cluster,
    osd_id,
):
    """
    Stop the OSD daemon with the init system its data dir is tagged for.

    The init system is selected by the first marker file found in the
    OSD data directory, checked in the order 'upstart', 'sysvinit',
    'systemd'.

    :raises Error: if no marker is found or a stop command fails
    """
    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)

    osd_dir = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged_with(init_system):
        return os.path.exists(os.path.join(osd_dir, init_system))

    try:
        if tagged_with('upstart'):
            upstart_cmd = [
                '/sbin/initctl',
                'stop',
                'ceph-osd',
                'cluster={cluster}'.format(cluster=cluster),
                'id={osd_id}'.format(osd_id=osd_id),
            ]
            command_check_call(upstart_cmd)
        elif tagged_with('sysvinit'):
            sysvinit_cmd = [
                which('service'),
                'ceph',
                '--cluster',
                '{cluster}'.format(cluster=cluster),
                'stop',
                'osd.{osd_id}'.format(osd_id=osd_id),
            ]
            command_check_call(sysvinit_cmd)
        elif tagged_with('systemd'):
            unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
            for action in ('disable', 'stop'):
                command_check_call(['systemctl', action, unit])
        else:
            raise Error('{cluster} osd.{osd_id} is not tagged with an init '
                        ' system'.format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
2695
2696
def detect_fstype(
    dev,
):
    """
    Return the filesystem TYPE that blkid probes on dev.

    The blkid output is passed through must_be_one_line().
    """
    blkid_cmd = [
        '/sbin/blkid',
        # we don't want stale cached results
        '-p',
        '-s', 'TYPE',
        '-o', 'value',
        '--',
        dev,
    ]
    return must_be_one_line(_check_output(args=blkid_cmd))
2713
2714
def dmcrypt_map(dev, dmcrypt_key_dir):
    """
    Map the encrypted partition dev and return what _dmcrypt_map() returns.

    The partition type decides between plain and luks mode.

    :raises Error: if the partition type is neither a plain nor a luks
                   'ready' type
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        luks, cryptsetup_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        luks, cryptsetup_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key_path = get_dmcrypt_key_path(part_uuid, dmcrypt_key_dir, luks)
    return _dmcrypt_map(
        rawdev=dev,
        keypath=key_path,
        _uuid=part_uuid,
        cryptsetup_parameters=cryptsetup_parameters,
        luks=luks,
        format_dev=False,
    )
2736
2737
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """
    Mount dev, activate the OSD found on it and put the mount in its
    final position.

    :param dev: block device holding the OSD data
    :param activate_key_template: forwarded to activate()
    :param init: forwarded to activate()
    :param dmcrypt: when true, dev is first mapped with dmcrypt_map()
    :param dmcrypt_key_dir: key directory handed to dmcrypt_map()
    :param reactivate: allow activating an OSD that carries a
                       'deactive' marker
    :returns: the tuple (cluster, osd_id)
    :raises FilesystemTypeError: if the filesystem type of dev cannot
                                 be detected
    :raises Error: if the OSD is deactivated and reactivate is false,
                   or if something else already occupies the final
                   mount point
    """

    if dmcrypt:
        # NOTE(review): this part_uuid value is not read before it is
        # reassigned further down
        part_uuid = get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_conf(
        cluster='ceph',
        variable='osd_mount_options_{fstype}'.format(
            fstype=fstype,
        ),
    )

    # fall back to the osd_fs_mount_options_<fstype> variable
    if mount_options is None:
        mount_options = get_conf(
            cluster='ceph',
            variable='osd_fs_mount_options_{fstype}'.format(
                fstype=fstype,
            ),
        )

    # remove whitespaces from mount_options
    if mount_options is not None:
        mount_options = "".join(mount_options.split())

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # check whether the OSD carries a 'deactive' marker; such an OSD is
    # only activated when reactivate is set
    if os.path.exists(os.path.join(path, 'deactive')):
        # logging to syslog will help us easy to know udev triggered failure
        if not reactivate:
            unmount(path)
            # we need to unmap again because dmcrypt map will create again
            # on bootup stage (due to deactivate)
            if '/dev/mapper/' in dev:
                part_uuid = dev.replace('/dev/mapper/', '')
                dmcrypt_unmap(part_uuid)
            LOG.info('OSD deactivated! reactivate with: --reactivate')
            raise Error('OSD deactivated! reactivate with: --reactivate')
        # flag to activate a deactive osd.
        deactive = True
    else:
        deactive = False

    osd_id = None
    cluster = None
    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # Now active successfully
        # If we got reactivate and deactive, remove the deactive file
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is already
        # mounted there
        active = False
        other = False
        src_dev = os.stat(path).st_dev
        try:
            dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)).st_dev
            if src_dev == dst_dev:
                # our device is already the one at the final location
                active = True
            else:
                parent_dev = os.stat(STATEDIR + '/osd').st_dev
                if dst_dev != parent_dev:
                    # the final location sits on a device different
                    # from its parent directory
                    other = True
                elif os.listdir(get_mount_point(cluster, osd_id)):
                    LOG.info(get_mount_point(cluster, osd_id) +
                             " is not empty, won't override")
                    other = True

        except OSError:
            # a failing stat/listdir leaves both flags False, so the
            # mount is moved into place below
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            # the except clause below performs the unmount
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return (cluster, osd_id)

    except:
        # any failure: drop our mount and let the error propagate
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
2862
2863
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD stored in the directory path.

    Unless init is None or 'none', a symlink to path is maintained at
    STATEDIR/osd/<cluster>-<osd_id> when path is not already that
    location.

    :returns: the tuple (cluster, osd_id)
    :raises Error: if path does not exist or the symlink cannot be
                   removed or created
    """

    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    # only filesystem errors are translated; a bare
                    # except would also swallow KeyboardInterrupt
                    try:
                        os.unlink(canonical)
                    except OSError:
                        raise Error('unable to remove old symlink', canonical)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path))

    return (cluster, osd_id)
2903
2904
def find_cluster_by_uuid(_uuid):
    """
    Find a cluster name by searching /etc/ceph/*.conf for a conf file
    with the right uuid.

    Returns the cluster name, or None if no conf file matches. A lone
    'ceph' cluster whose conf defines no fsid is accepted with a
    warning.
    """
    no_fsid_message = 'getting cluster uuid from configuration failed'
    _uuid = _uuid.lower()
    no_fsid = []
    if not os.path.exists(SYSCONFDIR):
        return None
    for conf_file in os.listdir(SYSCONFDIR):
        if not conf_file.endswith('.conf'):
            continue
        cluster = conf_file[:-5]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            # Compare the exception arguments instead of the deprecated
            # BaseException.message attribute; a bare raise keeps the
            # original traceback.
            if e.args != (no_fsid_message,):
                raise
            no_fsid.append(cluster)
        else:
            if fsid == _uuid:
                return cluster
    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
2933
2934
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Bring the OSD data directory path to the 'active' state.

    Reads the cluster and OSD uuids from path, allocates an OSD id if
    'whoami' is not set yet, runs mkfs() until a 'ready' file exists,
    tags the directory with the init system marker and registers the
    OSD key unless an 'active' file exists.

    :param path: OSD data directory
    :param activate_key_template: format string for the bootstrap
                                  keyring path; receives cluster and
                                  statedir
    :param init: init system name, 'auto', 'none' or None
    :returns: the tuple (osd_id, cluster)
    :raises Error: if a uuid is missing or no matching cluster conf is
                   found
    """

    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    if not os.path.exists(os.path.join(path, 'ready')):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            conf_val = get_conf(
                cluster=cluster,
                variable='init'
            )
            if conf_val is not None:
                init = conf_val
            else:
                init = init_get()

        LOG.debug('Marking with init system %s', init)
        # open() instead of the Python 2-only file() builtin; the
        # marker only has to exist, nothing is written to it
        with open(os.path.join(path, init), 'w'):
            pass

        # remove markers for others, just in case.
        for other in INIT_SYSTEMS:
            if other != init:
                try:
                    os.unlink(os.path.join(path, other))
                except OSError:
                    pass

    if not os.path.exists(os.path.join(path, 'active')):
        LOG.debug('Authorizing OSD key...')
        auth_key(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            keyring=keyring,
        )
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3017
3018
def main_activate(args):
    """
    Activate the OSD found at args.path, a block device or a directory.

    Unless args.no_start_daemon is set, the daemon is then run directly
    with ceph-osd when args.mark_init is 'none', or started with
    start_daemon() for any other non-None args.mark_init.

    :raises Error: if args.path does not exist, is neither a block
                   device nor a directory, or is a multipath OSD
                   partition that is not a multipath device
    """
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    activate_lock.acquire()  # noqa
    try:
        mode = os.stat(args.path).st_mode
        if stat.S_ISBLK(mode):
            not_really_mpath = (
                is_partition(args.path) and
                (get_partition_type(args.path) ==
                 PTYPE['mpath']['osd']['ready']) and
                not is_mpath(args.path)
            )
            if not_really_mpath:
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        if not args.no_start_daemon:
            if args.mark_init == 'none':
                # no init system: run the daemon directly
                ceph_osd_cmd = [
                    'ceph-osd',
                    '--cluster={cluster}'.format(cluster=cluster),
                    '--id={osd_id}'.format(osd_id=osd_id),
                    '--osd-data={path}'.format(path=osd_data),
                    '--osd-journal={path}/journal'.format(path=osd_data),
                ]
                command_check_call(ceph_osd_cmd)

            if args.mark_init not in (None, 'none'):
                start_daemon(
                    cluster=cluster,
                    osd_id=osd_id,
                )

    finally:
        activate_lock.release()  # noqa
3082
3083
3084 ###########################
3085
def _mark_osd_out(cluster, osd_id):
    """
    Run ``ceph osd out`` for the given OSD.

    :param cluster: cluster name, passed to the ceph command so that
                    the request targets that cluster
    :param osd_id: integer id of the OSD
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    command([
        'ceph',
        'osd',
        'out',
        'osd.%d' % osd_id,
        # the cluster argument used to be ignored
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
    ])
3094
3095
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is read from the JSON output of ``ceph osd dump``.

    :raises Error: if the dump has no entry for osd_id
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') != int(osd_id):
            continue
        status_code = 0
        # compare by value: `is 1` tests object identity and only
        # happens to work for small cached integers
        if item.get(u'in') == 1:
            status_code += 2
        if item.get(u'up') == 1:
            status_code += 1
        # the first matching entry decides; do not accumulate further
        return status_code
    raise Error('Could not osd.%s in osd tree!' % osd_id)
3128
3129
def _remove_osd_directory_files(mounted_path, cluster):
    """
    To remove the 'ready', 'active', INIT-specific files.

    The init-specific file name is the 'init' configuration value of
    the cluster, or the result of init_get() when it is not set. A
    file that is already missing is logged and skipped.
    """
    def remove_marker(name):
        marker = os.path.join(mounted_path, name)
        if os.path.exists(marker):
            os.remove(marker)
            LOG.info('Remove `%s` file.', name)
        else:
            LOG.info('`%s` file is already removed.', name)

    remove_marker('ready')
    remove_marker('active')

    # Just check `upstart` and `sysvinit` directly if filename is init-spec.
    conf_val = get_conf(
        cluster=cluster,
        variable='init'
    )
    init = conf_val if conf_val is not None else init_get()
    # guarded like the other markers: the configured init system may
    # not be the one the directory was tagged with
    remove_marker(init)
3158
3159
def main_deactivate(args):
    """
    Run main_deactivate_locked(args) while holding activate_lock.
    """
    activate_lock.acquire()  # noqa
    try:
        main_deactivate_locked(args)
    finally:
        # released even when the deactivation raises
        activate_lock.release()  # noqa
3166
3167
def main_deactivate_locked(args):
    """
    Deactivate the OSD selected by args.deactivate_by_id or args.path.

    Stops the daemon if the OSD is up (marking it out first when
    args.mark_out is set and it is in), removes the state markers,
    writes a 'deactive' marker, unmounts the data directory and unmaps
    the dmcrypt devices. Nothing but the optional mark-out is done
    when the OSD is already down.

    :raises Error: if no partition matches
    """
    osd_id = args.deactivate_by_id
    path = args.path
    target_dev = None

    def is_wanted(part):
        if osd_id and 'whoami' in part and part['whoami'] == osd_id:
            return True
        return bool(path and 'path' in part and part['path'] == path)

    # list all devices and found we need
    for device in list_devices():
        for dev_part in device.get('partitions', []):
            if is_wanted(dev_part):
                target_dev = dev_part
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # set up all we need variable
    osd_id = target_dev['whoami']
    part_type = target_dev['ptype']
    mounted_path = target_dev['mount']
    dmcrypt = True if Ptype.is_dmcrypt(part_type, 'osd') else False

    # Do not do anything if osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code == OSD_STATUS_IN_UP:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_IN_DOWN:
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
        LOG.info("OSD already out/down. Do not do anything now.")
        return
    elif status_code == OSD_STATUS_OUT_UP:
        stop_daemon(args.cluster, osd_id)
    elif status_code == OSD_STATUS_OUT_DOWN:
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    # remove 'ready', 'active', and INIT-specific files.
    _remove_osd_directory_files(mounted_path, args.cluster)

    # Write deactivate to osd directory!
    deactive_marker = os.path.join(mounted_path, 'deactive')
    with open(deactive_marker, 'w'):
        path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return
    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        uuid_key = name + '_uuid'
        if uuid_key in target_dev:
            dmcrypt_unmap(target_dev[uuid_key])
3229
3230 ###########################
3231
3232
def _remove_from_crush_map(cluster, osd_id):
    """
    Run ``ceph osd crush remove`` for the given OSD.

    :param cluster: cluster name, passed to the ceph command so that
                    the request targets that cluster
    :param osd_id: id of the OSD
    """
    LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
    command([
        'ceph',
        'osd',
        'crush',
        'remove',
        'osd.%s' % osd_id,
        # the cluster argument used to be ignored
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
    ])
3242
3243
def _delete_osd_auth_key(cluster, osd_id):
    """
    Run ``ceph auth del`` for the key of the given OSD.

    :param cluster: cluster name, passed to the ceph command so that
                    the request targets that cluster
    :param osd_id: id of the OSD
    """
    LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
    command([
        'ceph',
        'auth',
        'del',
        'osd.%s' % osd_id,
        # the cluster argument used to be ignored
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
    ])
3252
3253
def _deallocate_osd_id(cluster, osd_id):
    """
    Run ``ceph osd rm`` for the given OSD id.

    :param cluster: cluster name, passed to the ceph command so that
                    the request targets that cluster
    :param osd_id: id of the OSD
    """
    LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
    command([
        'ceph',
        'osd',
        'rm',
        '%s' % osd_id,
        # the cluster argument used to be ignored
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
    ])
3262
3263
def destroy_lookup_device(args, predicate, description):
    """
    Return the first listed partition for which predicate is true.

    A partition with dmcrypt information is temporarily mapped so that
    list_dev_osd() can fill in its OSD details before the predicate is
    evaluated.

    :param args: parsed arguments; args.dmcrypt_key_dir is used
    :param predicate: callable taking a partition description
    :param description: text describing the search, used in the error
    :raises Error: if no partition matches
    """
    devices = list_devices()
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['dmcrypt']:
                dmcrypt_path = dmcrypt_map(partition['path'],
                                           args.dmcrypt_key_dir)
                # always undo the temporary mapping, even when
                # inspecting the device fails
                try:
                    list_dev_osd(dmcrypt_path, {}, partition)
                finally:
                    dmcrypt_unmap(partition['uuid'])
            if predicate(partition):
                return partition
    raise Error('found no device matching ', description)
3276
3277
def main_destroy(args):
    """
    Remove an OSD that is down from the cluster, optionally zapping
    its device.

    The OSD is selected by args.path (a partition) or by
    args.destroy_by_id. It is removed from the crush map, its cephx
    key is deleted and its id is deallocated; with args.zap the base
    device is zapped afterwards.

    :raises Error: if args.path is not a partition, if neither a path
                   nor an osd id is given, if no device matches, or if
                   the OSD is still up
    """
    osd_id = args.destroy_by_id
    path = args.path
    # NOTE(review): dmcrypt is never set to True in this function, so
    # the unmap branch further down cannot run; confirm the intent
    # before enabling or deleting it.
    dmcrypt = False
    target_dev = None

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)
        target_dev = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif osd_id:
        target_dev = destroy_lookup_device(
            args, lambda x: x.get('whoami') == osd_id,
            'osd id ' + str(osd_id))
    else:
        # without this, target_dev stays None and the lookups below
        # fail with an unhelpful TypeError
        raise Error('either a partition path or an osd id is required')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # Before osd deactivate, we cannot destroy it
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code not in (OSD_STATUS_OUT_DOWN, OSD_STATUS_IN_DOWN):
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    # Remove OSD from crush map
    _remove_from_crush_map(args.cluster, osd_id)

    # Remove OSD cephx key
    _delete_osd_auth_key(args.cluster, osd_id)

    # Deallocate OSD ID
    _deallocate_osd_id(args.cluster, osd_id)

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])

    # Check zap flag. If we found zap flag, we need to find device for
    # destroy this osd data.
    if args.zap is True:
        # erase the osd data
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
3333
3334
def get_space_osd_uuid(name, path):
    """
    Return the first line printed by ``ceph-osd --get-device-fsid`` for
    the block device path.

    :param name: name of the space, only used in messages
    :param path: block device to query
    :raises Error: if path does not exist, is not a block device, is a
                   multipath journal/block partition that is not a
                   multipath device, or if ceph-osd fails
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not stat.S_ISBLK(os.stat(path).st_mode):
        raise Error('%s is not a block device' % path)

    if is_partition(path):
        mpath_ptypes = (PTYPE['mpath']['journal']['ready'],
                        PTYPE['mpath']['block']['ready'])
        if get_partition_type(path) in mpath_ptypes and not is_mpath(path):
            raise Error('%s is not a multipath block device' %
                        path)

    fsid_cmd = ['ceph-osd', '--get-device-fsid', path]
    try:
        out = _check_output(args=fsid_cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    value = str(out).partition('\n')[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
    return value
3367
3368
def main_activate_space(name, args):
    """
    Activate the OSD that the space device args.dev belongs to.

    The OSD uuid is read from the space device (mapped with dmcrypt
    first when args.dmcrypt is set), the data partition is looked up
    under /dev/disk/by-partuuid/, activated and its daemon started.

    :param name: name of the space, forwarded to get_space_osd_uuid()
    :raises Error: if args.dev does not exist
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    activate_lock.acquire()  # noqa
    try:
        space_dev = (dmcrypt_map(args.dev, args.dmcrypt_key_dir)
                     if args.dmcrypt else args.dev)
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, space_dev)
        data_path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(data_path):
            LOG.info('suppressed activate request on %s', data_path)
            return

        (cluster, osd_id) = mount_activate(
            dev=data_path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
        )

    finally:
        activate_lock.release()  # noqa
3410
3411
3412 ###########################
3413
3414
def main_activate_all(args):
    """
    Activate every OSD data partition found in
    /dev/disk/by-parttypeuuid.

    Entries are named <partition type>.<partition uuid>. A failure on
    one partition is reported on stderr and does not prevent the others
    from being tried.

    :raises Error: after the scan, if at least one partition failed
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return
    err = False
    for name in os.listdir(scan_dir):
        if name.find('.') < 0:
            continue
        # split on the first dot only, so an unexpected entry name is
        # skipped by the type check below instead of failing to unpack
        (tag, part_uuid) = name.split('.', 1)

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, name)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        activate_lock.acquire()  # noqa
        try:
            # never map dmcrypt cyphertext devices
            (cluster, osd_id) = mount_activate(
                dev=path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=False,
                dmcrypt_key_dir='',
            )
            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )

        except Exception as e:
            # sys.stderr.write() replaces the Python 2-only
            # `print >>` statement
            sys.stderr.write('{prog}: {msg}\n'.format(
                prog=args.prog,
                msg=e,
            ))
            err = True

        finally:
            activate_lock.release()  # noqa
    if err:
        raise Error('One or more partitions failed to activate')
3464
3465
3466 ###########################
3467
def is_swap(dev):
    """
    Return True if dev is listed in /proc/swaps.

    Both dev and the listed devices are compared after
    os.path.realpath(); listed entries that are not existing absolute
    paths are ignored.
    """
    dev = os.path.realpath(dev)
    # open() in text mode instead of the Python 2-only file() builtin,
    # so the fields are str and can be compared with dev
    with open('/proc/swaps', 'r') as proc_swaps:
        lines = proc_swaps.readlines()
    # the first line is skipped
    for line in lines[1:]:
        fields = line.split()
        if len(fields) < 3:
            continue
        swaps_dev = fields[0]
        if swaps_dev.startswith('/') and os.path.exists(swaps_dev):
            if os.path.realpath(swaps_dev) == dev:
                return True
    return False
3481
3482
def get_oneliner(base, name):
    """
    Return the first line of the file base/name with trailing
    whitespace removed, or None if it is not a regular file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'r') as handle:
        first_line = handle.readline()
    return first_line.rstrip()
3489
3490
def get_dev_fs(dev):
    """
    Return the filesystem type found in the ``blkid -s TYPE`` output
    for dev, or None if the output does not mention TYPE.
    """
    fscheck, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in fscheck:
        return None
    # the type is the quoted value of the second whitespace-separated
    # field of the output
    type_field = fscheck.split()[1]
    return type_field.split('"')[1]
3505
3506
def split_dev_base_partnum(dev):
    """
    Return the tuple (base device, partition number) for the partition
    dev.

    For a non-multipath device the number is read from the 'partition'
    file under block_path(dev).
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # the with block closes the file instead of leaving the handle
        # to the garbage collector
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return (base, partnum)
3516
3517
def get_partition_type(part):
    """
    Return the ID_PART_ENTRY_TYPE value blkid reports for part.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_TYPE')
3520
3521
def get_partition_uuid(part):
    """
    Return the ID_PART_ENTRY_UUID value blkid reports for part.
    """
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_UUID')
3524
3525
def get_blkid_partition_info(dev, what=None):
    """
    Parse the KEY=VALUE output of ``blkid -o udev -p`` for dev.

    :param dev: device to probe
    :param what: optional key to look up
    :returns: the value of key what (None if absent) when what is
              given, otherwise the dict of all parsed keys
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # split on the first '=' only so that values containing '='
        # are kept whole; lines without '=' are skipped
        key, separator, value = line.partition('=')
        if not separator:
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
3544
3545
def more_osd_info(path, uuid_map, desc):
    """
    Fill desc with the OSD details stored in the data directory path.

    Sets 'ceph_fsid', 'whoami', 'cluster' when a ceph_fsid is present,
    and for each name in Space.NAMES the lowercased '<name>_uuid' plus
    '<name>_dev' when that uuid is a key of uuid_map.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
3557
3558
def list_dev_osd(dev, uuid_map, desc):
    """
    Fill desc with the mount point, filesystem type and state of the
    OSD data device dev.

    The state is 'active' when dev is mounted, 'prepared' when a
    temporary mount shows a 'magic' file, and 'unprepared' otherwise.
    A MountError during the temporary mount is ignored.
    """
    mount_point = is_mounted(dev)
    desc['mount'] = mount_point
    fs_type = get_dev_fs(dev)
    desc['fs_type'] = fs_type
    desc['state'] = 'unprepared'

    if mount_point:
        desc['state'] = 'active'
        more_osd_info(mount_point, uuid_map, desc)
        return
    if not fs_type:
        return

    try:
        tpath = mount(dev=dev, fstype=fs_type, options='')
        if not tpath:
            return
        try:
            magic = get_oneliner(tpath, 'magic')
            if magic is not None:
                desc['magic'] = magic
                desc['state'] = 'prepared'
                more_osd_info(tpath, uuid_map, desc)
        finally:
            unmount(tpath)
    except MountError:
        pass
3580
3581
def list_format_more_osd_info_plain(dev):
    """
    Return the list of text fragments describing the cluster, the OSD
    id and the space devices recorded in the description dev.
    """
    fragments = []

    ceph_fsid = dev.get('ceph_fsid')
    if ceph_fsid:
        cluster = dev.get('cluster')
        if cluster:
            fragments.append('cluster ' + cluster)
        else:
            fragments.append('unknown cluster ' + ceph_fsid)

    whoami = dev.get('whoami')
    if whoami:
        fragments.append('osd.%s' % whoami)

    for name in Space.NAMES:
        space_dev = dev.get(name + '_dev')
        if space_dev:
            fragments.append(name + ' %s' % space_dev)
    return fragments
3595
3596
def list_format_dev_plain(dev, prefix=''):
    """
    Return the one-line plain text description of a device.

    :param dev: device or partition description; 'ptype' and 'path'
                are always read, other keys depend on the ptype
    :param prefix: text put in front of the device path
    :returns: '<prefix><path> <comma separated description>'
    """
    desc = []
    if dev['ptype'] == PTYPE['regular']['osd']['ready']:
        # regular OSD data partition
        desc = (['ceph data', dev['state']] +
                list_format_more_osd_info_plain(dev))
    elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
        # encrypted OSD data partition: OSD details are only listed
        # when there is exactly one holder
        dmcrypt = dev['dmcrypt']
        if not dmcrypt['holders']:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(dmcrypt['holders'])]
    elif Ptype.is_regular_space(dev['ptype']):
        # regular space partition
        name = Ptype.space_ptype_to_name(dev['ptype'])
        desc.append('ceph ' + name)
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(dev['ptype']):
        # encrypted space partition
        name = Ptype.space_ptype_to_name(dev['ptype'])
        dmcrypt = dev['dmcrypt']
        if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
            holder = get_dev_path(dmcrypt['holders'][0])
            # % binds tighter than +, so only the last literal is
            # formatted
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # anything else: type, then filesystem or partition type, then
        # the mount point
        desc.append(dev['type'])
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
3640
3641
def list_format_plain(devices):
    """Render device descriptions as the plain-text ``list`` output.

    A device with partitions produces a '<path> :' header line followed
    by one line per partition; any other device produces a single line.

    :param devices: list of device description dicts
    :returns: the lines joined with newlines ('' for no devices)
    """
    lines = []
    for device in devices:
        partitions = device.get('partitions')
        if partitions:
            lines.append('%s :' % device['path'])
            # Partitions are dicts: order them explicitly by path.  Sorting
            # the dicts themselves is arbitrary on Python 2 and a TypeError
            # on Python 3.
            for part in sorted(partitions, key=lambda p: p['path']):
                lines.append(list_format_dev_plain(dev=part, prefix=' '))
        else:
            lines.append(list_format_dev_plain(dev=device, prefix=''))
    return "\n".join(lines)
3654
3655
def list_dev(dev, uuid_map, space_map):
    """Build the description dict of one device or partition.

    :param dev: device path, stored under 'path'
    :param uuid_map: passed through to list_dev_osd() for OSD data
    :param space_map: maps the uuid of a space partition to the device
                      referencing it; used to fill '<name>_for'
    :returns: dict with 'path', 'dmcrypt', 'is_partition', 'ptype',
              'type' and, depending on the partition type, more keys
    """
    def note_owner(space_name):
        # Remember which device references this space partition, if known.
        if info.get('uuid') in space_map:
            info[space_name + '_for'] = space_map[info['uuid']]

    def describe_dmcrypt_osd(kind):
        mapped_by = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = mapped_by
        info['dmcrypt']['type'] = kind
        # The OSD content is only inspected through a single mapping.
        if len(mapped_by) == 1:
            list_dev_osd(get_dev_path(mapped_by[0]), uuid_map, info)

    def describe_dmcrypt_space(kind):
        space_name = Ptype.space_ptype_to_name(ptype)
        mapped_by = is_held(dev)
        info['type'] = space_name
        info['dmcrypt']['type'] = kind
        info['dmcrypt']['holders'] = mapped_by
        note_owner(space_name)

    info = {'path': dev, 'dmcrypt': {}}
    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")

    regular_osd = PTYPE['regular']['osd']['ready']
    mpath_osd = PTYPE['mpath']['osd']['ready']

    if ptype in (regular_osd, mpath_osd):
        info['type'] = 'data'
        if ptype == mpath_osd:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        describe_dmcrypt_osd('plain')
    elif ptype == PTYPE['luks']['osd']['ready']:
        describe_dmcrypt_osd('LUKS')
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        space_name = Ptype.space_ptype_to_name(ptype)
        info['type'] = space_name
        if ptype == PTYPE['mpath'][space_name]['ready']:
            info['multipath'] = True
        note_owner(space_name)
    elif Ptype.is_plain_space(ptype):
        describe_dmcrypt_space('plain')
    elif Ptype.is_luks_space(ptype):
        describe_dmcrypt_space('LUKS')
    else:
        # Not a Ceph partition: report swap / other plus fs and mount.
        mountpoint = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        info['type'] = 'swap' if is_swap(dev) else 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if mountpoint:
            info['mount'] = mountpoint

    return info
3726
3727
def list_devices():
    """Describe every disk and partition found by list_all_partitions().

    A first pass maps partition uuids to device paths (``uuid_map``) and,
    by temporarily mounting each ready OSD data partition, maps the
    space uuids it references to that data partition (``space_map``).
    A second pass builds the description dicts with list_dev().

    :returns: list of dicts; a disk with partitions is
              {'path': ..., 'partitions': [...]}, any other device is
              the dict returned by list_dev()
    """
    partmap = list_all_partitions()
    uuid_map = {}
    space_map = {}

    for _base, parts in sorted(partmap.items()):
        for part in parts:
            dev = get_dev_path(part)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))

            if ptype not in Ptype.get_ready_by_name('osd'):
                continue

            dev_to_mount = dev
            if Ptype.is_dmcrypt(ptype, 'osd'):
                # Encrypted data is only readable through a single mapping.
                holders = is_held(dev)
                if len(holders) != 1:
                    continue
                dev_to_mount = get_dev_path(holders[0])

            fs_type = get_dev_fs(dev_to_mount)
            if fs_type is None:
                continue

            try:
                tpath = mount(dev=dev_to_mount, fstype=fs_type, options='')
                try:
                    for name in Space.NAMES:
                        space_uuid = get_oneliner(tpath, name + '_uuid')
                        if space_uuid:
                            space_map[space_uuid.lower()] = dev
                finally:
                    unmount(tpath)
            except MountError:
                # Best effort: skip data partitions that cannot be mounted.
                pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        if parts:
            devices.append({
                'path': get_dev_path(base),
                'partitions': [
                    list_dev(get_dev_path(part), uuid_map, space_map)
                    for part in sorted(parts)
                ],
            })
        else:
            whole = list_dev(get_dev_path(base), uuid_map, space_map)
            whole['path'] = get_dev_path(base)
            devices.append(whole)
    LOG.debug("list_devices: " + str(devices))
    return devices
3788
3789
def main_list(args):
    """Entry point of the ``list`` subcommand.

    Prints the devices returned by list_devices(), optionally restricted
    to those whose path ends with one of ``args.path``, as JSON or as
    plain text depending on ``args.format``.
    """
    devices = list_devices()

    if args.path:
        # Existing paths are resolved; others are kept verbatim.
        wanted = [
            os.path.realpath(p) if os.path.exists(p) else p
            for p in args.path
        ]
        # Each wanted path is used as a regex anchored at the end.
        selected_devices = [
            device
            for device in devices
            for pattern in wanted
            if re.search(pattern + '$', device['path'])
        ]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
        return

    output = list_format_plain(selected_devices)
    if output:
        print(output)
3812
3813
3814 ###########################
3815 #
3816 # Mark devices that we want to suppress activates on with a
3817 # file like
3818 #
3819 # /var/lib/ceph/tmp/suppress-activate.sdb
3820 #
# where the last bit is the sanitized device name (/dev/X without the
# /dev/ prefix) and the is_suppressed() check matches a prefix.  That
# means suppressing sdb will stop activate on sdb1, sdb2, etc.
3824 #
3825
def is_suppressed(path):
    """Tell whether activation is suppressed for the device at ``path``.

    The device name and each of its shorter prefixes are looked up as
    SUPPRESS_PREFIX + name, so a flag set on a name also covers longer
    names starting with it.

    :param path: path to a device; symlinks are resolved first
    :returns: True if a matching suppress flag file exists, False
              otherwise (including when ``path`` is not a block device
              under /dev/ or an error occurs while checking)
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not stat.S_ISBLK(os.lstat(disk).st_mode)):
            return False
        base = get_dev_name(disk)
        while base:
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            # Drop the last character and retry with the shorter prefix.
            base = base[:-1]
    except Exception:
        # Best effort: any failure to inspect the device means "no".
        return False
    # No flag file matched any prefix of the device name.
    return False
3839
3840
def set_suppress(path):
    """Create the suppress-activate flag file for the device at ``path``.

    :param path: path to a block device; symlinks are resolved first
    :raises Error: if the resolved path does not exist or is not a
                   block device
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Check the resolved device, as is_suppressed() does, so that a
    # symlink pointing at a block device is accepted.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    base = get_dev_name(disk)

    # The flag is an empty file; only its existence matters.
    with open(SUPPRESS_PREFIX + base, 'w'):  # noqa
        pass
    LOG.info('set suppress flag on %s', base)
3852
3853
def unset_suppress(path):
    """Remove the suppress-activate flag file of the device at ``path``.

    :param path: path to a block device; symlinks are resolved first
    :raises Error: if the resolved path does not exist, is not a block
                   device under /dev/, is not marked as suppressed, or
                   the flag file cannot be removed
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Check the resolved device, as is_suppressed() does, so that a
    # symlink pointing at a block device is accepted.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    # Explicit check instead of assert, which is stripped under -O.
    if not disk.startswith('/dev/'):
        raise Error('not a device under /dev/', path)
    base = get_dev_name(disk)

    fn = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(fn):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(fn)
        LOG.info('unset suppress flag on %s', base)
    except OSError as e:
        raise Error('failed to unsuppress', e)
3872
3873
def main_suppress(args):
    """Entry point of ``suppress-activate``: flag ``args.path``."""
    set_suppress(path=args.path)
3876
3877
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unflag ``args.path``."""
    unset_suppress(path=args.path)
3880
3881
def main_zap(args):
    """Entry point of ``zap``: call zap() on each device in ``args.dev``."""
    for target in args.dev:
        zap(target)
3885
3886
def _trigger_dmcrypt_map(dev, partid, luks):
    """Map ``dev`` with cryptsetup and return the /dev/mapper path.

    The key is read from /etc/ceph/dmcrypt-keys/ and the mapping is
    named after ``partid``.  After mapping, the mapper path is polled
    once per second (at most 11 times); it is returned whether or not
    it showed up.
    """
    if luks:
        command(
            [
                '/sbin/cryptsetup',
                '--key-file',
                '/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
                    partid=partid),
                'luksOpen',
                dev,
                partid,
            ]
        )
    else:
        command(
            [
                '/sbin/cryptsetup',
                '--key-file',
                '/etc/ceph/dmcrypt-keys/{partid}'.format(partid=partid),
                '--key-size',
                '256',
                'create',
                partid,
                dev,
            ]
        )
    newdev = '/dev/mapper/' + partid
    count = 0
    while not os.path.exists(newdev) and count <= 10:
        time.sleep(1)
        count += 1
    return newdev


def main_trigger(args):
    """Entry point of ``trigger``: activate whatever lives on ``args.dev``.

    Unless ``args.sync`` is set, the work is delegated to the init
    system when one is detected (systemd: restart of the
    ceph-disk@<dev>.service unit; upstart: a 'ceph-disk' event).
    Otherwise the partition type selects the ceph-disk activation
    subcommand to run; dm-crypt partitions are mapped first.

    :raises Error: if the partition type is not a known ready type
    """
    LOG.debug("main_trigger: " + str(args))
    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        escaped_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(
            [
                'systemctl',
                '--no-block',
                'restart',
                service,
            ]
        )
        return
    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command(
            [
                'initctl',
                'emit',
                'ceph-disk',
                'dev={dev}'.format(dev=args.dev),
                'pid={pid}'.format(pid=os.getpid()),
            ]
        )
        return

    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    # (partition role, ceph-disk subcommand, chown the mapped device?)
    # journal and block: map, chown, activate-<role>
    # osd data: map, activate (the mapped device is not chowned)
    roles = (
        ('osd', 'activate', False),
        ('journal', 'activate-journal', True),
        ('block', 'activate-block', True),
    )
    for role, subcommand, chown in roles:
        if parttype in (PTYPE['regular'][role]['ready'],
                        PTYPE['mpath'][role]['ready']):
            command(['ceph-disk', subcommand, args.dev])
            return
        for kind in ('plain', 'luks'):
            if parttype != PTYPE[kind][role]['ready']:
                continue
            newdev = _trigger_dmcrypt_map(args.dev, partid,
                                          luks=(kind == 'luks'))
            if chown:
                command(['/bin/chown', 'ceph:ceph', newdev])
            command(['/usr/sbin/ceph-disk', subcommand, newdev])
            return

    raise Error('unrecognized partition type %s' % parttype)
4138
4139
def setup_statedir(dir):
    """Point the module at the state directory ``dir``.

    Creates ``dir`` and ``dir``/tmp when missing (their parents must
    already exist) and sets the module globals STATEDIR, prepare_lock,
    activate_lock and SUPPRESS_PREFIX accordingly.

    :param dir: path of the state directory
    :raises OSError: if a directory cannot be created
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX
    STATEDIR = dir

    for path in (STATEDIR, STATEDIR + "/tmp"):
        if os.path.exists(path):
            continue
        try:
            os.mkdir(path)
        except OSError as e:
            # Another process may have created it between the check
            # and the mkdir; that is not an error.
            if e.errno != errno.EEXIST:
                raise

    prepare_lock = filelock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
    activate_lock = filelock(STATEDIR + '/tmp/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
4160
4161
def setup_sysconfdir(dir):
    """Store ``dir`` in the module global SYSCONFDIR."""
    global SYSCONFDIR
    SYSCONFDIR = dir
4165
4166
def parse_args(argv):
    """Parse the ceph-disk command line.

    :param argv: list of arguments, without the program name
    :returns: the argparse namespace; it also carries ``prog`` and the
              defaults set by the selected subcommand parser
    """
    parser = argparse.ArgumentParser('ceph-disk')

    # Boolean switches: None (not False) when absent.
    switch = {'action': 'store_true', 'default': None}
    parser.add_argument('-v', '--verbose', help='be more verbose', **switch)
    parser.add_argument('--log-stdout', help='log to stdout', **switch)

    # (option, metavar, default, help), registered in this order.
    valued_options = (
        ('--prepend-to-path', 'PATH', '/usr/bin',
         'prepend PATH to $PATH for backward compatibility '
         '(default /usr/bin)'),
        ('--statedir', 'PATH', '/var/lib/ceph',
         'directory in which ceph state is preserved '
         '(default /var/lib/ceph)'),
        ('--sysconfdir', 'PATH', '/etc/ceph',
         'directory in which ceph configuration files are found '
         '(default /etc/ceph)'),
        ('--setuser', 'USER', None,
         'use the given user for subprocesses, rather than ceph or root'),
        ('--setgroup', 'GROUP', None,
         'use the given group for subprocesses, rather than ceph or root'),
    )
    for option, metavar, default, text in valued_options:
        parser.add_argument(option, metavar=metavar, default=default,
                            help=text)

    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        help='sub-command help',
        description='valid subcommands',
        title='subcommands',
    )

    # Each registrar adds its subcommand(s) to ``subparsers``.
    registrars = (
        Prepare.set_subparser,
        make_activate_parser,
        make_activate_block_parser,
        make_activate_journal_parser,
        make_activate_all_parser,
        make_list_parser,
        make_suppress_parser,
        make_deactivate_parser,
        make_destroy_parser,
        make_zap_parser,
        make_trigger_parser,
    )
    for register in registrars:
        register(subparsers)

    return parser.parse_args(argv)
4239
4240
def make_trigger_parser(subparsers):
    """Register the ``trigger`` subcommand and return its parser."""
    sub = subparsers.add_parser('trigger',
                                help='Trigger an event (caled by udev)')
    sub.add_argument('dev', help='device')
    sub.add_argument(
        '--sync',
        default=None,
        action='store_true',
        help='do operation synchronously; do not trigger systemd',
    )
    sub.set_defaults(func=main_trigger)
    return sub
4258
4259
def make_activate_parser(subparsers):
    """Register the ``activate`` subcommand and return its parser."""
    # Boolean switches: None (not False) when absent.
    switch = {'action': 'store_true', 'default': None}

    sub = subparsers.add_parser('activate', help='Activate a Ceph OSD')
    sub.add_argument(
        '--mount',
        help='mount a block device [deprecated, ignored]',
        **switch
    )
    sub.add_argument(
        '--activate-key',
        dest='activate_key_template',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    sub.add_argument(
        '--mark-init',
        choices=INIT_SYSTEMS,
        default='auto',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
    )
    sub.add_argument(
        '--no-start-daemon',
        help='do not start the daemon',
        **switch
    )
    sub.add_argument(
        'path',
        metavar='PATH',
        help='path to block device or directory',
    )
    sub.add_argument(
        '--dmcrypt',
        help='map DATA and/or JOURNAL devices with dm-crypt',
        **switch
    )
    sub.add_argument(
        '--dmcrypt-key-dir',
        default='/etc/ceph/dmcrypt-keys',
        metavar='KEYDIR',
        help='directory where dm-crypt keys are stored',
    )
    sub.add_argument(
        '--reactivate',
        default=False,
        action='store_true',
        help='activate the deactived OSD',
    )
    # Supplies the default shown by %(default)s in --activate-key.
    sub.set_defaults(
        func=main_activate,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return sub
4313
4314
def make_activate_block_parser(subparsers):
    """Register the ``activate-block`` subcommand; return its parser."""
    return make_activate_space_parser(name='block', subparsers=subparsers)
4317
4318
def make_activate_journal_parser(subparsers):
    """Register the ``activate-journal`` subcommand; return its parser."""
    return make_activate_space_parser(name='journal', subparsers=subparsers)
4321
4322
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` subcommand and return its parser.

    :param name: space name used in the subcommand name and help texts
                 and passed to main_activate_space()
    :param subparsers: the argparse subparsers object to add to
    """
    def activate_space(args):
        # Bind the space name so the handler takes only ``args``.
        return main_activate_space(name, args)

    sub = subparsers.add_parser(
        'activate-%s' % name,
        help='Activate an OSD via its %s device' % name,
    )
    sub.add_argument(
        'dev',
        metavar='DEV',
        help='path to %s block device' % name,
    )
    sub.add_argument(
        '--activate-key',
        dest='activate_key_template',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    sub.add_argument(
        '--mark-init',
        choices=INIT_SYSTEMS,
        default='auto',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
    )
    sub.add_argument(
        '--dmcrypt',
        default=None,
        action='store_true',
        help=('map data and/or auxiliariy (journal, etc.) '
              'devices with dm-crypt'),
    )
    sub.add_argument(
        '--dmcrypt-key-dir',
        default='/etc/ceph/dmcrypt-keys',
        metavar='KEYDIR',
        help='directory where dm-crypt keys are stored',
    )
    sub.add_argument(
        '--reactivate',
        default=False,
        action='store_true',
        help='activate the deactived OSD',
    )
    # Supplies the default shown by %(default)s in --activate-key.
    sub.set_defaults(
        func=activate_space,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return sub
4367
4368
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` subcommand and return its parser."""
    sub = subparsers.add_parser(
        'activate-all',
        help='Activate all tagged OSD partitions',
    )
    sub.add_argument(
        '--activate-key',
        dest='activate_key_template',
        metavar='PATH',
        help='bootstrap-osd keyring path template (%(default)s)',
    )
    sub.add_argument(
        '--mark-init',
        choices=INIT_SYSTEMS,
        default='auto',
        metavar='INITSYSTEM',
        help='init system to manage this dir',
    )
    # Supplies the default shown by %(default)s in --activate-key.
    sub.set_defaults(
        func=main_activate_all,
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
    )
    return sub
4391
4392
def make_list_parser(subparsers):
    """Register the ``list`` subcommand and return its parser."""
    sub = subparsers.add_parser(
        'list',
        help='List disks, partitions, and Ceph OSDs',
    )
    sub.add_argument(
        '--format',
        choices=['json', 'plain'],
        default='plain',
        help='output format',
    )
    # Zero or more paths; none means "list everything".
    sub.add_argument(
        'path',
        nargs='*',
        metavar='PATH',
        help='path to block devices, relative to /sys/block',
    )
    sub.set_defaults(func=main_list)
    return sub
4413
4414
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    :returns: the parser of ``suppress-activate`` only
    """
    # (subcommand, help, handler), registered in this order.
    specs = (
        ('suppress-activate',
         'Suppress activate on a device (prefix)',
         main_suppress),
        ('unsuppress-activate',
         'Stop suppressing activate on a device (prefix)',
         main_unsuppress),
    )
    created = []
    for subcommand, summary, handler in specs:
        sub = subparsers.add_parser(subcommand, help=summary)
        sub.add_argument(
            'path',
            metavar='PATH',
            help='path to block device or directory',
        )
        sub.set_defaults(func=handler)
        created.append(sub)
    return created[0]
4440
4441
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` subcommand (returns nothing)."""
    sub = subparsers.add_parser('deactivate', help='Deactivate a Ceph OSD')
    sub.add_argument(
        '--cluster',
        default='ceph',
        metavar='NAME',
        help='cluster name to assign this disk to',
    )
    # The path is optional: --deactivate-by-id is offered as well.
    sub.add_argument(
        'path',
        nargs='?',
        metavar='PATH',
        help='path to block device or directory',
    )
    sub.add_argument(
        '--deactivate-by-id',
        metavar='<id>',
        help='ID of OSD to deactive',
    )
    sub.add_argument(
        '--mark-out',
        default=False,
        action='store_true',
        help='option to mark the osd out',
    )
    sub.set_defaults(func=main_deactivate)
4471
4472
def make_destroy_parser(subparsers):
    """Register the ``destroy`` subcommand (returns nothing)."""
    sub = subparsers.add_parser('destroy', help='Destroy a Ceph OSD')
    sub.add_argument(
        '--cluster',
        default='ceph',
        metavar='NAME',
        help='cluster name to assign this disk to',
    )
    # The path is optional: --destroy-by-id is offered as well.
    sub.add_argument(
        'path',
        nargs='?',
        metavar='PATH',
        help='path to block device or directory',
    )
    sub.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy',
    )
    sub.add_argument(
        '--dmcrypt-key-dir',
        default='/etc/ceph/dmcrypt-keys',
        metavar='KEYDIR',
        help=("directory where dm-crypt keys are stored "
              "(If you don't know how it work, "
              "dont use it. we have default value)"),
    )
    sub.add_argument(
        '--zap',
        default=False,
        action='store_true',
        help='option to erase data and partition',
    )
    sub.set_defaults(func=main_destroy)
4510
4511
def make_zap_parser(subparsers):
    """Register the ``zap`` subcommand and return its parser."""
    sub = subparsers.add_parser(
        'zap',
        help="Zap/erase/destroy a device's partition table (and contents)",
    )
    # One or more devices are required.
    sub.add_argument(
        'dev',
        nargs='+',
        metavar='DEV',
        help='path to block device',
    )
    sub.set_defaults(func=main_zap)
    return sub
4526
4527
def main(argv):
    """Run ceph-disk with the command-line arguments ``argv``.

    Parses the arguments, configures logging, PATH, the state and
    configuration directories and the preferred user/group, then calls
    the handler of the selected subcommand.  With --verbose the handler
    is called directly; otherwise it goes through main_catch().
    """
    global CEPH_PREF_USER, CEPH_PREF_GROUP

    args = parse_args(argv)
    setup_logging(args.verbose, args.log_stdout)

    prepend = args.prepend_to_path
    if prepend != '':
        current = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = prepend + ":" + current

    setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if not args.verbose:
        main_catch(args.func, args)
    else:
        # Verbose runs let exceptions propagate with their traceback.
        args.func(args)
4549
4550
def setup_logging(verbose, log_stdout):
    """Configure logging for the command line run.

    :param verbose: truthy selects DEBUG, otherwise WARNING
    :param log_stdout: truthy attaches a stdout handler to LOG; otherwise
                       logging.basicConfig() is used
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(level=loglevel)
        return

    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(loglevel)
    handler.setFormatter(logging.Formatter('%(filename)s: %(message)s'))
    LOG.addHandler(handler)
    LOG.setLevel(loglevel)
4567
4568
def main_catch(func, args):
    """Call ``func(args)`` and turn known failures into a clean exit.

    :param func: the subcommand handler
    :param args: parsed arguments; ``args.prog`` prefixes the message
    :raises SystemExit: with a one-line message when ``func`` raises
                        Error or CephDiskException
    """
    try:
        func(args)
    except Error as e:
        message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
        raise SystemExit(message)
    except CephDiskException as error:
        # Include the exception class name for these errors.
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
4591
4592
def run():
    """Run main() on the process command-line arguments."""
    cli_args = sys.argv[1:]
    main(cli_args)
4595
if __name__ == '__main__':
    # Script entry point: hand the command-line arguments to main().
    main(sys.argv[1:])
    # NOTE(review): assigned only after main() returns; nothing in view
    # reads it - confirm whether it is still needed.
    warned_about = {}