3 # Copyright (C) 2015 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
8 # Author: Loic Dachary <loic@dachary.org>
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
20 # THIS IS ceph-disk AS OF dc5a9053ce69c0630091774f16ce421da67d26fb v10.0.3-2247-gdc5a905
21 # PRIOR TO THE INTRODUCTION OF THE LOCKBOX VOLUME TO STORE KEY FETCHING
42 CEPH_OSD_ONDISK_MAGIC
= 'ceph osd volume v026'
47 # identical because creating a journal is atomic
48 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
49 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
52 # identical because creating a block is atomic
53 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
54 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
57 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
58 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
63 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
64 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
67 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
68 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
71 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
72 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
77 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
78 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
81 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
82 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
85 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
86 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
91 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
92 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
95 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
96 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
99 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
100 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
def get_ready_by_type(what):
    """
    Collect the 'ready' partition type of every entry stored in
    PTYPE under the given group.

    NOTE(review): sibling helpers are invoked as ``Ptype.<name>``, so
    this presumably belongs to a ``Ptype`` class whose header is not
    visible here -- confirm before relying on it as a free function.

    :param what: key selecting one group of PTYPE
    :return: list holding the 'ready' value of each entry in PTYPE[what]
    """
    ready = []
    for entry in PTYPE[what].values():
        ready.append(entry['ready'])
    return ready
def get_ready_by_name(name):
    """
    Collect, across every group of PTYPE, the 'ready' partition type
    registered for the given name.

    :param name: key looked up inside each group of PTYPE
    :return: list with one 'ready' value per group of PTYPE
    """
    ready = []
    for group in PTYPE.values():
        ready.append(group[name]['ready'])
    return ready
def is_regular_space(ptype):
    """
    Check ptype against the 'regular' group.

    Thin wrapper: the actual lookup is done by Ptype.is_what_space.

    :param ptype: partition type to test
    :return: whatever Ptype.is_what_space returns for 'regular'
    """
    what = 'regular'
    return Ptype.is_what_space(what, ptype)
def is_mpath_space(ptype):
    """
    Check ptype against the 'mpath' group.

    Thin wrapper: the actual lookup is done by Ptype.is_what_space.

    :param ptype: partition type to test
    :return: whatever Ptype.is_what_space returns for 'mpath'
    """
    what = 'mpath'
    return Ptype.is_what_space(what, ptype)
def is_plain_space(ptype):
    """
    Check ptype against the 'plain' group.

    Thin wrapper: the actual lookup is done by Ptype.is_what_space.

    :param ptype: partition type to test
    :return: whatever Ptype.is_what_space returns for 'plain'
    """
    what = 'plain'
    return Ptype.is_what_space(what, ptype)
def is_luks_space(ptype):
    """
    Check ptype against the 'luks' group.

    Thin wrapper: the actual lookup is done by Ptype.is_what_space.

    :param ptype: partition type to test
    :return: whatever Ptype.is_what_space returns for 'luks'
    """
    what = 'luks'
    return Ptype.is_what_space(what, ptype)
133 def is_what_space(what
, ptype
):
134 for name
in Space
.NAMES
:
135 if ptype
== PTYPE
[what
][name
]['ready']:
140 def space_ptype_to_name(ptype
):
141 for what
in PTYPE
.values():
142 for name
in Space
.NAMES
:
143 if ptype
== what
[name
]['ready']:
145 raise ValueError('ptype ' + ptype
+ ' not found')
148 def is_dmcrypt_space(ptype
):
149 for name
in Space
.NAMES
:
150 if Ptype
.is_dmcrypt(ptype
, name
):
155 def is_dmcrypt(ptype
, name
):
156 for what
in ('plain', 'luks'):
157 if ptype
== PTYPE
[what
][name
]['ready']:
161 DEFAULT_FS_TYPE
= 'xfs'
165 OSD STATUS Definition
167 OSD_STATUS_OUT_DOWN
= 0
168 OSD_STATUS_OUT_UP
= 1
169 OSD_STATUS_IN_DOWN
= 2
172 MOUNT_OPTIONS
= dict(
173 btrfs
='noatime,user_subvol_rm_allowed',
174 # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
175 # delay a moment before removing it fully because we did have some
176 # issues with ext4 before the xattrs-in-leveldb work, and it seemed
177 # that user_xattr helped
178 ext4
='noatime,user_xattr',
179 xfs
='noatime,inode64',
184 # btrfs requires -f, for the same reason as xfs (see comment below)
191 # xfs insists on not overwriting previous fs; even if we wipe
192 # partition table, we often recreate it exactly the same way,
193 # so we'll see ghosts of filesystems past
207 STATEDIR
= '/var/lib/ceph'
209 SYSCONFDIR
= '/etc/ceph'
213 SUPPRESS_PREFIX
= None
215 # only warn once about some things
218 # Nuke the TERM variable to avoid confusing any subprocesses we call.
219 # For example, libreadline will print weird control sequences for some
221 if 'TERM' in os
.environ
:
222 del os
.environ
['TERM']
225 if LOG_NAME
== '__main__':
226 LOG_NAME
= os
.path
.basename(sys
.argv
[0])
227 LOG
= logging
.getLogger(LOG_NAME
)
229 # Allow user-preferred values for subprocess user and group
230 CEPH_PREF_USER
= None
231 CEPH_PREF_GROUP
= None
234 class filelock(object):
235 def __init__(self
, fn
):
241 self
.fd
= file(self
.fn
, 'w')
242 fcntl
.lockf(self
.fd
, fcntl
.LOCK_EX
)
246 fcntl
.lockf(self
.fd
, fcntl
.LOCK_UN
)
250 class Error(Exception):
256 doc
= self
.__doc
__.strip()
257 return ': '.join([doc
] + [str(a
) for a
in self
.args
])
260 class MountError(Error
):
262 Mounting filesystem failed
266 class UnmountError(Error
):
268 Unmounting filesystem failed
272 class BadMagicError(Error
):
274 Does not look like a Ceph OSD, or incompatible version
278 class TruncatedLineError(Error
):
284 class TooManyLinesError(Error
):
290 class FilesystemTypeError(Error
):
292 Cannot discover filesystem type
296 class CephDiskException(Exception):
298 A base exception for ceph-disk to provide custom (ad-hoc) messages that
299 will be caught and dealt with when main() is executed
304 class ExecutableNotFound(CephDiskException
):
306 Exception to report on executables not available in PATH
313 Detect whether systemd is running
315 with
file('/proc/1/comm', 'rb') as i
:
317 if 'systemd' in line
:
324 Detect whether upstart is running
326 (out
, err
, _
) = command(['init', '--version'])
332 def maybe_mkdir(*a
, **kw
):
334 Creates a new directory if it doesn't exist, removes
335 existing symlink before creating the directory.
337 # remove any symlink, if it is there..
338 if os
.path
.exists(*a
) and stat
.S_ISLNK(os
.lstat(*a
).st_mode
):
339 LOG
.debug('Removing old symlink at %s', *a
)
344 if e
.errno
== errno
.EEXIST
:
350 def which(executable
):
351 """find the location of an executable"""
352 if 'PATH' in os
.environ
:
353 envpath
= os
.environ
['PATH']
356 PATH
= envpath
.split(os
.pathsep
)
367 for location
in locations
:
368 executable_path
= os
.path
.join(location
, executable
)
369 if (os
.path
.isfile(executable_path
) and
370 os
.access(executable_path
, os
.X_OK
)):
371 return executable_path
374 def _get_command_executable(arguments
):
376 Return the full path for an executable, raise if the executable is not
377 found. If the executable has already a full path do not perform any checks.
379 if arguments
[0].startswith('/'): # an absolute path
381 executable
= which(arguments
[0])
383 command_msg
= 'Could not run command: %s' % ' '.join(arguments
)
384 executable_msg
= '%s not in path.' % arguments
[0]
385 raise ExecutableNotFound('%s %s' % (executable_msg
, command_msg
))
387 # swap the old executable for the new one
388 arguments
[0] = executable
392 def command(arguments
, **kwargs
):
394 Safely execute a ``subprocess.Popen`` call making sure that the
395 executable exists and raising a helpful error message
398 .. note:: This should be the preferred way of calling ``subprocess.Popen``
399 since it provides the caller with the safety net of making sure that
400 executables *will* be found and will error nicely otherwise.
402 This returns the output of the command and the return code of the
403 process in a tuple: (output, returncode).
405 arguments
= _get_command_executable(arguments
)
406 LOG
.info('Running command: %s' % ' '.join(arguments
))
407 process
= subprocess
.Popen(
409 stdout
=subprocess
.PIPE
,
410 stderr
=subprocess
.PIPE
,
412 out
, err
= process
.communicate()
413 return out
, err
, process
.returncode
def command_check_call(arguments):
    """
    Run a command with ``subprocess.check_call`` once its executable
    has been resolved by ``_get_command_executable``.

    .. note:: Prefer this over calling ``subprocess.check_call``
       directly: a missing executable is reported by
       ``_get_command_executable`` before anything is spawned.

    :param arguments: command as a list, executable first
    :return: the value returned by ``subprocess.check_call``
    """
    resolved = _get_command_executable(arguments)
    command_line = ' '.join(resolved)
    LOG.info('Running command: %s', command_line)
    return subprocess.check_call(resolved)
def platform_distro():
    """
    Return the distribution name reported by platform_information(),
    lower-cased and stripped of surrounding whitespace. A false-ish
    name (None, empty) yields an empty string.
    """
    name = platform_information()[0]
    if not name:
        name = ''
    return name.strip().lower()
440 def platform_information():
441 distro
, release
, codename
= platform
.linux_distribution()
442 # this could be an empty string in Debian
443 if not codename
and 'debian' in distro
.lower():
449 major_version
= release
.split('.')[0]
450 codename
= debian_codenames
.get(major_version
, '')
452 # In order to support newer jessie/sid or wheezy/sid strings we test
453 # this if sid is buried in the minor, we should use sid anyway.
454 if not codename
and '/' in release
:
455 major
, minor
= release
.split('/')
463 str(release
).strip(),
464 str(codename
).strip()
468 # An alternative block_path implementation would be
470 # name = basename(dev)
471 # return /sys/devices/virtual/block/$name
473 # It is however more fragile because it relies on the fact
474 # that the basename of the device the user will use always
475 # matches the one the driver will use. On Ubuntu 14.04, for
476 # instance, when multipath creates a partition table on
478 # /dev/mapper/353333330000007d0 -> ../dm-0
480 # it will create partition devices named
482 # /dev/mapper/353333330000007d0-part1
484 # which is the same device as /dev/dm-1 but not a symbolic
487 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
488 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
489 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
490 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
492 # Using the basename in this case fails.
497 path
= os
.path
.realpath(dev
)
498 rdev
= os
.stat(path
).st_rdev
499 (M
, m
) = (os
.major(rdev
), os
.minor(rdev
))
500 return "{sysfs}/dev/block/{M}:{m}".format(sysfs
=SYSFS
, M
=M
, m
=m
)
503 def get_dm_uuid(dev
):
504 uuid_path
= os
.path
.join(block_path(dev
), 'dm', 'uuid')
505 LOG
.debug("get_dm_uuid " + dev
+ " uuid path is " + uuid_path
)
506 if not os
.path
.exists(uuid_path
):
508 uuid
= open(uuid_path
, 'r').read()
509 LOG
.debug("get_dm_uuid " + dev
+ " uuid is " + uuid
)
515 True if the path is managed by multipath
517 uuid
= get_dm_uuid(dev
)
519 (re
.match('part\d+-mpath-', uuid
) or
520 re
.match('mpath-', uuid
)))
523 def get_dev_name(path
):
525 get device name from path. e.g.::
527 /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1
529 a device "name" is something like::
535 assert path
.startswith('/dev/')
537 return base
.replace('/', '!')
def get_dev_path(name):
    """
    Build a /dev path from a device name.

    Every '!' in the name stands for a '/' in the path, e.g.
    ``cciss!c0d1`` becomes ``/dev/cciss/c0d1`` and ``sda`` becomes
    ``/dev/sda``.

    :param name: device name, with '!' in place of '/'
    :return: the path under /dev
    """
    components = name.split('!')
    return '/dev/' + '/'.join(components)
def get_dev_relpath(name):
    """
    Build a path relative to /dev from a device name.

    Every '!' in the name stands for a '/' in the path, e.g.
    ``cciss!c0d1`` becomes ``cciss/c0d1``.

    :param name: device name, with '!' in place of '/'
    :return: the path relative to /dev
    """
    components = name.split('!')
    return '/'.join(components)
559 def get_dev_size(dev
, size
='megabytes'):
561 Attempt to get the size of a device so that we can prevent errors
562 from actions to devices that are smaller, and improve error reporting.
564 Because we want to avoid breakage in case this approach is not robust, we
565 will issue a warning if we failed to get the size.
567 :param size: bytes or megabytes
568 :param dev: the device to calculate the size
570 fd
= os
.open(dev
, os
.O_RDONLY
)
571 dividers
= {'bytes': 1, 'megabytes': 1024 * 1024}
573 device_size
= os
.lseek(fd
, 0, os
.SEEK_END
)
574 divider
= dividers
.get(size
, 1024 * 1024) # default to megabytes
575 return device_size
/ divider
576 except Exception as error
:
577 LOG
.warning('failed to get size of %s: %s' % (dev
, str(error
)))
582 def get_partition_mpath(dev
, pnum
):
583 part_re
= "part{pnum}-mpath-".format(pnum
=pnum
)
584 partitions
= list_partitions_mpath(dev
, part_re
)
591 def get_partition_dev(dev
, pnum
):
593 get the device name for a partition
595 assume that partitions are named like the base dev,
596 with a number, and optionally
597 some intervening characters (like 'p'). e.g.,
600 cciss/c0d1 1 -> cciss!c0d1p1
604 partname
= get_partition_mpath(dev
, pnum
)
606 name
= get_dev_name(os
.path
.realpath(dev
))
607 for f
in os
.listdir(os
.path
.join('/sys/block', name
)):
608 if f
.startswith(name
) and f
.endswith(str(pnum
)):
609 # we want the shortest name that starts with the base name
610 # and ends with the partition number
611 if not partname
or len(f
) < len(partname
):
614 return get_dev_path(partname
)
616 raise Error('partition %d for %s does not appear to exist' %
620 def list_all_partitions():
622 Return a list of devices and partitions
624 names
= os
.listdir('/sys/block')
627 # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
628 if re
.match(r
'^fd\d$', name
):
630 dev_part_list
[name
] = list_partitions(get_dev_path(name
))
634 def list_partitions(dev
):
635 dev
= os
.path
.realpath(dev
)
637 return list_partitions_mpath(dev
)
639 return list_partitions_device(dev
)
642 def list_partitions_mpath(dev
, part_re
="part\d+-mpath-"):
645 holders
= os
.path
.join(p
, 'holders')
646 for holder
in os
.listdir(holders
):
647 uuid_path
= os
.path
.join(holders
, holder
, 'dm', 'uuid')
648 uuid
= open(uuid_path
, 'r').read()
649 LOG
.debug("list_partitions_mpath: " + uuid_path
+ " uuid = " + uuid
)
650 if re
.match(part_re
, uuid
):
651 partitions
.append(holder
)
655 def list_partitions_device(dev
):
657 Return a list of partitions on the given device name
660 basename
= get_dev_name(dev
)
661 for name
in os
.listdir(block_path(dev
)):
662 if name
.startswith(basename
):
663 partitions
.append(name
)
def get_partition_base(dev):
    """
    Find the device that contains the given partition.

    The path is resolved with realpath first; errors report the
    resolved path.

    :param dev: path of a partition device node
    :raises: Error if the path is not a block device, if its name
             is listed directly under /sys/block (so it is not a
             partition), or if no entry of /sys/block contains it
    :return: /dev path of the containing device
    """
    sys_block = '/sys/block'
    resolved = os.path.realpath(dev)
    mode = os.lstat(resolved).st_mode
    if not stat.S_ISBLK(mode):
        raise Error('not a block device', resolved)

    name = get_dev_name(resolved)
    own_entry = os.path.join(sys_block, name)
    if os.path.exists(own_entry):
        raise Error('not a partition', resolved)

    # the parent is the /sys/block entry holding a sub-entry named
    # after the partition; the first match wins
    for candidate in os.listdir(sys_block):
        nested = os.path.join(sys_block, candidate, name)
        if os.path.exists(nested):
            return get_dev_path(candidate)
    raise Error('no parent device for partition', resolved)
def is_partition_mpath(dev):
    """
    Tell whether the device-mapper uuid of dev starts with
    ``part<N>-mpath-``.

    NOTE(review): the value comes straight from get_dm_uuid(); if that
    helper can return a non-string for devices without a dm uuid,
    re.match will raise TypeError here -- confirm against its callers.

    :param dev: device path handed to get_dm_uuid
    :return: True if the uuid matches, False otherwise
    """
    uuid = get_dm_uuid(dev)
    # raw literal: '\d' in a plain string is an invalid escape sequence
    return bool(re.match(r'part\d+-mpath-', uuid))
def partnum_mpath(dev):
    """
    Extract the partition number embedded in the device-mapper uuid
    of dev (the digits of ``part<N>-mpath-``).

    :param dev: device path handed to get_dm_uuid
    :raises: IndexError if the uuid contains no ``part<N>-mpath-``
    :return: the partition number, as a string of digits
    """
    uuid = get_dm_uuid(dev)
    # raw literal: '\d' in a plain string is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', uuid)[0]
696 def get_partition_base_mpath(dev
):
697 slave_path
= os
.path
.join(block_path(dev
), 'slaves')
698 slaves
= os
.listdir(slave_path
)
700 name_path
= os
.path
.join(slave_path
, slaves
[0], 'dm', 'name')
701 name
= open(name_path
, 'r').read().strip()
702 return os
.path
.join('/dev/mapper', name
)
705 def is_partition(dev
):
707 Check whether a given device path is a partition or a full disk.
710 return is_partition_mpath(dev
)
712 dev
= os
.path
.realpath(dev
)
714 if not stat
.S_ISBLK(st
.st_mode
):
715 raise Error('not a block device', dev
)
717 name
= get_dev_name(dev
)
718 if os
.path
.exists(os
.path
.join('/sys/block', name
)):
721 # make sure it is a partition of something else
722 major
= os
.major(st
.st_rdev
)
723 minor
= os
.minor(st
.st_rdev
)
724 if os
.path
.exists('/sys/dev/block/%d:%d/partition' % (major
, minor
)):
727 raise Error('not a disk or partition', dev
)
732 Check if the given device is mounted.
734 dev
= os
.path
.realpath(dev
)
735 with
file('/proc/mounts', 'rb') as proc_mounts
:
736 for line
in proc_mounts
:
737 fields
= line
.split()
740 mounts_dev
= fields
[0]
742 if mounts_dev
.startswith('/') and os
.path
.exists(mounts_dev
):
743 mounts_dev
= os
.path
.realpath(mounts_dev
)
744 if mounts_dev
== dev
:
751 Check if a device is held by another device (e.g., a dm-crypt mapping)
753 assert os
.path
.exists(dev
)
757 dev
= os
.path
.realpath(dev
)
758 base
= get_dev_name(dev
)
761 directory
= '/sys/block/{base}/holders'.format(base
=base
)
762 if os
.path
.exists(directory
):
763 return os
.listdir(directory
)
768 directory
= '/sys/block/{base}/{part}/holders'.format(
769 part
=part
, base
=base
)
770 if os
.path
.exists(directory
):
771 return os
.listdir(directory
)
776 def verify_not_in_use(dev
, check_partitions
=False):
778 Verify if a given device (path) is in use (e.g. mounted or
779 in use by device-mapper).
781 :raises: Error if device is in use.
783 assert os
.path
.exists(dev
)
785 raise Error('Device is mounted', dev
)
786 holders
= is_held(dev
)
788 raise Error('Device %s is in use by a device-mapper '
789 'mapping (dm-crypt?)' % dev
, ','.join(holders
))
791 if check_partitions
and not is_partition(dev
):
792 for partname
in list_partitions(dev
):
793 partition
= get_dev_path(partname
)
794 if is_mounted(partition
):
795 raise Error('Device is mounted', partition
)
796 holders
= is_held(partition
)
798 raise Error('Device %s is in use by a device-mapper '
799 'mapping (dm-crypt?)'
800 % partition
, ','.join(holders
))
803 def must_be_one_line(line
):
805 Checks if given line is really one single line.
807 :raises: TruncatedLineError or TooManyLinesError
808 :return: Content of the line, or None if line isn't valid.
810 if line
[-1:] != '\n':
811 raise TruncatedLineError(line
)
814 raise TooManyLinesError(line
)
818 def read_one_line(parent
, name
):
820 Read a file whose sole contents are a single line.
824 :return: Contents of the line, or None if file did not exist.
826 path
= os
.path
.join(parent
, name
)
828 line
= file(path
, 'rb').read()
830 if e
.errno
== errno
.ENOENT
:
836 line
= must_be_one_line(line
)
837 except (TruncatedLineError
, TooManyLinesError
) as e
:
839 'File is corrupt: {path}: {msg}'.format(
847 def write_one_line(parent
, name
, text
):
849 Write a file whose sole contents are a single line.
853 path
= os
.path
.join(parent
, name
)
854 tmp
= '{path}.{pid}.tmp'.format(path
=path
, pid
=os
.getpid())
855 with
file(tmp
, 'wb') as tmp_file
:
856 tmp_file
.write(text
+ '\n')
857 os
.fsync(tmp_file
.fileno())
858 path_set_context(tmp
)
864 Get a init system using 'ceph-detect-init'
866 init
= _check_output(
869 '--default', 'sysvinit',
872 init
= must_be_one_line(init
)
876 def check_osd_magic(path
):
878 Check that this path has the Ceph OSD magic.
880 :raises: BadMagicError if this does not look like a Ceph OSD data
883 magic
= read_one_line(path
, 'magic')
885 # probably not mkfs'ed yet
886 raise BadMagicError(path
)
887 if magic
!= CEPH_OSD_ONDISK_MAGIC
:
888 raise BadMagicError(path
)
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric: a non-empty string made only of the
    digits 0-9.

    :param osd_id: the osd id, as a string
    :raises: Error if osd_id is not numeric
    """
    # \Z rather than $: '$' also matches right before a trailing
    # newline, which would let an id such as '12\n' through.
    if not re.match(r'^[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
905 Allocates an OSD id on the given cluster.
907 :raises: Error if the call to allocate the OSD id fails.
908 :return: The allocated OSD id.
911 LOG
.debug('Allocating OSD id...')
913 osd_id
= _check_output(
916 '--cluster', cluster
,
917 '--name', 'client.bootstrap-osd',
918 '--keyring', keyring
,
919 'osd', 'create', '--concise',
923 except subprocess
.CalledProcessError
as e
:
924 raise Error('ceph osd create failed', e
, e
.output
)
925 osd_id
= must_be_one_line(osd_id
)
930 def get_osd_id(path
):
932 Gets the OSD id of the OSD at the given path.
934 osd_id
= read_one_line(path
, 'whoami')
935 if osd_id
is not None:
941 global CEPH_PREF_USER
943 if CEPH_PREF_USER
is not None:
945 pwd
.getpwnam(CEPH_PREF_USER
)
946 return CEPH_PREF_USER
948 print "No such user: " + CEPH_PREF_USER
958 def get_ceph_group():
959 global CEPH_PREF_GROUP
961 if CEPH_PREF_GROUP
is not None:
963 grp
.getgrnam(CEPH_PREF_GROUP
)
964 return CEPH_PREF_GROUP
966 print "No such group: " + CEPH_PREF_GROUP
def path_set_context(path):
    """
    Apply the default SELinux context and the ceph ownership to path,
    recursively.

    ``restorecon -R`` is run only when which() finds restorecon, and
    ``chown -R ceph:ceph`` only when get_ceph_user() returns 'ceph'.
    The outcome of both commands is ignored.

    :param path: file or directory to adjust
    """
    # restore selinux context to default policy values
    restorecon = which('restorecon')
    if restorecon:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
986 def _check_output(args
=None, **kwargs
):
987 out
, err
, ret
= command(args
, **kwargs
)
990 error
= subprocess
.CalledProcessError(ret
, cmd
)
991 error
.output
= out
+ err
996 def get_conf(cluster
, variable
):
998 Get the value of the given configuration variable from the
1001 :raises: Error if call to ceph-conf fails.
1002 :return: The variable value or None.
1005 out
, err
, ret
= command(
1008 '--cluster={cluster}'.format(
1017 except OSError as e
:
1018 raise Error('error executing ceph-conf', e
, err
)
1020 # config entry not found
1023 raise Error('getting variable from configuration failed')
1024 value
= out
.split('\n', 1)[0]
1025 # don't differentiate between "var=" and no var set
1031 def get_conf_with_default(cluster
, variable
):
1033 Get a config value that is known to the C++ code.
1035 This will fail if called on variables that are not defined in
1036 common config options.
1039 out
= _check_output(
1042 '--cluster={cluster}'.format(
1045 '--show-config-value={variable}'.format(
1051 except subprocess
.CalledProcessError
as e
:
1053 'getting variable from configuration failed',
1057 value
= str(out
).split('\n', 1)[0]
1061 def get_fsid(cluster
):
1063 Get the fsid of the cluster.
1065 :return: The fsid or raises Error.
1067 fsid
= get_conf_with_default(cluster
=cluster
, variable
='fsid')
1069 raise Error('getting cluster uuid from configuration failed')
1073 def get_dmcrypt_key_path(
1079 Get path to dmcrypt key file.
1081 :return: Path to the dmcrypt key file, callers should check for existence.
1084 path
= os
.path
.join(key_dir
, _uuid
+ ".luks.key")
1086 path
= os
.path
.join(key_dir
, _uuid
)
1091 def get_or_create_dmcrypt_key(
1098 Get path to existing dmcrypt key or create a new key file.
1100 :return: Path to the dmcrypt key file.
1102 path
= get_dmcrypt_key_path(_uuid
, key_dir
, luks
)
1103 if os
.path
.exists(path
):
1108 if not os
.path
.exists(key_dir
):
1109 os
.makedirs(key_dir
, stat
.S_IRUSR | stat
.S_IWUSR | stat
.S_IXUSR
)
1110 with
file('/dev/urandom', 'rb') as i
:
1111 key
= i
.read(key_size
/ 8)
1112 fd
= os
.open(path
, os
.O_WRONLY | os
.O_CREAT
,
1113 stat
.S_IRUSR | stat
.S_IWUSR
)
1114 assert os
.write(fd
, key
) == len(key
)
1118 raise Error('unable to read or create dm-crypt key', path
)
1125 cryptsetup_parameters
,
1130 Maps a device to a dmcrypt device.
1132 :return: Path to the dmcrypt device.
1134 dev
= '/dev/mapper/' + _uuid
1142 ] + cryptsetup_parameters
1160 ] + cryptsetup_parameters
1165 command_check_call(luksFormat_args
)
1166 command_check_call(luksOpen_args
)
1168 # Plain mode has no format function, nor any validation
1169 # that the key is correct.
1170 command_check_call(create_args
)
1171 # set proper ownership of mapped device
1172 command_check_call(['chown', 'ceph:ceph', dev
])
1175 except subprocess
.CalledProcessError
as e
:
1176 raise Error('unable to map device', rawdev
, e
)
1183 Removes the dmcrypt device with the given UUID.
1188 command_check_call(['cryptsetup', 'remove', _uuid
])
1190 except subprocess
.CalledProcessError
as e
:
1192 raise Error('unable to unmap device', _uuid
, e
)
1194 time
.sleep(0.5 + retries
* 1.0)
1204 Mounts a device with given filesystem type and
1205 mount options to a tempfile path under /var/lib/ceph/tmp.
1207 # sanity check: none of the arguments are None
1209 raise ValueError('dev may not be None')
1211 raise ValueError('fstype may not be None')
1213 # pick best-of-breed mount options based on fs type
1215 options
= MOUNT_OPTIONS
.get(fstype
, '')
1218 path
= tempfile
.mkdtemp(
1220 dir=STATEDIR
+ '/tmp',
1223 LOG
.debug('Mounting %s on %s with options %s', dev
, path
, options
)
1234 if which('restorecon'):
1241 except subprocess
.CalledProcessError
as e
:
1244 except (OSError, IOError):
1255 Unmount and removes the given mount point.
1260 LOG
.debug('Unmounting %s', path
)
1269 except subprocess
.CalledProcessError
as e
:
1270 # on failure, retry 3 times with incremental backoff
1272 raise UnmountError(e
)
1274 time
.sleep(0.5 + retries
* 1.0)
1280 ###########################################
def extract_parted_partition_numbers(partitions):
    """
    Extract the number found at the start of each line of the given
    text; lines that do not start with a digit are ignored.

    A real list is returned (never a lazy ``map`` object) so callers
    can test it for emptiness and pass it to ``max()``.

    :param partitions: multi-line text, one partition per line
    :return: list of int, in order of appearance
    """
    numbers_as_strings = re.findall(r'^\d+', partitions, re.MULTILINE)
    return [int(number) for number in numbers_as_strings]
1287 def get_free_partition_index(dev
):
1289 Get the next free partition index on a given device.
1291 :return: Index number (> 1 if there is already a partition on the device)
1292 or 1 if there is no partition table.
1295 lines
= _check_output(
1304 except subprocess
.CalledProcessError
as e
:
1305 LOG
.info('cannot read partition index; assume it '
1306 'isn\'t present\n (Error: %s)' % e
)
1310 raise Error('parted failed to output anything')
1311 LOG
.debug('get_free_partition_index: analyzing ' + lines
)
1312 if ('CHS;' not in lines
and
1313 'CYL;' not in lines
and
1314 'BYT;' not in lines
):
1315 raise Error('parted output expected to contain one of ' +
1316 'CHH; CYL; or BYT; : ' + lines
)
1317 if os
.path
.realpath(dev
) not in lines
:
1318 raise Error('parted output expected to contain ' + dev
+ ': ' + lines
)
1319 _
, partitions
= lines
.split(os
.path
.realpath(dev
))
1320 partition_numbers
= extract_parted_partition_numbers(partitions
)
1321 if partition_numbers
:
1322 return max(partition_numbers
) + 1
1327 def check_journal_reqs(args
):
1328 _
, _
, allows_journal
= command([
1329 'ceph-osd', '--check-allows-journal',
1331 '--cluster', args
.cluster
,
1333 _
, _
, wants_journal
= command([
1334 'ceph-osd', '--check-wants-journal',
1336 '--cluster', args
.cluster
,
1338 _
, _
, needs_journal
= command([
1339 'ceph-osd', '--check-needs-journal',
1341 '--cluster', args
.cluster
,
1343 return (not allows_journal
, not wants_journal
, not needs_journal
)
1346 def update_partition(dev
, description
):
1348 Must be called after modifying a partition table so the kernel
1349 know about the change and fire udev events accordingly. A side
1350 effect of partprobe is to remove partitions and add them again.
1351 The first udevadm settle waits for ongoing udev events to
1352 complete, just in case one of them rely on an existing partition
1353 on dev. The second udevadm settle guarantees to the caller that
1354 all udev events related to the partition table change have been
1355 processed, i.e. the 95-ceph-osd.rules actions and mode changes,
1356 group changes etc. are complete.
1358 LOG
.debug('Calling partprobe on %s device %s', description
, dev
)
1359 partprobe_ok
= False
1360 error
= 'unknown error'
1361 for i
in (1, 2, 3, 4, 5):
1362 command_check_call(['udevadm', 'settle', '--timeout=600'])
1364 _check_output(['partprobe', dev
])
1367 except subprocess
.CalledProcessError
as e
:
1369 if ('unable to inform the kernel' not in error
and
1370 'Device or resource busy' not in error
):
1372 LOG
.debug('partprobe %s failed : %s (ignored, waiting 60s)'
1375 if not partprobe_ok
:
1376 raise Error('partprobe %s failed : %s' % (dev
, error
))
1377 command_check_call(['udevadm', 'settle', '--timeout=600'])
1382 Destroy the partition table and content of a given disk.
1384 dev
= os
.path
.realpath(dev
)
1385 dmode
= os
.stat(dev
).st_mode
1386 if not stat
.S_ISBLK(dmode
) or is_partition(dev
):
1387 raise Error('not full block device; cannot zap', dev
)
1389 LOG
.debug('Zapping partition table on %s', dev
)
1391 # try to wipe out any GPT partition table backups. sgdisk
1392 # isn't too thorough.
1394 size
= 33 * lba_size
1395 with
file(dev
, 'wb') as dev_file
:
1396 dev_file
.seek(-size
, os
.SEEK_END
)
1397 dev_file
.write(size
* '\0')
1417 update_partition(dev
, 'zapped')
1419 except subprocess
.CalledProcessError
as e
:
1423 def adjust_symlink(target
, path
):
1425 if os
.path
.lexists(path
):
1427 mode
= os
.lstat(path
).st_mode
1428 if stat
.S_ISREG(mode
):
1429 LOG
.debug('Removing old file %s', path
)
1431 elif stat
.S_ISLNK(mode
):
1432 old
= os
.readlink(path
)
1434 LOG
.debug('Removing old symlink %s -> %s', path
, old
)
1439 raise Error('unable to remove (or adjust) old file (symlink)',
1442 LOG
.debug('Creating symlink %s -> %s', path
, target
)
1444 os
.symlink(target
, path
)
1446 raise Error('unable to create symlink %s -> %s' % (path
, target
))
1449 class Device(object):
1451 def __init__(self
, path
, args
):
1454 self
.dev_size
= None
1455 self
.partitions
= {}
1456 self
.ptype_map
= None
1457 assert not is_partition(self
.path
)
1459 def create_partition(self
, uuid
, name
, size
=0, num
=0):
1460 ptype
= self
.ptype_tobe_for_name(name
)
1462 num
= get_free_partition_index(dev
=self
.path
)
1464 new
= '--new={num}:0:+{size}M'.format(num
=num
, size
=size
)
1465 if size
> self
.get_dev_size():
1466 LOG
.error('refusing to create %s on %s' % (name
, self
.path
))
1467 LOG
.error('%s size (%sM) is bigger than device (%sM)'
1468 % (name
, size
, self
.get_dev_size()))
1469 raise Error('%s device size (%sM) is not big enough for %s'
1470 % (self
.path
, self
.get_dev_size(), name
))
1472 new
= '--largest-new={num}'.format(num
=num
)
1474 LOG
.debug('Creating %s partition num %d size %d on %s',
1475 name
, num
, size
, self
.path
)
1480 '--change-name={num}:ceph {name}'.format(num
=num
, name
=name
),
1481 '--partition-guid={num}:{uuid}'.format(num
=num
, uuid
=uuid
),
1482 '--typecode={num}:{uuid}'.format(num
=num
, uuid
=ptype
),
1488 update_partition(self
.path
, 'created')
1491 def ptype_tobe_for_name(self
, name
):
1494 if self
.ptype_map
is None:
1495 partition
= DevicePartition
.factory(
1496 path
=self
.path
, dev
=None, args
=self
.args
)
1497 self
.ptype_map
= partition
.ptype_map
1498 return self
.ptype_map
[name
]['tobe']
1500 def get_partition(self
, num
):
1501 if num
not in self
.partitions
:
1502 dev
= get_partition_dev(self
.path
, num
)
1503 partition
= DevicePartition
.factory(
1504 path
=self
.path
, dev
=dev
, args
=self
.args
)
1505 partition
.set_partition_number(num
)
1506 self
.partitions
[num
] = partition
1507 return self
.partitions
[num
]
1509 def get_dev_size(self
):
1510 if self
.dev_size
is None:
1511 self
.dev_size
= get_dev_size(self
.path
)
1512 return self
.dev_size
1515 def factory(path
, args
):
1516 return Device(path
, args
)
1519 class DevicePartition(object):
1521 def __init__(self
, args
):
1527 self
.ptype_map
= None
1529 self
.set_variables_ptype()
1532 if self
.uuid
is None:
1533 self
.uuid
= get_partition_uuid(self
.rawdev
)
1536 def get_ptype(self
):
1537 if self
.ptype
is None:
1538 self
.ptype
= get_partition_type(self
.rawdev
)
1541 def set_partition_number(self
, num
):
1544 def get_partition_number(self
):
1547 def set_dev(self
, dev
):
1554 def get_rawdev(self
):
1557 def set_variables_ptype(self
):
1558 self
.ptype_map
= PTYPE
['regular']
1560 def ptype_for_name(self
, name
):
1561 return self
.ptype_map
[name
]['ready']
1564 def factory(path
, dev
, args
):
1565 dmcrypt_type
= CryptHelpers
.get_dmcrypt_type(args
)
1566 if ((path
is not None and is_mpath(path
)) or
1567 (dev
is not None and is_mpath(dev
))):
1568 partition
= DevicePartitionMultipath(args
)
1569 elif dmcrypt_type
== 'luks':
1570 partition
= DevicePartitionCryptLuks(args
)
1571 elif dmcrypt_type
== 'plain':
1572 partition
= DevicePartitionCryptPlain(args
)
1574 partition
= DevicePartition(args
)
1575 partition
.set_dev(dev
)
class DevicePartitionMultipath(DevicePartition):
    """
    DevicePartition variant whose partition types come from the
    'mpath' group of PTYPE.
    """

    def set_variables_ptype(self):
        """Point ptype_map at the 'mpath' group of PTYPE."""
        mpath_types = PTYPE['mpath']
        self.ptype_map = mpath_types
1585 class DevicePartitionCrypt(DevicePartition
):
1587 def __init__(self
, args
):
1588 super(DevicePartitionCrypt
, self
).__init
__(args
)
1589 self
.osd_dm_keypath
= None
1590 self
.cryptsetup_parameters
= CryptHelpers
.get_cryptsetup_parameters(
1592 self
.dmcrypt_type
= CryptHelpers
.get_dmcrypt_type(self
.args
)
1593 self
.dmcrypt_keysize
= CryptHelpers
.get_dmcrypt_keysize(self
.args
)
1595 def setup_crypt(self
):
1600 self
.dev
= _dmcrypt_map(
1602 keypath
=self
.osd_dm_keypath
,
1603 _uuid
=self
.get_uuid(),
1604 cryptsetup_parameters
=self
.cryptsetup_parameters
,
1611 dmcrypt_unmap(self
.get_uuid())
1612 self
.dev
= self
.rawdev
1620 class DevicePartitionCryptPlain(DevicePartitionCrypt
):
1625 def setup_crypt(self
):
1626 if self
.osd_dm_keypath
is not None:
1629 self
.cryptsetup_parameters
+= ['--key-size', str(self
.dmcrypt_keysize
)]
1631 self
.osd_dm_keypath
= get_or_create_dmcrypt_key(
1632 self
.get_uuid(), self
.args
.dmcrypt_key_dir
,
1633 self
.dmcrypt_keysize
, False)
1635 def set_variables_ptype(self
):
1636 self
.ptype_map
= PTYPE
['plain']
1639 class DevicePartitionCryptLuks(DevicePartitionCrypt
):
1644 def setup_crypt(self
):
1645 if self
.osd_dm_keypath
is not None:
1648 if self
.dmcrypt_keysize
== 1024:
1649 # We don't force this into the cryptsetup_parameters,
1650 # as we want the cryptsetup defaults
1651 # to prevail for the actual LUKS key lengths.
1654 self
.cryptsetup_parameters
+= ['--key-size',
1655 str(self
.dmcrypt_keysize
)]
1657 self
.osd_dm_keypath
= get_or_create_dmcrypt_key(
1658 self
.get_uuid(), self
.args
.dmcrypt_key_dir
,
1659 self
.dmcrypt_keysize
, True)
1661 def set_variables_ptype(self
):
1662 self
.ptype_map
= PTYPE
['luks']
1665 class Prepare(object):
1669 parser
= argparse
.ArgumentParser(add_help
=False)
1670 parser
.add_argument(
1674 help='cluster name to assign this disk to',
1676 parser
.add_argument(
1679 help='cluster uuid to assign this disk to',
1681 parser
.add_argument(
1684 help='unique OSD uuid to assign this disk to',
1686 parser
.add_argument(
1688 action
='store_true', default
=None,
1689 help='encrypt DATA and/or JOURNAL devices with dm-crypt',
1691 parser
.add_argument(
1692 '--dmcrypt-key-dir',
1694 default
='/etc/ceph/dmcrypt-keys',
1695 help='directory where dm-crypt keys are stored',
1700 def set_subparser(subparsers
):
1703 PrepareData
.parser(),
1705 parents
.extend(PrepareFilestore
.parent_parsers())
1706 parents
.extend(PrepareBluestore
.parent_parsers())
1707 parser
= subparsers
.add_parser(
1710 help='Prepare a directory or disk for a Ceph OSD',
1712 parser
.set_defaults(
1718 prepare_lock
.acquire()
1719 self
.prepare_locked()
1720 prepare_lock
.release()
1725 return PrepareBluestore(args
)
1727 return PrepareFilestore(args
)
1731 Prepare
.factory(args
).prepare()
1734 class PrepareFilestore(Prepare
):
1736 def __init__(self
, args
):
1737 self
.data
= PrepareFilestoreData(args
)
1738 self
.journal
= PrepareJournal(args
)
1741 def parent_parsers():
1743 PrepareJournal
.parser(),
1746 def prepare_locked(self
):
1747 self
.data
.prepare(self
.journal
)
1750 class PrepareBluestore(Prepare
):
1752 def __init__(self
, args
):
1753 self
.data
= PrepareBluestoreData(args
)
1754 self
.block
= PrepareBluestoreBlock(args
)
1758 parser
= argparse
.ArgumentParser(add_help
=False)
1759 parser
.add_argument(
1761 action
='store_true', default
=None,
1762 help='bluestore objectstore',
1767 def parent_parsers():
1769 PrepareBluestore
.parser(),
1770 PrepareBluestoreBlock
.parser(),
1773 def prepare_locked(self
):
1774 self
.data
.prepare(self
.block
)
class Space(object):
    """Holder for the names of the auxiliary spaces an OSD may use."""

    # Every known space name, in a fixed order.
    NAMES = (
        'block',
        'journal',
    )
1782 class PrepareSpace(object):
1788 def __init__(self
, args
):
1791 self
.space_size
= self
.get_space_size()
1792 if (getattr(self
.args
, self
.name
) and
1793 getattr(self
.args
, self
.name
+ '_uuid') is None):
1794 setattr(self
.args
, self
.name
+ '_uuid', str(uuid
.uuid4()))
1795 self
.space_symlink
= None
1796 self
.space_dmcrypt
= None
1801 dmode
= os
.stat(args
.data
).st_mode
1802 if (self
.wants_space() and
1803 stat
.S_ISBLK(dmode
) and
1804 not is_partition(args
.data
) and
1805 getattr(args
, name
) is None and
1806 getattr(args
, name
+ '_file') is None):
1807 LOG
.info('Will colocate %s with data on %s',
1809 setattr(args
, name
, args
.data
)
1811 if getattr(args
, name
) is None:
1812 if getattr(args
, name
+ '_dev'):
1813 raise Error('%s is unspecified; not a block device' %
1814 name
.capitalize(), getattr(args
, name
))
1815 self
.type = self
.NONE
1818 if not os
.path
.exists(getattr(args
, name
)):
1819 if getattr(args
, name
+ '_dev'):
1820 raise Error('%s does not exist; not a block device' %
1821 name
.capitalize(), getattr(args
, name
))
1822 self
.type = self
.FILE
1825 mode
= os
.stat(getattr(args
, name
)).st_mode
1826 if stat
.S_ISBLK(mode
):
1827 if getattr(args
, name
+ '_file'):
1828 raise Error('%s is not a regular file' % name
.capitalize
,
1830 self
.type = self
.DEVICE
1833 if stat
.S_ISREG(mode
):
1834 if getattr(args
, name
+ '_dev'):
1835 raise Error('%s is not a block device' % name
.capitalize
,
1837 self
.type = self
.FILE
1839 raise Error('%s %s is neither a block device nor regular file' %
1840 (name
.capitalize
, geattr(args
, name
)))
1843 return self
.type == self
.NONE
1846 return self
.type == self
.FILE
def is_device(self):
    """Return True when this space's ``type`` equals ``self.DEVICE``."""
    kind = self.type
    return kind == self.DEVICE
1853 parser
= argparse
.ArgumentParser(add_help
=False)
1854 parser
.add_argument(
1857 help='unique uuid to assign to the %s' % name
,
1859 parser
.add_argument(
1861 action
='store_true', default
=None,
1862 help='verify that %s is a file' % name
.upper(),
1864 parser
.add_argument(
1866 action
='store_true', default
=None,
1867 help='verify that %s is a block device' % name
.upper(),
1869 parser
.add_argument(
1871 metavar
=name
.upper(),
1873 help=('path to OSD %s disk block device;' % name
+
1874 ' leave out to store %s in file' % name
),
1878 def wants_space(self
):
1881 def populate_data_path(self
, path
):
1882 if self
.type == self
.DEVICE
:
1883 self
.populate_data_path_device(path
)
1884 elif self
.type == self
.FILE
:
1885 self
.populate_data_path_file(path
)
1886 elif self
.type == self
.NONE
:
1889 raise Error('unexpected type ', self
.type)
def populate_data_path_file(self, path):
    """Record this space's uuid under *path* when one is set.

    The attribute ``<name>_uuid`` is read from ``self.args``; if it is not
    None it is handed to ``write_one_line(path, '<name>_uuid', value)``.
    Nothing is written when the uuid is None.
    """
    key = self.name + '_uuid'
    value = getattr(self.args, key)
    if value is None:
        return
    write_one_line(path, key, value)
1897 def populate_data_path_device(self
, path
):
1898 self
.populate_data_path_file(path
)
1899 if self
.space_symlink
is not None:
1900 adjust_symlink(self
.space_symlink
,
1901 os
.path
.join(path
, self
.name
))
1903 if self
.space_dmcrypt
is not None:
1904 adjust_symlink(self
.space_dmcrypt
,
1905 os
.path
.join(path
, self
.name
+ '_dmcrypt'))
1908 os
.unlink(os
.path
.join(path
, self
.name
+ '_dmcrypt'))
1913 if self
.type == self
.DEVICE
:
1914 self
.prepare_device()
1915 elif self
.type == self
.FILE
:
1917 elif self
.type == self
.NONE
:
1920 raise Error('unexpected type ', self
.type)
1922 def prepare_file(self
):
1923 if not os
.path
.exists(getattr(self
.args
, self
.name
)):
1924 LOG
.debug('Creating %s file %s with size 0'
1925 ' (ceph-osd will resize and allocate)',
1927 getattr(self
.args
, self
.name
))
1928 with
file(getattr(self
.args
, self
.name
), 'wb') as space_file
:
1931 LOG
.debug('%s is file %s',
1932 self
.name
.capitalize(),
1933 getattr(self
.args
, self
.name
))
1934 LOG
.warning('OSD will not be hot-swappable if %s is '
1935 'not the same device as the osd data' %
1937 self
.space_symlink
= space_file
1939 def prepare_device(self
):
1940 reusing_partition
= False
1942 if is_partition(getattr(self
.args
, self
.name
)):
1943 LOG
.debug('%s %s is a partition',
1944 self
.name
.capitalize(), getattr(self
.args
, self
.name
))
1945 partition
= DevicePartition
.factory(
1946 path
=None, dev
=getattr(self
.args
, self
.name
), args
=self
.args
)
1947 if isinstance(partition
, DevicePartitionCrypt
):
1948 raise Error(getattr(self
.args
, self
.name
) +
1949 ' partition already exists'
1950 ' and --dmcrypt specified')
1951 LOG
.warning('OSD will not be hot-swappable' +
1952 ' if ' + self
.name
+ ' is not' +
1953 ' the same device as the osd data')
1954 if partition
.get_ptype() == partition
.ptype_for_name(self
.name
):
1955 LOG
.debug('%s %s was previously prepared with '
1956 'ceph-disk. Reusing it.',
1957 self
.name
.capitalize(),
1958 getattr(self
.args
, self
.name
))
1959 reusing_partition
= True
1960 # Read and reuse the partition uuid from this journal's
1961 # previous life. We reuse the uuid instead of changing it
1962 # because udev does not reliably notice changes to an
1963 # existing partition's GUID. See
1964 # http://tracker.ceph.com/issues/10146
1965 setattr(self
.args
, self
.name
+ '_uuid', partition
.get_uuid())
1966 LOG
.debug('Reusing %s with uuid %s',
1968 getattr(self
.args
, self
.name
+ '_uuid'))
1970 LOG
.warning('%s %s was not prepared with '
1971 'ceph-disk. Symlinking directly.',
1972 self
.name
.capitalize(),
1973 getattr(self
.args
, self
.name
))
1974 self
.space_symlink
= getattr(self
.args
, self
.name
)
1977 self
.space_symlink
= '/dev/disk/by-partuuid/{uuid}'.format(
1978 uuid
=getattr(self
.args
, self
.name
+ '_uuid'))
1980 if self
.args
.dmcrypt
:
1981 self
.space_dmcrypt
= self
.space_symlink
1982 self
.space_symlink
= '/dev/mapper/{uuid}'.format(
1983 uuid
=getattr(self
.args
, self
.name
+ '_uuid'))
1985 if reusing_partition
:
1986 # confirm that the space_symlink exists. It should since
1987 # this was an active space
1988 # in the past. Continuing otherwise would be futile.
1989 assert os
.path
.exists(self
.space_symlink
)
1992 num
= self
.desired_partition_number()
1995 LOG
.warning('OSD will not be hot-swappable if %s '
1996 'is not the same device as the osd data',
1999 device
= Device
.factory(getattr(self
.args
, self
.name
), self
.args
)
2000 num
= device
.create_partition(
2001 uuid
=getattr(self
.args
, self
.name
+ '_uuid'),
2003 size
=self
.space_size
,
2006 partition
= device
.get_partition(num
)
2008 LOG
.debug('%s is GPT partition %s',
2009 self
.name
.capitalize(),
2012 if isinstance(partition
, DevicePartitionCrypt
):
2018 '--typecode={num}:{uuid}'.format(
2020 uuid
=partition
.ptype_for_name(self
.name
),
2023 getattr(self
.args
, self
.name
),
2027 LOG
.debug('%s is GPT partition %s',
2028 self
.name
.capitalize(),
2032 class PrepareJournal(PrepareSpace
):
2034 def __init__(self
, args
):
2035 self
.name
= 'journal'
2036 (self
.allows_journal
,
2038 self
.needs_journal
) = check_journal_reqs(args
)
2040 if args
.journal
and not self
.allows_journal
:
2041 raise Error('journal specified but not allowed by osd backend')
2043 super(PrepareJournal
, self
).__init
__(args
)
def wants_space(self):
    """Return the ``wants_journal`` flag stored on this instance."""
    wanted = self.wants_journal
    return wanted
2048 def get_space_size(self
):
2049 return int(get_conf_with_default(
2050 cluster
=self
.args
.cluster
,
2051 variable
='osd_journal_size',
2054 def desired_partition_number(self
):
2055 if self
.args
.journal
== self
.args
.data
:
2056 # we're sharing the disk between osd data and journal;
2057 # make journal be partition number 2
2065 return PrepareSpace
.parser('journal')
2068 class PrepareBluestoreBlock(PrepareSpace
):
2070 def __init__(self
, args
):
2072 super(PrepareBluestoreBlock
, self
).__init
__(args
)
def get_space_size(self):
    """Return the requested size for the block space.

    Zero is used to ask for as much space as possible.
    """
    as_much_as_possible = 0
    return as_much_as_possible
2077 def desired_partition_number(self
):
2078 if self
.args
.block
== self
.args
.data
:
2086 return PrepareSpace
.parser('block')
2089 class CryptHelpers(object):
2092 def get_cryptsetup_parameters(args
):
2093 cryptsetup_parameters_str
= get_conf(
2094 cluster
=args
.cluster
,
2095 variable
='osd_cryptsetup_parameters',
2097 if cryptsetup_parameters_str
is None:
2100 return shlex
.split(cryptsetup_parameters_str
)
2103 def get_dmcrypt_keysize(args
):
2104 dmcrypt_keysize_str
= get_conf(
2105 cluster
=args
.cluster
,
2106 variable
='osd_dmcrypt_key_size',
2108 dmcrypt_type
= CryptHelpers
.get_dmcrypt_type(args
)
2109 if dmcrypt_type
== 'luks':
2110 if dmcrypt_keysize_str
is None:
2111 # As LUKS will hash the 'passphrase' in .luks.key
2112 # into a key, set a large default
2113 # so if not updated for some time, it is still a
2118 return int(dmcrypt_keysize_str
)
2119 elif dmcrypt_type
== 'plain':
2120 if dmcrypt_keysize_str
is None:
2121 # This value is hard-coded in the udev script
2124 LOG
.warning('ensure the 95-ceph-osd.rules file has '
2125 'been copied to /etc/udev/rules.d '
2126 'and modified to call cryptsetup '
2127 'with --key-size=%s' % dmcrypt_keysize_str
)
2128 return int(dmcrypt_keysize_str
)
2133 def get_dmcrypt_type(args
):
2135 dmcrypt_type
= get_conf(
2136 cluster
=args
.cluster
,
2137 variable
='osd_dmcrypt_type',
2140 if dmcrypt_type
is None or dmcrypt_type
== 'luks':
2142 elif dmcrypt_type
== 'plain':
2145 raise Error('invalid osd_dmcrypt_type parameter '
2146 '(must be luks or plain): ', dmcrypt_type
)
2151 class PrepareData(object):
2156 def __init__(self
, args
):
2159 self
.partition
= None
2161 if self
.args
.cluster_uuid
is None:
2162 self
.args
.cluster_uuid
= get_fsid(cluster
=self
.args
.cluster
)
2164 if self
.args
.osd_uuid
is None:
2165 self
.args
.osd_uuid
= str(uuid
.uuid4())
2168 dmode
= os
.stat(self
.args
.data
).st_mode
2170 if stat
.S_ISDIR(dmode
):
2171 self
.type = self
.FILE
2172 elif stat
.S_ISBLK(dmode
):
2173 self
.type = self
.DEVICE
2175 raise Error('not a dir or block device', args
.data
)
2178 return self
.type == self
.FILE
def is_device(self):
    """Return True when the data ``type`` equals ``self.DEVICE``."""
    kind = self.type
    return kind == self.DEVICE
2185 parser
= argparse
.ArgumentParser(add_help
=False)
2186 parser
.add_argument(
2188 help='file system type to use (e.g. "ext4")',
2190 parser
.add_argument(
2192 action
='store_true', default
=None,
2193 help='destroy the partition table (and content) of a disk',
2195 parser
.add_argument(
2197 action
='store_true', default
=None,
2198 help='verify that DATA is a dir',
2200 parser
.add_argument(
2202 action
='store_true', default
=None,
2203 help='verify that DATA is a block device',
2205 parser
.add_argument(
2208 help='path to OSD data (a disk block device or directory)',
def populate_data_path_file(self, path, *to_prepare_list):
    """Populate a directory-backed data path.

    This simply forwards *path* and every entry of *to_prepare_list* to
    ``self.populate_data_path``; its return value is discarded.
    """
    populate = self.populate_data_path
    populate(path, *to_prepare_list)
2215 def populate_data_path(self
, path
, *to_prepare_list
):
2216 if os
.path
.exists(os
.path
.join(path
, 'magic')):
2217 LOG
.debug('Data dir %s already exists', path
)
2220 LOG
.debug('Preparing osd data dir %s', path
)
2222 if self
.args
.osd_uuid
is None:
2223 self
.args
.osd_uuid
= str(uuid
.uuid4())
2225 write_one_line(path
, 'ceph_fsid', self
.args
.cluster_uuid
)
2226 write_one_line(path
, 'fsid', self
.args
.osd_uuid
)
2227 write_one_line(path
, 'magic', CEPH_OSD_ONDISK_MAGIC
)
2229 for to_prepare
in to_prepare_list
:
2230 to_prepare
.populate_data_path(path
)
2232 def prepare(self
, *to_prepare_list
):
2233 if self
.type == self
.DEVICE
:
2234 self
.prepare_device(*to_prepare_list
)
2235 elif self
.type == self
.FILE
:
2236 self
.prepare_file(*to_prepare_list
)
2238 raise Error('unexpected type ', self
.type)
2240 def prepare_file(self
, *to_prepare_list
):
2242 if not os
.path
.exists(self
.args
.data
):
2243 raise Error('data path for directory does not exist',
2246 if self
.args
.data_dev
:
2247 raise Error('data path is not a block device', self
.args
.data
)
2249 for to_prepare
in to_prepare_list
:
2250 to_prepare
.prepare()
2252 self
.populate_data_path_file(self
.args
.data
, *to_prepare_list
)
2254 def sanity_checks(self
):
2255 if not os
.path
.exists(self
.args
.data
):
2256 raise Error('data path for device does not exist',
2258 verify_not_in_use(self
.args
.data
, True)
2260 def set_variables(self
):
2261 if self
.args
.fs_type
is None:
2262 self
.args
.fs_type
= get_conf(
2263 cluster
=self
.args
.cluster
,
2264 variable
='osd_mkfs_type',
2266 if self
.args
.fs_type
is None:
2267 self
.args
.fs_type
= get_conf(
2268 cluster
=self
.args
.cluster
,
2269 variable
='osd_fs_type',
2271 if self
.args
.fs_type
is None:
2272 self
.args
.fs_type
= DEFAULT_FS_TYPE
2274 self
.mkfs_args
= get_conf(
2275 cluster
=self
.args
.cluster
,
2276 variable
='osd_mkfs_options_{fstype}'.format(
2277 fstype
=self
.args
.fs_type
,
2280 if self
.mkfs_args
is None:
2281 self
.mkfs_args
= get_conf(
2282 cluster
=self
.args
.cluster
,
2283 variable
='osd_fs_mkfs_options_{fstype}'.format(
2284 fstype
=self
.args
.fs_type
,
2288 self
.mount_options
= get_conf(
2289 cluster
=self
.args
.cluster
,
2290 variable
='osd_mount_options_{fstype}'.format(
2291 fstype
=self
.args
.fs_type
,
2294 if self
.mount_options
is None:
2295 self
.mount_options
= get_conf(
2296 cluster
=self
.args
.cluster
,
2297 variable
='osd_fs_mount_options_{fstype}'.format(
2298 fstype
=self
.args
.fs_type
,
2302 # remove whitespaces
2303 self
.mount_options
= "".join(self
.mount_options
.split())
2305 if self
.args
.osd_uuid
is None:
2306 self
.args
.osd_uuid
= str(uuid
.uuid4())
2308 def prepare_device(self
, *to_prepare_list
):
2309 self
.sanity_checks()
2310 self
.set_variables()
2311 if self
.args
.zap_disk
is not None:
2314 def create_data_partition(self
):
2315 device
= Device
.factory(self
.args
.data
, self
.args
)
2316 partition_number
= 1
2317 device
.create_partition(uuid
=self
.args
.osd_uuid
,
2319 num
=partition_number
,
2320 size
=self
.get_space_size())
2321 return device
.get_partition(partition_number
)
def set_data_partition(self):
    """Set ``self.partition`` to the partition that will hold the OSD data.

    When ``self.args.data`` is already a partition it is wrapped with
    ``DevicePartition.factory`` and a warning is logged if its partition
    type differs from the 'ready' type registered for 'osd'. Otherwise a
    new data partition is created through ``create_data_partition()``.
    """
    # NOTE(review): the trailing argument of both LOG.debug() calls and the
    # ``else:`` line were not visible in the reviewed extract;
    # ``self.args.data`` and the if/else split are assumed -- confirm
    # against the full file before merging.
    if is_partition(self.args.data):
        LOG.debug('OSD data device %s is a partition',
                  self.args.data)
        self.partition = DevicePartition.factory(
            path=None, dev=self.args.data, args=self.args)
        # Read the type from the partition just stored on self and derive
        # the expected type from it. The bare names ``partition`` and
        # ``ptype_osd`` are not bound anywhere in this method, so using
        # them here would fail with NameError.
        ptype = self.partition.get_ptype()
        ptype_osd = self.partition.ptype_for_name('osd')
        if ptype != ptype_osd:
            LOG.warning('incorrect partition UUID: %s, expected %s'
                        % (ptype, ptype_osd))
    else:
        LOG.debug('Creating osd partition on %s',
                  self.args.data)
        self.partition = self.create_data_partition()
2338 def populate_data_path_device(self
, *to_prepare_list
):
2339 partition
= self
.partition
2341 if isinstance(partition
, DevicePartitionCrypt
):
2350 if self
.mkfs_args
is not None:
2351 args
.extend(self
.mkfs_args
.split())
2352 if self
.args
.fs_type
== 'xfs':
2353 args
.extend(['-f']) # always force
2355 args
.extend(MKFS_ARGS
.get(self
.args
.fs_type
, []))
2358 partition
.get_dev(),
2361 LOG
.debug('Creating %s fs on %s',
2362 self
.args
.fs_type
, partition
.get_dev())
2363 command_check_call(args
)
2364 except subprocess
.CalledProcessError
as e
:
2367 path
= mount(dev
=partition
.get_dev(),
2368 fstype
=self
.args
.fs_type
,
2369 options
=self
.mount_options
)
2372 self
.populate_data_path(path
, *to_prepare_list
)
2374 path_set_context(path
)
2377 if isinstance(partition
, DevicePartitionCrypt
):
2380 if not is_partition(self
.args
.data
):
2385 '--typecode=%d:%s' % (partition
.get_partition_number(),
2386 partition
.ptype_for_name('osd')),
2391 except subprocess
.CalledProcessError
as e
:
2393 update_partition(self
.args
.data
, 'prepared')
2394 command_check_call(['udevadm', 'trigger',
2397 os
.path
.basename(partition
.rawdev
)])
class PrepareFilestoreData(PrepareData):
    """Data preparation for a filestore OSD."""

    def get_space_size(self):
        """Return the requested data size; zero asks for as much as possible."""
        as_much_as_possible = 0
        return as_much_as_possible

    def prepare_device(self, *to_prepare_list):
        """Prepare a block device to hold the OSD data.

        The steps run in a fixed order: the base-class preparation, then
        ``prepare()`` on every entry of *to_prepare_list*, then the data
        partition is set, and finally the data path is populated.
        """
        base = super(PrepareFilestoreData, self)
        base.prepare_device(*to_prepare_list)
        # The extra spaces are prepared before the data partition is set.
        for space in to_prepare_list:
            space.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)
2413 class PrepareBluestoreData(PrepareData
):
2415 def get_space_size(self
):
2418 def prepare_device(self
, *to_prepare_list
):
2419 super(PrepareBluestoreData
, self
).prepare_device(*to_prepare_list
)
2420 self
.set_data_partition()
2421 for to_prepare
in to_prepare_list
:
2422 to_prepare
.prepare()
2423 self
.populate_data_path_device(*to_prepare_list
)
2425 def populate_data_path(self
, path
, *to_prepare_list
):
2426 super(PrepareBluestoreData
, self
).populate_data_path(path
,
2428 write_one_line(path
, 'type', 'bluestore')
2438 monmap
= os
.path
.join(path
, 'activate.monmap')
2442 '--cluster', cluster
,
2443 '--name', 'client.bootstrap-osd',
2444 '--keyring', keyring
,
2445 'mon', 'getmap', '-o', monmap
,
2449 osd_type
= read_one_line(path
, 'type')
2451 if osd_type
== 'bluestore':
2455 '--cluster', cluster
,
2462 '--keyring', os
.path
.join(path
, 'keyring'),
2463 '--setuser', get_ceph_user(),
2464 '--setgroup', get_ceph_user(),
2471 '--cluster', cluster
,
2477 '--osd-journal', os
.path
.join(path
, 'journal'),
2479 '--keyring', os
.path
.join(path
, 'keyring'),
2480 '--setuser', get_ceph_user(),
2481 '--setgroup', get_ceph_group(),
2493 # try dumpling+ cap scheme
2497 '--cluster', cluster
,
2498 '--name', 'client.bootstrap-osd',
2499 '--keyring', keyring
,
2500 'auth', 'add', 'osd.{osd_id}'.format(osd_id
=osd_id
),
2501 '-i', os
.path
.join(path
, 'keyring'),
2503 'mon', 'allow profile osd',
2506 except subprocess
.CalledProcessError
as err
:
2507 if err
.returncode
== errno
.EINVAL
:
2508 # try old cap scheme
2512 '--cluster', cluster
,
2513 '--name', 'client.bootstrap-osd',
2514 '--keyring', keyring
,
2515 'auth', 'add', 'osd.{osd_id}'.format(osd_id
=osd_id
),
2516 '-i', os
.path
.join(path
, 'keyring'),
2525 def get_mount_point(cluster
, osd_id
):
2526 parent
= STATEDIR
+ '/osd'
2527 return os
.path
.join(
2529 '{cluster}-{osd_id}'.format(cluster
=cluster
, osd_id
=osd_id
),
2541 LOG
.debug('Moving mount to final location...')
2542 osd_data
= get_mount_point(cluster
, osd_id
)
2543 maybe_mkdir(osd_data
)
2545 # pick best-of-breed mount options based on fs type
2546 if mount_options
is None:
2547 mount_options
= MOUNT_OPTIONS
.get(fstype
, '')
2549 # we really want to mount --move, but that is not supported when
2550 # the parent mount is shared, as it is by default on RH, Fedora,
2551 # and probably others. Also, --bind doesn't properly manipulate
2552 # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
2553 # this being 2013. Instead, mount the original device at the final
2568 '-l', # lazy, in case someone else is peeking at the
2580 LOG
.debug('Starting %s osd.%s...', cluster
, osd_id
)
2582 path
= (STATEDIR
+ '/osd/{cluster}-{osd_id}').format(
2583 cluster
=cluster
, osd_id
=osd_id
)
2586 if os
.path
.exists(os
.path
.join(path
, 'upstart')):
2590 # use emit, not start, because start would fail if the
2591 # instance was already running
2593 # since the daemon starting doesn't guarantee much about
2594 # the service being operational anyway, don't bother
2599 'cluster={cluster}'.format(cluster
=cluster
),
2600 'id={osd_id}'.format(osd_id
=osd_id
),
2603 elif os
.path
.exists(os
.path
.join(path
, 'sysvinit')):
2604 if os
.path
.exists('/usr/sbin/service'):
2605 svc
= '/usr/sbin/service'
2607 svc
= '/sbin/service'
2613 '{cluster}'.format(cluster
=cluster
),
2615 'osd.{osd_id}'.format(osd_id
=osd_id
),
2618 elif os
.path
.exists(os
.path
.join(path
, 'systemd')):
2623 'ceph-osd@{osd_id}'.format(osd_id
=osd_id
),
2630 'ceph-osd@{osd_id}'.format(osd_id
=osd_id
),
2634 raise Error('{cluster} osd.{osd_id} is not tagged '
2635 'with an init system'.format(
2639 except subprocess
.CalledProcessError
as e
:
2640 raise Error('ceph osd start failed', e
)
2647 LOG
.debug('Stoping %s osd.%s...', cluster
, osd_id
)
2649 path
= (STATEDIR
+ '/osd/{cluster}-{osd_id}').format(
2650 cluster
=cluster
, osd_id
=osd_id
)
2653 if os
.path
.exists(os
.path
.join(path
, 'upstart')):
2659 'cluster={cluster}'.format(cluster
=cluster
),
2660 'id={osd_id}'.format(osd_id
=osd_id
),
2663 elif os
.path
.exists(os
.path
.join(path
, 'sysvinit')):
2664 svc
= which('service')
2670 '{cluster}'.format(cluster
=cluster
),
2672 'osd.{osd_id}'.format(osd_id
=osd_id
),
2675 elif os
.path
.exists(os
.path
.join(path
, 'systemd')):
2680 'ceph-osd@{osd_id}'.format(osd_id
=osd_id
),
2687 'ceph-osd@{osd_id}'.format(osd_id
=osd_id
),
2691 raise Error('{cluster} osd.{osd_id} is not tagged with an init '
2692 ' system'.format(cluster
=cluster
, osd_id
=osd_id
))
2693 except subprocess
.CalledProcessError
as e
:
2694 raise Error('ceph osd stop failed', e
)
2700 fstype
= _check_output(
2703 # we don't want stale cached results
2711 fstype
= must_be_one_line(fstype
)
2715 def dmcrypt_map(dev
, dmcrypt_key_dir
):
2716 ptype
= get_partition_type(dev
)
2717 if ptype
in Ptype
.get_ready_by_type('plain'):
2719 cryptsetup_parameters
= ['--key-size', '256']
2720 elif ptype
in Ptype
.get_ready_by_type('luks'):
2722 cryptsetup_parameters
= []
2724 raise Error('--dmcrypt called for dev %s with invalid ptype %s'
2726 part_uuid
= get_partition_uuid(dev
)
2727 dmcrypt_key_path
= get_dmcrypt_key_path(part_uuid
, dmcrypt_key_dir
, luks
)
2728 return _dmcrypt_map(
2730 keypath
=dmcrypt_key_path
,
2732 cryptsetup_parameters
=cryptsetup_parameters
,
2740 activate_key_template
,
2748 part_uuid
= get_partition_uuid(dev
)
2749 dev
= dmcrypt_map(dev
, dmcrypt_key_dir
)
2751 fstype
= detect_fstype(dev
=dev
)
2752 except (subprocess
.CalledProcessError
,
2754 TooManyLinesError
) as e
:
2755 raise FilesystemTypeError(
2756 'device {dev}'.format(dev
=dev
),
2760 # TODO always using mount options from cluster=ceph for
2761 # now; see http://tracker.newdream.net/issues/3253
2762 mount_options
= get_conf(
2764 variable
='osd_mount_options_{fstype}'.format(
2769 if mount_options
is None:
2770 mount_options
= get_conf(
2772 variable
='osd_fs_mount_options_{fstype}'.format(
2777 # remove whitespaces from mount_options
2778 if mount_options
is not None:
2779 mount_options
= "".join(mount_options
.split())
2781 path
= mount(dev
=dev
, fstype
=fstype
, options
=mount_options
)
2783 # check if the disk is deactive, change the journal owner, group
2784 # mode for correct user and group.
2785 if os
.path
.exists(os
.path
.join(path
, 'deactive')):
2786 # logging to syslog will help us easy to know udev triggered failure
2789 # we need to unmap again because dmcrypt map will create again
2790 # on bootup stage (due to deactivate)
2791 if '/dev/mapper/' in dev
:
2792 part_uuid
= dev
.replace('/dev/mapper/', '')
2793 dmcrypt_unmap(part_uuid
)
2794 LOG
.info('OSD deactivated! reactivate with: --reactivate')
2795 raise Error('OSD deactivated! reactivate with: --reactivate')
2796 # flag to activate a deactive osd.
2804 (osd_id
, cluster
) = activate(path
, activate_key_template
, init
)
2806 # Now active successfully
2807 # If we got reactivate and deactive, remove the deactive file
2808 if deactive
and reactivate
:
2809 os
.remove(os
.path
.join(path
, 'deactive'))
2810 LOG
.info('Remove `deactive` file.')
2812 # check if the disk is already active, or if something else is already
2816 src_dev
= os
.stat(path
).st_dev
2818 dst_dev
= os
.stat((STATEDIR
+ '/osd/{cluster}-{osd_id}').format(
2820 osd_id
=osd_id
)).st_dev
2821 if src_dev
== dst_dev
:
2824 parent_dev
= os
.stat(STATEDIR
+ '/osd').st_dev
2825 if dst_dev
!= parent_dev
:
2827 elif os
.listdir(get_mount_point(cluster
, osd_id
)):
2828 LOG
.info(get_mount_point(cluster
, osd_id
) +
2829 " is not empty, won't override")
2836 LOG
.info('%s osd.%s already mounted in position; unmounting ours.'
2837 % (cluster
, osd_id
))
2840 raise Error('another %s osd.%s already mounted in position '
2841 '(old/different cluster instance?); unmounting ours.'
2842 % (cluster
, osd_id
))
2850 mount_options
=mount_options
,
2852 return (cluster
, osd_id
)
2855 LOG
.error('Failed to activate')
2859 # remove our temp dir
2860 if os
.path
.exists(path
):
2866 activate_key_template
,
2870 if not os
.path
.exists(path
):
2872 'directory %s does not exist' % path
2875 (osd_id
, cluster
) = activate(path
, activate_key_template
, init
)
2877 if init
not in (None, 'none'):
2878 canonical
= (STATEDIR
+ '/osd/{cluster}-{osd_id}').format(
2881 if path
!= canonical
:
2882 # symlink it from the proper location
2884 if os
.path
.lexists(canonical
):
2885 old
= os
.readlink(canonical
)
2887 LOG
.debug('Removing old symlink %s -> %s', canonical
, old
)
2889 os
.unlink(canonical
)
2891 raise Error('unable to remove old symlink', canonical
)
2895 LOG
.debug('Creating symlink %s -> %s', canonical
, path
)
2897 os
.symlink(path
, canonical
)
2899 raise Error('unable to create symlink %s -> %s'
2900 % (canonical
, path
))
2902 return (cluster
, osd_id
)
2905 def find_cluster_by_uuid(_uuid
):
2907 Find a cluster name by searching /etc/ceph/*.conf for a conf file
2908 with the right uuid.
2910 _uuid
= _uuid
.lower()
2912 if not os
.path
.exists(SYSCONFDIR
):
2914 for conf_file
in os
.listdir(SYSCONFDIR
):
2915 if not conf_file
.endswith('.conf'):
2917 cluster
= conf_file
[:-5]
2919 fsid
= get_fsid(cluster
)
2921 if e
.message
!= 'getting cluster uuid from configuration failed':
2923 no_fsid
.append(cluster
)
2927 # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
2928 if len(no_fsid
) == 1 and no_fsid
[0] == 'ceph':
2929 LOG
.warning('No fsid defined in ' + SYSCONFDIR
+
2930 '/ceph.conf; using anyway')
2937 activate_key_template
,
2941 check_osd_magic(path
)
2943 ceph_fsid
= read_one_line(path
, 'ceph_fsid')
2944 if ceph_fsid
is None:
2945 raise Error('No cluster uuid assigned.')
2946 LOG
.debug('Cluster uuid is %s', ceph_fsid
)
2948 cluster
= find_cluster_by_uuid(ceph_fsid
)
2950 raise Error('No cluster conf found in ' + SYSCONFDIR
+
2951 ' with fsid %s' % ceph_fsid
)
2952 LOG
.debug('Cluster name is %s', cluster
)
2954 fsid
= read_one_line(path
, 'fsid')
2956 raise Error('No OSD uuid assigned.')
2957 LOG
.debug('OSD uuid is %s', fsid
)
2959 keyring
= activate_key_template
.format(cluster
=cluster
,
2962 osd_id
= get_osd_id(path
)
2964 osd_id
= allocate_osd_id(
2969 write_one_line(path
, 'whoami', osd_id
)
2970 LOG
.debug('OSD id is %s', osd_id
)
2972 if not os
.path
.exists(os
.path
.join(path
, 'ready')):
2973 LOG
.debug('Initializing OSD...')
2974 # re-running mkfs is safe, so just run until it completes
2983 if init
not in (None, 'none'):
2985 conf_val
= get_conf(
2989 if conf_val
is not None:
2994 LOG
.debug('Marking with init system %s', init
)
2995 with
file(os
.path
.join(path
, init
), 'w'):
2998 # remove markers for others, just in case.
2999 for other
in INIT_SYSTEMS
:
3002 os
.unlink(os
.path
.join(path
, other
))
3006 if not os
.path
.exists(os
.path
.join(path
, 'active')):
3007 LOG
.debug('Authorizing OSD key...')
3014 write_one_line(path
, 'active', 'ok')
3015 LOG
.debug('%s osd.%s data dir is ready at %s', cluster
, osd_id
, path
)
3016 return (osd_id
, cluster
)
3019 def main_activate(args
):
3023 if not os
.path
.exists(args
.path
):
3024 raise Error('%s does not exist' % args
.path
)
3026 if is_suppressed(args
.path
):
3027 LOG
.info('suppressed activate request on %s', args
.path
)
3030 activate_lock
.acquire() # noqa
3032 mode
= os
.stat(args
.path
).st_mode
3033 if stat
.S_ISBLK(mode
):
3034 if (is_partition(args
.path
) and
3035 (get_partition_type(args
.path
) ==
3036 PTYPE
['mpath']['osd']['ready']) and
3037 not is_mpath(args
.path
)):
3038 raise Error('%s is not a multipath block device' %
3040 (cluster
, osd_id
) = mount_activate(
3042 activate_key_template
=args
.activate_key_template
,
3043 init
=args
.mark_init
,
3044 dmcrypt
=args
.dmcrypt
,
3045 dmcrypt_key_dir
=args
.dmcrypt_key_dir
,
3046 reactivate
=args
.reactivate
,
3048 osd_data
= get_mount_point(cluster
, osd_id
)
3050 elif stat
.S_ISDIR(mode
):
3051 (cluster
, osd_id
) = activate_dir(
3053 activate_key_template
=args
.activate_key_template
,
3054 init
=args
.mark_init
,
3056 osd_data
= args
.path
3059 raise Error('%s is not a directory or block device' % args
.path
)
3061 if (not args
.no_start_daemon
and args
.mark_init
== 'none'):
3065 '--cluster={cluster}'.format(cluster
=cluster
),
3066 '--id={osd_id}'.format(osd_id
=osd_id
),
3067 '--osd-data={path}'.format(path
=osd_data
),
3068 '--osd-journal={path}/journal'.format(path
=osd_data
),
3072 if (not args
.no_start_daemon
and
3073 args
.mark_init
not in (None, 'none')):
3081 activate_lock
.release() # noqa
3084 ###########################
3086 def _mark_osd_out(cluster
, osd_id
):
3087 LOG
.info('Prepare to mark osd.%d out...', osd_id
)
3096 def _check_osd_status(cluster
, osd_id
):
3098 report the osd status:
3099 00(0) : means OSD OUT AND DOWN
3100 01(1) : means OSD OUT AND UP
3101 10(2) : means OSD IN AND DOWN
3102 11(3) : means OSD IN AND UP
3104 LOG
.info("Checking osd id: %s ..." % osd_id
)
3107 out
, err
, ret
= command([
3111 '--cluster={cluster}'.format(
3117 out_json
= json
.loads(out
)
3118 for item
in out_json
[u
'osds']:
3119 if item
.get(u
'osd') == int(osd_id
):
3121 if item
.get(u
'in') is 1:
3123 if item
.get(u
'up') is 1:
3126 raise Error('Could not osd.%s in osd tree!' % osd_id
)
3130 def _remove_osd_directory_files(mounted_path
, cluster
):
3132 To remove the 'ready', 'active', INIT-specific files.
3134 if os
.path
.exists(os
.path
.join(mounted_path
, 'ready')):
3135 os
.remove(os
.path
.join(mounted_path
, 'ready'))
3136 LOG
.info('Remove `ready` file.')
3138 LOG
.info('`ready` file is already removed.')
3140 if os
.path
.exists(os
.path
.join(mounted_path
, 'active')):
3141 os
.remove(os
.path
.join(mounted_path
, 'active'))
3142 LOG
.info('Remove `active` file.')
3144 LOG
.info('`active` file is already removed.')
3146 # Just check `upstart` and `sysvinit` directly if filename is init-spec.
3147 conf_val
= get_conf(
3151 if conf_val
is not None:
3155 os
.remove(os
.path
.join(mounted_path
, init
))
3156 LOG
.info('Remove `%s` file.', init
)
def main_deactivate(args):
    """Run main_deactivate_locked(args) while holding the activate lock.

    The lock is released in a ``finally`` clause so that an exception
    raised by main_deactivate_locked() cannot leave the activate lock
    held.

    :param args: parsed command line arguments, passed through unchanged
    """
    activate_lock.acquire()  # noqa
    try:
        main_deactivate_locked(args)
    finally:
        activate_lock.release()  # noqa
3168 def main_deactivate_locked(args
):
3169 osd_id
= args
.deactivate_by_id
3173 devices
= list_devices()
3175 # list all devices and find the one we need
3176 for device
in devices
:
3177 if 'partitions' in device
:
3178 for dev_part
in device
.get('partitions'):
3180 'whoami' in dev_part
and
3181 dev_part
['whoami'] == osd_id
):
3182 target_dev
= dev_part
3184 'path' in dev_part
and
3185 dev_part
['path'] == path
):
3186 target_dev
= dev_part
3188 raise Error('Cannot find any match device!!')
3190 # set up all the variables we need
3191 osd_id
= target_dev
['whoami']
3192 part_type
= target_dev
['ptype']
3193 mounted_path
= target_dev
['mount']
3194 if Ptype
.is_dmcrypt(part_type
, 'osd'):
3197 # Do not do anything if osd is already down.
3198 status_code
= _check_osd_status(args
.cluster
, osd_id
)
3199 if status_code
== OSD_STATUS_IN_UP
:
3200 if args
.mark_out
is True:
3201 _mark_osd_out(args
.cluster
, int(osd_id
))
3202 stop_daemon(args
.cluster
, osd_id
)
3203 elif status_code
== OSD_STATUS_IN_DOWN
:
3204 if args
.mark_out
is True:
3205 _mark_osd_out(args
.cluster
, int(osd_id
))
3206 LOG
.info("OSD already out/down. Do not do anything now.")
3208 elif status_code
== OSD_STATUS_OUT_UP
:
3209 stop_daemon(args
.cluster
, osd_id
)
3210 elif status_code
== OSD_STATUS_OUT_DOWN
:
3211 LOG
.info("OSD already out/down. Do not do anything now.")
3214 # remove 'ready', 'active', and INIT-specific files.
3215 _remove_osd_directory_files(mounted_path
, args
.cluster
)
3217 # Write deactivate to osd directory!
3218 with
open(os
.path
.join(mounted_path
, 'deactive'), 'w'):
3219 path_set_context(os
.path
.join(mounted_path
, 'deactive'))
3221 unmount(mounted_path
)
3222 LOG
.info("Umount `%s` successfully.", mounted_path
)
3225 dmcrypt_unmap(target_dev
['uuid'])
3226 for name
in Space
.NAMES
:
3227 if name
+ '_uuid' in target_dev
:
3228 dmcrypt_unmap(target_dev
[name
+ '_uuid'])
3230 ###########################
3233 def _remove_from_crush_map(cluster
, osd_id
):
3234 LOG
.info("Prepare to remove osd.%s from crush map..." % osd_id
)
3244 def _delete_osd_auth_key(cluster
, osd_id
):
3245 LOG
.info("Prepare to delete osd.%s cephx key..." % osd_id
)
3254 def _deallocate_osd_id(cluster
, osd_id
):
3255 LOG
.info("Prepare to deallocate the osd-id: %s..." % osd_id
)
def destroy_lookup_device(args, predicate, description):
    """Return the first partition for which ``predicate(partition)`` is true.

    Every partition reported by list_devices() is examined. A partition
    with a truthy ``'dmcrypt'`` entry is temporarily mapped with
    dmcrypt_map() so that list_dev_osd() can add its OSD details to the
    partition dict before the predicate is evaluated.

    :param args: parsed arguments; ``args.dmcrypt_key_dir`` is passed to
                 dmcrypt_map()
    :param predicate: callable taking a partition dict, returning a truth
                      value
    :param description: text appended to the error when nothing matches
    :raises Error: if no partition satisfies the predicate
    """
    devices = list_devices()
    for device in devices:
        for partition in device.get('partitions', []):
            if partition['dmcrypt']:
                dmcrypt_path = dmcrypt_map(partition['path'],
                                           args.dmcrypt_key_dir)
                try:
                    list_dev_osd(dmcrypt_path, {}, partition)
                finally:
                    # The mapping only exists for the inspection above;
                    # remove it even if the inspection fails so that no
                    # stray device-mapper entry is left behind.
                    dmcrypt_unmap(partition['uuid'])
            if predicate(partition):
                return partition
    raise Error('found no device matching ', description)
3278 def main_destroy(args
):
3279 osd_id
= args
.destroy_by_id
3285 if not is_partition(path
):
3286 raise Error(path
+ " must be a partition device")
3287 path
= os
.path
.realpath(path
)
3290 target_dev
= destroy_lookup_device(
3291 args
, lambda x
: x
.get('path') == path
,
3294 target_dev
= destroy_lookup_device(
3295 args
, lambda x
: x
.get('whoami') == osd_id
,
3296 'osd id ' + str(osd_id
))
3298 osd_id
= target_dev
['whoami']
3299 dev_path
= target_dev
['path']
3300 if target_dev
['ptype'] == PTYPE
['mpath']['osd']['ready']:
3301 base_dev
= get_partition_base_mpath(dev_path
)
3303 base_dev
= get_partition_base(dev_path
)
3305 # Before osd deactivate, we cannot destroy it
3306 status_code
= _check_osd_status(args
.cluster
, osd_id
)
3307 if status_code
!= OSD_STATUS_OUT_DOWN
and \
3308 status_code
!= OSD_STATUS_IN_DOWN
:
3309 raise Error("Could not destroy the active osd. (osd-id: %s)" %
3312 # Remove OSD from crush map
3313 _remove_from_crush_map(args
.cluster
, osd_id
)
3315 # Remove OSD cephx key
3316 _delete_osd_auth_key(args
.cluster
, osd_id
)
3319 _deallocate_osd_id(args
.cluster
, osd_id
)
3321 # we remove the crypt map and device mapper (if dmcrypt is True)
3323 for name
in Space
.NAMES
:
3324 if target_dev
.get(name
+ '_uuid'):
3325 dmcrypt_unmap(target_dev
[name
+ '_uuid'])
3327 # Check zap flag. If we found zap flag, we need to find device for
3328 # destroy this osd data.
3329 if args
.zap
is True:
3330 # erase the osd data
3331 LOG
.info("Prepare to zap the device %s" % base_dev
)
3335 def get_space_osd_uuid(name
, path
):
3336 if not os
.path
.exists(path
):
3337 raise Error('%s does not exist' % path
)
3339 mode
= os
.stat(path
).st_mode
3340 if not stat
.S_ISBLK(mode
):
3341 raise Error('%s is not a block device' % path
)
3343 if (is_partition(path
) and
3344 get_partition_type(path
) in (PTYPE
['mpath']['journal']['ready'],
3345 PTYPE
['mpath']['block']['ready']) and
3346 not is_mpath(path
)):
3347 raise Error('%s is not a multipath block device' %
3351 out
= _check_output(
3354 '--get-device-fsid',
3359 except subprocess
.CalledProcessError
as e
:
3361 'failed to get osd uuid/fsid from %s' % name
,
3364 value
= str(out
).split('\n', 1)[0]
3365 LOG
.debug('%s %s has OSD UUID %s', name
.capitalize(), path
, value
)
3369 def main_activate_space(name
, args
):
3370 if not os
.path
.exists(args
.dev
):
3371 raise Error('%s does not exist' % args
.dev
)
3377 activate_lock
.acquire() # noqa
3380 dev
= dmcrypt_map(args
.dev
, args
.dmcrypt_key_dir
)
3383 # FIXME: For an encrypted journal dev, does this return the
3384 # cyphertext or plaintext dev uuid!? Also, if the journal is
3385 # encrypted, is the data partition also always encrypted, or
3386 # are mixed pairs supported!?
3387 osd_uuid
= get_space_osd_uuid(name
, dev
)
3388 path
= os
.path
.join('/dev/disk/by-partuuid/', osd_uuid
.lower())
3390 if is_suppressed(path
):
3391 LOG
.info('suppressed activate request on %s', path
)
3394 (cluster
, osd_id
) = mount_activate(
3396 activate_key_template
=args
.activate_key_template
,
3397 init
=args
.mark_init
,
3398 dmcrypt
=args
.dmcrypt
,
3399 dmcrypt_key_dir
=args
.dmcrypt_key_dir
,
3400 reactivate
=args
.reactivate
,
3409 activate_lock
.release() # noqa
3412 ###########################
3415 def main_activate_all(args
):
3416 dir = '/dev/disk/by-parttypeuuid'
3417 LOG
.debug('Scanning %s', dir)
3418 if not os
.path
.exists(dir):
3421 for name
in os
.listdir(dir):
3422 if name
.find('.') < 0:
3424 (tag
, uuid
) = name
.split('.')
3426 if tag
in Ptype
.get_ready_by_name('osd'):
3428 if Ptype
.is_dmcrypt(tag
, 'osd'):
3429 path
= os
.path
.join('/dev/mapper', uuid
)
3431 path
= os
.path
.join(dir, name
)
3433 if is_suppressed(path
):
3434 LOG
.info('suppressed activate request on %s', path
)
3437 LOG
.info('Activating %s', path
)
3438 activate_lock
.acquire() # noqa
3440 # never map dmcrypt cyphertext devices
3441 (cluster
, osd_id
) = mount_activate(
3443 activate_key_template
=args
.activate_key_template
,
3444 init
=args
.mark_init
,
3453 except Exception as e
:
3454 print >> sys
.stderr
, '{prog}: {msg}'.format(
3461 activate_lock
.release() # noqa
3463 raise Error('One or more partitions failed to activate')
3466 ###########################
3469 dev
= os
.path
.realpath(dev
)
3470 with
file('/proc/swaps', 'rb') as proc_swaps
:
3471 for line
in proc_swaps
.readlines()[1:]:
3472 fields
= line
.split()
3475 swaps_dev
= fields
[0]
3476 if swaps_dev
.startswith('/') and os
.path
.exists(swaps_dev
):
3477 swaps_dev
= os
.path
.realpath(swaps_dev
)
3478 if swaps_dev
== dev
:
def get_oneliner(base, name):
    """Return the first line of the file ``base/name``.

    Trailing whitespace (including the newline) is stripped from the
    line. ``None`` is returned when ``base/name`` is not a regular file.
    """
    path = os.path.join(base, name)
    if not os.path.isfile(path):
        return None
    with open(path, 'r') as _file:
        first_line = _file.readline()
    return first_line.rstrip()
3491 def get_dev_fs(dev
):
3492 fscheck
, _
, _
= command(
3500 if 'TYPE' in fscheck
:
3501 fstype
= fscheck
.split()[1].split('"')[1]
3507 def split_dev_base_partnum(dev
):
3509 partnum
= partnum_mpath(dev
)
3510 base
= get_partition_base_mpath(dev
)
3513 partnum
= open(os
.path
.join(b
, 'partition')).read().strip()
3514 base
= get_partition_base(dev
)
3515 return (base
, partnum
)
def get_partition_type(part):
    """Return the ``ID_PART_ENTRY_TYPE`` value that
    get_blkid_partition_info() reports for *part*."""
    return get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
def get_partition_uuid(part):
    """Return the ``ID_PART_ENTRY_UUID`` value that
    get_blkid_partition_info() reports for *part*."""
    return get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
3526 def get_blkid_partition_info(dev
, what
=None):
3527 out
, _
, _
= command(
3537 for line
in out
.splitlines():
3538 (key
, value
) = line
.split('=')
def more_osd_info(path, uuid_map, desc):
    """Fill *desc* with OSD details read from one-line files under *path*.

    Keys written to *desc*:

    - ``ceph_fsid``: content of the ``ceph_fsid`` file (``None`` if absent)
    - ``cluster``: find_cluster_by_uuid() result, only when ``ceph_fsid``
      is non-empty
    - ``whoami``: content of the ``whoami`` file (``None`` if absent)
    - ``<name>_uuid``: lower-cased content of the ``<name>_uuid`` file for
      each name in Space.NAMES, only when that file has content
    - ``<name>_dev``: ``uuid_map`` entry for that uuid, when present

    :param path: directory holding the OSD's one-line files
    :param uuid_map: mapping from lower-case uuid to a device
    :param desc: dict updated in place
    """
    desc['ceph_fsid'] = get_oneliner(path, 'ceph_fsid')
    if desc['ceph_fsid']:
        desc['cluster'] = find_cluster_by_uuid(desc['ceph_fsid'])
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        uuid = get_oneliner(path, name + '_uuid')
        if not uuid:
            # get_oneliner() yields None when the file does not exist
            # (and '' when it is empty); there is nothing to record and
            # calling .lower() on None would raise AttributeError.
            continue
        uuid_key = name + '_uuid'
        desc[uuid_key] = uuid.lower()
        if desc[uuid_key] in uuid_map:
            desc[name + '_dev'] = uuid_map[desc[uuid_key]]
3559 def list_dev_osd(dev
, uuid_map
, desc
):
3560 desc
['mount'] = is_mounted(dev
)
3561 desc
['fs_type'] = get_dev_fs(dev
)
3562 desc
['state'] = 'unprepared'
3564 desc
['state'] = 'active'
3565 more_osd_info(desc
['mount'], uuid_map
, desc
)
3566 elif desc
['fs_type']:
3568 tpath
= mount(dev
=dev
, fstype
=desc
['fs_type'], options
='')
3571 magic
= get_oneliner(tpath
, 'magic')
3572 if magic
is not None:
3573 desc
['magic'] = magic
3574 desc
['state'] = 'prepared'
3575 more_osd_info(tpath
, uuid_map
, desc
)
3582 def list_format_more_osd_info_plain(dev
):
3584 if dev
.get('ceph_fsid'):
3585 if dev
.get('cluster'):
3586 desc
.append('cluster ' + dev
['cluster'])
3588 desc
.append('unknown cluster ' + dev
['ceph_fsid'])
3589 if dev
.get('whoami'):
3590 desc
.append('osd.%s' % dev
['whoami'])
3591 for name
in Space
.NAMES
:
3592 if dev
.get(name
+ '_dev'):
3593 desc
.append(name
+ ' %s' % dev
[name
+ '_dev'])
3597 def list_format_dev_plain(dev
, prefix
=''):
3599 if dev
['ptype'] == PTYPE
['regular']['osd']['ready']:
3600 desc
= (['ceph data', dev
['state']] +
3601 list_format_more_osd_info_plain(dev
))
3602 elif Ptype
.is_dmcrypt(dev
['ptype'], 'osd'):
3603 dmcrypt
= dev
['dmcrypt']
3604 if not dmcrypt
['holders']:
3605 desc
= ['ceph data (dmcrypt %s)' % dmcrypt
['type'],
3606 'not currently mapped']
3607 elif len(dmcrypt
['holders']) == 1:
3608 holder
= get_dev_path(dmcrypt
['holders'][0])
3609 desc
= ['ceph data (dmcrypt %s %s)' %
3610 (dmcrypt
['type'], holder
)]
3611 desc
+= list_format_more_osd_info_plain(dev
)
3613 desc
= ['ceph data (dmcrypt %s)' % dmcrypt
['type'],
3614 'holders: ' + ','.join(dmcrypt
['holders'])]
3615 elif Ptype
.is_regular_space(dev
['ptype']):
3616 name
= Ptype
.space_ptype_to_name(dev
['ptype'])
3617 desc
.append('ceph ' + name
)
3618 if dev
.get(name
+ '_for'):
3619 desc
.append('for %s' % dev
[name
+ '_for'])
3620 elif Ptype
.is_dmcrypt_space(dev
['ptype']):
3621 name
= Ptype
.space_ptype_to_name(dev
['ptype'])
3622 dmcrypt
= dev
['dmcrypt']
3623 if dmcrypt
['holders'] and len(dmcrypt
['holders']) == 1:
3624 holder
= get_dev_path(dmcrypt
['holders'][0])
3625 desc
= ['ceph ' + name
+ ' (dmcrypt %s %s)' %
3626 (dmcrypt
['type'], holder
)]
3628 desc
= ['ceph ' + name
+ ' (dmcrypt %s)' % dmcrypt
['type']]
3629 if dev
.get(name
+ '_for'):
3630 desc
.append('for %s' % dev
[name
+ '_for'])
3632 desc
.append(dev
['type'])
3633 if dev
.get('fs_type'):
3634 desc
.append(dev
['fs_type'])
3635 elif dev
.get('ptype'):
3636 desc
.append(dev
['ptype'])
3637 if dev
.get('mount'):
3638 desc
.append('mounted on %s' % dev
['mount'])
3639 return '%s%s %s' % (prefix
, dev
['path'], ', '.join(desc
))
3642 def list_format_plain(devices
):
3644 for device
in devices
:
3645 if device
.get('partitions'):
3646 lines
.append('%s :' % device
['path'])
3647 for p
in sorted(device
['partitions']):
3648 lines
.append(list_format_dev_plain(dev
=p
,
3651 lines
.append(list_format_dev_plain(dev
=device
,
3653 return "\n".join(lines
)
3656 def list_dev(dev
, uuid_map
, space_map
):
3662 info
['is_partition'] = is_partition(dev
)
3663 if info
['is_partition']:
3664 ptype
= get_partition_type(dev
)
3665 info
['uuid'] = get_partition_uuid(dev
)
3668 info
['ptype'] = ptype
3669 LOG
.info("list_dev(dev = " + dev
+ ", ptype = " + str(ptype
) + ")")
3670 if ptype
in (PTYPE
['regular']['osd']['ready'],
3671 PTYPE
['mpath']['osd']['ready']):
3672 info
['type'] = 'data'
3673 if ptype
== PTYPE
['mpath']['osd']['ready']:
3674 info
['multipath'] = True
3675 list_dev_osd(dev
, uuid_map
, info
)
3676 elif ptype
== PTYPE
['plain']['osd']['ready']:
3677 holders
= is_held(dev
)
3678 info
['type'] = 'data'
3679 info
['dmcrypt']['holders'] = holders
3680 info
['dmcrypt']['type'] = 'plain'
3681 if len(holders
) == 1:
3682 list_dev_osd(get_dev_path(holders
[0]), uuid_map
, info
)
3683 elif ptype
== PTYPE
['luks']['osd']['ready']:
3684 holders
= is_held(dev
)
3685 info
['type'] = 'data'
3686 info
['dmcrypt']['holders'] = holders
3687 info
['dmcrypt']['type'] = 'LUKS'
3688 if len(holders
) == 1:
3689 list_dev_osd(get_dev_path(holders
[0]), uuid_map
, info
)
3690 elif Ptype
.is_regular_space(ptype
) or Ptype
.is_mpath_space(ptype
):
3691 name
= Ptype
.space_ptype_to_name(ptype
)
3693 if ptype
== PTYPE
['mpath'][name
]['ready']:
3694 info
['multipath'] = True
3695 if info
.get('uuid') in space_map
:
3696 info
[name
+ '_for'] = space_map
[info
['uuid']]
3697 elif Ptype
.is_plain_space(ptype
):
3698 name
= Ptype
.space_ptype_to_name(ptype
)
3699 holders
= is_held(dev
)
3701 info
['dmcrypt']['type'] = 'plain'
3702 info
['dmcrypt']['holders'] = holders
3703 if info
.get('uuid') in space_map
:
3704 info
[name
+ '_for'] = space_map
[info
['uuid']]
3705 elif Ptype
.is_luks_space(ptype
):
3706 name
= Ptype
.space_ptype_to_name(ptype
)
3707 holders
= is_held(dev
)
3709 info
['dmcrypt']['type'] = 'LUKS'
3710 info
['dmcrypt']['holders'] = holders
3711 if info
.get('uuid') in space_map
:
3712 info
[name
+ '_for'] = space_map
[info
['uuid']]
3714 path
= is_mounted(dev
)
3715 fs_type
= get_dev_fs(dev
)
3717 info
['type'] = 'swap'
3719 info
['type'] = 'other'
3721 info
['fs_type'] = fs_type
3723 info
['mount'] = path
3729 partmap
= list_all_partitions()
3733 for base
, parts
in sorted(partmap
.iteritems()):
3735 dev
= get_dev_path(p
)
3736 part_uuid
= get_partition_uuid(dev
)
3738 uuid_map
[part_uuid
] = dev
3739 ptype
= get_partition_type(dev
)
3740 LOG
.debug("main_list: " + dev
+
3741 " ptype = " + str(ptype
) +
3742 " uuid = " + str(part_uuid
))
3743 if ptype
in Ptype
.get_ready_by_name('osd'):
3744 if Ptype
.is_dmcrypt(ptype
, 'osd'):
3745 holders
= is_held(dev
)
3746 if len(holders
) != 1:
3748 dev_to_mount
= get_dev_path(holders
[0])
3752 fs_type
= get_dev_fs(dev_to_mount
)
3753 if fs_type
is not None:
3755 tpath
= mount(dev
=dev_to_mount
,
3756 fstype
=fs_type
, options
='')
3758 for name
in Space
.NAMES
:
3759 space_uuid
= get_oneliner(tpath
,
3762 space_map
[space_uuid
.lower()] = dev
3768 LOG
.debug("main_list: " + str(partmap
) + ", uuid_map = " +
3769 str(uuid_map
) + ", space_map = " + str(space_map
))
3772 for base
, parts
in sorted(partmap
.iteritems()):
3774 disk
= {'path': get_dev_path(base
)}
3776 for p
in sorted(parts
):
3777 partitions
.append(list_dev(get_dev_path(p
),
3780 disk
['partitions'] = partitions
3781 devices
.append(disk
)
3783 device
= list_dev(get_dev_path(base
), uuid_map
, space_map
)
3784 device
['path'] = get_dev_path(base
)
3785 devices
.append(device
)
3786 LOG
.debug("list_devices: " + str(devices
))
3790 def main_list(args
):
3791 devices
= list_devices()
3794 for path
in args
.path
:
3795 if os
.path
.exists(path
):
3796 paths
.append(os
.path
.realpath(path
))
3799 selected_devices
= []
3800 for device
in devices
:
3802 if re
.search(path
+ '$', device
['path']):
3803 selected_devices
.append(device
)
3805 selected_devices
= devices
3806 if args
.format
== 'json':
3807 print json
.dumps(selected_devices
)
3809 output
= list_format_plain(selected_devices
)
3814 ###########################
3816 # Mark devices that we want to suppress activates on with a
3819 # /var/lib/ceph/tmp/suppress-activate.sdb
3821 # where the last bit is the sanitized device name (/dev/X without the
3822 # /dev/ prefix) and the is_suppressed() check matches a prefix. That
3823 # means suppressing sdb will stop activate on sdb1, sdb2, etc.
3826 def is_suppressed(path
):
3827 disk
= os
.path
.realpath(path
)
3829 if (not disk
.startswith('/dev/') or
3830 not stat
.S_ISBLK(os
.lstat(disk
).st_mode
)):
3832 base
= get_dev_name(disk
)
3834 if os
.path
.exists(SUPPRESS_PREFIX
+ base
): # noqa
def set_suppress(path):
    """Flag the block device at *path* so activate requests are suppressed.

    An empty flag file named ``SUPPRESS_PREFIX + <device name>`` is
    created; only its existence matters.

    :param path: path to a block device; symbolic links are resolved first
    :raises Error: if the resolved path does not exist or is not a block
                   device
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Inspect the resolved device rather than the argument itself:
    # lstat() on a symlink (e.g. /dev/disk/by-*/...) describes the link,
    # never a block device, so valid devices were rejected. This also
    # matches is_suppressed(), which checks the resolved path.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    base = get_dev_name(disk)

    # open() rather than the Python 2-only file() builtin; nothing is
    # written to the flag file.
    with open(SUPPRESS_PREFIX + base, 'w'):  # noqa
        pass
    LOG.info('set suppress flag on %s', base)
def unset_suppress(path):
    """Remove the suppress-activate flag for the block device at *path*.

    :param path: path to a block device; symbolic links are resolved first
    :raises Error: if the resolved path does not exist, is not a block
                   device, is not currently marked as suppressed, or the
                   flag file cannot be removed
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    # Inspect the resolved device rather than the argument itself:
    # lstat() on a symlink (e.g. /dev/disk/by-*/...) describes the link,
    # never a block device, so valid devices were rejected.
    if not stat.S_ISBLK(os.lstat(disk).st_mode):
        raise Error('not a block device', path)
    assert disk.startswith('/dev/')
    base = get_dev_name(disk)

    fn = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(fn):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(fn)
        LOG.info('unset suppress flag on %s', base)
    except OSError as e:
        raise Error('failed to unsuppress', e)
def main_suppress(args):
    """Command entry point: call set_suppress() on ``args.path``."""
    set_suppress(args.path)
def main_unsuppress(args):
    """Command entry point registered for ``unsuppress-activate``: call
    unset_suppress() on ``args.path``."""
    unset_suppress(args.path)
3883 for dev
in args
.dev
:
3887 def main_trigger(args
):
3888 LOG
.debug("main_trigger: " + str(args
))
3889 if is_systemd() and not args
.sync
:
3890 # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
3891 escaped_dev
= args
.dev
[1:].replace('-', '\\x2d')
3892 service
= 'ceph-disk@{dev}.service'.format(dev
=escaped_dev
)
3893 LOG
.info('systemd detected, triggering %s' % service
)
3903 if is_upstart() and not args
.sync
:
3904 LOG
.info('upstart detected, triggering ceph-disk task')
3910 'dev={dev}'.format(dev
=args
.dev
),
3911 'pid={pid}'.format(pid
=os
.getpid()),
3916 parttype
= get_partition_type(args
.dev
)
3917 partid
= get_partition_uuid(args
.dev
)
3919 LOG
.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
3925 if parttype
in (PTYPE
['regular']['osd']['ready'],
3926 PTYPE
['mpath']['osd']['ready']):
3934 elif parttype
in (PTYPE
['regular']['journal']['ready'],
3935 PTYPE
['mpath']['journal']['ready']):
3944 # journals are easy: map, chown, activate-journal
3945 elif parttype
== PTYPE
['plain']['journal']['ready']:
3950 '/etc/ceph/dmcrypt-keys/{partid}'.format(partid
=partid
),
3958 newdev
= '/dev/mapper/' + partid
3960 while not os
.path
.exists(newdev
) and count
<= 10:
3972 '/usr/sbin/ceph-disk',
3977 elif parttype
== PTYPE
['luks']['journal']['ready']:
3982 '/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
3989 newdev
= '/dev/mapper/' + partid
3991 while not os
.path
.exists(newdev
) and count
<= 10:
4003 '/usr/sbin/ceph-disk',
4009 elif parttype
in (PTYPE
['regular']['block']['ready'],
4010 PTYPE
['mpath']['block']['ready']):
4019 # blocks are easy: map, chown, activate-block
4020 elif parttype
== PTYPE
['plain']['block']['ready']:
4025 '/etc/ceph/dmcrypt-keys/{partid}'.format(partid
=partid
),
4033 newdev
= '/dev/mapper/' + partid
4035 while not os
.path
.exists(newdev
) and count
<= 10:
4047 '/usr/sbin/ceph-disk',
4052 elif parttype
== PTYPE
['luks']['block']['ready']:
4057 '/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
4064 newdev
= '/dev/mapper/' + partid
4066 while not os
.path
.exists(newdev
) and count
<= 10:
4078 '/usr/sbin/ceph-disk',
4084 # osd data: map, activate
4085 elif parttype
== PTYPE
['plain']['osd']['ready']:
4090 '/etc/ceph/dmcrypt-keys/{partid}'.format(partid
=partid
),
4098 newdev
= '/dev/mapper/' + partid
4100 while not os
.path
.exists(newdev
) and count
<= 10:
4105 '/usr/sbin/ceph-disk',
4111 elif parttype
== PTYPE
['luks']['osd']['ready']:
4116 '/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
4123 newdev
= '/dev/mapper/' + partid
4125 while not os
.path
.exists(newdev
) and count
<= 10:
4130 '/usr/sbin/ceph-disk',
4137 raise Error('unrecognized partition type %s' % parttype
)
4140 def setup_statedir(dir):
4141 # XXX The following use of globals makes linting
4142 # really hard. Global state in Python is iffy and
4143 # should be avoided.
4147 if not os
.path
.exists(STATEDIR
):
4149 if not os
.path
.exists(STATEDIR
+ "/tmp"):
4150 os
.mkdir(STATEDIR
+ "/tmp")
4153 prepare_lock
= filelock(STATEDIR
+ '/tmp/ceph-disk.prepare.lock')
4155 global activate_lock
4156 activate_lock
= filelock(STATEDIR
+ '/tmp/ceph-disk.activate.lock')
4158 global SUPPRESS_PREFIX
4159 SUPPRESS_PREFIX
= STATEDIR
+ '/tmp/suppress-activate.'
4162 def setup_sysconfdir(dir):
4167 def parse_args(argv
):
4168 parser
= argparse
.ArgumentParser(
4171 parser
.add_argument(
4173 action
='store_true', default
=None,
4174 help='be more verbose',
4176 parser
.add_argument(
4178 action
='store_true', default
=None,
4179 help='log to stdout',
4181 parser
.add_argument(
4182 '--prepend-to-path',
4185 help=('prepend PATH to $PATH for backward compatibility '
4186 '(default /usr/bin)'),
4188 parser
.add_argument(
4191 default
='/var/lib/ceph',
4192 help=('directory in which ceph state is preserved '
4193 '(default /var/lib/ceph)'),
4195 parser
.add_argument(
4198 default
='/etc/ceph',
4199 help=('directory in which ceph configuration files are found '
4200 '(default /etc/ceph)'),
4202 parser
.add_argument(
4206 help='use the given user for subprocesses, rather than ceph or root'
4208 parser
.add_argument(
4212 help='use the given group for subprocesses, rather than ceph or root'
4214 parser
.set_defaults(
4215 # we want to hold on to this, for later
4219 subparsers
= parser
.add_subparsers(
4220 title
='subcommands',
4221 description
='valid subcommands',
4222 help='sub-command help',
4225 Prepare
.set_subparser(subparsers
)
4226 make_activate_parser(subparsers
)
4227 make_activate_block_parser(subparsers
)
4228 make_activate_journal_parser(subparsers
)
4229 make_activate_all_parser(subparsers
)
4230 make_list_parser(subparsers
)
4231 make_suppress_parser(subparsers
)
4232 make_deactivate_parser(subparsers
)
4233 make_destroy_parser(subparsers
)
4234 make_zap_parser(subparsers
)
4235 make_trigger_parser(subparsers
)
4237 args
= parser
.parse_args(argv
)
4241 def make_trigger_parser(subparsers
):
4242 trigger_parser
= subparsers
.add_parser(
4244 help='Trigger an event (caled by udev)')
4245 trigger_parser
.add_argument(
4249 trigger_parser
.add_argument(
4251 action
='store_true', default
=None,
4252 help=('do operation synchronously; do not trigger systemd'),
4254 trigger_parser
.set_defaults(
4257 return trigger_parser
4260 def make_activate_parser(subparsers
):
4261 activate_parser
= subparsers
.add_parser(
4263 help='Activate a Ceph OSD')
4264 activate_parser
.add_argument(
4266 action
='store_true', default
=None,
4267 help='mount a block device [deprecated, ignored]',
4269 activate_parser
.add_argument(
4272 help='bootstrap-osd keyring path template (%(default)s)',
4273 dest
='activate_key_template',
4275 activate_parser
.add_argument(
4277 metavar
='INITSYSTEM',
4278 help='init system to manage this dir',
4280 choices
=INIT_SYSTEMS
,
4282 activate_parser
.add_argument(
4283 '--no-start-daemon',
4284 action
='store_true', default
=None,
4285 help='do not start the daemon',
4287 activate_parser
.add_argument(
4290 help='path to block device or directory',
4292 activate_parser
.add_argument(
4294 action
='store_true', default
=None,
4295 help='map DATA and/or JOURNAL devices with dm-crypt',
4297 activate_parser
.add_argument(
4298 '--dmcrypt-key-dir',
4300 default
='/etc/ceph/dmcrypt-keys',
4301 help='directory where dm-crypt keys are stored',
4303 activate_parser
.add_argument(
4305 action
='store_true', default
=False,
4306 help='activate the deactived OSD',
4308 activate_parser
.set_defaults(
4309 activate_key_template
='{statedir}/bootstrap-osd/{cluster}.keyring',
4312 return activate_parser
def make_activate_block_parser(subparsers):
    """Return the sub-parser built by make_activate_space_parser() for
    the ``block`` space (the ``activate-block`` subcommand)."""
    return make_activate_space_parser('block', subparsers)
def make_activate_journal_parser(subparsers):
    """Return the sub-parser built by make_activate_space_parser() for
    the ``journal`` space (the ``activate-journal`` subcommand)."""
    return make_activate_space_parser('journal', subparsers)
4323 def make_activate_space_parser(name
, subparsers
):
4324 activate_space_parser
= subparsers
.add_parser(
4325 'activate-%s' % name
,
4326 help='Activate an OSD via its %s device' % name
)
4327 activate_space_parser
.add_argument(
4330 help='path to %s block device' % name
,
4332 activate_space_parser
.add_argument(
4335 help='bootstrap-osd keyring path template (%(default)s)',
4336 dest
='activate_key_template',
4338 activate_space_parser
.add_argument(
4340 metavar
='INITSYSTEM',
4341 help='init system to manage this dir',
4343 choices
=INIT_SYSTEMS
,
4345 activate_space_parser
.add_argument(
4347 action
='store_true', default
=None,
4348 help=('map data and/or auxiliariy (journal, etc.) '
4349 'devices with dm-crypt'),
4351 activate_space_parser
.add_argument(
4352 '--dmcrypt-key-dir',
4354 default
='/etc/ceph/dmcrypt-keys',
4355 help='directory where dm-crypt keys are stored',
4357 activate_space_parser
.add_argument(
4359 action
='store_true', default
=False,
4360 help='activate the deactived OSD',
4362 activate_space_parser
.set_defaults(
4363 activate_key_template
='{statedir}/bootstrap-osd/{cluster}.keyring',
4364 func
=lambda args
: main_activate_space(name
, args
),
4366 return activate_space_parser
4369 def make_activate_all_parser(subparsers
):
4370 activate_all_parser
= subparsers
.add_parser(
4372 help='Activate all tagged OSD partitions')
4373 activate_all_parser
.add_argument(
4376 help='bootstrap-osd keyring path template (%(default)s)',
4377 dest
='activate_key_template',
4379 activate_all_parser
.add_argument(
4381 metavar
='INITSYSTEM',
4382 help='init system to manage this dir',
4384 choices
=INIT_SYSTEMS
,
4386 activate_all_parser
.set_defaults(
4387 activate_key_template
='{statedir}/bootstrap-osd/{cluster}.keyring',
4388 func
=main_activate_all
,
4390 return activate_all_parser
4393 def make_list_parser(subparsers
):
4394 list_parser
= subparsers
.add_parser(
4396 help='List disks, partitions, and Ceph OSDs')
4397 list_parser
.add_argument(
4399 help='output format',
4401 choices
=['json', 'plain'],
4403 list_parser
.add_argument(
4407 help='path to block devices, relative to /sys/block',
4409 list_parser
.set_defaults(
4415 def make_suppress_parser(subparsers
):
4416 suppress_parser
= subparsers
.add_parser(
4417 'suppress-activate',
4418 help='Suppress activate on a device (prefix)')
4419 suppress_parser
.add_argument(
4422 help='path to block device or directory',
4424 suppress_parser
.set_defaults(
4428 unsuppress_parser
= subparsers
.add_parser(
4429 'unsuppress-activate',
4430 help='Stop suppressing activate on a device (prefix)')
4431 unsuppress_parser
.add_argument(
4434 help='path to block device or directory',
4436 unsuppress_parser
.set_defaults(
4437 func
=main_unsuppress
,
4439 return suppress_parser
4442 def make_deactivate_parser(subparsers
):
4443 deactivate_parser
= subparsers
.add_parser(
4445 help='Deactivate a Ceph OSD')
4446 deactivate_parser
.add_argument(
4450 help='cluster name to assign this disk to',
4452 deactivate_parser
.add_argument(
4456 help='path to block device or directory',
4458 deactivate_parser
.add_argument(
4459 '--deactivate-by-id',
4461 help='ID of OSD to deactive'
4463 deactivate_parser
.add_argument(
4465 action
='store_true', default
=False,
4466 help='option to mark the osd out',
4468 deactivate_parser
.set_defaults(
4469 func
=main_deactivate
,
4473 def make_destroy_parser(subparsers
):
4474 destroy_parser
= subparsers
.add_parser(
4476 help='Destroy a Ceph OSD')
4477 destroy_parser
.add_argument(
4481 help='cluster name to assign this disk to',
4483 destroy_parser
.add_argument(
4487 help='path to block device or directory',
4489 destroy_parser
.add_argument(
4492 help='ID of OSD to destroy'
4494 destroy_parser
.add_argument(
4495 '--dmcrypt-key-dir',
4497 default
='/etc/ceph/dmcrypt-keys',
4498 help=('directory where dm-crypt keys are stored '
4499 '(If you don\'t know how it work, '
4500 'dont use it. we have default value)'),
4502 destroy_parser
.add_argument(
4504 action
='store_true', default
=False,
4505 help='option to erase data and partition',
4507 destroy_parser
.set_defaults(
4512 def make_zap_parser(subparsers
):
4513 zap_parser
= subparsers
.add_parser(
4515 help='Zap/erase/destroy a device\'s partition table (and contents)')
4516 zap_parser
.add_argument(
4520 help='path to block device',
4522 zap_parser
.set_defaults(
4529 args
= parse_args(argv
)
4531 setup_logging(args
.verbose
, args
.log_stdout
)
4533 if args
.prepend_to_path
!= '':
4534 path
= os
.environ
.get('PATH', os
.defpath
)
4535 os
.environ
['PATH'] = args
.prepend_to_path
+ ":" + path
4537 setup_statedir(args
.statedir
)
4538 setup_sysconfdir(args
.sysconfdir
)
4540 global CEPH_PREF_USER
4541 CEPH_PREF_USER
= args
.setuser
4542 global CEPH_PREF_GROUP
4543 CEPH_PREF_GROUP
= args
.setgroup
4548 main_catch(args
.func
, args
)
4551 def setup_logging(verbose
, log_stdout
):
4552 loglevel
= logging
.WARNING
4554 loglevel
= logging
.DEBUG
4557 ch
= logging
.StreamHandler(stream
=sys
.stdout
)
4558 ch
.setLevel(loglevel
)
4559 formatter
= logging
.Formatter('%(filename)s: %(message)s')
4560 ch
.setFormatter(formatter
)
4562 LOG
.setLevel(loglevel
)
4564 logging
.basicConfig(
4569 def main_catch(func
, args
):
4576 '{prog}: {msg}'.format(
4582 except CephDiskException
as error
:
4583 exc_name
= error
.__class
__.__name
__
4585 '{prog} {exc_name}: {msg}'.format(
4596 if __name__
== '__main__':