"""
These utilities for prepare provide all the pieces needed to prepare a device
but also a compounded ("single call") helper to do them in order. Some plugins
may want to change some part of the process, while others might want to consume
the single-call helper.
"""
12 from ceph_volume
import process
, conf
, terminal
13 from ceph_volume
.util
import system
, constants
, str_to_int
, disk
15 logger
= logging
.getLogger(__name__
)
16 mlogger
= terminal
.MultiLogger(__name__
)
def create_key():
    """
    Generate a fresh auth key via ``ceph-authtool --gen-print-key``.

    :raises RuntimeError: when the tool exits non-zero.
    :return: the generated key as a stripped string.
    """
    # the key is a secret, so keep it out of the verbose logfile
    out, _, code = process.call(
        ['ceph-authtool', '--gen-print-key'],
        show_command=True,
        logfile_verbose=False)
    if code != 0:
        raise RuntimeError('Unable to generate a new auth key')
    return ' '.join(out).strip()
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Create a keyring file with the ``ceph-authtool`` utility. Constructs the
    path over well-known conventions for the OSD, and allows any other custom
    keyring name or entity name.

    :param osd_id: The ID for the OSD to be used
    :param secret: The key to be added as (as a string)
    :param name: Defaults to 'osd.{ID}' but can be used to add other client
                 names, specifically for 'lockbox' type of keys
    :param keyring_name: Alternative keyring name, for supporting other
                         types of keys like for lockbox
    """
    osd_keyring = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
    name = name or 'osd.%s' % str(osd_id)
    mlogger.info(f'Creating keyring file for {name}')
    # the secret must not end up in the verbose logfile
    process.call(
        [
            'ceph-authtool', osd_keyring,
            '--create-keyring',
            '--name', name,
            '--add-key', secret,
        ],
        logfile_verbose=False)
    # the OSD daemon (running as the ceph user) must own its keyring
    system.chown(osd_keyring)
def get_block_db_size(lv_format=True):
    """
    Helper to retrieve the configured size for creating the block.db logical
    volume: reads ``bluestore_block_db_size`` from ceph.conf, converts it into
    a ``Size`` object, and finally (optionally) formats it back as a string so
    that it can be used for creating the LV.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size``
                      object.

    .. note: Configuration values are in bytes, unlike journals which
             are defined in gigabytes
    """
    conf_db_size = None
    try:
        conf_db_size = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")

    if not conf_db_size:
        logger.debug(
            'block.db has no size configuration, will fallback to using as much as possible'
        )
        # TODO better to return disk.Size(b=0) here
        return None
    logger.debug('bluestore_block_db_size set to %s' % conf_db_size)
    db_size = disk.Size(b=str_to_int(conf_db_size))

    # anything under 2GB is useless for bluestore's DB, refuse early
    if db_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.db')
        raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % db_size)
    if lv_format:
        return '%sG' % db_size.gb.as_int()
    return db_size
def get_block_wal_size(lv_format=True):
    """
    Helper to retrieve the configured size for creating the block.wal logical
    volume: reads ``bluestore_block_wal_size`` from ceph.conf, converts it into
    a ``Size`` object, and finally (optionally) formats it back as a string so
    that it can be used for creating the LV.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size``
                      object.

    .. note: Configuration values are in bytes, unlike journals which
             are defined in gigabytes
    """
    conf_wal_size = None
    try:
        conf_wal_size = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")

    if not conf_wal_size:
        logger.debug(
            'block.wal has no size configuration, will fallback to using as much as possible'
        )
        return None
    logger.debug('bluestore_block_wal_size set to %s' % conf_wal_size)
    wal_size = disk.Size(b=str_to_int(conf_wal_size))

    # anything under 2GB is useless for bluestore's WAL, refuse early
    if wal_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.wal')
        raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % wal_size)
    if lv_format:
        return '%sG' % wal_size.gb.as_int()
    return wal_size
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with the monitors (``osd new``) and return its ID.

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor
    :param osd_id: Reuse an existing ID from an OSD that's been destroyed, if the
                   id does not exist in the cluster a new ID will be created
    :raises RuntimeError: when the requested id is unusable or the monitor
                          command fails.
    """
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    cmd = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        '-i', '-',
        'osd', 'new', fsid
    ]
    if osd_id is not None:
        if osd_id_available(osd_id):
            cmd.append(osd_id)
        else:
            raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
    # secrets are fed through stdin so they never appear on the command line
    out, _, code = process.call(
        cmd,
        stdin=json_secrets,
        show_command=True
    )
    if code != 0:
        raise RuntimeError('Unable to create a new OSD id')
    return ' '.join(out).strip()
def osd_id_available(osd_id):
    """
    Checks to see if an osd ID exists and if it's available for
    reuse. Returns True if it is, False if it isn't.

    :param osd_id: The osd ID to check
    :raises RuntimeError: when the ``osd tree`` query fails.
    """
    if osd_id is None:
        return False
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    out, _, code = process.call(
        [
            'ceph',
            '--cluster', conf.cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', bootstrap_keyring,
            'osd',
            'tree',
            '-f', 'json',
        ],
        show_command=True
    )
    if code != 0:
        raise RuntimeError('Unable check if OSD id exists: %s' % osd_id)

    tree = json.loads(''.join(out).strip())
    matches = [node for node in tree['nodes'] if str(node['id']) == str(osd_id)]
    # the id is free either when no OSD carries it, or the one that does has
    # been marked destroyed and may be replaced
    if not matches:
        return True
    return matches[0].get('status') == "destroyed"
def mount_tmpfs(path):
    """Mount a tmpfs filesystem at ``path`` and restore its SELinux context."""
    process.run([
        'mount',
        '-t',
        'tmpfs', 'tmpfs',
        path
    ])

    # Restore SELinux context
    system.set_context(path)
def create_osd_path(osd_id, tmpfs=False):
    """
    Create the OSD data directory (``/var/lib/ceph/osd/<cluster>-<id>``),
    optionally mounting a tmpfs on top of it.

    :param osd_id: The ID of the OSD whose directory should be created
    :param tmpfs: When True, mount a tmpfs at the created path
    """
    path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    # reuse the computed path instead of rebuilding the same string inline
    system.mkdir_p(path)
    if tmpfs:
        mount_tmpfs(path)
def format_device(device):
    """Create an XFS filesystem on ``device`` using ceph.conf mkfs options."""
    command = ['mkfs', '-t', 'xfs']

    # get the mkfs options if any for xfs,
    # fallback to the default options defined in constants.mkfs
    flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # always force the mkfs so existing signatures do not abort it
    if '-f' not in flags:
        flags.insert(0, '-f')

    command.extend(flags)
    command.append(device)
    process.run(command)
233 def _normalize_mount_flags(flags
, extras
=None):
235 Mount flag options have to be a single string, separated by a comma. If the
236 flags are separated by spaces, or with commas and spaces in ceph.conf, the
237 mount options will be passed incorrectly.
239 This will help when parsing ceph.conf values return something like::
247 :param flags: A list of flags, or a single string of mount flags
248 :param extras: Extra set of mount flags, useful when custom devices like VDO need
249 ad-hoc mount configurations
251 # Instead of using set(), we append to this new list here, because set()
252 # will create an arbitrary order on the items that is made worst when
253 # testing with tools like tox that includes a randomizer seed. By
254 # controlling the order, it is easier to correctly assert the expectation
256 if isinstance(flags
, list):
260 # ensure that spaces and commas are removed so that they can join
261 # correctly, remove duplicates
263 if f
and f
not in unique_flags
:
264 unique_flags
.append(f
.strip().strip(','))
265 return ','.join(unique_flags
)
267 # split them, clean them, and join them back again
268 flags
= flags
.strip().split(' ')
272 # remove possible duplicates
274 if f
and f
not in unique_flags
:
275 unique_flags
.append(f
.strip().strip(','))
276 flags
= ','.join(unique_flags
)
277 # Before returning, split them again, since strings can be mashed up
278 # together, preventing removal of duplicate entries
279 return ','.join(set(flags
.split(',')))
def mount_osd(device, osd_id, **kw):
    """
    Mount ``device`` as XFS on the OSD's data directory, honoring the
    ceph.conf mount options and adding 'discard' for VDO devices.
    """
    extras = []
    is_vdo = kw.get('is_vdo', '0')
    if is_vdo == '1':
        extras = ['discard']
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    command = ['mount', '-t', 'xfs', '-o']
    # mount options from ceph.conf, falling back to constants.mount defaults
    flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    command.append(
        _normalize_mount_flags(flags, extras=extras)
    )
    command.append(device)
    command.append(destination)
    process.run(command)

    # Restore SELinux context
    system.set_context(destination)
def _link_device(device, device_type, osd_id):
    """
    Allow linking any device type in an OSD directory. ``device`` must the be
    source, with an absolute path and ``device_type`` will be the destination
    name, like 'journal', or 'block'
    """
    link_target = '/var/lib/ceph/osd/%s-%s/%s' % (
        conf.cluster,
        osd_id,
        device_type
    )
    # chown the source device so the OSD daemon can open it through the link
    system.chown(device)
    process.run(['ln', '-s', device, link_target])
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Validate whether the given device is truly what it is supposed to be,
    by inspecting its bluestore label with ``ceph-bluestore-tool``.

    Exits the process (SystemExit) on any mismatch; returns None on success.

    :param device: path of the device to inspect
    :param excepted_device_type: label description that must match
                                 (e.g. 'bluefs wal', 'bluefs db')
    :param osd_uuid: the osd_uuid the device's label must carry
    """
    out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
    if err:
        terminal.error('ceph-bluestore-tool failed to run. %s'% err)
        raise SystemExit(1)
    if ret:
        terminal.error('no label on %s'% device)
        raise SystemExit(1)
    oj = json.loads(''.join(out))
    if device not in oj:
        # label decoded but our device is missing from it: unexpected
        terminal.error('%s not in the output of ceph-bluestore-tool, buggy?'% device)
        raise SystemExit(1)
    current_device_type = oj[device]['description']
    if current_device_type != excepted_device_type:
        terminal.error('%s is not a %s device but %s'% (device, excepted_device_type, current_device_type))
        raise SystemExit(1)
    current_osd_uuid = oj[device]['osd_uuid']
    if current_osd_uuid != osd_uuid:
        terminal.error('device %s is used by another osd %s as %s, should be %s'% (device, current_osd_uuid, current_device_type, osd_uuid))
        raise SystemExit(1)
def link_block(block_device, osd_id):
    """Symlink ``block_device`` as 'block' inside the OSD directory."""
    _link_device(block_device, 'block', osd_id)
def link_wal(wal_device, osd_id, osd_uuid=None):
    """Validate ``wal_device`` as a bluefs WAL for ``osd_uuid``, then link it."""
    _validate_bluestore_device(wal_device, 'bluefs wal', osd_uuid)
    _link_device(wal_device, 'block.wal', osd_id)
def link_db(db_device, osd_id, osd_uuid=None):
    """Validate ``db_device`` as a bluefs DB for ``osd_uuid``, then link it."""
    _validate_bluestore_device(db_device, 'bluefs db', osd_uuid)
    _link_device(db_device, 'block.db', osd_id)
def get_monmap(osd_id):
    """
    Before creating the OSD files, a monmap needs to be retrieved so that it
    can be used to tell the monitor(s) about the new OSD. A call will look like::

        ceph --cluster ceph --name client.bootstrap-osd \
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    monmap_destination = os.path.join(path, 'activate.monmap')

    process.run([
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        'mon', 'getmap', '-o', monmap_destination
    ])
def get_osdspec_affinity():
    """Return the OSD-spec affinity from the environment ('' when unset)."""
    return os.getenv('CEPH_VOLUME_OSDSPEC_AFFINITY', '')
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        # the keyring is fed via stdin below, never on the command line
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    if get_osdspec_affinity():
        base_command.extend(['--osdspec-affinity', get_osdspec_affinity()])

    command = base_command + supplementary_command

    # NOTE: this block used to be a bare triple-quoted string acting as a
    # comment; such a string is an executed expression statement, so it is
    # rewritten as real comments here.
    #
    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily. See KernelDevice.cc and _lock() to understand
    # how ceph-osd acquires the lock. Because this is really transient, we
    # retry up to 5 times and wait for 1 sec in-between.
    for retry in range(5):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            break
        if returncode == errno.EWOULDBLOCK:
            time.sleep(1)
            logger.info('disk is held by another process, trying to mkfs again... (%s/5 attempt)' % retry)
            continue
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))