]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/util/prepare.py
import ceph 16.2.7
[ceph.git] / ceph / src / ceph-volume / ceph_volume / util / prepare.py
CommitLineData
d2e6a577
FG
1"""
2These utilities for prepare provide all the pieces needed to prepare a device
3but also a compounded ("single call") helper to do them in order. Some plugins
4may want to change some part of the process, while others might want to consume
5the single-call helper
6"""
f91f0fd5 7import errno
d2e6a577
FG
8import os
9import logging
b32b8144 10import json
f91f0fd5 11import time
1adf2230
AA
12from ceph_volume import process, conf, __release__, terminal
13from ceph_volume.util import system, constants, str_to_int, disk
d2e6a577
FG
14
15logger = logging.getLogger(__name__)
1adf2230 16mlogger = terminal.MultiLogger(__name__)
d2e6a577
FG
17
18
def create_key():
    """
    Generate a new authentication key by running
    ``ceph-authtool --gen-print-key``.

    :returns: the captured stdout joined with spaces and stripped
    :raises RuntimeError: when the command exits with a non-zero code
    """
    out, _err, exit_code = process.call(
        ['ceph-authtool', '--gen-print-key'],
        show_command=True,
    )
    if exit_code == 0:
        return ' '.join(out).strip()
    raise RuntimeError('Unable to generate a new auth key')
26
27
b32b8144
FG
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Write a keyring file for an OSD using ``ceph-authtool`` and then hand the
    file to ``system.chown``. The file is placed inside the OSD directory,
    ``/var/lib/ceph/osd/{cluster}-{osd_id}/{keyring_name}``.

    :param osd_id: The ID of the OSD the keyring belongs to
    :param secret: The key to add, as a string
    :param keyring_name: File name of the keyring inside the OSD directory,
                         useful for other kinds of keys such as lockbox
    :param name: Entity name stored in the keyring, falls back to
                 'osd.{ID}' when not given (or empty)
    """
    keyring_path = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
    if not name:
        name = 'osd.%s' % str(osd_id)

    authtool_cmd = ['ceph-authtool', keyring_path, '--create-keyring']
    authtool_cmd += ['--name', name]
    authtool_cmd += ['--add-key', secret]

    process.run(authtool_cmd)
    system.chown(keyring_path)
d2e6a577
FG
51
52
1adf2230
AA
def get_journal_size(lv_format=True):
    """
    Read ``osd_journal_size`` from the ``osd`` section of the ceph
    configuration (falling back to ``'5120'``), interpret it as megabytes and
    return it either as a string usable for LV creation or as a ``Size``
    object.

    :param lv_format: When true, return a string with the integer amount of
                      gigabytes followed by 'G' (e.g. '5G'). Otherwise return
                      the ``disk.Size`` object itself.
    :raises RuntimeError: when the configured size is below 2GB
    """
    raw_size = conf.ceph.get_safe('osd', 'osd_journal_size', '5120')
    logger.debug('osd_journal_size set to %s' % raw_size)
    size = disk.Size(mb=str_to_int(raw_size))

    # anything under 2GB is rejected outright
    if size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for journal')
        raise RuntimeError('journal sizes must be larger than 2GB, detected: %s' % size)

    if not lv_format:
        return size
    return '%sG' % size.gb.as_int()
73
74
91327a77
AA
def get_block_db_size(lv_format=True):
    """
    Read ``bluestore_block_db_size`` from the ``osd`` section of the ceph
    configuration and return it either as a string usable for LV creation or
    as a ``Size`` object.

    :param lv_format: When true, return a string with the integer amount of
                      gigabytes followed by 'G' (e.g. '5G'). Otherwise return
                      the ``disk.Size`` object itself.
    :returns: ``None`` when no size is configured, or when reading the
              configuration raised ``RuntimeError``
    :raises RuntimeError: when the configured size is below 2GB

    .. note: The configured value is interpreted as bytes, whereas the
             journal size is interpreted as megabytes
    """
    try:
        raw_size = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")
        raw_size = None

    if raw_size:
        logger.debug('bluestore_block_db_size set to %s' % raw_size)
        size = disk.Size(b=str_to_int(raw_size))

        if size < disk.Size(gb=2):
            mlogger.error('Refusing to continue with configured size for block.db')
            raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % size)

        return '%sG' % size.gb.as_int() if lv_format else size

    logger.debug(
        'block.db has no size configuration, will fallback to using as much as possible'
    )
    # TODO better to return disk.Size(b=0) here
    return None
109
11fdf7f2
TL
def get_block_wal_size(lv_format=True):
    """
    Read ``bluestore_block_wal_size`` from the ``osd`` section of the ceph
    configuration and return it either as a string usable for LV creation or
    as a ``Size`` object.

    :param lv_format: When true, return a string with the integer amount of
                      gigabytes followed by 'G' (e.g. '5G'). Otherwise return
                      the ``disk.Size`` object itself.
    :returns: ``None`` when no size is configured, or when reading the
              configuration raised ``RuntimeError``
    :raises RuntimeError: when the configured size is below 2GB

    .. note: The configured value is interpreted as bytes, whereas the
             journal size is interpreted as megabytes
    """
    try:
        raw_size = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")
        raw_size = None

    if raw_size:
        logger.debug('bluestore_block_wal_size set to %s' % raw_size)
        size = disk.Size(b=str_to_int(raw_size))

        if size < disk.Size(gb=2):
            mlogger.error('Refusing to continue with configured size for block.wal')
            raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % size)

        return '%sG' % size.gb.as_int() if lv_format else size

    logger.debug(
        'block.wal has no size configuration, will fallback to using as much as possible'
    )
    return None
143
91327a77 144
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with ``ceph osd new`` using the bootstrap-osd keyring
    and return whatever the command printed on stdout (joined and stripped).

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor, fed to the command
                         through stdin
    :param osd_id: Optional ID to request. It is only appended to the command
                   when ``osd_id_available`` reports it as usable
    :raises RuntimeError: when ``osd_id`` is given but not available, or when
                          the command exits with a non-zero code
    """
    keyring_path = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    new_osd_cmd = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring_path,
        '-i', '-',
        'osd', 'new', fsid,
    ]

    if osd_id is not None:
        if not osd_id_available(osd_id):
            raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
        new_osd_cmd.append(osd_id)

    out, _err, exit_code = process.call(
        new_osd_cmd,
        stdin=json_secrets,
        show_command=True,
    )
    if exit_code != 0:
        raise RuntimeError('Unable to create a new OSD id')
    return ' '.join(out).strip()
175
176
def osd_id_available(osd_id):
    """
    Report whether an osd ID can be used: either no node with that ID shows
    up in ``ceph osd tree``, or the first matching node has a status of
    "destroyed".

    :param osd_id: The osd ID to check, ``None`` is never available
    :returns: True if the ID can be used, False otherwise
    :raises RuntimeError: when the ``ceph osd tree`` call exits non-zero
    """
    if osd_id is None:
        return False

    keyring_path = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    tree_cmd = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring_path,
        'osd',
        'tree',
        '-f', 'json',
    ]
    out, _err, exit_code = process.call(tree_cmd, show_command=True)
    if exit_code != 0:
        raise RuntimeError('Unable check if OSD id exists: %s' % osd_id)

    tree = json.loads(''.join(out).strip())
    # IDs are compared as strings so that ints and strings both match
    wanted = str(osd_id)
    matches = [node for node in tree['nodes'] if str(node['id']) == wanted]
    if not matches:
        return True
    return matches[0].get('status') == "destroyed"
d2e6a577
FG
209
210
3efd9988
FG
def mount_tmpfs(path):
    """
    Mount a tmpfs filesystem on ``path`` and then call
    ``system.set_context`` on it.

    :param path: Directory to use as the mount point
    """
    tmpfs_mount = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(tmpfs_mount)

    # Restore SELinux context
    system.set_context(path)
221
3efd9988
FG
222
def create_osd_path(osd_id, tmpfs=False):
    """
    Create the OSD directory (``/var/lib/ceph/osd/{cluster}-{osd_id}``) and
    optionally mount a tmpfs on top of it.

    :param osd_id: The ID of the OSD the directory is for
    :param tmpfs: When true, mount a tmpfs on the newly created directory
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    system.mkdir_p(osd_dir)
    if not tmpfs:
        return
    mount_tmpfs(osd_dir)
d2e6a577
FG
228
229
def format_device(device):
    """
    Run ``mkfs -t xfs`` on ``device`` (xfs is the only supported filesystem).

    The flags come from ``osd_mkfs_options_xfs`` in the ``osd`` section of the
    ceph configuration, with ``constants.mkfs`` providing the default, and
    ``-f`` is always included.

    :param device: The device to format
    """
    mkfs_flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # always force
    if '-f' not in mkfs_flags:
        mkfs_flags.insert(0, '-f')

    mkfs_cmd = ['mkfs', '-t', 'xfs']
    mkfs_cmd += mkfs_flags
    mkfs_cmd += [device]
    process.run(mkfs_cmd)
250
251
94b18763 252def _normalize_mount_flags(flags, extras=None):
3a9019d9
FG
253 """
254 Mount flag options have to be a single string, separated by a comma. If the
255 flags are separated by spaces, or with commas and spaces in ceph.conf, the
256 mount options will be passed incorrectly.
257
258 This will help when parsing ceph.conf values return something like::
259
260 ["rw,", "exec,"]
261
262 Or::
263
264 [" rw ,", "exec"]
265
266 :param flags: A list of flags, or a single string of mount flags
94b18763
FG
267 :param extras: Extra set of mount flags, useful when custom devices like VDO need
268 ad-hoc mount configurations
3a9019d9 269 """
94b18763
FG
270 # Instead of using set(), we append to this new list here, because set()
271 # will create an arbitrary order on the items that is made worst when
272 # testing with tools like tox that includes a randomizer seed. By
273 # controlling the order, it is easier to correctly assert the expectation
274 unique_flags = []
3a9019d9 275 if isinstance(flags, list):
94b18763
FG
276 if extras:
277 flags.extend(extras)
278
3a9019d9 279 # ensure that spaces and commas are removed so that they can join
94b18763
FG
280 # correctly, remove duplicates
281 for f in flags:
282 if f and f not in unique_flags:
283 unique_flags.append(f.strip().strip(','))
284 return ','.join(unique_flags)
3a9019d9
FG
285
286 # split them, clean them, and join them back again
287 flags = flags.strip().split(' ')
94b18763
FG
288 if extras:
289 flags.extend(extras)
290
291 # remove possible duplicates
292 for f in flags:
293 if f and f not in unique_flags:
294 unique_flags.append(f.strip().strip(','))
295 flags = ','.join(unique_flags)
296 # Before returning, split them again, since strings can be mashed up
297 # together, preventing removal of duplicate entries
298 return ','.join(set(flags.split(',')))
299
300
def mount_osd(device, osd_id, **kw):
    """
    Mount ``device`` as xfs on the OSD directory
    (``/var/lib/ceph/osd/{cluster}-{osd_id}``) and then call
    ``system.set_context`` on the mount point.

    Mount options come from ``osd_mount_options_xfs`` in the ``osd`` section
    of the ceph configuration, with ``constants.mount`` providing the default.

    :param device: The device to mount
    :param osd_id: The ID of the OSD, used to build the mount point
    :param is_vdo: Optional keyword, when it is the string ``'1'`` the
                   ``discard`` mount option is added
    """
    vdo_extras = ['discard'] if kw.get('is_vdo', '0') == '1' else []
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)

    xfs_flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    mount_options = _normalize_mount_flags(xfs_flags, extras=vdo_extras)

    process.run(['mount', '-t', 'xfs', '-o', mount_options, device, osd_dir])

    # Restore SELinux context
    system.set_context(osd_dir)
323
d2e6a577 324
3efd9988
FG
def _link_device(device, device_type, osd_id):
    """
    Symlink ``device`` into the OSD directory under the name ``device_type``
    (like 'journal', or 'block'). ``device`` is the link source and must be an
    absolute path; it is passed to ``system.chown`` before the link is
    created.
    """
    link_name = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, device_type)
    system.chown(device)
    process.run(['ln', '-s', device, link_name])
340
92f5a8d4
TL
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Check the label reported by ``ceph-bluestore-tool show-label`` for
    ``device``: its ``description`` must equal ``excepted_device_type`` and
    its ``osd_uuid`` must equal ``osd_uuid``.

    Any failure (stderr output, non-zero exit code, device missing from the
    output, or a mismatch) is reported with ``terminal.error`` and ends with
    ``SystemExit(1)``.
    """
    def _abort(message):
        # report and stop, every validation failure is fatal
        terminal.error(message)
        raise SystemExit(1)

    stdout, stderr, exit_code = process.call(
        ['ceph-bluestore-tool', 'show-label', '--dev', device]
    )
    if stderr:
        _abort('ceph-bluestore-tool failed to run. %s'% stderr)
    if exit_code:
        _abort('no label on %s'% device)

    labels = json.loads(''.join(stdout))
    if device not in labels:
        _abort('%s not in the output of ceph-bluestore-tool, buggy?'% device)

    label = labels[device]
    found_type = label['description']
    if found_type != excepted_device_type:
        _abort('%s is not a %s device but %s'% (device, excepted_device_type, found_type))

    found_uuid = label['osd_uuid']
    if found_uuid != osd_uuid:
        _abort('device %s is used by another osd %s as %s, should be %s'% (device, found_uuid, found_type, osd_uuid))
d2e6a577 365
3efd9988
FG
def link_journal(journal_device, osd_id):
    """
    Link ``journal_device`` into the OSD directory as ``journal``.
    """
    link_name = 'journal'
    _link_device(journal_device, link_name, osd_id)
368
369
def link_block(block_device, osd_id):
    """
    Link ``block_device`` into the OSD directory as ``block``.
    """
    link_name = 'block'
    _link_device(block_device, link_name, osd_id)
372
373
92f5a8d4
TL
def link_wal(wal_device, osd_id, osd_uuid=None):
    """
    Validate that ``wal_device`` is labeled as a 'bluefs wal' device for
    ``osd_uuid`` and then link it into the OSD directory as ``block.wal``.
    """
    expected_label = 'bluefs wal'
    _validate_bluestore_device(wal_device, expected_label, osd_uuid)
    _link_device(wal_device, 'block.wal', osd_id)
377
378
92f5a8d4
TL
def link_db(db_device, osd_id, osd_uuid=None):
    """
    Validate that ``db_device`` is labeled as a 'bluefs db' device for
    ``osd_uuid`` and then link it into the OSD directory as ``block.db``.
    """
    expected_label = 'bluefs db'
    _validate_bluestore_device(db_device, expected_label, osd_uuid)
    _link_device(db_device, 'block.db', osd_id)
382
383
d2e6a577
FG
def get_monmap(osd_id):
    """
    Fetch the monmap with the bootstrap-osd keyring and store it in the OSD
    directory as ``activate.monmap``. The resulting call looks like::

        ceph --cluster ceph --name client.bootstrap-osd
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap

    :param osd_id: The ID of the OSD, used to build the destination path
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    keyring_path = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    destination = os.path.join(osd_dir, 'activate.monmap')

    getmap_cmd = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring_path,
        'mon', 'getmap', '-o', destination,
    ]
    process.run(getmap_cmd)
404
405
e306af50
TL
def get_osdspec_affinity():
    """
    Return the value of the ``CEPH_VOLUME_OSDSPEC_AFFINITY`` environment
    variable, or an empty string when it is not set.
    """
    return os.getenv('CEPH_VOLUME_OSDSPEC_AFFINITY', '')
408
409
3efd9988
FG
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for a bluestore OSD to function by running
    ``ceph-osd --mkfs``. The command that gets built looks like::

        ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 0
                 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap
                 [--keyfile -]
                 [--bluestore-block-wal-path WAL]
                 [--bluestore-block-db-path DB]
                 [--osdspec-affinity AFFINITY]
                 --osd-data /var/lib/ceph/osd/ceph-0/
                 --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c
                 --setuser ceph --setgroup ceph

    :param osd_id: The ID of the OSD
    :param fsid: The uuid of the OSD, passed as ``--osd-uuid``
    :param keyring: When not ``None``, ``--keyfile -`` is added to the
                    command. The value is always passed to the command as
                    stdin
    :param wal: Path of the block.wal device, if any. It is passed to
                ``system.chown`` and to ``--bluestore-block-wal-path``
    :param db: Path of the block.db device, if any. It is passed to
               ``system.chown`` and to ``--bluestore-block-db-path``
    :raises RuntimeError: when the command fails with an exit code other than
                          ``EWOULDBLOCK``, or when it keeps failing with
                          ``EWOULDBLOCK`` on every attempt
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        # the key itself goes through stdin
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    osdspec_affinity = get_osdspec_affinity()
    if osdspec_affinity:
        base_command.extend(['--osdspec-affinity', osdspec_affinity])

    command = base_command + supplementary_command

    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily.
    # See KernelDevice.cc and _lock() to understand how ceph-osd acquires the
    # lock. Because this is really transient, we try up to 5 times and wait
    # for 1 sec in-between
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            return
        if returncode != errno.EWOULDBLOCK:
            raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
        if attempt < max_attempts:
            logger.info(
                'disk is held by another process, trying to mkfs again... (%s/%s attempt)' % (attempt, max_attempts)
            )
            time.sleep(1)

    # Every attempt found the device locked: do not pretend mkfs succeeded
    raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
3efd9988
FG
482
483
def osd_mkfs_filestore(osd_id, fsid, keyring):
    """
    Create the files for a filestore OSD to function by running
    ``ceph-osd --mkfs``. The command that gets built looks like::

        ceph-osd --cluster ceph --osd-objectstore filestore --mkfs -i 0
                 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap
                 [--osdspec-affinity AFFINITY]
                 [--keyfile -]
                 --osd-data /var/lib/ceph/osd/ceph-0/
                 --osd-journal /var/lib/ceph/osd/ceph-0/journal
                 --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c
                 --setuser ceph --setgroup ceph

    :param osd_id: The ID of the OSD
    :param fsid: The uuid of the OSD, passed as ``--osd-uuid``
    :param keyring: Passed to the command as stdin. ``--keyfile -`` is added
                    to the command unless the release is 'luminous'
    :raises RuntimeError: when the command exits with a non-zero code
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap_path = os.path.join(osd_dir, 'activate.monmap')
    journal_path = os.path.join(osd_dir, 'journal')

    system.chown(journal_path)
    system.chown(osd_dir)

    mkfs_cmd = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'filestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap_path,
    ]

    affinity = get_osdspec_affinity()
    if affinity:
        mkfs_cmd += ['--osdspec-affinity', affinity]

    if __release__ != 'luminous':
        # goes through stdin
        mkfs_cmd += ['--keyfile', '-']

    mkfs_cmd += [
        '--osd-data', osd_dir,
        '--osd-journal', journal_path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph',
    ]

    _out, _err, exit_code = process.call(
        mkfs_cmd, stdin=keyring, terminal_verbose=True, show_command=True
    )
    if exit_code != 0:
        raise RuntimeError('Command failed with exit code %s: %s' % (exit_code, ' '.join(mkfs_cmd)))