]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-volume/ceph_volume/util/prepare.py
Import ceph 15.2.8
[ceph.git] / ceph / src / ceph-volume / ceph_volume / util / prepare.py
"""
These utilities for prepare provide all the pieces needed to prepare a device,
as well as a compounded ("single call") helper that runs them in order. Some
plugins may want to change one part of the process, while others might want to
consume the single-call helper.
"""
7 import errno
8 import os
9 import logging
10 import json
11 import time
12 from ceph_volume import process, conf, __release__, terminal
13 from ceph_volume.util import system, constants, str_to_int, disk
14
15 logger = logging.getLogger(__name__)
16 mlogger = terminal.MultiLogger(__name__)
17
18
def create_key():
    """
    Generate a fresh auth key with ``ceph-authtool --gen-print-key``.

    :raises RuntimeError: if ``ceph-authtool`` exits non-zero
    :return: the generated key as a stripped string
    """
    command = ['ceph-authtool', '--gen-print-key']
    stdout, stderr, returncode = process.call(command, show_command=True)
    if returncode:
        raise RuntimeError('Unable to generate a new auth key')
    # process.call returns stdout as a list of lines; flatten it
    return ' '.join(stdout).strip()
26
27
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Create a keyring file with the ``ceph-authtool`` utility. Constructs the
    path over well-known conventions for the OSD, and allows any other custom
    ``name`` to be set.

    :param osd_id: The ID for the OSD to be used
    :param secret: The key to be added as (as a string)
    :param name: Defaults to 'osd.{ID}' but can be used to add other client
                 names, specifically for 'lockbox' type of keys
    :param keyring_name: Alternative keyring name, for supporting other
                         types of keys like for lockbox
    """
    # fall back to the conventional entity name when none was given
    name = name or 'osd.{}'.format(osd_id)
    osd_keyring = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
    command = [
        'ceph-authtool', osd_keyring,
        '--create-keyring',
        '--name', name,
        '--add-key', secret,
    ]
    process.run(command)
    # keyring must be owned by the ceph user, not root
    system.chown(osd_keyring)
51
52
def get_journal_size(lv_format=True):
    """
    Retrieve the journal size configured in ceph.conf (``osd_journal_size``,
    in megabytes), defaulting to 5120 MB when unset, and reject anything
    smaller than 2 GB.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size`` object.
    :raises RuntimeError: when the configured journal is smaller than 2 GB
    """
    configured = conf.ceph.get_safe('osd', 'osd_journal_size', '5120')
    logger.debug('osd_journal_size set to %s' % configured)
    journal_size = disk.Size(mb=str_to_int(configured))

    if journal_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for journal')
        raise RuntimeError('journal sizes must be larger than 2GB, detected: %s' % journal_size)
    if not lv_format:
        return journal_size
    return '%sG' % journal_size.gb.as_int()
73
74
def get_block_db_size(lv_format=True):
    """
    Retrieve the size configured for the block.db logical volume from
    ceph.conf (``bluestore_block_db_size``), and reject anything smaller
    than 2 GB. Returns ``None`` when no size is configured, signalling
    callers to use as much space as possible.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size`` object.
    :raises RuntimeError: when the configured size is smaller than 2 GB

    .. note: Configuration values are in bytes, unlike journals which
             are defined in gigabytes
    """
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        # a broken/missing ceph.conf is not fatal here; behave as unconfigured
        logger.exception("failed to load ceph configuration, will use defaults")
        configured = None

    if not configured:
        logger.debug(
            'block.db has no size configuration, will fallback to using as much as possible'
        )
        # TODO better to return disk.Size(b=0) here
        return None
    logger.debug('bluestore_block_db_size set to %s' % configured)
    db_size = disk.Size(b=str_to_int(configured))

    if db_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.db')
        raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % db_size)
    if not lv_format:
        return db_size
    return '%sG' % db_size.gb.as_int()
109
def get_block_wal_size(lv_format=True):
    """
    Retrieve the size configured for the block.wal logical volume from
    ceph.conf (``bluestore_block_wal_size``), and reject anything smaller
    than 2 GB. Returns ``None`` when no size is configured, signalling
    callers to use as much space as possible.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size`` object.
    :raises RuntimeError: when the configured size is smaller than 2 GB

    .. note: Configuration values are in bytes, unlike journals which
             are defined in gigabytes
    """
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
    except RuntimeError:
        # a broken/missing ceph.conf is not fatal here; behave as unconfigured
        logger.exception("failed to load ceph configuration, will use defaults")
        configured = None

    if not configured:
        logger.debug(
            'block.wal has no size configuration, will fallback to using as much as possible'
        )
        return None
    logger.debug('bluestore_block_wal_size set to %s' % configured)
    wal_size = disk.Size(b=str_to_int(configured))

    if wal_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.wal')
        raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % wal_size)
    if not lv_format:
        return wal_size
    return '%sG' % wal_size.gb.as_int()
143
144
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with the monitors via ``osd new`` and return its ID.

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor
    :param osd_id: Reuse an existing ID from an OSD that's been destroyed, if the
                   id does not exist in the cluster a new ID will be created
    :raises RuntimeError: when the requested ID cannot be reused, or the
                          ``ceph osd new`` call fails
    """
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    cmd = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        '-i', '-',
        'osd', 'new', fsid
    ]
    if osd_id is not None:
        # guard clause: refuse early when the requested ID cannot be reused
        if not osd_id_available(osd_id):
            raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
        cmd.append(osd_id)
    stdout, stderr, returncode = process.call(
        cmd,
        stdin=json_secrets,
        show_command=True
    )
    if returncode:
        raise RuntimeError('Unable to create a new OSD id')
    return ' '.join(stdout).strip()
175
176
def osd_id_available(osd_id):
    """
    Checks to see if an osd ID exists and if it's available for
    reuse. Returns True if it is, False if it isn't.

    An ID is considered reusable only when it appears in ``ceph osd tree``
    with a status of "destroyed".

    :param osd_id: The osd ID to check
    :raises RuntimeError: when the ``ceph osd tree`` call fails
    """
    if osd_id is None:
        return False
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    stdout, stderr, returncode = process.call(
        [
            'ceph',
            '--cluster', conf.cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', bootstrap_keyring,
            'osd',
            'tree',
            '-f', 'json',
        ],
        show_command=True
    )
    if returncode != 0:
        # message fix: was "Unable check if OSD id exists"
        raise RuntimeError('Unable to check if OSD id exists: %s' % osd_id)

    output = json.loads(''.join(stdout).strip())
    # compare as strings: the caller may pass the ID as an int or a str
    node = next(
        (node for node in output['nodes'] if str(node['id']) == str(osd_id)),
        None
    )
    return bool(node and node.get('status') == "destroyed")
208
209
def mount_tmpfs(path):
    """Mount a tmpfs filesystem at ``path`` and restore its SELinux context."""
    command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(command)

    # Restore SELinux context
    system.set_context(path)
220
221
def create_osd_path(osd_id, tmpfs=False):
    """
    Create the OSD data directory for ``osd_id``, optionally mounting a
    tmpfs on top of it.

    :param osd_id: The ID of the OSD whose directory is created
    :param tmpfs: When True, mount a tmpfs at the created path
    """
    path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    # DRY fix: reuse ``path`` instead of rebuilding the identical string
    system.mkdir_p(path)
    if tmpfs:
        mount_tmpfs(path)
227
228
def format_device(device):
    """
    Format ``device`` with an XFS filesystem (the only supported type),
    honoring ``osd_mkfs_options_xfs`` from ceph.conf and always forcing
    creation with ``-f``.
    """
    # get the mkfs options if any for xfs,
    # fallback to the default options defined in constants.mkfs
    flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # always force
    if '-f' not in flags:
        flags.insert(0, '-f')

    # only supports xfs
    process.run(['mkfs', '-t', 'xfs'] + flags + [device])
249
250
251 def _normalize_mount_flags(flags, extras=None):
252 """
253 Mount flag options have to be a single string, separated by a comma. If the
254 flags are separated by spaces, or with commas and spaces in ceph.conf, the
255 mount options will be passed incorrectly.
256
257 This will help when parsing ceph.conf values return something like::
258
259 ["rw,", "exec,"]
260
261 Or::
262
263 [" rw ,", "exec"]
264
265 :param flags: A list of flags, or a single string of mount flags
266 :param extras: Extra set of mount flags, useful when custom devices like VDO need
267 ad-hoc mount configurations
268 """
269 # Instead of using set(), we append to this new list here, because set()
270 # will create an arbitrary order on the items that is made worst when
271 # testing with tools like tox that includes a randomizer seed. By
272 # controlling the order, it is easier to correctly assert the expectation
273 unique_flags = []
274 if isinstance(flags, list):
275 if extras:
276 flags.extend(extras)
277
278 # ensure that spaces and commas are removed so that they can join
279 # correctly, remove duplicates
280 for f in flags:
281 if f and f not in unique_flags:
282 unique_flags.append(f.strip().strip(','))
283 return ','.join(unique_flags)
284
285 # split them, clean them, and join them back again
286 flags = flags.strip().split(' ')
287 if extras:
288 flags.extend(extras)
289
290 # remove possible duplicates
291 for f in flags:
292 if f and f not in unique_flags:
293 unique_flags.append(f.strip().strip(','))
294 flags = ','.join(unique_flags)
295 # Before returning, split them again, since strings can be mashed up
296 # together, preventing removal of duplicate entries
297 return ','.join(set(flags.split(',')))
298
299
def mount_osd(device, osd_id, **kw):
    """
    Mount ``device`` as XFS onto the OSD data directory for ``osd_id``,
    using the flags configured in ``osd_mount_options_xfs``. VDO devices
    (``is_vdo == '1'`` in ``kw``) additionally get the ``discard`` flag.
    """
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    extras = ['discard'] if kw.get('is_vdo', '0') == '1' else []
    flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    command = [
        'mount', '-t', 'xfs',
        '-o', _normalize_mount_flags(flags, extras=extras),
        device,
        destination,
    ]
    process.run(command)

    # Restore SELinux context
    system.set_context(destination)
322
323
def _link_device(device, device_type, osd_id):
    """
    Allow linking any device type in an OSD directory. ``device`` must the be
    source, with an absolute path and ``device_type`` will be the destination
    name, like 'journal', or 'block'
    """
    destination = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, device_type)
    # chown the source before linking so the OSD can use it unprivileged
    system.chown(device)

    process.run(['ln', '-s', device, destination])
339
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Validate whether the given device is truly what it is supposed to be,
    by reading its label with ``ceph-bluestore-tool show-label`` and
    comparing the device type and osd uuid. Exits the program on any
    mismatch or tool failure.
    """
    # NOTE(review): parameter is presumably meant to read "expected_device_type";
    # kept as-is to avoid breaking keyword callers
    out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
    if err:
        terminal.error('ceph-bluestore-tool failed to run. %s'% err)
        raise SystemExit(1)
    if ret:
        terminal.error('no label on %s'% device)
        raise SystemExit(1)

    labels = json.loads(''.join(out))
    if device not in labels:
        terminal.error('%s not in the output of ceph-bluestore-tool, buggy?'% device)
        raise SystemExit(1)

    found_type = labels[device]['description']
    if found_type != excepted_device_type:
        terminal.error('%s is not a %s device but %s'% (device, excepted_device_type, found_type))
        raise SystemExit(1)

    found_uuid = labels[device]['osd_uuid']
    if found_uuid != osd_uuid:
        terminal.error('device %s is used by another osd %s as %s, should be %s'% (device, found_uuid, found_type, osd_uuid))
        raise SystemExit(1)
364
def link_journal(journal_device, osd_id):
    # Symlink the filestore journal device into the OSD directory as 'journal'
    _link_device(journal_device, 'journal', osd_id)
367
368
def link_block(block_device, osd_id):
    # Symlink the bluestore data device into the OSD directory as 'block'
    _link_device(block_device, 'block', osd_id)
371
372
def link_wal(wal_device, osd_id, osd_uuid=None):
    # Verify the device label says 'bluefs wal' and matches this OSD's uuid
    # before linking it into the OSD directory as 'block.wal'
    _validate_bluestore_device(wal_device, 'bluefs wal', osd_uuid)
    _link_device(wal_device, 'block.wal', osd_id)
376
377
def link_db(db_device, osd_id, osd_uuid=None):
    # Verify the device label says 'bluefs db' and matches this OSD's uuid
    # before linking it into the OSD directory as 'block.db'
    _validate_bluestore_device(db_device, 'bluefs db', osd_uuid)
    _link_device(db_device, 'block.db', osd_id)
381
382
def get_monmap(osd_id):
    """
    Before creating the OSD files, a monmap needs to be retrieved so that it
    can be used to tell the monitor(s) about the new OSD. A call will look like::

        ceph --cluster ceph --name client.bootstrap-osd \
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
    """
    osd_path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    destination = os.path.join(osd_path, 'activate.monmap')

    command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        'mon', 'getmap', '-o', destination,
    ]
    process.run(command)
403
404
def get_osdspec_affinity():
    """Return the osdspec affinity recorded in the environment ('' when unset)."""
    affinity = os.environ.get('CEPH_VOLUME_OSDSPEC_AFFINITY')
    return affinity if affinity is not None else ''
407
408
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command

    :param osd_id: the ID of the OSD being created
    :param fsid: uuid passed to ceph-osd as --osd-uuid
    :param keyring: optional secret, fed to ceph-osd through stdin (--keyfile -)
    :param wal: optional path to a block.wal device
    :param db: optional path to a block.db device
    :raises RuntimeError: when ceph-osd fails, including when every retry on
        transient flock() contention (EWOULDBLOCK) has been exhausted
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        # the secret goes through stdin, never on the command line
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    if get_osdspec_affinity():
        base_command.extend(['--osdspec-affinity', get_osdspec_affinity()])

    command = base_command + supplementary_command

    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily. See KernelDevice.cc and _lock() to understand how
    # ceph-osd acquires the lock. Because this is really transient, we retry
    # up to 5 times and wait for 1 sec in-between.
    for retry in range(5):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            break
        if returncode == errno.EWOULDBLOCK:
            time.sleep(1)
            logger.info('disk is held by another process, trying to mkfs again... (%s/5 attempt)' % retry)
            continue
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
    else:
        # Bug fix: previously, exhausting all retries on EWOULDBLOCK fell
        # through silently and the function returned as if mkfs succeeded.
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
481
482
def osd_mkfs_filestore(osd_id, fsid, keyring):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-journal /var/lib/ceph/osd/ceph-0/journal \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    """
    osd_path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(osd_path, 'activate.monmap')
    journal = os.path.join(osd_path, 'journal')

    # ownership must be fixed up before ceph-osd drops privileges
    system.chown(journal)
    system.chown(osd_path)

    command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'filestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    if get_osdspec_affinity():
        command.extend(['--osdspec-affinity', get_osdspec_affinity()])

    if __release__ != 'luminous':
        # goes through stdin
        command.extend(['--keyfile', '-'])

    command.extend([
        '--osd-data', osd_path,
        '--osd-journal', journal,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ])

    _, _, returncode = process.call(
        command, stdin=keyring, terminal_verbose=True, show_command=True
    )
    if returncode:
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))