]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-volume/ceph_volume/util/prepare.py
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / ceph-volume / ceph_volume / util / prepare.py
1 """
2 These utilities for prepare provide all the pieces needed to prepare a device
3 but also a compounded ("single call") helper to do them in order. Some plugins
4 may want to change some part of the process, while others might want to consume
5 the single-call helper
6 """
7 import errno
8 import os
9 import logging
10 import json
11 import time
12 from ceph_volume import process, conf, terminal
13 from ceph_volume.util import system, constants, str_to_int, disk
14
15 logger = logging.getLogger(__name__)
16 mlogger = terminal.MultiLogger(__name__)
17
18
def create_key():
    """
    Generate a fresh auth key with ``ceph-authtool --gen-print-key``.

    :raises RuntimeError: if ceph-authtool exits with a non-zero code
    :return: the generated key, stripped of surrounding whitespace
    """
    out, err, rc = process.call(
        ['ceph-authtool', '--gen-print-key'],
        show_command=True,
        logfile_verbose=False)
    if rc:
        raise RuntimeError('Unable to generate a new auth key')
    return ' '.join(out).strip()
27
28
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Create a keyring file with the ``ceph-authtool`` utility. Constructs the
    path over well-known conventions for the OSD, and allows any other custom
    ``name`` to be set.

    :param osd_id: The ID for the OSD to be used
    :param secret: The key to be added as (as a string)
    :param name: Defaults to 'osd.{ID}' but can be used to add other client
                 names, specifically for 'lockbox' type of keys
    :param keyring_name: Alternative keyring name, for supporting other
                         types of keys like for lockbox
    """
    entity = name if name else 'osd.%s' % str(osd_id)
    keyring_path = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
    mlogger.info(f'Creating keyring file for {entity}')
    command = [
        'ceph-authtool', keyring_path,
        '--create-keyring',
        '--name', entity,
        '--add-key', secret,
    ]
    process.call(command, logfile_verbose=False)
    # the OSD daemon (running as the ceph user) must be able to read the keyring
    system.chown(keyring_path)
54
55
def get_block_db_size(lv_format=True):
    """
    Helper to retrieve the size (defined in bytes in ceph.conf) to create
    the block.db logical volume, it "translates" the string into a ``Size``
    object, and finally (optionally) it formats it back as a string so that
    it can be used for creating the LV.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
    would result in '5G', otherwise it will return a ``Size`` object.
    :return: '<N>G' string, a ``Size`` object, or None when unconfigured
    :raises RuntimeError: if the configured size is smaller than 2GB

    .. note: Configuration values are in bytes, unlike journals which
    are defined in gigabytes
    """
    conf_db_size = None
    try:
        conf_db_size = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        # a missing or unreadable ceph.conf is not fatal; fall back to defaults
        logger.exception("failed to load ceph configuration, will use defaults")

    if not conf_db_size:
        logger.debug(
            'block.db has no size configuration, will fallback to using as much as possible'
        )
        # TODO better to return disk.Size(b=0) here
        return None
    logger.debug('bluestore_block_db_size set to %s' % conf_db_size)
    db_size = disk.Size(b=str_to_int(conf_db_size))

    # refuse undersized block.db volumes; bluestore needs at least 2GB
    if db_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.db')
        raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % db_size)
    if lv_format:
        return '%sG' % db_size.gb.as_int()
    return db_size
90
def get_block_wal_size(lv_format=True):
    """
    Helper to retrieve the size (defined in bytes in ceph.conf) to create
    the block.wal logical volume, it "translates" the string into a ``Size``
    object, and finally (optionally) it formats it back as a string so that
    it can be used for creating the LV.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
    would result in '5G', otherwise it will return a ``Size`` object.
    :return: '<N>G' string, a ``Size`` object, or None when unconfigured
    :raises RuntimeError: if the configured size is smaller than 2GB

    .. note: Configuration values are in bytes, unlike journals which
    are defined in gigabytes
    """
    conf_wal_size = None
    try:
        conf_wal_size = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
    except RuntimeError:
        # a missing or unreadable ceph.conf is not fatal; fall back to defaults
        logger.exception("failed to load ceph configuration, will use defaults")

    if not conf_wal_size:
        logger.debug(
            'block.wal has no size configuration, will fallback to using as much as possible'
        )
        return None
    logger.debug('bluestore_block_wal_size set to %s' % conf_wal_size)
    wal_size = disk.Size(b=str_to_int(conf_wal_size))

    # refuse undersized block.wal volumes; bluestore needs at least 2GB
    if wal_size < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.wal')
        raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % wal_size)
    if lv_format:
        return '%sG' % wal_size.gb.as_int()
    return wal_size
124
125
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with the monitors and return the assigned ID.

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor
    :param osd_id: Reuse an existing ID from an OSD that's been destroyed, if the
                   id does not exist in the cluster a new ID will be created
    :raises RuntimeError: when the requested ID is unusable, or 'osd new' fails
    """
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        '-i', '-',
        'osd', 'new', fsid,
    ]
    if osd_id is not None:
        # guard clause: only append the requested id when it can be reused
        if not osd_id_available(osd_id):
            raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
        command.append(osd_id)
    stdout, stderr, returncode = process.call(
        command,
        stdin=json_secrets,
        show_command=True
    )
    if returncode:
        raise RuntimeError('Unable to create a new OSD id')
    return ' '.join(stdout).strip()
156
157
def osd_id_available(osd_id):
    """
    Check whether an osd ID exists and whether it is available for reuse.

    An ID is considered available when it is not present in the cluster's
    OSD tree at all, or when it is present but marked "destroyed".

    :param osd_id: The osd ID to check
    :return: True if the ID can be (re)used, False otherwise
    :raises RuntimeError: when the 'osd tree' query fails
    """
    if osd_id is None:
        return False

    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    stdout, stderr, returncode = process.call(
        [
            'ceph',
            '--cluster', conf.cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', bootstrap_keyring,
            'osd',
            'tree',
            '-f', 'json',
        ],
        show_command=True
    )
    if returncode != 0:
        # message fixed: previously read 'Unable check if OSD id exists'
        raise RuntimeError('Unable to check if OSD id exists: %s' % osd_id)

    output = json.loads(''.join(stdout).strip())
    osds = output['nodes']
    # compare stringified ids so int and str inputs behave identically
    osd = [osd for osd in osds if str(osd['id']) == str(osd_id)]
    # available when the id is unknown, or known but already destroyed
    # (the redundant `osd and` guard from the original is dropped; `or`
    # short-circuits so osd[0] is only reached when osd is non-empty)
    return not osd or osd[0].get('status') == "destroyed"
190
191
def mount_tmpfs(path):
    """
    Mount a tmpfs filesystem at ``path`` and restore its SELinux context.
    """
    command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(command)

    # Restore SELinux context
    system.set_context(path)
202
203
def create_osd_path(osd_id, tmpfs=False):
    """
    Create the OSD data directory, optionally mounting a tmpfs on it.

    :param osd_id: The ID of the OSD the directory belongs to
    :param tmpfs: When True, mount a tmpfs on the freshly created path
    """
    path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    # reuse the computed path; the original rebuilt the identical string inline
    system.mkdir_p(path)
    if tmpfs:
        mount_tmpfs(path)
209
210
def format_device(device):
    """
    Format ``device`` with an XFS filesystem (the only supported type),
    honoring any ``osd_mkfs_options_xfs`` configured in ceph.conf.
    """
    # get the mkfs options if any for xfs,
    # fallback to the default options defined in constants.mkfs
    flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # always force
    if '-f' not in flags:
        flags.insert(0, '-f')

    # only supports xfs
    command = ['mkfs', '-t', 'xfs'] + flags + [device]
    process.run(command)
231
232
233 def _normalize_mount_flags(flags, extras=None):
234 """
235 Mount flag options have to be a single string, separated by a comma. If the
236 flags are separated by spaces, or with commas and spaces in ceph.conf, the
237 mount options will be passed incorrectly.
238
239 This will help when parsing ceph.conf values return something like::
240
241 ["rw,", "exec,"]
242
243 Or::
244
245 [" rw ,", "exec"]
246
247 :param flags: A list of flags, or a single string of mount flags
248 :param extras: Extra set of mount flags, useful when custom devices like VDO need
249 ad-hoc mount configurations
250 """
251 # Instead of using set(), we append to this new list here, because set()
252 # will create an arbitrary order on the items that is made worst when
253 # testing with tools like tox that includes a randomizer seed. By
254 # controlling the order, it is easier to correctly assert the expectation
255 unique_flags = []
256 if isinstance(flags, list):
257 if extras:
258 flags.extend(extras)
259
260 # ensure that spaces and commas are removed so that they can join
261 # correctly, remove duplicates
262 for f in flags:
263 if f and f not in unique_flags:
264 unique_flags.append(f.strip().strip(','))
265 return ','.join(unique_flags)
266
267 # split them, clean them, and join them back again
268 flags = flags.strip().split(' ')
269 if extras:
270 flags.extend(extras)
271
272 # remove possible duplicates
273 for f in flags:
274 if f and f not in unique_flags:
275 unique_flags.append(f.strip().strip(','))
276 flags = ','.join(unique_flags)
277 # Before returning, split them again, since strings can be mashed up
278 # together, preventing removal of duplicate entries
279 return ','.join(set(flags.split(',')))
280
281
def mount_osd(device, osd_id, **kw):
    """
    Mount ``device`` on the OSD data directory as XFS, using the mount
    options from ceph.conf and adding 'discard' when the device is on VDO.
    """
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    # VDO-backed devices get an extra 'discard' flag, signalled via kwargs
    extras = ['discard'] if kw.get('is_vdo', '0') == '1' else []
    flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    command = [
        'mount', '-t', 'xfs', '-o',
        _normalize_mount_flags(flags, extras=extras),
        device,
        destination,
    ]
    process.run(command)

    # Restore SELinux context
    system.set_context(destination)
304
305
def _link_device(device, device_type, osd_id):
    """
    Allow linking any device type in an OSD directory. ``device`` must the be
    source, with an absolute path and ``device_type`` will be the destination
    name, like 'journal', or 'block'
    """
    destination = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, device_type)
    # the source device must be owned by the ceph user before the OSD uses it
    system.chown(device)

    process.run(['ln', '-s', device, destination])
321
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Validate whether the given device is truly what it is supposed to be,
    by inspecting its label with ``ceph-bluestore-tool show-label``.

    Aborts the program (SystemExit) when the tool fails, the device has no
    label, the device type does not match, or the device belongs to a
    different OSD.

    NOTE(review): 'excepted_device_type' looks like a typo for
    'expected_device_type'; the name is kept to preserve the signature.
    """
    out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
    if err:
        terminal.error('ceph-bluestore-tool failed to run. %s'% err)
        raise SystemExit(1)
    if ret:
        terminal.error('no label on %s'% device)
        raise SystemExit(1)

    labels = json.loads(''.join(out))
    if device not in labels:
        terminal.error('%s not in the output of ceph-bluestore-tool, buggy?'% device)
        raise SystemExit(1)

    current_device_type = labels[device]['description']
    if current_device_type != excepted_device_type:
        terminal.error('%s is not a %s device but %s'% (device, excepted_device_type, current_device_type))
        raise SystemExit(1)

    current_osd_uuid = labels[device]['osd_uuid']
    if current_osd_uuid != osd_uuid:
        terminal.error('device %s is used by another osd %s as %s, should be %s'% (device, current_osd_uuid, current_device_type, osd_uuid))
        raise SystemExit(1)
346
347
def link_block(block_device, osd_id):
    """
    Symlink ``block_device`` as 'block' inside the OSD directory for ``osd_id``.
    """
    _link_device(block_device, 'block', osd_id)
350
351
def link_wal(wal_device, osd_id, osd_uuid=None):
    """
    Validate that ``wal_device`` carries a 'bluefs wal' label matching
    ``osd_uuid``, then symlink it as 'block.wal' in the OSD directory.
    """
    _validate_bluestore_device(wal_device, 'bluefs wal', osd_uuid)
    _link_device(wal_device, 'block.wal', osd_id)
355
356
def link_db(db_device, osd_id, osd_uuid=None):
    """
    Validate that ``db_device`` carries a 'bluefs db' label matching
    ``osd_uuid``, then symlink it as 'block.db' in the OSD directory.
    """
    _validate_bluestore_device(db_device, 'bluefs db', osd_uuid)
    _link_device(db_device, 'block.db', osd_id)
360
361
def get_monmap(osd_id):
    """
    Before creating the OSD files, a monmap needs to be retrieved so that it
    can be used to tell the monitor(s) about the new OSD. A call will look like::

        ceph --cluster ceph --name client.bootstrap-osd \
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
    """
    osd_path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    monmap_destination = os.path.join(osd_path, 'activate.monmap')

    command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        'mon', 'getmap', '-o', monmap_destination,
    ]
    process.run(command)
382
383
def get_osdspec_affinity():
    """
    Return the OSD spec affinity from the environment, or '' when unset.
    """
    affinity = os.environ.get('CEPH_VOLUME_OSDSPEC_AFFINITY')
    return affinity if affinity is not None else ''
386
387
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command

    :param osd_id: The ID of the OSD being created
    :param fsid: The OSD fsid (uuid) to assign
    :param keyring: Optional secret fed to ceph-osd on stdin ('--keyfile -')
    :param wal: Optional path to a block.wal device
    :param db: Optional path to a block.db device
    :raises RuntimeError: if ceph-osd fails, including when all retries on a
        transiently locked device are exhausted
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    if get_osdspec_affinity():
        base_command.extend(['--osdspec-affinity', get_osdspec_affinity()])

    command = base_command + supplementary_command

    """
    When running in containers the --mkfs on raw device sometimes fails
    to acquire a lock through flock() on the device because systemd-udevd holds one temporarily.
    See KernelDevice.cc and _lock() to understand how ceph-osd acquires the lock.
    Because this is really transient, we retry up to 5 times and wait for 1 sec in-between
    """
    for retry in range(5):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            break
        if returncode == errno.EWOULDBLOCK:
            time.sleep(1)
            logger.info('disk is held by another process, trying to mkfs again... (%s/5 attempt)' % retry)
            continue
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
    else:
        # BUGFIX: previously, exhausting all retries on EWOULDBLOCK fell out
        # of the loop and returned as if mkfs had succeeded; raise instead
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
460