]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/util/prepare.py
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / ceph-volume / ceph_volume / util / prepare.py
CommitLineData
d2e6a577
FG
1"""
2These utilities for prepare provide all the pieces needed to prepare a device
3but also a compounded ("single call") helper to do them in order. Some plugins
4may want to change some part of the process, while others might want to consume
5the single-call helper
6"""
f91f0fd5 7import errno
d2e6a577
FG
8import os
9import logging
b32b8144 10import json
f91f0fd5 11import time
20effc67 12from ceph_volume import process, conf, terminal
1adf2230 13from ceph_volume.util import system, constants, str_to_int, disk
d2e6a577
FG
14
# Standard-library logger named after this module; used below for debug/info
# messages and for logging exceptions.
logger = logging.getLogger(__name__)
# MultiLogger from ceph_volume.terminal; used below for operator-facing
# info/error messages.
# NOTE(review): presumably reports to both the terminal and the log file --
# confirm in ceph_volume.terminal.
mlogger = terminal.MultiLogger(__name__)
d2e6a577
FG
17
18
def create_key():
    """
    Generate a new auth key by running ``ceph-authtool --gen-print-key``.

    :returns: the stdout lines of the command joined with a single space,
              with surrounding whitespace stripped
    :raises RuntimeError: if the command exits with a non-zero return code
    """
    authtool_cmd = ['ceph-authtool', '--gen-print-key']
    out_lines, _err_lines, exit_code = process.call(
        authtool_cmd,
        show_command=True,
        logfile_verbose=False)
    if exit_code == 0:
        return ' '.join(out_lines).strip()
    raise RuntimeError('Unable to generate a new auth key')
27
28
b32b8144
FG
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Write a keyring file for an OSD using ``ceph-authtool --create-keyring``
    and then chown the resulting file via ``system.chown``.

    The file is placed at ``/var/lib/ceph/osd/{cluster}-{osd_id}/{keyring_name}``.

    :param osd_id: The ID of the OSD, used for the directory and default name
    :param secret: The key to add (as a string)
    :param keyring_name: File name of the keyring inside the OSD directory,
                         allows other key types (e.g. lockbox) to be stored
    :param name: Entity name for the key. Falls back to ``osd.{ID}`` when
                 empty or ``None``
    """
    osd_keyring = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
    if not name:
        name = 'osd.%s' % str(osd_id)
    mlogger.info(f'Creating keyring file for {name}')

    authtool_cmd = ['ceph-authtool', osd_keyring, '--create-keyring']
    authtool_cmd += ['--name', name]
    authtool_cmd += ['--add-key', secret]
    # logfile_verbose=False mirrors create_key(); the command line carries the secret
    process.call(authtool_cmd, logfile_verbose=False)

    system.chown(osd_keyring)
d2e6a577
FG
54
55
91327a77
AA
def get_block_db_size(lv_format=True):
    """
    Read ``bluestore_block_db_size`` from the ``osd`` section of ceph.conf and
    turn it into a size usable for creating the block.db logical volume.

    The configured value is interpreted as a number of bytes (it is passed to
    ``disk.Size(b=...)``).

    :param lv_format: When true, return a string such as ``'5G'`` (whole
                      gigabytes); otherwise return the ``disk.Size`` object.
    :returns: ``None`` when no size is configured (or the configuration could
              not be loaded), else the size in the requested format
    :raises RuntimeError: if the configured size is below 2GB
    """
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")
        configured = None

    if not configured:
        logger.debug(
            'block.db has no size configuration, will fallback to using as much as possible'
        )
        # TODO better to return disk.Size(b=0) here
        return None

    logger.debug('bluestore_block_db_size set to %s' % configured)
    size = disk.Size(b=str_to_int(configured))
    minimum = disk.Size(gb=2)

    if size < minimum:
        mlogger.error('Refusing to continue with configured size for block.db')
        raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % size)

    return '%sG' % size.gb.as_int() if lv_format else size
90
11fdf7f2
TL
def get_block_wal_size(lv_format=True):
    """
    Read ``bluestore_block_wal_size`` from the ``osd`` section of ceph.conf and
    turn it into a size usable for creating the block.wal logical volume.

    The configured value is interpreted as a number of bytes (it is passed to
    ``disk.Size(b=...)``).

    :param lv_format: When true, return a string such as ``'5G'`` (whole
                      gigabytes); otherwise return the ``disk.Size`` object.
    :returns: ``None`` when no size is configured (or the configuration could
              not be loaded), else the size in the requested format
    :raises RuntimeError: if the configured size is below 2GB
    """
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")
        configured = None

    if not configured:
        logger.debug(
            'block.wal has no size configuration, will fallback to using as much as possible'
        )
        return None

    logger.debug('bluestore_block_wal_size set to %s' % configured)
    size = disk.Size(b=str_to_int(configured))
    minimum = disk.Size(gb=2)

    if size < minimum:
        mlogger.error('Refusing to continue with configured size for block.wal')
        raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % size)

    return '%sG' % size.gb.as_int() if lv_format else size
124
91327a77 125
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with ``ceph osd new`` using the bootstrap-osd keyring
    and return whatever the command printed on stdout.

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor; it is fed to the command
                         on stdin (``-i -``)
    :param osd_id: Optional ID to request. It is only appended to the command
                   when ``osd_id_available`` reports it as usable
    :returns: stdout lines joined with a space and stripped
    :raises RuntimeError: if ``osd_id`` is not available, or the command
                          exits with a non-zero return code
    """
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    cmd = ['ceph']
    cmd += ['--cluster', conf.cluster]
    cmd += ['--name', 'client.bootstrap-osd']
    cmd += ['--keyring', bootstrap_keyring]
    cmd += ['-i', '-']
    cmd += ['osd', 'new', fsid]

    if osd_id is not None:
        if not osd_id_available(osd_id):
            raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
        cmd.append(osd_id)

    out_lines, _err_lines, exit_code = process.call(
        cmd,
        stdin=json_secrets,
        show_command=True
    )
    if exit_code == 0:
        return ' '.join(out_lines).strip()
    raise RuntimeError('Unable to create a new OSD id')
156
157
def osd_id_available(osd_id):
    """
    Tell whether an OSD ID may be used for a new OSD, based on the output of
    ``ceph osd tree -f json``.

    An ID is considered available when no node in the tree carries it, or
    when the first matching node has the status ``destroyed``.

    :param osd_id: The osd ID to check; ``None`` is never available
    :returns: ``True`` if the ID can be used, ``False`` otherwise
    :raises RuntimeError: if the ``ceph osd tree`` command fails
    """
    if osd_id is None:
        return False

    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    tree_cmd = ['ceph']
    tree_cmd += ['--cluster', conf.cluster]
    tree_cmd += ['--name', 'client.bootstrap-osd']
    tree_cmd += ['--keyring', bootstrap_keyring]
    tree_cmd += ['osd', 'tree', '-f', 'json']

    out_lines, _err_lines, exit_code = process.call(tree_cmd, show_command=True)
    if exit_code != 0:
        raise RuntimeError('Unable check if OSD id exists: %s' % osd_id)

    tree = json.loads(''.join(out_lines).strip())
    wanted = str(osd_id)
    # compare as strings so that int and str IDs match each other
    matches = [node for node in tree['nodes'] if str(node['id']) == wanted]
    if not matches:
        return True
    return matches[0].get('status') == "destroyed"
d2e6a577
FG
190
191
3efd9988
FG
def mount_tmpfs(path):
    """
    Mount a tmpfs at ``path`` and then call ``system.set_context`` on it.

    :param path: Directory to use as the mount point
    """
    tmpfs_cmd = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(tmpfs_cmd)

    # Restore SELinux context
    system.set_context(path)
202
3efd9988
FG
203
def create_osd_path(osd_id, tmpfs=False):
    """
    Create the OSD directory ``/var/lib/ceph/osd/{cluster}-{osd_id}`` and
    optionally mount a tmpfs on top of it.

    :param osd_id: The ID of the OSD the directory belongs to
    :param tmpfs: When true, mount a tmpfs at the directory after creating it
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    system.mkdir_p(osd_dir)
    if not tmpfs:
        return
    mount_tmpfs(osd_dir)
d2e6a577
FG
209
210
def format_device(device):
    """
    Create an XFS filesystem on ``device`` with ``mkfs -t xfs``.

    Options come from ``osd_mkfs_options_xfs`` in the ``osd`` section of
    ceph.conf, falling back to ``constants.mkfs['xfs']``. ``-f`` is always
    passed, it is prepended when the configured options do not contain it.

    :param device: Path of the device to format
    """
    # only supports xfs
    configured_flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # Work on a copy: the list handed back may be the shared default from
    # ``constants.mkfs`` and must not be modified in place.
    flags = list(configured_flags)

    # always force
    if '-f' not in flags:
        flags.insert(0, '-f')

    command = ['mkfs', '-t', 'xfs'] + flags + [device]
    process.run(command)
231
232
94b18763 233def _normalize_mount_flags(flags, extras=None):
3a9019d9
FG
234 """
235 Mount flag options have to be a single string, separated by a comma. If the
236 flags are separated by spaces, or with commas and spaces in ceph.conf, the
237 mount options will be passed incorrectly.
238
239 This will help when parsing ceph.conf values return something like::
240
241 ["rw,", "exec,"]
242
243 Or::
244
245 [" rw ,", "exec"]
246
247 :param flags: A list of flags, or a single string of mount flags
94b18763
FG
248 :param extras: Extra set of mount flags, useful when custom devices like VDO need
249 ad-hoc mount configurations
3a9019d9 250 """
94b18763
FG
251 # Instead of using set(), we append to this new list here, because set()
252 # will create an arbitrary order on the items that is made worst when
253 # testing with tools like tox that includes a randomizer seed. By
254 # controlling the order, it is easier to correctly assert the expectation
255 unique_flags = []
3a9019d9 256 if isinstance(flags, list):
94b18763
FG
257 if extras:
258 flags.extend(extras)
259
3a9019d9 260 # ensure that spaces and commas are removed so that they can join
94b18763
FG
261 # correctly, remove duplicates
262 for f in flags:
263 if f and f not in unique_flags:
264 unique_flags.append(f.strip().strip(','))
265 return ','.join(unique_flags)
3a9019d9
FG
266
267 # split them, clean them, and join them back again
268 flags = flags.strip().split(' ')
94b18763
FG
269 if extras:
270 flags.extend(extras)
271
272 # remove possible duplicates
273 for f in flags:
274 if f and f not in unique_flags:
275 unique_flags.append(f.strip().strip(','))
276 flags = ','.join(unique_flags)
277 # Before returning, split them again, since strings can be mashed up
278 # together, preventing removal of duplicate entries
279 return ','.join(set(flags.split(',')))
280
281
def mount_osd(device, osd_id, **kw):
    """
    Mount ``device`` as XFS on ``/var/lib/ceph/osd/{cluster}-{osd_id}`` and
    call ``system.set_context`` on the mount point afterwards.

    Mount options come from ``osd_mount_options_xfs`` in the ``osd`` section
    of ceph.conf, falling back to ``constants.mount['xfs']``.

    :param device: Path of the device to mount
    :param osd_id: The ID of the OSD, used to build the destination path
    :param kw: Only ``is_vdo`` is read; when it is the string ``'1'`` the
               ``discard`` mount option is added
    """
    extras = ['discard'] if kw.get('is_vdo', '0') == '1' else []
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    configured_flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    mount_options = _normalize_mount_flags(configured_flags, extras=extras)
    process.run(['mount', '-t', 'xfs', '-o', mount_options, device, destination])

    # Restore SELinux context
    system.set_context(destination)
304
d2e6a577 305
3efd9988
FG
def _link_device(device, device_type, osd_id):
    """
    Symlink ``device`` into the OSD directory under the name ``device_type``
    (for example 'journal' or 'block'). The device is chowned through
    ``system.chown`` before the link is created with ``ln -s``.

    :param device: Source of the link, expected to be an absolute path
    :param device_type: Name of the link inside the OSD directory
    :param osd_id: The ID of the OSD owning the directory
    """
    link_name = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, device_type)
    system.chown(device)
    process.run(['ln', '-s', device, link_name])
321
92f5a8d4
TL
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Validate whether the given device is truly what it is supposed to be, by
    inspecting the output of ``ceph-bluestore-tool show-label --dev <device>``.

    Any failed check is reported with ``terminal.error`` and ends the process
    with ``SystemExit(1)``.

    :param device: Path of the device to inspect; also the key looked up in
                   the JSON output of the tool
    :param excepted_device_type: Value the label's ``description`` must have
    :param osd_uuid: Value the label's ``osd_uuid`` must have
    :raises SystemExit: if the tool wrote to stderr or returned non-zero, the
                        device is missing from the output, or the label's
                        description/osd_uuid do not match
    """
    def _abort(message):
        # report to the operator and stop
        terminal.error(message)
        raise SystemExit(1)

    out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
    if err:
        _abort('ceph-bluestore-tool failed to run. %s' % err)
    if ret:
        _abort('no label on %s' % device)

    labels = json.loads(''.join(out))
    if device not in labels:
        _abort('%s not in the output of ceph-bluestore-tool, buggy?' % device)

    current_device_type = labels[device]['description']
    if current_device_type != excepted_device_type:
        _abort('%s is not a %s device but %s' % (device, excepted_device_type, current_device_type))

    current_osd_uuid = labels[device]['osd_uuid']
    if current_osd_uuid != osd_uuid:
        _abort('device %s is used by another osd %s as %s, should be %s' % (
            device, current_osd_uuid, current_device_type, osd_uuid))
d2e6a577 346
3efd9988
FG
347
def link_block(block_device, osd_id):
    """
    Link ``block_device`` as ``block`` inside the directory of OSD ``osd_id``.
    """
    _link_device(device=block_device, device_type='block', osd_id=osd_id)
350
351
92f5a8d4
TL
def link_wal(wal_device, osd_id, osd_uuid=None):
    """
    Check that ``wal_device`` is labelled as a 'bluefs wal' device belonging
    to ``osd_uuid`` and then link it as ``block.wal`` inside the directory of
    OSD ``osd_id``.
    """
    _validate_bluestore_device(
        device=wal_device,
        excepted_device_type='bluefs wal',
        osd_uuid=osd_uuid,
    )
    _link_device(device=wal_device, device_type='block.wal', osd_id=osd_id)
355
356
92f5a8d4
TL
def link_db(db_device, osd_id, osd_uuid=None):
    """
    Check that ``db_device`` is labelled as a 'bluefs db' device belonging to
    ``osd_uuid`` and then link it as ``block.db`` inside the directory of OSD
    ``osd_id``.
    """
    _validate_bluestore_device(
        device=db_device,
        excepted_device_type='bluefs db',
        osd_uuid=osd_uuid,
    )
    _link_device(device=db_device, device_type='block.db', osd_id=osd_id)
360
361
d2e6a577
FG
def get_monmap(osd_id):
    """
    Fetch the monmap with the bootstrap-osd keyring and store it as
    ``activate.monmap`` inside the OSD directory. A call will look like::

        ceph --cluster ceph --name client.bootstrap-osd \
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap

    :param osd_id: The ID of the OSD whose directory receives the monmap
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    monmap_destination = os.path.join(osd_dir, 'activate.monmap')

    getmap_cmd = ['ceph']
    getmap_cmd += ['--cluster', conf.cluster]
    getmap_cmd += ['--name', 'client.bootstrap-osd']
    getmap_cmd += ['--keyring', bootstrap_keyring]
    getmap_cmd += ['mon', 'getmap', '-o', monmap_destination]
    process.run(getmap_cmd)
382
383
e306af50
TL
def get_osdspec_affinity():
    """
    Return the value of the ``CEPH_VOLUME_OSDSPEC_AFFINITY`` environment
    variable, or an empty string when it is not set.
    """
    return os.getenv('CEPH_VOLUME_OSDSPEC_AFFINITY', '')
386
387
3efd9988
FG
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

        ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 0 \
                 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                 --osd-data /var/lib/ceph/osd/ceph-0/ \
                 --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                 --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument ``--keyfile -`` is added and the keyring is fed to the
    command on stdin.

    :param osd_id: The ID of the OSD
    :param fsid: The uuid of the OSD, passed as ``--osd-uuid``
    :param keyring: Optional key, sent on stdin of ``ceph-osd``
    :param wal: Optional path of the block.wal device; it gets chowned and
                passed as ``--bluestore-block-wal-path``
    :param db: Optional path of the block.db device; it gets chowned and
               passed as ``--bluestore-block-db-path``
    :raises RuntimeError: if ``ceph-osd`` exits with a code other than 0 or
                          EWOULDBLOCK, or if it still exits with EWOULDBLOCK
                          on the last attempt
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    # read the environment once so the check and the value cannot disagree
    osdspec_affinity = get_osdspec_affinity()
    if osdspec_affinity:
        base_command.extend(['--osdspec-affinity', osdspec_affinity])

    command = base_command + supplementary_command

    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily.
    # See KernelDevice.cc and _lock() to understand how ceph-osd acquires the lock.
    # Because this is really transient, we try up to 5 times and wait for
    # 1 sec in-between
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            return
        if returncode != errno.EWOULDBLOCK:
            raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
        if attempt < max_attempts:
            logger.info(
                'disk is held by another process, trying to mkfs again... (%s/%s attempt)' % (
                    attempt + 1, max_attempts)
            )
            time.sleep(1)

    # Every attempt found the device locked: report the failure instead of
    # returning as if the OSD had been created.
    logger.error('disk is still held by another process after %s attempts, giving up' % max_attempts)
    raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
3efd9988 460