]>
Commit | Line | Data |
---|---|---|
d2e6a577 FG |
1 | """ |
2 | These utilities for prepare provide all the pieces needed to prepare a device | |
3 | but also a compounded ("single call") helper to do them in order. Some plugins | |
4 | may want to change some part of the process, while others might want to consume | |
5 | the single-call helper | |
6 | """ | |
f91f0fd5 | 7 | import errno |
d2e6a577 FG |
8 | import os |
9 | import logging | |
b32b8144 | 10 | import json |
f91f0fd5 | 11 | import time |
20effc67 | 12 | from ceph_volume import process, conf, terminal |
1adf2230 | 13 | from ceph_volume.util import system, constants, str_to_int, disk |
d2e6a577 FG |
14 | |
15 | logger = logging.getLogger(__name__) | |
1adf2230 | 16 | mlogger = terminal.MultiLogger(__name__) |
d2e6a577 FG |
17 | |
18 | ||
def create_key():
    """
    Generate a new authentication key by running
    ``ceph-authtool --gen-print-key``.

    :returns: the ``stdout`` value reported by ``process.call`` joined with
              single spaces, with surrounding whitespace removed
    :raises RuntimeError: when the command exits with a non-zero return code
    """
    authtool_command = ['ceph-authtool', '--gen-print-key']
    out, _err, rc = process.call(
        authtool_command,
        show_command=True,
        logfile_verbose=False,
    )
    if rc != 0:
        raise RuntimeError('Unable to generate a new auth key')
    key = ' '.join(out)
    return key.strip()
27 | ||
28 | ||
b32b8144 FG |
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Write a keyring file for an OSD using the ``ceph-authtool`` utility and
    hand its ownership to ``system.chown``.

    The file is placed in the conventional OSD directory:
    ``/var/lib/ceph/osd/{cluster}-{osd_id}/{keyring_name}``.

    :param osd_id: The ID of the OSD the keyring belongs to
    :param secret: The key to add to the keyring (a string)
    :param keyring_name: File name of the keyring inside the OSD directory,
                         useful for other key types such as lockbox
    :param name: Entity name stored in the keyring. Falls back to
                 ``osd.{osd_id}`` when empty or not given
    """
    keyring_path = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
    if not name:
        name = 'osd.%s' % str(osd_id)
    mlogger.info(f'Creating keyring file for {name}')
    authtool_command = [
        'ceph-authtool', keyring_path,
        '--create-keyring',
        '--name', name,
        '--add-key', secret,
    ]
    process.call(authtool_command, logfile_verbose=False)
    system.chown(keyring_path)
d2e6a577 FG |
54 | |
55 | ||
91327a77 AA |
def get_block_db_size(lv_format=True):
    """
    Read ``bluestore_block_db_size`` from the ``osd`` section of ceph.conf and
    turn it into the size to use when creating the block.db logical volume.

    The configured value is converted with ``str_to_int`` and interpreted as a
    number of bytes. A ``RuntimeError`` raised while reading the configuration
    is logged and treated the same as an unset value.

    :param lv_format: When true, return a string for ``lv_create`` made of the
                      size in whole gigabytes followed by ``G`` (e.g. ``'5G'``),
                      otherwise return the ``disk.Size`` object itself.
    :returns: ``None`` when no size is configured, else the size in the
              requested form
    :raises RuntimeError: when the configured size is below 2GB
    """
    configured = None
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")

    if configured:
        logger.debug('bluestore_block_db_size set to %s' % configured)
        size = disk.Size(b=str_to_int(configured))

        if size < disk.Size(gb=2):
            mlogger.error('Refusing to continue with configured size for block.db')
            raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % size)
        return '%sG' % size.gb.as_int() if lv_format else size

    logger.debug(
        'block.db has no size configuration, will fallback to using as much as possible'
    )
    # TODO better to return disk.Size(b=0) here
    return None
90 | ||
11fdf7f2 TL |
def get_block_wal_size(lv_format=True):
    """
    Read ``bluestore_block_wal_size`` from the ``osd`` section of ceph.conf and
    turn it into the size to use when creating the block.wal logical volume.

    The configured value is converted with ``str_to_int`` and interpreted as a
    number of bytes. A ``RuntimeError`` raised while reading the configuration
    is logged and treated the same as an unset value.

    :param lv_format: When true, return a string for ``lv_create`` made of the
                      size in whole gigabytes followed by ``G`` (e.g. ``'5G'``),
                      otherwise return the ``disk.Size`` object itself.
    :returns: ``None`` when no size is configured, else the size in the
              requested form
    :raises RuntimeError: when the configured size is below 2GB
    """
    configured = None
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")

    if configured:
        logger.debug('bluestore_block_wal_size set to %s' % configured)
        size = disk.Size(b=str_to_int(configured))

        if size < disk.Size(gb=2):
            mlogger.error('Refusing to continue with configured size for block.wal')
            raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % size)
        return '%sG' % size.gb.as_int() if lv_format else size

    logger.debug(
        'block.wal has no size configuration, will fallback to using as much as possible'
    )
    return None
124 | ||
91327a77 | 125 | |
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with ``ceph osd new`` using the bootstrap-osd keyring.

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor; it is sent to the command
                         through stdin
    :param osd_id: Optional ID to request. It is only appended to the command
                   when ``osd_id_available`` reports it as usable
    :returns: the ``stdout`` of the command joined with single spaces and
              stripped
    :raises RuntimeError: when ``osd_id`` is given but not available, or when
                          the command exits with a non-zero return code
    """
    keyring_path = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    new_osd_command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring_path,
        '-i', '-',
        'osd', 'new', fsid
    ]
    if osd_id is not None:
        if not osd_id_available(osd_id):
            raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
        new_osd_command.append(osd_id)

    out, _err, rc = process.call(
        new_osd_command,
        stdin=json_secrets,
        show_command=True
    )
    if rc != 0:
        raise RuntimeError('Unable to create a new OSD id')
    created = ' '.join(out)
    return created.strip()
156 | ||
157 | ||
def osd_id_available(osd_id):
    """
    Tell whether an OSD ID can be (re)used, based on the JSON output of
    ``ceph osd tree``.

    An ID is considered available when no node in the tree carries it, or when
    the first node carrying it has the status ``destroyed``.

    :param osd_id: The osd ID to check; ``None`` is never available
    :returns: ``True`` when the ID is available, ``False`` otherwise
    :raises RuntimeError: when the ``ceph osd tree`` command fails
    """
    if osd_id is None:
        return False

    keyring_path = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    tree_command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring_path,
        'osd',
        'tree',
        '-f', 'json',
    ]
    out, _err, rc = process.call(tree_command, show_command=True)
    if rc != 0:
        raise RuntimeError('Unable check if OSD id exists: %s' % osd_id)

    tree = json.loads(''.join(out).strip())
    # IDs are compared as strings so that 1 and '1' are the same ID
    matches = [node for node in tree['nodes'] if str(node['id']) == str(osd_id)]
    if not matches:
        return True
    return matches[0].get('status') == "destroyed"
d2e6a577 FG |
190 | |
191 | ||
3efd9988 FG |
def mount_tmpfs(path):
    """
    Mount a ``tmpfs`` filesystem on ``path`` and then let
    ``system.set_context`` restore the SELinux context of the mount point.

    :param path: directory on which the tmpfs gets mounted
    """
    mount_command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(mount_command)

    # Restore SELinux context
    system.set_context(path)
202 | ||
3efd9988 FG |
203 | |
def create_osd_path(osd_id, tmpfs=False):
    """
    Create the OSD directory ``/var/lib/ceph/osd/{cluster}-{osd_id}`` and,
    when requested, mount a tmpfs on top of it.

    :param osd_id: ID of the OSD the directory is created for
    :param tmpfs: when true, ``mount_tmpfs`` is called on the new directory
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    system.mkdir_p(osd_dir)
    if not tmpfs:
        return
    mount_tmpfs(osd_dir)
d2e6a577 FG |
209 | |
210 | ||
def format_device(device):
    """
    Create an xfs filesystem on ``device`` with ``mkfs`` (only xfs is
    supported).

    The mkfs options come from ``osd_mkfs_options_xfs`` in the ``osd`` section
    of ceph.conf, falling back to the defaults in ``constants.mkfs``. The
    ``-f`` (force) flag is always part of the command.

    :param device: the device to format
    """
    xfs_flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # always force
    if '-f' not in xfs_flags:
        xfs_flags.insert(0, '-f')

    mkfs_command = ['mkfs', '-t', 'xfs', *xfs_flags, device]
    process.run(mkfs_command)
231 | ||
232 | ||
94b18763 | 233 | def _normalize_mount_flags(flags, extras=None): |
3a9019d9 FG |
234 | """ |
235 | Mount flag options have to be a single string, separated by a comma. If the | |
236 | flags are separated by spaces, or with commas and spaces in ceph.conf, the | |
237 | mount options will be passed incorrectly. | |
238 | ||
239 | This will help when parsing ceph.conf values return something like:: | |
240 | ||
241 | ["rw,", "exec,"] | |
242 | ||
243 | Or:: | |
244 | ||
245 | [" rw ,", "exec"] | |
246 | ||
247 | :param flags: A list of flags, or a single string of mount flags | |
94b18763 FG |
248 | :param extras: Extra set of mount flags, useful when custom devices like VDO need |
249 | ad-hoc mount configurations | |
3a9019d9 | 250 | """ |
94b18763 FG |
251 | # Instead of using set(), we append to this new list here, because set() |
252 | # will create an arbitrary order on the items that is made worst when | |
253 | # testing with tools like tox that includes a randomizer seed. By | |
254 | # controlling the order, it is easier to correctly assert the expectation | |
255 | unique_flags = [] | |
3a9019d9 | 256 | if isinstance(flags, list): |
94b18763 FG |
257 | if extras: |
258 | flags.extend(extras) | |
259 | ||
3a9019d9 | 260 | # ensure that spaces and commas are removed so that they can join |
94b18763 FG |
261 | # correctly, remove duplicates |
262 | for f in flags: | |
263 | if f and f not in unique_flags: | |
264 | unique_flags.append(f.strip().strip(',')) | |
265 | return ','.join(unique_flags) | |
3a9019d9 FG |
266 | |
267 | # split them, clean them, and join them back again | |
268 | flags = flags.strip().split(' ') | |
94b18763 FG |
269 | if extras: |
270 | flags.extend(extras) | |
271 | ||
272 | # remove possible duplicates | |
273 | for f in flags: | |
274 | if f and f not in unique_flags: | |
275 | unique_flags.append(f.strip().strip(',')) | |
276 | flags = ','.join(unique_flags) | |
277 | # Before returning, split them again, since strings can be mashed up | |
278 | # together, preventing removal of duplicate entries | |
279 | return ','.join(set(flags.split(','))) | |
280 | ||
281 | ||
def mount_osd(device, osd_id, **kw):
    """
    Mount ``device`` as xfs on the OSD directory
    ``/var/lib/ceph/osd/{cluster}-{osd_id}`` and restore the SELinux context
    of the mount point afterwards.

    The mount options come from ``osd_mount_options_xfs`` in the ``osd``
    section of ceph.conf, falling back to the defaults in ``constants.mount``.

    :param device: the device to mount
    :param osd_id: ID of the OSD whose directory is the mount point
    :param kw: only ``is_vdo`` is read; when it is the string ``'1'`` the
               ``discard`` mount option is added
    """
    extras = []
    is_vdo = kw.get('is_vdo', '0')
    if is_vdo == '1':
        extras = ['discard']
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    command = ['mount', '-t', 'xfs', '-o']
    flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    if isinstance(flags, list):
        # Work on a copy: the list may be the shared default from
        # ``constants.mount`` (NOTE(review): depends on what ``get_list``
        # returns, confirm) and adding the extras must not alter it for
        # later calls
        flags = list(flags)
    command.append(
        _normalize_mount_flags(flags, extras=extras)
    )
    command.append(device)
    command.append(destination)
    process.run(command)

    # Restore SELinux context
    system.set_context(destination)
304 | ||
d2e6a577 | 305 | |
3efd9988 FG |
def _link_device(device, device_type, osd_id):
    """
    Allow linking any device type in an OSD directory. ``device`` must be the
    source, with an absolute path, and ``device_type`` will be the destination
    name, like 'journal', or 'block'.

    Ownership of ``device`` is changed with ``system.chown`` before the
    symbolic link is created.
    """
    link_path = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, device_type)
    system.chown(device)

    process.run(['ln', '-s', device, link_path])
321 | ||
92f5a8d4 TL |
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Validate whether the given device is truly what it is supposed to be,
    using the label reported by ``ceph-bluestore-tool show-label``.

    Each failed check reports a message with ``terminal.error`` and raises
    ``SystemExit(1)``. In order, the checks are: the tool produced no stderr
    output, it returned zero, ``device`` is a key in its JSON output, the
    label description equals ``excepted_device_type`` and the label
    ``osd_uuid`` equals ``osd_uuid``.

    :param device: path of the device to inspect
    :param excepted_device_type: the description the label is expected to have
    :param osd_uuid: the OSD uuid the label is expected to have
    """

    def _abort(message):
        # Every failed check ends the same way: report and exit
        terminal.error(message)
        raise SystemExit(1)

    out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
    if err:
        _abort('ceph-bluestore-tool failed to run. %s'% err)
    if ret:
        _abort('no label on %s'% device)

    labels = json.loads(''.join(out))
    if device not in labels:
        _abort('%s not in the output of ceph-bluestore-tool, buggy?'% device)

    label = labels[device]
    current_device_type = label['description']
    if current_device_type != excepted_device_type:
        _abort('%s is not a %s device but %s'% (device, excepted_device_type, current_device_type))

    current_osd_uuid = label['osd_uuid']
    if current_osd_uuid != osd_uuid:
        _abort('device %s is used by another osd %s as %s, should be %s'% (device, current_osd_uuid, current_device_type, osd_uuid))
d2e6a577 | 346 | |
3efd9988 FG |
347 | |
def link_block(block_device, osd_id):
    """
    Link ``block_device`` as ``block`` inside the directory of OSD ``osd_id``.
    """
    link_name = 'block'
    _link_device(block_device, link_name, osd_id)
350 | ||
351 | ||
92f5a8d4 TL |
def link_wal(wal_device, osd_id, osd_uuid=None):
    """
    Validate that ``wal_device`` is labelled as a 'bluefs wal' device of the
    OSD with uuid ``osd_uuid`` and then link it as ``block.wal`` inside the
    directory of OSD ``osd_id``.
    """
    expected_type = 'bluefs wal'
    _validate_bluestore_device(wal_device, expected_type, osd_uuid)
    link_name = 'block.wal'
    _link_device(wal_device, link_name, osd_id)
355 | ||
356 | ||
92f5a8d4 TL |
def link_db(db_device, osd_id, osd_uuid=None):
    """
    Validate that ``db_device`` is labelled as a 'bluefs db' device of the
    OSD with uuid ``osd_uuid`` and then link it as ``block.db`` inside the
    directory of OSD ``osd_id``.
    """
    expected_type = 'bluefs db'
    _validate_bluestore_device(db_device, expected_type, osd_uuid)
    link_name = 'block.db'
    _link_device(db_device, link_name, osd_id)
360 | ||
361 | ||
d2e6a577 FG |
def get_monmap(osd_id):
    r"""
    Fetch the monmap into the OSD directory as ``activate.monmap``, using the
    bootstrap-osd keyring. The resulting call looks like::

        ceph --cluster ceph --name client.bootstrap-osd \
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap

    :param osd_id: ID of the OSD whose directory receives the monmap
    """
    osd_dir = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    keyring_path = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    destination = os.path.join(osd_dir, 'activate.monmap')

    getmap_command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring_path,
        'mon', 'getmap', '-o', destination
    ]
    process.run(getmap_command)
382 | ||
383 | ||
e306af50 TL |
def get_osdspec_affinity():
    """
    Return the value of the ``CEPH_VOLUME_OSDSPEC_AFFINITY`` environment
    variable, or an empty string when it is not set.
    """
    affinity = os.getenv('CEPH_VOLUME_OSDSPEC_AFFINITY', '')
    return affinity
386 | ||
387 | ||
3efd9988 FG |
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    r"""
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command

    :param osd_id: ID of the OSD, used for ``-i`` and to locate its directory
    :param fsid: the OSD uuid, passed as ``--osd-uuid``
    :param keyring: optional key; when given ``--keyfile -`` is added and the
                    value is sent to the command through stdin
    :param wal: optional path of the block.wal device; ownership is changed
                with ``system.chown``
    :param db: optional path of the block.db device; ownership is changed
               with ``system.chown``
    :raises RuntimeError: when ``ceph-osd --mkfs`` fails, including when every
                          attempt reported that the device is locked
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        # '-' makes ceph-osd read the key from stdin, see process.call below
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    osdspec_affinity = get_osdspec_affinity()
    if osdspec_affinity:
        base_command.extend(['--osdspec-affinity', osdspec_affinity])

    command = base_command + supplementary_command

    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily. See KernelDevice.cc and _lock() to understand how
    # ceph-osd acquires the lock. Because this is really transient, we try up
    # to 5 times and wait for 1 sec in-between
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            return
        if returncode != errno.EWOULDBLOCK:
            # any other failure is not transient, give up right away
            break
        logger.info(
            'disk is held by another process, trying to mkfs again... (%s/%s attempt)' % (attempt, max_attempts)
        )
        if attempt < max_attempts:
            time.sleep(1)

    # Reached on a non-transient failure or when the device stayed locked for
    # every attempt: in both cases mkfs did not succeed and must not be
    # reported as a success
    raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
3efd9988 | 460 |