]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
compile with GCC 12 not 11
[ceph.git] / ceph / src / ceph-volume / ceph_volume / devices / lvm / prepare.py
CommitLineData
d2e6a577
FG
1from __future__ import print_function
2import json
b32b8144 3import logging
d2e6a577
FG
4from textwrap import dedent
5from ceph_volume.util import prepare as prepare_utils
b32b8144 6from ceph_volume.util import encryption as encryption_utils
181888fb 7from ceph_volume.util import system, disk
3a9019d9 8from ceph_volume.util.arg_validators import exclude_group_options
181888fb 9from ceph_volume import conf, decorators, terminal
3efd9988 10from ceph_volume.api import lvm as api
b32b8144 11from .common import prepare_parser, rollback_osd
d2e6a577
FG
12
13
b32b8144
FG
14logger = logging.getLogger(__name__)
15
16
def prepare_dmcrypt(key, device, device_type, tags):
    """
    Shared helper for encrypted devices: the same steps apply whether the
    device is used for block, db, wal, or data/journal.

    :param key: The dmcrypt key handed to ``luks_format`` and ``luks_open``
    :param device: The device to encrypt; a falsy value short-circuits
    :param device_type: Used to build the ``ceph.<device_type>_uuid`` tag name
    :param tags: A dict of tags that must contain ``ceph.<device_type>_uuid``
    :returns: ``/dev/mapper/<uuid>`` for the device, or an empty string
              when no device was given
    """
    if device:
        # the uuid stored in the tags is passed to ``luks_open`` and is also
        # the last component of the returned mapper path
        mapper_name = tags['ceph.%s_uuid' % device_type]
        # format first, then open
        encryption_utils.luks_format(key, device)
        encryption_utils.luks_open(key, device, mapper_name)
        return '/dev/mapper/%s' % mapper_name

    return ''
38
39
def prepare_filestore(device, journal, secrets, tags, osd_id, fsid):
    """
    Prepare a filestore OSD: create the OSD directory, format and mount the
    data device, link the journal, fetch the monmap, run mkfs and write the
    OSD keyring.

    :param device: The name of the logical volume to work with
    :param journal: similar to device but can also be a regular/plain disk
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: A dict of tags; when a ``dmcrypt_key`` secret is present it
                 is read for ``ceph.data_uuid``, ``ceph.journal_uuid`` and
                 ``ceph.cephx_lockbox_secret``
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    # ``dict.get(key, default)`` evaluates ``default`` eagerly, which would
    # generate (and throw away) a key even when the caller provided one
    if 'cephx_secret' in secrets:
        cephx_secret = secrets['cephx_secret']
    else:
        cephx_secret = prepare_utils.create_key()

    dmcrypt_key = secrets.get('dmcrypt_key')

    # encryption-only operations
    if dmcrypt_key:
        # format and open ('decrypt' devices) and re-assign the device and journal
        # variables so that the rest of the process can use the mapper paths
        device = prepare_dmcrypt(dmcrypt_key, device, 'data', tags)
        journal = prepare_dmcrypt(dmcrypt_key, journal, 'journal', tags)

    # vdo detection
    is_vdo = api.is_vdo(device)
    # create the directory
    prepare_utils.create_osd_path(osd_id)
    # format the device
    prepare_utils.format_device(device)
    # mount the data device
    prepare_utils.mount_osd(device, osd_id, is_vdo=is_vdo)
    # symlink the journal
    prepare_utils.link_journal(journal, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_filestore(osd_id, fsid, cephx_secret)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    if dmcrypt_key:
        # if the device is going to get activated right away, this can be done
        # here, otherwise it will be recreated
        encryption_utils.write_lockbox_keyring(
            osd_id,
            fsid,
            tags['ceph.cephx_lockbox_secret']
        )
d2e6a577
FG
82
83
def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid):
    """
    Prepare a bluestore OSD: create the OSD directory, link the block device,
    fetch the monmap, write the OSD keyring and run mkfs.

    :param block: The name of the logical volume for the bluestore data
    :param wal: a regular/plain disk or logical volume, to be used for block.wal
    :param db: a regular/plain disk or logical volume, to be used for block.db
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: A dict of tags; when a ``dmcrypt_key`` secret is present it
                 is read for the ``ceph.block_uuid``, ``ceph.wal_uuid`` and
                 ``ceph.db_uuid`` entries of the devices that were given
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    # ``dict.get(key, default)`` evaluates ``default`` eagerly, which would
    # generate (and throw away) a key even when the caller provided one
    if 'cephx_secret' in secrets:
        cephx_secret = secrets['cephx_secret']
    else:
        cephx_secret = prepare_utils.create_key()

    dmcrypt_key = secrets.get('dmcrypt_key')

    # encryption-only operations
    if dmcrypt_key:
        # If encrypted, there is no need to create the lockbox keyring file because
        # bluestore re-creates the files and does not have support for other files
        # like the custom lockbox one. This will need to be done on activation.
        # format and open ('decrypt' devices) and re-assign the device and journal
        # variables so that the rest of the process can use the mapper paths
        block = prepare_dmcrypt(dmcrypt_key, block, 'block', tags)
        wal = prepare_dmcrypt(dmcrypt_key, wal, 'wal', tags)
        db = prepare_dmcrypt(dmcrypt_key, db, 'db', tags)

    # create the directory
    prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # symlink the block
    prepare_utils.link_block(block, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_bluestore(
        osd_id, fsid,
        keyring=cephx_secret,
        wal=wal,
        db=db
    )
d2e6a577
FG
121
122
class Prepare(object):
    """
    Implementation of the ``ceph-volume lvm prepare`` sub-command.

    ``self.args`` is not set by ``__init__``: it is populated either by
    ``main()`` (parsed from ``argv``) or by ``safe_prepare(args)`` when the
    caller injects already-parsed arguments.
    """

    help = 'Format an LVM device and associate it with an OSD'

    def __init__(self, argv):
        self.argv = argv
        # filled in by ``prepare()``; kept on the instance so that
        # ``safe_prepare()`` can roll the OSD back on failure
        self.osd_id = None

    def get_ptuuid(self, argument):
        """
        Return the PARTUUID reported by ``disk.get_partuuid`` for ``argument``.

        :param argument: The device to query
        :raises RuntimeError: when no PARTUUID could be detected
        """
        uuid = disk.get_partuuid(argument)
        if not uuid:
            terminal.error('blkid could not detect a PARTUUID for device: %s' % argument)
            raise RuntimeError('unable to use device')
        return uuid

    def setup_device(self, device_type, device_name, tags, size, slots):
        """
        Check if ``device`` is an lv, if so, set the tags, making sure to
        update the tags with the lv_uuid and lv_path which the incoming tags
        will not have.

        If ``device_name`` is a whole device, a logical volume is created on
        it first and tagged the same way.

        If the device is not a logical volume, then retrieve the partition UUID
        by querying ``blkid``

        :param device_type: Used to build the ``ceph.<device_type>_uuid`` and
                            ``ceph.<device_type>_device`` tag names
        :param device_name: A ``vg/lv`` name, a device or a partition; ``None``
                            means the device was not requested
        :param tags: The dict of tags to update; it is modified in place
        :param size: Size for a newly created lv, ``0`` leaves it unspecified
        :param slots: Passed as ``slots`` to ``api.create_lv`` for new lvs
        :returns: A ``(path, uuid, tags)`` tuple, ``('', '', tags)`` when
                  ``device_name`` is ``None``
        """
        if device_name is None:
            return '', '', tags
        tags['ceph.type'] = device_type
        tags['ceph.vdo'] = api.is_vdo(device_name)

        try:
            # anything that does not split into exactly two parts is not
            # in ``vg/lv`` form
            vg_name, lv_name = device_name.split('/')
            lv = api.get_single_lv(filters={'lv_name': lv_name,
                                            'vg_name': vg_name})
        except ValueError:
            lv = None

        if lv:
            lv_uuid = lv.lv_uuid
            path = lv.lv_path
            tags['ceph.%s_uuid' % device_type] = lv_uuid
            tags['ceph.%s_device' % device_type] = path
            lv.set_tags(tags)
        elif disk.is_device(device_name):
            # We got a disk, create an lv
            lv_type = "osd-{}".format(device_type)
            name_uuid = system.generate_uuid()
            kwargs = {
                'device': device_name,
                'tags': tags,
                'slots': slots
            }
            #TODO use get_block_db_size and co here to get configured size in
            #conf file
            if size != 0:
                kwargs['size'] = size
            lv = api.create_lv(
                lv_type,
                name_uuid,
                **kwargs)
            path = lv.lv_path
            tags['ceph.{}_device'.format(device_type)] = path
            tags['ceph.{}_uuid'.format(device_type)] = lv.lv_uuid
            lv_uuid = lv.lv_uuid
            lv.set_tags(tags)
        else:
            # otherwise assume this is a regular disk partition
            name_uuid = self.get_ptuuid(device_name)
            path = device_name
            tags['ceph.%s_uuid' % device_type] = name_uuid
            tags['ceph.%s_device' % device_type] = path
            lv_uuid = name_uuid
        return path, lv_uuid, tags

    def prepare_data_device(self, device_type, osd_uuid):
        """
        Check if ``self.args.data`` is a device or partition to create an LV
        out of it, assigning the ``ceph.type`` tag on it and ultimately,
        returning the logical volume object. Failing to detect a device or
        partition will result in error.

        :param device_type: Usually, either ``data`` or ``block`` (filestore vs. bluestore)
        :param osd_uuid: The OSD uuid
        :raises RuntimeError: when ``self.args.data`` is neither a partition
                              nor a device
        """
        device = self.args.data
        if not (disk.is_partition(device) or disk.is_device(device)):
            error = [
                'Cannot use device ({}).'.format(device),
                'A vg/lv path or an existing device is needed']
            raise RuntimeError(' '.join(error))

        # we must create a vg, and then a single lv
        lv_name_prefix = "osd-{}".format(device_type)
        kwargs = {'device': device,
                  'tags': {'ceph.type': device_type},
                  'slots': self.args.data_slots,
                  }
        logger.debug('data device size: {}'.format(self.args.data_size))
        if self.args.data_size != 0:
            kwargs['size'] = self.args.data_size
        return api.create_lv(
            lv_name_prefix,
            osd_uuid,
            **kwargs)

    def safe_prepare(self, args=None):
        """
        An intermediate step between `main()` and `prepare()` so that we can
        capture the `self.osd_id` in case we need to rollback

        :param args: Injected args, usually from `lvm create` which compounds
                     both `prepare` and `create`
        :raises RuntimeError: when the data lv is already a ceph device
        """
        if args is not None:
            self.args = args

        try:
            vgname, lvname = self.args.data.split('/')
            lv = api.get_single_lv(filters={'lv_name': lvname,
                                            'vg_name': vgname})
        except ValueError:
            lv = None

        if api.is_ceph_device(lv):
            logger.info("device {} is already used".format(self.args.data))
            raise RuntimeError("skipping {}, it is already prepared".format(self.args.data))
        try:
            self.prepare()
        except Exception:
            logger.exception('lvm prepare was unable to complete')
            logger.info('will rollback OSD ID creation')
            rollback_osd(self.args, self.osd_id)
            # the failure is still reported to the caller after the rollback
            raise
        terminal.success("ceph-volume lvm prepare successful for: %s" % self.args.data)

    def get_cluster_fsid(self):
        """
        Allows using --cluster-fsid as an argument, but can fallback to reading
        from ceph.conf if that is unset (the default behavior).
        """
        if self.args.cluster_fsid:
            return self.args.cluster_fsid
        return conf.ceph.get('global', 'fsid')

    @decorators.needs_root
    def prepare(self):
        """
        Create the OSD id, build the tags and secrets, set up the data and
        auxiliary devices and hand over to ``prepare_filestore`` or
        ``prepare_bluestore`` depending on ``self.args``.
        """
        # FIXME we don't allow re-using a keyring, we always generate one for the
        # OSD, this needs to be fixed. This could either be a file (!) or a string
        # (!!) or some flags that we would need to compound into a dict so that we
        # can convert to JSON (!!!)
        secrets = {'cephx_secret': prepare_utils.create_key()}
        encrypted = 1 if self.args.dmcrypt else 0
        cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key()

        if encrypted:
            secrets['dmcrypt_key'] = encryption_utils.create_dmcrypt_key()
            secrets['cephx_lockbox_secret'] = cephx_lockbox_secret

        cluster_fsid = self.get_cluster_fsid()

        osd_fsid = self.args.osd_fsid or system.generate_uuid()
        crush_device_class = self.args.crush_device_class
        if crush_device_class:
            secrets['crush_device_class'] = crush_device_class
        # reuse a given ID if it exists, otherwise create a new ID
        self.osd_id = prepare_utils.create_id(osd_fsid, json.dumps(secrets), osd_id=self.args.osd_id)
        tags = {
            'ceph.osd_fsid': osd_fsid,
            'ceph.osd_id': self.osd_id,
            'ceph.cluster_fsid': cluster_fsid,
            'ceph.cluster_name': conf.cluster,
            'ceph.crush_device_class': crush_device_class,
            'ceph.osdspec_affinity': prepare_utils.get_osdspec_affinity()
        }
        if self.args.filestore:
            if not self.args.journal:
                logger.info(('no journal was specifed, creating journal lv '
                             'on {}').format(self.args.data))
                self.args.journal = self.args.data
                self.args.journal_size = disk.Size(g=5)
                # need to adjust data size/slots for colocated journal
                if self.args.data_size:
                    self.args.data_size -= self.args.journal_size
                if self.args.data_slots == 1:
                    self.args.data_slots = 0
                else:
                    raise RuntimeError('Can\'t handle multiple filestore OSDs '
                                       'with colocated journals yet. Please '
                                       'create journal LVs manually')
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted

            journal_device, journal_uuid, tags = self.setup_device(
                'journal',
                self.args.journal,
                tags,
                self.args.journal_size,
                self.args.journal_slots)

            try:
                vg_name, lv_name = self.args.data.split('/')
                data_lv = api.get_single_lv(filters={'lv_name': lv_name,
                                                     'vg_name': vg_name})
            except ValueError:
                data_lv = None

            if not data_lv:
                data_lv = self.prepare_data_device('data', osd_fsid)

            tags['ceph.data_device'] = data_lv.lv_path
            tags['ceph.data_uuid'] = data_lv.lv_uuid
            tags['ceph.vdo'] = api.is_vdo(data_lv.lv_path)
            tags['ceph.type'] = 'data'
            data_lv.set_tags(tags)
            if not journal_device.startswith('/'):
                # we got a journal lv, set rest of the tags
                # NOTE(review): this lookup uses the vg/lv names parsed from
                # ``--data`` (not from the journal), and those names are
                # unbound when ``--data`` was not in vg/lv form -- confirm the
                # intended target before relying on this branch
                api.get_single_lv(filters={'lv_name': lv_name,
                                           'vg_name': vg_name}).set_tags(tags)

            prepare_filestore(
                data_lv.lv_path,
                journal_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )
        elif self.args.bluestore:
            try:
                vg_name, lv_name = self.args.data.split('/')
                block_lv = api.get_single_lv(filters={'lv_name': lv_name,
                                                      'vg_name': vg_name})
            except ValueError:
                block_lv = None

            if not block_lv:
                block_lv = self.prepare_data_device('block', osd_fsid)

            tags['ceph.block_device'] = block_lv.lv_path
            tags['ceph.block_uuid'] = block_lv.lv_uuid
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted
            tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path)

            wal_device, wal_uuid, tags = self.setup_device(
                'wal',
                self.args.block_wal,
                tags,
                self.args.block_wal_size,
                self.args.block_wal_slots)
            db_device, db_uuid, tags = self.setup_device(
                'db',
                self.args.block_db,
                tags,
                self.args.block_db_size,
                self.args.block_db_slots)

            # ``setup_device`` overwrites ``ceph.type`` with the wal/db type,
            # so restore it before tagging the block lv
            tags['ceph.type'] = 'block'
            block_lv.set_tags(tags)

            prepare_bluestore(
                block_lv.lv_path,
                wal_device,
                db_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )

    def main(self):
        """
        Parse ``self.argv`` and run ``safe_prepare()``; with no arguments the
        sub-command help is printed instead.

        :raises SystemExit: when ``--filestore`` is used without ``--journal``
        """
        sub_command_help = dedent("""
        Prepare an OSD by assigning an ID and FSID, registering them with the
        cluster with an ID and FSID, formatting and mounting the volume, and
        finally by adding all the metadata to the logical volumes using LVM
        tags, so that it can later be discovered.

        Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
        it can later get activated and the OSD daemon can get started.

        Encryption is supported via dmcrypt and the --dmcrypt flag.

        Existing logical volume (lv):

            ceph-volume lvm prepare --data {vg/lv}

        Existing block device (a logical volume will be created):

            ceph-volume lvm prepare --data /path/to/device

        Optionally, can consume db and wal devices, partitions or logical
        volumes. A device will get a logical volume, partitions and existing
        logical volumes will be used as is:

            ceph-volume lvm prepare --data {vg/lv} --block.wal {partition} --block.db {/path/to/device}
        """)
        parser = prepare_parser(
            prog='ceph-volume lvm prepare',
            description=sub_command_help,
        )
        if not self.argv:
            print(sub_command_help)
            return
        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore'])
        self.args = parser.parse_args(self.argv)
        # the unfortunate mix of one superset for both filestore and bluestore
        # makes this validation cumbersome
        if self.args.filestore:
            if not self.args.journal:
                raise SystemExit('--journal is required when using --filestore')
        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True
        self.safe_prepare()