]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
bump version to 15.2.6-pve1
[ceph.git] / ceph / src / ceph-volume / ceph_volume / devices / lvm / prepare.py
CommitLineData
d2e6a577
FG
1from __future__ import print_function
2import json
b32b8144 3import logging
d2e6a577
FG
4from textwrap import dedent
5from ceph_volume.util import prepare as prepare_utils
b32b8144 6from ceph_volume.util import encryption as encryption_utils
181888fb 7from ceph_volume.util import system, disk
3a9019d9 8from ceph_volume.util.arg_validators import exclude_group_options
181888fb 9from ceph_volume import conf, decorators, terminal
3efd9988 10from ceph_volume.api import lvm as api
b32b8144 11from .common import prepare_parser, rollback_osd
d2e6a577
FG
12
13
b32b8144
FG
# Module-level logger, named after this module so that log records can be
# traced back to the lvm prepare sub-command.
logger = logging.getLogger(__name__)
15
16
def prepare_dmcrypt(key, device, device_type, tags):
    """
    Helper for devices that are encrypted. The operations needed for
    block, db, wal, or data/journal devices are all the same

    :param key: The dmcrypt key, handed to both ``luks_format`` and ``luks_open``
    :param device: The device to format and open. A falsy value (``None`` or
                   an empty string) means there is nothing to encrypt
    :param device_type: Selects which ``ceph.<device_type>_uuid`` tag is read
    :param tags: A dict that must contain the ``ceph.<device_type>_uuid`` key
                 whenever ``device`` is set
    :returns: ``/dev/mapper/<uuid>`` for the given device type, or an empty
              string when no device was given
    :raises KeyError: if ``tags`` has no ``ceph.<device_type>_uuid`` entry
    """
    if not device:
        # optional devices (e.g. wal/db) may be unset, nothing to do for them
        return ''
    tag_name = 'ceph.%s_uuid' % device_type
    uuid = tags[tag_name]
    # format the device, regardless of its type
    encryption_utils.luks_format(key, device)
    # open it, passing the uuid from the tags along with the key and device
    encryption_utils.luks_open(key, device, uuid)

    return '/dev/mapper/%s' % uuid
38
39
def prepare_filestore(device, journal, secrets, tags, osd_id, fsid):
    """
    :param device: The data logical volume to work with (callers in this
                   module pass its ``lv_path``)
    :param journal: similar to device but can also be a regular/plain disk
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: A dict of tags. When encrypting it must hold the
                 ``ceph.data_uuid``/``ceph.journal_uuid`` entries and
                 ``ceph.cephx_lockbox_secret``
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    # Only generate a new key when none was supplied. Passing
    # ``create_key()`` as the default of ``dict.get`` would evaluate it on
    # every call, even when the result is thrown away.
    if 'cephx_secret' in secrets:
        cephx_secret = secrets['cephx_secret']
    else:
        cephx_secret = prepare_utils.create_key()

    encrypted = bool(secrets.get('dmcrypt_key'))

    # encryption-only operations
    if encrypted:
        # format and open ('decrypt' devices) and re-assign the device and journal
        # variables so that the rest of the process can use the mapper paths
        key = secrets['dmcrypt_key']
        device = prepare_dmcrypt(key, device, 'data', tags)
        journal = prepare_dmcrypt(key, journal, 'journal', tags)

    # vdo detection
    is_vdo = api.is_vdo(device)
    # create the directory
    prepare_utils.create_osd_path(osd_id)
    # format the device
    prepare_utils.format_device(device)
    # mount the data device
    prepare_utils.mount_osd(device, osd_id, is_vdo=is_vdo)
    # symlink the journal
    prepare_utils.link_journal(journal, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_filestore(osd_id, fsid, cephx_secret)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    if encrypted:
        # if the device is going to get activated right away, this can be done
        # here, otherwise it will be recreated
        encryption_utils.write_lockbox_keyring(
            osd_id,
            fsid,
            tags['ceph.cephx_lockbox_secret']
        )
d2e6a577
FG
82
83
def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid):
    """
    :param block: The logical volume for the bluestore data (callers in this
                  module pass its ``lv_path``)
    :param wal: a regular/plain disk or logical volume, to be used for block.wal
    :param db: a regular/plain disk or logical volume, to be used for block.db
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: A dict of tags. When encrypting it must hold the
                 ``ceph.<type>_uuid`` entry of every device that is set
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    # Only generate a new key when none was supplied. Passing
    # ``create_key()`` as the default of ``dict.get`` would evaluate it on
    # every call, even when the result is thrown away.
    if 'cephx_secret' in secrets:
        cephx_secret = secrets['cephx_secret']
    else:
        cephx_secret = prepare_utils.create_key()

    # encryption-only operations
    if secrets.get('dmcrypt_key'):
        # If encrypted, there is no need to create the lockbox keyring file because
        # bluestore re-creates the files and does not have support for other files
        # like the custom lockbox one. This will need to be done on activation.
        # format and open ('decrypt' devices) and re-assign the block, wal and db
        # variables so that the rest of the process can use the mapper paths
        key = secrets['dmcrypt_key']
        block = prepare_dmcrypt(key, block, 'block', tags)
        wal = prepare_dmcrypt(key, wal, 'wal', tags)
        db = prepare_dmcrypt(key, db, 'db', tags)

    # create the directory
    prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # symlink the block
    prepare_utils.link_block(block, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_bluestore(
        osd_id, fsid,
        keyring=cephx_secret,
        wal=wal,
        db=db
    )
d2e6a577
FG
121
122
class Prepare(object):
    """
    The ``ceph-volume lvm prepare`` sub-command.

    ``main()`` parses ``argv`` and hands over to ``safe_prepare()``, which
    wraps ``prepare()`` so that the OSD id can be rolled back on failure.
    """

    help = 'Format an LVM device and associate it with an OSD'

    def __init__(self, argv):
        self.argv = argv
        # set by ``prepare()``, kept on the instance so that
        # ``safe_prepare()`` can roll it back
        self.osd_id = None

    @staticmethod
    def _lv_from_argument(argument):
        """
        Look up the logical volume that a ``vg/lv`` argument refers to.

        :param argument: A string, expected to look like ``vg/lv``
        :returns: Whatever ``api.get_first_lv`` returns for that vg/lv pair,
                  or ``None`` if a ``ValueError`` is raised (e.g. because
                  splitting on ``/`` does not give exactly two items, as it
                  happens with ``/dev/sdb``)
        """
        try:
            vg_name, lv_name = argument.split('/')
            return api.get_first_lv(filters={'lv_name': lv_name,
                                             'vg_name': vg_name})
        except ValueError:
            return None

    def get_ptuuid(self, argument):
        """
        Return the partition UUID of ``argument``.

        :raises RuntimeError: if no PARTUUID could be detected
        """
        uuid = disk.get_partuuid(argument)
        if not uuid:
            terminal.error('blkid could not detect a PARTUUID for device: %s' % argument)
            raise RuntimeError('unable to use device')
        return uuid

    def setup_device(self, device_type, device_name, tags, size):
        """
        Check if ``device`` is an lv, if so, set the tags, making sure to
        update the tags with the lv_uuid and lv_path which the incoming tags
        will not have.

        If the device is a whole disk, a logical volume gets created on it.

        If the device is not a logical volume, then retrieve the partition UUID
        by querying ``blkid``

        :param device_type: The role of the device, e.g. ``journal``, ``wal``
                            or ``db``; used to build the tag names
        :param device_name: A ``vg/lv`` name, a device or a partition.
                            ``None`` means that the device is not in use
        :param tags: The dict of tags, it is updated in place
        :param size: Size for the logical volume that gets created on a disk,
                     ``0`` to not request a specific size
        :returns: A ``(path, uuid, tags)`` tuple, path and uuid are empty
                  strings when ``device_name`` is ``None``
        """
        if device_name is None:
            return '', '', tags
        tags['ceph.type'] = device_type
        tags['ceph.vdo'] = api.is_vdo(device_name)

        lv = self._lv_from_argument(device_name)

        if lv:
            uuid = lv.lv_uuid
            path = lv.lv_path
            tags['ceph.%s_uuid' % device_type] = uuid
            tags['ceph.%s_device' % device_type] = path
            lv.set_tags(tags)
        elif disk.is_device(device_name):
            # We got a disk, create an lv
            lv_type = "osd-{}".format(device_type)
            uuid = system.generate_uuid()
            tags['ceph.{}_uuid'.format(device_type)] = uuid
            kwargs = {
                'device': device_name,
                'tags': tags,
            }
            if size != 0:
                kwargs['size'] = disk.Size.parse(size)
            lv = api.create_lv(
                lv_type,
                uuid,
                **kwargs)
            path = lv.lv_path
            tags['ceph.{}_device'.format(device_type)] = path
            lv.set_tags(tags)
        else:
            # otherwise assume this is a regular disk partition
            uuid = self.get_ptuuid(device_name)
            path = device_name
            tags['ceph.%s_uuid' % device_type] = uuid
            tags['ceph.%s_device' % device_type] = path
        return path, uuid, tags

    def prepare_data_device(self, device_type, osd_uuid):
        """
        Check if the value of ``--data`` (``self.args.data``) is a device or
        partition to create an LV out of it, assigning the ``ceph.type`` tag
        on it and ultimately, returning the logical volume object. Failing to
        detect a device or partition will result in error.

        :param device_type: Usually, either ``data`` or ``block`` (filestore vs. bluestore)
        :param osd_uuid: The OSD uuid
        :raises RuntimeError: if ``--data`` is neither a device nor a partition
        """
        device = self.args.data
        if not (disk.is_partition(device) or disk.is_device(device)):
            error = [
                'Cannot use device ({}).'.format(device),
                'A vg/lv path or an existing device is needed']
            raise RuntimeError(' '.join(error))

        # we must create a vg, and then a single lv
        lv_name_prefix = "osd-{}".format(device_type)
        kwargs = {'device': device,
                  'tags': {'ceph.type': device_type},
                  }
        logger.debug('data device size: {}'.format(self.args.data_size))
        if self.args.data_size != 0:
            kwargs['size'] = disk.Size.parse(self.args.data_size)
        return api.create_lv(
            lv_name_prefix,
            osd_uuid,
            **kwargs)

    def safe_prepare(self, args=None):
        """
        An intermediate step between `main()` and `prepare()` so that we can
        capture the `self.osd_id` in case we need to rollback

        :param args: Injected args, usually from `lvm create` which compounds
                     both `prepare` and `create`
        :raises RuntimeError: if ``--data`` is an lv that is already a ceph
                              device
        """
        if args is not None:
            self.args = args

        lv = self._lv_from_argument(self.args.data)

        if api.is_ceph_device(lv):
            logger.info("device {} is already used".format(self.args.data))
            raise RuntimeError("skipping {}, it is already prepared".format(self.args.data))
        try:
            self.prepare()
        except Exception:
            logger.exception('lvm prepare was unable to complete')
            logger.info('will rollback OSD ID creation')
            rollback_osd(self.args, self.osd_id)
            raise
        terminal.success("ceph-volume lvm prepare successful for: %s" % self.args.data)

    def get_cluster_fsid(self):
        """
        Allows using --cluster-fsid as an argument, but can fallback to reading
        from ceph.conf if that is unset (the default behavior).
        """
        if self.args.cluster_fsid:
            return self.args.cluster_fsid
        return conf.ceph.get('global', 'fsid')

    @decorators.needs_root
    def prepare(self):
        """
        Create (or reuse) the OSD id, build the tags for the OSD and prepare
        the devices for either filestore or bluestore, depending on
        ``self.args``.
        """
        # FIXME we don't allow re-using a keyring, we always generate one for the
        # OSD, this needs to be fixed. This could either be a file (!) or a string
        # (!!) or some flags that we would need to compound into a dict so that we
        # can convert to JSON (!!!)
        secrets = {'cephx_secret': prepare_utils.create_key()}
        # stored as an integer since it ends up as the ``ceph.encrypted`` tag
        encrypted = 1 if self.args.dmcrypt else 0
        cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key()

        if encrypted:
            secrets['dmcrypt_key'] = encryption_utils.create_dmcrypt_key()
            secrets['cephx_lockbox_secret'] = cephx_lockbox_secret

        cluster_fsid = self.get_cluster_fsid()

        osd_fsid = self.args.osd_fsid or system.generate_uuid()
        crush_device_class = self.args.crush_device_class
        if crush_device_class:
            secrets['crush_device_class'] = crush_device_class
        # reuse a given ID if it exists, otherwise create a new ID
        self.osd_id = prepare_utils.create_id(osd_fsid, json.dumps(secrets), osd_id=self.args.osd_id)
        tags = {
            'ceph.osd_fsid': osd_fsid,
            'ceph.osd_id': self.osd_id,
            'ceph.cluster_fsid': cluster_fsid,
            'ceph.cluster_name': conf.cluster,
            'ceph.crush_device_class': crush_device_class,
            'ceph.osdspec_affinity': prepare_utils.get_osdspec_affinity()
        }
        if self.args.filestore:
            #TODO: allow auto creation of journal on passed device, only works
            # when physical device is passed, not LV
            # main() does the same check, but safe_prepare() can be entered
            # with injected args, which never go through main()
            if not self.args.journal:
                raise RuntimeError('--journal is required when using --filestore')

            data_lv = self._lv_from_argument(self.args.data)
            if not data_lv:
                data_lv = self.prepare_data_device('data', osd_fsid)

            tags['ceph.data_device'] = data_lv.lv_path
            tags['ceph.data_uuid'] = data_lv.lv_uuid
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted
            tags['ceph.vdo'] = api.is_vdo(data_lv.lv_path)

            journal_device, journal_uuid, tags = self.setup_device(
                'journal', self.args.journal, tags, self.args.journal_size)

            # setup_device() set ceph.type to 'journal' on the shared dict,
            # set it back before tagging the data lv
            tags['ceph.type'] = 'data'
            data_lv.set_tags(tags)

            prepare_filestore(
                data_lv.lv_path,
                journal_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )
        elif self.args.bluestore:
            block_lv = self._lv_from_argument(self.args.data)
            if not block_lv:
                block_lv = self.prepare_data_device('block', osd_fsid)

            tags['ceph.block_device'] = block_lv.lv_path
            tags['ceph.block_uuid'] = block_lv.lv_uuid
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted
            tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path)

            wal_device, wal_uuid, tags = self.setup_device(
                'wal', self.args.block_wal, tags, self.args.block_wal_size)
            db_device, db_uuid, tags = self.setup_device(
                'db', self.args.block_db, tags, self.args.block_db_size)

            # setup_device() overwrites ceph.type on the shared dict whenever
            # a wal or db device is given, set it back before tagging the
            # block lv
            tags['ceph.type'] = 'block'
            block_lv.set_tags(tags)

            prepare_bluestore(
                block_lv.lv_path,
                wal_device,
                db_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )

    def main(self):
        """
        Entry point of the sub-command: parse ``self.argv``, validate the
        filestore/bluestore options and call ``safe_prepare()``. The help
        text is printed when there are no arguments at all.
        """
        sub_command_help = dedent("""
        Prepare an OSD by assigning an ID and FSID, registering them with the
        cluster with an ID and FSID, formatting and mounting the volume, and
        finally by adding all the metadata to the logical volumes using LVM
        tags, so that it can later be discovered.

        Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
        it can later get activated and the OSD daemon can get started.

        Encryption is supported via dmcrypt and the --dmcrypt flag.

        Existing logical volume (lv):

            ceph-volume lvm prepare --data {vg/lv}

        Existing block device (a logical volume will be created):

            ceph-volume lvm prepare --data /path/to/device

        Optionally, can consume db and wal devices, partitions or logical
        volumes. A device will get a logical volume, partitions and existing
        logical volumes will be used as is:

            ceph-volume lvm prepare --data {vg/lv} --block.wal {partition} --block.db {/path/to/device}
        """)
        parser = prepare_parser(
            prog='ceph-volume lvm prepare',
            description=sub_command_help,
        )
        if not self.argv:
            print(sub_command_help)
            return
        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore'])
        self.args = parser.parse_args(self.argv)
        # the unfortunate mix of one superset for both filestore and bluestore
        # makes this validation cumbersome
        if self.args.filestore:
            if not self.args.journal:
                raise SystemExit('--journal is required when using --filestore')
        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True
        self.safe_prepare()