]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py
update ceph source to reef 18.2.0
[ceph.git] / ceph / src / ceph-volume / ceph_volume / devices / lvm / batch.py
CommitLineData
1adf2230 1import argparse
f91f0fd5 2from collections import namedtuple
f6b5b4d7 3import json
f91f0fd5 4import logging
1adf2230
AA
5from textwrap import dedent
6from ceph_volume import terminal, decorators
f91f0fd5
TL
7from ceph_volume.util import disk, prompt_bool, arg_validators, templates
8from ceph_volume.util import prepare
9from . import common
10from .create import Create
11from .prepare import Prepare
1adf2230 12
91327a77
AA
# Multiplexed logger: writes to the terminal as well as the log file.
mlogger = terminal.MultiLogger(__name__)
# Standard module-level logger for non-interactive messages.
logger = logging.getLogger(__name__)
15
1adf2230
AA
16
device_list_template = """
  * {path: <25} {size: <10} {state}"""


def device_formatter(devices):
    '''
    Render ``(path, details)`` pairs as an indented, aligned device listing.

    Each ``details`` mapping must provide ``human_readable_size`` and
    ``rotational`` (the string ``'0'`` denotes a solid state drive).
    Returns the concatenated, newline-led listing as a single string.
    '''
    rendered = [
        device_list_template.format(
            path=path,
            size=details['human_readable_size'],
            state='solid' if details['rotational'] == '0' else 'rotational')
        for path, details in devices
    ]
    return ''.join(rendered)
30
31
05a536ef 32def ensure_disjoint_device_lists(data, db=[], wal=[]):
f91f0fd5
TL
33 # check that all device lists are disjoint with each other
34 if not all([set(data).isdisjoint(set(db)),
35 set(data).isdisjoint(set(wal)),
f91f0fd5
TL
36 set(db).isdisjoint(set(wal))]):
37 raise Exception('Device lists are not disjoint')
38
39
def separate_devices_from_lvs(devices):
    '''
    Split *devices* into physical block devices and logical volumes.

    Returns a ``(physical, logical)`` tuple of lists, preserving the
    input order; membership is decided by each item's ``is_device`` flag.
    '''
    physical, logical = [], []
    for device in devices:
        bucket = physical if device.is_device else logical
        bucket.append(device)
    return physical, logical
46
47
def get_physical_osds(devices, args):
    '''
    Go through the passed physical devices and assign OSDs to them.

    Returns a list of ``Batch.OSD`` objects, one per OSD slot that fits
    on an LVM-available device, honoring ``osds_per_device``,
    ``data_slots`` and ``data_allocate_fraction``.
    '''
    slots = args.osds_per_device
    if args.data_slots:
        slots = max(args.data_slots, args.osds_per_device)
    rel_data_size = args.data_allocate_fraction / slots
    mlogger.debug('relative data size: {}'.format(rel_data_size))
    osds = []
    for dev in devices:
        # skip devices that LVM cannot use
        if not dev.available_lvm:
            continue
        dev_size = dev.vg_size[0]
        abs_size = disk.Size(b=int(dev_size * rel_data_size))
        free_size = dev.vg_free[0]
        for _ in range(args.osds_per_device):
            # stop once the remaining free space cannot hold another OSD
            if abs_size > free_size:
                break
            free_size -= abs_size.b
            # reuse a caller-supplied OSD id if any remain
            osd_id = args.osd_ids.pop() if args.osd_ids else None
            osds.append(Batch.OSD(dev.path,
                                  rel_data_size,
                                  abs_size,
                                  args.osds_per_device,
                                  osd_id,
                                  'dmcrypt' if args.dmcrypt else None,
                                  dev.symlink))
    return osds
78
79
def get_lvm_osds(lvs, args):
    '''
    Go through the passed logical volumes and assign planned OSDs.

    LVs already used by ceph are skipped; each remaining LV becomes one
    ``Batch.OSD`` consuming the whole volume (100%, single slot).
    '''
    osds = []
    for lv in lvs:
        if lv.used_by_ceph:
            continue
        # reuse a caller-supplied OSD id if any remain
        osd_id = args.osd_ids.pop() if args.osd_ids else None
        osds.append(Batch.OSD("{}/{}".format(lv.vg_name, lv.lv_name),
                              100.0,
                              disk.Size(b=int(lv.lvs[0].lv_size)),
                              1,
                              osd_id,
                              'dmcrypt' if args.dmcrypt else None))
    return osds
99
100
def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, args):
    '''
    Compute fast-device (db/wal) allocations on physical devices.

    :param devices: physical fast devices to allocate from
    :param type_: either 'block_db' or 'block_wal'; used to look up the
                  matching --<type>-slots / --<type>-size arguments
    :param fast_slots_per_device: target number of fast slots per device
    :param new_osds: number of OSDs that still need a fast volume
    :param args: parsed argparse namespace
    :return: list of (device path, relative size, absolute size, slots) tuples
    '''
    requested_slots = getattr(args, '{}_slots'.format(type_))
    # a smaller explicit slot count than what is needed is ignored, not an error
    if not requested_slots or requested_slots < fast_slots_per_device:
        if requested_slots:
            mlogger.info('{}_slots argument is too small, ignoring'.format(type_))
        requested_slots = fast_slots_per_device

    requested_size = getattr(args, '{}_size'.format(type_), 0)
    if not requested_size or requested_size == 0:
        # no size argument was specified, check ceph.conf
        get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
        requested_size = get_size_fct(lv_format=False)

    ret = []
    vg_device_map = group_devices_by_vg(devices)
    for vg_name, vg_devices in vg_device_map.items():
        for dev in vg_devices:
            if not dev.available_lvm:
                continue
            # any LV present is considered a taken slot
            occupied_slots = len(dev.lvs)
            # prior to v15.2.8, db/wal deployments were grouping multiple fast devices into single VGs - we need to
            # multiply requested_slots (per device) by the number of devices in the VG in order to ensure that
            # abs_size is calculated correctly from vg_size
            if vg_name == 'unused_devices':
                slots_for_vg = requested_slots
            else:
                if len(vg_devices) > 1:
                    slots_for_vg = len(args.devices)
                else:
                    slots_for_vg = len(vg_devices) * requested_slots
            dev_size = dev.vg_size[0]
            # this only looks at the first vg on device, unsure if there is a better
            # way
            abs_size = disk.Size(b=int(dev_size / slots_for_vg))
            free_size = dev.vg_free[0]
            relative_size = int(abs_size) / dev_size
            if requested_size:
                # an explicit size wins, but only if it fits in a slot
                if requested_size <= abs_size:
                    abs_size = requested_size
                    relative_size = int(abs_size) / dev_size
                else:
                    mlogger.error(
                        '{} was requested for {}, but only {} can be fulfilled'.format(
                            requested_size,
                            '{}_size'.format(type_),
                            abs_size,
                        ))
                    # NOTE(review): bare exit(1) aborts the whole CLI from deep
                    # inside a helper; consider raising SystemExit explicitly.
                    exit(1)
            # hand out slots while space remains, OSDs still need fast volumes
            # and this device has unoccupied slots
            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
                free_size -= abs_size.b
                occupied_slots += 1
                ret.append((dev.path, relative_size, abs_size, requested_slots))
    return ret
155
a4b75251
TL
def group_devices_by_vg(devices):
    '''
    Group devices by the first volume group present on each of them.

    Devices carrying no VG at all are collected under the reserved key
    ``'unused_devices'``, which is always present in the result.
    '''
    grouped = {'unused_devices': []}
    for device in devices:
        if device.vgs:
            # only the first VG on the device is considered
            grouped.setdefault(device.vgs[0].name, []).append(device)
        else:
            grouped['unused_devices'].append(device)
    return grouped
f91f0fd5
TL
169
def get_lvm_fast_allocs(lvs):
    '''
    Build fast-device allocation tuples for pre-existing logical volumes.

    Each entry is ``(vg/lv, rel_size, abs_size, slots)`` with the whole LV
    used (100%, one slot); LVs whose journal is already used by ceph are
    skipped.
    '''
    allocs = []
    for lv in lvs:
        if lv.journal_used_by_ceph:
            continue
        allocs.append(("{}/{}".format(lv.vg_name, lv.lv_name),
                       100.0,
                       disk.Size(b=int(lv.lvs[0].lv_size)),
                       1))
    return allocs
1adf2230
AA
174
175
class Batch(object):
    '''
    Implementation of `ceph-volume lvm batch`: parse CLI arguments,
    compute a deployment plan (which OSDs land on which devices/LVs) and
    either report it or execute it via Prepare/Create.
    '''

    help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'

    _help = dedent("""
    Automatically size devices ready for OSD provisioning based on default strategies.

    Usage:

        ceph-volume lvm batch [DEVICE...]

    Devices can be physical block devices or LVs.
    Optional reporting on possible outcomes is enabled with --report

        ceph-volume lvm batch --report [DEVICE...]
    """)

    def __init__(self, argv):
        # Build the argument parser and immediately parse argv; the parsed
        # namespace is kept on self.args for all later stages.
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm batch',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self._help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidBatchDataDevice(),
            default=[],
            help='Devices to provision OSDs',
        )
        parser.add_argument(
            '--db-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs db volumes',
        )
        parser.add_argument(
            '--wal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs wal volumes',
        )
        parser.add_argument(
            '--auto',
            action='store_true',
            help=('deploy multi-device OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
            default=True
        )
        # --no-auto writes False into the same 'auto' destination
        parser.add_argument(
            '--no-auto',
            action='store_false',
            dest='auto',
            help=('deploy standalone OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='bluestore objectstore (default)',
        )
        parser.add_argument(
            '--report',
            action='store_true',
            help='Only report on OSD that would be created and exit',
        )
        parser.add_argument(
            '--yes',
            action='store_true',
            help='Avoid prompting for confirmation when provisioning',
        )
        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'json-pretty', 'pretty'],
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )
        parser.add_argument(
            '--crush-device-class',
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
            default=""
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--osds-per-device',
            type=int,
            default=1,
            help='Provision more than 1 (the default) OSD per device',
        )
        parser.add_argument(
            '--data-slots',
            type=int,
            help=('Provision more than 1 (the default) OSD slot per device'
                  ' if more slots then osds-per-device are specified, slots'
                  'will stay unoccupied'),
        )
        parser.add_argument(
            '--data-allocate-fraction',
            type=arg_validators.ValidFraction(),
            help='Fraction to allocate from data device (0,1.0]',
            default=1.0
        )
        parser.add_argument(
            '--block-db-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_db_size" value, in bytes'
        )
        parser.add_argument(
            '--block-db-slots',
            type=int,
            help='Provision slots on DB device, can remain unoccupied'
        )
        parser.add_argument(
            '--block-wal-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_wal_size" value, in bytes'
        )
        parser.add_argument(
            '--block-wal-slots',
            type=int,
            help='Provision slots on WAL device, can remain unoccupied'
        )
        parser.add_argument(
            '--prepare',
            action='store_true',
            help='Only prepare all OSDs, do not activate',
        )
        parser.add_argument(
            '--osd-ids',
            nargs='*',
            default=[],
            help='Reuse existing OSD ids',
            type=arg_validators.valid_osd_id
        )
        self.args = parser.parse_args(argv)
        self.parser = parser
        # pre-create empty 'usable', 'db_usable' and 'wal_usable' attributes
        for dev_list in ['', 'db_', 'wal_']:
            setattr(self, '{}usable'.format(dev_list), [])

    def report(self, plan):
        '''Print the formatted deployment plan to stdout.'''
        report = self._create_report(plan)
        print(report)

    def _create_report(self, plan):
        '''
        Render the plan as text according to --format ('pretty', 'json' or
        'json-pretty'; argparse choices guarantee one of these).
        '''
        if self.args.format == 'pretty':
            report = ''
            report += templates.total_osds.format(total_osds=len(plan))

            report += templates.osd_component_titles
            for osd in plan:
                report += templates.osd_header
                report += osd.report()
            return report
        else:
            json_report = []
            for osd in plan:
                json_report.append(osd.report_json())
            if self.args.format == 'json':
                return json.dumps(json_report)
            elif self.args.format == 'json-pretty':
                return json.dumps(json_report, indent=4,
                                  sort_keys=True)

    def _check_slot_args(self):
        '''
        checking if -slots args are consistent with other arguments
        '''
        if self.args.data_slots and self.args.osds_per_device:
            if self.args.data_slots < self.args.osds_per_device:
                raise ValueError('data_slots is smaller then osds_per_device')

    def _sort_rotational_disks(self):
        '''
        Helper for legacy auto behaviour.
        Sorts drives into rotating and non-rotating, the latter being used for
        db.
        '''
        mlogger.warning('DEPRECATION NOTICE')
        mlogger.warning('You are using the legacy automatic disk sorting behavior')
        mlogger.warning('The Pacific release will change the default to --no-auto')
        rotating = []
        ssd = []
        for d in self.args.devices:
            rotating.append(d) if d.rotational else ssd.append(d)
        if ssd and not rotating:
            # no need for additional sorting, we'll only deploy standalone on ssds
            return
        self.args.devices = rotating
        self.args.db_devices = ssd

    @decorators.needs_root
    def main(self):
        '''
        Entry point: validate arguments, compute the plan, then report it
        and/or execute it (after an interactive confirmation unless --yes).
        '''
        if not self.args.devices:
            return self.parser.print_help()

        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore:
            self.args.bluestore = True

        # legacy --auto behaviour only kicks in when no explicit fast
        # devices were passed
        if (self.args.auto and not self.args.db_devices and not
            self.args.wal_devices):
            self._sort_rotational_disks()

        self._check_slot_args()

        ensure_disjoint_device_lists(self.args.devices,
                                     self.args.db_devices,
                                     self.args.wal_devices)

        plan = self.get_plan(self.args)

        if self.args.report:
            self.report(plan)
            return 0

        if not self.args.yes:
            self.report(plan)
            terminal.info('The above OSDs would be created if the operation continues')
            if not prompt_bool('do you want to proceed? (yes/no)'):
                terminal.error('aborting OSD provisioning')
                raise SystemExit(0)

        self._execute(plan)

    def _execute(self, plan):
        '''
        Run Prepare (with --prepare) or Create for every planned OSD, merging
        the global CLI flags into each OSD's own argument set.
        '''
        defaults = common.get_default_args()
        global_args = [
            'bluestore',
            'dmcrypt',
            'crush_device_class',
            'no_systemd',
        ]
        defaults.update({arg: getattr(self.args, arg) for arg in global_args})
        for osd in plan:
            args = osd.get_args(defaults)
            if self.args.prepare:
                p = Prepare([])
                p.safe_prepare(argparse.Namespace(**args))
            else:
                c = Create([])
                c.create(argparse.Namespace(**args))


    def get_plan(self, args):
        '''
        Build the deployment plan for the parsed arguments.

        NOTE(review): if args.bluestore were falsy this would raise
        UnboundLocalError on 'plan'; main() forces bluestore to True before
        calling this, so the path is not reachable through main() — confirm
        for other callers.
        '''
        if args.bluestore:
            plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                              args.wal_devices)
        return plan

    def get_deployment_layout(self, args, devices, fast_devices=[],
                              very_fast_devices=[]):
        '''
        The methods here are mostly just organization, error reporting and
        setting up of (default) args. The heavy lifting code for the deployment
        layout can be found in the static get_*_osds and get_*_fast_allocs
        functions.
        '''
        plan = []
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed data devices: {} physical,'
                       ' {} LVM').format(len(phys_devs), len(lvm_devs)))

        plan.extend(get_physical_osds(phys_devs, args))

        plan.extend(get_lvm_osds(lvm_devs, args))

        num_osds = len(plan)
        if num_osds == 0:
            mlogger.info('All data devices are unavailable')
            return plan
        requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs)

        if args.bluestore:
            fast_type = 'block_db'
        # db allocations: one per planned OSD or the plan is abandoned
        fast_allocations = self.fast_allocations(fast_devices,
                                                 requested_osds,
                                                 num_osds,
                                                 fast_type)
        if fast_devices and not fast_allocations:
            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
            return []
        if fast_devices and not len(fast_allocations) == num_osds:
            mlogger.error('{} fast allocations != {} num_osds'.format(
                len(fast_allocations), num_osds))
            exit(1)

        # wal allocations follow the same rules as db allocations
        very_fast_allocations = self.fast_allocations(very_fast_devices,
                                                      requested_osds,
                                                      num_osds,
                                                      'block_wal')
        if very_fast_devices and not very_fast_allocations:
            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
            return []
        if very_fast_devices and not len(very_fast_allocations) == num_osds:
            mlogger.error('{} very fast allocations != {} num_osds'.format(
                len(very_fast_allocations), num_osds))
            exit(1)

        # attach the db/wal volumes to each planned OSD
        for osd in plan:
            if fast_devices:
                osd.add_fast_device(*fast_allocations.pop(),
                                    type_=fast_type)
            if very_fast_devices and args.bluestore:
                osd.add_very_fast_device(*very_fast_allocations.pop())
        return plan

    def fast_allocations(self, devices, requested_osds, new_osds, type_):
        '''
        Compute db/wal allocations for the passed fast devices (physical or
        LV), returning (path, rel_size, abs_size, slots) tuples.
        '''
        ret = []
        if not devices:
            return ret
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed {} devices: {} physical,'
                       ' {} LVM').format(type_, len(phys_devs), len(lvm_devs)))

        ret.extend(get_lvm_fast_allocs(lvm_devs))

        # fill up uneven distributions across fast devices: 5 osds and 2 fast
        # devices? create 3 slots on each device rather then deploying
        # heterogeneous osds
        slot_divider = max(1, len(phys_devs))
        if (requested_osds - len(lvm_devs)) % slot_divider:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider) + 1
        else:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider)


        ret.extend(get_physical_fast_allocs(phys_devs,
                                            type_,
                                            fast_slots_per_device,
                                            new_osds,
                                            self.args))
        return ret

    class OSD(object):
        '''
        This class simply stores info about to-be-deployed OSDs and provides an
        easy way to retrieve the necessary create arguments.
        '''
        # one volume of an OSD: its path, sizes, slot count and role
        VolSpec = namedtuple('VolSpec',
                             ['path',
                              'rel_size',
                              'abs_size',
                              'slots',
                              'type_'])

        def __init__(self,
                     data_path,
                     rel_size,
                     abs_size,
                     slots,
                     id_,
                     encryption,
                     symlink=None):
            # id_ is an OSD id to reuse, or None for a fresh one
            self.id_ = id_
            self.data = self.VolSpec(path=data_path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_='data')
            # db ('fast') and wal ('very fast') volumes are attached later
            self.fast = None
            self.very_fast = None
            self.encryption = encryption
            # original symlink path of the data device, if it was passed as one
            self.symlink = symlink

        def add_fast_device(self, path, rel_size, abs_size, slots, type_):
            '''Attach the db volume spec for this OSD.'''
            self.fast = self.VolSpec(path=path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_=type_)

        def add_very_fast_device(self, path, rel_size, abs_size, slots):
            '''Attach the wal volume spec for this OSD.'''
            self.very_fast = self.VolSpec(path=path,
                                          rel_size=rel_size,
                                          abs_size=abs_size,
                                          slots=slots,
                                          type_='block_wal')

        def _get_osd_plan(self):
            '''Return this OSD's settings as a plain dict of create args.'''
            plan = {
                'data': self.data.path,
                'data_size': self.data.abs_size,
                'encryption': self.encryption,
            }
            if self.fast:
                type_ = self.fast.type_.replace('.', '_')
                plan.update(
                    {
                        type_: self.fast.path,
                        '{}_size'.format(type_): self.fast.abs_size,
                    })
            if self.very_fast:
                plan.update(
                    {
                        'block_wal': self.very_fast.path,
                        'block_wal_size': self.very_fast.abs_size,
                    })
            if self.id_:
                plan.update({'osd_id': self.id_})
            return plan

        def get_args(self, defaults):
            '''Merge this OSD's plan into a copy of the global defaults.'''
            my_defaults = defaults.copy()
            my_defaults.update(self._get_osd_plan())
            return my_defaults

        def report(self):
            '''Render this OSD for the 'pretty' report format.'''
            report = ''
            if self.id_:
                report += templates.osd_reused_id.format(
                    id_=self.id_)
            if self.encryption:
                report += templates.osd_encryption.format(
                    enc=self.encryption)
            path = self.data.path
            if self.symlink:
                path = f'{self.symlink} -> {self.data.path}'
            report += templates.osd_component.format(
                _type=self.data.type_,
                path=path,
                size=self.data.abs_size,
                percent=self.data.rel_size)
            if self.fast:
                report += templates.osd_component.format(
                    _type=self.fast.type_,
                    path=self.fast.path,
                    size=self.fast.abs_size,
                    percent=self.fast.rel_size)
            if self.very_fast:
                report += templates.osd_component.format(
                    _type=self.very_fast.type_,
                    path=self.very_fast.path,
                    size=self.very_fast.abs_size,
                    percent=self.very_fast.rel_size)
            return report

        def report_json(self):
            # cast all values to string so that the report can be dumped in to
            # json.dumps
            return {k: str(v) for k, v in self._get_osd_plan().items()}