]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py
9ed2bf2fccf43d2a93bb99dfc9b02c4001bda590
[ceph.git] / ceph / src / ceph-volume / ceph_volume / devices / lvm / batch.py
1 import argparse
2 from collections import namedtuple
3 import json
4 import logging
5 from textwrap import dedent
6 from ceph_volume import terminal, decorators
7 from ceph_volume.util import disk, prompt_bool, arg_validators, templates
8 from ceph_volume.util import prepare
9 from . import common
10 from .create import Create
11 from .prepare import Prepare
12
# console-mirroring logger for user-facing messages and a regular
# module logger for the log file
mlogger = terminal.MultiLogger(__name__)
logger = logging.getLogger(__name__)


# one bullet line per device: path, human-readable size and whether the
# drive is 'solid' or 'rotational' (filled in by device_formatter)
device_list_template = """
  * {path: <25} {size: <10} {state}"""
19
20
def device_formatter(devices):
    '''
    Render an iterable of (path, details) pairs as a human-readable
    device listing using the module-level device_list_template.

    A device is labelled 'solid' when its 'rotational' detail is the
    string '0', 'rotational' otherwise.
    '''
    rendered = [
        device_list_template.format(
            path=dev_path,
            size=details['human_readable_size'],
            state='solid' if details['rotational'] == '0' else 'rotational')
        for dev_path, details in devices
    ]
    return ''.join(rendered)
30
31
def ensure_disjoint_device_lists(data, db=None, wal=None, journal=None):
    '''
    Verify that no device appears in more than one of the passed lists.

    :param data: list of data devices
    :param db: list of db devices (defaults to empty)
    :param wal: list of wal devices (defaults to empty)
    :param journal: list of journal devices (defaults to empty)
    :raises Exception: if any checked pair of lists shares a device

    Defaults are None sentinels instead of mutable ``[]`` (shared
    mutable defaults are a classic Python pitfall); None is treated as
    an empty list, so callers are unaffected.
    '''
    db = db if db is not None else []
    wal = wal if wal is not None else []
    journal = journal if journal is not None else []
    data_set = set(data)
    # data must not overlap any fast-device list; db and wal must also be
    # disjoint from each other (db/journal and wal/journal pairs are not
    # checked, matching the original behavior)
    if not all([data_set.isdisjoint(set(db)),
                data_set.isdisjoint(set(wal)),
                data_set.isdisjoint(set(journal)),
                set(db).isdisjoint(set(wal))]):
        raise Exception('Device lists are not disjoint')
39
40
def separate_devices_from_lvs(devices):
    '''
    Partition *devices* into physical devices and logical volumes.

    :param devices: iterable of device objects carrying an ``is_device``
                    attribute
    :return: tuple ``(phys, lvm)`` — raw block devices first, LVs second

    Uses a plain if/else instead of the original conditional expression
    that was evaluated purely for its side effects (an anti-idiom).
    '''
    phys = []
    lvm = []
    for d in devices:
        if d.is_device:
            phys.append(d)
        else:
            lvm.append(d)
    return phys, lvm
47
48
def get_physical_osds(devices, args):
    '''
    Goes through passed physical devices and assigns OSDs.

    :param devices: available physical devices (each exposing
                    available_lvm, vg_size, vg_free, path, symlink)
    :param args: parsed batch arguments (osds_per_device, data_slots,
                 data_allocate_fraction, osd_ids, dmcrypt)
    :return: list of Batch.OSD plan objects
    '''
    # slots per device: at least osds_per_device, more if --data-slots
    # asked for extra (extra slots simply stay unoccupied)
    data_slots = args.osds_per_device
    if args.data_slots:
        data_slots = max(args.data_slots, args.osds_per_device)
    # fraction of the device each OSD gets
    rel_data_size = args.data_allocate_fraction / data_slots
    mlogger.debug('relative data size: {}'.format(rel_data_size))
    ret = []
    for dev in devices:
        if dev.available_lvm:
            # NOTE(review): only the first VG on the device is considered
            dev_size = dev.vg_size[0]
            abs_size = disk.Size(b=int(dev_size * rel_data_size))
            free_size = dev.vg_free[0]
            for _ in range(args.osds_per_device):
                # stop once the remaining free space can't fit another OSD
                if abs_size > free_size:
                    break
                free_size -= abs_size.b
                osd_id = None
                # reuse explicitly passed OSD ids, consuming one per OSD
                if args.osd_ids:
                    osd_id = args.osd_ids.pop()
                ret.append(Batch.OSD(dev.path,
                                     rel_data_size,
                                     abs_size,
                                     args.osds_per_device,
                                     osd_id,
                                     'dmcrypt' if args.dmcrypt else None,
                                     dev.symlink))
    return ret
79
80
def get_lvm_osds(lvs, args):
    '''
    Build planned OSDs from already-existing logical volumes.

    LVs already used by ceph are skipped. When --osd-ids were passed,
    one id is consumed (popped) per planned OSD. Each LV becomes a
    single full-size (100%) OSD.
    '''
    planned = []
    for lv in lvs:
        if lv.used_by_ceph:
            continue
        reused_id = args.osd_ids.pop() if args.osd_ids else None
        encryption = 'dmcrypt' if args.dmcrypt else None
        planned.append(
            Batch.OSD("{}/{}".format(lv.vg_name, lv.lv_name),
                      100.0,
                      disk.Size(b=int(lv.lvs[0].lv_size)),
                      1,
                      reused_id,
                      encryption))
    return planned
100
101
def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, args):
    '''
    Allocate fast-device (db/wal/journal) slots on physical devices.

    :param devices: candidate physical fast devices
    :param type_: 'block_db', 'block_wal' or 'journal' — used to look up
                  the matching --<type>-slots/--<type>-size arguments
    :param fast_slots_per_device: computed slots needed per device
    :param new_osds: number of OSDs that still need a fast allocation
    :param args: parsed batch arguments
    :return: list of (path, relative_size, absolute_size, slots) tuples
    '''
    requested_slots = getattr(args, '{}_slots'.format(type_))
    # a user-requested slot count smaller than what's needed is ignored
    if not requested_slots or requested_slots < fast_slots_per_device:
        if requested_slots:
            mlogger.info('{}_slots argument is too small, ignoring'.format(type_))
        requested_slots = fast_slots_per_device

    requested_size = getattr(args, '{}_size'.format(type_), 0)
    if not requested_size or requested_size == 0:
        # no size argument was specified, check ceph.conf
        get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
        requested_size = get_size_fct(lv_format=False)

    ret = []
    vg_device_map = group_devices_by_vg(devices)
    for vg_name, vg_devices in vg_device_map.items():
        for dev in vg_devices:
            if not dev.available_lvm:
                continue
            # any LV present is considered a taken slot
            occupied_slots = len(dev.lvs)
            # prior to v15.2.8, db/wal deployments were grouping multiple fast devices into single VGs - we need to
            # multiply requested_slots (per device) by the number of devices in the VG in order to ensure that
            # abs_size is calculated correctly from vg_size
            if vg_name == 'unused_devices':
                slots_for_vg = requested_slots
            else:
                if len(vg_devices) > 1:
                    slots_for_vg = len(args.devices)
                else:
                    slots_for_vg = len(vg_devices) * requested_slots
            dev_size = dev.vg_size[0]
            # this only looks at the first vg on device, unsure if there is a better
            # way
            abs_size = disk.Size(b=int(dev_size / slots_for_vg))
            free_size = dev.vg_free[0]
            relative_size = int(abs_size) / dev_size
            if requested_size:
                # an explicit size wins if it fits; otherwise bail out
                # because the request cannot be honoured
                if requested_size <= abs_size:
                    abs_size = requested_size
                    relative_size = int(abs_size) / dev_size
                else:
                    mlogger.error(
                        '{} was requested for {}, but only {} can be fulfilled'.format(
                            requested_size,
                            '{}_size'.format(type_),
                            abs_size,
                        ))
                    exit(1)
            # hand out slots while there is room, OSDs left to serve and
            # free slots on this device
            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
                free_size -= abs_size.b
                occupied_slots += 1
                ret.append((dev.path, relative_size, abs_size, requested_slots))
    return ret
156
def group_devices_by_vg(devices):
    '''
    Group devices by the name of their first volume group.

    :param devices: iterable of device objects exposing ``vgs``
    :return: dict mapping vg name -> list of devices; devices without
             any VG are collected under the 'unused_devices' key (always
             present, possibly empty)

    Only the first VG of a device is considered, mirroring the rest of
    the batch sizing code.
    '''
    result = {'unused_devices': []}
    for dev in devices:
        if dev.vgs:
            # setdefault replaces the explicit membership-check/insert dance
            result.setdefault(dev.vgs[0].name, []).append(dev)
        else:
            result['unused_devices'].append(dev)
    return result
170
def get_lvm_fast_allocs(lvs):
    '''
    Turn passed LVs into fast-device allocation tuples.

    Each tuple is (vg/lv path, relative_size, absolute_size, slots);
    LVs already used by ceph are skipped, remaining ones are used at
    100% with a single slot.
    '''
    allocs = []
    for lv in lvs:
        if lv.used_by_ceph:
            continue
        allocs.append(("{}/{}".format(lv.vg_name, lv.lv_name),
                       100.0,
                       disk.Size(b=int(lv.lvs[0].lv_size)),
                       1))
    return allocs
175
176
class Batch(object):
    """
    Implementation of ``ceph-volume lvm batch``: plan OSD deployments
    across the passed data (and optional db/wal/journal) devices,
    report the plan and/or execute it via the lvm Prepare/Create
    subcommands.
    """

    help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'

    _help = dedent("""
    Automatically size devices ready for OSD provisioning based on default strategies.

    Usage:

        ceph-volume lvm batch [DEVICE...]

    Devices can be physical block devices or LVs.
    Optional reporting on possible outcomes is enabled with --report

        ceph-volume lvm batch --report [DEVICE...]
    """)

    def __init__(self, argv):
        """
        Build the argument parser, parse *argv*, and initialize empty
        usable-device lists for every device role.
        """
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm batch',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self._help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidBatchDataDevice(),
            default=[],
            help='Devices to provision OSDs',
        )
        parser.add_argument(
            '--db-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs db volumes',
        )
        parser.add_argument(
            '--wal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs wal volumes',
        )
        parser.add_argument(
            '--journal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs journal volumes',
        )
        parser.add_argument(
            '--auto',
            action='store_true',
            help=('deploy multi-device OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
            default=True
        )
        parser.add_argument(
            '--no-auto',
            action='store_false',
            dest='auto',
            help=('deploy standalone OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='bluestore objectstore (default)',
        )
        parser.add_argument(
            '--filestore',
            action='store_true',
            help='filestore objectstore',
        )
        parser.add_argument(
            '--report',
            action='store_true',
            help='Only report on OSD that would be created and exit',
        )
        parser.add_argument(
            '--yes',
            action='store_true',
            help='Avoid prompting for confirmation when provisioning',
        )
        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'json-pretty', 'pretty'],
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )
        parser.add_argument(
            '--crush-device-class',
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
            default=""
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--osds-per-device',
            type=int,
            default=1,
            help='Provision more than 1 (the default) OSD per device',
        )
        parser.add_argument(
            '--data-slots',
            type=int,
            help=('Provision more than 1 (the default) OSD slot per device'
                  ' if more slots then osds-per-device are specified, slots'
                  'will stay unoccupied'),
        )
        parser.add_argument(
            '--data-allocate-fraction',
            type=arg_validators.ValidFraction(),
            help='Fraction to allocate from data device (0,1.0]',
            default=1.0
        )
        parser.add_argument(
            '--block-db-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_db_size" value, in bytes'
        )
        parser.add_argument(
            '--block-db-slots',
            type=int,
            help='Provision slots on DB device, can remain unoccupied'
        )
        parser.add_argument(
            '--block-wal-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_wal_size" value, in bytes'
        )
        parser.add_argument(
            '--block-wal-slots',
            type=int,
            help='Provision slots on WAL device, can remain unoccupied'
        )
        def journal_size_in_mb_hack(size):
            # TODO give user time to adjust, then remove this
            # backwards-compat shim: a bare integer is still read as MB
            if size and size[-1].isdigit():
                mlogger.warning('DEPRECATION NOTICE')
                mlogger.warning('--journal-size as integer is parsed as megabytes')
                mlogger.warning('A future release will parse integers as bytes')
                mlogger.warning('Add a "M" to explicitly pass a megabyte size')
                size += 'M'
            return disk.Size.parse(size)
        parser.add_argument(
            '--journal-size',
            type=journal_size_in_mb_hack,
            help='Override the "osd_journal_size" value, in megabytes'
        )
        parser.add_argument(
            '--journal-slots',
            type=int,
            help='Provision slots on journal device, can remain unoccupied'
        )
        parser.add_argument(
            '--prepare',
            action='store_true',
            help='Only prepare all OSDs, do not activate',
        )
        parser.add_argument(
            '--osd-ids',
            nargs='*',
            default=[],
            help='Reuse existing OSD ids',
            type=arg_validators.valid_osd_id
        )
        self.args = parser.parse_args(argv)
        self.parser = parser
        # one '<role>usable' list per device role: data (''), db, wal, journal
        for dev_list in ['', 'db_', 'wal_', 'journal_']:
            setattr(self, '{}usable'.format(dev_list), [])

    def report(self, plan):
        """Print a report of *plan* in the format chosen via --format."""
        report = self._create_report(plan)
        print(report)

    def _create_report(self, plan):
        """
        Render *plan* (a list of Batch.OSD objects) either as templated
        pretty text, or as json / json-pretty per --format.
        """
        if self.args.format == 'pretty':
            report = ''
            report += templates.total_osds.format(total_osds=len(plan))

            report += templates.osd_component_titles
            for osd in plan:
                report += templates.osd_header
                report += osd.report()
            return report
        else:
            json_report = []
            for osd in plan:
                json_report.append(osd.report_json())
            if self.args.format == 'json':
                return json.dumps(json_report)
            elif self.args.format == 'json-pretty':
                return json.dumps(json_report, indent=4,
                                  sort_keys=True)

    def _check_slot_args(self):
        '''
        checking if -slots args are consistent with other arguments
        '''
        if self.args.data_slots and self.args.osds_per_device:
            if self.args.data_slots < self.args.osds_per_device:
                raise ValueError('data_slots is smaller then osds_per_device')

    def _sort_rotational_disks(self):
        '''
        Helper for legacy auto behaviour.
        Sorts drives into rotating and non-rotating, the latter being used for
        db or journal.
        '''
        mlogger.warning('DEPRECATION NOTICE')
        mlogger.warning('You are using the legacy automatic disk sorting behavior')
        mlogger.warning('The Pacific release will change the default to --no-auto')
        rotating = []
        ssd = []
        for d in self.args.devices:
            rotating.append(d) if d.rotational else ssd.append(d)
        if ssd and not rotating:
            # no need for additional sorting, we'll only deploy standalone on ssds
            return
        self.args.devices = rotating
        # non-rotational drives host journals (filestore) or dbs (bluestore)
        if self.args.filestore:
            self.args.journal_devices = ssd
        else:
            self.args.db_devices = ssd

    @decorators.needs_root
    def main(self):
        """
        Entry point: validate args, build the deployment plan, report it
        and — unless --report was given — execute it (prompting for
        confirmation unless --yes).
        """
        if not self.args.devices:
            return self.parser.print_help()

        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True

        # legacy --auto sorting only applies when no explicit fast-device
        # lists were passed on the command line
        if (self.args.auto and not self.args.db_devices and not
            self.args.wal_devices and not self.args.journal_devices):
            self._sort_rotational_disks()

        self._check_slot_args()

        ensure_disjoint_device_lists(self.args.devices,
                                     self.args.db_devices,
                                     self.args.wal_devices,
                                     self.args.journal_devices)

        plan = self.get_plan(self.args)

        if self.args.report:
            self.report(plan)
            return 0

        if not self.args.yes:
            self.report(plan)
            terminal.info('The above OSDs would be created if the operation continues')
            if not prompt_bool('do you want to proceed? (yes/no)'):
                terminal.error('aborting OSD provisioning')
                raise SystemExit(0)

        self._execute(plan)

    def _execute(self, plan):
        """
        Run Prepare (with --prepare) or Create for every planned OSD,
        layering each OSD's own args over the subcommand defaults.
        """
        defaults = common.get_default_args()
        # flags forwarded unchanged from the batch invocation to every OSD
        global_args = [
            'bluestore',
            'filestore',
            'dmcrypt',
            'crush_device_class',
            'no_systemd',
        ]
        defaults.update({arg: getattr(self.args, arg) for arg in global_args})
        for osd in plan:
            args = osd.get_args(defaults)
            if self.args.prepare:
                p = Prepare([])
                p.safe_prepare(argparse.Namespace(**args))
            else:
                c = Create([])
                c.create(argparse.Namespace(**args))


    def get_plan(self, args):
        """
        Return the planned OSD list for the chosen objectstore.

        main() defaults to bluestore when neither flag was given;
        bluestore takes precedence here if both happen to be set.
        """
        if args.bluestore:
            plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                              args.wal_devices)
        elif args.filestore:
            plan = self.get_deployment_layout(args, args.devices, args.journal_devices)
        return plan

    def get_deployment_layout(self, args, devices, fast_devices=[],
                              very_fast_devices=[]):
        '''
        The methods here are mostly just organization, error reporting and
        setting up of (default) args. The heavy lifting code for the deployment
        layout can be found in the static get_*_osds and get_*_fast_allocs
        functions.
        '''
        # NOTE(review): the mutable default lists are never mutated here —
        # only read/iterated — so they are safe despite the anti-pattern.
        plan = []
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed data devices: {} physical,'
                       ' {} LVM').format(len(phys_devs), len(lvm_devs)))

        plan.extend(get_physical_osds(phys_devs, args))

        plan.extend(get_lvm_osds(lvm_devs, args))

        num_osds = len(plan)
        if num_osds == 0:
            mlogger.info('All data devices are unavailable')
            return plan
        requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs)

        # for filestore the (single) fast-device list holds journals
        fast_type = 'block_db' if args.bluestore else 'journal'
        fast_allocations = self.fast_allocations(fast_devices,
                                                 requested_osds,
                                                 num_osds,
                                                 fast_type)
        if fast_devices and not fast_allocations:
            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
            return []
        if fast_devices and not len(fast_allocations) == num_osds:
            mlogger.error('{} fast allocations != {} num_osds'.format(
                len(fast_allocations), num_osds))
            exit(1)

        very_fast_allocations = self.fast_allocations(very_fast_devices,
                                                      requested_osds,
                                                      num_osds,
                                                      'block_wal')
        if very_fast_devices and not very_fast_allocations:
            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
            return []
        if very_fast_devices and not len(very_fast_allocations) == num_osds:
            mlogger.error('{} very fast allocations != {} num_osds'.format(
                len(very_fast_allocations), num_osds))
            exit(1)

        # attach one fast (and, for bluestore, one very fast) allocation
        # to each planned OSD
        for osd in plan:
            if fast_devices:
                osd.add_fast_device(*fast_allocations.pop(),
                                    type_=fast_type)
            if very_fast_devices and args.bluestore:
                osd.add_very_fast_device(*very_fast_allocations.pop())
        return plan

    def fast_allocations(self, devices, requested_osds, new_osds, type_):
        """
        Compute (path, rel_size, abs_size, slots) allocations on the
        passed fast devices (physical devices and LVs) for *new_osds*.
        """
        ret = []
        if not devices:
            return ret
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed {} devices: {} physical,'
                       ' {} LVM').format(type_, len(phys_devs), len(lvm_devs)))

        ret.extend(get_lvm_fast_allocs(lvm_devs))

        # fill up uneven distributions across fast devices: 5 osds and 2 fast
        # devices? create 3 slots on each device rather then deploying
        # heterogeneous osds
        slot_divider = max(1, len(phys_devs))
        if (requested_osds - len(lvm_devs)) % slot_divider:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider) + 1
        else:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider)


        ret.extend(get_physical_fast_allocs(phys_devs,
                                            type_,
                                            fast_slots_per_device,
                                            new_osds,
                                            self.args))
        return ret

    class OSD(object):
        '''
        This class simply stores info about to-be-deployed OSDs and provides an
        easy way to retrieve the necessary create arguments.
        '''
        # a single volume of an OSD: where, how big (relative & absolute),
        # how many slots on the device and its role (data/db/wal/journal)
        VolSpec = namedtuple('VolSpec',
                             ['path',
                              'rel_size',
                              'abs_size',
                              'slots',
                              'type_'])

        def __init__(self,
                     data_path,
                     rel_size,
                     abs_size,
                     slots,
                     id_,
                     encryption,
                     symlink=None):
            self.id_ = id_
            self.data = self.VolSpec(path=data_path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_='data')
            # fast (db/journal) and very fast (wal) volumes are attached
            # later via add_fast_device/add_very_fast_device
            self.fast = None
            self.very_fast = None
            self.encryption = encryption
            self.symlink = symlink

        def add_fast_device(self, path, rel_size, abs_size, slots, type_):
            """Attach the db/journal volume spec to this OSD."""
            self.fast = self.VolSpec(path=path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_=type_)

        def add_very_fast_device(self, path, rel_size, abs_size, slots):
            """Attach the wal volume spec to this OSD."""
            self.very_fast = self.VolSpec(path=path,
                                          rel_size=rel_size,
                                          abs_size=abs_size,
                                          slots=slots,
                                          type_='block_wal')

        def _get_osd_plan(self):
            """Return the per-OSD argument dict used by prepare/create."""
            plan = {
                'data': self.data.path,
                'data_size': self.data.abs_size,
                'encryption': self.encryption,
            }
            if self.fast:
                type_ = self.fast.type_.replace('.', '_')
                plan.update(
                    {
                        type_: self.fast.path,
                        '{}_size'.format(type_): self.fast.abs_size,
                    })
            if self.very_fast:
                plan.update(
                    {
                        'block_wal': self.very_fast.path,
                        'block_wal_size': self.very_fast.abs_size,
                    })
            if self.id_:
                plan.update({'osd_id': self.id_})
            return plan

        def get_args(self, defaults):
            """Merge this OSD's plan over a copy of the passed defaults."""
            my_defaults = defaults.copy()
            my_defaults.update(self._get_osd_plan())
            return my_defaults

        def report(self):
            """Return the templated pretty-report string for this OSD."""
            report = ''
            if self.id_:
                report += templates.osd_reused_id.format(
                    id_=self.id_)
            if self.encryption:
                report += templates.osd_encryption.format(
                    enc=self.encryption)
            path = self.data.path
            if self.symlink:
                path = f'{self.symlink} -> {self.data.path}'
            report += templates.osd_component.format(
                _type=self.data.type_,
                path=path,
                size=self.data.abs_size,
                percent=self.data.rel_size)
            if self.fast:
                report += templates.osd_component.format(
                    _type=self.fast.type_,
                    path=self.fast.path,
                    size=self.fast.abs_size,
                    percent=self.fast.rel_size)
            if self.very_fast:
                report += templates.osd_component.format(
                    _type=self.very_fast.type_,
                    path=self.very_fast.path,
                    size=self.very_fast.abs_size,
                    percent=self.very_fast.rel_size)
            return report

        def report_json(self):
            # cast all values to string so that the report can be dumped in to
            # json.dumps
            return {k: str(v) for k, v in self._get_osd_plan().items()}