]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py
dace25530aba0d27b11246f5505875b2448bc9f7
[ceph.git] / ceph / src / ceph-volume / ceph_volume / devices / lvm / batch.py
1 import argparse
2 from collections import namedtuple
3 import json
4 import logging
5 from textwrap import dedent
6 from ceph_volume import terminal, decorators
7 from ceph_volume.util import disk, prompt_bool, arg_validators, templates
8 from ceph_volume.util import prepare
9 from . import common
10 from .create import Create
11 from .prepare import Prepare
12
# MultiLogger mirrors messages to the terminal as well as the regular log;
# `logger` is the plain module-level logger for non-user-facing messages.
mlogger = terminal.MultiLogger(__name__)
logger = logging.getLogger(__name__)
15
16
device_list_template = """
  * {path: <25} {size: <10} {state}"""


def device_formatter(devices):
    """Render an iterable of ``(path, details)`` pairs as bullet lines.

    ``details`` must provide ``human_readable_size`` and ``rotational``
    (``'0'`` meaning a solid-state drive).
    """
    rendered = [
        device_list_template.format(
            path=path,
            size=details['human_readable_size'],
            state='rotational' if details['rotational'] != '0' else 'solid',
        )
        for path, details in devices
    ]
    return ''.join(rendered)
30
31
def ensure_disjoint_device_lists(data, db=None, wal=None, journal=None):
    '''
    Ensure no device appears in more than one of the passed device lists.

    :param data: list of data devices
    :param db: list of db devices (optional)
    :param wal: list of wal devices (optional)
    :param journal: list of journal devices (optional)
    :raises Exception: if any of the checked list pairs share a device

    Note: as in the original implementation, only data-vs-(db, wal, journal)
    and db-vs-wal are checked; journal is never combined with db/wal since
    filestore and bluestore are mutually exclusive.
    '''
    # Use None sentinels instead of mutable-list defaults; `def f(x=[])` is a
    # well-known Python pitfall (the list object is shared across calls).
    db = db if db is not None else []
    wal = wal if wal is not None else []
    journal = journal if journal is not None else []
    # check that all device lists are disjoint with each other
    data_set = set(data)
    if not all([data_set.isdisjoint(set(db)),
                data_set.isdisjoint(set(wal)),
                data_set.isdisjoint(set(journal)),
                set(db).isdisjoint(set(wal))]):
        raise Exception('Device lists are not disjoint')
39
40
def separate_devices_from_lvs(devices):
    '''
    Split ``devices`` into physical block devices and logical volumes.

    :param devices: iterable of Device-like objects exposing ``is_device``
    :return: tuple ``(phys, lvm)``, each preserving the input order
    '''
    phys = []
    lvm = []
    for d in devices:
        # the original used a conditional expression purely for its side
        # effects; a plain if/else is the idiomatic, readable form
        if d.is_device:
            phys.append(d)
        else:
            lvm.append(d)
    return phys, lvm
47
48
def get_physical_osds(devices, args):
    '''
    Goes through passed physical devices and assigns OSDs

    :param devices: list of Device objects wrapping physical block devices
    :param args: parsed batch arguments; reads ``osds_per_device``,
                 ``data_slots``, ``data_allocate_fraction``, ``osd_ids``
                 and ``dmcrypt``
    :return: list of ``Batch.OSD`` objects, one per planned OSD
    '''
    # data slots default to osds-per-device but --data-slots may raise them;
    # the larger of the two wins
    data_slots = args.osds_per_device
    if args.data_slots:
        data_slots = max(args.data_slots, args.osds_per_device)
    # fraction of the device's VG each OSD will occupy
    rel_data_size = args.data_allocate_fraction / data_slots
    mlogger.debug('relative data size: {}'.format(rel_data_size))
    ret = []
    for dev in devices:
        if dev.available_lvm:
            # only the first VG on the device is considered (index 0)
            dev_size = dev.vg_size[0]
            abs_size = disk.Size(b=int(dev_size * rel_data_size))
            free_size = dev.vg_free[0]
            for _ in range(args.osds_per_device):
                if abs_size > free_size:
                    # device is full; remaining slots stay unoccupied
                    break
                free_size -= abs_size.b
                osd_id = None
                if args.osd_ids:
                    # NOTE: consumes an id from args.osd_ids (mutates the list)
                    osd_id = args.osd_ids.pop()
                ret.append(Batch.OSD(dev.path,
                                     rel_data_size,
                                     abs_size,
                                     args.osds_per_device,
                                     osd_id,
                                     'dmcrypt' if args.dmcrypt else None))
    return ret
78
79
def get_lvm_osds(lvs, args):
    '''
    Goes through passed LVs and assigns planned osds

    :param lvs: list of Device objects wrapping existing logical volumes
    :param args: parsed batch arguments; reads ``osd_ids`` and ``dmcrypt``
    :return: list of ``Batch.OSD`` objects, one per usable LV
    '''
    ret = []
    for lv in lvs:
        if lv.used_by_ceph:
            # skip LVs that already belong to a ceph OSD
            continue
        osd_id = None
        if args.osd_ids:
            # NOTE: consumes an id from args.osd_ids (mutates the list)
            osd_id = args.osd_ids.pop()
        # an existing LV is always consumed in full: 100% relative size,
        # a single slot
        osd = Batch.OSD("{}/{}".format(lv.vg_name, lv.lv_name),
                        100.0,
                        disk.Size(b=int(lv.lvs[0].lv_size)),
                        1,
                        osd_id,
                        'dmcrypt' if args.dmcrypt else None)
        ret.append(osd)
    return ret
99
100
def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, args):
    '''
    Plan fast-device (db/wal/journal) allocations on physical devices.

    :param devices: list of Device objects to place fast volumes on
    :param type_: volume type string, e.g. ``block_db`` or ``block_wal``
    :param fast_slots_per_device: number of slots to fill per device
    :param new_osds: total number of OSDs that still need a fast volume
    :param args: parsed batch arguments; reads ``{type_}_slots`` and
                 ``{type_}_size``
    :return: list of ``(path, relative_size, absolute_size, slots)`` tuples
    '''
    requested_slots = getattr(args, '{}_slots'.format(type_))
    if not requested_slots or requested_slots < fast_slots_per_device:
        if requested_slots:
            mlogger.info('{}_slots argument is too small, ignoring'.format(type_))
        requested_slots = fast_slots_per_device

    requested_size = getattr(args, '{}_size'.format(type_), 0)
    if not requested_size or requested_size == 0:
        # no size argument was specified, check ceph.conf
        get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
        requested_size = get_size_fct(lv_format=False)

    ret = []
    vg_device_map = group_devices_by_vg(devices)
    for vg_devices in vg_device_map.values():
        for dev in vg_devices:
            if not dev.available_lvm:
                continue
            # any LV present is considered a taken slot
            occupied_slots = len(dev.lvs)
            # prior to v15.2.8, db/wal deployments were grouping multiple fast devices into single VGs - we need to
            # multiply requested_slots (per device) by the number of devices in the VG in order to ensure that
            # abs_size is calculated correctly from vg_size
            slots_for_vg = len(vg_devices) * requested_slots
            dev_size = dev.vg_size[0]
            # this only looks at the first vg on device, unsure if there is a better
            # way
            abs_size = disk.Size(b=int(dev_size / slots_for_vg))
            free_size = dev.vg_free[0]
            relative_size = int(abs_size) / dev_size
            if requested_size:
                # an explicit size request wins, but only if a slot can hold it
                if requested_size <= abs_size:
                    abs_size = requested_size
                    relative_size = int(abs_size) / dev_size
                else:
                    mlogger.error(
                        '{} was requested for {}, but only {} can be fulfilled'.format(
                            requested_size,
                            '{}_size'.format(type_),
                            abs_size,
                        ))
                    exit(1)
            # hand out slots until the device is full, all new OSDs are
            # served, or the per-device slot limit is reached
            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
                free_size -= abs_size.b
                occupied_slots += 1
                ret.append((dev.path, relative_size, abs_size, requested_slots))
    return ret
149
def group_devices_by_vg(devices):
    '''
    Group devices by the name of the volume group they belong to.

    Devices without any VG are collected under the ``unused_devices`` key,
    which is always present (possibly empty).
    '''
    grouped = {'unused_devices': []}
    for device in devices:
        if device.vgs:
            # already using assumption that a PV only belongs to a single VG
            # in other places
            vg_name = device.vgs[0].name
            grouped.setdefault(vg_name, []).append(device)
        else:
            grouped['unused_devices'].append(device)
    return grouped
164
def get_lvm_fast_allocs(lvs):
    '''
    Build fast-device allocation tuples from pre-existing LVs.

    LVs already used by ceph are skipped; each remaining LV is consumed in
    full (100% relative size, a single slot).

    :return: list of ``(path, rel_size, abs_size, slots)`` tuples
    '''
    allocs = []
    for lv in lvs:
        if lv.used_by_ceph:
            continue
        path = "{}/{}".format(lv.vg_name, lv.lv_name)
        abs_size = disk.Size(b=int(lv.lvs[0].lv_size))
        allocs.append((path, 100.0, abs_size, 1))
    return allocs
169
170
class Batch(object):
    '''
    Implementation of ``ceph-volume lvm batch``: parses the command line,
    plans OSDs across the passed data/db/wal/journal devices and either
    reports the plan or executes it via Prepare/Create.
    '''

    help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'

    _help = dedent("""
    Automatically size devices ready for OSD provisioning based on default strategies.

    Usage:

        ceph-volume lvm batch [DEVICE...]

    Devices can be physical block devices or LVs.
    Optional reporting on possible outcomes is enabled with --report

        ceph-volume lvm batch --report [DEVICE...]
    """)

    def __init__(self, argv):
        '''
        Build the argument parser and parse ``argv`` into ``self.args``.

        :param argv: list of command line arguments (without the program name)
        '''
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm batch',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self._help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs',
        )
        parser.add_argument(
            '--db-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs db volumes',
        )
        parser.add_argument(
            '--wal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs wal volumes',
        )
        parser.add_argument(
            '--journal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs journal volumes',
        )
        parser.add_argument(
            '--auto',
            action='store_true',
            help=('deploy multi-device OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
            default=True
        )
        parser.add_argument(
            '--no-auto',
            action='store_false',
            dest='auto',
            help=('deploy standalone OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='bluestore objectstore (default)',
        )
        parser.add_argument(
            '--filestore',
            action='store_true',
            help='filestore objectstore',
        )
        parser.add_argument(
            '--report',
            action='store_true',
            help='Only report on OSD that would be created and exit',
        )
        parser.add_argument(
            '--yes',
            action='store_true',
            help='Avoid prompting for confirmation when provisioning',
        )
        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'json-pretty', 'pretty'],
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )
        parser.add_argument(
            '--crush-device-class',
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
            default=""
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--osds-per-device',
            type=int,
            default=1,
            help='Provision more than 1 (the default) OSD per device',
        )
        parser.add_argument(
            '--data-slots',
            type=int,
            help=('Provision more than 1 (the default) OSD slot per device'
                  ' if more slots then osds-per-device are specified, slots'
                  'will stay unoccupied'),
        )
        parser.add_argument(
            '--data-allocate-fraction',
            type=arg_validators.ValidFraction(),
            help='Fraction to allocate from data device (0,1.0]',
            default=1.0
        )
        parser.add_argument(
            '--block-db-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_db_size" value, in bytes'
        )
        parser.add_argument(
            '--block-db-slots',
            type=int,
            help='Provision slots on DB device, can remain unoccupied'
        )
        parser.add_argument(
            '--block-wal-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_wal_size" value, in bytes'
        )
        parser.add_argument(
            '--block-wal-slots',
            type=int,
            help='Provision slots on WAL device, can remain unoccupied'
        )
        def journal_size_in_mb_hack(size):
            # TODO give user time to adjust, then remove this
            # backwards-compat shim: a bare integer is treated as megabytes
            if size and size[-1].isdigit():
                mlogger.warning('DEPRECATION NOTICE')
                mlogger.warning('--journal-size as integer is parsed as megabytes')
                mlogger.warning('A future release will parse integers as bytes')
                mlogger.warning('Add a "M" to explicitly pass a megabyte size')
                size += 'M'
            return disk.Size.parse(size)
        parser.add_argument(
            '--journal-size',
            type=journal_size_in_mb_hack,
            help='Override the "osd_journal_size" value, in megabytes'
        )
        parser.add_argument(
            '--journal-slots',
            type=int,
            help='Provision slots on journal device, can remain unoccupied'
        )
        parser.add_argument(
            '--prepare',
            action='store_true',
            help='Only prepare all OSDs, do not activate',
        )
        parser.add_argument(
            '--osd-ids',
            nargs='*',
            default=[],
            help='Reuse existing OSD ids',
            type=arg_validators.valid_osd_id
        )
        self.args = parser.parse_args(argv)
        self.parser = parser
        # initialize empty "usable" device lists for data/db/wal/journal
        for dev_list in ['', 'db_', 'wal_', 'journal_']:
            setattr(self, '{}usable'.format(dev_list), [])

    def report(self, plan):
        '''Print a report of ``plan`` in the format requested via --format.'''
        report = self._create_report(plan)
        print(report)

    def _create_report(self, plan):
        '''
        Render ``plan`` (a list of OSD objects) as a string.

        The format is taken from ``self.args.format``: 'pretty' renders the
        template-based human-readable report; 'json'/'json-pretty' serialize
        each OSD's ``report_json()`` output.
        '''
        if self.args.format == 'pretty':
            report = ''
            report += templates.total_osds.format(total_osds=len(plan))

            report += templates.osd_component_titles
            for osd in plan:
                report += templates.osd_header
                report += osd.report()
            return report
        else:
            json_report = []
            for osd in plan:
                json_report.append(osd.report_json())
            if self.args.format == 'json':
                return json.dumps(json_report)
            elif self.args.format == 'json-pretty':
                return json.dumps(json_report, indent=4,
                                  sort_keys=True)

    def _check_slot_args(self):
        '''
        checking if -slots args are consistent with other arguments
        '''
        if self.args.data_slots and self.args.osds_per_device:
            if self.args.data_slots < self.args.osds_per_device:
                raise ValueError('data_slots is smaller then osds_per_device')

    def _sort_rotational_disks(self):
        '''
        Helper for legacy auto behaviour.
        Sorts drives into rotating and non-rotating, the latter being used for
        db or journal.
        '''
        mlogger.warning('DEPRECATION NOTICE')
        mlogger.warning('You are using the legacy automatic disk sorting behavior')
        mlogger.warning('The Pacific release will change the default to --no-auto')
        rotating = []
        ssd = []
        for d in self.args.devices:
            rotating.append(d) if d.rotational else ssd.append(d)
        if ssd and not rotating:
            # no need for additional sorting, we'll only deploy standalone on ssds
            return
        self.args.devices = rotating
        # non-rotating drives become journal (filestore) or db (bluestore)
        # devices
        if self.args.filestore:
            self.args.journal_devices = ssd
        else:
            self.args.db_devices = ssd

    @decorators.needs_root
    def main(self):
        '''
        Entry point: plan OSDs from ``self.args``, report and/or prompt as
        requested, then execute the plan.
        '''
        if not self.args.devices:
            return self.parser.print_help()

        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True

        # legacy --auto behaviour: split rotational/non-rotational drives
        # unless explicit fast-device lists were passed
        if (self.args.auto and not self.args.db_devices and not
                self.args.wal_devices and not self.args.journal_devices):
            self._sort_rotational_disks()

        self._check_slot_args()

        ensure_disjoint_device_lists(self.args.devices,
                                     self.args.db_devices,
                                     self.args.wal_devices,
                                     self.args.journal_devices)

        plan = self.get_plan(self.args)

        if self.args.report:
            # report-only mode: show the plan and stop
            self.report(plan)
            return 0

        if not self.args.yes:
            # interactive confirmation unless --yes was given
            self.report(plan)
            terminal.info('The above OSDs would be created if the operation continues')
            if not prompt_bool('do you want to proceed? (yes/no)'):
                terminal.error('aborting OSD provisioning')
                raise SystemExit(0)

        self._execute(plan)

    def _execute(self, plan):
        '''
        Prepare or create every OSD in ``plan``, merging each OSD's specific
        arguments over the common defaults.
        '''
        defaults = common.get_default_args()
        # these flags are shared by all OSDs in the plan
        global_args = [
            'bluestore',
            'filestore',
            'dmcrypt',
            'crush_device_class',
            'no_systemd',
        ]
        defaults.update({arg: getattr(self.args, arg) for arg in global_args})
        for osd in plan:
            args = osd.get_args(defaults)
            if self.args.prepare:
                # --prepare: stop after preparing, do not activate
                p = Prepare([])
                p.safe_prepare(argparse.Namespace(**args))
            else:
                c = Create([])
                c.create(argparse.Namespace(**args))


    def get_plan(self, args):
        '''
        Dispatch to the deployment layout for the selected objectstore
        (bluestore uses db/wal devices, filestore uses journal devices).
        '''
        if args.bluestore:
            plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                              args.wal_devices)
        elif args.filestore:
            plan = self.get_deployment_layout(args, args.devices, args.journal_devices)
        return plan

    def get_deployment_layout(self, args, devices, fast_devices=[],
                              very_fast_devices=[]):
        '''
        The methods here are mostly just organization, error reporting and
        setting up of (default) args. The heavy lifting code for the deployment
        layout can be found in the static get_*_osds and get_*_fast_allocs
        functions.

        :param devices: data devices (physical or LVs)
        :param fast_devices: db (bluestore) or journal (filestore) devices
        :param very_fast_devices: wal devices (bluestore only)
        :return: list of OSD objects; empty if nothing can be deployed
        '''
        plan = []
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed data devices: {} physical,'
                       ' {} LVM').format(len(phys_devs), len(lvm_devs)))

        plan.extend(get_physical_osds(phys_devs, args))

        plan.extend(get_lvm_osds(lvm_devs, args))

        num_osds = len(plan)
        if num_osds == 0:
            mlogger.info('All data devices are unavailable')
            return plan
        requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs)

        fast_type = 'block_db' if args.bluestore else 'journal'
        fast_allocations = self.fast_allocations(fast_devices,
                                                 requested_osds,
                                                 num_osds,
                                                 fast_type)
        if fast_devices and not fast_allocations:
            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
            return []
        # every planned OSD needs exactly one fast allocation
        if fast_devices and not len(fast_allocations) == num_osds:
            mlogger.error('{} fast allocations != {} num_osds'.format(
                len(fast_allocations), num_osds))
            exit(1)

        very_fast_allocations = self.fast_allocations(very_fast_devices,
                                                      requested_osds,
                                                      num_osds,
                                                      'block_wal')
        if very_fast_devices and not very_fast_allocations:
            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
            return []
        if very_fast_devices and not len(very_fast_allocations) == num_osds:
            mlogger.error('{} very fast allocations != {} num_osds'.format(
                len(very_fast_allocations), num_osds))
            exit(1)

        # attach one fast (and optionally one very fast) volume to each OSD
        for osd in plan:
            if fast_devices:
                osd.add_fast_device(*fast_allocations.pop(),
                                    type_=fast_type)
            if very_fast_devices and args.bluestore:
                osd.add_very_fast_device(*very_fast_allocations.pop())
        return plan

    def fast_allocations(self, devices, requested_osds, new_osds, type_):
        '''
        Plan allocations on fast devices, combining pre-existing LVs and
        physical devices.

        :param devices: fast devices (physical or LVs)
        :param requested_osds: total OSD count requested by the user
        :param new_osds: number of OSDs that still need a fast volume
        :param type_: volume type, e.g. 'block_db', 'block_wal' or 'journal'
        :return: list of (path, rel_size, abs_size, slots) tuples
        '''
        ret = []
        if not devices:
            return ret
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed {} devices: {} physical,'
                       ' {} LVM').format(type_, len(phys_devs), len(lvm_devs)))

        ret.extend(get_lvm_fast_allocs(lvm_devs))

        # fill up uneven distributions across fast devices: 5 osds and 2 fast
        # devices? create 3 slots on each device rather then deploying
        # heterogeneous osds
        slot_divider = max(1, len(phys_devs))
        if (requested_osds - len(lvm_devs)) % slot_divider:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider) + 1
        else:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider)


        ret.extend(get_physical_fast_allocs(phys_devs,
                                            type_,
                                            fast_slots_per_device,
                                            new_osds,
                                            self.args))
        return ret

    class OSD(object):
        '''
        This class simply stores info about to-be-deployed OSDs and provides an
        easy way to retrieve the necessary create arguments.
        '''
        # VolSpec describes one volume (data, db/journal or wal) of an OSD
        VolSpec = namedtuple('VolSpec',
                             ['path',
                              'rel_size',
                              'abs_size',
                              'slots',
                              'type_'])

        def __init__(self,
                     data_path,
                     rel_size,
                     abs_size,
                     slots,
                     id_,
                     encryption):
            '''
            :param data_path: device path or vg/lv for the data volume
            :param rel_size: fraction of the device used by this OSD
            :param abs_size: absolute size of the data volume
            :param slots: number of slots on the data device
            :param id_: OSD id to reuse, or None for a fresh one
            :param encryption: 'dmcrypt' or None
            '''
            self.id_ = id_
            self.data = self.VolSpec(path=data_path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_='data')
            # fast (db/journal) and very fast (wal) volumes are attached later
            self.fast = None
            self.very_fast = None
            self.encryption = encryption

        def add_fast_device(self, path, rel_size, abs_size, slots, type_):
            '''Attach the fast (db or journal) volume spec to this OSD.'''
            self.fast = self.VolSpec(path=path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_=type_)

        def add_very_fast_device(self, path, rel_size, abs_size, slots):
            '''Attach the very fast (wal) volume spec to this OSD.'''
            self.very_fast = self.VolSpec(path=path,
                                          rel_size=rel_size,
                                          abs_size=abs_size,
                                          slots=slots,
                                          type_='block_wal')

        def _get_osd_plan(self):
            '''
            Build the dict of create/prepare arguments for this OSD from the
            attached volume specs.
            '''
            plan = {
                'data': self.data.path,
                'data_size': self.data.abs_size,
                'encryption': self.encryption,
            }
            if self.fast:
                type_ = self.fast.type_.replace('.', '_')
                plan.update(
                    {
                        type_: self.fast.path,
                        '{}_size'.format(type_): self.fast.abs_size,
                    })
            if self.very_fast:
                plan.update(
                    {
                        'block_wal': self.very_fast.path,
                        'block_wal_size': self.very_fast.abs_size,
                    })
            if self.id_:
                plan.update({'osd_id': self.id_})
            return plan

        def get_args(self, defaults):
            '''
            Return ``defaults`` overlaid with this OSD's specific arguments;
            ``defaults`` itself is not mutated.
            '''
            my_defaults = defaults.copy()
            my_defaults.update(self._get_osd_plan())
            return my_defaults

        def report(self):
            '''Render this OSD's volumes with the pretty-report templates.'''
            report = ''
            if self.id_:
                report += templates.osd_reused_id.format(
                    id_=self.id_)
            if self.encryption:
                report += templates.osd_encryption.format(
                    enc=self.encryption)
            report += templates.osd_component.format(
                _type=self.data.type_,
                path=self.data.path,
                size=self.data.abs_size,
                percent=self.data.rel_size)
            if self.fast:
                report += templates.osd_component.format(
                    _type=self.fast.type_,
                    path=self.fast.path,
                    size=self.fast.abs_size,
                    percent=self.fast.rel_size)
            if self.very_fast:
                report += templates.osd_component.format(
                    _type=self.very_fast.type_,
                    path=self.very_fast.path,
                    size=self.very_fast.abs_size,
                    percent=self.very_fast.rel_size)
            return report

        def report_json(self):
            # cast all values to string so that the report can be dumped in to
            # json.dumps
            return {k: str(v) for k, v in self._get_osd_plan().items()}