]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py
import ceph 16.2.7
[ceph.git] / ceph / src / ceph-volume / ceph_volume / devices / lvm / batch.py
CommitLineData
1adf2230 1import argparse
f91f0fd5 2from collections import namedtuple
f6b5b4d7 3import json
f91f0fd5 4import logging
1adf2230
AA
5from textwrap import dedent
6from ceph_volume import terminal, decorators
f91f0fd5
TL
7from ceph_volume.util import disk, prompt_bool, arg_validators, templates
8from ceph_volume.util import prepare
9from . import common
10from .create import Create
11from .prepare import Prepare
1adf2230 12
91327a77
AA
# Multiplexed logger: writes to both the terminal and the log file.
mlogger = terminal.MultiLogger(__name__)
# Plain module logger for non-user-facing diagnostics.
logger = logging.getLogger(__name__)


# One line per device in the pretty report; leading newline keeps the first
# entry on its own line when entries are joined.
device_list_template = """
  * {path: <25} {size: <10} {state}"""
20
def device_formatter(devices):
    """Render ``(path, details)`` pairs as a bulleted device listing.

    ``details`` must carry ``human_readable_size`` and the lsblk-style
    ``rotational`` flag ('0' meaning solid state).
    """
    rendered = []
    for dev_path, info in devices:
        state = 'solid' if info['rotational'] == '0' else 'rotational'
        rendered.append(device_list_template.format(
            path=dev_path,
            size=info['human_readable_size'],
            state=state))
    return ''.join(rendered)
30
31
f91f0fd5
TL
def ensure_disjoint_device_lists(data, db=None, wal=None, journal=None):
    """Ensure no device is listed for more than one role.

    :param data: list of data devices
    :param db: optional list of db devices
    :param wal: optional list of wal devices
    :param journal: optional list of journal devices
    :raises Exception: if any two of the checked lists share a device
    """
    # Use None sentinels instead of mutable default arguments ([]), which are
    # shared across calls; behavior is unchanged.
    db = db if db is not None else []
    wal = wal if wal is not None else []
    journal = journal if journal is not None else []
    # check that all device lists are disjoint with each other
    # NOTE(review): db/journal and wal/journal pairs are deliberately left
    # unchecked, matching the original behavior -- confirm whether those
    # overlaps should also be rejected.
    if not all([set(data).isdisjoint(set(db)),
                set(data).isdisjoint(set(wal)),
                set(data).isdisjoint(set(journal)),
                set(db).isdisjoint(set(wal))]):
        raise Exception('Device lists are not disjoint')
39
40
def separate_devices_from_lvs(devices):
    """Split a device list into physical devices and logical volumes.

    :param devices: iterable of objects exposing a boolean ``is_device``
    :return: tuple ``(phys, lvm)``, each preserving the input order
    """
    phys = []
    lvm = []
    for d in devices:
        # Explicit branch instead of the original conditional-expression
        # statement used only for its side effect; same behavior.
        if d.is_device:
            phys.append(d)
        else:
            lvm.append(d)
    return phys, lvm
47
48
def get_physical_osds(devices, args):
    '''
    Goes through passed physical devices and assigns OSDs

    Each available device is split into up to ``args.osds_per_device`` equal
    slices; ``args.data_slots`` can reserve additional (unoccupied) slots,
    shrinking each slice.  Returns a list of planned ``Batch.OSD`` objects.
    '''
    data_slots = args.osds_per_device
    if args.data_slots:
        # data_slots may only grow the slot count, never shrink below
        # osds_per_device.
        data_slots = max(args.data_slots, args.osds_per_device)
    rel_data_size = 1.0 / data_slots
    mlogger.debug('relative data size: {}'.format(rel_data_size))
    ret = []
    for dev in devices:
        if dev.available_lvm:
            # assumes vg_size/vg_free hold byte counts and only the first VG
            # on the device matters -- TODO confirm against Device
            dev_size = dev.vg_size[0]
            abs_size = disk.Size(b=int(dev_size * rel_data_size))
            free_size = dev.vg_free[0]
            for _ in range(args.osds_per_device):
                if abs_size > free_size:
                    # not enough free VG space left for another slice
                    break
                free_size -= abs_size.b
                osd_id = None
                if args.osd_ids:
                    # consumes ids from the end of the user-supplied list
                    osd_id = args.osd_ids.pop()
                ret.append(Batch.OSD(dev.path,
                                     rel_data_size,
                                     abs_size,
                                     args.osds_per_device,
                                     osd_id,
                                     'dmcrypt' if args.dmcrypt else None))
    return ret
78
79
def get_lvm_osds(lvs, args):
    '''
    Goes through passed LVs and assigns planned osds

    LVs already used by ceph are skipped; each remaining LV becomes one
    planned ``Batch.OSD`` consuming the whole volume.
    '''
    planned = []
    for volume in lvs:
        if volume.used_by_ceph:
            continue
        reuse_id = args.osd_ids.pop() if args.osd_ids else None
        data_path = '{}/{}'.format(volume.vg_name, volume.lv_name)
        planned.append(Batch.OSD(data_path,
                                 100.0,
                                 disk.Size(b=int(volume.lvs[0].lv_size)),
                                 1,
                                 reuse_id,
                                 'dmcrypt' if args.dmcrypt else None))
    return planned
99
100
def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, args):
    """Plan fast-device (db/wal/journal) allocations on physical devices.

    :param devices: physical fast devices to slice up
    :param type_: one of 'block_db', 'block_wal' or 'journal'; used to look
                  up the matching ``--{type_}-slots`` / ``--{type_}-size`` args
    :param fast_slots_per_device: slots needed per device to cover all OSDs
    :param new_osds: upper bound on allocations to produce
    :param args: parsed batch arguments
    :return: list of ``(path, relative_size, absolute_size, slots)`` tuples
    """
    requested_slots = getattr(args, '{}_slots'.format(type_))
    if not requested_slots or requested_slots < fast_slots_per_device:
        if requested_slots:
            mlogger.info('{}_slots argument is too small, ignoring'.format(type_))
        requested_slots = fast_slots_per_device

    requested_size = getattr(args, '{}_size'.format(type_), 0)
    if not requested_size or requested_size == 0:
        # no size argument was specified, check ceph.conf
        get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
        requested_size = get_size_fct(lv_format=False)

    ret = []
    vg_device_map = group_devices_by_vg(devices)
    for vg_devices in vg_device_map.values():
        for dev in vg_devices:
            if not dev.available_lvm:
                continue
            # any LV present is considered a taken slot
            occupied_slots = len(dev.lvs)
            # prior to v15.2.8, db/wal deployments were grouping multiple fast devices into single VGs - we need to
            # multiply requested_slots (per device) by the number of devices in the VG in order to ensure that
            # abs_size is calculated correctly from vg_size
            slots_for_vg = len(vg_devices) * requested_slots
            dev_size = dev.vg_size[0]
            # this only looks at the first vg on device, unsure if there is a better
            # way
            abs_size = disk.Size(b=int(dev_size / slots_for_vg))
            free_size = dev.vg_free[0]
            relative_size = int(abs_size) / dev_size
            if requested_size:
                # an explicit size wins if it fits; otherwise the request
                # cannot be satisfied and the whole run is aborted
                if requested_size <= abs_size:
                    abs_size = requested_size
                    relative_size = int(abs_size) / dev_size
                else:
                    mlogger.error(
                        '{} was requested for {}, but only {} can be fulfilled'.format(
                            requested_size,
                            '{}_size'.format(type_),
                            abs_size,
                        ))
                    exit(1)
            # hand out slots while space remains, more OSDs are needed, and
            # this device still has free slots
            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
                free_size -= abs_size.b
                occupied_slots += 1
                ret.append((dev.path, relative_size, abs_size, requested_slots))
    return ret
149
a4b75251
TL
def group_devices_by_vg(devices):
    """Partition devices by the VG they belong to.

    Returns a dict mapping VG name to the list of its devices; devices with
    no VG are collected under the always-present ``'unused_devices'`` key.
    """
    grouped = {'unused_devices': []}
    for dev in devices:
        if len(dev.vgs) > 0:
            # already using assumption that a PV only belongs to single VG
            # in other places
            grouped.setdefault(dev.vgs[0].name, []).append(dev)
        else:
            grouped['unused_devices'].append(dev)
    return grouped
f91f0fd5
TL
164
def get_lvm_fast_allocs(lvs):
    """Translate unused LVs into fast-device allocation tuples.

    Each LV not already used by ceph yields a
    ``(vg/lv path, 100.0, size, 1)`` tuple consuming the whole volume.
    """
    allocs = []
    for volume in lvs:
        if volume.used_by_ceph:
            continue
        allocs.append(('{}/{}'.format(volume.vg_name, volume.lv_name),
                       100.0,
                       disk.Size(b=int(volume.lvs[0].lv_size)),
                       1))
    return allocs
1adf2230
AA
169
170
class Batch(object):
    """Subcommand: plan and create multiple OSDs from a list of devices.

    Parses the ``ceph-volume lvm batch`` CLI, computes a deployment plan via
    the module-level ``get_*_osds`` / ``get_*_fast_allocs`` helpers, reports
    it, and (unless ``--report``) executes it through Prepare/Create.
    """

    help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'

    _help = dedent("""
    Automatically size devices ready for OSD provisioning based on default strategies.

    Usage:

        ceph-volume lvm batch [DEVICE...]

    Devices can be physical block devices or LVs.
    Optional reporting on possible outcomes is enabled with --report

        ceph-volume lvm batch --report [DEVICE...]
    """)

    def __init__(self, argv):
        # Build the argparse CLI and immediately parse argv; ValidBatchDevice
        # converts each path argument into a Device-like object.
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm batch',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self._help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs',
        )
        parser.add_argument(
            '--db-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs db volumes',
        )
        parser.add_argument(
            '--wal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs wal volumes',
        )
        parser.add_argument(
            '--journal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs journal volumes',
        )
        # --auto defaults to True; --no-auto flips the same dest back off.
        parser.add_argument(
            '--auto',
            action='store_true',
            help=('deploy multi-device OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
            default=True
        )
        parser.add_argument(
            '--no-auto',
            action='store_false',
            dest='auto',
            help=('deploy standalone OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
        )
        # bluestore/filestore are both store_true; the bluestore default is
        # applied later in main() to avoid both being True at once.
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='bluestore objectstore (default)',
        )
        parser.add_argument(
            '--filestore',
            action='store_true',
            help='filestore objectstore',
        )
        parser.add_argument(
            '--report',
            action='store_true',
            help='Only report on OSD that would be created and exit',
        )
        parser.add_argument(
            '--yes',
            action='store_true',
            help='Avoid prompting for confirmation when provisioning',
        )
        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'json-pretty', 'pretty'],
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )
        parser.add_argument(
            '--crush-device-class',
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--osds-per-device',
            type=int,
            default=1,
            help='Provision more than 1 (the default) OSD per device',
        )
        parser.add_argument(
            '--data-slots',
            type=int,
            help=('Provision more than 1 (the default) OSD slot per device'
                  ' if more slots then osds-per-device are specified, slots'
                  'will stay unoccupied'),
        )
        parser.add_argument(
            '--block-db-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_db_size" value, in bytes'
        )
        parser.add_argument(
            '--block-db-slots',
            type=int,
            help='Provision slots on DB device, can remain unoccupied'
        )
        parser.add_argument(
            '--block-wal-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_wal_size" value, in bytes'
        )
        parser.add_argument(
            '--block-wal-slots',
            type=int,
            help='Provision slots on WAL device, can remain unoccupied'
        )
        def journal_size_in_mb_hack(size):
            # TODO give user time to adjust, then remove this
            # Bare integers are interpreted as megabytes (legacy behavior)
            # with a deprecation warning; suffixed sizes parse as-is.
            if size and size[-1].isdigit():
                mlogger.warning('DEPRECATION NOTICE')
                mlogger.warning('--journal-size as integer is parsed as megabytes')
                mlogger.warning('A future release will parse integers as bytes')
                mlogger.warning('Add a "M" to explicitly pass a megabyte size')
                size += 'M'
            return disk.Size.parse(size)
        parser.add_argument(
            '--journal-size',
            type=journal_size_in_mb_hack,
            help='Override the "osd_journal_size" value, in megabytes'
        )
        parser.add_argument(
            '--journal-slots',
            type=int,
            help='Provision slots on journal device, can remain unoccupied'
        )
        parser.add_argument(
            '--prepare',
            action='store_true',
            help='Only prepare all OSDs, do not activate',
        )
        parser.add_argument(
            '--osd-ids',
            nargs='*',
            default=[],
            help='Reuse existing OSD ids',
            type=common.valid_osd_id
        )
        self.args = parser.parse_args(argv)
        self.parser = parser
        # Pre-create empty "usable" lists per device role:
        # self.usable, self.db_usable, self.wal_usable, self.journal_usable.
        for dev_list in ['', 'db_', 'wal_', 'journal_']:
            setattr(self, '{}usable'.format(dev_list), [])

    def report(self, plan):
        """Print the formatted plan report to stdout."""
        report = self._create_report(plan)
        print(report)

    def _create_report(self, plan):
        """Format the plan per --format: pretty text or (indented) JSON."""
        if self.args.format == 'pretty':
            report = ''
            report += templates.total_osds.format(total_osds=len(plan))

            report += templates.osd_component_titles
            for osd in plan:
                report += templates.osd_header
                report += osd.report()
            return report
        else:
            json_report = []
            for osd in plan:
                json_report.append(osd.report_json())
            if self.args.format == 'json':
                return json.dumps(json_report)
            elif self.args.format == 'json-pretty':
                return json.dumps(json_report, indent=4,
                                  sort_keys=True)

    def _check_slot_args(self):
        '''
        checking if -slots args are consistent with other arguments
        '''
        if self.args.data_slots and self.args.osds_per_device:
            if self.args.data_slots < self.args.osds_per_device:
                raise ValueError('data_slots is smaller then osds_per_device')

    def _sort_rotational_disks(self):
        '''
        Helper for legacy auto behaviour.
        Sorts drives into rotating and non-rotating, the latter being used for
        db or journal.
        '''
        mlogger.warning('DEPRECATION NOTICE')
        mlogger.warning('You are using the legacy automatic disk sorting behavior')
        mlogger.warning('The Pacific release will change the default to --no-auto')
        rotating = []
        ssd = []
        for d in self.args.devices:
            rotating.append(d) if d.rotational else ssd.append(d)
        if ssd and not rotating:
            # no need for additional sorting, we'll only deploy standalone on ssds
            return
        self.args.devices = rotating
        # SSDs become the fast tier: journals for filestore, DBs for bluestore.
        if self.args.filestore:
            self.args.journal_devices = ssd
        else:
            self.args.db_devices = ssd

    @decorators.needs_root
    def main(self):
        """Entry point: validate args, build the plan, report and/or execute."""
        if not self.args.devices:
            return self.parser.print_help()

        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True

        # Legacy --auto sorting only applies when no explicit fast-device
        # lists were given.
        if (self.args.auto and not self.args.db_devices and not
            self.args.wal_devices and not self.args.journal_devices):
            self._sort_rotational_disks()

        self._check_slot_args()

        ensure_disjoint_device_lists(self.args.devices,
                                     self.args.db_devices,
                                     self.args.wal_devices,
                                     self.args.journal_devices)

        plan = self.get_plan(self.args)

        if self.args.report:
            self.report(plan)
            return 0

        # Interactive confirmation unless --yes was passed.
        if not self.args.yes:
            self.report(plan)
            terminal.info('The above OSDs would be created if the operation continues')
            if not prompt_bool('do you want to proceed? (yes/no)'):
                terminal.error('aborting OSD provisioning')
                raise SystemExit(0)

        self._execute(plan)

    def _execute(self, plan):
        """Run Prepare (with --prepare) or Create for every planned OSD."""
        defaults = common.get_default_args()
        global_args = [
            'bluestore',
            'filestore',
            'dmcrypt',
            'crush_device_class',
            'no_systemd',
        ]
        # Propagate batch-level flags into each per-OSD argument set.
        defaults.update({arg: getattr(self.args, arg) for arg in global_args})
        for osd in plan:
            args = osd.get_args(defaults)
            if self.args.prepare:
                p = Prepare([])
                p.safe_prepare(argparse.Namespace(**args))
            else:
                c = Create([])
                c.create(argparse.Namespace(**args))


    def get_plan(self, args):
        """Dispatch to the layout planner with the objectstore's fast devices."""
        if args.bluestore:
            plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                              args.wal_devices)
        elif args.filestore:
            plan = self.get_deployment_layout(args, args.devices, args.journal_devices)
        return plan

    def get_deployment_layout(self, args, devices, fast_devices=[],
                              very_fast_devices=[]):
        '''
        The methods here are mostly just organization, error reporting and
        setting up of (default) args. The heavy lifting code for the deployment
        layout can be found in the static get_*_osds and get_*_fast_allocs
        functions.
        '''
        # NOTE(review): fast_devices/very_fast_devices use mutable default
        # arguments; they are never mutated here, but None defaults would be
        # safer.
        plan = []
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed data devices: {} physical,'
                       ' {} LVM').format(len(phys_devs), len(lvm_devs)))

        plan.extend(get_physical_osds(phys_devs, args))

        plan.extend(get_lvm_osds(lvm_devs, args))

        num_osds = len(plan)
        if num_osds == 0:
            mlogger.info('All data devices are unavailable')
            return plan
        requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs)

        # bluestore pairs data with block.db; filestore with a journal.
        fast_type = 'block_db' if args.bluestore else 'journal'
        fast_allocations = self.fast_allocations(fast_devices,
                                                 requested_osds,
                                                 num_osds,
                                                 fast_type)
        if fast_devices and not fast_allocations:
            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
            return []
        # every planned OSD must get exactly one fast allocation
        if fast_devices and not len(fast_allocations) == num_osds:
            mlogger.error('{} fast allocations != {} num_osds'.format(
                len(fast_allocations), num_osds))
            exit(1)

        very_fast_allocations = self.fast_allocations(very_fast_devices,
                                                      requested_osds,
                                                      num_osds,
                                                      'block_wal')
        if very_fast_devices and not very_fast_allocations:
            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
            return []
        if very_fast_devices and not len(very_fast_allocations) == num_osds:
            mlogger.error('{} very fast allocations != {} num_osds'.format(
                len(very_fast_allocations), num_osds))
            exit(1)

        # Attach the allocations to the planned OSDs (popped from the end).
        for osd in plan:
            if fast_devices:
                osd.add_fast_device(*fast_allocations.pop(),
                                    type_=fast_type)
            if very_fast_devices and args.bluestore:
                osd.add_very_fast_device(*very_fast_allocations.pop())
        return plan

    def fast_allocations(self, devices, requested_osds, new_osds, type_):
        """Collect fast-device allocations from LVs and physical devices."""
        ret = []
        if not devices:
            return ret
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed {} devices: {} physical,'
                       ' {} LVM').format(type_, len(phys_devs), len(lvm_devs)))

        ret.extend(get_lvm_fast_allocs(lvm_devs))

        # fill up uneven distributions across fast devices: 5 osds and 2 fast
        # devices? create 3 slots on each device rather then deploying
        # heterogeneous osds
        slot_divider = max(1, len(phys_devs))
        if (requested_osds - len(lvm_devs)) % slot_divider:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider) + 1
        else:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider)


        ret.extend(get_physical_fast_allocs(phys_devs,
                                            type_,
                                            fast_slots_per_device,
                                            new_osds,
                                            self.args))
        return ret

    class OSD(object):
        '''
        This class simply stores info about to-be-deployed OSDs and provides an
        easy way to retrieve the necessary create arguments.
        '''
        # One VolSpec per volume role: data, optional fast (db/journal) and
        # optional very fast (wal).
        VolSpec = namedtuple('VolSpec',
                             ['path',
                              'rel_size',
                              'abs_size',
                              'slots',
                              'type_'])

        def __init__(self,
                     data_path,
                     rel_size,
                     abs_size,
                     slots,
                     id_,
                     encryption):
            self.id_ = id_
            self.data = self.VolSpec(path=data_path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_='data')
            self.fast = None
            self.very_fast = None
            self.encryption = encryption

        def add_fast_device(self, path, rel_size, abs_size, slots, type_):
            """Attach the db/journal volume spec."""
            self.fast = self.VolSpec(path=path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_=type_)

        def add_very_fast_device(self, path, rel_size, abs_size, slots):
            """Attach the wal volume spec."""
            self.very_fast = self.VolSpec(path=path,
                                          rel_size=rel_size,
                                          abs_size=abs_size,
                                          slots=slots,
                                          type_='block_wal')

        def _get_osd_plan(self):
            """Build the argument dict consumed by Prepare/Create."""
            plan = {
                'data': self.data.path,
                'data_size': self.data.abs_size,
                'encryption': self.encryption,
            }
            if self.fast:
                type_ = self.fast.type_.replace('.', '_')
                plan.update(
                    {
                        type_: self.fast.path,
                        '{}_size'.format(type_): self.fast.abs_size,
                    })
            if self.very_fast:
                plan.update(
                    {
                        'block_wal': self.very_fast.path,
                        'block_wal_size': self.very_fast.abs_size,
                    })
            if self.id_:
                plan.update({'osd_id': self.id_})
            return plan

        def get_args(self, defaults):
            """Merge this OSD's plan over the passed defaults (copy, not mutate)."""
            my_defaults = defaults.copy()
            my_defaults.update(self._get_osd_plan())
            return my_defaults

        def report(self):
            """Return the pretty-printed description of this planned OSD."""
            report = ''
            if self.id_:
                report += templates.osd_reused_id.format(
                    id_=self.id_)
            if self.encryption:
                report += templates.osd_encryption.format(
                    enc=self.encryption)
            report += templates.osd_component.format(
                _type=self.data.type_,
                path=self.data.path,
                size=self.data.abs_size,
                percent=self.data.rel_size)
            if self.fast:
                report += templates.osd_component.format(
                    _type=self.fast.type_,
                    path=self.fast.path,
                    size=self.fast.abs_size,
                    percent=self.fast.rel_size)
            if self.very_fast:
                report += templates.osd_component.format(
                    _type=self.very_fast.type_,
                    path=self.very_fast.path,
                    size=self.very_fast.abs_size,
                    percent=self.very_fast.rel_size)
            return report

        def report_json(self):
            # cast all values to string so that the report can be dumped in to
            # json.dumps
            return {k: str(v) for k, v in self._get_osd_plan().items()}