]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph-volume/ceph_volume/api/lvm.py
import ceph octopus 15.2.17
[ceph.git] / ceph / src / ceph-volume / ceph_volume / api / lvm.py
1 """
2 API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention
3 that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
4 set of utilities for interacting with LVM.
5 """
6 import logging
7 import os
8 import uuid
9 from itertools import repeat
10 from math import floor
11 from ceph_volume import process, util, conf
12 from ceph_volume.exceptions import SizeAllocationError
13
14 logger = logging.getLogger(__name__)
15
16
def convert_filters_to_str(filters):
    """
    Convert filter args from dictionary to the format used by LVM's ``-S``
    option::

        filter_name=filter_val,...

    Falsy input (``None`` or an empty dict) is returned unchanged so callers
    can distinguish "no filters" from an empty filter string.

    :param filters: dictionary mapping filter names to (string) values
    :returns: comma-separated ``key=value`` string, or the original falsy value
    """
    if not filters:
        return filters

    # join directly instead of concatenating in a loop and trimming the
    # trailing comma afterwards
    return ','.join('%s=%s' % (k, v) for k, v in filters.items())
32
33
def convert_tags_to_str(tags):
    """
    Convert tags from dictionary to the format used by LVM's ``-S`` option::

        tags={tag_name=tag_val,...}

    Falsy input (``None`` or an empty dict) is returned unchanged.

    :param tags: dictionary mapping tag names to (string) values
    :returns: ``tags={...}`` string, or the original falsy value
    """
    if not tags:
        return tags

    # join directly instead of concatenating in a loop and trimming the
    # trailing comma afterwards
    return 'tags={%s}' % ','.join('%s=%s' % (k, v) for k, v in tags.items())
49
50
def make_filters_lvmcmd_ready(filters, tags):
    """
    Combine filters and tags into the single string LVM's ``-S`` option
    expects::

        filter_name=filter_val...,tags={tag_name=tag_val,...}

    The resulting command will look like::

        lvs -S filter_name=filter_val...,tags={tag_name=tag_val,...}

    :param filters: dictionary of LVM filters (may be falsy)
    :param tags: dictionary of LVM tags (may be falsy)
    :returns: combined selection string; '' when neither is given
    """
    filters_str = convert_filters_to_str(filters)
    tags_str = convert_tags_to_str(tags)

    if filters_str and tags_str:
        return '%s,%s' % (filters_str, tags_str)
    # whichever one is set wins; fall back to an empty selection string
    return filters_str or tags_str or ''
70
71
72 def _output_parser(output, fields):
73 """
74 Newer versions of LVM allow ``--reportformat=json``, but older versions,
75 like the one included in Xenial do not. LVM has the ability to filter and
76 format its output so we assume the output will be in a format this parser
77 can handle (using ';' as a delimiter)
78
79 :param fields: A string, possibly using ',' to group many items, as it
80 would be used on the CLI
81 :param output: The CLI output from the LVM call
82 """
83 field_items = fields.split(',')
84 report = []
85 for line in output:
86 # clear the leading/trailing whitespace
87 line = line.strip()
88
89 # remove the extra '"' in each field
90 line = line.replace('"', '')
91
92 # prevent moving forward with empty contents
93 if not line:
94 continue
95
96 # splitting on ';' because that is what the lvm call uses as
97 # '--separator'
98 output_items = [i.strip() for i in line.split(';')]
99 # map the output to the fields
100 report.append(
101 dict(zip(field_items, output_items))
102 )
103
104 return report
105
106
107 def _splitname_parser(line):
108 """
109 Parses the output from ``dmsetup splitname``, that should contain prefixes
110 (--nameprefixes) and set the separator to ";"
111
112 Output for /dev/mapper/vg-lv will usually look like::
113
114 DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''
115
116
117 The ``VG_NAME`` will usually not be what other callers need (e.g. just 'vg'
118 in the example), so this utility will split ``/dev/mapper/`` out, so that
119 the actual volume group name is kept
120
121 :returns: dictionary with stripped prefixes
122 """
123 parsed = {}
124 try:
125 parts = line[0].split(';')
126 except IndexError:
127 logger.exception('Unable to parse mapper device: %s', line)
128 return parsed
129
130 for part in parts:
131 part = part.replace("'", '')
132 key, value = part.split('=')
133 if 'DM_VG_NAME' in key:
134 value = value.split('/dev/mapper/')[-1]
135 key = key.split('DM_')[-1]
136 parsed[key] = value
137
138 return parsed
139
140
def sizing(device_size, parts=None, size=None):
    """
    Calculate proper sizing to fully utilize the volume group in the most
    efficient way possible. To prevent situations where LVM might accept
    a percentage that is beyond the vg's capabilities, it will refuse with
    an error when requesting a larger-than-possible parameter, in addition
    to rounding down calculations.

    A dictionary with different sizing parameters is returned, to make it
    easier for others to choose what they need in order to create logical
    volumes. Note that ``sizes`` is reported in gigabytes (the byte count is
    divided by 1024^3 and truncated)::

        >>> sizing(4 * 1024**3, parts=2)
        {'parts': 2, 'percentages': 50, 'sizes': 2}

    :param device_size: full size of the device, in bytes
    :param parts: split the device into this many equal parts
    :param size: size of each part, in bytes
    :raises ValueError: if both ``parts`` and ``size`` are given
    :raises SizeAllocationError: if ``size`` exceeds ``device_size``
    """
    if parts is not None and size is not None:
        raise ValueError(
            "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
        )

    if size and size > device_size:
        raise SizeAllocationError(size, device_size)

    def get_percentage(parts):
        return int(floor(100 / float(parts)))

    if size:
        # derive the part count from the requested size
        parts = int(device_size / size) or 1
    else:
        # Prevent parts being 0 or None, falling back to 1 (100% usage).
        # Previously, calling with neither argument raised NameError because
        # ``percentages`` was never assigned.
        parts = parts or 1
    percentages = get_percentage(parts)

    sizes = device_size / parts if parts else int(floor(device_size))

    return {
        'parts': parts,
        'percentages': percentages,
        'sizes': int(sizes / 1024 / 1024 / 1024),
    }
184
185
def parse_tags(lv_tags):
    """
    Return a dictionary mapping of all the tags associated with
    a Volume from the comma-separated tags coming from the LVM API

    Input look like::

       "ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0"

    For the above example, the expected return value would be::

        {
            "ceph.osd_fsid": "aaa-fff-bbbb",
            "ceph.osd_id": "0"
        }

    Tags not carrying the ``ceph.`` prefix are ignored.
    """
    if not lv_tags:
        return {}
    # only ceph.* assignments matter; split each on the first '=' so that
    # values containing '=' stay intact
    pairs = (
        assignment.split('=', 1)
        for assignment in lv_tags.split(',')
        if assignment.startswith('ceph.')
    )
    return {key: value for key, value in pairs}
213
214
def _vdo_parents(devices):
    """
    It is possible we didn't get a logical volume, or a mapper path, but
    a device like /dev/sda2, to resolve this, we must look at all the slaves of
    every single device in /sys/block and if any of those devices is related to
    VDO devices, then we can add the parent

    :param devices: list of device names/paths already collected
    :returns: list of '/dev/<name>' and bare '<name>' entries for every
              /sys/block device that has a slave in ``devices``
    """
    return [
        entry
        for name in os.listdir('/sys/block')
        for slave in os.listdir('/sys/block/%s/slaves' % name)
        if slave in devices
        for entry in ('/dev/%s' % name, name)
    ]
229
230
def _vdo_slaves(vdo_names):
    """
    find all the slaves associated with each vdo name (from realpath) by going
    into /sys/block/<realpath>/slaves

    :param vdo_names: iterable of VDO device names (as seen in /sys/kvdo)
    :returns: list of realpaths, mapper paths, real names, and their slave
              device paths/names
    """
    devices = []
    for name in vdo_names:
        mapper = '/dev/mapper/%s' % name
        if not os.path.exists(mapper):
            continue
        # resolve the realpath and realname of the vdo mapper
        realpath = os.path.realpath(mapper)
        realname = realpath.split('/')[-1]
        slaves_dir = '/sys/block/%s/slaves' % realname
        if not os.path.exists(slaves_dir):
            continue
        devices.extend([realpath, mapper, realname])
        for slave in os.listdir(slaves_dir):
            devices.extend(['/dev/%s' % slave, slave])
    return devices
254
255
def _is_vdo(path):
    """
    A VDO device can be composed from many different devices, go through each
    one of those devices and its slaves (if any) and correlate them back to
    /dev/mapper and their realpaths, and then check if they appear as part of
    /sys/kvdo/<name>/statistics

    From the realpath of a logical volume, determine if it is a VDO device or
    not, by correlating it to the presence of the name in
    /sys/kvdo/<name>/statistics and all the previously captured devices

    :param path: device path to check (LV path, mapper path, or plain device)
    :returns: True if ``path`` (or its realpath/realname) matches any
              VDO-related device, False otherwise
    """
    # no /sys/kvdo directory means the VDO kernel module isn't loaded at all
    if not os.path.isdir('/sys/kvdo'):
        return False
    realpath = os.path.realpath(path)
    realpath_name = realpath.split('/')[-1]
    devices = []
    vdo_names = set()
    # get all the vdo names: each /sys/kvdo/<name>/statistics dir is one VDO
    for dirname in os.listdir('/sys/kvdo/'):
        if os.path.isdir('/sys/kvdo/%s/statistics' % dirname):
            vdo_names.add(dirname)

    # find all the slaves associated with each vdo name (from realpath) by
    # going into /sys/block/<realpath>/slaves
    devices.extend(_vdo_slaves(vdo_names))

    # Find all possible parents, looking into slaves that are related to VDO.
    # NOTE: this depends on the slaves collected above, so order matters here.
    devices.extend(_vdo_parents(devices))

    # match on any of the three identities the caller might have passed
    return any([
        path in devices,
        realpath in devices,
        realpath_name in devices])
289
290
def is_vdo(path):
    """
    Detect if a path is backed by VDO, proxying the actual call to _is_vdo so
    that we can prevent an exception breaking OSD creation. Any exception is
    captured and logged to file, and the device is reported as not-VDO.

    :param path: device path to check
    :returns: the string '1' when the path is VDO-backed, '0' otherwise
    """
    try:
        return '1' if _is_vdo(path) else '0'
    except Exception:
        logger.exception('Unable to properly detect device as VDO: %s', path)
        return '0'
305
306
def dmsetup_splitname(dev):
    """
    Run ``dmsetup splitname`` and parse the results.

    .. warning:: This call does not ensure that the device is correct or that
        it exists. ``dmsetup`` will happily take a non existing path and still
        return a 0 exit status.

    :param dev: mapper device path to split into VG/LV components
    :returns: dictionary produced by ``_splitname_parser``
    """
    out, err, rc = process.call([
        'dmsetup', 'splitname', '--noheadings',
        "--separator=';'", '--nameprefixes', dev
    ])
    return _splitname_parser(out)
321
322
def is_ceph_device(lv):
    """
    Check whether an LV belongs to Ceph, based on its ``ceph.osd_id`` tag.

    :param lv: a ``Volume`` object (anything exposing a ``tags`` dict works)
    :returns: True when the LV carries a real ``ceph.osd_id`` tag; False when
        the tag is missing, the object has no tags, or the tag still holds
        the "null" placeholder assigned to not-yet-claimed LVs
    """
    try:
        # single EAFP lookup instead of a bare-subscript probe followed by
        # a second lookup of the same key
        osd_id = lv.tags['ceph.osd_id']
    except (KeyError, AttributeError):
        logger.warning('device is not part of ceph: %s', lv)
        return False

    # "null" marks an LV pre-created by ceph-volume but not claimed by an OSD
    return osd_id != 'null'
334
335
####################################
#
# Code for LVM Physical Volumes
#
################################

# default field list requested from ``pvs`` (via -o); each field becomes an
# attribute on the resulting PVolume objects
PV_FIELDS = 'pv_name,pv_tags,pv_uuid,vg_name,lv_uuid'
343
class PVolume(object):
    """
    Represents a Physical Volume from LVM, with some top-level attributes like
    ``pv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # expose every reported field (pv_name, pv_uuid, ...) as an attribute
        for k, v in kw.items():
            setattr(self, k, v)
        self.pv_api = kw
        self.name = kw['pv_name']
        self.tags = parse_tags(kw['pv_tags'])

    def __str__(self):
        return '<%s>' % self.pv_api['pv_name']

    def __repr__(self):
        return self.__str__()

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        At the end of all modifications, the tags are refreshed to reflect
        LVM's most current view.

        :raises RuntimeError: if the PV can no longer be found after tagging
        """
        for k, v in tags.items():
            self.set_tag(k, v)
        # after setting all the tags, refresh them for the current object, use
        # the pv_* identifiers to filter because those shouldn't change.
        # NOTE: this must be the module-level get_single_pv() with the
        # ``filters`` keyword -- the class has no get_single_pv method and
        # the function accepts no ``filter`` argument, so the previous
        # ``self.get_single_pv(filter=...)`` call could never succeed.
        pv_object = get_single_pv(filters={'pv_name': self.pv_name,
                                           'pv_uuid': self.pv_uuid})

        if not pv_object:
            raise RuntimeError('No PV was found.')

        self.tags = pv_object.tags

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag. Does not "refresh" the values of
        the current object for its tags. Meant to be a "fire and forget" type
        of modification.

        **warning**: Altering tags on a PV has to be done ensuring that the
        device is actually the one intended. ``pv_name`` is *not* a persistent
        value, only ``pv_uuid`` is. Using ``pv_uuid`` is the best way to make
        sure the device getting changed is the one needed.
        """
        # remove it first if it exists
        if self.tags.get(key):
            current_value = self.tags[key]
            tag = "%s=%s" % (key, current_value)
            process.call(['pvchange', '--deltag', tag, self.pv_name], run_on_host=True)

        process.call(
            [
                'pvchange',
                '--addtag', '%s=%s' % (key, value), self.pv_name
            ],
            run_on_host=True
        )
411
412
def create_pv(device):
    """
    Create a physical volume on ``device``, useful when devices need to be
    later mapped to journals. Runs::

        pvcreate -v -f --yes <device>
    """
    # -v: verbose, -f: force it, --yes: answer yes to any prompts
    process.run(
        ['pvcreate', '-v', '-f', '--yes', device],
        run_on_host=True
    )
425
426
def remove_pv(pv_name):
    """
    Removes a physical volume using a double `-f` to prevent prompts and fully
    remove anything related to LVM. This is tremendously destructive, but so is all other actions
    when zapping a device.

    In the case where multiple PVs are found, it will ignore that fact and
    continue with the removal, specifically in the case of messages like::

        WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2

    These situations can be avoided with custom filtering rules, which this API
    cannot handle while accommodating custom user filters.

    :param pv_name: path of the PV to remove
    """
    # the failure message previously said "vg", a copy/paste slip from
    # remove_vg -- this removes a PV
    fail_msg = "Unable to remove pv %s" % pv_name
    process.run(
        [
            'pvremove',
            '-v',  # verbose
            '-f',  # force it
            '-f',  # force it
            pv_name
        ],
        run_on_host=True,
        fail_msg=fail_msg,
    )
453
454
def get_pvs(fields=PV_FIELDS, filters='', tags=None):
    """
    Return a list of PVs that are available on the system and match the
    filters and tags passed. Argument filters takes a dictionary containing
    arguments required by -S option of LVM. Passing a list of LVM tags can be
    quite tricky to pass as a dictionary within dictionary, therefore pass
    dictionary of tags via tags argument and tricky part will be taken care of
    by the helper methods.

    :param fields: string containing list of fields to be displayed by the
                   pvs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class PVolume object representing pvs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    args = [
        'pvs', '--noheadings', '--readonly', '--separator=";"',
        '-S', selection, '-o', fields,
    ]

    stdout, stderr, returncode = process.call(args, run_on_host=True, verbose_on_failure=False)
    return [PVolume(**report) for report in _output_parser(stdout, fields)]
478
479
def get_single_pv(fields=PV_FIELDS, filters=None, tags=None):
    """
    Wrapper of get_pvs() meant to be a convenience method to avoid the phrase::
        pvs = get_pvs()
        if len(pvs) >= 1:
            pv = pvs[0]

    :returns: a single ``PVolume``, or None when nothing matched
    :raises RuntimeError: when more than one PV matched the filters
    """
    pvs = get_pvs(fields=fields, filters=filters, tags=tags)

    if not pvs:
        return None
    if len(pvs) > 1:
        raise RuntimeError('Filters {} matched more than 1 PV present on this host.'.format(str(filters)))
    return pvs[0]
495
496
################################
#
# Code for LVM Volume Groups
#
#############################

# default field list requested from ``vgs`` (via -o); each field becomes an
# attribute on the resulting VolumeGroup objects
VG_FIELDS = 'vg_name,pv_count,lv_count,vg_attr,vg_extent_count,vg_free_count,vg_extent_size'
# common reporting options: no header row, read-only, byte units without a
# suffix, and ';' as the field separator (what _output_parser expects)
VG_CMD_OPTIONS = ['--noheadings', '--readonly', '--units=b', '--nosuffix', '--separator=";"']
505
506
class VolumeGroup(object):
    """
    Represents an LVM group, with some top-level attributes like ``vg_name``.
    Every field reported by ``vgs`` (see ``VG_FIELDS``) is set as an attribute.
    """

    def __init__(self, **kw):
        # expose every reported field (vg_name, vg_free_count, ...) directly
        for k, v in kw.items():
            setattr(self, k, v)
        self.name = kw['vg_name']
        if not self.name:
            raise ValueError('VolumeGroup must have a non-empty name')
        self.tags = parse_tags(kw.get('vg_tags', ''))

    def __str__(self):
        return '<%s>' % self.name

    def __repr__(self):
        return self.__str__()

    @property
    def free(self):
        """
        Return free space in VG in bytes
        """
        return int(self.vg_extent_size) * int(self.vg_free_count)

    @property
    def free_percent(self):
        """
        Return the free portion of the VG as a ratio between 0 and 1
        (free extents divided by total extents), not bytes.
        """
        return int(self.vg_free_count) / int(self.vg_extent_count)

    @property
    def size(self):
        """
        Returns VG size in bytes
        """
        return int(self.vg_extent_size) * int(self.vg_extent_count)

    def sizing(self, parts=None, size=None):
        """
        Calculate proper sizing to fully utilize the volume group in the most
        efficient way possible. To prevent situations where LVM might accept
        a percentage that is beyond the vg's capabilities, it will refuse with
        an error when requesting a larger-than-possible parameter, in addition
        to rounding down calculations.

        A dictionary with different sizing parameters is returned, to make it
        easier for others to choose what they need in order to create logical
        volumes::

            >>> data_vg.free
            1024
            >>> data_vg.sizing(parts=4)
            {'parts': 4, 'sizes': 256, 'percentages': 25}
            >>> data_vg.sizing(size=512)
            {'parts': 2, 'sizes': 512, 'percentages': 50}


        :param parts: Number of parts to create LVs from
        :param size: Size in gigabytes to divide the VG into

        :raises SizeAllocationError: When requested size cannot be allocated with
        :raises ValueError: If both ``parts`` and ``size`` are given
        """
        if parts is not None and size is not None:
            raise ValueError(
                "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
            )

        # if size is given we need to map that to extents so that we avoid
        # issues when trying to get this right with a size in gigabytes find
        # the percentage first, cheating, because these values are thrown out
        vg_free_count = util.str_to_int(self.vg_free_count)

        if size:
            # convert the requested gigabytes to bytes, then to whole extents
            size = size * 1024 * 1024 * 1024
            extents = int(size / int(self.vg_extent_size))
            disk_sizing = sizing(self.free, size=size, parts=parts)
        else:
            if parts is not None:
                # Prevent parts being 0, falling back to 1 (100% usage)
                parts = parts or 1
            size = int(self.free / parts)
            # proportionally map the byte size onto free extents
            extents = size * vg_free_count / self.free
            disk_sizing = sizing(self.free, parts=parts)

        # recompute the percentages against extents, which is what LVM will
        # actually allocate, and merge them into the byte-based sizing
        extent_sizing = sizing(vg_free_count, size=extents)

        disk_sizing['extents'] = int(extents)
        disk_sizing['percentages'] = extent_sizing['percentages']
        return disk_sizing

    def bytes_to_extents(self, size):
        '''
        Return a how many free extents we can fit into a size in bytes. This has
        some uncertainty involved. If size/extent_size is within 1% of the
        actual free extents we will return the extent count, otherwise we'll
        throw an error.
        This accommodates for the size calculation in batch. We need to report
        the OSD layout but have not yet created any LVM structures. We use the
        disk size in batch if no VG is present and that will overshoot the
        actual free_extent count due to LVM overhead.

        :raises RuntimeError: when ``size`` is more than 1% over the free space
        '''
        b_to_ext = int(size / int(self.vg_extent_size))
        if b_to_ext < int(self.vg_free_count):
            # return bytes in extents if there is more space
            return b_to_ext
        elif b_to_ext / int(self.vg_free_count) - 1 < 0.01:
            # return vg_free_count if it's less than 1% off
            logger.info(
                'bytes_to_extents results in {} but only {} '
                'are available, adjusting the latter'.format(b_to_ext,
                                                             self.vg_free_count))
            return int(self.vg_free_count)
        # else raise an exception
        raise RuntimeError('Can\'t convert {} to free extents, only {} ({} '
                           'bytes) are free'.format(size, self.vg_free_count,
                                                    self.free))

    def slots_to_extents(self, slots):
        '''
        Return how many extents fit the VG slot times
        '''
        return int(int(self.vg_extent_count) / slots)
634
635
def create_vg(devices, name=None, name_prefix=None):
    """
    Create a Volume Group. Command looks like::

        vgcreate --force --yes group_name device

    Once created the volume group is returned as a ``VolumeGroup`` object

    :param devices: A list of devices to create a VG. Optionally, a single
                    device (as a string) can be used.
    :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
    :param name_prefix: Optionally prefix the name of the VG, which will get combined
                        with a UUID string
    """
    # normalize to a plain list of devices
    if isinstance(devices, set):
        devices = list(devices)
    if not isinstance(devices, list):
        devices = [devices]

    # a prefix wins over an explicit name; otherwise generate a ceph- name
    if name_prefix:
        name = "%s-%s" % (name_prefix, str(uuid.uuid4()))
    elif name is None:
        name = "ceph-%s" % str(uuid.uuid4())

    process.run(
        ['vgcreate', '--force', '--yes', name] + devices,
        run_on_host=True
    )
    return get_single_vg(filters={'vg_name': name})
667
668
def extend_vg(vg, devices):
    """
    Extend a Volume Group. Command looks like::

        vgextend --force --yes group_name [device, ...]

    Once created the volume group is extended and returned as a ``VolumeGroup`` object

    :param vg: A VolumeGroup object
    :param devices: A list of devices to extend the VG. Optionally, a single
                    device (as a string) can be used.
    """
    device_list = devices if isinstance(devices, list) else [devices]
    process.run(
        ['vgextend', '--force', '--yes', vg.name] + device_list,
        run_on_host=True
    )
    return get_single_vg(filters={'vg_name': vg.name})
692
693
def reduce_vg(vg, devices):
    """
    Reduce a Volume Group. Command looks like::

        vgreduce --force --yes group_name [device, ...]

    :param vg: A VolumeGroup object
    :param devices: A list of devices to remove from the VG. Optionally, a
                    single device (as a string) can be used.
    :returns: the reduced ``VolumeGroup``, freshly queried from LVM
    """
    if not isinstance(devices, list):
        devices = [devices]
    process.run([
        'vgreduce',
        '--force',
        '--yes',
        vg.name] + devices,
        run_on_host=True
    )

    # must use the ``filters`` keyword: get_single_vg() has no ``filter``
    # parameter, so the previous ``filter=`` call raised TypeError here
    return get_single_vg(filters={'vg_name': vg.name})
715
716
def remove_vg(vg_name):
    """
    Removes a volume group via ``vgremove -v -f``. A falsy name is skipped
    (with a warning) instead of being passed to LVM.
    """
    if not vg_name:
        logger.warning('Skipping removal of invalid VG name: "%s"', vg_name)
        return
    # -v: verbose, -f: force it
    process.run(
        ['vgremove', '-v', '-f', vg_name],
        run_on_host=True,
        fail_msg="Unable to remove vg %s" % vg_name,
    )
735
736
def get_vgs(fields=VG_FIELDS, filters='', tags=None):
    """
    Return a list of VGs that are available on the system and match the
    filters and tags passed. Argument filters takes a dictionary containing
    arguments required by -S option of LVM. Passing a list of LVM tags can be
    quite tricky to pass as a dictionary within dictionary, therefore pass
    dictionary of tags via tags argument and tricky part will be taken care of
    by the helper methods.

    :param fields: string containing list of fields to be displayed by the
                   vgs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class VolumeGroup object representing vgs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    args = ['vgs'] + VG_CMD_OPTIONS + ['-S', selection, '-o', fields]

    stdout, stderr, returncode = process.call(args, run_on_host=True, verbose_on_failure=False)
    return [VolumeGroup(**report) for report in _output_parser(stdout, fields)]
759
760
def get_single_vg(fields=VG_FIELDS, filters=None, tags=None):
    """
    Wrapper of get_vgs() meant to be a convenience method to avoid the phrase::
        vgs = get_vgs()
        if len(vgs) >= 1:
            vg = vgs[0]

    :returns: a single ``VolumeGroup``, or None when nothing matched
    :raises RuntimeError: when more than one VG matched the filters
    """
    vgs = get_vgs(fields=fields, filters=filters, tags=tags)

    if not vgs:
        return None
    if len(vgs) > 1:
        raise RuntimeError('Filters {} matched more than 1 VG present on this host.'.format(str(filters)))
    return vgs[0]
776
777
def get_device_vgs(device, name_prefix=''):
    """
    Return the ``VolumeGroup`` objects that ``device`` belongs to, optionally
    restricted to VG names starting with ``name_prefix``. Reports with an
    empty vg_name (device not in any VG) are dropped.
    """
    stdout, stderr, returncode = process.call(
        ['pvs'] + VG_CMD_OPTIONS + ['-o', VG_FIELDS, device],
        run_on_host=True,
        verbose_on_failure=False
    )
    reports = _output_parser(stdout, VG_FIELDS)
    return [
        VolumeGroup(**report)
        for report in reports
        if report['vg_name'] and report['vg_name'].startswith(name_prefix)
    ]
786
787
#################################
#
# Code for LVM Logical Volumes
#
###############################

# default field list requested from ``lvs`` (via -o); each field becomes an
# attribute on the resulting Volume objects
LV_FIELDS = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size'
# common reporting options: no header row, read-only, include hidden LVs (-a),
# byte units without suffix, and ';' separator (what _output_parser expects)
LV_CMD_OPTIONS = ['--noheadings', '--readonly', '--separator=";"', '-a',
                  '--units=b', '--nosuffix']
797
798
class Volume(object):
    """
    Represents a Logical Volume from LVM, with some top-level attributes like
    ``lv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # expose every reported field (lv_name, lv_path, ...) as an attribute
        for k, v in kw.items():
            setattr(self, k, v)
        self.lv_api = kw
        self.name = kw['lv_name']
        if not self.name:
            raise ValueError('Volume must have a non-empty name')
        self.tags = parse_tags(kw['lv_tags'])
        # convenience flags derived from the ceph.* tags
        self.encrypted = self.tags.get('ceph.encrypted', '0') == '1'
        self.used_by_ceph = 'ceph.osd_id' in self.tags

    def __str__(self):
        return '<%s>' % self.lv_api['lv_path']

    def __repr__(self):
        return self.__str__()

    def as_dict(self):
        """
        Return the raw LVM report fields plus the parsed ``tags``, the
        ``name``, the ``type`` (from the ceph.type tag) and the ``path``.
        """
        obj = {}
        obj.update(self.lv_api)
        obj['tags'] = self.tags
        obj['name'] = self.name
        obj['type'] = self.tags['ceph.type']
        obj['path'] = self.lv_path
        return obj

    def report(self):
        """
        Produce a reporting dictionary: LVs not used by Ceph get a minimal
        name/comment dict, Ceph LVs report their OSD metadata tags plus the
        type-specific ``<type>_uuid`` entry.
        """
        if not self.used_by_ceph:
            return {
                'name': self.lv_name,
                'comment': 'not used by ceph'
            }
        else:
            type_ = self.tags['ceph.type']
            report = {
                'name': self.lv_name,
                'osd_id': self.tags['ceph.osd_id'],
                'cluster_name': self.tags.get('ceph.cluster_name', conf.cluster),
                'type': type_,
                'osd_fsid': self.tags['ceph.osd_fsid'],
                'cluster_fsid': self.tags['ceph.cluster_fsid'],
                'osdspec_affinity': self.tags.get('ceph.osdspec_affinity', ''),
            }
            type_uuid = '{}_uuid'.format(type_)
            report[type_uuid] = self.tags['ceph.{}'.format(type_uuid)]
            return report

    def _format_tag_args(self, op, tags):
        # build e.g. ['--addtag', 'k1=v1', '--addtag', 'k2=v2']
        tag_args = ['{}={}'.format(k, v) for k, v in tags.items()]
        # weird but efficient way of zipping two lists and getting a flat list
        return list(sum(zip(repeat(op), tag_args), ()))

    def clear_tags(self, keys=None):
        """
        Removes all or passed tags from the Logical Volume.
        """
        if not keys:
            keys = self.tags.keys()

        # only delete tags that are actually set
        del_tags = {k: self.tags[k] for k in keys if k in self.tags}
        if not del_tags:
            # nothing to clear
            return
        del_tag_args = self._format_tag_args('--deltag', del_tags)
        # --deltag returns successful even if the to be deleted tag is not set
        process.call(['lvchange'] + del_tag_args + [self.lv_path], run_on_host=True)
        # keep the cached mapping in sync with LVM
        for k in del_tags.keys():
            del self.tags[k]


    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        At the end of all modifications, the tags are refreshed to reflect
        LVM's most current view.
        """
        # clear first so changed values don't leave stale duplicates behind
        self.clear_tags(tags.keys())
        add_tag_args = self._format_tag_args('--addtag', tags)
        process.call(['lvchange'] + add_tag_args + [self.lv_path], run_on_host=True)
        for k, v in tags.items():
            self.tags[k] = v


    def clear_tag(self, key):
        # delete a single tag from LVM (if set) and the cached mapping
        if self.tags.get(key):
            current_value = self.tags[key]
            tag = "%s=%s" % (key, current_value)
            process.call(['lvchange', '--deltag', tag, self.lv_path], run_on_host=True)
            del self.tags[key]


    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag.
        """
        # remove it first if it exists
        self.clear_tag(key)

        process.call(
            [
                'lvchange',
                '--addtag', '%s=%s' % (key, value), self.lv_path
            ],
            run_on_host=True
        )
        self.tags[key] = value

    def deactivate(self):
        """
        Deactivate the LV by calling lvchange -an
        """
        process.call(['lvchange', '-an', self.lv_path], run_on_host=True)
923
924
def create_lv(name_prefix,
              uuid,
              vg=None,
              device=None,
              slots=None,
              extents=None,
              size=None,
              tags=None):
    """
    Create a Logical Volume in a Volume Group. Command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name_prefix`` is required. If ``size`` is provided its expected to be a
    byte count. Tags are an optional dictionary and is expected to
    conform to the convention of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    :param name_prefix: name prefix for the LV, typically something like ceph-osd-block
    :param uuid: UUID to ensure uniqueness; is combined with name_prefix to
                 form the LV name
    :param vg: optional, pass an existing VG to create LV
    :param device: optional, device to use. Either device or vg must be passed
    :param slots: optional, number of slots to divide vg up, LV will occupy one
                  slot if enough space is available
    :param extents: optional, how many lvm extents to use, supersedes slots
    :param size: optional, target LV size in bytes, supersedes extents,
                 resulting LV might be smaller depending on extent
                 size of the underlying VG
    :param tags: optional, a dict of lvm tags to set on the LV
    :raises RuntimeError: when neither ``vg`` nor ``device`` is given
    """
    name = '{}-{}'.format(name_prefix, uuid)
    if not vg:
        if not device:
            raise RuntimeError("Must either specify vg or device, none given")
        # check if a vgs starting with ceph already exists
        vgs = get_device_vgs(device, 'ceph')
        if vgs:
            vg = vgs[0]
        else:
            # create one if not
            vg = create_vg(device, name_prefix='ceph')
    assert(vg)

    # translate size/slots into an extent count, which is what lvcreate -l
    # is ultimately given; size takes precedence over slots/extents
    if size:
        extents = vg.bytes_to_extents(size)
        logger.debug('size was passed: {} -> {}'.format(size, extents))
    elif slots and not extents:
        extents = vg.slots_to_extents(slots)
        logger.debug('slots was passed: {} -> {}'.format(slots, extents))

    if extents:
        command = [
            'lvcreate',
            '--yes',
            '-l',
            '{}'.format(extents),
            '-n', name, vg.vg_name
        ]
    # create the lv with all the space available, this is needed because the
    # system call is different for LVM
    else:
        command = [
            'lvcreate',
            '--yes',
            '-l',
            '100%FREE',
            '-n', name, vg.vg_name
        ]
    process.run(command, run_on_host=True)

    lv = get_single_lv(filters={'lv_name': name, 'vg_name': vg.vg_name})

    # default placeholder tags mark the LV as not yet claimed by an OSD
    if tags is None:
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }
    # when creating a distinct type, the caller doesn't know what the path will
    # be so this function will set it after creation using the mapping
    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    type_path_tag = {
        'journal': 'ceph.journal_device',
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    path_tag = type_path_tag.get(tags.get('ceph.type'))
    if path_tag:
        tags.update({path_tag: lv.lv_path})

    lv.set_tags(tags)

    return lv
1024
1025
def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'):
    """
    Create multiple Logical Volumes from a Volume Group by calculating the
    proper extents from ``parts`` or ``size``. A custom prefix can be used
    (defaults to ``ceph-lv``), these names are always suffixed with a uuid.

    LV creation in ceph-volume will require tags, this is expected to be
    pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It
    will probably not be the case when mass-creating LVs, so common/default
    tags will be set to ``"null"``.

    .. note:: LVs that are not in use can be detected by querying LVM for tags that are
              set to ``"null"``.

    :param volume_group: The volume group (vg) to use for LV creation
    :type group: ``VolumeGroup()`` object
    :param parts: Number of LVs to create *instead of* ``size``.
    :type parts: int
    :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs as possible"
    :type size: int
    :returns: list of the created ``Volume`` objects
    """
    if parts is None and size is None:
        # fallback to just one part (using 100% of the vg)
        parts = 1
    lvs = []
    # placeholder tags mark the new LVs as unused until an OSD claims them
    tags = {
        "ceph.osd_id": "null",
        "ceph.type": "null",
        "ceph.cluster_fsid": "null",
        "ceph.osd_fsid": "null",
    }
    # shadows the module-level sizing() only locally; every LV gets the same
    # extent count computed by the VG
    sizing = volume_group.sizing(parts=parts, size=size)
    for part in range(0, sizing['parts']):
        size = sizing['sizes']
        extents = sizing['extents']
        lvs.append(
            create_lv(name_prefix, uuid.uuid4(), vg=volume_group, extents=extents, tags=tags)
        )
    return lvs
1067
1068
def remove_lv(lv):
    """
    Remove a logical volume.

    Accepts either a ``Volume`` object or the absolute path of the LV to
    delete. Returns ``True`` when the removal succeeds, otherwise raises
    ``RuntimeError``.

    :param lv: A ``Volume`` object or the path for an LV
    """
    path = lv.lv_path if isinstance(lv, Volume) else lv

    # -v: verbose, -f: force (no confirmation prompt)
    command = ['lvremove', '-v', '-f', path]
    stdout, stderr, returncode = process.call(
        command,
        run_on_host=True,
        show_command=True,
        terminal_verbose=True,
    )
    if returncode != 0:
        raise RuntimeError("Unable to remove %s" % path)
    return True
1097
1098
def get_lvs(fields=LV_FIELDS, filters='', tags=None):
    """
    List the LVs present on the system, optionally narrowed down by
    ``filters`` and ``tags``. ``filters`` is a dictionary of arguments
    accepted by the ``-S`` option of LVM; LVM tags are tricky to express
    inside that same dictionary, so they are passed separately via ``tags``
    and merged into a single ``-S`` string by the helper methods.

    :param fields: string containing list of fields to be displayed by the
                   lvs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class Volume object representing LVs on the system
    """
    select_str = make_filters_lvmcmd_ready(filters, tags)
    command = ['lvs'] + LV_CMD_OPTIONS + ['-S', select_str, '-o', fields]

    stdout, stderr, returncode = process.call(
        command, run_on_host=True, verbose_on_failure=False)
    return [Volume(**report) for report in _output_parser(stdout, fields)]
1121
1122
def get_single_lv(fields=LV_FIELDS, filters=None, tags=None):
    """
    Convenience wrapper around ``get_lvs()`` for when exactly one LV is
    expected to match. Returns ``None`` when nothing matches, and raises
    ``RuntimeError`` when the filters are ambiguous (multiple matches).
    """
    matches = get_lvs(fields=fields, filters=filters, tags=tags)

    if not matches:
        return None
    if len(matches) > 1:
        raise RuntimeError('Filters {} matched more than 1 LV present on this host.'.format(str(filters)))

    return matches[0]
1138
1139
def get_lv_by_name(name):
    # Query LVM directly for LVs whose name matches exactly; more than one
    # LV can share a name across different VGs, hence the list return.
    command = ['lvs', '--noheadings', '-o', LV_FIELDS, '-S',
               'lv_name={}'.format(name)]
    stdout, stderr, returncode = process.call(
        command,
        run_on_host=True,
        verbose_on_failure=False
    )
    reports = _output_parser(stdout, LV_FIELDS)
    return [Volume(**report) for report in reports]
1149
1150
def get_lvs_by_tag(lv_tag):
    # -a includes internal/hidden LVs; the tag is matched via a -S selection
    # on lv_tags (the {{{}}} expands to {<tag>} as LVM expects)
    command = ['lvs', '--noheadings', '--separator=";"', '-a', '-o',
               LV_FIELDS, '-S', 'lv_tags={{{}}}'.format(lv_tag)]
    stdout, stderr, returncode = process.call(
        command,
        run_on_host=True,
        verbose_on_failure=False
    )
    reports = _output_parser(stdout, LV_FIELDS)
    return [Volume(**report) for report in reports]
1160
1161
def get_device_lvs(device, name_prefix=''):
    # 'pvs' restricted to one device reports the LVs backed by that device;
    # entries without an lv_name (bare PVs) are skipped
    stdout, stderr, returncode = process.call(
        ['pvs'] + LV_CMD_OPTIONS + ['-o', LV_FIELDS, device],
        run_on_host=True,
        verbose_on_failure=False
    )
    reports = _output_parser(stdout, LV_FIELDS)
    return [
        Volume(**report)
        for report in reports
        if report['lv_name'] and report['lv_name'].startswith(name_prefix)
    ]
1171
def get_lv_by_fullname(full_name):
    """
    Return the LV identified by its full name (formatted as
    ``vg_name/lv_name``), or ``None`` when the name is malformed.
    """
    result = None
    try:
        # unpacking raises ValueError unless there is exactly one '/'
        vg_name, lv_name = full_name.split('/')
        result = get_single_lv(filters={'lv_name': lv_name,
                                        'vg_name': vg_name})
    except ValueError:
        result = None
    return result