]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/api/lvm.py
Add patch for failing prerm scripts
[ceph.git] / ceph / src / ceph-volume / ceph_volume / api / lvm.py
CommitLineData
d2e6a577
FG
1"""
2API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention
3that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
4set of utilities for interacting with LVM.
5"""
94b18763
FG
6import logging
7import os
1adf2230
AA
8import uuid
9from math import floor
10from ceph_volume import process, util
11from ceph_volume.exceptions import (
12 MultipleLVsError, MultipleVGsError,
13 MultiplePVsError, SizeAllocationError
14)
d2e6a577 15
94b18763
FG
16logger = logging.getLogger(__name__)
17
d2e6a577 18
b5b8bbf5
FG
def _output_parser(output, fields):
    """
    Turn delimited LVM report output into a list of dictionaries.

    Newer versions of LVM allow ``--reportformat=json``, but older versions,
    like the one included in Xenial do not, so the callers ask LVM for ';'
    separated output and this helper maps every column to its field name.

    :param output: The CLI output from the LVM call (an iterable of lines)
    :param fields: A string, possibly using ',' to group many items, as it
                   would be used on the CLI
    :returns: A list with one ``{field: value}`` dictionary per non-empty
              line. Columns are paired with fields positionally; extra
              columns or extra fields are dropped by ``zip``.
    """
    names = fields.split(',')
    rows = []
    for raw_line in output:
        # drop surrounding whitespace and the extra '"' around each field
        cleaned = raw_line.strip().replace('"', '')
        if cleaned:
            # ';' is what the lvm calls pass as '--separator'
            values = [value.strip() for value in cleaned.split(';')]
            # map the output to the fields
            rows.append(dict(zip(names, values)))
    return rows
52
53
1adf2230
AA
def _splitname_parser(line):
    """
    Parses the output from ``dmsetup splitname``, that should contain prefixes
    (--nameprefixes) and set the separator to ";"

    Output for /dev/mapper/vg-lv will usually look like::

        DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''

    The ``VG_NAME`` will usually not be what other callers need (e.g. just
    'vg' in the example), so this utility will split ``/dev/mapper/`` out, so
    that the actual volume group name is kept.

    :param line: The command output as a sequence of lines; only the first
                 item is parsed
    :returns: dictionary with stripped prefixes (``DM_`` removed from the
              keys, single quotes removed from the values). An empty
              dictionary is returned when ``line`` has no items.
    """
    parsed = {}
    try:
        parts = line[0].split(';')
    except IndexError:
        logger.exception('Unable to parse mapper device: %s', line)
        return parsed

    for part in parts:
        # partition() only splits on the first '=', so values that contain
        # '=' are preserved instead of blowing up the tuple unpacking
        key, separator, value = part.replace("'", '').partition('=')
        if not separator:
            # not a KEY=VALUE pair (e.g. an empty trailing segment)
            continue
        if 'DM_VG_NAME' in key:
            value = value.split('/dev/mapper/')[-1]
        parsed[key.split('DM_')[-1]] = value

    return parsed
86
87
def sizing(device_size, parts=None, size=None):
    """
    Calculate proper sizing to fully utilize the volume group in the most
    efficient way possible. To prevent situations where LVM might accept
    a percentage that is beyond the vg's capabilities, it will refuse with
    an error when requesting a larger-than-possible parameter, in addition
    to rounding down calculations.

    A dictionary with different sizing parameters is returned, to make it
    easier for others to choose what they need in order to create logical
    volumes::

        >>> sizing(100, parts=2)
        {'parts': 2, 'percentages': 50, 'sizes': 50}

    When neither ``parts`` nor ``size`` is given (or they are ``0``), the
    whole device is treated as a single part using 100% of it.

    :param device_size: Total size available to divide
    :param parts: Number of parts to divide ``device_size`` into
    :param size: Size of each part, in the same unit as ``device_size``

    :raises ValueError: If both ``parts`` and ``size`` are given
    :raises SizeAllocationError: If ``size`` is larger than ``device_size``
    """
    if parts is not None and size is not None:
        raise ValueError(
            "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
        )

    if size and size > device_size:
        raise SizeAllocationError(size, device_size)

    if size:
        # how many chunks of ``size`` fit in the device, at least one
        parts = int(device_size / size) or 1
    else:
        # Prevent parts being 0 or unset, falling back to 1 (100% usage)
        parts = parts or 1

    return {
        'parts': parts,
        # always round down so the sum can never exceed 100%
        'percentages': int(floor(100 / float(parts))),
        'sizes': int(device_size / parts),
    }
131
132
d2e6a577
FG
def parse_tags(lv_tags):
    """
    Build a dictionary out of the comma-separated tags reported by LVM,
    keeping only the ones that start with ``ceph.``

    Input look like::

        "ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0"

    For the above example, the expected return value would be::

        {
            "ceph.osd_fsid": "aaa-fff-bbbb",
            "ceph.osd_id": "0"
        }

    An empty (or ``None``) input produces an empty dictionary. Each tag is
    split on the first ``=`` only, so values may contain ``=``.
    """
    if not lv_tags:
        return {}
    return dict(
        assignment.split('=', 1)
        for assignment in lv_tags.split(',')
        if assignment.startswith('ceph.')
    )
160
161
94b18763
FG
def _vdo_parents(devices):
    """
    It is possible we didn't get a logical volume, or a mapper path, but
    a device like /dev/sda2. To resolve this, look at the slaves of every
    device in /sys/block: whenever a slave is listed in ``devices``, the
    parent is reported both as ``/dev/<parent>`` and as ``<parent>``.

    :param devices: Collection of device names/paths to match slaves against
    :returns: A list of parent device paths and names (a parent is added
              once per matching slave)
    """
    found = []
    for block_name in os.listdir('/sys/block'):
        slaves_dir = '/sys/block/%s/slaves' % block_name
        for slave in os.listdir(slaves_dir):
            if slave not in devices:
                continue
            found.extend(['/dev/%s' % block_name, block_name])
    return found
176
177
def _vdo_slaves(vdo_names):
    """
    Find all the slaves associated with each vdo name (from realpath) by
    going into /sys/block/<realpath>/slaves

    Names without a ``/dev/mapper/<name>`` entry, or whose resolved device
    has no ``slaves`` directory in /sys/block, are skipped.

    :param vdo_names: Iterable of VDO names
    :returns: A list with, for every usable name, the resolved path, the
              mapper path, the resolved device name, and then each slave as
              both ``/dev/<slave>`` and ``<slave>``
    """
    collected = []
    for name in vdo_names:
        mapper = '/dev/mapper/%s' % name
        if os.path.exists(mapper):
            # resolve the realpath and realname of the vdo mapper
            real = os.path.realpath(mapper)
            real_name = real.split('/')[-1]
            slaves_dir = '/sys/block/%s/slaves' % real_name
            if os.path.exists(slaves_dir):
                collected.extend([real, mapper, real_name])
                for slave in os.listdir(slaves_dir):
                    collected.extend(['/dev/%s' % slave, slave])
    return collected
201
202
def _is_vdo(path):
    """
    A VDO device can be composed from many different devices, go through each
    one of those devices and its slaves (if any) and correlate them back to
    /dev/mapper and their realpaths, and then check if they appear as part of
    /sys/kvdo/<name>/statistics

    From the realpath of a logical volume, determine if it is a VDO device or
    not, by correlating it to the presence of the name in
    /sys/kvdo/<name>/statistics and all the previously captured devices

    :param path: Path to the device to check
    :returns: ``True`` if ``path``, its realpath, or the realpath's last
              component is among the VDO related devices, ``False`` otherwise
              (including when /sys/kvdo is not a directory)
    """
    if not os.path.isdir('/sys/kvdo'):
        return False
    realpath = os.path.realpath(path)
    realpath_name = realpath.split('/')[-1]

    # get all the vdo names
    vdo_names = set(
        entry for entry in os.listdir('/sys/kvdo/')
        if os.path.isdir('/sys/kvdo/%s/statistics' % entry)
    )

    # find all the slaves associated with each vdo name (from realpath) by
    # going into /sys/block/<realpath>/slaves
    devices = list(_vdo_slaves(vdo_names))

    # Find all possible parents, looking into slaves that are related to VDO
    devices.extend(_vdo_parents(devices))

    return path in devices or realpath in devices or realpath_name in devices
236
237
def is_vdo(path):
    """
    Detect if a path is backed by VDO, proxying the actual call to _is_vdo so
    that we can prevent an exception breaking OSD creation. If an exception is
    raised, it will get captured and logged to file.

    :param path: Path to the device to check
    :returns: The string ``'1'`` when the device is detected as VDO, and the
              string ``'0'`` when it is not or when detection failed
    """
    try:
        detected = _is_vdo(path)
    except Exception:
        logger.exception('Unable to properly detect device as VDO: %s', path)
        return '0'
    return '1' if detected else '0'
252
253
1adf2230
AA
def dmsetup_splitname(dev):
    """
    Run ``dmsetup splitname`` and parse the results.

    .. warning:: This call does not ensure that the device is correct or that
                 it exists. ``dmsetup`` will happily take a non existing path
                 and still return a 0 exit status.

    :param dev: The device path handed over to ``dmsetup splitname``
    :returns: The dictionary produced by ``_splitname_parser`` from the
              command's stdout
    """
    # stderr and the exit status are intentionally not inspected
    stdout, _stderr, _returncode = process.call([
        'dmsetup', 'splitname', '--noheadings',
        "--separator=';'", '--nameprefixes', dev,
    ])
    return _splitname_parser(stdout)
268
269
eafe8130
TL
270####################################
271#
272# Code for LVM Physical Volumes
273#
274################################
d2e6a577
FG
275
276
181888fb
FG
def get_api_pvs():
    """
    Return the list of physical volumes configured for lvm and available in
    the system using flags to include common metadata associated with them
    like the uuid

    This will only return physical volumes set up to work with LVM.

    The ``pvs`` command is called read-only, without headings, using ';' as
    the separator, and requesting these fields::

        pv_name,pv_tags,pv_uuid,vg_name,lv_uuid

    :returns: A list of dictionaries (one per reported line) keyed by the
              field names above
    """
    fields = 'pv_name,pv_tags,pv_uuid,vg_name,lv_uuid'
    command = ['pvs', '--no-heading', '--readonly', '--separator=";"', '-o', fields]
    # stderr and the exit status are not inspected
    stdout, _stderr, _returncode = process.call(command, verbose_on_failure=False)
    return _output_parser(stdout, fields)
299
300
class PVolume(object):
    """
    Represents a Physical Volume from LVM, with some top-level attributes like
    ``pv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # every reported field becomes an attribute (pv_name, pv_uuid, ...)
        for attribute, value in kw.items():
            setattr(self, attribute, value)
        self.pv_api = kw
        self.name = kw['pv_name']
        self.tags = parse_tags(kw['pv_tags'])

    def __str__(self):
        return '<%s>' % self.pv_api['pv_name']

    def __repr__(self):
        return self.__str__()

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        At the end of all modifications, the tags are refreshed to reflect
        LVM's most current view.
        """
        for key, value in tags.items():
            self.set_tag(key, value)
        # after setting all the tags, refresh them for the current object,
        # use the pv_* identifiers to filter because those shouldn't change
        refreshed = get_pv(pv_name=self.pv_name, pv_uuid=self.pv_uuid)
        self.tags = refreshed.tags

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag. Does not "refresh" the values of
        the current object for its tags. Meant to be a "fire and forget" type
        of modification.

        **warning**: Altering tags on a PV has to be done ensuring that the
        device is actually the one intended. ``pv_name`` is *not* a persistent
        value, only ``pv_uuid`` is. Using ``pv_uuid`` is the best way to make
        sure the device getting changed is the one needed.
        """
        # remove it first if it exists
        existing = self.tags.get(key)
        if existing:
            process.call(
                ['pvchange', '--deltag', "%s=%s" % (key, existing), self.pv_name]
            )

        process.call(
            ['pvchange', '--addtag', '%s=%s' % (key, value), self.pv_name]
        )
181888fb
FG
362
363
class PVolumes(list):
    """
    A list of all known (physical) volumes for the current system, with the
    ability to filter them via keyword arguments.
    """

    def __init__(self, populate=True):
        """
        :param populate: When true (the default) the list is filled with the
                         physical volumes reported by ``get_api_pvs``
        """
        if populate:
            self._populate()

    def _populate(self):
        # get all the pvs in the current system
        for pv_item in get_api_pvs():
            self.append(PVolume(**pv_item))

    def _purge(self):
        """
        Deplete all the items in the list, used internally only so that we can
        dynamically allocate the items when filtering without the concern of
        messing up the contents
        """
        self[:] = []

    def _filter(self, pv_name=None, pv_uuid=None, pv_tags=None):
        """
        The actual method that filters using a new list. Useful so that other
        methods that do not want to alter the contents of the list (e.g.
        ``self.get``) can operate safely.

        :returns: A plain list with the items matching every given filter
        """
        filtered = list(self)
        if pv_name:
            filtered = [pv for pv in filtered if pv.pv_name == pv_name]

        if pv_uuid:
            filtered = [pv for pv in filtered if pv.pv_uuid == pv_uuid]

        # at this point, `filtered` has either all the physical volumes in
        # self or is an actual filtered list if any filters were applied
        if pv_tags:
            # every requested tag must match; values are compared as strings
            filtered = [
                pv for pv in filtered
                if all(pv.tags.get(k) == str(v) for k, v in pv_tags.items())
            ]

        return filtered

    def filter(self, pv_name=None, pv_uuid=None, pv_tags=None):
        """
        Filter out volumes on top level attributes like ``pv_name`` or by
        ``pv_tags`` where a dict is required. For example, to find a physical
        volume that has an OSD ID of 0, the filter would look like::

            pv_tags={'ceph.osd_id': '0'}

        :returns: A new, unpopulated ``PVolumes`` holding only the matches
        :raises TypeError: If no filter is given
        """
        if not any([pv_name, pv_uuid, pv_tags]):
            raise TypeError('.filter() requires pv_name, pv_uuid, or pv_tags '
                            '(none given)')

        filtered_pvs = PVolumes(populate=False)
        filtered_pvs.extend(self._filter(pv_name, pv_uuid, pv_tags))
        return filtered_pvs

    def get(self, pv_name=None, pv_uuid=None, pv_tags=None):
        """
        This is a bit expensive, since it will try to filter out all the
        matching items in the list, filter them out applying anything that was
        added and return the matching item.

        This method does *not* alter the list, and it will raise an error if
        multiple pvs are matched

        It is useful to use ``tags`` when trying to find a specific physical
        volume, but it can also lead to multiple pvs being found, since a lot
        of metadata is shared between pvs of a distinct OSD.

        :returns: The matching item, or ``None`` when no filter was given or
                  nothing matched
        :raises MultiplePVsError: If more than one pv matches and ``pv_tags``
                                  was used
        """
        if not any([pv_name, pv_uuid, pv_tags]):
            return None
        pvs = self._filter(
            pv_name=pv_name,
            pv_uuid=pv_uuid,
            pv_tags=pv_tags
        )
        if not pvs:
            return None
        # NOTE(review): several matches without ``pv_tags`` silently return
        # the first item instead of raising -- confirm this is intended
        if len(pvs) > 1 and pv_tags:
            raise MultiplePVsError(pv_name)
        return pvs[0]
181888fb
FG
456
457
def create_pv(device):
    """
    Create a physical volume from a device, useful when devices need to be
    later mapped to journals.

    :param device: The device path passed to ``pvcreate``
    """
    command = [
        'pvcreate',
        '-v',  # verbose
        '-f',  # force it
        '--yes',  # answer yes to any prompts
        device,
    ]
    process.run(command)
d2e6a577
FG
470
471
def remove_pv(pv_name):
    """
    Removes a physical volume using a double `-f` to prevent prompts and fully
    remove anything related to LVM. This is tremendously destructive, but so
    is all other actions when zapping a device.

    In the case where multiple PVs are found, it will ignore that fact and
    continue with the removal, specifically in the case of messages like::

        WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2

    These situations can be avoided with custom filtering rules, which this
    API cannot handle while accommodating custom user filters.

    :param pv_name: The physical volume passed to ``pvremove``
    """
    # this removes a PV, so the failure message must say so (it used to
    # claim a vg could not be removed)
    fail_msg = "Unable to remove pv %s" % pv_name
    process.run(
        [
            'pvremove',
            '-v',  # verbose
            '-f',  # force it
            '-f',  # force it
            pv_name
        ],
        fail_msg=fail_msg,
    )
3efd9988
FG
497
498
def get_pv(pv_name=None, pv_uuid=None, pv_tags=None, pvs=None):
    """
    Return a matching pv (physical volume) for the current system, requiring
    ``pv_name``, ``pv_uuid``, or ``pv_tags``. Raises an error if more than one
    pv is found.

    :param pvs: Optional collection to search in; when it is ``None`` or
                empty a fresh ``PVolumes()`` is created instead
    :returns: ``None`` when no filter is given, otherwise whatever
              ``pvs.get`` returns for the given filters
    """
    if not (pv_name or pv_uuid or pv_tags):
        return None

    have_candidates = pvs is not None and len(pvs) != 0
    if not have_candidates:
        pvs = PVolumes()

    return pvs.get(pv_name=pv_name, pv_uuid=pv_uuid, pv_tags=pv_tags)
1adf2230 511
1adf2230 512
eafe8130
TL
513################################
514#
515# Code for LVM Volume Groups
516#
517#############################
1adf2230
AA
518
519
def get_api_vgs():
    """
    Return the list of group volumes available in the system using flags to
    include common metadata associated with them

    The ``vgs`` command is called read-only, without headings, using ';' as
    the separator, and requesting these fields::

        vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free,vg_free_count

    To normalize sizing, the units are forced in 'g' which is equivalent to
    gigabytes, which uses multiples of 1024 (as opposed to 1000)

    :returns: A list of dictionaries (one per reported line) keyed by the
              field names above
    """
    fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free,vg_free_count'
    command = [
        'vgs', '--noheadings', '--readonly', '--units=g',
        '--separator=";"', '-o', fields,
    ]
    # stderr and the exit status are not inspected
    stdout, _stderr, _returncode = process.call(command, verbose_on_failure=False)
    return _output_parser(stdout, fields)
81eedcae
TL
541
542
class VolumeGroup(object):
    """
    Represents an LVM group, with some top-level attributes like ``vg_name``
    """

    def __init__(self, **kw):
        # every reported field becomes an attribute (vg_name, vg_free, ...)
        for k, v in kw.items():
            setattr(self, k, v)
        self.name = kw['vg_name']
        self.tags = parse_tags(kw.get('vg_tags', ''))

    def __str__(self):
        return '<%s>' % self.name

    def __repr__(self):
        return self.__str__()

    def _parse_size(self, size):
        """
        Convert a size string ending in 'g' (e.g. ``'1024.9g'``) to an integer
        using ``util.str_to_int`` on the part before the 'g'.

        :raises RuntimeError: If splitting ``size`` on 'g' does not yield
                              exactly two pieces
        """
        error_msg = "Unable to convert vg size to integer: '%s'" % str(size)
        try:
            integer, _ = size.split('g')
        except ValueError:
            logger.exception(error_msg)
            raise RuntimeError(error_msg)

        return util.str_to_int(integer)

    @property
    def free(self):
        """
        Parse the available size in gigabytes from the ``vg_free`` attribute,
        that will be a string with a character ('g') to indicate gigabytes in
        size. Returns a rounded down integer to ease internal operations::

            >>> data_vg.vg_free
            '0.01g'
            >>> data_vg.free
            0
        """
        return self._parse_size(self.vg_free)

    @property
    def size(self):
        """
        Parse the size in gigabytes from the ``vg_size`` attribute, that
        will be a string with a character ('g') to indicate gigabytes in size.
        Returns a rounded down integer to ease internal operations::

            >>> data_vg.vg_size
            '1024.9g'
            >>> data_vg.size
            1024
        """
        return self._parse_size(self.vg_size)

    def sizing(self, parts=None, size=None):
        """
        Calculate proper sizing to fully utilize the volume group in the most
        efficient way possible. To prevent situations where LVM might accept
        a percentage that is beyond the vg's capabilities, it will refuse with
        an error when requesting a larger-than-possible parameter, in addition
        to rounding down calculations.

        A dictionary with different sizing parameters is returned, to make it
        easier for others to choose what they need in order to create logical
        volumes::

            >>> data_vg.free
            1024
            >>> data_vg.sizing(parts=4)
            {'parts': 4, 'sizes': 256, 'percentages': 25}
            >>> data_vg.sizing(size=512)
            {'parts': 2, 'sizes': 512, 'percentages': 50}

        The returned dictionary also carries an ``extents`` key. When neither
        ``parts`` nor ``size`` is given, a single part using all the free
        space is assumed.

        :param parts: Number of parts to create LVs from
        :param size: Size in gigabytes to divide the VG into

        :raises SizeAllocationError: When requested size cannot be allocated
                                     with the free space of the group
        :raises ValueError: If both ``parts`` and ``size`` are given
        """
        if parts is not None and size is not None:
            raise ValueError(
                "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
            )

        # if size is given we need to map that to extents so that we avoid
        # issues when trying to get this right with a size in gigabytes find
        # the percentage first, cheating, because these values are thrown out
        vg_free_count = util.str_to_int(self.vg_free_count)
        free = self.free

        if size:
            # validate the request first, so that an oversized ``size`` is
            # reported as such before any division by the free space happens
            disk_sizing = sizing(free, size=size, parts=parts)
            extents = int(size * vg_free_count / free)
        else:
            # Prevent parts being 0 or unset, falling back to 1 (100% usage)
            parts = parts or 1
            size = int(free / parts)
            extents = size * vg_free_count / free
            disk_sizing = sizing(free, parts=parts)

        # percentages are computed on extents rather than on gigabytes
        extent_sizing = sizing(vg_free_count, size=extents)

        disk_sizing['extents'] = int(extents)
        disk_sizing['percentages'] = extent_sizing['percentages']
        return disk_sizing
650
651
class VolumeGroups(list):
    """
    A list of all known volume groups for the current system, with the ability
    to filter them via keyword arguments.
    """

    def __init__(self, populate=True):
        if populate:
            self._populate()

    def _populate(self):
        # get all the vgs in the current system
        self.extend(VolumeGroup(**vg_item) for vg_item in get_api_vgs())

    def _purge(self):
        """
        Deplete all the items in the list, used internally only so that we can
        dynamically allocate the items when filtering without the concern of
        messing up the contents
        """
        del self[:]

    def _filter(self, vg_name=None, vg_tags=None):
        """
        The actual method that filters using a new list. Useful so that other
        methods that do not want to alter the contents of the list (e.g.
        ``self.get``) can operate safely.

        :returns: A plain list with the groups matching every given filter
        """
        candidates = list(self)
        if vg_name:
            candidates = [vg for vg in candidates if vg.vg_name == vg_name]

        # at this point, `candidates` has either all the groups in self or is
        # an actual filtered list if a name was given
        if not vg_tags:
            return candidates

        # every requested tag must match; values are compared as strings
        return [
            vg for vg in candidates
            if all(vg.tags.get(k) == str(v) for k, v in vg_tags.items())
        ]

    def filter(self, vg_name=None, vg_tags=None):
        """
        Filter out groups on top level attributes like ``vg_name`` or by
        ``vg_tags`` where a dict is required. For example, to find a Ceph group
        with dmcache as the type, the filter would look like::

            vg_tags={'ceph.type': 'dmcache'}

        .. warning:: These tags are not documented because they are currently
                     unused, but are here to maintain API consistency

        :returns: A new, unpopulated ``VolumeGroups`` holding only the matches
        :raises TypeError: If no filter is given
        """
        if not (vg_name or vg_tags):
            raise TypeError('.filter() requires vg_name or vg_tags (none given)')

        matches = VolumeGroups(populate=False)
        matches.extend(self._filter(vg_name, vg_tags))
        return matches

    def get(self, vg_name=None, vg_tags=None):
        """
        This is a bit expensive, since it will try to filter out all the
        matching items in the list, filter them out applying anything that was
        added and return the matching item.

        This method does *not* alter the list, and it will raise an error if
        multiple VGs are matched

        It is useful to use ``tags`` when trying to find a specific volume
        group, but it can also lead to multiple vgs being found (although
        unlikely)

        :returns: The matching group, or ``None`` when no filter was given or
                  nothing matched
        :raises MultipleVGsError: If more than one group matches
        """
        if not (vg_name or vg_tags):
            return None
        matches = self._filter(vg_name=vg_name, vg_tags=vg_tags)
        if not matches:
            return None
        if len(matches) > 1:
            # this is probably never going to happen, but it is here to keep
            # the API code consistent
            raise MultipleVGsError(vg_name)
        return matches[0]
742
743
def create_vg(devices, name=None, name_prefix=None):
    """
    Create a Volume Group. Command looks like::

        vgcreate -s 1G --force --yes group_name device

    Once created the volume group is returned as a ``VolumeGroup`` object

    :param devices: A list of devices to create a VG. Optionally, a single
                    device (as a string) can be used.
    :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
    :param name_prefix: Optionally prefix the name of the VG, which will get
                        combined with a UUID string (takes precedence over
                        ``name``)
    """
    if isinstance(devices, set):
        devices = list(devices)
    elif not isinstance(devices, list):
        devices = [devices]

    if name_prefix:
        name = "%s-%s" % (name_prefix, str(uuid.uuid4()))
    elif name is None:
        name = "ceph-%s" % str(uuid.uuid4())

    command = ['vgcreate', '-s', '1G', '--force', '--yes', name]
    process.run(command + devices)

    return get_vg(vg_name=name)
d2e6a577 777
d2e6a577 778
eafe8130
TL
def extend_vg(vg, devices):
    """
    Extend a Volume Group. Command looks like::

        vgextend --force --yes group_name [device, ...]

    Once extended, the volume group is looked up again and returned as
    a ``VolumeGroup`` object

    :param vg: A VolumeGroup object
    :param devices: A list (or set/tuple) of devices to extend the VG.
                    Optionally, a single device (as a string) can be used.
    """
    # accept the same collection types as create_vg; anything else is treated
    # as a single device
    if isinstance(devices, (set, tuple)):
        devices = list(devices)
    elif not isinstance(devices, list):
        devices = [devices]
    process.run([
        'vgextend',
        '--force',
        '--yes',
        vg.name] + devices
    )

    return get_vg(vg_name=vg.name)
d2e6a577 802
d2e6a577 803
eafe8130
TL
def reduce_vg(vg, devices):
    """
    Reduce a Volume Group. Command looks like::

        vgreduce --force --yes group_name [device, ...]

    Once reduced, the volume group is looked up again and returned.

    :param vg: A VolumeGroup object
    :param devices: A list (or set/tuple) of devices to remove from the VG.
                    Optionally, a single device (as a string) can be used.
    """
    # accept the same collection types as create_vg; anything else is treated
    # as a single device
    if isinstance(devices, (set, tuple)):
        devices = list(devices)
    elif not isinstance(devices, list):
        devices = [devices]
    process.run([
        'vgreduce',
        '--force',
        '--yes',
        vg.name] + devices
    )

    return get_vg(vg_name=vg.name)
d2e6a577
FG
825
826
def remove_vg(vg_name):
    """
    Removes a volume group.

    A falsy ``vg_name`` is not passed to ``vgremove``: a warning is logged
    and nothing else is done.

    :param vg_name: Name of the volume group to remove
    """
    if vg_name:
        command = [
            'vgremove',
            '-v',  # verbose
            '-f',  # force it
            vg_name,
        ]
        process.run(command, fail_msg="Unable to remove vg %s" % vg_name)
    else:
        logger.warning('Skipping removal of invalid VG name: "%s"', vg_name)
181888fb
FG
844
845
def get_vg(vg_name=None, vg_tags=None, vgs=None):
    """
    Return a matching vg for the current system, requires ``vg_name`` or
    ``tags``. Raises an error if more than one vg is found.

    It is useful to use ``tags`` when trying to find a specific volume group,
    but it can also lead to multiple vgs being found.

    :param vgs: Optional collection to search in; when it is ``None`` or
                empty a fresh ``VolumeGroups()`` is created instead
    :returns: ``None`` when no filter is given, otherwise whatever
              ``vgs.get`` returns for the given filters
    """
    if not (vg_name or vg_tags):
        return None

    have_candidates = vgs is not None and len(vgs) != 0
    if not have_candidates:
        vgs = VolumeGroups()

    return vgs.get(vg_name=vg_name, vg_tags=vg_tags)
1adf2230
AA
860
861
eafe8130
TL
862#################################
863#
864# Code for LVM Logical Volumes
865#
866###############################
1adf2230 867
1adf2230 868
eafe8130
TL
def get_api_lvs():
    """
    Return the list of logical volumes available in the system using flags to
    include common metadata associated with them

    The ``lvs`` command is called read-only, with ``-a``, without headings,
    using ';' as the separator, and requesting these fields::

        lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size

    :returns: A list of dictionaries (one per reported line) keyed by the
              field names above
    """
    fields = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size'
    command = [
        'lvs', '--noheadings', '--readonly', '--separator=";"',
        '-a', '-o', fields,
    ]
    # stderr and the exit status are not inspected
    stdout, _stderr, _returncode = process.call(command, verbose_on_failure=False)
    return _output_parser(stdout, fields)
1adf2230 887
d2e6a577
FG
888
class Volume(object):
    """
    Represents a Logical Volume from LVM, with some top-level attributes like
    ``lv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # every reported field becomes an attribute (lv_name, lv_path, ...)
        for attribute, value in kw.items():
            setattr(self, attribute, value)
        self.lv_api = kw
        self.name = kw['lv_name']
        self.tags = parse_tags(kw['lv_tags'])
        self.encrypted = self.tags.get('ceph.encrypted', '0') == '1'
        self.used_by_ceph = 'ceph.osd_id' in self.tags

    def __str__(self):
        return '<%s>' % self.lv_api['lv_path']

    def __repr__(self):
        return self.__str__()

    def as_dict(self):
        """
        Return a copy of the API fields extended with the ``tags``, ``name``,
        ``type`` (the ``ceph.type`` tag, which must be present) and ``path``
        keys.
        """
        obj = dict(self.lv_api)
        obj['tags'] = self.tags
        obj['name'] = self.name
        obj['type'] = self.tags['ceph.type']
        obj['path'] = self.lv_path
        return obj

    def report(self):
        """
        Return a dictionary describing the volume: just the name and
        a comment when it has no ``ceph.osd_id`` tag, otherwise the OSD and
        cluster identifiers taken from the tags, plus the ``<type>_uuid``.
        """
        if not self.used_by_ceph:
            return {
                'name': self.lv_name,
                'comment': 'not used by ceph'
            }

        type_ = self.tags['ceph.type']
        type_uuid = '{}_uuid'.format(type_)
        return {
            'name': self.lv_name,
            'osd_id': self.tags['ceph.osd_id'],
            'cluster_name': self.tags['ceph.cluster_name'],
            'type': type_,
            'osd_fsid': self.tags['ceph.osd_fsid'],
            'cluster_fsid': self.tags['ceph.cluster_fsid'],
            type_uuid: self.tags['ceph.{}'.format(type_uuid)],
        }

    def clear_tags(self):
        """
        Removes all tags from the Logical Volume.
        """
        # iterate over a copy of the keys, clear_tag mutates self.tags
        for key in list(self.tags):
            self.clear_tag(key)

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        Each pair is applied with ``set_tag``, which also keeps ``self.tags``
        up to date.
        """
        for key, value in tags.items():
            self.set_tag(key, value)

    def clear_tag(self, key):
        """
        Remove the tag ``key`` from the LV and from ``self.tags``. Nothing is
        done when the tag is not set (or has an empty value).
        """
        current_value = self.tags.get(key)
        if not current_value:
            return
        process.call(
            ['lvchange', '--deltag', "%s=%s" % (key, current_value), self.lv_path]
        )
        del self.tags[key]

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag.
        """
        # remove it first if it exists
        self.clear_tag(key)

        process.call(
            ['lvchange', '--addtag', '%s=%s' % (key, value), self.lv_path]
        )
        self.tags[key] = value
181888fb
FG
985
986
class Volumes(list):
    """
    List of every (logical) volume reported for the current system; the
    contents can be narrowed down through keyword arguments.
    """

    def __init__(self):
        self._populate()

    def _populate(self):
        # one ``Volume`` per item reported by ``get_api_lvs``
        self.extend(Volume(**lv_item) for lv_item in get_api_lvs())

    def _purge(self):
        """
        Empty the list in place. Internal only: lets ``filter`` rebuild the
        contents without replacing the list object itself.
        """
        del self[:]

    def _filter(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None):
        """
        Do the actual filtering into a brand new list, so that callers that
        must not alter the contents of ``self`` (e.g. ``get``) stay safe.
        """
        # attribute criteria are applied one after another; a falsy value
        # means "do not filter on this attribute"
        attribute_criteria = (
            ('lv_name', lv_name),
            ('vg_name', vg_name),
            ('lv_uuid', lv_uuid),
            ('lv_path', lv_path),
        )
        matched = list(self)
        for attribute, wanted in attribute_criteria:
            if wanted:
                matched = [
                    volume for volume in matched
                    if getattr(volume, attribute) == wanted
                ]

        # `matched` is now either everything in self, or whatever survived
        # the attribute criteria above
        if not lv_tags:
            return matched

        # every requested tag has to match on the volume; requested values
        # are compared in their string form
        return [
            volume for volume in matched
            if all(volume.tags.get(k) == str(v) for k, v in lv_tags.items())
        ]

    def filter(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None):
        """
        Narrow the list *in place*, keeping only volumes that match the top
        level attributes given (like ``lv_name``) and/or ``lv_tags``, which
        must be a dict. For example, to keep the volumes that have an OSD ID
        of 0, the filter would look like::

            lv_tags={'ceph.osd_id': '0'}

        Raises ``TypeError`` when no criteria are given at all.
        """
        criteria = {
            'lv_name': lv_name,
            'vg_name': vg_name,
            'lv_path': lv_path,
            'lv_uuid': lv_uuid,
            'lv_tags': lv_tags,
        }
        if not any(criteria.values()):
            raise TypeError('.filter() requires lv_name, vg_name, lv_path, lv_uuid, or tags (none given)')
        # compute the survivors against the current contents first ...
        survivors = self._filter(**criteria)
        # ... then drop everything and put only the survivors back
        self._purge()
        self.extend(survivors)

    def get(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None):
        """
        Return the single volume matching the given criteria, or ``None``
        when no criteria are given or nothing matches. This is a bit
        expensive since the whole list is filtered on every call.

        This method does *not* alter the list, and it raises
        ``MultipleLVsError`` if more than one LV is matched.

        It is useful to use ``lv_tags`` when trying to find a specific
        logical volume, but it can also lead to multiple lvs being found,
        since a lot of metadata is shared between lvs of a distinct OSD.
        """
        criteria = {
            'lv_name': lv_name,
            'vg_name': vg_name,
            'lv_path': lv_path,
            'lv_uuid': lv_uuid,
            'lv_tags': lv_tags,
        }
        if not any(criteria.values()):
            return None
        matches = self._filter(**criteria)
        if len(matches) > 1:
            raise MultipleLVsError(lv_name, lv_path)
        return matches[0] if matches else None
1092
1093
def create_lv(name, group, extents=None, size=None, tags=None, uuid_name=False, pv=None):
    """
    Create a Logical Volume inside a Volume Group, tag it, and return it.
    The underlying command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name`` and ``group`` are required. If ``size`` is provided it must
    follow lvm's size notation (like 1G, or 20M); otherwise ``extents`` is
    used when given, and with neither the LV takes all the free space.
    Tags are an optional dictionary, expected to conform to the convention
    of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    When ``tags`` is omitted, a default set with every value set to
    ``"null"`` is applied.

    :param uuid_name: Optionally combine the ``name`` with UUID to ensure uniqueness
    :param pv: Optional value appended as the last argument of ``lvcreate``
    """
    if uuid_name:
        name = '%s-%s' % (name, uuid.uuid4())
    if tags is None:
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }

    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    # only the sizing flags differ between the three ways of calling
    # lvcreate: explicit size wins, then extents, then all the free space
    if size:
        sizing_args = ['-L', '%s' % size]
    elif extents:
        sizing_args = ['-l', '%s' % extents]
    else:
        sizing_args = ['-l', '100%FREE']

    command = ['lvcreate', '--yes'] + sizing_args + ['-n', name, group]
    if pv:
        command.append(pv)
    process.run(command)

    lv = get_lv(lv_name=name, vg_name=group)
    lv.set_tags(tags)

    # when creating a distinct type, the caller doesn't know what the path
    # will be, so it gets tagged after creation using this mapping
    device_tag_by_type = {
        'journal': 'ceph.journal_device',
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    path_tag = device_tag_by_type.get(tags.get('ceph.type'))
    if path_tag:
        lv.set_tags({path_tag: lv.lv_path})
    return lv
1168
1169
def remove_lv(lv):
    """
    Force the removal of a logical volume through ``lvremove``.

    :param lv: A ``Volume`` object or the path for an LV
    :returns: ``True`` when ``lvremove`` exits with a zero return code
    :raises RuntimeError: when ``lvremove`` exits with a non-zero code
    """
    path = lv.lv_path if isinstance(lv, Volume) else lv

    # -v: verbose, -f: force it
    command = ['lvremove', '-v', '-f', path]
    _out, _err, exit_code = process.call(
        command,
        show_command=True,
        terminal_verbose=True,
    )
    if exit_code == 0:
        return True
    raise RuntimeError("Unable to remove %s" % path)
1197
1198
def is_lv(dev, lvs=None):
    """
    Boolean to detect if a device is an LV or not.

    :param dev: The device, handed as-is to ``dmsetup_splitname``
    :param lvs: Optional ``Volumes`` instance to search in. It is only read,
                never modified, so the same instance can be passed in for
                many devices to avoid re-listing every LV each time. A
                fresh ``Volumes()`` is built when it is ``None`` or empty.
    :returns: ``True`` when a volume matches the LV and VG names that
              ``dmsetup_splitname`` reports for ``dev``, ``False`` otherwise
    """
    splitname = dmsetup_splitname(dev)
    lv_name = splitname.get('LV_NAME')
    if not lv_name:
        # no LV name reported for this device: answer without paying for a
        # full listing of the system's logical volumes
        return False

    if lvs is None or len(lvs) == 0:
        lvs = Volumes()

    # ``Volumes.filter`` rewrites the list in place, which would leave a
    # caller-provided ``lvs`` holding only this device's match and make
    # every later lookup against it fail; ``_filter`` returns a new list
    matches = lvs._filter(lv_name=lv_name, vg_name=splitname['VG_NAME'])
    return len(matches) > 0
1213
1214
def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None, lvs=None):
    """
    Look up the one lv that matches the given criteria, requiring at least
    one of ``lv_name``, ``vg_name``, ``lv_path``, ``lv_uuid`` or
    ``lv_tags``; ``None`` is returned right away when none is given.

    The search runs against ``lvs`` when provided, otherwise against a
    freshly built ``Volumes()``, and the result is whatever its ``get``
    method returns for the same criteria.

    It is useful to use ``lv_tags`` when trying to find a specific logical
    volume, but it can also lead to multiple lvs being found, since a lot
    of metadata is shared between lvs of a distinct OSD.
    """
    criteria = {
        'lv_name': lv_name,
        'vg_name': vg_name,
        'lv_path': lv_path,
        'lv_uuid': lv_uuid,
        'lv_tags': lv_tags,
    }
    if not any(criteria.values()):
        return None
    candidates = Volumes() if lvs is None else lvs
    return candidates.get(**criteria)
1233
1234
def get_lv_from_argument(argument):
    """
    Helper proxy function that consumes a possible logical volume passed in
    from the CLI and resolves it through ``get_lv``.

    * An argument starting with ``/`` is looked up as an ``lv_path``.
    * An argument in the form of ``vg/lv`` is looked up by vg and lv name.
    * Anything else (no ``/``, more than one ``/``, or a value that is not
      string-like, such as ``None``) returns ``None``.

    :param argument: The raw value to interpret
    """
    try:
        is_absolute = argument.startswith('/')
    except AttributeError:
        # not string-like (e.g. ``None``): nothing that could name an LV
        return None

    if is_absolute:
        return get_lv(lv_path=argument)

    try:
        vg_name, lv_name = argument.split('/')
    except (ValueError, AttributeError):
        # the unpacking fails unless there is exactly one '/' separator
        return None
    return get_lv(lv_name=lv_name, vg_name=vg_name)
1249
1250
def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'):
    """
    Create multiple Logical Volumes from a Volume Group by calculating the
    proper extents from ``parts`` or ``size``. A custom prefix can be used
    (defaults to ``ceph-lv``), these names are always suffixed with a uuid.

    LV creation in ceph-volume will require tags, this is expected to be
    pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It
    will probably not be the case when mass-creating LVs, so common/default
    tags will be set to ``"null"``.

    .. note:: LVs that are not in use can be detected by querying LVM for tags that are
              set to ``"null"``.

    When neither ``parts`` nor ``size`` is given, ``parts`` falls back to 1.
    The number of LVs and the extents used for each one are taken from the
    ``'parts'`` and ``'extents'`` keys of the mapping returned by
    ``volume_group.sizing(parts=..., size=...)``.

    :param volume_group: The volume group (vg) to use for LV creation
    :type volume_group: ``VolumeGroup()`` object
    :param parts: Number of LVs to create *instead of* ``size``.
    :type parts: int
    :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs as possible"
    :type size: int
    :param name_prefix: Prefix for the generated LV names
    :returns: A list with the result of each ``create_lv`` call
    """
    if parts is None and size is None:
        # fallback to just one part (using 100% of the vg)
        parts = 1
    tags = {
        "ceph.osd_id": "null",
        "ceph.type": "null",
        "ceph.cluster_fsid": "null",
        "ceph.osd_fsid": "null",
    }
    sizing = volume_group.sizing(parts=parts, size=size)
    # every LV gets the same number of extents, so look it up only once
    extents = sizing['extents']
    return [
        create_lv(
            '%s-%s' % (name_prefix, uuid.uuid4()),
            volume_group.name,
            extents=extents,
            tags=tags,
        )
        for _ in range(sizing['parts'])
    ]