]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/api/lvm.py
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / ceph-volume / ceph_volume / api / lvm.py
CommitLineData
d2e6a577
FG
1"""
2API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention
3that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
4set of utilities for interacting with LVM.
5"""
94b18763
FG
6import logging
7import os
1adf2230 8import uuid
aee94f69 9import re
e306af50 10from itertools import repeat
1adf2230 11from math import floor
20effc67 12from ceph_volume import process, util, conf
f6b5b4d7 13from ceph_volume.exceptions import SizeAllocationError
d2e6a577 14
94b18763
FG
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
16
d2e6a577 17
f6b5b4d7
TL
def convert_filters_to_str(filters):
    """
    Render a dictionary of filters as a comma separated string of
    assignments::

        filter_name=filter_val,...

    A falsy argument (``None``, ``''`` or an empty dictionary) is returned
    untouched.
    """
    if not filters:
        return filters

    # one ``name=value`` item per filter, joined without a trailing comma
    return ','.join(name + '=' + val for name, val in filters.items())
33
34
def convert_tags_to_str(tags):
    """
    Render a dictionary of tags as a single selection item of the form::

        tags={tag_name=tag_val,...}

    A falsy argument (``None``, ``''`` or an empty dictionary) is returned
    untouched.
    """
    if not tags:
        return tags

    assignments = [name + '=' + val for name, val in tags.items()]
    return 'tags={' + ','.join(assignments) + '}'
50
51
def make_filters_lvmcmd_ready(filters, tags):
    """
    Build the value handed to the ``-S`` option of the LVM reporting
    commands out of a dictionary of filters and a dictionary of tags::

        filter_name=filter_val...,tags={tag_name=tag_val,...}

    so that the final command looks like::

        lvs -S filter_name=filter_val...,tags={tag_name=tag_val,...}

    Either part may be missing; when both are missing an empty string is
    returned.
    """
    rendered = (convert_filters_to_str(filters), convert_tags_to_str(tags))
    # keep only the parts that produced something and glue them with a comma
    return ','.join(piece for piece in rendered if piece)
71
72
b5b8bbf5
FG
73def _output_parser(output, fields):
74 """
75 Newer versions of LVM allow ``--reportformat=json``, but older versions,
76 like the one included in Xenial do not. LVM has the ability to filter and
77 format its output so we assume the output will be in a format this parser
92f5a8d4 78 can handle (using ';' as a delimiter)
b5b8bbf5
FG
79
80 :param fields: A string, possibly using ',' to group many items, as it
81 would be used on the CLI
82 :param output: The CLI output from the LVM call
83 """
84 field_items = fields.split(',')
85 report = []
86 for line in output:
87 # clear the leading/trailing whitespace
88 line = line.strip()
89
90 # remove the extra '"' in each field
91 line = line.replace('"', '')
92
93 # prevent moving forward with empty contents
94 if not line:
95 continue
96
11fdf7f2 97 # splitting on ';' because that is what the lvm call uses as
b5b8bbf5
FG
98 # '--separator'
99 output_items = [i.strip() for i in line.split(';')]
92f5a8d4 100 # map the output to the fields
b5b8bbf5
FG
101 report.append(
102 dict(zip(field_items, output_items))
103 )
104
105 return report
106
107
1adf2230
AA
108def _splitname_parser(line):
109 """
110 Parses the output from ``dmsetup splitname``, that should contain prefixes
111 (--nameprefixes) and set the separator to ";"
112
113 Output for /dev/mapper/vg-lv will usually look like::
114
115 DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''
116
117
118 The ``VG_NAME`` will usually not be what other callers need (e.g. just 'vg'
119 in the example), so this utility will split ``/dev/mapper/`` out, so that
120 the actual volume group name is kept
121
122 :returns: dictionary with stripped prefixes
123 """
1adf2230 124 parsed = {}
81eedcae
TL
125 try:
126 parts = line[0].split(';')
127 except IndexError:
128 logger.exception('Unable to parse mapper device: %s', line)
129 return parsed
130
1adf2230
AA
131 for part in parts:
132 part = part.replace("'", '')
133 key, value = part.split('=')
134 if 'DM_VG_NAME' in key:
135 value = value.split('/dev/mapper/')[-1]
136 key = key.split('DM_')[-1]
137 parsed[key] = value
138
139 return parsed
140
141
def sizing(device_size, parts=None, size=None):
    """
    Calculate proper sizing to fully utilize the volume group in the most
    efficient way possible. To prevent situations where LVM might accept
    a percentage that is beyond the vg's capabilities, it will refuse with
    an error when requesting a larger-than-possible parameter, in addition
    to rounding down calculations.

    A dictionary with different sizing parameters is returned, to make it
    easier for others to choose what they need in order to create logical
    volumes. ``sizes`` is the size of one part divided by 1024^3 and
    truncated to an integer::

        >>> sizing(100 * 1024 ** 3, parts=2)
        {'parts': 2, 'percentages': 50, 'sizes': 50}

    When neither ``parts`` nor ``size`` is given the whole device is
    reported as a single 100% allocation and ``parts`` stays ``None``.

    :param device_size: total size available, in the same unit as ``size``
    :param parts: number of equal parts to divide ``device_size`` into; 0 is
                  treated as 1
    :param size: size of a single part

    :raises ValueError: if both ``parts`` and ``size`` are given
    :raises SizeAllocationError: if ``size`` is larger than ``device_size``
    """
    if parts is not None and size is not None:
        raise ValueError(
            "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
        )

    if size and size > device_size:
        raise SizeAllocationError(size, device_size)

    def get_percentage(parts):
        return int(floor(100 / float(parts)))

    # with no explicit request the whole device is used
    percentages = 100

    if parts is not None:
        # Prevent parts being 0, falling back to 1 (100% usage)
        parts = parts or 1
        percentages = get_percentage(parts)

    if size:
        parts = int(device_size / size) or 1
        percentages = get_percentage(parts)

    sizes = device_size / parts if parts else int(floor(device_size))

    return {
        'parts': parts,
        'percentages': percentages,
        'sizes': int(sizes/1024/1024/1024),
    }
185
186
d2e6a577
FG
def parse_tags(lv_tags):
    """
    Build a dictionary out of the comma-separated tags string reported by
    LVM for a volume. Only tags that start with ``ceph.`` are kept; the
    text before the first ``=`` becomes the key and the rest the value.

    Given::

        "ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0"

    the result is::

        {
            "ceph.osd_fsid": "aaa-fff-bbbb",
            "ceph.osd_id": "0"
        }

    An empty or missing string produces an empty dictionary.
    """
    if not lv_tags:
        return {}

    return dict(
        assignment.split('=', 1)
        for assignment in lv_tags.split(',')
        if assignment.startswith('ceph.')
    )
214
215
94b18763
FG
def _vdo_parents(devices):
    """
    The caller may have handed over a plain device like /dev/sda2 rather
    than a logical volume or a mapper path. To cover that case, walk every
    entry of /sys/block, look at its slaves, and report each entry that has
    a slave listed in ``devices``.

    :param devices: collection of device names/paths to match slaves against
    :returns: list holding, for every match, both ``/dev/<parent>`` and the
              bare ``<parent>`` name
    """
    parents = []
    for block_name in os.listdir('/sys/block'):
        slave_names = os.listdir('/sys/block/%s/slaves' % block_name)
        for slave_name in slave_names:
            if slave_name not in devices:
                continue
            parents.extend(['/dev/%s' % block_name, block_name])
    return parents
230
231
232def _vdo_slaves(vdo_names):
233 """
234 find all the slaves associated with each vdo name (from realpath) by going
235 into /sys/block/<realpath>/slaves
236 """
237 devices = []
238 for vdo_name in vdo_names:
239 mapper_path = '/dev/mapper/%s' % vdo_name
240 if not os.path.exists(mapper_path):
241 continue
242 # resolve the realpath and realname of the vdo mapper
243 vdo_realpath = os.path.realpath(mapper_path)
244 vdo_realname = vdo_realpath.split('/')[-1]
245 slaves_path = '/sys/block/%s/slaves' % vdo_realname
246 if not os.path.exists(slaves_path):
247 continue
248 devices.append(vdo_realpath)
249 devices.append(mapper_path)
250 devices.append(vdo_realname)
251 for slave in os.listdir(slaves_path):
252 devices.append('/dev/%s' % slave)
253 devices.append(slave)
254 return devices
255
256
def _is_vdo(path):
    """
    Decide whether ``path`` is related to a VDO device.

    Every directory under /sys/kvdo that has a ``statistics`` subdirectory
    is taken as a VDO name. Those names are correlated to /dev/mapper, their
    realpaths and their slaves, and then to any parent block device using
    one of them. ``path`` is reported as VDO when the path itself, its
    realpath, or the last component of its realpath shows up among the
    collected devices.

    Returns ``False`` right away if /sys/kvdo is not a directory.
    """
    if not os.path.isdir('/sys/kvdo'):
        return False

    realpath = os.path.realpath(path)
    realpath_name = realpath.split('/')[-1]

    # get all the vdo names
    vdo_names = set()
    for entry in os.listdir('/sys/kvdo/'):
        if os.path.isdir('/sys/kvdo/%s/statistics' % entry):
            vdo_names.add(entry)

    # slaves associated with each vdo name, then every parent that sits on
    # top of any of the devices found so far
    devices = []
    devices.extend(_vdo_slaves(vdo_names))
    devices.extend(_vdo_parents(devices))

    candidates = (path, realpath, realpath_name)
    return any(candidate in devices for candidate in candidates)
290
291
def is_vdo(path):
    """
    Report whether a path is backed by VDO as the string ``'1'`` or ``'0'``.

    The detection itself is delegated to ``_is_vdo``; any exception it
    raises is logged and treated as "not VDO" so that a detection problem
    cannot break the caller.
    """
    try:
        detected = _is_vdo(path)
    except Exception:
        logger.exception('Unable to properly detect device as VDO: %s', path)
        return '0'
    return '1' if detected else '0'
306
307
1adf2230
AA
def dmsetup_splitname(dev):
    """
    Call ``dmsetup splitname`` on ``dev`` and hand the output to
    ``_splitname_parser``, returning whatever the parser produces.

    .. warning:: This call does not ensure that the device is correct or that
                 it exists. ``dmsetup`` will happily take a non existing path
                 and still return a 0 exit status.
    """
    command = ['dmsetup', 'splitname', '--noheadings']
    command += ["--separator=';'", '--nameprefixes', dev]
    # only the standard output is of interest here
    out, _err, _rc = process.call(command)
    return _splitname_parser(out)
322
323
92f5a8d4
TL
def is_ceph_device(lv):
    """
    Tell whether ``lv`` carries a usable ``ceph.osd_id`` tag.

    Returns ``False`` (after logging a warning) when the object has no
    ``tags`` attribute or the tag is missing, ``False`` when the tag holds
    the placeholder ``'null'``, and ``True`` otherwise.
    """
    try:
        osd_id = lv.tags['ceph.osd_id']
    except (KeyError, AttributeError):
        logger.warning('device is not part of ceph: %s', lv)
        return False

    return osd_id != 'null'
335
336
eafe8130
TL
337####################################
338#
339# Code for LVM Physical Volumes
340#
341################################
d2e6a577 342
# Default comma separated list of report fields requested for physical
# volumes (used as the default ``fields`` of get_pvs/get_single_pv).
PV_FIELDS = 'pv_name,pv_tags,pv_uuid,vg_name,lv_uuid'
d2e6a577 344
class PVolume(object):
    """
    Represents a Physical Volume from LVM, with some top-level attributes like
    ``pv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # every reported field becomes an attribute of the same name
        for k, v in kw.items():
            setattr(self, k, v)
        self.pv_api = kw
        self.name = kw['pv_name']
        self.tags = parse_tags(kw['pv_tags'])

    def __str__(self):
        return '<%s>' % self.pv_api['pv_name']

    def __repr__(self):
        return self.__str__()

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        At the end of all modifications, the tags are refreshed to reflect
        LVM's most current view.

        :raises RuntimeError: if the PV cannot be found again after the tags
                              were set
        """
        for k, v in tags.items():
            self.set_tag(k, v)
        # after setting all the tags, refresh them for the current object, use the
        # pv_* identifiers to filter because those shouldn't change
        # NOTE: get_single_pv is a module-level function (not a method) and
        # its keyword argument is ``filters``
        pv_object = get_single_pv(filters={'pv_name': self.pv_name,
                                           'pv_uuid': self.pv_uuid})

        if not pv_object:
            raise RuntimeError('No PV was found.')

        self.tags = pv_object.tags

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag. Does not "refresh" the values of
        the current object for its tags. Meant to be a "fire and forget" type
        of modification.

        **warning**: Altering tags on a PV has to be done ensuring that the
        device is actually the one intended. ``pv_name`` is *not* a persistent
        value, only ``pv_uuid`` is. Using ``pv_uuid`` is the best way to make
        sure the device getting changed is the one needed.
        """
        # remove it first if it exists
        if self.tags.get(key):
            current_value = self.tags[key]
            tag = "%s=%s" % (key, current_value)
            process.call(['pvchange', '--deltag', tag, self.pv_name], run_on_host=True)

        process.call(
            [
                'pvchange',
                '--addtag', '%s=%s' % (key, value), self.pv_name
            ],
            run_on_host=True
        )
181888fb
FG
412
413
181888fb
FG
def create_pv(device):
    """
    Run ``pvcreate`` on ``device`` to turn it into a physical volume, useful
    when devices need to be later mapped to journals.
    """
    command = [
        'pvcreate',
        '-v',  # verbose
        '-f',  # force it
        '--yes',  # answer yes to any prompts
        device,
    ]
    process.run(command, run_on_host=True)
d2e6a577
FG
426
427
def remove_pv(pv_name):
    """
    Removes a physical volume using a double `-f` to prevent prompts and fully
    remove anything related to LVM. This is tremendously destructive, but so is all other actions
    when zapping a device.

    In the case where multiple PVs are found, it will ignore that fact and
    continue with the removal, specifically in the case of messages like::

        WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2

    These situations can be avoided with custom filtering rules, which this API
    cannot handle while accommodating custom user filters.

    :param pv_name: name of the physical volume handed to ``pvremove``
    """
    # this removes a physical volume, so the failure message must say so
    fail_msg = "Unable to remove pv %s" % pv_name
    process.run(
        [
            'pvremove',
            '-v',  # verbose
            '-f',  # force it
            '-f',  # force it
            pv_name
        ],
        run_on_host=True,
        fail_msg=fail_msg,
    )
3efd9988
FG
454
455
def get_pvs(fields=PV_FIELDS, filters='', tags=None):
    """
    List the PVs on the system that match the given filters and tags.

    ``filters`` holds the arguments for the ``-S`` option of LVM. Because
    expressing LVM tags as a dictionary nested in that dictionary is
    awkward, tags are passed separately via ``tags`` and the helper
    functions take care of combining both.

    :param fields: string containing list of fields to be displayed by the
                   pvs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class PVolume object representing pvs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    command = ['pvs', '--noheadings', '--readonly', '--separator=";"']
    command += ['-S', selection, '-o', fields]

    stdout, _stderr, _returncode = process.call(
        command, run_on_host=True, verbose_on_failure=False)
    return [PVolume(**entry) for entry in _output_parser(stdout, fields)]
479
1adf2230 480
def get_single_pv(fields=PV_FIELDS, filters=None, tags=None):
    """
    Convenience wrapper around get_pvs() for callers that expect at most one
    match, sparing them from writing::

        pvs = get_pvs()
        if len(pvs) >= 1:
            pv = pvs[0]

    :returns: the matching PVolume, or ``None`` when nothing matched
    :raises RuntimeError: when more than one PV matched
    """
    matches = get_pvs(fields=fields, filters=filters, tags=tags)
    found = len(matches)

    if found > 1:
        raise RuntimeError('Filters {} matched more than 1 PV present on this host.'.format(str(filters)))

    return matches[0] if found else None
1adf2230 496
1adf2230 497
eafe8130
TL
498################################
499#
500# Code for LVM Volume Groups
501#
502#############################
1adf2230 503
92f5a8d4
TL
# Default comma separated list of report fields requested for volume groups.
VG_FIELDS = 'vg_name,pv_count,lv_count,vg_attr,vg_extent_count,vg_free_count,vg_extent_size'
# Options shared by the commands that report on volume groups; the '"'
# characters that end up around each field are stripped by _output_parser.
VG_CMD_OPTIONS = ['--noheadings', '--readonly', '--units=b', '--nosuffix', '--separator=";"']
506
1adf2230 507
class VolumeGroup(object):
    """
    Represents an LVM group, with some top-level attributes like ``vg_name``
    """

    def __init__(self, **kw):
        # every reported field becomes an attribute of the same name
        for k, v in kw.items():
            setattr(self, k, v)
        self.name = kw['vg_name']
        if not self.name:
            raise ValueError('VolumeGroup must have a non-empty name')
        self.tags = parse_tags(kw.get('vg_tags', ''))

    def __str__(self):
        return '<%s>' % self.name

    def __repr__(self):
        return self.__str__()

    @property
    def free(self):
        """
        Return free space in VG in bytes
        """
        return int(self.vg_extent_size) * int(self.vg_free_count)

    @property
    def free_percent(self):
        """
        Return the free share of the VG as the ratio of free extents to
        total extents (a fraction, not a 0-100 percentage)
        """
        return int(self.vg_free_count) / int(self.vg_extent_count)

    @property
    def size(self):
        """
        Returns VG size in bytes
        """
        return int(self.vg_extent_size) * int(self.vg_extent_count)

    def sizing(self, parts=None, size=None):
        """
        Calculate proper sizing to fully utilize the volume group in the most
        efficient way possible. To prevent situations where LVM might accept
        a percentage that is beyond the vg's capabilities, it will refuse with
        an error when requesting a larger-than-possible parameter, in addition
        to rounding down calculations.

        A dictionary with different sizing parameters is returned, to make it
        easier for others to choose what they need in order to create logical
        volumes. ``sizes`` is expressed in gigabytes and ``extents`` is the
        number of extents of one part::

            >>> data_vg.free  # bytes, 1024 gigabytes here
            1099511627776
            >>> data_vg.sizing(parts=4)
            {'parts': 4, 'sizes': 256, 'percentages': 25, 'extents': ...}
            >>> data_vg.sizing(size=512)
            {'parts': 2, 'sizes': 512, 'percentages': 50, 'extents': ...}

        When neither ``parts`` nor ``size`` is given, a single part using
        all the free space is assumed.

        :param parts: Number of parts to create LVs from
        :param size: Size in gigabytes to divide the VG into

        :raises SizeAllocationError: When requested size cannot be allocated
        :raises ValueError: If both ``parts`` and ``size`` are given
        """
        if parts is not None and size is not None:
            raise ValueError(
                "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
            )

        # if size is given we need to map that to extents so that we avoid
        # issues when trying to get this right with a size in gigabytes find
        # the percentage first, cheating, because these values are thrown out
        vg_free_count = util.str_to_int(self.vg_free_count)

        if size:
            size = size * 1024 * 1024 * 1024
            extents = int(size / int(self.vg_extent_size))
            disk_sizing = sizing(self.free, size=size, parts=parts)
        else:
            # Prevent parts being 0 or missing, falling back to 1 (100% usage)
            parts = parts or 1
            size = int(self.free / parts)
            extents = size * vg_free_count / self.free
            disk_sizing = sizing(self.free, parts=parts)

        extent_sizing = sizing(vg_free_count, size=extents)

        disk_sizing['extents'] = int(extents)
        disk_sizing['percentages'] = extent_sizing['percentages']
        return disk_sizing

    def bytes_to_extents(self, size):
        '''
        Return a how many free extents we can fit into a size in bytes. This has
        some uncertainty involved. If size/extent_size is within 1% of the
        actual free extents we will return the extent count, otherwise we'll
        throw an error.
        This accomodates for the size calculation in batch. We need to report
        the OSD layout but have not yet created any LVM structures. We use the
        disk size in batch if no VG is present and that will overshoot the
        actual free_extent count due to LVM overhead.

        :raises RuntimeError: when ``size`` needs more extents than are free
                              (beyond the 1% tolerance), including when the
                              VG has no free extents at all
        '''
        b_to_ext = int(size / int(self.vg_extent_size))
        free_extents = int(self.vg_free_count)
        if b_to_ext < free_extents:
            # return bytes in extents if there is more space
            return b_to_ext
        # the ratio is only meaningful (and computable) with free extents
        if free_extents > 0 and b_to_ext / free_extents - 1 < 0.01:
            # return vg_fre_count if its less then 1% off
            logger.info(
                'bytes_to_extents results in {} but only {} '
                'are available, adjusting the latter'.format(b_to_ext,
                                                             self.vg_free_count))
            return free_extents
        # else raise an exception
        raise RuntimeError('Can\'t convert {} to free extents, only {} ({} '
                           'bytes) are free'.format(size, self.vg_free_count,
                                                    self.free))

    def slots_to_extents(self, slots):
        '''
        Return how many extents fit the VG slot times
        '''
        return int(int(self.vg_extent_count) / slots)
92f5a8d4 635
eafe8130 636
def create_vg(devices, name=None, name_prefix=None):
    """
    Create a Volume Group and return the result of looking it up again by
    name with ``get_single_vg``. Command looks like::

        vgcreate --force --yes group_name device

    :param devices: A list of devices to create a VG. Optionally, a single
                    device (as a string) can be used.
    :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
    :param name_prefix: Optionally prefix the name of the VG, which will get combined
                        with a UUID string; takes precedence over ``name``
    """
    # normalize whatever was passed into a list of devices
    if isinstance(devices, set):
        device_list = list(devices)
    elif isinstance(devices, list):
        device_list = devices
    else:
        device_list = [devices]

    if name_prefix:
        vg_name = "%s-%s" % (name_prefix, str(uuid.uuid4()))
    elif name is None:
        vg_name = "ceph-%s" % str(uuid.uuid4())
    else:
        vg_name = name

    command = ['vgcreate', '--force', '--yes', vg_name] + device_list
    process.run(command, run_on_host=True)

    return get_single_vg(filters={'vg_name': vg_name})
d2e6a577 668
d2e6a577 669
eafe8130
TL
def extend_vg(vg, devices):
    """
    Add devices to an existing Volume Group. Command looks like::

        vgextend --force --yes group_name [device, ...]

    After the command ran, the volume group is looked up again by name with
    ``get_single_vg`` and that result is returned.

    :param vg: A VolumeGroup object
    :param devices: A list of devices to extend the VG. Optionally, a single
                    device (as a string) can be used.
    """
    device_list = devices if isinstance(devices, list) else [devices]
    command = ['vgextend', '--force', '--yes', vg.name] + device_list
    process.run(command, run_on_host=True)

    return get_single_vg(filters={'vg_name': vg.name})
d2e6a577 693
d2e6a577 694
eafe8130
TL
def reduce_vg(vg, devices):
    """
    Reduce a Volume Group. Command looks like::

        vgreduce --force --yes group_name [device, ...]

    After the command ran, the volume group is looked up again by name with
    ``get_single_vg`` and that result is returned.

    :param vg: A VolumeGroup object
    :param devices: A list of devices to remove from the VG. Optionally, a
                    single device (as a string) can be used.
    """
    if not isinstance(devices, list):
        devices = [devices]
    process.run([
        'vgreduce',
        '--force',
        '--yes',
        vg.name] + devices,
        run_on_host=True
    )

    # the keyword accepted by get_single_vg is ``filters``
    return get_single_vg(filters={'vg_name': vg.name})
d2e6a577
FG
716
717
def remove_vg(vg_name):
    """
    Remove the volume group called ``vg_name`` with ``vgremove``. An empty
    name is only logged as a warning and nothing is run.
    """
    if not vg_name:
        logger.warning('Skipping removal of invalid VG name: "%s"', vg_name)
        return

    command = [
        'vgremove',
        '-v',  # verbose
        '-f',  # force it
        vg_name,
    ]
    process.run(
        command,
        run_on_host=True,
        fail_msg="Unable to remove vg %s" % vg_name,
    )
181888fb
FG
736
737
def get_vgs(fields=VG_FIELDS, filters='', tags=None):
    """
    List the VGs on the system that match the given filters and tags.

    ``filters`` holds the arguments for the ``-S`` option of LVM. Because
    expressing LVM tags as a dictionary nested in that dictionary is
    awkward, tags are passed separately via ``tags`` and the helper
    functions take care of combining both.

    :param fields: string containing list of fields to be displayed by the
                   vgs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class VolumeGroup object representing vgs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    command = ['vgs'] + VG_CMD_OPTIONS + ['-S', selection, '-o', fields]

    stdout, _stderr, _returncode = process.call(
        command, run_on_host=True, verbose_on_failure=False)
    return [VolumeGroup(**entry) for entry in _output_parser(stdout, fields)]
760
761
def get_single_vg(fields=VG_FIELDS, filters=None, tags=None):
    """
    Convenience wrapper around get_vgs() for callers that expect at most one
    match, sparing them from writing::

        vgs = get_vgs()
        if len(vgs) >= 1:
            vg = vgs[0]

    :returns: the matching VolumeGroup, or ``None`` when nothing matched
    :raises RuntimeError: when more than one VG matched
    """
    matches = get_vgs(fields=fields, filters=filters, tags=tags)
    found = len(matches)

    if found > 1:
        raise RuntimeError('Filters {} matched more than 1 VG present on this host.'.format(str(filters)))

    return matches[0] if found else None
1adf2230
AA
777
778
92f5a8d4
TL
def get_device_vgs(device, name_prefix=''):
    """
    Ask ``pvs`` for the volume group fields of ``device`` and return the
    reported groups as ``VolumeGroup`` objects.

    Rows with an empty ``vg_name`` are dropped, as are groups whose name
    does not start with ``name_prefix``.
    """
    command = ['pvs'] + VG_CMD_OPTIONS + ['-o', VG_FIELDS, device]
    stdout, _stderr, _returncode = process.call(
        command,
        run_on_host=True,
        verbose_on_failure=False
    )

    matching = []
    for report in _output_parser(stdout, VG_FIELDS):
        vg_name = report['vg_name']
        if vg_name and vg_name.startswith(name_prefix):
            matching.append(VolumeGroup(**report))
    return matching
92f5a8d4
TL
787
788
2a845540
TL
def get_all_devices_vgs(name_prefix=''):
    """
    Ask ``pvs`` for the volume group fields of every physical volume and
    return the reported groups as ``VolumeGroup`` objects; each object also
    carries the ``pv_name`` of the row it was built from.

    Rows with an empty ``vg_name`` are dropped, as are groups whose name
    does not start with ``name_prefix`` (the default prefix matches every
    name), mirroring ``get_device_vgs``.
    """
    vg_fields = f'pv_name,{VG_FIELDS}'
    cmd = ['pvs'] + VG_CMD_OPTIONS + ['-o', vg_fields]
    stdout, stderr, returncode = process.call(
        cmd,
        run_on_host=True,
        verbose_on_failure=False
    )
    vgs = _output_parser(stdout, vg_fields)
    return [
        VolumeGroup(**vg) for vg in vgs
        if vg['vg_name'] and vg['vg_name'].startswith(name_prefix)
    ]
2a845540 799
eafe8130
TL
800#################################
801#
802# Code for LVM Logical Volumes
803#
804###############################
1adf2230 805
# Default comma separated list of report fields requested for logical volumes.
LV_FIELDS = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size'
# Options shared by the commands that report on logical volumes; the '"'
# characters that end up around each field are stripped by _output_parser.
LV_CMD_OPTIONS = ['--noheadings', '--readonly', '--separator=";"', '-a',
                  '--units=b', '--nosuffix']
1adf2230 809
d2e6a577
FG
810
class Volume(object):
    """
    A Logical Volume as reported by LVM. Every reported field (``lv_name``,
    ``lv_path``, ...) is exposed as an attribute, and the ``ceph.`` tags are
    available parsed as the ``tags`` dictionary.
    """

    def __init__(self, **kw):
        for field_name, field_value in kw.items():
            setattr(self, field_name, field_value)
        self.lv_api = kw
        self.name = kw['lv_name']
        if not self.name:
            raise ValueError('Volume must have a non-empty name')
        self.tags = parse_tags(kw['lv_tags'])
        self.encrypted = self.tags.get('ceph.encrypted', '0') == '1'
        self.used_by_ceph = 'ceph.osd_id' in self.tags

    def __str__(self):
        return '<%s>' % self.lv_api['lv_path']

    def __repr__(self):
        return self.__str__()

    def as_dict(self):
        """
        Return a copy of the reported fields, extended with the parsed
        ``tags``, the ``name``, the ``type`` (from the ``ceph.type`` tag)
        and the ``path`` of the volume.
        """
        obj = dict(self.lv_api)
        obj.update({
            'tags': self.tags,
            'name': self.name,
            'type': self.tags['ceph.type'],
            'path': self.lv_path,
        })
        return obj

    def report(self):
        """
        Summarize the volume as a dictionary. Volumes without a
        ``ceph.osd_id`` tag only get a name and a comment; the others get
        the identifiers stored in their tags, including the
        ``<type>_uuid`` that corresponds to their ``ceph.type``.
        """
        if not self.used_by_ceph:
            return {
                'name': self.lv_name,
                'comment': 'not used by ceph'
            }

        type_ = self.tags['ceph.type']
        summary = {
            'name': self.lv_name,
            'osd_id': self.tags['ceph.osd_id'],
            'cluster_name': self.tags.get('ceph.cluster_name', conf.cluster),
            'type': type_,
            'osd_fsid': self.tags['ceph.osd_fsid'],
            'cluster_fsid': self.tags['ceph.cluster_fsid'],
            'osdspec_affinity': self.tags.get('ceph.osdspec_affinity', ''),
        }
        type_uuid = '{}_uuid'.format(type_)
        summary[type_uuid] = self.tags['ceph.{}'.format(type_uuid)]
        return summary

    def _format_tag_args(self, op, tags):
        """
        Build a flat argument list where every ``key=value`` of ``tags`` is
        preceded by ``op``, e.g. ``['--addtag', 'a=1', '--addtag', 'b=2']``.
        """
        args = []
        for key, value in tags.items():
            args.append(op)
            args.append('{}={}'.format(key, value))
        return args

    def clear_tags(self, keys=None):
        """
        Removes all or passed tags from the Logical Volume. An empty
        ``keys`` is handled like ``None``, meaning all tags. Keys that are
        not currently set are ignored, and nothing is run when there is
        nothing to remove.
        """
        wanted = keys if keys else self.tags.keys()
        doomed = {key: self.tags[key] for key in wanted if key in self.tags}
        if not doomed:
            # nothing to clear
            return

        # --deltag returns successful even if the to be deleted tag is not set
        command = ['lvchange'] + self._format_tag_args('--deltag', doomed) + [self.lv_path]
        process.call(command, run_on_host=True)
        for key in doomed:
            del self.tags[key]

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        Existing values for the given names are removed first, then all the
        tags are added with a single ``lvchange`` call and mirrored into
        ``self.tags``.
        """
        self.clear_tags(tags.keys())
        command = ['lvchange'] + self._format_tag_args('--addtag', tags) + [self.lv_path]
        process.call(command, run_on_host=True)
        for key, value in tags.items():
            self.tags[key] = value

    def clear_tag(self, key):
        """
        Remove a single tag from the Logical Volume, if it is currently set
        to a non-empty value.
        """
        if not self.tags.get(key):
            return
        tag = "%s=%s" % (key, self.tags[key])
        process.call(['lvchange', '--deltag', tag, self.lv_path], run_on_host=True)
        del self.tags[key]

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag.
        """
        # remove it first if it exists
        self.clear_tag(key)

        command = ['lvchange', '--addtag', '%s=%s' % (key, value), self.lv_path]
        process.call(command, run_on_host=True)
        self.tags[key] = value

    def deactivate(self):
        """
        Deactivate the LV by calling lvchange -an
        """
        process.call(['lvchange', '-an', self.lv_path], run_on_host=True)
92f5a8d4 935
181888fb 936
92f5a8d4
TL
def create_lv(name_prefix,
              uuid,
              vg=None,
              device=None,
              slots=None,
              extents=None,
              size=None,
              tags=None):
    """
    Create a Logical Volume in a Volume Group. Command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name_prefix`` is required. If ``size`` is provided it is expected to be
    a byte count. Tags are an optional dictionary that is expected to
    conform to the convention of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    :param name_prefix: name prefix for the LV, typically something like
                        ceph-osd-block
    :param uuid: UUID to ensure uniqueness; is combined with name_prefix to
                 form the LV name
    :param vg: optional, pass an existing VG to create the LV in
    :param device: optional, device to use. Either device or vg must be passed
    :param slots: optional, number of slots to divide the vg up, the LV will
                  occupy one slot if enough space is available
    :param extents: optional, how many lvm extents to use, supersedes slots
    :param size: optional, target LV size in bytes, supersedes extents,
                 resulting LV might be smaller depending on extent
                 size of the underlying VG
    :param tags: optional, a dict of lvm tags to set on the LV; when omitted
                 the ``ceph.osd_id``, ``ceph.type``, ``ceph.cluster_fsid`` and
                 ``ceph.osd_fsid`` tags are set to ``"null"``. If the dict has
                 a known ``ceph.type`` the matching ``ceph.*_device`` entry is
                 added to it (the passed dict is updated in place)
    :returns: the ``Volume`` object of the newly created LV
    :raises RuntimeError: if neither ``vg`` nor ``device`` is given, or if the
                          LV cannot be found after ``lvcreate`` has run
    """
    name = '{}-{}'.format(name_prefix, uuid)
    if not vg:
        if not device:
            raise RuntimeError("Must either specify vg or device, none given")
        # check if a vg starting with ceph already exists
        vgs = get_device_vgs(device, 'ceph')
        if vgs:
            vg = vgs[0]
        else:
            # create one if not
            vg = create_vg(device, name_prefix='ceph')
    assert vg

    if size:
        extents = vg.bytes_to_extents(size)
        logger.debug('size was passed: {} -> {}'.format(size, extents))
    elif slots and not extents:
        extents = vg.slots_to_extents(slots)
        logger.debug('slots was passed: {} -> {}'.format(slots, extents))

    # without an extent count the LV is created with all the space available
    # in the VG
    extent_arg = '{}'.format(extents) if extents else '100%FREE'
    command = [
        'lvcreate',
        '--yes',
        '-l',
        extent_arg,
        '-n', name, vg.vg_name
    ]
    process.run(command, run_on_host=True)

    lv = get_single_lv(filters={'lv_name': name, 'vg_name': vg.vg_name})
    if lv is None:
        # fail with a clear message instead of an AttributeError further down
        raise RuntimeError(
            'Unable to find LV {} in VG {} after creating it'.format(
                name, vg.vg_name)
        )

    if tags is None:
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }
    # when creating a distinct type, the caller doesn't know what the path will
    # be so this function will set it after creation using the mapping
    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    type_path_tag = {
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    path_tag = type_path_tag.get(tags.get('ceph.type'))
    if path_tag:
        tags.update({path_tag: lv.lv_path})

    lv.set_tags(tags)

    return lv
1035
1036
eafe8130
TL
def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'):
    """
    Create multiple Logical Volumes from a Volume Group by calculating the
    proper extents from ``parts`` or ``size``. A custom prefix can be used
    (defaults to ``ceph-lv``), these names are always suffixed with a uuid.

    LV creation in ceph-volume will require tags, this is expected to be
    pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It
    will probably not be the case when mass-creating LVs, so common/default
    tags will be set to ``"null"``.

    .. note:: LVs that are not in use can be detected by querying LVM for tags that are
              set to ``"null"``.

    :param volume_group: The volume group (vg) to use for LV creation
    :type volume_group: ``VolumeGroup()`` object
    :param parts: Number of LVs to create *instead of* ``size``. Defaults to
                  1 (a single LV) when neither ``parts`` nor ``size`` is given
    :type parts: int
    :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs as possible"
    :type size: int
    :param name_prefix: Prefix of the LV names, combined with a fresh uuid
    :returns: list with the LVs returned by ``create_lv()``
    """
    if parts is None and size is None:
        # fallback to just one part
        parts = 1

    sizing = volume_group.sizing(parts=parts, size=size)
    # every LV gets the same extent count, look it up once
    extents = sizing['extents']

    lvs = []
    for _ in range(sizing['parts']):
        # hand each LV its own dict so the LVs never share tag state
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }
        lvs.append(
            create_lv(name_prefix, uuid.uuid4(), vg=volume_group, extents=extents, tags=tags)
        )
    return lvs
92f5a8d4
TL
1078
1079
def remove_lv(lv):
    """
    Remove a logical volume, given either a ``Volume`` object or its
    absolute path.

    :param lv: A ``Volume`` object or the path for an LV
    :returns: ``True`` when ``lvremove`` exits with code 0
    :raises RuntimeError: when ``lvremove`` exits with a non-zero code
    """
    target = lv.lv_path if isinstance(lv, Volume) else lv

    # -v: verbose output, -f: force the removal
    command = ['lvremove', '-v', '-f', target]
    _, _, exit_code = process.call(
        command,
        run_on_host=True,
        show_command=True,
        terminal_verbose=True,
    )
    if exit_code == 0:
        return True
    raise RuntimeError("Unable to remove %s" % target)
92f5a8d4 1108
92f5a8d4
TL
1109
def get_lvs(fields=LV_FIELDS, filters='', tags=None):
    """
    Return a list of LVs that are available on the system and match the
    filters and tags passed.

    ``filters`` holds the arguments for the ``-S`` option of ``lvs``. LVM tags
    are awkward to express as a dictionary nested in ``filters``, so they are
    passed separately through ``tags`` and merged into the selection string by
    ``make_filters_lvmcmd_ready()``.

    :param fields: string containing list of fields to be displayed by the
                   lvs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class Volume object representing LVs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    command = ['lvs'] + LV_CMD_OPTIONS + ['-S', selection, '-o', fields]

    out, _, _ = process.call(command, run_on_host=True, verbose_on_failure=False)
    return [Volume(**report) for report in _output_parser(out, fields)]
1132
f6b5b4d7 1133
def get_single_lv(fields=LV_FIELDS, filters=None, tags=None):
    """
    Wrapper of get_lvs() meant to be a convenience method to avoid the phrase::

        lvs = get_lvs()
        if len(lvs) >= 1:
            lv = lvs[0]

    :returns: the only matching LV, or ``None`` when nothing matches
    :raises RuntimeError: when more than one LV matches
    """
    matches = get_lvs(fields=fields, filters=filters, tags=tags)

    if len(matches) > 1:
        raise RuntimeError('Filters {} matched more than 1 LV present on this host.'.format(str(filters)))

    return matches[0] if matches else None
f6b5b4d7
TL
1149
1150
20effc67
TL
def get_lvs_from_osd_id(osd_id):
    """
    Return the LVs that get_lvs() finds for the ``ceph.osd_id`` tag set to
    ``osd_id``.
    """
    osd_tags = {'ceph.osd_id': osd_id}
    return get_lvs(tags=osd_tags)
1153
1154
def get_single_lv_from_osd_id(osd_id):
    """
    Return the result of get_single_lv() for the ``ceph.osd_id`` tag set to
    ``osd_id``.
    """
    osd_tags = {'ceph.osd_id': osd_id}
    return get_single_lv(tags=osd_tags)
1157
1158
f6b5b4d7
TL
def get_lv_by_name(name):
    """
    Return a list of ``Volume`` objects built from what ``lvs`` reports when
    selecting on ``lv_name=<name>``.
    """
    selector = 'lv_name={}'.format(name)
    # NOTE(review): unlike get_lvs_by_tag() no '--separator' is passed here --
    # confirm _output_parser() copes with the default column separator
    command = ['lvs', '--noheadings', '-o', LV_FIELDS, '-S', selector]
    out, _, _ = process.call(
        command,
        run_on_host=True,
        verbose_on_failure=False
    )
    return [Volume(**report) for report in _output_parser(out, LV_FIELDS)]
1168
1169
def get_lvs_by_tag(lv_tag):
    """
    Return a list of ``Volume`` objects built from what ``lvs -a`` reports
    when selecting on ``lv_tags={<lv_tag>}``.
    """
    selector = 'lv_tags={{{}}}'.format(lv_tag)
    command = [
        'lvs', '--noheadings', '--separator=";"', '-a',
        '-o', LV_FIELDS,
        '-S', selector,
    ]
    out, _, _ = process.call(
        command,
        run_on_host=True,
        verbose_on_failure=False
    )
    return [Volume(**report) for report in _output_parser(out, LV_FIELDS)]
1179
1180
def get_device_lvs(device, name_prefix=''):
    """
    Return the ``Volume`` objects that ``pvs`` reports for ``device``.

    Report rows with an empty ``lv_name`` are skipped, as are LVs whose name
    does not start with ``name_prefix``.
    """
    command = ['pvs'] + LV_CMD_OPTIONS + ['-o', LV_FIELDS, device]
    out, _, _ = process.call(
        command,
        run_on_host=True,
        verbose_on_failure=False
    )

    volumes = []
    for report in _output_parser(out, LV_FIELDS):
        lv_name = report['lv_name']
        if not lv_name or not lv_name.startswith(name_prefix):
            continue
        volumes.append(Volume(**report))
    return volumes
522d829b 1190
20effc67
TL
def get_lvs_from_path(devpath):
    """
    Return the LVs found for ``devpath``, or an empty list when ``devpath``
    is not an absolute path.

    The path is first treated as a block device; if that yields nothing it is
    looked up as an LV path (/dev/vg_name/lv_name or /dev/mapper/...).
    """
    if not os.path.isabs(devpath):
        return []

    # block device lookup first, LV path lookup only as a fallback
    return get_device_lvs(devpath) or get_lvs(filters={'path': devpath})
1201
522d829b
TL
def get_lv_by_fullname(full_name):
    """
    Return the LV with the given full name (formatted as vg_name/lv_name).

    ``None`` is returned when a ``ValueError`` is raised, e.g. because
    ``full_name`` does not split into exactly two parts on ``/``.
    """
    try:
        vg_name, lv_name = full_name.split('/')
        return get_single_lv(filters={'lv_name': lv_name,
                                      'vg_name': vg_name})
    except ValueError:
        return None
aee94f69
TL
1213
def get_lv_path_from_mapper(mapper):
    """
    Translate a device-mapper path of the form /dev/mapper/VG-LV (dashes that
    belong to the VG or LV name are doubled) into the /dev/VG/LV form.
    eg:
    from:
    /dev/mapper/ceph--c1a97e46--234c--46aa--a549--3ca1d1f356a9-osd--block--32e8e896--172e--4a38--a06a--3702598510ec
    to:
    /dev/ceph-c1a97e46-234c-46aa-a549-3ca1d1f356a9/osd-block-32e8e896-172e-4a38-a06a-3702598510ec

    Returns ``None`` when ``mapper`` cannot be split into exactly a VG and an
    LV part.
    """
    pieces = [
        piece
        for piece in re.split(r'^\/dev\/mapper\/(.+\w)-(\w.+)', mapper)
        if piece
    ]

    if len(pieces) != 2:
        return None

    # undo the dash doubling applied to both names
    vg_name, lv_name = (piece.replace('--', '-') for piece in pieces)
    return f"/dev/{vg_name}/{lv_name}"
1231
def get_mapper_from_lv_path(lv_path):
    """
    Translate an LV path of the form /dev/VG/LV into the device-mapper form
    /dev/mapper/VG-LV, where every dash that belongs to the VG or LV name is
    doubled and a single dash separates the two names.
    eg:
    from:
    /dev/ceph-c1a97e46-234c-46aa-a549-3ca1d1f356a9/osd-block-32e8e896-172e-4a38-a06a-3702598510ec
    to:
    /dev/mapper/ceph--c1a97e46--234c--46aa--a549--3ca1d1f356a9-osd--block--32e8e896--172e--4a38--a06a--3702598510ec

    :param lv_path: LV path formatted as /dev/VG/LV
    :returns: the /dev/mapper/ path, or ``None`` when ``lv_path`` is not of
              the /dev/VG/LV form (this includes /dev/mapper/ paths)
    """
    # the VG and LV names are separated by the '/' of the path, not by a dash
    match = re.fullmatch(r'/dev/([^/]+)/([^/]+)', lv_path)
    if match is None:
        return None

    vg_name, lv_name = match.groups()
    if vg_name == 'mapper':
        # already a device-mapper path, there is no VG/LV pair to translate
        return None

    return f"/dev/mapper/{vg_name.replace('-', '--')}-{lv_name.replace('-', '--')}"