]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph-volume/ceph_volume/api/lvm.py
import ceph 15.2.14
[ceph.git] / ceph / src / ceph-volume / ceph_volume / api / lvm.py
CommitLineData
d2e6a577
FG
1"""
2API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention
3that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
4set of utilities for interacting with LVM.
5"""
94b18763
FG
6import logging
7import os
1adf2230 8import uuid
e306af50 9from itertools import repeat
1adf2230
AA
10from math import floor
11from ceph_volume import process, util
f6b5b4d7 12from ceph_volume.exceptions import SizeAllocationError
d2e6a577 13
94b18763
FG
14logger = logging.getLogger(__name__)
15
d2e6a577 16
f6b5b4d7
TL
def convert_filters_to_str(filters):
    """
    Render a dictionary of LVM filters as a comma-separated string of
    ``name=value`` pairs, for example::

        {'lv_name': 'a', 'vg_name': 'b'} -> 'lv_name=a,vg_name=b'

    A falsy ``filters`` argument (``None``, ``''``, ``{}``) is handed back
    untouched.
    """
    if not filters:
        return filters

    pairs = [name + '=' + value for name, value in filters.items()]
    return ','.join(pairs)
32
33
def convert_tags_to_str(tags):
    """
    Render a dictionary of LVM tags in the selection syntax, for example::

        {'ceph.osd_id': '0', 'ceph.type': 'block'}
            -> 'tags={ceph.osd_id=0,ceph.type=block}'

    A falsy ``tags`` argument is handed back untouched.
    """
    if not tags:
        return tags

    body = ','.join(name + '=' + value for name, value in tags.items())
    return 'tags={' + body + '}'
49
50
def make_filters_lvmcmd_ready(filters, tags):
    """
    Build the single selection string handed to the ``-S`` option of the LVM
    reporting commands from a dictionary of filters and a dictionary of tags::

        filter_name=filter_val...,tags={tag_name=tag_val,...}

    Whichever of the two parts is empty is left out; when both are empty an
    empty string is returned.
    """
    filter_part = convert_filters_to_str(filters)
    tag_part = convert_tags_to_str(tags)

    # only non-empty parts take part in the comma-joined result
    return ','.join(part for part in (filter_part, tag_part) if part)
70
71
b5b8bbf5
FG
72def _output_parser(output, fields):
73 """
74 Newer versions of LVM allow ``--reportformat=json``, but older versions,
75 like the one included in Xenial do not. LVM has the ability to filter and
76 format its output so we assume the output will be in a format this parser
92f5a8d4 77 can handle (using ';' as a delimiter)
b5b8bbf5
FG
78
79 :param fields: A string, possibly using ',' to group many items, as it
80 would be used on the CLI
81 :param output: The CLI output from the LVM call
82 """
83 field_items = fields.split(',')
84 report = []
85 for line in output:
86 # clear the leading/trailing whitespace
87 line = line.strip()
88
89 # remove the extra '"' in each field
90 line = line.replace('"', '')
91
92 # prevent moving forward with empty contents
93 if not line:
94 continue
95
11fdf7f2 96 # splitting on ';' because that is what the lvm call uses as
b5b8bbf5
FG
97 # '--separator'
98 output_items = [i.strip() for i in line.split(';')]
92f5a8d4 99 # map the output to the fields
b5b8bbf5
FG
100 report.append(
101 dict(zip(field_items, output_items))
102 )
103
104 return report
105
106
1adf2230
AA
107def _splitname_parser(line):
108 """
109 Parses the output from ``dmsetup splitname``, that should contain prefixes
110 (--nameprefixes) and set the separator to ";"
111
112 Output for /dev/mapper/vg-lv will usually look like::
113
114 DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''
115
116
117 The ``VG_NAME`` will usually not be what other callers need (e.g. just 'vg'
118 in the example), so this utility will split ``/dev/mapper/`` out, so that
119 the actual volume group name is kept
120
121 :returns: dictionary with stripped prefixes
122 """
1adf2230 123 parsed = {}
81eedcae
TL
124 try:
125 parts = line[0].split(';')
126 except IndexError:
127 logger.exception('Unable to parse mapper device: %s', line)
128 return parsed
129
1adf2230
AA
130 for part in parts:
131 part = part.replace("'", '')
132 key, value = part.split('=')
133 if 'DM_VG_NAME' in key:
134 value = value.split('/dev/mapper/')[-1]
135 key = key.split('DM_')[-1]
136 parsed[key] = value
137
138 return parsed
139
140
def sizing(device_size, parts=None, size=None):
    """
    Calculate proper sizing to fully utilize the volume group in the most
    efficient way possible. To prevent situations where LVM might accept
    a percentage that is beyond the vg's capabilities, it will refuse with
    an error when requesting a larger-than-possible parameter, in addition
    to rounding down calculations.

    A dictionary with different sizing parameters is returned, to make it
    easier for others to choose what they need in order to create logical
    volumes. ``sizes`` is the per-part size divided by ``1024 ** 3`` and
    truncated to an integer::

        >>> sizing(4 * 1024 ** 3, parts=2)
        {'parts': 2, 'percentages': 50, 'sizes': 2}

    When neither ``parts`` nor a non-zero ``size`` is given the whole device
    is reported as a single allocation (``parts`` stays ``None``,
    ``percentages`` is 100).

    :param device_size: total size available, in the same unit as ``size``
    :param parts: number of equal parts to split into; 0 falls back to 1
    :param size: desired size of each part
    :raises ValueError: if both ``parts`` and ``size`` are given
    :raises SizeAllocationError: if ``size`` is larger than ``device_size``
    """
    if parts is not None and size is not None:
        raise ValueError(
            "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
        )

    if size and size > device_size:
        raise SizeAllocationError(size, device_size)

    def get_percentage(parts):
        return int(floor(100 / float(parts)))

    # Default for the "nothing requested" case; previously this name was left
    # unbound and building the result raised UnboundLocalError.
    percentages = 100

    if parts is not None:
        # Prevent parts being 0, falling back to 1 (100% usage)
        parts = parts or 1
        percentages = get_percentage(parts)

    if size:
        parts = int(device_size / size) or 1
        percentages = get_percentage(parts)

    sizes = device_size / parts if parts else int(floor(device_size))

    return {
        'parts': parts,
        'percentages': percentages,
        'sizes': int(sizes/1024/1024/1024),
    }
184
185
d2e6a577
FG
def parse_tags(lv_tags):
    """
    Build a dictionary out of the comma-separated tag string reported by LVM,
    keeping only the tags that start with ``ceph.``.

    Input look like::

        "ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0"

    For the above example, the expected return value would be::

        {
            "ceph.osd_fsid": "aaa-fff-bbbb",
            "ceph.osd_id": "0"
        }

    A falsy input yields an empty dictionary. Only the first ``=`` of each
    assignment separates the key from the value.
    """
    if not lv_tags:
        return {}

    return dict(
        assignment.split('=', 1)
        for assignment in lv_tags.split(',')
        if assignment.startswith('ceph.')
    )
213
214
94b18763
FG
def _vdo_parents(devices):
    """
    Walk every entry of /sys/block and inspect its ``slaves`` directory. For
    each slave that is listed in ``devices`` the owning block device is
    recorded twice: as ``/dev/<name>`` and as the bare ``<name>``.

    This covers the case where we did not get a logical volume or a mapper
    path but a device like /dev/sda2.

    :param devices: collection of device names/paths already related to VDO
    :returns: list of parent device paths and names (may contain repeats)
    """
    parents = []
    for block_name in os.listdir('/sys/block'):
        slaves_dir = '/sys/block/%s/slaves' % block_name
        for slave_name in os.listdir(slaves_dir):
            if slave_name not in devices:
                continue
            parents.extend(['/dev/%s' % block_name, block_name])
    return parents
229
230
231def _vdo_slaves(vdo_names):
232 """
233 find all the slaves associated with each vdo name (from realpath) by going
234 into /sys/block/<realpath>/slaves
235 """
236 devices = []
237 for vdo_name in vdo_names:
238 mapper_path = '/dev/mapper/%s' % vdo_name
239 if not os.path.exists(mapper_path):
240 continue
241 # resolve the realpath and realname of the vdo mapper
242 vdo_realpath = os.path.realpath(mapper_path)
243 vdo_realname = vdo_realpath.split('/')[-1]
244 slaves_path = '/sys/block/%s/slaves' % vdo_realname
245 if not os.path.exists(slaves_path):
246 continue
247 devices.append(vdo_realpath)
248 devices.append(mapper_path)
249 devices.append(vdo_realname)
250 for slave in os.listdir(slaves_path):
251 devices.append('/dev/%s' % slave)
252 devices.append(slave)
253 return devices
254
255
def _is_vdo(path):
    """
    Decide whether ``path`` is related to a VDO device.

    The names under /sys/kvdo that own a ``statistics`` directory are taken
    as the vdo names. Their mapper paths, realpaths and slaves are collected,
    together with any parent block device of those, and ``path`` (or its
    realpath, or the last component of its realpath) is then looked up in
    that collection.

    :returns: ``False`` when /sys/kvdo is not a directory, otherwise whether
              the path could be correlated to a VDO device
    """
    if not os.path.isdir('/sys/kvdo'):
        return False

    realpath = os.path.realpath(path)
    realpath_name = realpath.split('/')[-1]

    # every directory in /sys/kvdo holding 'statistics' names a vdo device
    vdo_names = set(
        entry for entry in os.listdir('/sys/kvdo/')
        if os.path.isdir('/sys/kvdo/%s/statistics' % entry)
    )

    # slaves found through /sys/block/<realpath>/slaves
    devices = list(_vdo_slaves(vdo_names))

    # plus all possible parents of anything related to VDO so far
    devices.extend(_vdo_parents(devices))

    candidates = (path, realpath, realpath_name)
    return any(candidate in devices for candidate in candidates)
289
290
def is_vdo(path):
    """
    Detect if a path is backed by VDO, proxying the actual call to _is_vdo so
    that an exception cannot break OSD creation. Any exception is captured
    and logged.

    :returns: the string ``'1'`` when the path is detected as VDO, the string
              ``'0'`` otherwise (including when detection raised)
    """
    try:
        detected = _is_vdo(path)
    except Exception:
        logger.exception('Unable to properly detect device as VDO: %s', path)
        return '0'
    return '1' if detected else '0'
305
306
1adf2230
AA
def dmsetup_splitname(dev):
    """
    Call ``dmsetup splitname`` for ``dev`` and return the parsed result as
    a dictionary (see ``_splitname_parser``).

    .. warning:: This call does not ensure that the device is correct or that
                 it exists. ``dmsetup`` will happily take a non existing path
                 and still return a 0 exit status.
    """
    stdout, _stderr, _returncode = process.call([
        'dmsetup', 'splitname', '--noheadings',
        "--separator=';'", '--nameprefixes', dev,
    ])
    return _splitname_parser(stdout)
321
322
92f5a8d4
TL
def is_ceph_device(lv):
    """
    Tell whether ``lv`` is in use by ceph: it must carry a ``ceph.osd_id``
    tag whose value is not the string ``'null'``.

    Objects without ``tags`` or without that tag are logged and reported as
    not being part of ceph.
    """
    try:
        osd_id = lv.tags['ceph.osd_id']
    except (KeyError, AttributeError):
        logger.warning('device is not part of ceph: %s', lv)
        return False

    return osd_id != 'null'
334
335
eafe8130
TL
336####################################
337#
338# Code for LVM Physical Volumes
339#
340################################
d2e6a577 341
# Default comma-separated field list for physical volume reports; get_pvs()
# hands it to ``pvs -o`` and uses the same names as keys of each parsed row.
PV_FIELDS = 'pv_name,pv_tags,pv_uuid,vg_name,lv_uuid'
d2e6a577 343
class PVolume(object):
    """
    Represents a Physical Volume from LVM, with some top-level attributes like
    ``pv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # every reported field (pv_name, pv_uuid, ...) becomes an attribute
        for k, v in kw.items():
            setattr(self, k, v)
        self.pv_api = kw
        self.name = kw['pv_name']
        self.tags = parse_tags(kw['pv_tags'])

    def __str__(self):
        return '<%s>' % self.pv_api['pv_name']

    def __repr__(self):
        return self.__str__()

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        At the end of all modifications, the tags are refreshed to reflect
        LVM's most current view. If the PV can no longer be found the current
        tags are left as they are and a warning is logged.
        """
        for k, v in tags.items():
            self.set_tag(k, v)
        # after setting all the tags, refresh them for the current object, use the
        # pv_* identifiers to filter because those shouldn't change.
        # get_first_pv is a module-level function (not a method of this class)
        # and its keyword argument is ``filters``.
        pv_object = get_first_pv(filters={'pv_name': self.pv_name,
                                          'pv_uuid': self.pv_uuid})
        if not pv_object:
            # get_first_pv returns an empty list when nothing matches
            logger.warning('Unable to refresh tags, PV not found: %s', self.pv_name)
            return
        self.tags = pv_object.tags

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag. Does not "refresh" the values of
        the current object for its tags. Meant to be a "fire and forget" type
        of modification.

        **warning**: Altering tags on a PV has to be done ensuring that the
        device is actually the one intended. ``pv_name`` is *not* a persistent
        value, only ``pv_uuid`` is. Using ``pv_uuid`` is the best way to make
        sure the device getting changed is the one needed.
        """
        # remove it first if it exists
        if self.tags.get(key):
            current_value = self.tags[key]
            tag = "%s=%s" % (key, current_value)
            process.call(['pvchange', '--deltag', tag, self.pv_name])

        process.call(
            [
                'pvchange',
                '--addtag', '%s=%s' % (key, value), self.pv_name
            ]
        )
181888fb
FG
406
407
181888fb
FG
def create_pv(device):
    """
    Turn ``device`` into an LVM physical volume by running ``pvcreate``,
    useful when devices need to be later mapped to journals.
    """
    command = [
        'pvcreate',
        '-v',  # verbose
        '-f',  # force it
        '--yes',  # answer yes to any prompts
        device,
    ]
    process.run(command)
d2e6a577
FG
420
421
def remove_pv(pv_name):
    """
    Removes a physical volume using a double `-f` to prevent prompts and fully
    remove anything related to LVM. This is tremendously destructive, but so is all other actions
    when zapping a device.

    In the case where multiple PVs are found, it will ignore that fact and
    continue with the removal, specifically in the case of messages like::

        WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2

    These situations can be avoided with custom filtering rules, which this API
    cannot handle while accommodating custom user filters.

    :param pv_name: name of the physical volume handed to ``pvremove``
    """
    # this removes a physical volume, so the failure message must say "pv"
    fail_msg = "Unable to remove pv %s" % pv_name
    process.run(
        [
            'pvremove',
            '-v',  # verbose
            '-f',  # force it
            '-f',  # force it
            pv_name
        ],
        fail_msg=fail_msg,
    )
3efd9988
FG
447
448
def get_pvs(fields=PV_FIELDS, filters='', tags=None):
    """
    List the PVs on the system that match the given filters and tags.

    ``filters`` takes a dictionary of the arguments accepted by the -S option
    of LVM. Because expressing LVM tags as a dictionary inside that dictionary
    is awkward, tags are passed separately through ``tags`` and merged into
    the selection by the helper functions.

    :param fields: string containing list of fields to be displayed by the
                   pvs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class PVolume object representing pvs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    command = ['pvs', '--no-heading', '--readonly', '--separator=";"']
    command.extend(['-S', selection, '-o', fields])

    stdout, _stderr, _returncode = process.call(command, verbose_on_failure=False)
    return [PVolume(**entry) for entry in _output_parser(stdout, fields)]
472
1adf2230 473
f6b5b4d7
TL
def get_first_pv(fields=PV_FIELDS, filters=None, tags=None):
    """
    Convenience wrapper around get_pvs that spares callers the phrase::

        pvs = get_pvs()
        if len(pvs) >= 1:
            pv = pvs[0]

    :returns: the first matching PVolume, or an empty list when none matched
    """
    matches = get_pvs(fields=fields, filters=filters, tags=tags)
    if matches:
        return matches[0]
    return []
1adf2230 483
1adf2230 484
eafe8130
TL
485################################
486#
487# Code for LVM Volume Groups
488#
489#############################
1adf2230 490
92f5a8d4
TL
# Default comma-separated field list for volume group reports (passed to
# ``-o`` by get_vgs() and get_device_vgs()).
VG_FIELDS = 'vg_name,pv_count,lv_count,vg_attr,vg_extent_count,vg_free_count,vg_extent_size'
# Options shared by the volume group report commands: no header line, sizes
# in bytes without a unit suffix and ';' separated fields, which is the
# format _output_parser() expects.
VG_CMD_OPTIONS = ['--noheadings', '--readonly', '--units=b', '--nosuffix', '--separator=";"']
493
1adf2230 494
class VolumeGroup(object):
    """
    Represents an LVM group, with some top-level attributes like ``vg_name``
    """

    def __init__(self, **kw):
        # every reported field (vg_name, vg_free_count, ...) becomes an attribute
        for k, v in kw.items():
            setattr(self, k, v)
        self.name = kw['vg_name']
        if not self.name:
            raise ValueError('VolumeGroup must have a non-empty name')
        self.tags = parse_tags(kw.get('vg_tags', ''))

    def __str__(self):
        return '<%s>' % self.name

    def __repr__(self):
        return self.__str__()

    @property
    def free(self):
        """
        Return free space in VG in bytes
        """
        return int(self.vg_extent_size) * int(self.vg_free_count)

    @property
    def free_percent(self):
        """
        Return the free extents as a fraction of all the extents in the VG
        (``vg_free_count / vg_extent_count``)
        """
        return int(self.vg_free_count) / int(self.vg_extent_count)

    @property
    def size(self):
        """
        Returns VG size in bytes
        """
        return int(self.vg_extent_size) * int(self.vg_extent_count)

    def sizing(self, parts=None, size=None):
        """
        Calculate proper sizing to fully utilize the volume group in the most
        efficient way possible. To prevent situations where LVM might accept
        a percentage that is beyond the vg's capabilities, it will refuse with
        an error when requesting a larger-than-possible parameter, in addition
        to rounding down calculations.

        The dictionary produced by the module-level ``sizing()`` for the free
        space of the VG is returned, with an added ``extents`` key and with
        ``percentages`` recomputed from the extent counts, to make it easier
        for others to choose what they need in order to create logical
        volumes.

        When neither ``parts`` nor a non-zero ``size`` is given the free space
        is treated as a single part (100% usage), the same fallback that is
        applied to ``parts=0``.

        :param parts: Number of parts to create LVs from
        :param size: Size in gigabytes to divide the VG into

        :raises SizeAllocationError: When requested size cannot be allocated
                                     with the space that is free
        :raises ValueError: If both ``parts`` and ``size`` are given
        """
        if parts is not None and size is not None:
            raise ValueError(
                "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
            )

        # if size is given we need to map that to extents so that we avoid
        # issues when trying to get this right with a size in gigabytes find
        # the percentage first, cheating, because these values are thrown out
        vg_free_count = util.str_to_int(self.vg_free_count)

        if size:
            size = size * 1024 * 1024 * 1024
            extents = int(size / int(self.vg_extent_size))
            disk_sizing = sizing(self.free, size=size, parts=parts)
        else:
            # Prevent parts being 0 or missing, falling back to 1 (100%
            # usage); a missing ``parts`` used to leave ``extents`` unbound
            parts = parts or 1
            size = int(self.free / parts)
            extents = size * vg_free_count / self.free
            disk_sizing = sizing(self.free, parts=parts)

        extent_sizing = sizing(vg_free_count, size=extents)

        disk_sizing['extents'] = int(extents)
        disk_sizing['percentages'] = extent_sizing['percentages']
        return disk_sizing

    def bytes_to_extents(self, size):
        '''
        Return a how many free extents we can fit into a size in bytes. This has
        some uncertainty involved. If size/extent_size is within 1% of the
        actual free extents we will return the extent count, otherwise we'll
        throw an error.
        This accomodates for the size calculation in batch. We need to report
        the OSD layout but have not yet created any LVM structures. We use the
        disk size in batch if no VG is present and that will overshoot the
        actual free_extent count due to LVM overhead.

        :raises RuntimeError: when ``size`` needs more extents than are free
                              (beyond the 1% tolerance), which includes the
                              case of a VG without any free extent
        '''
        b_to_ext = int(size / int(self.vg_extent_size))
        free_extents = int(self.vg_free_count)
        if b_to_ext < free_extents:
            # return bytes in extents if there is more space
            return b_to_ext
        # without free extents the ratio cannot be computed (division by
        # zero), and nothing can be "within 1%" of an empty VG
        elif free_extents and b_to_ext / free_extents - 1 < 0.01:
            # return vg_fre_count if its less then 1% off
            logger.info(
                'bytes_to_extents results in {} but only {} '
                'are available, adjusting the latter'.format(b_to_ext,
                                                             self.vg_free_count))
            return free_extents
        # else raise an exception
        raise RuntimeError('Can\'t convert {} to free extents, only {} ({} '
                           'bytes) are free'.format(size, self.vg_free_count,
                                                    self.free))

    def slots_to_extents(self, slots):
        '''
        Return how many extents each slot gets when all the extents of the VG
        are divided into ``slots`` slots (rounded down)
        '''
        return int(int(self.vg_extent_count) / slots)
92f5a8d4 622
eafe8130 623
def create_vg(devices, name=None, name_prefix=None):
    """
    Create a Volume Group. Command looks like::

        vgcreate --force --yes group_name device

    Once created the volume group is looked up by name and returned as
    a ``VolumeGroup`` object

    :param devices: A list (or set) of devices to create a VG. Optionally,
                    a single device (as a string) can be used.
    :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
    :param name_prefix: Optionally prefix the name of the VG, which will get combined
                        with a UUID string; takes precedence over ``name``
    """
    # normalize ``devices`` into a list
    if isinstance(devices, set):
        device_list = list(devices)
    elif isinstance(devices, list):
        device_list = devices
    else:
        device_list = [devices]

    if name_prefix:
        name = "%s-%s" % (name_prefix, str(uuid.uuid4()))
    elif name is None:
        name = "ceph-%s" % str(uuid.uuid4())

    command = ['vgcreate', '--force', '--yes', name]
    command.extend(device_list)
    process.run(command)

    return get_first_vg(filters={'vg_name': name})
d2e6a577 654
d2e6a577 655
eafe8130
TL
def extend_vg(vg, devices):
    """
    Extend a Volume Group. Command looks like::

        vgextend --force --yes group_name [device, ...]

    Once extended the volume group is looked up again by name and returned as
    a ``VolumeGroup`` object

    :param vg: A VolumeGroup object
    :param devices: A list of devices to extend the VG. Optionally, a single
                    device (as a string) can be used.
    """
    device_list = devices if isinstance(devices, list) else [devices]

    command = ['vgextend', '--force', '--yes', vg.name]
    command.extend(device_list)
    process.run(command)

    return get_first_vg(filters={'vg_name': vg.name})
d2e6a577 678
d2e6a577 679
eafe8130
TL
def reduce_vg(vg, devices):
    """
    Reduce a Volume Group. Command looks like::

        vgreduce --force --yes group_name [device, ...]

    Once reduced the volume group is looked up again by name and returned as
    a ``VolumeGroup`` object

    :param vg: A VolumeGroup object
    :param devices: A list of devices to remove from the VG. Optionally, a
                    single device (as a string) can be used.
    """
    if not isinstance(devices, list):
        devices = [devices]
    process.run([
        'vgreduce',
        '--force',
        '--yes',
        vg.name] + devices
    )

    # the keyword argument of get_first_vg is ``filters``; ``filter`` raised
    # a TypeError after the VG had already been reduced
    return get_first_vg(filters={'vg_name': vg.name})
d2e6a577
FG
700
701
def remove_vg(vg_name):
    """
    Remove the volume group called ``vg_name`` with ``vgremove``. A falsy
    name is skipped with a warning instead of being handed to LVM.
    """
    if not vg_name:
        logger.warning('Skipping removal of invalid VG name: "%s"', vg_name)
        return

    command = [
        'vgremove',
        '-v',  # verbose
        '-f',  # force it
        vg_name,
    ]
    process.run(command, fail_msg="Unable to remove vg %s" % vg_name)
181888fb
FG
719
720
def get_vgs(fields=VG_FIELDS, filters='', tags=None):
    """
    List the VGs on the system that match the given filters and tags.

    ``filters`` takes a dictionary of the arguments accepted by the -S option
    of LVM. Because expressing LVM tags as a dictionary inside that dictionary
    is awkward, tags are passed separately through ``tags`` and merged into
    the selection by the helper functions.

    :param fields: string containing list of fields to be displayed by the
                   vgs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of class VolumeGroup object representing vgs on the system
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    command = ['vgs'] + VG_CMD_OPTIONS + ['-S', selection, '-o', fields]

    stdout, _stderr, _returncode = process.call(command, verbose_on_failure=False)
    return [VolumeGroup(**entry) for entry in _output_parser(stdout, fields)]
743
744
def get_first_vg(fields=VG_FIELDS, filters=None, tags=None):
    """
    Convenience wrapper around get_vgs that spares callers the phrase::

        vgs = get_vgs()
        if len(vgs) >= 1:
            vg = vgs[0]

    :returns: the first matching VolumeGroup, or an empty list when none
              matched
    """
    matches = get_vgs(fields=fields, filters=filters, tags=tags)
    if matches:
        return matches[0]
    return []
1adf2230
AA
754
755
92f5a8d4
TL
def get_device_vgs(device, name_prefix=''):
    """
    Report the volume groups ``pvs`` lists for ``device``, keeping only the
    ones with a non-empty name that starts with ``name_prefix``.

    :param device: device handed to the ``pvs`` command
    :param name_prefix: optional prefix the VG name has to start with
    :returns: list of VolumeGroup objects
    """
    command = ['pvs'] + VG_CMD_OPTIONS + ['-o', VG_FIELDS, device]
    stdout, _stderr, _returncode = process.call(command, verbose_on_failure=False)

    groups = []
    for entry in _output_parser(stdout, VG_FIELDS):
        vg_name = entry['vg_name']
        if vg_name and vg_name.startswith(name_prefix):
            groups.append(VolumeGroup(**entry))
    return groups
92f5a8d4
TL
763
764
eafe8130
TL
765#################################
766#
767# Code for LVM Logical Volumes
768#
769###############################
1adf2230 770
# Comma-separated field list for logical volume reports. The Volume class
# reads ``lv_name``, ``lv_tags`` and ``lv_path`` from the parsed result.
# NOTE(review): presumably the default ``-o`` value of the lvs helpers, which
# are not visible in this part of the file - confirm.
LV_FIELDS = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size'
# Options for logical volume report commands: no header line, ';' separated
# fields and sizes in bytes without a unit suffix.
# NOTE(review): ``-a`` presumably stands for ``--all`` - confirm against lvs(8).
LV_CMD_OPTIONS = ['--noheadings', '--readonly', '--separator=";"', '-a',
                  '--units=b', '--nosuffix']
1adf2230 774
d2e6a577
FG
775
class Volume(object):
    """
    Represents a Logical Volume from LVM, with some top-level attributes like
    ``lv_name`` and parsed tags as a dictionary of key/value pairs.
    """

    def __init__(self, **kw):
        # every reported field (lv_name, lv_path, ...) becomes an attribute
        for k, v in kw.items():
            setattr(self, k, v)
        self.lv_api = kw
        self.name = kw['lv_name']
        if not self.name:
            raise ValueError('Volume must have a non-empty name')
        self.tags = parse_tags(kw['lv_tags'])
        self.encrypted = self.tags.get('ceph.encrypted', '0') == '1'
        self.used_by_ceph = 'ceph.osd_id' in self.tags

    def __str__(self):
        return '<%s>' % self.lv_api['lv_path']

    def __repr__(self):
        return self.__str__()

    def as_dict(self):
        """
        Return the reported LVM fields plus ``tags``, ``name``, ``type`` and
        ``path`` as a dictionary.

        :raises KeyError: if the volume has no ``ceph.type`` tag
        """
        obj = {}
        obj.update(self.lv_api)
        obj['tags'] = self.tags
        obj['name'] = self.name
        obj['type'] = self.tags['ceph.type']
        obj['path'] = self.lv_path
        return obj

    def report(self):
        """
        Return a dictionary describing the volume: only its name and a comment
        when it is not used by ceph, the ceph related tags otherwise.
        """
        if not self.used_by_ceph:
            return {
                'name': self.lv_name,
                'comment': 'not used by ceph'
            }
        else:
            type_ = self.tags['ceph.type']
            report = {
                'name': self.lv_name,
                'osd_id': self.tags['ceph.osd_id'],
                'cluster_name': self.tags['ceph.cluster_name'],
                'type': type_,
                'osd_fsid': self.tags['ceph.osd_fsid'],
                'cluster_fsid': self.tags['ceph.cluster_fsid'],
                'osdspec_affinity': self.tags.get('ceph.osdspec_affinity', ''),
            }
            # e.g. 'block_uuid' is read from the 'ceph.block_uuid' tag
            type_uuid = '{}_uuid'.format(type_)
            report[type_uuid] = self.tags['ceph.{}'.format(type_uuid)]
            return report

    def _format_tag_args(self, op, tags):
        """
        Return a flat argument list that repeats ``op`` in front of every
        ``key=value`` pair of ``tags``, e.g.
        ``['--addtag', 'a=1', '--addtag', 'b=2']``
        """
        tag_args = []
        for k, v in tags.items():
            tag_args.extend((op, '{}={}'.format(k, v)))
        return tag_args

    def clear_tags(self, keys=None):
        """
        Removes all or passed tags from the Logical Volume.

        :param keys: tag names to remove; when empty or ``None`` every tag
                     currently known for the volume is removed
        """
        if not keys:
            keys = self.tags.keys()

        del_tags = {k: self.tags[k] for k in keys if k in self.tags}
        if not del_tags:
            # nothing to clear
            return
        del_tag_args = self._format_tag_args('--deltag', del_tags)
        # --deltag returns successful even if the to be deleted tag is not set
        process.call(['lvchange'] + del_tag_args + [self.lv_path])
        for k in del_tags.keys():
            del self.tags[k]

    def set_tags(self, tags):
        """
        :param tags: A dictionary of tag names and values, like::

            {
                "ceph.osd_fsid": "aaa-fff-bbbb",
                "ceph.osd_id": "0"
            }

        Existing values of the given tags are removed first, the new values
        are then added with a single ``lvchange`` call and mirrored in
        ``self.tags``. An empty ``tags`` dictionary is a no-op.
        """
        if not tags:
            # An empty key view would make clear_tags() fall back to "all
            # tags" and wipe every tag of the volume, followed by an lvchange
            # call without any tag option.
            return
        self.clear_tags(tags.keys())
        add_tag_args = self._format_tag_args('--addtag', tags)
        process.call(['lvchange'] + add_tag_args + [self.lv_path])
        for k, v in tags.items():
            self.tags[k] = v

    def clear_tag(self, key):
        """
        Remove the tag ``key`` from the Logical Volume if it is currently set
        to a non-empty value.
        """
        if self.tags.get(key):
            current_value = self.tags[key]
            tag = "%s=%s" % (key, current_value)
            process.call(['lvchange', '--deltag', tag, self.lv_path])
            del self.tags[key]

    def set_tag(self, key, value):
        """
        Set the key/value pair as an LVM tag.
        """
        # remove it first if it exists
        self.clear_tag(key)

        process.call(
            [
                'lvchange',
                '--addtag', '%s=%s' % (key, value), self.lv_path
            ]
        )
        self.tags[key] = value

    def deactivate(self):
        """
        Deactivate the LV by calling lvchange -an
        """
        process.call(['lvchange', '-an', self.lv_path])
899
181888fb 900
92f5a8d4
TL
def create_lv(name_prefix,
              uuid,
              vg=None,
              device=None,
              slots=None,
              extents=None,
              size=None,
              tags=None):
    """
    Create a Logical Volume in a Volume Group. Command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name_prefix`` is required. If ``size`` is provided its expected to be a
    byte count. Tags are an optional dictionary and is expected to
    conform to the convention of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    The dictionary passed as ``tags`` is not modified.

    :param name_prefix: name prefix for the LV, typically something like ceph-osd-block
    :param uuid: UUID to ensure uniqueness; is combined with name_prefix to
                 form the LV name
    :param vg: optional, pass an existing VG to create LV
    :param device: optional, device to use. Either device or vg must be passed
    :param slots: optional, number of slots to divide vg up, LV will occupy
                  one slot if enough space is available
    :param extents: optional, how many lvm extents to use, supersedes slots
    :param size: optional, target LV size in bytes, supersedes extents,
                 resulting LV might be smaller depending on extent
                 size of the underlying VG
    :param tags: optional, a dict of lvm tags to set on the LV
    :raises RuntimeError: when neither ``vg`` nor ``device`` is given, or when
                          no volume group could be found or created for
                          ``device``
    :returns: the created logical volume with its tags set
    """
    name = '{}-{}'.format(name_prefix, uuid)
    if not vg:
        if not device:
            raise RuntimeError("Must either specify vg or device, none given")
        # check if a vgs starting with ceph already exists
        vgs = get_device_vgs(device, 'ceph')
        if vgs:
            vg = vgs[0]
        else:
            # create one if not
            vg = create_vg(device, name_prefix='ceph')
    if not vg:
        # explicit check instead of ``assert``, which is stripped under -O
        raise RuntimeError(
            "Unable to find or create a volume group for device %s" % device)

    if size:
        extents = vg.bytes_to_extents(size)
        logger.debug('size was passed: {} -> {}'.format(size, extents))
    elif slots and not extents:
        extents = vg.slots_to_extents(slots)
        logger.debug('slots was passed: {} -> {}'.format(slots, extents))

    if extents:
        command = [
            'lvcreate',
            '--yes',
            '-l',
            '{}'.format(extents),
            '-n', name, vg.vg_name
        ]
    # no explicit amount of extents: create the lv with all the space that is
    # available in the volume group
    else:
        command = [
            'lvcreate',
            '--yes',
            '-l',
            '100%FREE',
            '-n', name, vg.vg_name
        ]
    process.run(command)

    lv = get_first_lv(filters={'lv_name': name, 'vg_name': vg.vg_name})

    if tags is None:
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }
    else:
        # work on a copy so the path tag added below does not leak into the
        # dictionary owned by the caller
        tags = dict(tags)
    # when creating a distinct type, the caller doesn't know what the path will
    # be so this function will set it after creation using the mapping
    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    type_path_tag = {
        'journal': 'ceph.journal_device',
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    path_tag = type_path_tag.get(tags.get('ceph.type'))
    if path_tag:
        tags.update({path_tag: lv.lv_path})

    lv.set_tags(tags)

    return lv
1000
1001
eafe8130
TL
def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'):
    """
    Create multiple Logical Volumes from a Volume Group by calculating the
    proper extents from ``parts`` or ``size``. A custom prefix can be used
    (defaults to ``ceph-lv``), these names are always suffixed with a uuid.

    LV creation in ceph-volume will require tags, this is expected to be
    pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It
    will probably not be the case when mass-creating LVs, so common/default
    tags will be set to ``"null"``.

    .. note:: LVs that are not in use can be detected by querying LVM for tags
              that are set to ``"null"``.

    :param volume_group: The volume group (vg) to use for LV creation
    :type volume_group: ``VolumeGroup()`` object
    :param parts: Number of LVs to create *instead of* ``size``.
    :type parts: int
    :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs
                 as possible"
    :type size: int
    :param name_prefix: Prefix of every LV name; a fresh uuid is appended
    :returns: list of the LVs returned by ``create_lv``, one per part
    """
    if parts is None and size is None:
        # fallback to just one part (using 100% of the vg)
        parts = 1

    sizing = volume_group.sizing(parts=parts, size=size)
    # every LV gets the same extent count, so look it up only once
    extents = sizing['extents']

    lvs = []
    for _ in range(sizing['parts']):
        # build a fresh dict per LV: create_lv() may update the tags it is
        # given, and that must never leak from one LV into the next
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }
        lvs.append(
            create_lv(name_prefix, uuid.uuid4(), vg=volume_group,
                      extents=extents, tags=tags)
        )
    return lvs
92f5a8d4
TL
1043
1044
def remove_lv(lv):
    """
    Remove a logical volume, identified either by a ``Volume`` object or by
    its path.

    ``lvremove`` is invoked verbosely and with force.

    :param lv: A ``Volume`` object or the path for an LV
    :returns: ``True`` when the LV was removed
    :raises RuntimeError: when ``lvremove`` exits with a non-zero code
    """
    # accept both a Volume instance and a plain path
    target = lv.lv_path if isinstance(lv, Volume) else lv

    command = [
        'lvremove',
        '-v',  # verbose
        '-f',  # force it
        target,
    ]
    _out, _err, exit_code = process.call(
        command,
        show_command=True,
        terminal_verbose=True,
    )
    if exit_code == 0:
        return True
    raise RuntimeError("Unable to remove %s" % target)
92f5a8d4 1072
92f5a8d4
TL
1073
def get_lvs(fields=LV_FIELDS, filters='', tags=None):
    """
    List the LVs on the system that match the given filters and tags.

    ``filters`` is a dictionary of criteria for the ``-S`` option of LVM.
    LVM tags are awkward to express as a dictionary nested inside that
    dictionary, so they are passed separately through ``tags`` and merged
    into the selection string by ``make_filters_lvmcmd_ready``.

    :param fields: string containing list of fields to be displayed by the
                   lvs command
    :param filters: dictionary containing LVM filters
    :param tags: dictionary containing LVM tags
    :returns: list of ``Volume`` objects, one per reported LV
    """
    selection = make_filters_lvmcmd_ready(filters, tags)
    command = ['lvs'] + LV_CMD_OPTIONS + ['-S', selection, '-o', fields]

    # only stdout is consumed; a failing command simply yields no report
    out, _err, _code = process.call(command, verbose_on_failure=False)

    volumes = []
    for report in _output_parser(out, fields):
        volumes.append(Volume(**report))
    return volumes
1096
f6b5b4d7 1097
92f5a8d4
TL
def get_first_lv(fields=LV_FIELDS, filters=None, tags=None):
    """
    Convenience wrapper around ``get_lvs`` that returns only the first
    match, sparing callers the phrase::

        lvs = get_lvs()
        if len(lvs) >= 1:
            lv = lvs[0]

    :returns: the first matching ``Volume``, or an empty list when nothing
              matches
    """
    matches = get_lvs(fields=fields, filters=filters, tags=tags)
    if matches:
        return matches[0]
    # no match: callers receive an empty list, not None
    return []
f6b5b4d7
TL
1107
1108
def get_lv_by_name(name):
    """
    Return every LV whose ``lv_name`` equals ``name``.

    :param name: LV name used as the ``lv_name`` selection criterion
    :returns: list of ``Volume`` objects (possibly empty)
    """
    selection = 'lv_name={}'.format(name)
    command = ['lvs', '--noheadings', '-o', LV_FIELDS, '-S', selection]
    out, _err, _code = process.call(command, verbose_on_failure=False)

    volumes = []
    for report in _output_parser(out, LV_FIELDS):
        volumes.append(Volume(**report))
    return volumes
1117
1118
def get_lvs_by_tag(lv_tag):
    """
    Return the LVs selected by the criterion ``lv_tags={<lv_tag>}``.

    :param lv_tag: tag expression placed inside the ``lv_tags={...}``
                   selection
    :returns: list of ``Volume`` objects (possibly empty)
    """
    selection = 'lv_tags={{{}}}'.format(lv_tag)
    command = [
        'lvs', '--noheadings', '--separator=";"', '-a',
        '-o', LV_FIELDS,
        '-S', selection,
    ]
    out, _err, _code = process.call(command, verbose_on_failure=False)

    volumes = []
    for report in _output_parser(out, LV_FIELDS):
        volumes.append(Volume(**report))
    return volumes
1127
1128
def get_device_lvs(device, name_prefix=''):
    """
    Return the LVs that ``pvs`` reports for ``device``, keeping only those
    with a non-empty name that starts with ``name_prefix``.

    :param device: device handed to ``pvs``
    :param name_prefix: optional prefix the LV name must start with
    :returns: list of ``Volume`` objects (possibly empty)
    """
    command = ['pvs'] + LV_CMD_OPTIONS + ['-o', LV_FIELDS, device]
    out, _err, _code = process.call(command, verbose_on_failure=False)

    volumes = []
    for report in _output_parser(out, LV_FIELDS):
        lv_name = report['lv_name']
        # skip rows that carry no LV name
        if not lv_name:
            continue
        if lv_name.startswith(name_prefix):
            volumes.append(Volume(**report))
    return volumes
6d8e3169
FG
1137
def get_lv_by_fullname(full_name):
    """
    Return the LV identified by its full name (formatted as vg_name/lv_name).

    :param full_name: string of the form ``vg_name/lv_name``
    :returns: the result of ``get_first_lv`` for that VG/LV pair, or ``None``
              when ``full_name`` is empty or not of the form
              ``vg_name/lv_name``
    """
    if not full_name:
        # None or '' can never name an LV
        return None

    # keep the try limited to the parsing step, so that only a malformed name
    # is reported as "not found"
    try:
        vg_name, lv_name = full_name.split('/')
    except ValueError:
        return None

    if not vg_name or not lv_name:
        # 'vg/' or '/lv': do not query LVM with an empty selection criterion
        return None

    return get_first_lv(filters={'lv_name': lv_name,
                                 'vg_name': vg_name})