]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #!/usr/bin/env python |
2 | # | |
c07f9fc5 | 3 | # Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com> |
7c673cae FG |
4 | # Copyright (C) 2014 Inktank <info@inktank.com> |
5 | # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> | |
6 | # Copyright (C) 2014 Catalyst.net Ltd | |
7 | # | |
8 | # Author: Loic Dachary <loic@dachary.org> | |
9 | # | |
10 | # This program is free software; you can redistribute it and/or modify | |
11 | # it under the terms of the GNU Library Public License as published by | |
12 | # the Free Software Foundation; either version 2, or (at your option) | |
13 | # any later version. | |
14 | # | |
15 | # This program is distributed in the hope that it will be useful, | |
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | # GNU Library Public License for more details. | |
19 | # | |
20 | ||
21 | from __future__ import print_function | |
22 | ||
23 | import argparse | |
24 | import base64 | |
25 | import errno | |
26 | import fcntl | |
3efd9988 | 27 | import functools |
7c673cae FG |
28 | import json |
29 | import logging | |
30 | import os | |
31 | import platform | |
32 | import re | |
33 | import subprocess | |
34 | import stat | |
35 | import sys | |
36 | import tempfile | |
37 | import uuid | |
38 | import time | |
39 | import shlex | |
c07f9fc5 | 40 | import shutil |
7c673cae FG |
41 | import pwd |
42 | import grp | |
43 | import textwrap | |
44 | import glob | |
45 | ||
# Magic strings identifying OSD and lockbox volumes.
# NOTE(review): presumably compared against a magic file stored on the
# volume; the consumer is outside this view -- confirm.
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'

# NOTE(review): looks like an identifier for the key management scheme;
# its consumer is not visible here -- confirm.
KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'
50 | ||
51 | PTYPE = { | |
52 | 'regular': { | |
53 | 'journal': { | |
54 | # identical because creating a journal is atomic | |
55 | 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106', | |
56 | 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106', | |
57 | }, | |
58 | 'block': { | |
59 | # identical because creating a block is atomic | |
60 | 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106', | |
61 | 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106', | |
62 | }, | |
63 | 'block.db': { | |
64 | # unlike 'block', 'ready' and 'tobe' are distinct GUIDs for block.db | |
65 | 'ready': '30cd0809-c2b2-499c-8879-2d6b78529876', | |
66 | 'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be', | |
67 | }, | |
68 | 'block.wal': { | |
69 | # unlike 'block', 'ready' and 'tobe' are distinct GUIDs for block.wal | |
70 | 'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9', | |
71 | 'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be', | |
72 | }, | |
73 | 'osd': { | |
74 | 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d', | |
75 | 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be', | |
76 | }, | |
77 | 'lockbox': { | |
78 | 'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b', | |
79 | 'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be', | |
80 | }, | |
81 | }, | |
82 | 'luks': { | |
83 | 'journal': { | |
84 | 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106', | |
85 | 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be', | |
86 | }, | |
87 | 'block': { | |
88 | 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106', | |
89 | 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be', | |
90 | }, | |
91 | 'block.db': { | |
92 | 'ready': '166418da-c469-4022-adf4-b30afd37f176', | |
93 | 'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be', | |
94 | }, | |
95 | 'block.wal': { | |
96 | 'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86', | |
97 | 'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be', | |
98 | }, | |
99 | 'osd': { | |
100 | 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d', | |
101 | 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be', | |
102 | }, | |
103 | }, | |
104 | 'plain': { | |
105 | 'journal': { | |
106 | 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106', | |
107 | 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be', | |
108 | }, | |
109 | 'block': { | |
110 | 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106', | |
111 | 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be', | |
112 | }, | |
113 | 'block.db': { | |
114 | 'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3', | |
115 | 'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be', | |
116 | }, | |
117 | 'block.wal': { | |
118 | 'ready': '306e8683-4fe2-4330-b7c0-00a917c16966', | |
119 | 'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be', | |
120 | }, | |
121 | 'osd': { | |
122 | 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d', | |
123 | 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be', | |
124 | }, | |
125 | }, | |
126 | 'mpath': { | |
127 | 'journal': { | |
128 | 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560', | |
129 | 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560', | |
130 | }, | |
131 | 'block': { | |
132 | 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560', | |
133 | 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560', | |
134 | }, | |
135 | 'block.db': { | |
136 | 'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261', | |
137 | 'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be', | |
138 | }, | |
139 | 'block.wal': { | |
140 | 'ready': '01b41e1b-002a-453c-9f17-88793989ff8f', | |
141 | 'tobe': '01b41e1b-002a-453c-9f17-88793989f2be', | |
142 | }, | |
143 | 'osd': { | |
144 | 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560', | |
145 | 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560', | |
146 | }, | |
147 | 'lockbox': { | |
148 | 'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c', | |
149 | 'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be', | |
150 | }, | |
151 | }, | |
152 | } | |
153 | ||
b32b8144 FG |
try:
    # see https://bugs.python.org/issue23098
    # Probe os.major() with a device number that does not fit in a signed
    # 32-bit integer; interpreters affected by the bug raise OverflowError.
    os.major(0x80002b00)
except OverflowError:
    # Replace os.major/os.minor for the whole process with pure-Python
    # bit arithmetic that accepts wide device numbers.
    os.major = lambda devid: ((devid >> 8) & 0xfff) | ((devid >> 32) & ~0xfff)
    os.minor = lambda devid: (devid & 0xff) | ((devid >> 12) & ~0xff)
160 | ||
7c673cae FG |
161 | |
class Ptype(object):
    """Static lookups over the PTYPE table of partition type GUIDs."""

    @staticmethod
    def get_ready_by_type(what):
        """Return the 'ready' GUID of every entry under PTYPE[what]."""
        ready = []
        for entry in PTYPE[what].values():
            ready.append(entry['ready'])
        return ready

    @staticmethod
    def get_ready_by_name(name):
        """Return the 'ready' GUID of *name* from each family defining it."""
        ready = []
        for family in PTYPE.values():
            if name in family:
                ready.append(family[name]['ready'])
        return ready

    @staticmethod
    def is_regular_space(ptype):
        """True if *ptype* is a 'ready' space GUID of the 'regular' family."""
        return Ptype.is_what_space('regular', ptype)

    @staticmethod
    def is_mpath_space(ptype):
        """True if *ptype* is a 'ready' space GUID of the 'mpath' family."""
        return Ptype.is_what_space('mpath', ptype)

    @staticmethod
    def is_plain_space(ptype):
        """True if *ptype* is a 'ready' space GUID of the 'plain' family."""
        return Ptype.is_what_space('plain', ptype)

    @staticmethod
    def is_luks_space(ptype):
        """True if *ptype* is a 'ready' space GUID of the 'luks' family."""
        return Ptype.is_what_space('luks', ptype)

    @staticmethod
    def is_what_space(what, ptype):
        """True if *ptype* equals PTYPE[what][name]['ready'] for some
        name in Space.NAMES."""
        return any(ptype == PTYPE[what][name]['ready']
                   for name in Space.NAMES)

    @staticmethod
    def space_ptype_to_name(ptype):
        """Return the Space.NAMES entry whose 'ready' GUID is *ptype*.

        :raises: ValueError if no family maps a space name to *ptype*.
        """
        for family in PTYPE.values():
            for name in Space.NAMES:
                if family[name]['ready'] == ptype:
                    return name
        raise ValueError('ptype ' + ptype + ' not found')

    @staticmethod
    def is_dmcrypt_space(ptype):
        """True if *ptype* is a dmcrypt 'ready' GUID for any space name."""
        return any(Ptype.is_dmcrypt(ptype, name) for name in Space.NAMES)

    @staticmethod
    def is_dmcrypt(ptype, name):
        """True if *ptype* is the 'ready' GUID of *name* in the 'plain'
        or 'luks' family."""
        return any(ptype == PTYPE[what][name]['ready']
                   for what in ('plain', 'luks'))
216 | ||
217 | ||
# Root of the sysfs tree.
SYSFS = '/sys'

# Platform-dependent defaults, chosen once at import time.
FREEBSD = platform.system() == 'FreeBSD'
if FREEBSD:
    DEFAULT_FS_TYPE = 'zfs'
    PROCDIR = '/compat/linux/proc'
    # FreeBSD does not have blockdevices any more
    BLOCKDIR = '/dev'
    ROOTGROUP = 'wheel'
else:
    DEFAULT_FS_TYPE = 'xfs'
    PROCDIR = '/proc'
    BLOCKDIR = '/sys/block'
    ROOTGROUP = 'root'
7c673cae FG |
233 | |
# OSD status definitions
OSD_STATUS_OUT_DOWN = 0
OSD_STATUS_OUT_UP = 1
OSD_STATUS_IN_DOWN = 2
OSD_STATUS_IN_UP = 3

# Mount options keyed by filesystem type.
MOUNT_OPTIONS = {
    'btrfs': 'noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    'ext4': 'noatime,user_xattr',
    'xfs': 'noatime,inode64',
}

# Extra mkfs arguments keyed by filesystem type.
MKFS_ARGS = {
    'btrfs': [
        # btrfs requires -f, for the same reason as xfs (see comment below)
        '-f',
        '-m', 'single',
        '-l', '32768',
        '-n', '32768',
    ],
    'xfs': [
        # xfs insists on not overwriting previous fs; even if we wipe
        # partition table, we often recreate it exactly the same way,
        # so we'll see ghosts of filesystems past
        '-f',
        '-i', 'size=2048',
    ],
    'zfs': [
        '-o', 'atime=off',
    ],
}

INIT_SYSTEMS = [
    'upstart',
    'sysvinit',
    'systemd',
    'openrc',
    'bsdrc',
    'auto',
    'none',
]

STATEDIR = '/var/lib/ceph'

SYSCONFDIR = '/etc/ceph'

prepare_lock = None
activate_lock = None
SUPPRESS_PREFIX = None

# only warn once about some things
warned_about = {}

# Nuke the TERM variable to avoid confusing any subprocesses we call.
# For example, libreadline will print weird control sequences for some
# TERM values.
os.environ.pop('TERM', None)

# When run as a script, name the logger after the program instead of
# '__main__'.
if __name__ == '__main__':
    LOG_NAME = os.path.basename(sys.argv[0])
else:
    LOG_NAME = __name__
LOG = logging.getLogger(LOG_NAME)

# Allow user-preferred values for subprocess user and group
CEPH_PREF_USER = None
CEPH_PREF_GROUP = None
307 | ||
308 | ||
class FileLock(object):
    """
    Context manager holding an exclusive ``fcntl.lockf`` lock on a file.

    The file is created if missing. The descriptor is open only while
    the lock is held; ``self.fd`` is None otherwise. The lock is not
    re-entrant: entering an already-entered instance fails the assertion.
    """

    def __init__(self, fn):
        # fn: path of the lock file
        self.fn = fn
        self.fd = None

    def __enter__(self):
        # compare with None: 0 is a valid file descriptor
        assert self.fd is None
        fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX)
        except Exception:
            # do not leak the descriptor when the lock cannot be taken
            os.close(fd)
            raise
        self.fd = fd
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        assert self.fd is not None
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # always release the descriptor, even if unlocking fails
            os.close(self.fd)
            self.fd = None
324 | ||
325 | ||
class Error(Exception):
    """
    Error
    """
    # The class docstring is runtime text: __str__ uses it as the message
    # prefix, so subclasses set their message by overriding the docstring.

    def __str__(self):
        """Return '<docstring>: <arg>: <arg>...' as text."""
        # __doc__ is None for a subclass without a docstring and when
        # docstrings are stripped (python -OO); fall back to the class
        # name instead of crashing while formatting an error.
        doc = _bytes2str((self.__doc__ or type(self).__name__).strip())
        try:
            str_type = basestring
        except NameError:
            # Python 3 has no basestring
            str_type = str
        args = [a if isinstance(a, str_type) else str(a) for a in self.args]
        return ': '.join([doc] + [_bytes2str(a) for a in args])
339 | ||
340 | ||
# The docstring below is runtime text: Error.__str__ uses it as the
# message prefix.
class MountError(Error):
    """
    Mounting filesystem failed
    """
345 | ||
346 | ||
# The docstring below is runtime text: Error.__str__ uses it as the
# message prefix.
class UnmountError(Error):
    """
    Unmounting filesystem failed
    """
351 | ||
352 | ||
# The docstring below is runtime text: Error.__str__ uses it as the
# message prefix.
class BadMagicError(Error):
    """
    Does not look like a Ceph OSD, or incompatible version
    """
357 | ||
358 | ||
# Raised by must_be_one_line() when the text lacks a trailing newline.
# The docstring below is runtime text: Error.__str__ uses it as the
# message prefix.
class TruncatedLineError(Error):
    """
    Line is truncated
    """
363 | ||
364 | ||
# Raised by must_be_one_line() when the text contains an inner newline.
# The docstring below is runtime text: Error.__str__ uses it as the
# message prefix.
class TooManyLinesError(Error):
    """
    Too many lines
    """
369 | ||
370 | ||
# The docstring below is runtime text: Error.__str__ uses it as the
# message prefix.
class FilesystemTypeError(Error):
    """
    Cannot discover filesystem type
    """
375 | ||
376 | ||
class CephDiskException(Exception):
    """
    Base class for ceph-disk exceptions carrying a custom (ad-hoc)
    message, meant to be caught and dealt with when main() is executed.
    """
383 | ||
384 | ||
class ExecutableNotFound(CephDiskException):
    """
    Raised when an executable is not available in PATH.
    """
390 | ||
391 | ||
def is_systemd():
    """
    Report whether the command name of PID 1 (read from
    PROCDIR/1/comm) contains 'systemd'.
    """
    comm_path = PROCDIR + '/1/comm'
    with open(comm_path, 'r') as comm:
        contents = comm.read()
    return 'systemd' in contents
398 | ||
399 | ||
def is_upstart():
    """
    Report whether the stdout of ``init --version`` mentions 'upstart'.
    """
    stdout = command(['init', '--version'])[0]
    return 'upstart' in stdout
406 | ||
407 | ||
def maybe_mkdir(*a, **kw):
    """
    Creates a new directory if it doesn't exist, removes
    existing symlink before creating the directory.

    Arguments are forwarded to ``os.mkdir``; the first positional
    argument is the path. An already existing directory is not an error.

    :raises: OSError for any mkdir failure other than EEXIST.
    """
    path = a[0]
    # remove any symlink, if it is there..
    # islink() does not follow the link, so dangling symlinks are found
    # too; only the path is inspected, so a positional mode is accepted.
    if os.path.islink(path):
        LOG.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
424 | ||
425 | ||
def which(executable):
    """
    Locate *executable* in PATH (or os.defpath when PATH is unset or
    empty) followed by a fixed list of system directories.

    :return: path of the first executable regular file found, else None.
    """
    search_dirs = (os.environ.get('PATH') or os.defpath).split(os.pathsep)
    search_dirs.extend([
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ])

    for directory in search_dirs:
        candidate = os.path.join(directory, executable)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            return candidate
    return None
445 | ||
446 | ||
def _get_command_executable(arguments):
    """
    Resolve ``arguments[0]`` to a full path via which().

    An absolute ``arguments[0]`` is accepted without any check. Otherwise
    the list is modified in place: its first item is replaced by the
    resolved path. The same list object is returned.

    :raises: ExecutableNotFound if the executable cannot be located.
    """
    program = arguments[0]
    if os.path.isabs(program):  # an absolute path
        return arguments

    resolved = which(program)
    if resolved:
        # swap the old executable for the new one
        arguments[0] = resolved
        return arguments

    command_msg = 'Could not run command: %s' % ' '.join(arguments)
    executable_msg = '%s not in path.' % arguments[0]
    raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
463 | ||
464 | ||
def command(arguments, **kwargs):
    """
    Run a command through ``subprocess.Popen`` and wait for it to finish.

    The executable is resolved first, so a missing program fails with a
    helpful error message. Extra keyword arguments are passed on to
    ``subprocess.Popen``.

    .. note:: This should be the preferred way of calling
      ``subprocess.Popen`` since it provides the caller with the safety
      net of making sure that executables *will* be found and will error
      nicely otherwise.

    :return: tuple (stdout, stderr, returncode), with the two outputs
             decoded by _bytes2str.
    """
    resolved = _get_command_executable(arguments)
    arguments = [_bytes2str(item) for item in resolved]

    LOG.info('Running command: %s' % ' '.join(arguments))
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    stdout, stderr = process.communicate()
    status = process.returncode

    return _bytes2str(stdout), _bytes2str(stderr), status
490 | ||
491 | ||
c07f9fc5 FG |
def command_with_stdin(arguments, stdin):
    """
    Run *arguments*, feeding *stdin* to the process's standard input.

    The argument list is handed to ``subprocess.Popen`` as given. The
    captured stdout is logged at debug level.

    :return: the captured stdout, exactly as returned by communicate().
    :raises: SystemExit if the process exits with a non-zero status
             (stderr is logged as an error first).
    """
    LOG.info("Running command with stdin: " + " ".join(arguments))
    process = subprocess.Popen(
        arguments,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out, err = process.communicate(stdin)
    LOG.debug(out)
    status = process.returncode
    if status == 0:
        return out

    LOG.error(err)
    raise SystemExit(
        "'{cmd}' failed with status code {returncode}".format(
            cmd=arguments,
            returncode=status,
        )
    )
510 | ||
511 | ||
7c673cae FG |
def _bytes2str(string):
    """Decode UTF-8 ``bytes`` to text; return any other value unchanged."""
    if isinstance(string, bytes):
        return string.decode('utf-8')
    return string
514 | ||
515 | ||
def command_init(arguments, **kwargs):
    """
    Start a command through ``subprocess.Popen`` without waiting for it.

    The executable is resolved first, so a missing program fails with a
    helpful error message. Extra keyword arguments are passed on to
    ``subprocess.Popen``. stdout and stderr are captured through pipes.

    .. note:: This should be the preferred way of calling
      ``subprocess.Popen`` since it provides the caller with the safety
      net of making sure that executables *will* be found and will error
      nicely otherwise.

    :return: the started ``subprocess.Popen`` object.
    """
    resolved = _get_command_executable(arguments)
    arguments = [_bytes2str(item) for item in resolved]

    LOG.info('Running command: %s' % ' '.join(arguments))
    return subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
538 | ||
539 | ||
def command_wait(process):
    """
    Wait for *process* to finish and collect its output.

    :return: tuple (stdout, stderr, returncode), with the two outputs
             decoded by _bytes2str.
    """
    stdout, stderr = process.communicate()
    status = process.returncode
    return _bytes2str(stdout), _bytes2str(stderr), status
548 | ||
549 | ||
def command_check_call(arguments, exit=False):
    """
    Run a command through ``subprocess.check_call`` after resolving the
    executable, so a missing program fails with a helpful error message.

    When ``exit`` is set to ``True`` a failing command causes a clean
    (sans traceback) system exit; otherwise the
    ``subprocess.CalledProcessError`` is propagated.

    .. note:: This should be the preferred way of calling
      ``subprocess.check_call`` since it provides the caller with the
      safety net of making sure that executables *will* be found and will
      error nicely otherwise.
    """
    arguments = _get_command_executable(arguments)
    cmdline = ' '.join(arguments)
    LOG.info('Running command: %s', cmdline)
    try:
        return subprocess.check_call(arguments)
    except subprocess.CalledProcessError as error:
        if not exit:
            raise
        if error.output:
            LOG.error(error.output)
        raise SystemExit(
            "'{cmd}' failed with status code {returncode}".format(
                cmd=cmdline,
                returncode=error.returncode,
            )
        )
578 | ||
579 | ||
7c673cae FG |
580 | # |
581 | # An alternative block_path implementation would be | |
582 | # | |
583 | # name = basename(dev) | |
584 | # return /sys/devices/virtual/block/$name | |
585 | # | |
586 | # It is however more fragile because it relies on the fact | |
587 | # that the basename of the device the user will use always | |
588 | # matches the one the driver will use. On Ubuntu 14.04, for | |
589 | # instance, when multipath creates a partition table on | |
590 | # | |
591 | # /dev/mapper/353333330000007d0 -> ../dm-0 | |
592 | # | |
593 | # it will create partition devices named | |
594 | # | |
595 | # /dev/mapper/353333330000007d0-part1 | |
596 | # | |
597 | # which is the same device as /dev/dm-1 but not a symbolic | |
598 | # link to it: | |
599 | # | |
600 | # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1 | |
601 | # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1 | |
602 | # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0 | |
603 | # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1 | |
604 | # | |
605 | # Using the basename in this case fails. | |
606 | # | |
607 | ||
608 | ||
def block_path(dev):
    """
    Return the SYSFS/dev/block/<major>:<minor> path for *dev*, computed
    from the device number of the resolved path. On FreeBSD *dev* is
    returned unchanged.
    """
    if FREEBSD:
        return dev
    rdev = os.stat(os.path.realpath(dev)).st_rdev
    return "{sysfs}/dev/block/{M}:{m}".format(
        sysfs=SYSFS, M=os.major(rdev), m=os.minor(rdev))
616 | ||
617 | ||
def get_dm_uuid(dev):
    """
    Read the ``dm/uuid`` attribute found under block_path(dev).

    :return: the raw file contents (not stripped), or False if the
             attribute file does not exist.
    """
    uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
    LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
    if not os.path.exists(uuid_path):
        return False
    # close the file deterministically instead of relying on the GC
    with open(uuid_path, 'r') as uuid_file:
        uuid = uuid_file.read()
    LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
    return uuid
626 | ||
627 | ||
def is_mpath(dev):
    """
    True if the path is managed by multipath

    The decision is based on the device-mapper uuid of *dev*: it must
    start with ``mpath-`` or ``part<N>-mpath-``. Always False on FreeBSD.

    :return: bool
    """
    if FREEBSD:
        return False
    uuid = get_dm_uuid(dev)
    if not uuid:
        # no dm/uuid attribute (False) or an empty one
        return False
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return re.match(r'(part\d+-)?mpath-', uuid) is not None
638 | ||
639 | ||
def get_dev_name(path):
    """
    Convert a /dev path into a device name, e.g.::

        /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1

    The '/dev/' prefix is dropped and every remaining '/' becomes '!'.
    *path* must start with '/dev/'.
    """
    prefix = '/dev/'
    assert path.startswith('/dev/')
    relative = path[len(prefix):]
    return '!'.join(relative.split('/'))
655 | ||
656 | ||
def get_dev_path(name):
    """
    Convert a device name into its /dev path, e.g.::

        sdb -> /dev/sdb, cciss!c0d1 -> /dev/cciss/c0d1

    Every '!' in the name becomes '/'.
    """
    relative = '/'.join(name.split('!'))
    return '/dev/' + relative
667 | ||
668 | ||
def get_dev_relpath(name):
    """
    Convert a device name (cciss!c0d1) into a path relative to /dev
    (cciss/c0d1) by turning every '!' into '/'.
    """
    return '/'.join(name.split('!'))
674 | ||
675 | ||
def get_dev_size(dev, size='megabytes'):
    """
    Measure a device by seeking to its end.

    A failure while seeking is logged as a warning and None is returned;
    a failure to open *dev* is propagated to the caller.

    :param dev: path of the device to measure
    :param size: 'bytes' or 'megabytes'; any other value is treated as
                 'megabytes'
    :return: the size, rounded down to a whole number of units
    """
    unit_sizes = {'bytes': 1, 'megabytes': 1024 * 1024}
    handle = os.open(dev, os.O_RDONLY)
    try:
        end_offset = os.lseek(handle, 0, os.SEEK_END)
        # default to megabytes
        unit = unit_sizes.get(size, 1024 * 1024)
        return end_offset // unit
    except Exception as error:
        LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
    finally:
        os.close(handle)
697 | ||
698 | ||
224ce89b WB |
def stmode_is_diskdevice(dmode):
    """
    True if the stat mode *dmode* describes a disk device: a block
    device, or on FreeBSD a character device.
    """
    # FreeBSD does not have block devices
    # All disks are character devices
    return stat.S_ISBLK(dmode) or (FREEBSD and stat.S_ISCHR(dmode))
706 | ||
707 | ||
def dev_is_diskdevice(dev):
    """True if *dev* is a disk device; symlinks are followed (os.stat)."""
    return stmode_is_diskdevice(os.stat(dev).st_mode)
711 | ||
712 | ||
def ldev_is_diskdevice(dev):
    """True if *dev* itself is a disk device; a symlink is not followed
    (os.lstat)."""
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
716 | ||
717 | ||
def path_is_diskdevice(path):
    """True if *path*, once resolved with os.path.realpath, is a disk
    device."""
    return dev_is_diskdevice(os.path.realpath(path))
721 | ||
722 | ||
7c673cae FG |
def get_partition_mpath(dev, pnum):
    """
    Return the first holder of the multipath device *dev* whose dm uuid
    starts with ``part<pnum>-mpath-``, or None if there is none.
    """
    matches = list_partitions_mpath(
        dev, "part{pnum}-mpath-".format(pnum=pnum))
    return matches[0] if matches else None
730 | ||
731 | ||
3efd9988 FG |
def retry(on_error=Exception, max_tries=10, wait=0.2, backoff=0):
    """
    Decorator factory: call the wrapped function again when it raises
    *on_error*.

    The function is called up to *max_tries* times (at least once). After
    failed attempt number ``i`` (counting from 0) the wrapper sleeps
    ``wait + backoff * i`` seconds. The last attempt is not guarded, so
    its exception propagates to the caller.
    """
    def wrapper(func):
        @functools.wraps(func)
        def repeat(*args, **kwargs):
            attempt = 0
            while attempt < max_tries - 1:
                try:
                    return func(*args, **kwargs)
                except on_error:
                    time.sleep(wait + backoff * attempt)
                attempt += 1
            # final attempt: let any exception through
            return func(*args, **kwargs)
        return repeat
    return wrapper
744 | ||
745 | ||
@retry(Error)
def get_partition_dev(dev, pnum):
    """
    get the device name for a partition

    assume that partitions are named like the base dev,
    with a number, and optionally
    some intervening characters (like 'p').  e.g.,

       sda 1 -> sda1
       cciss/c0d1 1 -> cciss!c0d1p1

    The intervening characters must not be digits, so that partition 1
    never matches ``sda11``.

    :param dev: path of the base device
    :param pnum: partition number (int or numeric string)
    :return: the /dev path of the partition
    :raises: Error if no such partition is found; the lookup is retried
             by the ``retry`` decorator before the error propagates.
    """
    partname = None
    error_msg = ""
    if is_mpath(dev):
        partname = get_partition_mpath(dev, pnum)
    else:
        name = get_dev_name(os.path.realpath(dev))
        sys_entry = os.path.join(BLOCKDIR, name)
        error_msg = " in %s" % sys_entry
        # base name, optional non-digit separator, exact partition number
        part_re = re.compile(
            re.escape(name) + r'\D*' + re.escape(str(pnum)) + r'$')
        for f in os.listdir(sys_entry):
            if part_re.match(f):
                # we want the shortest name that starts with the base name
                # and ends with the partition number
                if not partname or len(f) < len(partname):
                    partname = f
    if partname:
        return get_dev_path(partname)
    # %s rather than %d: pnum may be a string, and a TypeError raised
    # here would bypass both the retry and the intended Error
    raise Error('partition %s for %s does not appear to exist%s' %
                (pnum, dev, error_msg))
7c673cae FG |
777 | |
778 | ||
def list_all_partitions():
    """
    Return a dict mapping each device name to its list of partitions

    Device names come from the entries of BLOCKDIR, or on FreeBSD from
    the fourth column of PROCDIR/partitions.
    """
    # initialised before the branch: the FreeBSD path used to hit an
    # unbound local because only the other branch created the dict
    dev_part_list = {}
    if not FREEBSD:
        for name in os.listdir(BLOCKDIR):
            # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
            if re.match(r'^fd\d$', name):
                continue
            dev_part_list[name] = list_partitions(get_dev_path(name))
    else:
        with open(os.path.join(PROCDIR, "partitions")) as partitions:
            for line in partitions:
                columns = line.split()
                if len(columns) >= 4:
                    name = columns[3]
                    dev_part_list[name] = list_partitions(get_dev_path(name))
    return dev_part_list
799 | ||
800 | ||
def list_partitions(dev):
    """
    List the partitions of *dev* (resolved with os.path.realpath),
    using the multipath lookup when the device is managed by multipath.
    """
    dev = os.path.realpath(dev)
    lister = list_partitions_mpath if is_mpath(dev) else list_partitions_device
    return lister(dev)
807 | ||
808 | ||
def list_partitions_mpath(dev, part_re=r"part\d+-mpath-"):
    """
    List the holders of *dev* whose dm uuid matches *part_re*.

    :param dev: path of the multipath device
    :param part_re: regular expression matched (``re.match``) against the
                    contents of each holder's ``dm/uuid`` file
    :return: list of holder names, as found in the ``holders`` directory
    """
    p = block_path(dev)
    partitions = []
    holders = os.path.join(p, 'holders')
    for holder in os.listdir(holders):
        uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
        # close each file deterministically instead of relying on the GC
        with open(uuid_path, 'r') as uuid_file:
            uuid = uuid_file.read()
        LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
        if re.match(part_re, uuid):
            partitions.append(holder)
    return partitions
820 | ||
821 | ||
def list_partitions_device(dev):
    """
    Return the entries of block_path(dev) whose name starts with the
    device name of *dev*.
    """
    basename = get_dev_name(dev)
    return [entry for entry in os.listdir(block_path(dev))
            if entry.startswith(basename)]
832 | ||
833 | ||
def get_partition_base(dev):
    """
    Return the /dev path of the disk that the partition *dev* belongs to.

    :raises: Error if *dev* is not a disk device, is a whole disk (it has
             its own entry in /sys/block), or has no parent there.
    """
    dev = os.path.realpath(dev)
    if not ldev_is_diskdevice(dev):
        raise Error('not a block device', dev)

    partition = get_dev_name(dev)
    if os.path.exists(os.path.join('/sys/block', partition)):
        raise Error('not a partition', dev)

    # find the base
    for disk in os.listdir('/sys/block'):
        candidate = os.path.join('/sys/block', disk, partition)
        if os.path.exists(candidate):
            return get_dev_path(disk)
    raise Error('no parent device for partition', dev)
851 | ||
852 | ||
def is_partition_mpath(dev):
    """
    True if the dm uuid of *dev* starts with ``part<N>-mpath-``.

    A device without a dm uuid is reported as not a partition.
    """
    uuid = get_dm_uuid(dev)
    if not uuid:
        # get_dm_uuid returns False when there is no dm/uuid attribute;
        # re.match would raise TypeError on it
        return False
    return bool(re.match(r'part\d+-mpath-', uuid))
856 | ||
857 | ||
def partnum_mpath(dev):
    """
    Return, as a string, the partition number found in the dm uuid of
    *dev* (the ``N`` of ``part<N>-mpath-``).

    :raises: IndexError if the uuid does not contain that pattern.
    """
    uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', uuid)[0]
861 | ||
862 | ||
def get_partition_base_mpath(dev):
    """
    Return the /dev/mapper path of the first slave of *dev*, named after
    the contents of that slave's ``dm/name`` file.
    """
    slave_path = os.path.join(block_path(dev), 'slaves')
    slaves = os.listdir(slave_path)
    assert slaves
    name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
    # close the file deterministically instead of relying on the GC
    with open(name_path, 'r') as name_file:
        name = name_file.read().strip()
    return os.path.join('/dev/mapper', name)
870 | ||
871 | ||
def is_partition(dev):
    """
    Tell whether a device path is a partition (True) or a full disk
    (False).

    :raises: Error if *dev* is neither a disk device nor a partition.
    """
    if is_mpath(dev):
        return is_partition_mpath(dev)

    dev = os.path.realpath(dev)
    st = os.lstat(dev)
    if not stmode_is_diskdevice(st.st_mode):
        raise Error('not a block device', dev)

    # whole disks have their own entry in BLOCKDIR
    if os.path.exists(os.path.join(BLOCKDIR, get_dev_name(dev))):
        return False

    # make sure it is a partition of something else
    devno = (os.major(st.st_rdev), os.minor(st.st_rdev))
    if not os.path.exists('/sys/dev/block/%d:%d/partition' % devno):
        raise Error('not a disk or partition', dev)
    return True
895 | ||
896 | ||
def is_mounted(dev):
    """
    Check if the given device is mounted.

    PROCDIR/mounts is scanned for an existing, absolute device path that
    resolves to the same real path as *dev*.

    :return: the mount point of the first matching entry, or None.
    """
    dev = os.path.realpath(dev)
    # The mounts file is read as bytes, so compare bytes with bytes:
    # on Python 3 a bytes/str comparison is never equal and no mount
    # would ever be detected.
    if not isinstance(dev, bytes):
        dev = dev.encode('utf-8')
    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
        for line in proc_mounts:
            fields = line.split()
            if len(fields) < 3:
                continue
            mounts_dev = fields[0]
            path = fields[1]
            if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
                mounts_dev = os.path.realpath(mounts_dev)
                if mounts_dev == dev:
                    return _bytes2str(path)
    return None
914 | ||
915 | ||
def is_held(dev):
    """
    List the holders of a device (e.g., a dm-crypt mapping).

    :return: the entries of the device's sysfs ``holders`` directory; an
             empty list for multipath devices or when no such directory
             is found.
    """
    assert os.path.exists(dev)
    if is_mpath(dev):
        return []

    part = get_dev_name(os.path.realpath(dev))

    # full disk?
    directory = '/sys/block/{base}/holders'.format(base=part)
    if os.path.exists(directory):
        return os.listdir(directory)

    # partition?
    # try ever shorter prefixes of the name as the parent disk
    for end in range(len(part), 0, -1):
        directory = '/sys/block/{base}/{part}/holders'.format(
            part=part, base=part[:end])
        if os.path.exists(directory):
            return os.listdir(directory)
    return []
941 | ||
942 | ||
def verify_not_in_use(dev, check_partitions=False):
    """
    Make sure a device (path) is neither mounted nor held by a
    device-mapper mapping.

    With ``check_partitions`` set and *dev* being a whole disk, each of
    its partitions is verified the same way.

    :raises: Error if device is in use.
    """
    def check(path):
        if is_mounted(path):
            raise Error('Device is mounted', path)
        holders = is_held(path)
        if holders:
            raise Error('Device %s is in use by a device-mapper '
                        'mapping (dm-crypt?)' % path, ','.join(holders))

    assert os.path.exists(dev)
    check(dev)

    if check_partitions and not is_partition(dev):
        for partname in list_partitions(dev):
            check(get_dev_path(partname))
968 | ||
969 | ||
def must_be_one_line(line):
    """
    Check that the given content is exactly one newline-terminated line.

    :raises: TruncatedLineError if the content does not end with a
             newline, TooManyLinesError if it holds more than one line.
    :return: Content of the line, without the trailing newline.
    """
    text = _bytes2str(line)

    if not text.endswith('\n'):
        raise TruncatedLineError(text)
    body = text[:-1]
    if '\n' in body:
        raise TooManyLinesError(body)
    return body
985 | ||
986 | ||
def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :raises: Error if the file is not exactly one newline-terminated line.
    :return: Contents of the line, or None if file did not exist.
    """
    path = os.path.join(parent, name)
    try:
        # the context manager closes the handle even when read() fails
        with open(path, 'rb') as one_line_file:
            line = one_line_file.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise

    try:
        line = must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise Error(
            'File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
            )
        )
    return line
1014 | ||
1015 | ||
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline. The content is written to a temporary file next to
    the destination, synced to disk, and then renamed into place.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # fsync only covers what has reached the OS: push the
        # user-space buffer down first
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1029 | ||
1030 | ||
def init_get():
    """
    Ask 'ceph-detect-init' for the init system, with 'sysvinit'
    passed as its --default.

    :return: The single line printed by ceph-detect-init.
    """
    detected = _check_output(
        args=['ceph-detect-init', '--default', 'sysvinit'],
    )
    return must_be_one_line(detected)
1043 | ||
1044 | ||
def check_osd_magic(path):
    """
    Verify that the 'magic' file under path holds the Ceph OSD magic.

    :raises: BadMagicError if the magic file is missing or its content
             differs from CEPH_OSD_ONDISK_MAGIC.
    """
    found = read_one_line(path, 'magic')
    # a missing file (probably not mkfs'ed yet) is treated like a mismatch
    if found is None or found != CEPH_OSD_ONDISK_MAGIC:
        raise BadMagicError(path)
1058 | ||
1059 | ||
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :raises: Error if osd_id is anything but a non-empty run of ASCII
             digits.
    """
    # \Z instead of $: '$' would also accept an id followed by a newline
    if not re.match(r'^[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
1066 | ||
1067 | ||
def allocate_osd_id(
    cluster,
    fsid,
    keyring,
    path,
):
    """
    Obtain an OSD id for the data directory at path.

    If the lockbox directory for fsid holds a 'whoami' file, its id is
    reused: the lockbox 'osd_keyring' is moved to path/keyring and the
    lockbox 'whoami' is removed. Otherwise 'ceph osd new' is run on the
    given cluster, passing the id from path/wanttobe when that file
    has content.

    :raises: Error if the call to allocate the OSD id fails.
    :return: The allocated OSD id.
    """
    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox', fsid)
    osd_keyring = os.path.join(path, 'keyring')

    reserved_id = read_one_line(lockbox_dir, 'whoami')
    if reserved_id:
        LOG.debug('Getting OSD id from Lockbox...')
        shutil.move(os.path.join(lockbox_dir, 'osd_keyring'),
                    osd_keyring)
        path_set_context(osd_keyring)
        os.unlink(os.path.join(lockbox_dir, 'whoami'))
        return reserved_id

    LOG.debug('Allocating OSD id...')
    secrets = Secrets()
    try:
        wanted = read_one_line(path, 'wanttobe')
        wanted_file = os.path.join(path, 'wanttobe')
        if os.path.exists(wanted_file):
            os.unlink(wanted_file)
        new_cmd = [
            'ceph',
            '--cluster', cluster,
            '--name', 'client.bootstrap-osd',
            '--keyring', keyring,
            '-i', '-',
            'osd', 'new',
            fsid,
        ]
        if wanted:
            new_cmd = new_cmd + [wanted]
        raw_id = command_with_stdin(new_cmd, secrets.get_json())
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd create failed', e, e.output)
    osd_id = must_be_one_line(raw_id)
    check_osd_id(osd_id)
    secrets.write_osd_keyring(osd_keyring, osd_id)
    return osd_id
1117 | ||
1118 | ||
def get_osd_id(path):
    """
    Read the OSD id from the 'whoami' file of the OSD at the given path.

    :raises: Error (from check_osd_id) if the id is not numeric.
    :return: The OSD id, or None if there is no 'whoami' file.
    """
    whoami = read_one_line(path, 'whoami')
    if whoami is None:
        return None
    check_osd_id(whoami)
    return whoami
1127 | ||
1128 | ||
def get_ceph_user():
    """
    Pick the user name to use for ceph.

    With CEPH_PREF_USER set, that user must exist; otherwise a message
    is printed and the process exits with status 2. Without it, 'ceph'
    is returned when such a user exists and 'root' when it does not.
    """
    if CEPH_PREF_USER is None:
        try:
            pwd.getpwnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        pwd.getpwnam(CEPH_PREF_USER)
    except KeyError:
        print("No such user:", CEPH_PREF_USER)
        sys.exit(2)
    return CEPH_PREF_USER
1145 | ||
1146 | ||
def get_ceph_group():
    """
    Pick the group name to use for ceph.

    With CEPH_PREF_GROUP set, that group must exist; otherwise a message
    is printed and the process exits with status 2. Without it, 'ceph'
    is returned when such a group exists and 'root' when it does not.
    """
    if CEPH_PREF_GROUP is None:
        try:
            grp.getgrnam('ceph')
        except KeyError:
            return 'root'
        return 'ceph'

    try:
        grp.getgrnam(CEPH_PREF_GROUP)
    except KeyError:
        print("No such group:", CEPH_PREF_GROUP)
        sys.exit(2)
    return CEPH_PREF_GROUP
1163 | ||
1164 | ||
def path_set_context(path):
    """
    Recursively reset the SELinux context of path (when 'restorecon' is
    available) and hand it over to ceph:ceph (when get_ceph_user()
    resolves to 'ceph').
    """
    # restore selinux context to default policy values
    restorecon_available = which('restorecon')
    if restorecon_available:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
1173 | ||
1174 | ||
def _check_output(args=None, **kwargs):
    """
    Run a command through command() and return its stdout as a string.

    :raises: subprocess.CalledProcessError when the command exits with
             a non-zero status; its ``output`` attribute carries
             stdout followed by stderr.
    """
    out, err, ret = command(args, **kwargs)
    if not ret:
        return _bytes2str(out)
    failure = subprocess.CalledProcessError(ret, args[0])
    failure.output = out + err
    raise failure
1183 | ||
1184 | ||
def get_conf(cluster, variable):
    """
    Get the value of the given configuration variable from the
    cluster.

    :raises: Error if call to ceph-conf fails.
    :return: The variable value or None.
    """
    try:
        out, err, ret = command(
            [
                'ceph-conf',
                '--cluster={cluster}'.format(
                    cluster=cluster,
                ),
                '--name=osd.',
                '--lookup',
                variable,
            ],
            close_fds=True,
        )
    except OSError as e:
        # command() raised before returning, so there is no stderr to
        # report; referencing it here would raise UnboundLocalError and
        # hide the real failure
        raise Error('error executing ceph-conf', e)
    if ret == 1:
        # config entry not found
        return None
    elif ret != 0:
        raise Error('getting variable from configuration failed')
    value = out.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    if not value:
        return None
    return value
1218 | ||
1219 | ||
def get_conf_with_default(cluster, variable):
    """
    Get a config value that is known to the C++ code, by asking
    'ceph-osd --show-config-value'.

    This will fail if called on variables that are not defined in
    common config options.

    :raises: Error if the ceph-osd call fails.
    :return: The first line of the command output.
    """
    show_cmd = [
        'ceph-osd',
        '--cluster={cluster}'.format(cluster=cluster),
        '--show-config-value={variable}'.format(variable=variable),
    ]
    try:
        out = _check_output(args=show_cmd, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'getting variable from configuration failed',
            e,
        )

    first_line = str(out).split('\n', 1)[0]
    return first_line
1248 | ||
1249 | ||
def get_fsid(cluster):
    """
    Look up the 'fsid' configuration value of the cluster.

    :raises: Error if the configured value is the all-zero uuid.
    :return: The fsid, lower-cased.
    """
    # uuids from boost always default to 'the empty uuid'
    empty_uuid = '00000000-0000-0000-0000-000000000000'
    configured = get_conf_with_default(cluster=cluster, variable='fsid')
    if configured == empty_uuid:
        raise Error('getting cluster uuid from configuration failed')
    return configured.lower()
1261 | ||
1262 | ||
def get_dmcrypt_key_path(
    _uuid,
    key_dir,
    luks
):
    """
    Build the path of the dmcrypt key file for a partition uuid.

    LUKS key files carry a ".luks.key" suffix, plain ones are named
    after the uuid alone.

    :return: Path to the dmcrypt key file, callers should check for existence.
    """
    filename = (_uuid + ".luks.key") if luks else _uuid
    return os.path.join(key_dir, filename)
1279 | ||
1280 | ||
def get_dmcrypt_key(
    _uuid,
    key_dir,
    luks
):
    """
    Locate or fetch the dm-crypt key for the partition with this uuid.

    A key file in key_dir (legacy layout) takes precedence. Otherwise
    the lockbox directory STATEDIR/osd-lockbox/<uuid> is consulted and
    the key is fetched with 'ceph config-key get'.

    :raises: Error if no key source exists, the cluster cannot be
             resolved, the key-management-mode is unknown or the key
             cannot be fetched.
    :return: A one-element tuple holding the legacy key file path, or
             the base64-decoded key fetched via the lockbox.
    """
    legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
    if os.path.exists(legacy_path):
        # the tuple tells the caller this is a path, not key material
        return (legacy_path,)
    path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
    if os.path.exists(path):
        mode = get_oneliner(path, 'key-management-mode')
        osd_uuid = get_oneliner(path, 'osd-uuid')
        ceph_fsid = read_one_line(path, 'ceph_fsid')
        if ceph_fsid is None:
            LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
                        "for cluster name")
            cluster = 'ceph'
        else:
            cluster = find_cluster_by_uuid(ceph_fsid)
            if cluster is None:
                raise Error('No cluster conf found in ' + SYSCONFDIR +
                            ' with fsid %s' % ceph_fsid)

        if mode == KEY_MANAGEMENT_MODE_V1:
            key, stderr, ret = command(
                [
                    'ceph',
                    '--cluster', cluster,
                    '--name',
                    'client.osd-lockbox.' + osd_uuid,
                    '--keyring',
                    os.path.join(path, 'keyring'),
                    'config-key',
                    'get',
                    'dm-crypt/osd/' + osd_uuid + '/luks',
                ],
            )
            LOG.debug("stderr " + stderr)
            # an assert would vanish under -O and let a failed fetch be
            # decoded and used as the key
            if ret != 0:
                raise Error('unable to fetch dm-crypt key',
                            osd_uuid, stderr)
            return base64.b64decode(key)
        else:
            raise Error('unknown key-management-mode ' + str(mode))
    raise Error('unable to read dm-crypt key', path, legacy_path)
1324 | ||
1325 | ||
def _dmcrypt_map(
    rawdev,
    key,
    _uuid,
    cryptsetup_parameters,
    luks,
    format_dev=False,
):
    """
    Map rawdev through cryptsetup under the name _uuid.

    key is either a one-element tuple holding a key file path (legacy)
    or the key itself, which is then fed to cryptsetup on stdin.

    :raises: Error if one of the commands fails.
    :return: The mapped device: the existing mapping reported by
             dmcrypt_is_mapped, or /dev/mapper/<_uuid>.
    """
    already_mapped = dmcrypt_is_mapped(_uuid)
    if already_mapped:
        return already_mapped

    if isinstance(key, tuple):
        # legacy, before lockbox
        keypath = key[0]
        assert os.path.exists(keypath)
        key = None
    else:
        # '-' makes cryptsetup read the key from stdin
        keypath = '-'

    format_cmd = [
        'cryptsetup', '--batch-mode', '--key-file', keypath,
        'luksFormat', rawdev,
    ] + cryptsetup_parameters
    open_cmd = [
        'cryptsetup', '--key-file', keypath,
        'luksOpen', rawdev, _uuid,
    ]
    plain_cmd = [
        'cryptsetup', '--key-file', keypath,
        'create', _uuid, rawdev,
    ] + cryptsetup_parameters

    if not luks:
        # Plain mode has no format function, nor any validation
        # that the key is correct.
        steps = [plain_cmd]
    elif format_dev:
        steps = [format_cmd, open_cmd]
    else:
        steps = [open_cmd]

    mapped = '/dev/mapper/' + _uuid
    try:
        for step in steps:
            command_with_stdin(step, key)
        # set proper ownership of mapped device
        command_check_call(['chown', 'ceph:ceph', mapped])
    except subprocess.CalledProcessError as e:
        raise Error('unable to map device', rawdev, e)
    return mapped
1388 | ||
1389 | ||
3efd9988 FG |
@retry(Error, max_tries=10, wait=0.5, backoff=1.0)
def dmcrypt_unmap(_uuid):
    """
    Remove the /dev/mapper/<_uuid> mapping, if there is one.

    :raises: Error if 'cryptsetup remove' fails.
    """
    if os.path.exists('/dev/mapper/' + _uuid):
        try:
            command_check_call(['cryptsetup', 'remove', _uuid])
        except subprocess.CalledProcessError as e:
            raise Error('unable to unmap device', _uuid, e)
7c673cae FG |
1398 | |
1399 | ||
def mount(
    dev,
    fstype,
    options,
):
    """
    Mount a device of the given filesystem type on a freshly created
    temporary directory under STATEDIR/tmp.

    When options is None, the MOUNT_OPTIONS entry for fstype (or an
    empty string) is used.

    :raises: ValueError if dev or fstype is None, MountError if a
             command fails.
    :return: The mount point.
    """
    # sanity check: none of the arguments are None
    if dev is None:
        raise ValueError('dev may not be None')
    if fstype is None:
        raise ValueError('fstype may not be None')

    # pick best-of-breed mount options based on fs type
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # mkdtemp expects 'dir' to exist, so create it when missing
    tmp_root = STATEDIR + '/tmp'
    if not os.path.exists(tmp_root):
        os.makedirs(tmp_root)

    mount_point = tempfile.mkdtemp(prefix='mnt.', dir=tmp_root)
    mount_cmd = ['mount', '-t', fstype, '-o', options, '--',
                 dev, mount_point]
    try:
        LOG.debug('Mounting %s on %s with options %s',
                  dev, mount_point, options)
        command_check_call(mount_cmd)
        if which('restorecon'):
            command(['restorecon', mount_point])
    except subprocess.CalledProcessError as e:
        # best effort: do not leave the unused directory behind
        try:
            os.rmdir(mount_point)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return mount_point
1457 | ||
1458 | ||
@retry(UnmountError, max_tries=3, wait=0.5, backoff=1.0)
def unmount(
    path,
    do_rm=True,
):
    """
    Unmount the given mount point and, unless do_rm is false, remove
    its directory.

    :raises: UnmountError if /bin/umount fails.
    """
    LOG.debug('Unmounting %s', path)
    try:
        command_check_call(['/bin/umount', '--', path])
    except subprocess.CalledProcessError as e:
        raise UnmountError(e)
    if do_rm:
        os.rmdir(path)
1481 | ||
1482 | ||
1483 | ########################################### | |
1484 | ||
def extract_parted_partition_numbers(partitions):
    """
    Extract the partition numbers from machine-readable parted output.

    Every line of partitions that starts with digits contributes that
    leading number.

    :return: A list of ints (empty if no line starts with a digit).
    """
    # a real list: on Python 3 map() yields an iterator, which is always
    # truthy and makes max() fail when there are no partitions
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1488 | ||
1489 | ||
def get_free_partition_index(dev):
    """
    Get the next free partition index on a given device.

    :raises: Error if the parted output is empty or does not look as
             expected.
    :return: Index number (> 1 if there is already a partition on the device)
             or 1 if there is no partition table.
    """
    try:
        lines = _check_output(
            args=[
                'parted',
                '--machine',
                '--',
                dev,
                'print',
            ],
        )
    except subprocess.CalledProcessError as e:
        LOG.info('cannot read partition index; assume it '
                 'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        raise Error('parted failed to output anything')
    LOG.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        raise Error('parted output expected to contain one of ' +
                    'CHS; CYL; or BYT; : ' + lines)
    real_dev = os.path.realpath(dev)
    if real_dev not in lines:
        raise Error('parted output expected to contain ' + dev + ': ' + lines)
    # split once only: the partition lines follow the first mention of
    # the device, and a second mention must not break the unpacking
    _, partitions = lines.split(real_dev, 1)
    # materialize: an iterator would always be truthy and max() of an
    # empty one raises ValueError
    partition_numbers = list(extract_parted_partition_numbers(partitions))
    if partition_numbers:
        return max(partition_numbers) + 1
    else:
        return 1
1528 | ||
1529 | ||
def check_journal_reqs(args):
    """
    Run the three 'ceph-osd --check-*-journal' probes for args.cluster.

    :return: A tuple (allows, wants, needs) of booleans, each true when
             the corresponding ceph-osd call exited with status 0.
    """
    log_file = "/var/log/ceph/$cluster-osd-check.log"

    def probe(flag):
        # one ceph-osd probe; the exit status is the only result used
        _, _, status = command([
            'ceph-osd', flag,
            '-i', '0',
            '--log-file', log_file,
            '--cluster', args.cluster,
            '--setuser', get_ceph_user(),
            '--setgroup', get_ceph_group(),
        ])
        return not status

    allows = probe('--check-allows-journal')
    wants = probe('--check-wants-journal')
    needs = probe('--check-needs-journal')
    return (allows, wants, needs)
1557 | ||
1558 | ||
def update_partition(dev, description):
    """
    Must be called after modifying a partition table so that the
    kernel learns about the change and fires udev events accordingly.
    A side effect of partprobe is to remove partitions and add them
    again. The udevadm settle before each partprobe waits for ongoing
    udev events to complete, in case one of them relies on an existing
    partition on dev. The final udevadm settle guarantees to the
    caller that all udev events related to the partition table change
    have been processed, i.e. the 95-ceph-osd.rules actions and mode
    changes, group changes etc. are complete.

    partprobe is tried up to five times, sleeping 60s after a failure
    that reports a busy device.

    :raises: Error if partprobe still fails after the last attempt;
             subprocess.CalledProcessError for any other partprobe
             failure.
    """
    def settle():
        command_check_call(['udevadm', 'settle', '--timeout=600'])

    LOG.debug('Calling partprobe on %s device %s', description, dev)
    error = 'unknown error'
    partprobe = _get_command_executable(['partprobe'])[0]
    for _attempt in range(5):
        settle()
        try:
            _check_output(['flock', '-s', dev, partprobe, dev])
        except subprocess.CalledProcessError as e:
            error = e.output
            busy = ('unable to inform the kernel' in error or
                    'Device or resource busy' in error)
            if not busy:
                raise
            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
                      % (dev, error))
            time.sleep(60)
        else:
            break
    else:
        # every attempt failed
        raise Error('partprobe %s failed : %s' % (dev, error))
    settle()
1592 | ||
1593 | ||
def zap_linux(dev):
    """
    Wipe every partition of dev, then destroy and recreate its
    partition table.

    :raises: Error if one of the external commands fails.
    """
    try:
        # Thoroughly wipe all partitions of any traces of
        # Filesystems or OSD Journals
        #
        # In addition we need to write 110M (read the following comment
        # for more details on the context of this magic number) of data to
        # each partition to make sure that after re-creating the same
        # partition there is no trace left of any previous Filesystem or
        # OSD Journal

        LOG.debug('Writing zeros to existing partitions on %s', dev)

        for partname in list_partitions(dev):
            partition = get_dev_path(partname)
            command_check_call(
                [
                    'wipefs',
                    '--all',
                    partition,
                ],
            )

            # a typical bluestore device has
            # 1. a 100M xfs data partition
            # 2. a bluestore_block_size block partition
            # 3. a bluestore_block_db_size block.db partition
            # 4. a bluestore_block_wal_size block.wal partition
            # so we need to wipe out the bits storing bluestore's
            # collections' meta information in that case, to prevent the
            # OSD from comparing the meta data, like OSD id and fsid,
            # stored on the device to be zapped with the ones passed in.
            # here, we assume that the allocator of bluestore puts these
            # meta data at the beginning of the block partition. without
            # knowing the actual layout of the bluefs, we add an extra 10M
            # to be on the safe side. if this partition was formatted for
            # a filesystem, 10MB would be more than enough to nuke its
            # superblock.
            count = min(PrepareBluestoreData.SPACE_SIZE + 10,
                        get_dev_size(partition))
            command_check_call(
                [
                    'dd',
                    'if=/dev/zero',
                    'of={path}'.format(path=partition),
                    'bs=1M',
                    'count={count}'.format(count=count),
                ],
            )

        LOG.debug('Zapping partition table on %s', dev)

        # try to wipe out any GPT partition table backups.  sgdisk
        # isn't too thorough.
        lba_size = 4096
        size = 33 * lba_size
        # zero the last `size` bytes of the device
        with open(dev, 'wb') as dev_file:
            dev_file.seek(-size, os.SEEK_END)
            dev_file.write(size * b'\0')

        command_check_call(
            [
                'sgdisk',
                '--zap-all',
                '--',
                dev,
            ],
        )
        command_check_call(
            [
                'sgdisk',
                '--clear',
                '--mbrtogpt',
                '--',
                dev,
            ],
        )
        update_partition(dev, 'zapped')

    except subprocess.CalledProcessError as e:
        raise Error(e)
1674 | ||
1675 | ||
224ce89b WB |
def zap_freebsd(dev):
    """
    Destroy the partitioning of dev with 'gpart destroy -F'.

    :raises: Error if gpart fails.
    """
    # For FreeBSD we just need to zap the partition.
    destroy_cmd = ['gpart', 'destroy', '-F', dev]
    try:
        command_check_call(destroy_cmd)
    except subprocess.CalledProcessError as e:
        raise Error(e)
1690 | ||
1691 | ||
def zap(dev):
    """
    Destroy the partition table and content of a given disk.

    The path is resolved first; the platform specific implementation
    (zap_freebsd or zap_linux) does the actual work.

    :raises: Error if dev is not a block device or is a partition.
    """
    real_dev = os.path.realpath(dev)
    is_full_block_device = (stat.S_ISBLK(os.stat(real_dev).st_mode) and
                            not is_partition(real_dev))
    if not is_full_block_device:
        raise Error('not full block device; cannot zap', real_dev)
    wipe = zap_freebsd if FREEBSD else zap_linux
    wipe(real_dev)
1704 | ||
1705 | ||
7c673cae FG |
def adjust_symlink(target, path):
    """
    Make path a symlink pointing to target.

    A regular file at path, or a symlink pointing elsewhere, is
    removed first; a symlink that already points to target is left
    alone.

    :raises: Error if the old entry cannot be inspected or removed, or
             the symlink cannot be created.
    """
    create = True
    if os.path.lexists(path):
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    create = False
        # only filesystem failures are translated; a bare except would
        # also swallow KeyboardInterrupt and SystemExit
        except OSError:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except OSError:
            raise Error('unable to create symlink %s -> %s' % (path, target))
1730 | ||
1731 | ||
def get_mount_options(cluster, fs_type):
    """
    Look up the mount options configured for a filesystem type.

    'osd_mount_options_<fstype>' is consulted first, then
    'osd_fs_mount_options_<fstype>'.

    :return: The options with all whitespace removed, or None if
             neither variable is set.
    """
    mount_options = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(
            fstype=fs_type,
        ),
    )
    if mount_options is None:
        mount_options = get_conf(
            cluster,
            variable='osd_fs_mount_options_{fstype}'.format(
                fstype=fs_type,
            ),
        )
    if mount_options is not None:
        # remove whitespaces, whichever variable the value came from
        mount_options = "".join(mount_options.split())
    return mount_options
1750 | ||
1751 | ||
class Device(object):
    """
    A device path that is not a partition, on which partitions can be
    created and looked up.
    """

    def __init__(self, path, args):
        self.args = args
        self.path = path
        # lazily filled caches
        self.dev_size = None
        self.partitions = {}
        self.ptype_map = None
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition with sgdisk and refresh the partition table.

        With num == 0 the next free partition index is used; with
        size <= 0 the partition takes the largest free block.

        :raises: Error if size exceeds the device size.
        :return: The partition number.
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size <= 0:
            new = '--largest-new={num}'.format(num=num)
        else:
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
            if size > self.get_dev_size():
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, self.get_dev_size()))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, self.get_dev_size(), name))

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk_cmd = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk_cmd, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the 'tobe' partition type for a partition name.

        'data' is an alias for 'osd'; 'lockbox' only depends on whether
        the device is multipath.
        """
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if self.ptype_map is None:
            # borrow the map of the partition class matching this device
            probe = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = probe.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """Return the (cached) DevicePartition for partition num."""
        if num in self.partitions:
            return self.partitions[num]
        part_dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=part_dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return self.partitions[num]

    def get_dev_size(self):
        """Return the size reported by get_dev_size(), cached."""
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        return Device(path, args)
1827 | ||
1828 | ||
class DevicePartition(object):
    """
    A partition of a device, together with the partition type map
    that applies to it.
    """

    def __init__(self, args):
        self.args = args
        self.num = None
        self.rawdev = None
        self.dev = None
        # uuid and ptype are looked up on first use
        self.uuid = None
        self.ptype_map = None
        self.ptype = None
        self.set_variables_ptype()

    def get_uuid(self):
        """Return the partition uuid of rawdev, cached."""
        if self.uuid is None:
            self.uuid = get_partition_uuid(self.rawdev)
        return self.uuid

    def get_ptype(self):
        """Return the partition type of rawdev, cached."""
        if self.ptype is None:
            self.ptype = get_partition_type(self.rawdev)
        return self.ptype

    def set_partition_number(self, num):
        self.num = num

    def get_partition_number(self):
        return self.num

    def set_dev(self, dev):
        # both point to the same device until a subclass remaps dev
        self.dev = dev
        self.rawdev = dev

    def get_dev(self):
        return self.dev

    def get_rawdev(self):
        return self.rawdev

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['regular']

    def ptype_for_name(self, name):
        """Return the 'ready' partition type for a partition name."""
        return self.ptype_map[name]['ready']

    @staticmethod
    @retry(OSError)
    def factory(path, dev, args):
        """
        Build the DevicePartition flavor matching the device: multipath
        first, then the dmcrypt type from args, else a regular one.
        """
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        on_mpath = ((path is not None and is_mpath(path)) or
                    (dev is not None and is_mpath(dev)))
        if on_mpath:
            flavor = DevicePartitionMultipath
        elif dmcrypt_type == 'luks':
            flavor = DevicePartitionCryptLuks
        elif dmcrypt_type == 'plain':
            flavor = DevicePartitionCryptPlain
        else:
            flavor = DevicePartition
        partition = flavor(args)
        partition.set_dev(dev)
        return partition
1888 | ||
1889 | ||
class DevicePartitionMultipath(DevicePartition):
    """DevicePartition that uses the 'mpath' partition type map."""

    def set_variables_ptype(self):
        # called from DevicePartition.__init__
        self.ptype_map = PTYPE['mpath']
1894 | ||
1895 | ||
class DevicePartitionCrypt(DevicePartition):
    """
    Common part of the dm-crypt partitions.

    map() calls self.luks(), which is not defined here; the subclasses
    in this file provide it together with setup_crypt().
    """

    def __init__(self, args):
        super(DevicePartitionCrypt, self).__init__(args)
        # set by setup_crypt() in the subclasses
        self.osd_dm_key = None
        self.cryptsetup_parameters = CryptHelpers.get_cryptsetup_parameters(
            self.args)
        self.dmcrypt_type = CryptHelpers.get_dmcrypt_type(self.args)
        self.dmcrypt_keysize = CryptHelpers.get_dmcrypt_keysize(self.args)

    def setup_crypt(self):
        # no-op here; overridden by the plain and LUKS subclasses
        pass

    def map(self):
        """Map rawdev through dm-crypt and make dev the mapped device."""
        self.setup_crypt()
        self.dev = _dmcrypt_map(
            rawdev=self.rawdev,
            key=self.osd_dm_key,
            _uuid=self.get_uuid(),
            cryptsetup_parameters=self.cryptsetup_parameters,
            luks=self.luks(),
            format_dev=True,
        )

    def unmap(self):
        """Remove the dm-crypt mapping and point dev back to rawdev."""
        self.setup_crypt()
        dmcrypt_unmap(self.get_uuid())
        self.dev = self.rawdev

    def format(self):
        # map() passes format_dev=True to _dmcrypt_map
        self.setup_crypt()
        self.map()
1928 | ||
1929 | ||
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """dm-crypt partition in plain (non-LUKS) mode."""

    def luks(self):
        return False

    def setup_crypt(self):
        """
        Fetch the dm-crypt key and add the key size to the cryptsetup
        parameters. Does nothing once the key has been set.
        """
        if self.osd_dm_key is None:
            self.cryptsetup_parameters += [
                '--key-size', str(self.dmcrypt_keysize)]
            self.osd_dm_key = get_dmcrypt_key(
                self.get_uuid(), self.args.dmcrypt_key_dir, False)

    def set_variables_ptype(self):
        self.ptype_map = PTYPE['plain']
1947 | ||
1948 | ||
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """dm-crypt partition using the 'luks' PTYPE table."""

    def luks(self):
        """Return True: this variant uses LUKS."""
        return True

    def setup_crypt(self):
        """Fetch the dm-crypt key once, adding --key-size if needed."""
        already_configured = self.osd_dm_key is not None
        if already_configured:
            return

        # A key size of 1024 is deliberately not forced into the
        # cryptsetup parameters: the cryptsetup defaults should
        # prevail for the actual LUKS key lengths.
        if self.dmcrypt_keysize != 1024:
            key_size_option = ['--key-size', str(self.dmcrypt_keysize)]
            self.cryptsetup_parameters.extend(key_size_option)

        partition_uuid = self.get_uuid()
        self.osd_dm_key = get_dmcrypt_key(
            partition_uuid, self.args.dmcrypt_key_dir, True)

    def set_variables_ptype(self):
        """Point ptype_map at the 'luks' partition type table."""
        luks_types = PTYPE['luks']
        self.ptype_map = luks_types
1973 | ||
1974 | ||
class Prepare(object):
    """Entry point of the ``prepare`` subcommand.

    Holds the parsed arguments, builds the argument parsers and
    dispatches to the bluestore or filestore implementation, which must
    provide ``_prepare()``.
    """

    def __init__(self, args):
        self.args = args

    @staticmethod
    def parser():
        """Return the parser for the options common to every prepare."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--cluster',
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )
        parser.add_argument(
            '--cluster-uuid',
            metavar='UUID',
            help='cluster uuid to assign this disk to',
        )
        parser.add_argument(
            '--osd-uuid',
            metavar='UUID',
            help='unique OSD uuid to assign this disk to',
        )
        parser.add_argument(
            '--osd-id',
            metavar='ID',
            help='unique OSD id to assign this disk to',
        )
        parser.add_argument(
            '--crush-device-class',
            help='crush device class to assign this disk to',
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true', default=None,
            help='encrypt DATA and/or JOURNAL devices with dm-crypt',
        )
        parser.add_argument(
            '--dmcrypt-key-dir',
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )
        parser.add_argument(
            '--prepare-key',
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            default='{statedir}/bootstrap-osd/{cluster}.keyring',
            dest='prepare_key_template',
        )
        parser.add_argument(
            '--no-locking',
            action='store_true', default=None,
            help='let many prepare\'s run in parallel',
        )
        return parser

    @staticmethod
    def set_subparser(subparsers):
        """Register the ``prepare`` subcommand on *subparsers*.

        Returns the parser that was created; its ``func`` default is
        ``Prepare.main``.
        """
        parents = [
            Prepare.parser(),
            PrepareData.parser(),
            Lockbox.parser(),
        ]
        parents.extend(PrepareFilestore.parent_parsers())
        parents.extend(PrepareBluestore.parent_parsers())
        # The description is passed through verbatim (after dedent):
        # RawDescriptionHelpFormatter prints it as written, so it must
        # not be reflowed with textwrap.fill(), which would merge the
        # paragraphs and the example commands into a single block.
        parser = subparsers.add_parser(
            'prepare',
            parents=parents,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=textwrap.dedent("""\
            If the --bluestore argument is given, a bluestore objectstore
            will be created. If --filestore is provided, a legacy FileStore
            objectstore will be created. If neither is specified, we default
            to BlueStore.

            When an entire device is prepared for bluestore, two
            partitions are created. The first partition is for metadata,
            the second partition is for blocks that contain data.

            Unless explicitly specified with --block.db or
            --block.wal, the bluestore DB and WAL data is stored on
            the main block device. For instance:

                ceph-disk prepare --bluestore /dev/sdc

            Will create

                /dev/sdc1 for osd metadata
                /dev/sdc2 for block, db, and wal data (the rest of the disk)


            If either --block.db or --block.wal are specified to be
            the same whole device, they will be created as partition
            three and four respectively. For instance:

                ceph-disk prepare --bluestore \\
                                  --block.db /dev/sdc \\
                                  --block.wal /dev/sdc \\
                                  /dev/sdc

            Will create

                /dev/sdc1 for osd metadata
                /dev/sdc2 for block (the rest of the disk)
                /dev/sdc3 for db
                /dev/sdc4 for wal

            """),
            help='Prepare a directory or disk for a Ceph OSD',
        )
        parser.set_defaults(
            func=Prepare.main,
        )
        return parser

    def prepare(self):
        """Run ``_prepare()``, under prepare_lock unless --no-locking."""
        if self.args.no_locking:
            self._prepare()
        else:
            with prepare_lock:
                self._prepare()

    @staticmethod
    def factory(args):
        """Return the bluestore or filestore preparer for *args*."""
        if args.bluestore:
            return PrepareBluestore(args)
        else:
            return PrepareFilestore(args)

    @staticmethod
    def main(args):
        """Build the matching preparer and run it."""
        Prepare.factory(args).prepare()
2109 | ||
2110 | ||
class PrepareFilestore(Prepare):
    """Prepare implementation pairing a filestore data store and journal."""

    def __init__(self, args):
        super(PrepareFilestore, self).__init__(args)
        # The lockbox attribute only exists when --dmcrypt was given.
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """Return the extra parsers needed by the filestore options."""
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        """Prepare the lockbox (with dmcrypt), then data with its journal."""
        wants_dmcrypt = self.data.args.dmcrypt
        if wants_dmcrypt:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
2130 | ||
2131 | ||
class PrepareBluestore(Prepare):
    """Prepare implementation for bluestore: data, block, db and wal."""

    def __init__(self, args):
        super(PrepareBluestore, self).__init__(args)
        # The lockbox attribute only exists when --dmcrypt was given.
        if args.dmcrypt:
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """Return the parser for --bluestore / --filestore.

        Both flags write to the same ``bluestore`` destination.
        """
        backend_parser = argparse.ArgumentParser(add_help=False)
        backend_parser.add_argument(
            '--bluestore',
            dest='bluestore',
            action='store_true', default=True,
            help='bluestore objectstore',
        )
        backend_parser.add_argument(
            '--filestore',
            dest='bluestore',
            action='store_false',
            help='filestore objectstore',
        )
        return backend_parser

    @staticmethod
    def parent_parsers():
        """Return the extra parsers needed by the bluestore options."""
        parser_sources = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [source.parser() for source in parser_sources]

    def _prepare(self):
        """Prepare the lockbox (with dmcrypt), then data and its spaces."""
        if self.data.args.dmcrypt:
            self.lockbox.prepare()
        # db and wal are only prepared when requested; block always is,
        # and it is passed last.
        optional_spaces = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        to_prepare_list = [
            space for attribute, space in optional_spaces
            if getattr(self.data.args, attribute)
        ]
        to_prepare_list.append(self.block)
        self.data.prepare(*to_prepare_list)
2179 | ||
2180 | ||
class Space(object):
    """Holder for the names of the spaces an OSD may reference."""

    # Each name is combined with '_uuid' / '-uuid' by the lockbox code.
    NAMES = ('block', 'journal', 'block.db', 'block.wal')
2184 | ||
2185 | ||
class PrepareSpace(object):
    """Base class preparing one space (journal, block, block.db, ...).

    A subclass must set ``self.name`` before calling ``__init__`` and
    must implement ``get_space_size()`` and
    ``desired_partition_number()``.  The space is classified as NONE,
    FILE or DEVICE by ``set_type()``.
    """

    NONE = 0
    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        self.args = args
        self.set_type()
        self.space_size = self.get_space_size()
        # Generate a uuid for the space unless the caller supplied one.
        if getattr(self.args, self.name + '_uuid') is None:
            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
        self.space_symlink = None
        self.space_dmcrypt = None

    def set_type(self):
        """Classify the space as NONE, FILE or DEVICE.

        When the space wants to be colocated and the data argument is a
        whole disk device, the space defaults to the data device.

        Raises Error when the --<name>-dev / --<name>-file verification
        flags contradict what is found on disk, or when the path is
        neither a disk device nor a regular file.
        """
        name = self.name
        args = self.args
        if (self.wants_space() and
                dev_is_diskdevice(args.data) and
                not is_partition(args.data) and
                getattr(args, name) is None and
                getattr(args, name + '_file') is None):
            LOG.info('Will colocate %s with data on %s',
                     name, args.data)
            setattr(args, name, args.data)

        if getattr(args, name) is None:
            if getattr(args, name + '_dev'):
                raise Error('%s is unspecified; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.NONE
            return

        if not os.path.exists(getattr(args, name)):
            if getattr(args, name + '_dev'):
                raise Error('%s does not exist; not a block device' %
                            name.capitalize(), getattr(args, name))
            self.type = self.FILE
            return

        mode = os.stat(getattr(args, name)).st_mode
        if stmode_is_diskdevice(mode):
            if getattr(args, name + '_file'):
                # capitalize() must be called: formatting the bound
                # method would print its repr instead of the name.
                raise Error('%s is not a regular file' % name.capitalize(),
                            getattr(args, name))
            self.type = self.DEVICE
            return

        if stat.S_ISREG(mode):
            if getattr(args, name + '_dev'):
                raise Error('%s is not a block device' % name.capitalize(),
                            getattr(args, name))
            self.type = self.FILE
            return

        raise Error('%s %s is neither a block device nor regular file' %
                    (name.capitalize(), getattr(args, name)))

    def is_none(self):
        """Return True when no space was requested."""
        return self.type == self.NONE

    def is_file(self):
        """Return True when the space is a file."""
        return self.type == self.FILE

    def is_device(self):
        """Return True when the space is a disk device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser(name, positional=True):
        """Return a parser with the --<name>-uuid/-file/-dev options.

        When *positional* is true an optional positional argument called
        *name* is added as well.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--%s-uuid' % name,
            metavar='UUID',
            help='unique uuid to assign to the %s' % name,
        )
        parser.add_argument(
            '--%s-file' % name,
            action='store_true', default=None,
            help='verify that %s is a file' % name.upper(),
        )
        parser.add_argument(
            '--%s-dev' % name,
            action='store_true', default=None,
            help='verify that %s is a block device' % name.upper(),
        )

        if positional:
            parser.add_argument(
                name,
                metavar=name.upper(),
                nargs='?',
                help=('path to OSD %s disk block device;' % name +
                      ' leave out to store %s in file' % name),
            )
        return parser

    def wants_space(self):
        """Return True when the space may default to the data device."""
        return True

    def populate_data_path(self, path):
        """Record the space in the data directory *path* by its type."""
        if self.type == self.DEVICE:
            self.populate_data_path_device(path)
        elif self.type == self.FILE:
            self.populate_data_path_file(path)
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def populate_data_path_file(self, path):
        """Write the space uuid and symlink into *path*."""
        space_uuid = self.name + '_uuid'
        if getattr(self.args, space_uuid) is not None:
            write_one_line(path, space_uuid,
                           getattr(self.args, space_uuid))
        if self.space_symlink is not None:
            adjust_symlink(self.space_symlink,
                           os.path.join(path, self.name))

    def populate_data_path_device(self, path):
        """Like the file variant, plus the <name>_dmcrypt symlink."""
        self.populate_data_path_file(path)

        if self.space_dmcrypt is not None:
            adjust_symlink(self.space_dmcrypt,
                           os.path.join(path, self.name + '_dmcrypt'))
        else:
            # Best effort: drop a <name>_dmcrypt entry if one is there;
            # failing to remove it is deliberately ignored.
            try:
                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
            except OSError:
                pass

    def prepare(self):
        """Prepare the space according to its type."""
        if self.type == self.DEVICE:
            self.prepare_device()
        elif self.type == self.FILE:
            self.prepare_file()
        elif self.type == self.NONE:
            pass
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self):
        """Create the space file if missing and point the symlink at it."""
        space_filename = getattr(self.args, self.name)
        if not os.path.exists(space_filename):
            LOG.debug('Creating %s file %s with size 0'
                      ' (ceph-osd will resize and allocate)',
                      self.name,
                      space_filename)
            space_file = open(space_filename, 'wb')
            space_file.close()
            path_set_context(space_filename)

        LOG.debug('%s is file %s',
                  self.name.capitalize(),
                  space_filename)
        LOG.warning('OSD will not be hot-swappable if %s is '
                    'not the same device as the osd data' %
                    self.name)
        self.space_symlink = space_filename

    def prepare_device(self):
        """Prepare a disk device or partition for the space.

        An existing partition is either reused (when its partition type
        matches this space) or symlinked directly.  Otherwise a new
        partition is created on the device, formatted and mapped when it
        is a dm-crypt partition, and given the partition type of the
        space.

        Raises Error when an existing partition is given together with
        --dmcrypt.
        """
        reusing_partition = False

        if is_partition(getattr(self.args, self.name)):
            LOG.debug('%s %s is a partition',
                      self.name.capitalize(), getattr(self.args, self.name))
            partition = DevicePartition.factory(
                path=None, dev=getattr(self.args, self.name), args=self.args)
            if isinstance(partition, DevicePartitionCrypt):
                raise Error(getattr(self.args, self.name) +
                            ' partition already exists'
                            ' and --dmcrypt specified')
            LOG.warning('OSD will not be hot-swappable' +
                        ' if ' + self.name + ' is not' +
                        ' the same device as the osd data')
            if partition.get_ptype() == partition.ptype_for_name(self.name):
                LOG.debug('%s %s was previously prepared with '
                          'ceph-disk. Reusing it.',
                          self.name.capitalize(),
                          getattr(self.args, self.name))
                reusing_partition = True
                # Read and reuse the partition uuid from this journal's
                # previous life. We reuse the uuid instead of changing it
                # because udev does not reliably notice changes to an
                # existing partition's GUID.  See
                # http://tracker.ceph.com/issues/10146
                setattr(self.args, self.name + '_uuid', partition.get_uuid())
                LOG.debug('Reusing %s with uuid %s',
                          self.name,
                          getattr(self.args, self.name + '_uuid'))
            else:
                LOG.warning('%s %s was not prepared with '
                            'ceph-disk. Symlinking directly.',
                            self.name.capitalize(),
                            getattr(self.args, self.name))
                self.space_symlink = getattr(self.args, self.name)
                return

        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
            uuid=getattr(self.args, self.name + '_uuid'))

        if self.args.dmcrypt:
            self.space_dmcrypt = self.space_symlink
            self.space_symlink = '/dev/mapper/{uuid}'.format(
                uuid=getattr(self.args, self.name + '_uuid'))

        if reusing_partition:
            # confirm that the space_symlink exists. It should since
            # this was an active space
            # in the past. Continuing otherwise would be futile.
            assert os.path.exists(self.space_symlink)
            return

        num = self.desired_partition_number()

        if num == 0:
            LOG.warning('OSD will not be hot-swappable if %s '
                        'is not the same device as the osd data',
                        self.name)

        device = Device.factory(getattr(self.args, self.name), self.args)
        num = device.create_partition(
            uuid=getattr(self.args, self.name + '_uuid'),
            name=self.name,
            size=self.space_size,
            num=num)

        partition = device.get_partition(num)

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)

        if isinstance(partition, DevicePartitionCrypt):
            partition.format()
            partition.map()

        command_check_call(
            [
                'sgdisk',
                '--typecode={num}:{uuid}'.format(
                    num=num,
                    uuid=partition.ptype_for_name(self.name),
                ),
                '--',
                getattr(self.args, self.name),
            ],
        )
        update_partition(getattr(self.args, self.name), 'prepared')

        LOG.debug('%s is GPT partition %s',
                  self.name.capitalize(),
                  self.space_symlink)
2439 | ||
2440 | ||
class PrepareJournal(PrepareSpace):
    """Space preparation for the 'journal' space."""

    def __init__(self, args):
        self.name = 'journal'
        journal_reqs = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = journal_reqs

        journal_forbidden = not self.allows_journal
        if args.journal and journal_forbidden:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """Return whether the backend wants a journal."""
        return self.wants_journal

    def get_space_size(self):
        """Return the osd_journal_size setting of the cluster as an int."""
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """Return 2 when the journal shares the data disk, else 0."""
        # When the disk is shared between osd data and journal, the
        # journal is partition number 2.
        shares_data_disk = self.args.journal == self.args.data
        return 2 if shares_data_disk else 0

    @staticmethod
    def parser():
        """Return the parser for the journal options."""
        return PrepareSpace.parser('journal')
2475 | ||
2476 | ||
class PrepareBluestoreBlock(PrepareSpace):
    """Space preparation for the bluestore 'block' space."""

    def __init__(self, args):
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """Return the block size in MB, or 0 when it is not configured.

        The value is read from the ``bluestore_block_size`` setting and
        divided by 1048576.
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        else:
            # Floor division keeps the size an integer on Python 3 as
            # well; true division would produce a float.
            return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 2 when block shares the data disk, else 0."""
        if self.args.block == self.args.data:
            num = 2
        else:
            num = 0
        return num

    @staticmethod
    def parser():
        """Return the parser for the block options."""
        return PrepareSpace.parser('block')
2504 | ||
2505 | ||
class PrepareBluestoreBlockDB(PrepareSpace):
    """Space preparation for the bluestore 'block.db' space."""

    def __init__(self, args):
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """Return the block.db size in MB.

        Uses ``bluestore_block_db_size`` when it is set and non-zero.
        Otherwise the size is one hundredth of ``bluestore_block_size``
        with a minimum of 1024 MB, or 1024 MB when that is unset too.
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is None or int(block_db_size) == 0:
            block_size = get_conf(
                cluster=self.args.cluster,
                variable='bluestore_block_size',
            )
            if block_size is None:
                return 1024  # MB
            # Floor division keeps the size an integer on Python 3 as
            # well; true division would produce a float.
            size = int(block_size) // 100 // 1048576
            return max(size, 1024)  # MB
        else:
            return int(block_db_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 3 when block.db shares the data disk, else 0."""
        if getattr(self.args, 'block.db') == self.args.data:
            num = 3
        else:
            num = 0
        return num

    def wants_space(self):
        """Return False: block.db is never colocated by default."""
        return False

    @staticmethod
    def parser():
        """Return the parser for the block.db options."""
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
2549 | ||
2550 | ||
class PrepareBluestoreBlockWAL(PrepareSpace):
    """Space preparation for the bluestore 'block.wal' space."""

    def __init__(self, args):
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """Return the block.wal size in MB (576 when not configured).

        The value is read from the ``bluestore_block_wal_size`` setting
        and divided by 1048576.
        """
        block_wal_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_wal_size is None:
            return 576  # MB, default value
        else:
            # Floor division keeps the size an integer on Python 3 as
            # well; true division would produce a float.
            return int(block_wal_size) // 1048576  # MB

    def desired_partition_number(self):
        """Return 4 when block.wal shares the data disk, else 0."""
        if getattr(self.args, 'block.wal') == self.args.data:
            num = 4
        else:
            num = 0
        return num

    def wants_space(self):
        """Return False: block.wal is never colocated by default."""
        return False

    @staticmethod
    def parser():
        """Return the parser for the block.wal options."""
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
2587 | ||
2588 | ||
class CryptHelpers(object):
    """Static helpers reading the dm-crypt related cluster settings."""

    @staticmethod
    def get_cryptsetup_parameters(args):
        """Return osd_cryptsetup_parameters split into a list of words.

        An unset value yields an empty list.
        """
        raw_parameters = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        if raw_parameters is not None:
            return shlex.split(raw_parameters)
        return []

    @staticmethod
    def get_dmcrypt_keysize(args):
        """Return the dm-crypt key size for the configured dmcrypt type.

        Defaults to 1024 for luks and 256 for plain when
        osd_dmcrypt_key_size is unset; returns 0 without dmcrypt.
        """
        configured_size = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        crypt_kind = CryptHelpers.get_dmcrypt_type(args)

        if crypt_kind == 'luks':
            if configured_size is not None:
                return int(configured_size)
            # As LUKS will hash the 'passphrase' in .luks.key
            # into a key, set a large default
            # so if not updated for some time, it is still a
            # reasonable value.
            return 1024

        if crypt_kind == 'plain':
            if configured_size is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured_size)
            return int(configured_size)

        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """Return 'luks', 'plain', or None when dmcrypt is not requested.

        Raises Error when osd_dmcrypt_type holds any other value.
        """
        dmcrypt_requested = hasattr(args, 'dmcrypt') and args.dmcrypt
        if not dmcrypt_requested:
            return None

        configured_type = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )

        # luks is the default when the setting is absent.
        if configured_type is None or configured_type == 'luks':
            return 'luks'
        if configured_type == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', configured_type)
2649 | ||
2650 | ||
c07f9fc5 FG |
class Secrets(object):
    """Holds the cephx secret generated for an OSD."""

    def __init__(self):
        generate_key = ['ceph-authtool', '--gen-print-key']
        secret, stderr, ret = command(generate_key)
        LOG.debug("stderr " + stderr)
        assert ret == 0
        self.keys = {}
        self.keys['cephx_secret'] = secret.strip()

    def write_osd_keyring(self, keyring, osd_id):
        """Create *keyring* holding the cephx secret for osd.<osd_id>."""
        osd_name = 'osd.' + str(osd_id)
        authtool_call = [
            'ceph-authtool', keyring,
            '--create-keyring',
            '--name', osd_name,
            '--add-key', self.keys['cephx_secret'],
        ]
        command_check_call(authtool_call)
        path_set_context(keyring)

    def get_json(self):
        """Return the keys as an ASCII encoded JSON bytearray."""
        serialized = json.dumps(self.keys)
        return bytearray(serialized, 'ascii')
2673 | ||
2674 | ||
class LockboxSecrets(Secrets):
    """Secrets extended with a dm-crypt key and a lockbox cephx secret."""

    def __init__(self, args):
        super(LockboxSecrets, self).__init__()

        key_size = CryptHelpers.get_dmcrypt_keysize(args)
        # The key size is divided by 8 to get the number of bytes to
        # read.  Floor division is required: read() rejects the float
        # that true division yields on Python 3.  The context manager
        # makes sure the file is closed again.
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key).decode('ascii')

        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        assert ret == 0

        self.keys.update({
            'dmcrypt_key': base64_key,
            'cephx_lockbox_secret': secret.strip(),
        })

    def write_lockbox_keyring(self, path, osd_uuid):
        """Create <path>/keyring for client.osd-lockbox.<osd_uuid>."""
        keyring = os.path.join(path, 'keyring')
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'client.osd-lockbox.' + osd_uuid,
                '--add-key', self.keys['cephx_lockbox_secret'],
            ])
        path_set_context(keyring)
2703 | ||
2704 | ||
7c673cae FG |
2705 | class Lockbox(object): |
2706 | ||
2707 | def __init__(self, args): | |
2708 | self.args = args | |
2709 | self.partition = None | |
2710 | self.device = None | |
2711 | ||
2712 | if hasattr(self.args, 'lockbox') and self.args.lockbox is None: | |
2713 | self.args.lockbox = self.args.data | |
2714 | ||
2715 | def set_partition(self, partition): | |
2716 | self.partition = partition | |
2717 | ||
2718 | @staticmethod | |
2719 | def parser(): | |
2720 | parser = argparse.ArgumentParser(add_help=False) | |
2721 | parser.add_argument( | |
2722 | '--lockbox', | |
2723 | help='path to the device to store the lockbox', | |
2724 | ) | |
2725 | parser.add_argument( | |
2726 | '--lockbox-uuid', | |
2727 | metavar='UUID', | |
2728 | help='unique lockbox uuid', | |
2729 | ) | |
2730 | return parser | |
2731 | ||
2732 | def create_partition(self): | |
2733 | self.device = Device.factory(self.args.lockbox, argparse.Namespace()) | |
c07f9fc5 | 2734 | partition_number = 5 |
7c673cae FG |
2735 | self.device.create_partition(uuid=self.args.lockbox_uuid, |
2736 | name='lockbox', | |
2737 | num=partition_number, | |
2738 | size=10) # MB | |
2739 | return self.device.get_partition(partition_number) | |
2740 | ||
2741 | def set_or_create_partition(self): | |
2742 | if is_partition(self.args.lockbox): | |
2743 | LOG.debug('OSD lockbox device %s is a partition', | |
2744 | self.args.lockbox) | |
2745 | self.partition = DevicePartition.factory( | |
2746 | path=None, dev=self.args.lockbox, args=self.args) | |
2747 | ptype = self.partition.get_ptype() | |
2748 | ready = Ptype.get_ready_by_name('lockbox') | |
2749 | if ptype not in ready: | |
2750 | LOG.warning('incorrect partition UUID: %s, expected %s' | |
2751 | % (ptype, str(ready))) | |
2752 | else: | |
2753 | LOG.debug('Creating osd partition on %s', | |
2754 | self.args.lockbox) | |
2755 | self.partition = self.create_partition() | |
2756 | ||
2757 | def create_key(self): | |
7c673cae FG |
2758 | cluster = self.args.cluster |
2759 | bootstrap = self.args.prepare_key_template.format(cluster=cluster, | |
2760 | statedir=STATEDIR) | |
c07f9fc5 FG |
2761 | path = self.get_mount_point() |
2762 | secrets = LockboxSecrets(self.args) | |
2763 | id_arg = self.args.osd_id and [self.args.osd_id] or [] | |
2764 | osd_id = command_with_stdin( | |
7c673cae FG |
2765 | [ |
2766 | 'ceph', | |
2767 | '--cluster', cluster, | |
2768 | '--name', 'client.bootstrap-osd', | |
2769 | '--keyring', bootstrap, | |
c07f9fc5 FG |
2770 | '-i', '-', |
2771 | 'osd', 'new', self.args.osd_uuid, | |
2772 | ] + id_arg, | |
2773 | secrets.get_json() | |
7c673cae | 2774 | ) |
c07f9fc5 FG |
2775 | secrets.write_lockbox_keyring(path, self.args.osd_uuid) |
2776 | osd_id = must_be_one_line(osd_id) | |
2777 | check_osd_id(osd_id) | |
2778 | write_one_line(path, 'whoami', osd_id) | |
2779 | secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id) | |
7c673cae FG |
2780 | write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1) |
2781 | ||
2782 | def symlink_spaces(self, path): | |
2783 | target = self.get_mount_point() | |
2784 | for name in Space.NAMES: | |
2785 | if (hasattr(self.args, name + '_uuid') and | |
2786 | getattr(self.args, name + '_uuid')): | |
2787 | uuid = getattr(self.args, name + '_uuid') | |
2788 | symlink = os.path.join(STATEDIR, 'osd-lockbox', uuid) | |
2789 | adjust_symlink(target, symlink) | |
2790 | write_one_line(path, name + '-uuid', uuid) | |
2791 | ||
2792 | def populate(self): | |
2793 | maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox')) | |
2794 | args = ['mkfs', '-t', 'ext4', self.partition.get_dev()] | |
2795 | LOG.debug('Creating lockbox fs on %s: ' + str(" ".join(args))) | |
2796 | command_check_call(args) | |
2797 | path = self.get_mount_point() | |
2798 | maybe_mkdir(path) | |
2799 | args = ['mount', '-t', 'ext4', self.partition.get_dev(), path] | |
2800 | LOG.debug('Mounting lockbox ' + str(" ".join(args))) | |
2801 | command_check_call(args) | |
2802 | write_one_line(path, 'osd-uuid', self.args.osd_uuid) | |
2803 | if self.args.cluster_uuid is None: | |
2804 | self.args.cluster_uuid = get_fsid(cluster=self.args.cluster) | |
2805 | write_one_line(path, 'ceph_fsid', self.args.cluster_uuid) | |
2806 | self.create_key() | |
2807 | self.symlink_spaces(path) | |
2808 | write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC) | |
2809 | if self.device is not None: | |
2810 | command_check_call( | |
2811 | [ | |
2812 | 'sgdisk', | |
2813 | '--typecode={num}:{uuid}'.format( | |
2814 | num=self.partition.get_partition_number(), | |
2815 | uuid=self.partition.ptype_for_name('lockbox'), | |
2816 | ), | |
2817 | '--', | |
2818 | get_partition_base(self.partition.get_dev()), | |
2819 | ], | |
2820 | ) | |
2821 | ||
2822 | def get_mount_point(self): | |
2823 | return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid) | |
2824 | ||
2825 | def get_osd_uuid(self): | |
2826 | return self.args.osd_uuid | |
2827 | ||
def activate(self):
    """
    Mount the lockbox read-only at its final location and trigger the
    partitions it references.

    The lockbox is first mounted on a scratch directory to read the
    'osd-uuid' file, because the final mount point is derived from that
    uuid. The scratch mount is always undone and its directory removed.
    Nothing is done when the partition is already mounted.
    """
    already = is_mounted(self.partition.get_dev())
    if already:
        LOG.info("Lockbox already mounted at " + already)
        return

    scratch = tempfile.mkdtemp(
        prefix='mnt.',
        dir=STATEDIR + '/tmp',
    )
    args = ['mount', '-t', 'ext4', '-o', 'ro',
            self.partition.get_dev(),
            scratch]
    LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
    command_check_call(args)
    try:
        self.args.osd_uuid = get_oneliner(scratch, 'osd-uuid')
    finally:
        # never leave the scratch mount (or its directory) behind, even
        # when the uuid cannot be read
        command_check_call(['umount', scratch])
        os.rmdir(scratch)

    # build the final command first so the log shows what is really run
    args = ['mount', '-t', 'ext4', '-o', 'ro',
            self.partition.get_dev(),
            self.get_mount_point()]
    LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
    command_check_call(args)

    for name in Space.NAMES + ('osd',):
        uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
        if os.path.exists(uuid_path):
            # named part_uuid so the uuid module is not shadowed
            part_uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
            dev = os.path.join('/dev/disk/by-partuuid/', part_uuid.lower())
            command_check_call(['ceph-disk', 'trigger', dev])
2857 | ||
def prepare(self):
    """Check the lockbox device is unused, then partition and populate it."""
    target = self.args.lockbox
    verify_not_in_use(target, check_partitions=True)
    self.set_or_create_partition()
    self.populate()
2862 | ||
2863 | ||
class PrepareData(object):
    """
    Prepare the data store of an OSD, either in a directory (FILE) or on
    a disk / partition (DEVICE).

    Subclasses are expected to provide get_space_size() and to drive
    the device preparation steps.
    """

    FILE = 1
    DEVICE = 2

    def __init__(self, args):
        """Record args, detect the data type and fill in missing uuids."""
        self.args = args
        self.partition = None
        self.set_type()
        if self.args.cluster_uuid is None:
            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def set_type(self):
        """Set self.type from the stat mode of args.data or raise Error."""
        dmode = os.stat(self.args.data).st_mode

        if stat.S_ISDIR(dmode):
            self.type = self.FILE
        elif stmode_is_diskdevice(dmode):
            self.type = self.DEVICE
        else:
            raise Error('not a dir or block device', self.args.data)

    def is_file(self):
        """Return True when the data path is a directory."""
        return self.type == self.FILE

    def is_device(self):
        """Return True when the data path is a disk device."""
        return self.type == self.DEVICE

    @staticmethod
    def parser():
        """Return the argparse parser for the data related options."""
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--fs-type',
            help='file system type to use (e.g. "ext4")',
        )
        parser.add_argument(
            '--zap-disk',
            action='store_true', default=None,
            help='destroy the partition table (and content) of a disk',
        )
        parser.add_argument(
            '--data-dir',
            action='store_true', default=None,
            help='verify that DATA is a dir',
        )
        parser.add_argument(
            '--data-dev',
            action='store_true', default=None,
            help='verify that DATA is a block device',
        )
        parser.add_argument(
            'data',
            metavar='DATA',
            help='path to OSD data (a disk block device or directory)',
        )
        return parser

    def populate_data_path_file(self, path, *to_prepare_list):
        """Populate a directory backed data path."""
        self.populate_data_path(path, *to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """
        Write the OSD identity files in path, then let each element of
        to_prepare_list add its own content. Does nothing when a
        'magic' file is already present.
        """
        if os.path.exists(os.path.join(path, 'magic')):
            LOG.debug('Data dir %s already exists', path)
            return
        else:
            LOG.debug('Preparing osd data dir %s', path)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
        write_one_line(path, 'fsid', self.args.osd_uuid)
        if self.args.osd_id:
            write_one_line(path, 'wanttobe', self.args.osd_id)
        if self.args.crush_device_class:
            write_one_line(path, 'crush_device_class',
                           self.args.crush_device_class)
        # magic is written after the identity files: its presence is
        # what the early return above keys on
        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

        for to_prepare in to_prepare_list:
            to_prepare.populate_data_path(path)

    def prepare(self, *to_prepare_list):
        """Dispatch to prepare_device or prepare_file according to type."""
        if self.type == self.DEVICE:
            self.prepare_device(*to_prepare_list)
        elif self.type == self.FILE:
            self.prepare_file(*to_prepare_list)
        else:
            raise Error('unexpected type ', self.type)

    def prepare_file(self, *to_prepare_list):
        """Prepare a directory; raise Error if --data-dev was given."""
        if not os.path.exists(self.args.data):
            raise Error('data path for directory does not exist',
                        self.args.data)

        if self.args.data_dev:
            raise Error('data path is not a block device', self.args.data)

        for to_prepare in to_prepare_list:
            to_prepare.prepare()

        self.populate_data_path_file(self.args.data, *to_prepare_list)

    def sanity_checks(self):
        """Raise Error if the device is missing or reported as in use."""
        if not os.path.exists(self.args.data):
            raise Error('data path for device does not exist',
                        self.args.data)
        verify_not_in_use(self.args.data,
                          check_partitions=not self.args.dmcrypt)

    def set_variables(self):
        """Resolve fs type, mkfs arguments, mount options and OSD uuid."""
        if self.args.fs_type is None:
            self.args.fs_type = get_conf(
                cluster=self.args.cluster,
                variable='osd_mkfs_type',
            )
            if self.args.fs_type is None:
                self.args.fs_type = get_conf(
                    cluster=self.args.cluster,
                    variable='osd_fs_type',
                )
            if self.args.fs_type is None:
                self.args.fs_type = DEFAULT_FS_TYPE

        self.mkfs_args = get_conf(
            cluster=self.args.cluster,
            variable='osd_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )
        if self.mkfs_args is None:
            self.mkfs_args = get_conf(
                cluster=self.args.cluster,
                variable='osd_fs_mkfs_options_{fstype}'.format(
                    fstype=self.args.fs_type,
                ),
            )

        self.mount_options = get_mount_options(cluster=self.args.cluster,
                                               fs_type=self.args.fs_type)

        if self.args.osd_uuid is None:
            self.args.osd_uuid = str(uuid.uuid4())

    def prepare_device(self, *to_prepare_list):
        """
        Common first steps of a device preparation.

        Raises Error when --data-dir was given, mirroring the --data-dev
        check of prepare_file. The check comes first so that a device is
        never zapped when the caller asked for a directory.
        """
        if self.args.data_dir:
            raise Error('data path is not a directory', self.args.data)
        self.sanity_checks()
        self.set_variables()
        if self.args.zap_disk is not None:
            zap(self.args.data)

    def create_data_partition(self):
        """Create partition 1 for the data and return it."""
        device = Device.factory(self.args.data, self.args)
        partition_number = 1
        device.create_partition(uuid=self.args.osd_uuid,
                                name='data',
                                num=partition_number,
                                size=self.get_space_size())
        return device.get_partition(partition_number)

    def set_data_partition(self):
        """Use args.data if it is a partition, otherwise create one."""
        if is_partition(self.args.data):
            LOG.debug('OSD data device %s is a partition',
                      self.args.data)
            self.partition = DevicePartition.factory(
                path=None, dev=self.args.data, args=self.args)
            ptype = self.partition.get_ptype()
            ready = Ptype.get_ready_by_name('osd')
            if ptype not in ready:
                # only a warning: preparation goes on regardless
                LOG.warning('incorrect partition UUID: %s, expected %s'
                            % (ptype, str(ready)))
        else:
            LOG.debug('Creating osd partition on %s',
                      self.args.data)
            self.partition = self.create_data_partition()

    def populate_data_path_device(self, *to_prepare_list):
        """
        Create the file system on the data partition, populate it while
        it is temporarily mounted and, when args.data is a whole disk,
        set the partition typecode and notify udev.
        """
        partition = self.partition

        if isinstance(partition, DevicePartitionCrypt):
            partition.map()

        try:
            args = [
                'mkfs',
                '-t',
                self.args.fs_type,
            ]
            if self.mkfs_args is not None:
                args.extend(self.mkfs_args.split())
                if self.args.fs_type == 'xfs':
                    args.extend(['-f'])  # always force
            else:
                args.extend(MKFS_ARGS.get(self.args.fs_type, []))
            args.extend([
                '--',
                partition.get_dev(),
            ])
            LOG.debug('Creating %s fs on %s',
                      self.args.fs_type, partition.get_dev())
            command_check_call(args, exit=True)

            path = mount(dev=partition.get_dev(),
                         fstype=self.args.fs_type,
                         options=self.mount_options)

            try:
                self.populate_data_path(path, *to_prepare_list)
            finally:
                path_set_context(path)
                unmount(path)
        finally:
            if isinstance(partition, DevicePartitionCrypt):
                partition.unmap()

        if not is_partition(self.args.data):
            command_check_call(
                [
                    'sgdisk',
                    '--typecode=%d:%s' % (partition.get_partition_number(),
                                          partition.ptype_for_name('osd')),
                    '--',
                    self.args.data,
                ],
                exit=True,
            )
            update_partition(self.args.data, 'prepared')
            command_check_call(['udevadm', 'trigger',
                                '--action=add',
                                '--sysname-match',
                                os.path.basename(partition.rawdev)])
3099 | ||
3100 | ||
class PrepareFilestoreData(PrepareData):
    """Data preparation for a filestore OSD."""

    def get_space_size(self):
        """Return 0, meaning: take as much space as possible."""
        return 0

    def prepare_device(self, *to_prepare_list):
        """Prepare the auxiliary spaces first, then the data partition."""
        parent = super(PrepareFilestoreData, self)
        parent.prepare_device(*to_prepare_list)
        for item in to_prepare_list:
            item.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path as usual, then tag it with the filestore type."""
        parent = super(PrepareFilestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3117 | ||
7c673cae FG |
3118 | |
class PrepareBluestoreData(PrepareData):
    """Data preparation for a bluestore OSD."""

    # size of the data partition, in MB
    SPACE_SIZE = 100

    def get_space_size(self):
        """Return the size (MB) of the data partition."""
        size_mb = self.SPACE_SIZE
        return size_mb

    def prepare_device(self, *to_prepare_list):
        """Set the data partition first, then prepare auxiliary spaces."""
        parent = super(PrepareBluestoreData, self)
        parent.prepare_device(*to_prepare_list)
        self.set_data_partition()
        for item in to_prepare_list:
            item.prepare()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate path as usual, then tag it with the bluestore type."""
        parent = super(PrepareBluestoreData, self)
        parent.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'bluestore')
3136 | ||
3137 | ||
7c673cae FG |
def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
):
    """
    Fetch the monmap into path and run 'ceph-osd --mkfs' on it.

    The object store type is read from the 'type' file of path; Error
    is raised when it is neither 'bluestore' nor 'filestore'.
    """
    monmap = os.path.join(path, 'activate.monmap')
    getmap = ['ceph']
    getmap += ['--cluster', cluster]
    getmap += ['--name', 'client.bootstrap-osd']
    getmap += ['--keyring', keyring]
    getmap += ['mon', 'getmap', '-o', monmap]
    command_check_call(getmap)

    osd_type = read_one_line(path, 'type')
    if osd_type not in ('bluestore', 'filestore'):
        raise Error('unrecognized objectstore type %s' % osd_type)

    mkfs_cmd = [
        'ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
    ]
    if osd_type == 'filestore':
        # only filestore has a journal next to the data
        mkfs_cmd += ['--osd-journal', os.path.join(path, 'journal')]
    mkfs_cmd += [
        '--osd-uuid', fsid,
        '--setuser', get_ceph_user(),
        '--setgroup', get_ceph_group(),
    ]
    command_check_call(mkfs_cmd)
7c673cae FG |
3189 | |
3190 | ||
7c673cae FG |
def get_mount_point(cluster, osd_id):
    """Return the data directory path of osd_id in cluster."""
    leaf = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(STATEDIR + '/osd', leaf)
3197 | ||
3198 | ||
def move_mount(
    dev,
    path,
    cluster,
    osd_id,
    fstype,
    mount_options,
):
    """
    Mount dev on the final OSD data directory and lazily unmount the
    temporary mount at path.
    """
    LOG.debug('Moving mount to final location...')
    final_dir = get_mount_point(cluster, osd_id)
    maybe_mkdir(final_dir)

    # fall back to the per file system defaults when no options are given
    options = mount_options
    if options is None:
        options = MOUNT_OPTIONS.get(fstype, '')

    # 'mount --move' would be the natural choice but it is not supported
    # when the parent mount is shared (the default on RH, Fedora and
    # probably others), and '--bind' does not maintain /etc/mtab
    # properly. Mounting the device a second time at the final location
    # avoids both problems.
    mount_cmd = ['/bin/mount', '-o', options, '--', dev, final_dir]
    command_check_call(mount_cmd)

    # lazy, in case someone else is peeking at the wrong moment
    umount_cmd = ['/bin/umount', '-l', '--', path]
    command_check_call(umount_cmd)
3240 | ||
3241 | ||
#
# For upgrade purposes both the --runtime unit and the default unit
# must be disabled. Two units can exist at the same time, one enabled
# with --runtime and one without. If they co-exist, whether because of
# a manual action or because of ceph-disk, they compete with each other.
#
def systemd_disable(
    path,
    osd_id,
):
    """Disable the ceph-osd@osd_id unit, with and without --runtime."""
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    # ensure there is no duplicate ceph-osd@.service
    for extra in ([], ['--runtime']):
        command_check_call(['systemctl', 'disable', unit] + extra)
3262 | ||
3263 | ||
def systemd_start(
    path,
    osd_id,
):
    """
    Enable and start the ceph-osd@osd_id unit.

    The unit is enabled with --runtime when path is a mount point.
    """
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    enable_cmd = ['systemctl', 'enable', unit]
    if os.path.ismount(path):
        enable_cmd.append('--runtime')
    command_check_call(enable_cmd)
    command_check_call(['systemctl', 'start', unit])
3287 | ||
3288 | ||
def systemd_stop(
    path,
    osd_id,
):
    """Disable then stop the ceph-osd@osd_id unit."""
    systemd_disable(path, osd_id)
    unit = 'ceph-osd@{osd_id}'.format(osd_id=osd_id)
    command_check_call(['systemctl', 'stop', unit])
3301 | ||
3302 | ||
def start_daemon(
    cluster,
    osd_id,
):
    """
    Start the OSD with the init system its data directory is tagged
    with (a file named after the init system).

    Raises Error when no known tag is found or when the start command
    exits with an error.
    """
    LOG.debug('Starting %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    def tagged(init_system):
        return os.path.exists(os.path.join(path, init_system))

    try:
        if tagged('upstart'):
            # emit rather than start, because start would fail if the
            # instance was already running; --no-wait because a started
            # daemon doesn't guarantee much about the service being
            # operational anyway
            command_check_call([
                '/sbin/initctl',
                'emit',
                '--no-wait',
                '--',
                'ceph-osd',
                'cluster={cluster}'.format(cluster=cluster),
                'id={osd_id}'.format(osd_id=osd_id),
            ])
        elif tagged('sysvinit'):
            svc = '/sbin/service'
            if os.path.exists('/usr/sbin/service'):
                svc = '/usr/sbin/service'
            command_check_call([
                svc,
                'ceph',
                '--cluster',
                '{cluster}'.format(cluster=cluster),
                'start',
                'osd.{osd_id}'.format(osd_id=osd_id),
            ])
        elif tagged('systemd'):
            systemd_start(path, osd_id)
        elif tagged('openrc'):
            base_script = '/etc/init.d/ceph-osd'
            osd_script = '{base}.{osd_id}'.format(
                base=base_script,
                osd_id=osd_id
            )
            if not os.path.exists(osd_script):
                os.symlink(base_script, osd_script)
            command_check_call([osd_script, 'start'])
        elif tagged('bsdrc'):
            command_check_call([
                '/usr/sbin/service', 'ceph', 'start',
                'osd.{osd_id}'.format(osd_id=osd_id),
            ])
        else:
            message = ('{cluster} osd.{osd_id} '
                       'is not tagged with an init system')
            raise Error(message.format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd start failed', e)
3377 | ||
3378 | ||
def stop_daemon(
    cluster,
    osd_id,
):
    """
    Stop the OSD with the init system its data directory is tagged
    with (a file named after the init system).

    Raises Error when no known tag is found or when the stop command
    exits with an error.
    """
    LOG.debug('Stoping %s osd.%s...', cluster, osd_id)

    path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
        cluster=cluster, osd_id=osd_id)

    try:
        if os.path.exists(os.path.join(path, 'upstart')):
            command_check_call(
                [
                    '/sbin/initctl',
                    'stop',
                    'ceph-osd',
                    'cluster={cluster}'.format(cluster=cluster),
                    'id={osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'sysvinit')):
            svc = which('service')
            command_check_call(
                [
                    svc,
                    'ceph',
                    '--cluster',
                    '{cluster}'.format(cluster=cluster),
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        elif os.path.exists(os.path.join(path, 'systemd')):
            systemd_stop(path, osd_id)
        elif os.path.exists(os.path.join(path, 'openrc')):
            command_check_call(
                [
                    '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
                    'stop',
                ],
            )
        elif os.path.exists(os.path.join(path, 'bsdrc')):
            # one argv element per word: a single 'script stop osd.N'
            # string would be taken as the name of the program to run
            command_check_call(
                [
                    '/usr/local/etc/rc.d/ceph',
                    'stop',
                    'osd.{osd_id}'.format(osd_id=osd_id),
                ],
            )
        else:
            raise Error('{cluster} osd.{osd_id} '
                        'is not tagged with an init system'
                        .format(cluster=cluster, osd_id=osd_id))
    except subprocess.CalledProcessError as e:
        raise Error('ceph osd stop failed', e)
3433 | ||
3434 | ||
def detect_fstype(dev):
    """
    Return the file system type found on dev, as a single line.

    Uses fstyp when FREEBSD is set and blkid otherwise.
    """
    if FREEBSD:
        probe = ['fstyp', '-u', dev]
    else:
        probe = [
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            '-o', 'value',
            '--',
            dev,
        ]
    output = _check_output(args=probe)
    return must_be_one_line(output)
3458 | ||
3459 | ||
def dmcrypt_is_mapped(uuid):
    """Return the /dev/mapper path of uuid if it exists, None otherwise."""
    mapped = os.path.join('/dev/mapper', uuid)
    if not os.path.exists(mapped):
        return None
    return mapped
3466 | ||
3467 | ||
def dmcrypt_map(dev, dmcrypt_key_dir):
    """
    Map the encrypted partition dev and return the result of
    _dmcrypt_map.

    The partition type decides between plain and luks; Error is raised
    when it matches neither.
    """
    ptype = get_partition_type(dev)
    if ptype in Ptype.get_ready_by_type('plain'):
        luks, cryptsetup_parameters = False, ['--key-size', '256']
    elif ptype in Ptype.get_ready_by_type('luks'):
        luks, cryptsetup_parameters = True, []
    else:
        raise Error('--dmcrypt called for dev %s with invalid ptype %s'
                    % (dev, ptype))

    part_uuid = get_partition_uuid(dev)
    key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
    return _dmcrypt_map(
        rawdev=dev,
        key=key,
        _uuid=part_uuid,
        cryptsetup_parameters=cryptsetup_parameters,
        luks=luks,
        format_dev=False,
    )
3489 | ||
3490 | ||
def mount_activate(
    dev,
    activate_key_template,
    init,
    dmcrypt,
    dmcrypt_key_dir,
    reactivate=False,
):
    """
    Mount dev on a temporary directory, activate the OSD found there
    and move the mount to its final location.

    Returns (cluster, osd_id). Raises Error when the OSD is marked
    'deactive' and reactivate is false, or when something else already
    occupies the final location.
    """
    if dmcrypt:
        # the returned uuid is not used below; the call is kept as is
        get_partition_uuid(dev)
        dev = dmcrypt_map(dev, dmcrypt_key_dir)
    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
        )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_mount_options(cluster='ceph', fs_type=fstype)

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    # a 'deactive' marker means the OSD was deactivated on purpose
    deactive = os.path.exists(os.path.join(path, 'deactive'))
    if deactive and not reactivate:
        unmount(path)
        # unmap again: the dmcrypt mapping was created just above and
        # would otherwise be left behind
        if '/dev/mapper/' in dev:
            dmcrypt_unmap(dev.replace('/dev/mapper/', ''))
        # logged in addition to the exception
        LOG.info('OSD deactivated! reactivate with: --reactivate')
        raise Error('OSD deactivated! reactivate with: --reactivate')

    osd_id = None
    cluster = None
    try:
        (osd_id, cluster) = activate(path, activate_key_template, init)

        # activation succeeded: drop the marker of a reactivated OSD
        if deactive and reactivate:
            os.remove(os.path.join(path, 'deactive'))
            LOG.info('Remove `deactive` file.')

        # check if the disk is already active, or if something else is
        # already mounted there
        active = False
        other = False
        src_dev = os.stat(path).st_dev
        try:
            final_dir = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
                cluster=cluster,
                osd_id=osd_id)
            dst_dev = os.stat(final_dir).st_dev
            if src_dev == dst_dev:
                active = True
            elif dst_dev != os.stat(STATEDIR + '/osd').st_dev:
                other = True
            elif os.listdir(get_mount_point(cluster, osd_id)):
                LOG.info(get_mount_point(cluster, osd_id) +
                         " is not empty, won't override")
                other = True
        except OSError:
            pass

        if active:
            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
                     % (cluster, osd_id))
            unmount(path)
        elif other:
            raise Error('another %s osd.%s already mounted in position '
                        '(old/different cluster instance?); unmounting ours.'
                        % (cluster, osd_id))
        else:
            move_mount(
                dev=dev,
                path=path,
                cluster=cluster,
                osd_id=osd_id,
                fstype=fstype,
                mount_options=mount_options,
            )
        return cluster, osd_id

    except:
        LOG.error('Failed to activate')
        unmount(path)
        raise
    finally:
        # remove our temp dir
        if os.path.exists(path):
            os.rmdir(path)
3598 | ||
3599 | ||
def activate_dir(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD stored in the directory path.

    When init is neither None nor 'none' and path is not the canonical
    OSD location, a symlink from the canonical location to path is
    created, replacing a symlink that points elsewhere.

    Returns (cluster, osd_id). Raises Error when path does not exist
    or when the symlink cannot be removed or created.
    """
    if not os.path.exists(path):
        raise Error(
            'directory %s does not exist' % path
        )

    (osd_id, cluster) = activate(path, activate_key_template, init)

    if init not in (None, 'none'):
        canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
            cluster=cluster,
            osd_id=osd_id)
        if path != canonical:
            # symlink it from the proper location
            create = True
            if os.path.lexists(canonical):
                old = os.readlink(canonical)
                if old != path:
                    LOG.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError as e:
                        # only file system errors are converted, and the
                        # cause is kept in the Error arguments
                        raise Error('unable to remove old symlink',
                                    canonical, e)
                else:
                    create = False
            if create:
                LOG.debug('Creating symlink %s -> %s', canonical, path)
                try:
                    os.symlink(path, canonical)
                except OSError as e:
                    raise Error('unable to create symlink %s -> %s'
                                % (canonical, path), e)

    return cluster, osd_id
3639 | ||
3640 | ||
def find_cluster_by_uuid(_uuid):
    """
    Return the name of the cluster whose conf file in SYSCONFDIR
    carries the fsid _uuid, or None when there is no match.

    As a fallback 'ceph' is returned when ceph.conf is the only conf
    file for which no fsid could be obtained.
    """
    wanted = _uuid.lower()
    suffix = '.conf'
    if not os.path.exists(SYSCONFDIR):
        return None

    without_fsid = []
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith(suffix):
            continue
        cluster = entry[:-len(suffix)]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            if 'getting cluster uuid from configuration failed' not in str(e):
                raise e
            without_fsid.append(cluster)
            continue
        if fsid == wanted:
            return cluster

    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if without_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3669 | ||
3670 | ||
def activate(
    path,
    activate_key_template,
    init,
):
    """
    Activate the OSD data directory path.

    Resolves the cluster from the 'ceph_fsid' file, allocates an OSD
    id when none is recorded, runs mkfs unless a 'ready' file exists,
    tags the directory with the init system and writes the 'active'
    file.

    Returns (osd_id, cluster). Raises Error when the cluster uuid, the
    cluster configuration or the OSD uuid cannot be determined.
    """
    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise Error('No cluster uuid assigned.')
    LOG.debug('Cluster uuid is %s', ceph_fsid)

    cluster = find_cluster_by_uuid(ceph_fsid)
    if cluster is None:
        raise Error('No cluster conf found in ' + SYSCONFDIR +
                    ' with fsid %s' % ceph_fsid)
    LOG.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise Error('No OSD uuid assigned.')
    LOG.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster,
                                           statedir=STATEDIR)

    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
            path=path,
        )
        write_one_line(path, 'whoami', osd_id)
    LOG.debug('OSD id is %s', osd_id)

    ready_marker = os.path.join(path, 'ready')
    if not os.path.exists(ready_marker):
        LOG.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
        )

    if init not in (None, 'none'):
        if init == 'auto':
            # the configuration wins over auto detection
            configured = get_conf(
                cluster=cluster,
                variable='init'
            )
            init = configured if configured is not None else init_get()

        LOG.debug('Marking with init system %s', init)
        init_path = os.path.join(path, init)
        with open(init_path, 'w'):
            path_set_context(init_path)

    # remove markers for others, just in case.
    for other in INIT_SYSTEMS:
        if other == init:
            continue
        try:
            os.unlink(os.path.join(path, other))
        except OSError:
            pass

    active_marker = os.path.join(path, 'active')
    if not os.path.exists(active_marker):
        write_one_line(path, 'active', 'ok')
    LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)
3748 | ||
3749 | ||
def main_activate(args):
    """
    Entry point of the activate sub-command.

    Activates args.path (a disk device or a directory) while holding
    activate_lock and starts the OSD daemon unless told otherwise.
    Raises Error when args.path does not exist or is neither a disk
    device nor a directory.
    """
    cluster = None
    osd_id = None

    LOG.info('path = ' + str(args.path))
    if not os.path.exists(args.path):
        raise Error('%s does not exist' % args.path)

    if is_suppressed(args.path):
        LOG.info('suppressed activate request on %s', args.path)
        return

    with activate_lock:
        mode = os.stat(args.path).st_mode
        if stmode_is_diskdevice(mode):
            typed_as_mpath = (
                is_partition(args.path) and
                get_partition_type(args.path) ==
                PTYPE['mpath']['osd']['ready']
            )
            if typed_as_mpath and not is_mpath(args.path):
                raise Error('%s is not a multipath block device' %
                            args.path)
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                dmcrypt=args.dmcrypt,
                dmcrypt_key_dir=args.dmcrypt_key_dir,
                reactivate=args.reactivate,
            )
            osd_data = get_mount_point(cluster, osd_id)

            args.cluster = cluster
            if args.dmcrypt:
                for name in Space.NAMES:
                    # map every space that has a '<name>_dmcrypt' entry
                    dev_path = os.path.join(osd_data, name + '_dmcrypt')
                    if os.path.exists(dev_path):
                        partition = DevicePartition.factory(
                            path=None,
                            dev=dev_path,
                            args=args)
                        partition.rawdev = args.path
                        partition.map()

        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
            )
            osd_data = args.path

        else:
            raise Error('%s is not a directory or block device' % args.path)

        # exit with 0 if the journal device is not up, yet
        # journal device will do the activation
        osd_journal = '{path}/journal'.format(path=osd_data)
        journal_dangling = (os.path.islink(osd_journal) and
                            not os.access(osd_journal, os.F_OK))
        if journal_dangling:
            LOG.info("activate: Journal not present, not starting, yet")
            return

        if not args.no_start_daemon and args.mark_init == 'none':
            ceph_osd = ['ceph-osd']
            ceph_osd.append('--cluster={cluster}'.format(cluster=cluster))
            ceph_osd.append('--id={osd_id}'.format(osd_id=osd_id))
            ceph_osd.append('--osd-data={path}'.format(path=osd_data))
            ceph_osd.append(
                '--osd-journal={journal}'.format(journal=osd_journal))
            command_check_call(ceph_osd)

        if (not args.no_start_daemon and
                args.mark_init not in (None, 'none')):
            start_daemon(
                cluster=cluster,
                osd_id=osd_id,
            )
3831 | ||
3832 | ||
def main_activate_lockbox(args):
    """
    Entry point for lockbox activation.

    Runs main_activate_lockbox_protected() while holding activate_lock.
    """
    with activate_lock:
        main_activate_lockbox_protected(args)
3836 | ||
3837 | ||
def main_activate_lockbox_protected(args):
    """
    Activate the lockbox stored on the partition ``args.path``.

    This function does not take activate_lock itself.
    """
    lockbox_partition = DevicePartition.factory(path=None,
                                                dev=args.path,
                                                args=args)
    box = Lockbox(args)
    box.set_partition(lockbox_partition)
    box.activate()
3845 | ||
3846 | ||
3847 | ########################### | |
3848 | ||
def _mark_osd_out(cluster, osd_id):
    """
    Run ``ceph osd out osd.<osd_id>``.

    osd_id must be an integer (it is formatted with %d).
    NOTE(review): ``cluster`` is accepted but not passed to the ceph
    command line -- confirm whether --cluster should be given here.
    """
    LOG.info('Prepare to mark osd.%d out...', osd_id)
    osd_name = 'osd.%d' % osd_id
    command(['ceph', 'osd', 'out', osd_name])
3857 | ||
3858 | ||
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
        00(0) : means OSD OUT AND DOWN
        01(1) : means OSD OUT AND UP
        10(2) : means OSD IN AND DOWN
        11(3) : means OSD IN AND UP

    The status is read from ``ceph osd dump --format json`` for the
    given cluster. Raises Error if no entry of the dump has the
    requested osd id.
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    wanted = int(osd_id)
    found = False
    status_code = 0
    for item in out_json[u'osds']:
        if item.get(u'osd') != wanted:
            continue
        found = True
        # Compare by value: "is 1" only worked because CPython caches
        # small integers, and it is a SyntaxWarning since Python 3.8.
        if item.get(u'in') == 1:
            status_code += 2
        if item.get(u'up') == 1:
            status_code += 1
    if not found:
        raise Error('Could not osd.%s in osd tree!' % osd_id)
    return status_code
3891 | ||
3892 | ||
def _remove_osd_directory_files(mounted_path, cluster):
    """
    Remove the 'ready', 'active' and INIT-specific files from the
    mounted osd directory.

    'ready' and 'active' are removed only if they exist; the file named
    after the init system is removed unconditionally.
    """
    for marker in ('ready', 'active'):
        marker_path = os.path.join(mounted_path, marker)
        if os.path.exists(marker_path):
            os.remove(marker_path)
            LOG.info('Remove `%s` file.', marker)
        else:
            LOG.info('`%s` file is already removed.', marker)

    # The init file name comes from the 'init' configuration variable
    # when it is set, and from init_get() otherwise.
    init = get_conf(cluster=cluster, variable='init')
    if init is None:
        init = init_get()
    os.remove(os.path.join(mounted_path, init))
    LOG.info('Remove `%s` file.', init)
3921 | ||
3922 | ||
def main_deactivate(args):
    """
    Entry point for deactivation.

    Runs main_deactivate_locked() while holding activate_lock.
    """
    with activate_lock:
        main_deactivate_locked(args)
3926 | ||
3927 | ||
def main_deactivate_locked(args):
    """
    Deactivate the osd selected by ``args.deactivate_by_id`` or
    ``args.path``.

    Stops the daemon if the osd is up, optionally marks it out, removes
    the activation marker files (unless ``args.once``), unmounts the
    data directory and, for dmcrypt osds, unmounts the lockbox and
    unmaps the encrypted devices.

    Raises Error when no partition matches the request.
    """
    wanted_id = args.deactivate_by_id
    wanted_path = args.path
    target_dev = None

    # Scan every partition; when several match, the last one wins.
    for device in list_devices():
        for part in device.get('partitions', []):
            matches_id = (wanted_id and
                          'whoami' in part and
                          part['whoami'] == wanted_id)
            if matches_id:
                target_dev = part
                continue
            matches_path = (wanted_path and
                            'path' in part and
                            part['path'] == wanted_path)
            if matches_path:
                target_dev = part
    if not target_dev:
        raise Error('Cannot find any match device!!')

    # Everything below works on the partition that was found.
    osd_id = target_dev['whoami']
    mounted_path = target_dev['mount']
    dmcrypt = bool(Ptype.is_dmcrypt(target_dev['ptype'], 'osd'))

    # Nothing more to do when the osd is already down.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_IN_DOWN):
        if args.mark_out is True:
            _mark_osd_out(args.cluster, int(osd_id))
    if status_code in (OSD_STATUS_IN_UP, OSD_STATUS_OUT_UP):
        stop_daemon(args.cluster, osd_id)
    elif status_code in (OSD_STATUS_IN_DOWN, OSD_STATUS_OUT_DOWN):
        LOG.info("OSD already out/down. Do not do anything now.")
        return

    remove_files = not args.once
    if remove_files:
        # remove 'ready', 'active', and INIT-specific files.
        _remove_osd_directory_files(mounted_path, args.cluster)

        # Leave a 'deactive' marker in the osd directory.
        deactive_marker = os.path.join(mounted_path, 'deactive')
        with open(deactive_marker, 'w'):
            path_set_context(os.path.join(mounted_path, 'deactive'))

    unmount(mounted_path, do_rm=remove_files)
    LOG.info("Umount `%s` successfully.", mounted_path)

    if not dmcrypt:
        return

    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox')
    command(['umount', os.path.join(lockbox_dir, target_dev['uuid'])])

    dmcrypt_unmap(target_dev['uuid'])
    for name in Space.NAMES:
        uuid_key = name + '_uuid'
        if uuid_key in target_dev:
            dmcrypt_unmap(target_dev[uuid_key])
3993 | ||
3994 | ########################### | |
3995 | ||
3996 | ||
def _remove_lockbox(uuid, cluster):
    """
    Unmount the lockbox of ``uuid`` and delete every symbolic link in
    the osd-lockbox directory that points at it.

    Does nothing when the osd-lockbox directory does not exist.
    ``cluster`` is not used.
    """
    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox')
    if not os.path.exists(lockbox_dir):
        return
    canonical = os.path.join(lockbox_dir, uuid)
    command(['umount', canonical])
    candidates = [os.path.join(lockbox_dir, entry)
                  for entry in os.listdir(lockbox_dir)]
    for candidate in candidates:
        if not os.path.islink(candidate):
            continue
        if os.readlink(candidate) == canonical:
            os.unlink(candidate)
4007 | ||
4008 | ||
def destroy_lookup_device(args, predicate, description):
    """
    Return ``(dmcrypt, partition)`` for the first partition accepted by
    ``predicate``.

    Lockbox partitions that are not mounted are activated first. Each
    dmcrypt partition is then inspected with list_dev_osd() through its
    mapped device; it is mapped temporarily when it was not mapped
    already. ``dmcrypt`` is True when the returned partition has dmcrypt
    information.

    Raises Error, mentioning ``description``, when nothing matches.
    """
    devices = list_devices()

    # First pass: make sure every lockbox is mounted.
    for device in devices:
        for part in device.get('partitions', []):
            if part['type'] != 'lockbox':
                continue
            if is_mounted(part['path']):
                continue
            lockbox_args = argparse.Namespace(verbose=args.verbose,
                                              path=part['path'])
            main_activate_lockbox_protected(lockbox_args)

    # Second pass: fill in the osd details and look for a match.
    for device in devices:
        for part in device.get('partitions', []):
            dmcrypt = False
            if part['dmcrypt']:
                mapped = dmcrypt_is_mapped(part['uuid'])
                mapped_here = not mapped
                if mapped_here:
                    mapped = dmcrypt_map(part['path'],
                                         args.dmcrypt_key_dir)
                list_dev_osd(mapped, {}, part)
                if mapped_here:
                    # only undo the mapping created above
                    dmcrypt_unmap(part['uuid'])
                dmcrypt = True
            if predicate(part):
                return dmcrypt, part
    raise Error('found no device matching ', description)
4037 | ||
4038 | ||
def main_destroy(args):
    """
    Entry point for destroy.

    Runs main_destroy_locked() while holding activate_lock.
    """
    with activate_lock:
        main_destroy_locked(args)
4042 | ||
4043 | ||
def main_destroy_locked(args):
    """
    Destroy (or purge, with ``args.purge``) the osd selected by
    ``args.path`` or ``args.destroy_by_id``.

    The osd must be down. For dmcrypt osds the space devices are
    unmapped and the lockbox is removed. With ``args.zap`` the base
    device of the partition is zapped afterwards.

    Raises Error when neither a path nor an osd id is given, when the
    path is not a partition, when no device matches, or when the osd is
    still up.
    """
    wanted_id = args.destroy_by_id
    path = args.path

    if path:
        if not is_partition(path):
            raise Error(path + " must be a partition device")
        path = os.path.realpath(path)
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('path') == path,
            path)
    elif wanted_id:
        (dmcrypt, target_dev) = destroy_lookup_device(
            args, lambda x: x.get('whoami') == wanted_id,
            'osd id ' + str(wanted_id))
    else:
        # Without this, target_dev stayed None and the lookups below
        # failed with an unhelpful TypeError.
        raise Error('an osd id or a partition path is required '
                    'to destroy an osd')

    osd_id = target_dev['whoami']
    dev_path = target_dev['path']
    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
        base_dev = get_partition_base_mpath(dev_path)
    else:
        base_dev = get_partition_base(dev_path)

    # An osd cannot be destroyed before it has been deactivated.
    status_code = _check_osd_status(args.cluster, osd_id)
    if status_code not in (OSD_STATUS_OUT_DOWN, OSD_STATUS_IN_DOWN):
        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                    osd_id)

    if args.purge:
        action = 'purge'
    else:
        action = 'destroy'
    LOG.info("Prepare to %s osd.%s" % (action, osd_id))
    command([
        'ceph',
        'osd',
        action,
        'osd.%s' % osd_id,
        '--yes-i-really-mean-it',
    ])

    # we remove the crypt map and device mapper (if dmcrypt is True)
    if dmcrypt:
        for name in Space.NAMES:
            if target_dev.get(name + '_uuid'):
                dmcrypt_unmap(target_dev[name + '_uuid'])
        _remove_lockbox(target_dev['uuid'], args.cluster)

    # With the zap flag, erase the device that holds this osd data.
    if args.zap is True:
        LOG.info("Prepare to zap the device %s" % base_dev)
        zap(base_dev)
4103 | ||
4104 | ||
def get_space_osd_uuid(name, path):
    """
    Return the first line printed by
    ``ceph-osd --get-device-fsid <path>``.

    ``name`` is only used in log and error messages.

    Raises Error when ``path`` does not exist, is not a disk device, is
    a multipath journal/block partition that is not reached through
    multipath, or when ceph-osd fails.
    """
    if not os.path.exists(path):
        raise Error('%s does not exist' % path)

    if not path_is_diskdevice(path):
        raise Error('%s is not a block device' % path)

    if is_partition(path):
        mpath_ready = (PTYPE['mpath']['journal']['ready'],
                       PTYPE['mpath']['block']['ready'])
        if get_partition_type(path) in mpath_ready and not is_mpath(path):
            raise Error('%s is not a multipath block device' %
                        path)

    fsid_command = ['ceph-osd', '--get-device-fsid', path]
    try:
        out = _check_output(args=fsid_command, close_fds=True)
    except subprocess.CalledProcessError as e:
        raise Error(
            'failed to get osd uuid/fsid from %s' % name,
            e,
        )
    first_line = str(out).split('\n', 1)[0]
    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, first_line)
    return first_line
4136 | ||
4137 | ||
def main_activate_space(name, args):
    """
    Activate the osd that owns the space device ``args.dev``.

    The osd uuid is read from the space device, the matching data
    partition is looked up under /dev/disk/by-partuuid/, mounted and
    activated, and the daemon is started. Returns without doing anything
    when the request is suppressed or the data device is not present
    yet.

    Raises Error when ``args.dev`` does not exist.
    """
    if not os.path.exists(args.dev):
        raise Error('%s does not exist' % args.dev)

    if is_suppressed(args.dev):
        LOG.info('suppressed activate request on space %s', args.dev)
        return

    with activate_lock:
        dev = args.dev
        if args.dmcrypt:
            dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
        # FIXME: For an encrypted journal dev, does this return the
        # cyphertext or plaintext dev uuid!? Also, if the journal is
        # encrypted, is the data partition also always encrypted, or
        # are mixed pairs supported!?
        osd_uuid = get_space_osd_uuid(name, dev)
        data_path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())

        if is_suppressed(data_path):
            LOG.info('suppressed activate request on %s', data_path)
            return

        # warn and exit with 0 if the data device is not up, yet
        # data device will do the activation
        if not os.access(data_path, os.F_OK):
            LOG.info("activate: OSD device not present, not starting, yet")
            return

        (cluster, osd_id) = mount_activate(
            dev=data_path,
            activate_key_template=args.activate_key_template,
            init=args.mark_init,
            dmcrypt=args.dmcrypt,
            dmcrypt_key_dir=args.dmcrypt_key_dir,
            reactivate=args.reactivate,
        )

        start_daemon(cluster=cluster, osd_id=osd_id)
4185 | ||
4186 | ||
4187 | ########################### | |
4188 | ||
4189 | ||
def main_activate_all(args):
    """
    Activate every ready osd data partition listed in
    /dev/disk/by-parttypeuuid.

    Entries are named <partition type>.<partition uuid>. dmcrypt
    partitions are activated through /dev/mapper/<uuid>. A failure on
    one partition is printed to stderr and the scan continues.

    Raises Error at the end when at least one partition failed.
    """
    scan_dir = '/dev/disk/by-parttypeuuid'
    LOG.debug('Scanning %s', scan_dir)
    if not os.path.exists(scan_dir):
        return
    failed = False
    for entry in os.listdir(scan_dir):
        if '.' not in entry:
            continue
        (tag, part_uuid) = entry.split('.')

        if tag not in Ptype.get_ready_by_name('osd'):
            continue

        if Ptype.is_dmcrypt(tag, 'osd'):
            path = os.path.join('/dev/mapper', part_uuid)
        else:
            path = os.path.join(scan_dir, entry)

        if is_suppressed(path):
            LOG.info('suppressed activate request on %s', path)
            continue

        LOG.info('Activating %s', path)
        with activate_lock:
            try:
                # never map dmcrypt cyphertext devices
                (cluster, osd_id) = mount_activate(
                    dev=path,
                    activate_key_template=args.activate_key_template,
                    init=args.mark_init,
                    dmcrypt=False,
                    dmcrypt_key_dir='',
                )
                start_daemon(cluster=cluster, osd_id=osd_id)
            except Exception as e:
                message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
                print(message, file=sys.stderr)
                failed = True

    if failed:
        raise Error('One or more partitions failed to activate')
4238 | ||
4239 | ||
4240 | ########################### | |
4241 | ||
def is_swap(dev):
    """
    Return True when ``dev`` is listed as a swap device in
    PROCDIR/swaps.

    Both ``dev`` and the listed devices are resolved with
    os.path.realpath() before being compared. The first line of the
    file (the header) is skipped, as are lines with fewer than three
    fields.
    """
    dev = os.path.realpath(dev)
    # Text mode: in binary mode the fields are bytes on Python 3 and
    # never compare equal to the str ``dev``.
    with open(PROCDIR + '/swaps', 'r') as proc_swaps:
        for line in proc_swaps.readlines()[1:]:
            fields = line.split()
            if len(fields) < 3:
                continue
            swaps_dev = fields[0]
            if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
                swaps_dev = os.path.realpath(swaps_dev)
                if swaps_dev == dev:
                    return True
    return False
4255 | ||
4256 | ||
def get_oneliner(base, name):
    """
    Return the first line of the file ``base``/``name`` with trailing
    whitespace removed, converted with _bytes2str().

    Returns None when the path is not a regular file.
    """
    target = os.path.join(base, name)
    if not os.path.isfile(target):
        return None
    with open(target, 'rb') as handle:
        first_line = handle.readline()
        return _bytes2str(first_line.rstrip())
4263 | ||
4264 | ||
def get_dev_fs(dev):
    """
    Return the file system type found on ``dev``, or None.

    On FreeBSD this is the output of ``fstyp -u`` when it exits with 0.
    Elsewhere it is the quoted value taken from the output of
    ``blkid -s TYPE``.
    """
    if FREEBSD:
        fstype, _, ret = command(['fstyp', '-u', dev])
        return fstype if ret == 0 else None

    fscheck, _, _ = command(['blkid', '-s', 'TYPE', dev])
    if 'TYPE' not in fscheck:
        return None
    # the second whitespace-separated word is expected to be TYPE="..."
    type_field = fscheck.split()[1]
    return type_field.split('"')[1]
4289 | ||
4290 | ||
def split_dev_base_partnum(dev):
    """
    Return ``(base, partnum)`` for the partition ``dev``.

    For multipath devices both values come from the mpath helpers.
    Otherwise the partition number is the stripped content of the
    'partition' file under block_path(dev).
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # close the file explicitly rather than leaving it to the
        # garbage collector
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
4300 | ||
4301 | ||
def get_partition_type(part):
    """Return the ID_PART_ENTRY_TYPE value blkid reports for ``part``."""
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_TYPE')
4304 | ||
4305 | ||
def get_partition_uuid(part):
    """Return the ID_PART_ENTRY_UUID value blkid reports for ``part``."""
    return get_blkid_partition_info(part, what='ID_PART_ENTRY_UUID')
4308 | ||
4309 | ||
def get_blkid_partition_info(dev, what=None):
    """
    Parse the KEY=VALUE lines printed by ``blkid -o udev -p <dev>``.

    Returns the value of the key ``what`` (None when it is absent), or
    the whole dictionary when ``what`` is not given.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # Split on the first '=' only, so a value containing '=' does
        # not raise; lines without any '=' (e.g. blank) are ignored.
        key, separator, value = line.partition('=')
        if not separator:
            continue
        p[key] = value
    if what:
        return p.get(what)
    return p
4328 | ||
4329 | ||
def more_osd_info(path, uuid_map, desc):
    """
    Fill ``desc`` with what the osd directory ``path`` says about the
    osd: 'ceph_fsid', 'cluster' (when the fsid is known), 'whoami' and,
    for each name in Space.NAMES, '<name>_uuid' plus '<name>_dev' when
    the uuid is a key of ``uuid_map``.
    """
    fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = fsid
    if fsid:
        desc['cluster'] = find_cluster_by_uuid(fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')
    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4341 | ||
4342 | ||
def list_dev_osd(dev, uuid_map, desc):
    """
    Describe the osd data device ``dev`` in ``desc``.

    'state' is 'active' when the device is mounted, 'prepared' when it
    can be mounted temporarily and contains a 'magic' file, and
    'unprepared' otherwise. A MountError during the temporary mount is
    ignored.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'

    if desc['mount']:
        desc['state'] = 'active'
        more_osd_info(desc['mount'], uuid_map, desc)
        return

    if not desc['fs_type']:
        return

    try:
        tpath = mount(dev=dev, fstype=desc['fs_type'], options='')
        if not tpath:
            return
        try:
            magic = get_oneliner(tpath, 'magic')
            if magic is not None:
                desc['magic'] = magic
                desc['state'] = 'prepared'
                more_osd_info(tpath, uuid_map, desc)
        finally:
            # always release the temporary mount
            unmount(tpath)
    except MountError:
        pass
4364 | ||
4365 | ||
def list_dev_lockbox(dev, uuid_map, desc):
    """
    Describe the lockbox device ``dev`` in ``desc``.

    'state' is 'active' when the lockbox is mounted, 'prepared' when it
    can be mounted temporarily (as ext4) and contains a 'magic' file,
    and 'unprepared' otherwise. 'lockbox_for' is set when the osd uuid
    found in the lockbox is a key of ``uuid_map``.
    """
    desc['mount'] = is_mounted(dev)
    desc['fs_type'] = get_dev_fs(dev)
    desc['state'] = 'unprepared'
    if desc['mount']:
        desc['state'] = 'active'
        desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
    elif desc['fs_type']:
        tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
        args = ['mount', '-t', 'ext4', dev, tpath]
        LOG.debug('Mounting lockbox ' + str(" ".join(args)))
        mounted = False
        try:
            command_check_call(args)
            mounted = True
            try:
                magic = get_oneliner(tpath, 'magic')
                if magic is not None:
                    desc['magic'] = magic
                    desc['state'] = 'prepared'
                    desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
            finally:
                # unmount even when reading the lockbox files fails
                unmount(tpath)
        except subprocess.CalledProcessError:
            if not mounted:
                # The mount failed: do not leave the temporary mount
                # point behind (best effort).
                try:
                    os.rmdir(tpath)
                except OSError:
                    pass
    if desc.get('osd_uuid') in uuid_map:
        desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
4389 | ||
4390 | ||
def list_format_lockbox_plain(dev):
    """
    Return the plain-text description fragments of a lockbox: the
    device it belongs to when known, else the osd uuid, else nothing.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
4398 | ||
4399 | ||
def list_format_more_osd_info_plain(dev):
    """
    Return the plain-text description fragments of an osd: its cluster
    (or 'unknown cluster <fsid>'), its osd id and the device of each
    space listed in Space.NAMES.
    """
    fragments = []
    fsid = dev.get('ceph_fsid')
    if fsid:
        cluster = dev.get('cluster')
        if cluster:
            fragments.append('cluster ' + cluster)
        else:
            fragments.append('unknown cluster ' + fsid)
    whoami = dev.get('whoami')
    if whoami:
        fragments.append('osd.%s' % whoami)
    fragments.extend(name + ' %s' % dev[name + '_dev']
                     for name in Space.NAMES
                     if dev.get(name + '_dev'))
    return fragments
4413 | ||
4414 | ||
def list_format_dev_plain(dev, prefix=''):
    """
    Return the one-line plain-text description of the device or
    partition ``dev`` (a dictionary as built by list_dev()), starting
    with ``prefix`` and the device path.
    """
    ptype = dev['ptype']
    lockbox_ptypes = (PTYPE['regular']['lockbox']['ready'],
                      PTYPE['mpath']['lockbox']['ready'])

    if ptype == PTYPE['regular']['osd']['ready']:
        desc = ['ceph data', dev['state']]
        desc = desc + list_format_more_osd_info_plain(dev)
    elif ptype in lockbox_ptypes:
        desc = ['ceph lockbox', dev['state']]
        desc = desc + list_format_lockbox_plain(dev)
    elif Ptype.is_dmcrypt(ptype, 'osd'):
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if not holders:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'not currently mapped']
        elif len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph data (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
            desc += list_format_more_osd_info_plain(dev)
        else:
            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
                    'holders: ' + ','.join(holders)]
    elif Ptype.is_regular_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        desc = ['ceph ' + name]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    elif Ptype.is_dmcrypt_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        dmcrypt = dev['dmcrypt']
        holders = dmcrypt['holders']
        if holders and len(holders) == 1:
            holder = get_dev_path(holders[0])
            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
                    (dmcrypt['type'], holder)]
        else:
            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
        if dev.get(name + '_for'):
            desc.append('for %s' % dev[name + '_for'])
    else:
        # anything that is not a ceph partition type handled above
        desc = [dev['type']]
        if dev.get('fs_type'):
            desc.append(dev['fs_type'])
        elif dev.get('ptype'):
            desc.append(dev['ptype'])
        if dev.get('mount'):
            desc.append('mounted on %s' % dev['mount'])
    return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4462 | ||
4463 | ||
def list_format_plain(devices):
    """
    Return the plain-text listing of ``devices``, one line per device
    or partition. A disk with partitions gets a '<path> :' header line
    followed by its partitions sorted by path.
    """
    lines = []
    for device in devices:
        if not device.get('partitions'):
            lines.append(list_format_dev_plain(dev=device, prefix=''))
            continue
        lines.append('%s :' % device['path'])
        by_path = sorted(device['partitions'], key=lambda x: x['path'])
        lines.extend(list_format_dev_plain(dev=part, prefix=' ')
                     for part in by_path)
    return "\n".join(lines)
4476 | ||
4477 | ||
def list_dev(dev, uuid_map, space_map):
    """
    Return a dictionary describing the device or partition ``dev``.

    The dictionary always has 'path', 'dmcrypt', 'is_partition',
    'ptype' and 'type'; the other keys depend on the partition type.
    ``uuid_map`` maps partition uuids to devices and ``space_map`` maps
    space (journal, block, ...) partition uuids to the data partition
    using them.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # Compare with the multipath *lockbox* type: the osd type can
        # never match in this branch, so the flag was never set.
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # the osd can only be inspected through a single mapped device
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4554 | ||
4555 | ||
def list_devices():
    """
    Return the description of every disk known to
    list_all_partitions().

    A first pass over all partitions builds uuid_map (partition uuid to
    device) and space_map (space uuid to the data partition that
    references it, found by temporarily mounting each ready osd data
    partition). A second pass builds one dictionary per disk with
    list_dev(); a disk with partitions carries them under 'partitions'.
    """
    partmap = list_all_partitions()

    uuid_map = {}
    space_map = {}
    for _base, parts in sorted(partmap.items()):
        for part in parts:
            dev = get_dev_path(part)
            part_uuid = get_partition_uuid(dev)
            if part_uuid:
                uuid_map[part_uuid] = dev
            ptype = get_partition_type(dev)
            LOG.debug("main_list: " + dev +
                      " ptype = " + str(ptype) +
                      " uuid = " + str(part_uuid))
            if ptype not in Ptype.get_ready_by_name('osd'):
                continue

            dev_to_mount = dev
            if Ptype.is_dmcrypt(ptype, 'osd'):
                # only a partition mapped exactly once can be mounted
                holders = is_held(dev)
                if len(holders) != 1:
                    continue
                dev_to_mount = get_dev_path(holders[0])

            fs_type = get_dev_fs(dev_to_mount)
            if fs_type is None:
                continue
            mount_options = get_mount_options(cluster='ceph',
                                              fs_type=fs_type)
            try:
                tpath = mount(dev=dev_to_mount,
                              fstype=fs_type, options=mount_options)
                try:
                    for name in Space.NAMES:
                        space_uuid = get_oneliner(tpath, name + '_uuid')
                        if space_uuid:
                            space_map[space_uuid.lower()] = dev
                finally:
                    unmount(tpath)
            except MountError:
                pass

    LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
              str(uuid_map) + ", space_map = " + str(space_map))

    devices = []
    for base, parts in sorted(partmap.items()):
        if parts:
            disk = {'path': get_dev_path(base)}
            disk['partitions'] = [
                list_dev(get_dev_path(part), uuid_map, space_map)
                for part in sorted(parts)
            ]
            devices.append(disk)
        else:
            whole_disk = list_dev(get_dev_path(base), uuid_map, space_map)
            whole_disk['path'] = get_dev_path(base)
            devices.append(whole_disk)
    LOG.debug("list_devices: " + str(devices))
    return devices
4618 | ||
4619 | ||
def list_zfs():
    """
    Print one line per entry of ``zfs list -o name,mountpoint``.

    An entry whose mountpoint contains an 'active' file is reported as
    active ceph data, the others as plain zfs. A CalledProcessError
    from the command is logged and re-raised.
    """
    zfs_command = ['zfs', 'list', '-o', 'name,mountpoint']
    try:
        out, err, ret = command(zfs_command)
    except subprocess.CalledProcessError as e:
        LOG.info('zfs list -o name,mountpoint '
                 'fails.\n (Error: %s)' % e)
        raise
    # the first line is the column header
    for line in out.splitlines()[1:]:
        fields = line.split()
        mountpoint = fields[1]
        vdev = fields[0]
        if os.path.exists(os.path.join(mountpoint, 'active')):
            print(vdev, "ceph data, active, cluster ceph,",
                  os.path.basename(mountpoint),
                  "mounted on:", mountpoint)
        else:
            print(vdev + " other, zfs, mounted on: " + mountpoint)
4642 | ||
4643 | ||
def main_list(args):
    """
    Entry point for list: hold activate_lock and run the FreeBSD or the
    generic implementation.
    """
    with activate_lock:
        lister = main_list_freebsd if FREEBSD else main_list_protected
        lister(args)
4650 | ||
4651 | ||
def main_list_protected(args):
    """
    Print the devices found by list_devices(), as JSON when
    ``args.format`` is 'json' and as plain text otherwise.

    When ``args.path`` is given, only devices whose path matches one of
    the paths are printed. Each path is used as a regular expression
    anchored at the end of the device path; existing paths are resolved
    with os.path.realpath() first. A device is listed once per matching
    path.
    """
    devices = list_devices()
    if args.path:
        wanted = [os.path.realpath(path) if os.path.exists(path) else path
                  for path in args.path]
        selected_devices = [device
                            for device in devices
                            for path in wanted
                            if re.search(path + '$', device['path'])]
    else:
        selected_devices = devices

    if args.format == 'json':
        print(json.dumps(selected_devices))
        return
    output = list_format_plain(selected_devices)
    if output:
        print(output)
4674 | ||
4675 | ||
def main_list_freebsd(args):
    """
    List the ZFS datasets and their mountpoints with list_zfs().

    ``args`` is not used.
    """
    # Currently accommodates only ZFS Filestore partitions:
    # returns a list of VDEVs and mountpoints
    # > zfs list
    # NAME USED AVAIL REFER MOUNTPOINT
    # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
    # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
    list_zfs()
4684 | ||
4685 | ||
4686 | ########################### | |
4687 | # | |
4688 | # Mark devices that we want to suppress activates on with a | |
4689 | # file like | |
4690 | # | |
4691 | # /var/lib/ceph/tmp/suppress-activate.sdb | |
4692 | # | |
4693 | # where the last bit is the sanitized device name (/dev/X without the | |
4694 | # /dev/ prefix) and the is_suppressed() check matches a prefix. That | |
4695 | # means suppressing sdb will stop activate on sdb1, sdb2, etc. | |
4696 | # | |
4697 | ||
def is_suppressed(path):
    """
    Return True when activation is suppressed for ``path``.

    ``path`` is resolved with os.path.realpath(). It is suppressed when
    a file SUPPRESS_PREFIX + <prefix> exists for any non-empty prefix
    of its device name, so suppressing a disk also covers its
    partitions. Paths outside /dev/, paths that are not disk devices
    and any error during the check yield False.
    """
    disk = os.path.realpath(path)
    try:
        if (not disk.startswith('/dev/') or
                not ldev_is_diskdevice(disk)):
            return False
        base = get_dev_name(disk)
        while len(base):
            if os.path.exists(SUPPRESS_PREFIX + base):  # noqa
                return True
            base = base[:-1]
    except Exception:
        # Best effort: a failing check means "not suppressed". Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit through.
        return False
    return False
4711 | ||
4712 | ||
def set_suppress(path):
    """
    Create the (empty) suppress flag file for the device ``path``.

    Raises Error when the resolved path does not exist or ``path`` is
    not a disk device.
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    base = get_dev_name(disk)

    # an empty file is enough, only its existence is checked
    open(SUPPRESS_PREFIX + base, 'w').close()  # noqa
    LOG.info('set suppress flag on %s', base)
4724 | ||
4725 | ||
def unset_suppress(path):
    """Remove the flag file that suppresses activation of ``path``.

    Raises ``Error`` when the resolved path does not exist, when
    ``ldev_is_diskdevice`` rejects ``path``, when no flag file is present
    for the device, or when the flag file cannot be removed.
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    assert resolved.startswith('/dev/')

    name = get_dev_name(resolved)
    flag_file = SUPPRESS_PREFIX + name  # noqa
    if not os.path.exists(flag_file):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(flag_file)
        LOG.info('unset suppress flag on %s', name)
    except OSError as e:
        raise Error('failed to unsuppress', e)
4744 | ||
4745 | ||
def main_suppress(args):
    """Entry point of ``suppress-activate``: flag ``args.path``."""
    path = args.path
    set_suppress(path)
4748 | ||
4749 | ||
def main_unsuppress(args):
    """Entry point of ``unsuppress-activate``: unflag ``args.path``."""
    path = args.path
    unset_suppress(path)
4752 | ||
4753 | ||
def main_zap(args):
    """Entry point of ``zap``: call ``zap()`` on each of ``args.dev``."""
    for device in args.dev:
        zap(device)
4757 | ||
4758 | ||
def main_trigger(args):
    """Entry point of ``trigger``: activate ``args.dev`` by partition type.

    Unless ``args.sync`` is set, the work is handed over to systemd
    (``ceph-disk@<dev>.service``) or upstart (``ceph-disk`` event) when one
    of them is detected, and the function returns immediately.

    Otherwise the partition type of ``args.dev`` selects which ceph-disk
    subcommand is run.  Raises ``Error`` when the partition type is not
    recognized or when the subcommand exits with a non-zero status.
    """
    LOG.debug("main_trigger: " + str(args))
    if is_systemd() and not args.sync:
        # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
        escaped_dev = args.dev[1:].replace('-', '\\x2d')
        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
        LOG.info('systemd detected, triggering %s' % service)
        command(['systemctl', '--no-block', 'restart', service])
        return
    if is_upstart() and not args.sync:
        LOG.info('upstart detected, triggering ceph-disk task')
        command([
            'initctl',
            'emit',
            'ceph-disk',
            'dev={dev}'.format(dev=args.dev),
            'pid={pid}'.format(pid=os.getpid()),
        ])
        return

    if get_ceph_user() == 'ceph':
        command_check_call(['chown', 'ceph:ceph', args.dev])
    parttype = get_partition_type(args.dev)
    partid = get_partition_uuid(args.dev)

    LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
        dev=args.dev,
        parttype=parttype,
        partid=partid,
    ))

    ceph_disk = ['ceph-disk']
    if args.verbose:
        ceph_disk.append('--verbose')

    def ready(flavors, spaces):
        # 'ready' partition type UUIDs for every flavor/space combination
        return tuple(PTYPE[flavor][space]['ready']
                     for flavor in flavors
                     for space in spaces)

    clear = ('regular', 'mpath')
    encrypted = ('plain', 'luks')
    block_spaces = ('block', 'block.db', 'block.wal')

    # Pick the subcommand (and --dmcrypt for encrypted flavors) matching
    # the partition type; the lookups are only done as far as needed.
    if parttype in ready(clear, ('osd',)):
        subcommand = ['activate']
    elif parttype in ready(encrypted, ('osd',)):
        subcommand = ['activate', '--dmcrypt']
    elif parttype in ready(clear, ('journal',)):
        subcommand = ['activate-journal']
    elif parttype in ready(encrypted, ('journal',)):
        subcommand = ['activate-journal', '--dmcrypt']
    elif parttype in ready(clear, block_spaces):
        subcommand = ['activate-block']
    elif parttype in ready(encrypted, block_spaces):
        subcommand = ['activate-block', '--dmcrypt']
    elif parttype in ready(clear, ('lockbox',)):
        subcommand = ['activate-lockbox']
    else:
        raise Error('unrecognized partition type %s' % parttype)

    out, err, ret = command(ceph_disk + subcommand + [args.dev])

    if ret != 0:
        LOG.info(out)
        LOG.error(err)
        raise Error('return code ' + str(ret))

    LOG.debug(out)
    LOG.debug(err)
4894 | ||
4895 | ||
def _main_fix_find_command(directory, uid, gid, recursive):
    """Build the find(1) command that chowns and relabels ``directory``."""
    c = ['find', directory]
    # -maxdepth is a global find option; GNU find warns when it follows
    # an action such as -exec, so it is placed right after the path.
    if not recursive:
        c += ['-maxdepth', '0']
    c += [
        '-exec', 'chown', ':'.join((uid, gid)), '{}', '+',
        '-exec', 'restorecon', '{}', '+',
    ]
    return c


def _main_fix_chown_command(directory, uid, gid, recursive):
    """Build the chown command for ``directory`` (``-R`` when recursive)."""
    c = ['chown']
    if recursive:
        c.append('-R')
    c += [':'.join((uid, gid)), directory]
    return c


def _main_fix_restorecon_command(directory, uid, gid, recursive):
    """Build the restorecon command for ``directory`` (``-R`` when recursive)."""
    c = ['restorecon']
    if recursive:
        c.append('-R')
    c.append(directory)
    return c


def _main_fix_run(fix_table, build_command, failure_prefix, error_suffix):
    """Run one fix pass over ``fix_table``.

    Blocking entries are run to completion and raise ``Error`` on a
    non-zero exit status; the other entries are started with
    ``command_init`` and returned so the caller can wait for them.
    """
    background = []
    for directory, uid, gid, blocking, recursive in fix_table:
        # Skip directories/files that are not installed
        if not os.access(directory, os.F_OK):
            continue

        c = build_command(directory, uid, gid, recursive)
        if not blocking:
            background.append(command_init(c))
            continue

        out, err, ret = command(c)
        if ret:
            LOG.error(failure_prefix + directory)
            LOG.error(err)
            raise Error(directory + error_suffix)
    return background


def _main_fix_wait(processes, list_name, kind):
    """Wait for background ``processes``; raise ``Error`` if one failed."""
    LOG.debug(list_name + ": " + str(processes))
    for process in processes:
        out, err, ret = command_wait(process)
        if ret:
            LOG.error("A background " + kind + " process failed")
            LOG.error(err)
            raise Error("background failed")


def main_fix(args):
    """Entry point of ``fix``: repair ownership and/or SELinux labels.

    Depending on ``args.system``, ``args.all``, ``args.permissions`` and
    ``args.selinux`` the function relabels the non-ceph system data,
    runs a combined chown + restorecon pass, a chown pass, or a
    restorecon pass over the ceph files.

    Raises ``Error`` when SELinux is required but ``selinuxenabled``
    fails, when one of the ceph daemons is found running by ``pgrep``,
    or when any of the commands exits with a non-zero status.
    """
    # A table of (path, uid, gid, blocking, recursive) entries
    fix_table = [
        ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
        ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
        ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
        ('/etc/ceph', 'root', ROOTGROUP, True, True),
        ('/var/run/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/ceph', 'ceph', 'ceph', True, True),
        ('/var/log/radosgw', 'ceph', 'ceph', True, True),
        ('/var/lib/ceph', 'ceph', 'ceph', True, False),
    ]

    # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
    for directory in glob.glob('/var/lib/ceph/*'):
        recursive = directory != '/var/lib/ceph/osd'
        fix_table.append((directory, 'ceph', 'ceph', True, recursive))

    # Relabel/chown the osds recursively and in parallel
    for directory in glob.glob('/var/lib/ceph/osd/*'):
        fix_table.append((directory, 'ceph', 'ceph', False, True))

    LOG.debug("fix_table: " + str(fix_table))

    # The lists of background processes
    all_processes = []
    permissions_processes = []
    selinux_processes = []

    # Preliminary checks
    if args.selinux or args.all:
        out, err, ret = command(['selinuxenabled'])
        if ret:
            LOG.error('SELinux is not enabled, please enable it, first.')
            raise Error('no SELinux')

    for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
        out, err, ret = command(['pgrep', daemon])
        if ret == 0:
            LOG.error(daemon + ' is running, please stop it, first')
            raise Error(daemon + ' running')

    # Relabel the basic system data without the ceph files
    if args.system or args.all:
        c = ['restorecon', '-R', '/']
        for entry in fix_table:
            directory = entry[0]
            # Skip /var/lib/ceph subdirectories
            if directory.startswith('/var/lib/ceph/'):
                continue
            c.append('-e')
            c.append(directory)

        out, err, ret = command(c)

        if ret:
            LOG.error("Failed to restore labels of the underlying system")
            LOG.error(err)
            raise Error("basic restore failed")

    # Use find to relabel + chown ~simultaneously
    if args.all:
        all_processes = _main_fix_run(
            fix_table, _main_fix_find_command,
            "Failed to fix ", " fix failed")
    _main_fix_wait(all_processes, "all_processes", "find")

    # Fix permissions
    if args.permissions:
        permissions_processes = _main_fix_run(
            fix_table, _main_fix_chown_command,
            "Failed to chown ", " chown failed")
    _main_fix_wait(permissions_processes, "permissions_processes",
                   "permissions")

    # Fix SELinux labels
    if args.selinux:
        selinux_processes = _main_fix_run(
            fix_table, _main_fix_restorecon_command,
            "Failed to restore labels for ", " relabel failed")
    _main_fix_wait(selinux_processes, "selinux_processes", "selinux")

    LOG.info(
        "The ceph files has been fixed, please reboot "
        "the system for the changes to take effect."
    )
5081 | ||
5082 | ||
def setup_statedir(dir):
    """Point the module at the state directory ``dir`` and prepare it.

    Creates ``dir`` and ``dir/tmp`` when they are missing, then sets the
    module globals ``STATEDIR``, ``prepare_lock``, ``activate_lock`` and
    ``SUPPRESS_PREFIX`` accordingly.

    Raises ``OSError`` when a missing directory cannot be created.
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR
    STATEDIR = dir

    for path in (STATEDIR, STATEDIR + "/tmp"):
        if os.path.exists(path):
            continue
        try:
            os.mkdir(path)
        except OSError as e:
            # Another ceph-disk process may have created the directory
            # between the check above and the mkdir call; that is fine.
            if e.errno != errno.EEXIST:
                raise

    global prepare_lock
    prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')

    global activate_lock
    activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')

    global SUPPRESS_PREFIX
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5103 | ||
5104 | ||
def setup_sysconfdir(dir):
    """Record ``dir`` in the module global ``SYSCONFDIR``."""
    global SYSCONFDIR
    SYSCONFDIR = dir
5108 | ||
5109 | ||
def parse_args(argv):
    """Build the ``ceph-disk`` command line parser and parse ``argv``.

    Registers the global options, then every subcommand parser, and
    returns the namespace produced by ``ArgumentParser.parse_args``.
    """
    parser = argparse.ArgumentParser('ceph-disk')

    # (flags, keyword arguments) of the global options, in help order
    global_options = [
        (('-v', '--verbose'), dict(
            action='store_true', default=None,
            help='be more verbose',
        )),
        (('--log-stdout',), dict(
            action='store_true', default=None,
            help='log to stdout',
        )),
        (('--prepend-to-path',), dict(
            metavar='PATH',
            default='/usr/bin',
            help=('prepend PATH to $PATH for backward compatibility '
                  '(default /usr/bin)'),
        )),
        (('--statedir',), dict(
            metavar='PATH',
            default='/var/lib/ceph',
            help=('directory in which ceph state is preserved '
                  '(default /var/lib/ceph)'),
        )),
        (('--sysconfdir',), dict(
            metavar='PATH',
            default='/etc/ceph',
            help=('directory in which ceph configuration files are found '
                  '(default /etc/ceph)'),
        )),
        (('--setuser',), dict(
            metavar='USER',
            default=None,
            help='use the given user for subprocesses, rather than ceph or root'
        )),
        (('--setgroup',), dict(
            metavar='GROUP',
            default=None,
            help='use the given group for subprocesses, rather than ceph or root'
        )),
    ]
    for flags, options in global_options:
        parser.add_argument(*flags, **options)

    # we want to hold on to this, for later
    parser.set_defaults(prog=parser.prog)

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='sub-command help',
    )

    Prepare.set_subparser(subparsers)
    for register in (
            make_activate_parser,
            make_activate_lockbox_parser,
            make_activate_block_parser,
            make_activate_journal_parser,
            make_activate_all_parser,
            make_list_parser,
            make_suppress_parser,
            make_deactivate_parser,
            make_destroy_parser,
            make_zap_parser,
            make_trigger_parser,
            make_fix_parser,
    ):
        register(subparsers)

    return parser.parse_args(argv)
5184 | ||
5185 | ||
def make_fix_parser(subparsers):
    """Register the ``fix`` subcommand on ``subparsers``; return its parser."""
    fix_parser = subparsers.add_parser(
        'fix',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # the description is deliberately an empty string
        description=textwrap.fill(textwrap.dedent("")),
        help='fix SELinux labels and/or file permissions')

    # every option is a boolean switch that is off by default
    switches = (
        ('--system', 'fix SELinux labels for the non-ceph system data'),
        ('--selinux', 'fix SELinux labels for ceph data'),
        ('--permissions', 'fix file permissions for ceph data'),
        ('--all', 'perform all the fix-related operations'),
    )
    for flag, text in switches:
        fix_parser.add_argument(
            flag,
            action='store_true',
            default=False,
            help=text
        )

    fix_parser.set_defaults(func=main_fix)
    return fix_parser
5222 | ||
5223 | ||
def make_trigger_parser(subparsers):
    """Register the ``trigger`` subcommand on ``subparsers``; return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        The partition given in argument is activated. The type of the
        partition (data, lockbox, journal etc.) is detected by its
        type. If the init system is upstart or systemd, the activation is
        delegated to it and runs asynchronously, which
        helps reduce the execution time of udev actions.
        """))
    trigger_parser = subparsers.add_parser(
        'trigger',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='activate any device (called by udev)')

    arguments = [
        ('dev', dict(
            help=('device'),
        )),
        ('--cluster', dict(
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )),
        ('--dmcrypt', dict(
            action='store_true', default=None,
            help='map devices with dm-crypt',
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('--sync', dict(
            action='store_true', default=None,
            help='do operation synchronously; do not trigger systemd',
        )),
    ]
    for name, options in arguments:
        trigger_parser.add_argument(name, **options)

    trigger_parser.set_defaults(func=main_trigger)
    return trigger_parser
5266 | ||
5267 | ||
def make_activate_parser(subparsers):
    """Register the ``activate`` subcommand on ``subparsers``; return its parser.

    The help text embeds the current value of the ``STATEDIR`` global.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activate the OSD found at PATH (can be a directory
        or a device partition, possibly encrypted). When
        activated for the first time, a unique OSD id is obtained
        from the cluster. If PATH is a directory, a symbolic
        link is added in {statedir}/osd/ceph-$id. If PATH is
        a partition, it is mounted on {statedir}/osd/ceph-$id.
        Finally, the OSD daemon is run.

        If the OSD depends on auxiliary partitions (journal, block, ...)
        they need to be available otherwise activation will fail. It
        may happen if a journal is encrypted and cryptsetup was not
        run yet.
        """.format(statedir=STATEDIR)))
    activate_parser = subparsers.add_parser(
        'activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph OSD')

    arguments = [
        ('--mount', dict(
            action='store_true', default=None,
            help='mount a block device [deprecated, ignored]',
        )),
        ('--activate-key', dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--mark-init', dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=INIT_SYSTEMS,
        )),
        ('--no-start-daemon', dict(
            action='store_true', default=None,
            help='do not start the daemon',
        )),
        ('path', dict(
            metavar='PATH',
            help='path to block device or directory',
        )),
        ('--dmcrypt', dict(
            action='store_true', default=None,
            help='map DATA and/or JOURNAL devices with dm-crypt',
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('--reactivate', dict(
            action='store_true', default=False,
            help='activate the deactived OSD',
        )),
    ]
    for name, options in arguments:
        activate_parser.add_argument(name, **options)

    activate_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate,
    )
    return activate_parser
5336 | ||
5337 | ||
def make_activate_lockbox_parser(subparsers):
    """Register the ``activate-lockbox`` subcommand; return its parser.

    The help text embeds the current value of the ``STATEDIR`` global.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
        where $uuid uniquely identifies the OSD that needs this lockbox
        to retrieve keys from the monitor and unlock its partitions.

        If the OSD has one or more auxiliary devices (journal, block, ...)
        symbolic links are created at {statedir}/osd-lockbox/$other_uuid
        and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
        allow a journal encrypted in a partition identified by $other_uuid to
        fetch the keys it needs from the monitor.

        Finally the OSD is activated, as it would be with ceph-disk activate.
        """.format(statedir=STATEDIR)))
    parser = subparsers.add_parser(
        'activate-lockbox',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate a Ceph lockbox')

    arguments = [
        ('--activate-key', dict(
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('path', dict(
            metavar='PATH',
            help='path to block device',
        )),
    ]
    for name, options in arguments:
        parser.add_argument(name, **options)

    parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_lockbox,
    )
    return parser
5377 | ||
5378 | ||
def make_activate_block_parser(subparsers):
    """Register the ``activate-block`` subcommand; return its parser."""
    return make_activate_space_parser(name='block', subparsers=subparsers)
5381 | ||
5382 | ||
def make_activate_journal_parser(subparsers):
    """Register the ``activate-journal`` subcommand; return its parser."""
    return make_activate_space_parser(name='journal', subparsers=subparsers)
5385 | ||
5386 | ||
def make_activate_space_parser(name, subparsers):
    """Register the ``activate-<name>`` subcommand; return its parser.

    ``name`` is the kind of auxiliary device (the file uses ``'block'``
    and ``'journal'``); it is interpolated in the help texts and passed
    to ``main_activate_space`` when the subcommand runs.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Activating a {name} partition is only meaningfull
        if it is encrypted and it will map it using
        cryptsetup.

        Finally the corresponding OSD is activated,
        as it would be with ceph-disk activate.
        """.format(name=name)))
    activate_space_parser = subparsers.add_parser(
        'activate-%s' % name,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate an OSD via its %s device' % name)

    arguments = [
        ('dev', dict(
            metavar='DEV',
            help='path to %s block device' % name,
        )),
        ('--activate-key', dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--mark-init', dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=INIT_SYSTEMS,
        )),
        ('--dmcrypt', dict(
            action='store_true', default=None,
            help=('map data and/or auxiliariy (journal, etc.) '
                  'devices with dm-crypt'),
        )),
        ('--dmcrypt-key-dir', dict(
            metavar='KEYDIR',
            default='/etc/ceph/dmcrypt-keys',
            help='directory where dm-crypt keys are stored',
        )),
        ('--reactivate', dict(
            action='store_true', default=False,
            help='activate the deactived OSD',
        )),
    ]
    for argument, options in arguments:
        activate_space_parser.add_argument(argument, **options)

    activate_space_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        # kept as a lambda: main() inspects args.func.__name__
        func=lambda args: main_activate_space(name, args),
    )
    return activate_space_parser
5440 | ||
5441 | ||
def make_activate_all_parser(subparsers):
    """Register the ``activate-all`` subcommand; return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
        The partitions containing auxiliary devices (journal, block, ...)
        are not activated.
        """))
    activate_all_parser = subparsers.add_parser(
        'activate-all',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Activate all tagged OSD partitions')

    arguments = [
        ('--activate-key', dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
        )),
        ('--mark-init', dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=INIT_SYSTEMS,
        )),
    ]
    for name, options in arguments:
        activate_all_parser.add_argument(name, **options)

    activate_all_parser.set_defaults(
        activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
        func=main_activate_all,
    )
    return activate_all_parser
5470 | ||
5471 | ||
def make_list_parser(subparsers):
    """Register the ``list`` subcommand on ``subparsers``; return its parser."""
    description = textwrap.fill(textwrap.dedent("""\
        Display all partitions on the system and their
        associated Ceph information, if any.
        """))
    list_parser = subparsers.add_parser(
        'list',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='List disks, partitions, and Ceph OSDs')

    format_options = dict(
        help='output format',
        default='plain',
        choices=['json', 'plain'],
    )
    list_parser.add_argument('--format', **format_options)

    path_options = dict(
        metavar='PATH',
        nargs='*',
        help='path to block devices, relative to /sys/block',
    )
    list_parser.add_argument('path', **path_options)

    list_parser.set_defaults(func=main_list)
    return list_parser
5497 | ||
5498 | ||
def make_suppress_parser(subparsers):
    """Register ``suppress-activate`` and ``unsuppress-activate``.

    Both subcommands take a single ``path`` argument.  Only the parser of
    ``suppress-activate`` is returned.
    """
    path_options = dict(
        metavar='PATH',
        help='path to block device or directory',
    )

    suppress_description = textwrap.fill(textwrap.dedent("""\
        Add a prefix to the list of suppressed device names
        so that they are ignored by all activate* subcommands.
        """))
    suppress_parser = subparsers.add_parser(
        'suppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=suppress_description,
        help='Suppress activate on a device (prefix)')
    suppress_parser.add_argument('path', **path_options)
    suppress_parser.set_defaults(func=main_suppress)

    unsuppress_description = textwrap.fill(textwrap.dedent("""\
        Remove a prefix from the list of suppressed device names
        so that they are no longer ignored by all
        activate* subcommands.
        """))
    unsuppress_parser = subparsers.add_parser(
        'unsuppress-activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=unsuppress_description,
        help='Stop suppressing activate on a device (prefix)')
    unsuppress_parser.add_argument('path', **path_options)
    unsuppress_parser.set_defaults(func=main_unsuppress)

    return suppress_parser
5535 | ||
5536 | ||
def make_deactivate_parser(subparsers):
    """Register the ``deactivate`` subcommand on ``subparsers``.

    Unlike most of the other ``make_*_parser`` helpers, nothing is returned.
    """
    description = textwrap.fill(textwrap.dedent("""\
        Deactivate the OSD located at PATH. It stops the OSD daemon
        and optionally marks it out (with --mark-out). The content of
        the OSD is left untouched.

        By default, the, ready, active, INIT-specific files are
        removed (so that it is not automatically re-activated by the
        udev rules or ceph-disk trigger) and the file deactive is
        created to remember the OSD is deactivated.

        If the --once option is given, the ready, active, INIT-specific
        files are not removed and the OSD will reactivate whenever
        ceph-disk trigger is run on one of the devices (journal, data,
        block, lockbox, ...).

        If the OSD is dmcrypt, remove the data dmcrypt map. When
        deactivate finishes, the OSD is down.
        """))
    deactivate_parser = subparsers.add_parser(
        'deactivate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Deactivate a Ceph OSD')

    arguments = [
        ('--cluster', dict(
            metavar='NAME',
            default='ceph',
            help='cluster name to assign this disk to',
        )),
        ('path', dict(
            metavar='PATH',
            nargs='?',
            help='path to block device or directory',
        )),
        ('--deactivate-by-id', dict(
            metavar='<id>',
            help='ID of OSD to deactive'
        )),
        ('--mark-out', dict(
            action='store_true', default=False,
            help='option to mark the osd out',
        )),
        ('--once', dict(
            action='store_true', default=False,
            help='does not need --reactivate to activate again',
        )),
    ]
    for name, options in arguments:
        deactivate_parser.add_argument(name, **options)

    deactivate_parser.set_defaults(func=main_deactivate)
5590 | ||
5591 | ||
def make_destroy_parser(subparsers):
    """Register the ``destroy`` subcommand on ``subparsers``.

    Unlike most of the other ``make_*_parser`` helpers, nothing is returned.
    """
    # The backslash must be the last character of the opening line: it
    # continues the literal onto the next line so the text starts with
    # "Destroy" and textwrap.dedent can strip the common indentation.
    description = textwrap.fill(textwrap.dedent("""\
        Destroy the OSD located at PATH. It removes the OSD from the
        cluster and marks it destroyed. An OSD must be down before it
        can be destroyed. Once it is destroyed, a new OSD can be created
        in its place, reusing the same OSD id and position (e.g. after
        a failed HDD or SSD is replaced). Alternatively, if the
        --purge option is also specified, the OSD is removed from the
        CRUSH map and the OSD id is deallocated."""))
    destroy_parser = subparsers.add_parser(
        'destroy',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help='Destroy a Ceph OSD')
    destroy_parser.add_argument(
        '--cluster',
        metavar='NAME',
        default='ceph',
        help='cluster name to assign this disk to',
    )
    destroy_parser.add_argument(
        'path',
        metavar='PATH',
        nargs='?',
        help='path to block device or directory',
    )
    destroy_parser.add_argument(
        '--destroy-by-id',
        metavar='<id>',
        help='ID of OSD to destroy'
    )
    destroy_parser.add_argument(
        '--dmcrypt-key-dir',
        metavar='KEYDIR',
        default='/etc/ceph/dmcrypt-keys',
        help=('directory where dm-crypt keys are stored '
              '(If you don\'t know how it work, '
              'dont use it. we have default value)'),
    )
    destroy_parser.add_argument(
        '--zap',
        action='store_true', default=False,
        help='option to erase data and partition',
    )
    destroy_parser.add_argument(
        '--purge',
        action='store_true', default=False,
        help='option to remove OSD from CRUSH map and deallocate the id',
    )
    destroy_parser.set_defaults(
        func=main_destroy,
    )
5642 | ||
5643 | ||
def make_zap_parser(subparsers):
    """Register the ``zap`` sub-command on *subparsers* and return it.

    The sub-command requires one or more DEV positionals and has its
    ``func`` default set to ``main_zap``.

    :param subparsers: object exposing ``add_parser`` (as returned by
                       ``ArgumentParser.add_subparsers``)
    :return: the parser created for the ``zap`` sub-command
    """
    summary = "Zap/erase/destroy a device's partition table (and contents)"
    # dedent + fill re-wrap the text, so the indentation used here does
    # not leak into the rendered description.
    description = textwrap.fill(textwrap.dedent("""\
    Zap/erase/destroy a device's partition table and contents. It
    actually uses sgdisk and it's option --zap-all to
    destroy both GPT and MBR data structures so that the disk
    becomes suitable for repartitioning.
    """))

    parser = subparsers.add_parser(
        'zap',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        help=summary)
    parser.add_argument(
        'dev',
        metavar='DEV',
        nargs='+',
        help='path to block device',
    )
    parser.set_defaults(func=main_zap)
    return parser
5665 | ||
5666 | ||
def main(argv):
    """Parse *argv* and run the selected sub-command.

    Steps, in order: parse the arguments, configure logging, optionally
    prepend a directory list to ``PATH``, set up the state and sysconf
    directories, record the preferred user/group in the module globals
    ``CEPH_PREF_USER`` / ``CEPH_PREF_GROUP``, then dispatch to
    ``args.func``.

    :param argv: command-line arguments, without the program name
    """
    global CEPH_PREF_USER
    global CEPH_PREF_GROUP

    args = parse_args(argv)
    setup_logging(args.verbose, args.log_stdout)

    if args.prepend_to_path != '':
        current_path = os.environ.get('PATH', os.defpath)
        os.environ['PATH'] = args.prepend_to_path + ":" + current_path

    # trigger may run when statedir is unavailable and does not use it
    runs_trigger = args.func.__name__ == 'main_trigger'
    if not runs_trigger:
        setup_statedir(args.statedir)
    setup_sysconfdir(args.sysconfdir)

    CEPH_PREF_USER = args.setuser
    CEPH_PREF_GROUP = args.setgroup

    if not args.verbose:
        main_catch(args.func, args)
        return

    # Verbose runs bypass main_catch, so exceptions raised by the
    # sub-command are not converted into SystemExit messages.
    args.func(args)
5690 | ||
5691 | ||
def setup_logging(verbose, log_stdout):
    """Configure logging output for the command.

    :param verbose: when true the level is DEBUG, otherwise WARNING
    :param log_stdout: when true, attach a stdout stream handler to the
                       module logger ``LOG``; otherwise configure the
                       root logger through ``logging.basicConfig``
    """
    log_format = '%(funcName)s: %(message)s'
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format=log_format,
        )
        return

    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(loglevel)
    handler.setFormatter(logging.Formatter(log_format))
    LOG.addHandler(handler)
    LOG.setLevel(loglevel)
5709 | ||
5710 | ||
def main_catch(func, args):
    """Call ``func(args)`` and report known failures as an exit message.

    ``Error`` is reported as ``<prog>: <msg>``; ``CephDiskException`` is
    reported as ``<prog> <ExceptionClass>: <msg>``.  In both cases the
    message is raised as ``SystemExit``.  Any other exception propagates
    unchanged.

    :param func: callable taking the parsed arguments
    :param args: parsed arguments; ``args.prog`` is used in the message
    :raises SystemExit: when *func* raises one of the handled exceptions
    """
    try:
        func(args)

    except Error as e:
        message = '{prog}: {msg}'.format(prog=args.prog, msg=e)
        raise SystemExit(message)

    except CephDiskException as error:
        message = '{prog} {exc_name}: {msg}'.format(
            prog=args.prog,
            exc_name=error.__class__.__name__,
            msg=error,
        )
        raise SystemExit(message)
5733 | ||
5734 | ||
def run():
    """Invoke main() with the process arguments, minus the program name."""
    cli_args = sys.argv[1:]
    main(cli_args)
5737 | ||
5738 | ||
5739 | if __name__ == '__main__': | |
5740 | main(sys.argv[1:]) | |
5741 | warned_about = {} |