"""
Balance PG distribution across OSDs.
"""

import copy
import errno
import json
import math
import random
import six
import time
from mgr_module import MgrModule, CommandResult
from threading import Event
from mgr_module import CRUSHMap

TIME_FORMAT = '%Y-%m-%d_%H:%M:%S'

class MappingState:
    def __init__(self, osdmap, pg_dump, desc=''):
        self.desc = desc
        self.osdmap = osdmap
        self.osdmap_dump = self.osdmap.dump()
        self.crush = osdmap.get_crush()
        self.crush_dump = self.crush.dump()
        self.pg_dump = pg_dump
        self.pg_stat = {
            i['pgid']: i['stat_sum'] for i in pg_dump.get('pg_stats', [])
        }
        osd_poolids = [p['pool'] for p in self.osdmap_dump.get('pools', [])]
        pg_poolids = [p['poolid'] for p in pg_dump.get('pool_stats', [])]
        self.poolids = set(osd_poolids) & set(pg_poolids)
        self.pg_up = {}
        self.pg_up_by_poolid = {}
        for poolid in self.poolids:
            self.pg_up_by_poolid[poolid] = osdmap.map_pool_pgs_up(poolid)
            for a, b in six.iteritems(self.pg_up_by_poolid[poolid]):
                self.pg_up[a] = b

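    # calc_misplaced_from() compares this state's PG "up" sets with those of an
    # earlier MappingState and returns the fraction of PGs whose mapping
    # changed.  For example (illustrative numbers only): if 20 of 512 PGs now
    # map to a different up set, the result is 20/512 ~= 0.039.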
    def calc_misplaced_from(self, other_ms):
        num = len(other_ms.pg_up)
        misplaced = 0
        for pgid, before in six.iteritems(other_ms.pg_up):
            if before != self.pg_up.get(pgid, []):
                misplaced += 1
        if num > 0:
            return float(misplaced) / float(num)
        return 0.0

class Plan:
    def __init__(self, name, ms, pools):
        self.mode = 'unknown'
        self.name = name
        self.initial = ms
        self.pools = pools

        self.osd_weights = {}
        self.compat_ws = {}
        self.inc = ms.osdmap.new_incremental()

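    # final_state() stages the accumulated changes (OSD reweights, compat
    # weight-set weights, upmap items in self.inc) and applies them to a copy
    # of the starting osdmap, yielding the MappingState the cluster would reach
    # if this plan were executed.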
    def final_state(self):
        self.inc.set_osd_reweights(self.osd_weights)
        self.inc.set_crush_compat_weight_set_weights(self.compat_ws)
        return MappingState(self.initial.osdmap.apply_incremental(self.inc),
                            self.initial.pg_dump,
                            'plan %s final' % self.name)

    def dump(self):
        return json.dumps(self.inc.dump(), indent=4)

    def show(self):
        ls = []
        ls.append('# starting osdmap epoch %d' % self.initial.osdmap.get_epoch())
        ls.append('# starting crush version %d' %
                  self.initial.osdmap.get_crush_version())
        ls.append('# mode %s' % self.mode)
        if len(self.compat_ws) and \
           not CRUSHMap.have_default_choose_args(self.initial.crush_dump):
            ls.append('ceph osd crush weight-set create-compat')
        for osd, weight in six.iteritems(self.compat_ws):
            ls.append('ceph osd crush weight-set reweight-compat %s %f' %
                      (osd, weight))
        for osd, weight in six.iteritems(self.osd_weights):
            ls.append('ceph osd reweight osd.%d %f' % (osd, weight))
        incdump = self.inc.dump()
        for pgid in incdump.get('old_pg_upmap_items', []):
            ls.append('ceph osd rm-pg-upmap-items %s' % pgid)
        for item in incdump.get('new_pg_upmap_items', []):
            osdlist = []
            for m in item['mappings']:
                osdlist += [m['from'], m['to']]
            ls.append('ceph osd pg-upmap-items %s %s' %
                      (item['pgid'], ' '.join([str(a) for a in osdlist])))
        return '\n'.join(ls)


class Eval:
    def __init__(self, ms):
        self.ms = ms
        self.root_ids = {}        # root name -> id
        self.pool_name = {}       # pool id -> pool name
        self.pool_id = {}         # pool name -> id
        self.pool_roots = {}      # pool name -> root name
        self.root_pools = {}      # root name -> pools
        self.target_by_root = {}  # root name -> target weight map
        self.count_by_pool = {}
        self.count_by_root = {}
        self.actual_by_pool = {}  # pool -> by_* -> actual weight map
        self.actual_by_root = {}  # pool -> by_* -> actual weight map
        self.total_by_pool = {}   # pool -> by_* -> total
        self.total_by_root = {}   # root -> by_* -> total
        self.stats_by_pool = {}   # pool -> by_* -> stddev or avg -> value
        self.stats_by_root = {}   # root -> by_* -> stddev or avg -> value

        self.score_by_pool = {}
        self.score_by_root = {}

        self.score = 0.0

    def show(self, verbose=False):
        if verbose:
            r = self.ms.desc + '\n'
            r += 'target_by_root %s\n' % self.target_by_root
            r += 'actual_by_pool %s\n' % self.actual_by_pool
            r += 'actual_by_root %s\n' % self.actual_by_root
            r += 'count_by_pool %s\n' % self.count_by_pool
            r += 'count_by_root %s\n' % self.count_by_root
            r += 'total_by_pool %s\n' % self.total_by_pool
            r += 'total_by_root %s\n' % self.total_by_root
            r += 'stats_by_root %s\n' % self.stats_by_root
            r += 'score_by_pool %s\n' % self.score_by_pool
            r += 'score_by_root %s\n' % self.score_by_root
        else:
            r = self.ms.desc + ' '
        r += 'score %f (lower is better)\n' % self.score
        return r

    def calc_stats(self, count, target, total):
        num = max(len(target), 1)
        r = {}
        for t in ('pgs', 'objects', 'bytes'):
            if total[t] == 0:
                r[t] = {
                    'avg': 0,
                    'stddev': 0,
                    'sum_weight': 0,
                    'score': 0,
                }
                continue

            avg = float(total[t]) / float(num)
            dev = 0.0

            # score is a measure of how uneven the data distribution is.
            # score lies in [0, 1); 0 means perfect distribution.
            score = 0.0
            sum_weight = 0.0

            for k, v in six.iteritems(count[t]):
                # adjust/normalize by weight
                if target[k]:
                    adjusted = float(v) / target[k] / float(num)
                else:
                    adjusted = 0.0

                # Overweighted devices and their weights are the factors used to
                # calculate reweight_urgency.  One 10% underfilled device with five
                # 2% overfilled devices is arguably a better situation than one 10%
                # overfilled device with five 2% underfilled devices.
                if adjusted > avg:
                    '''
                    F(x) = 2*phi(x) - 1, where phi(x) is the cdf of the standard
                    normal distribution and x = (adjusted - avg)/avg.
                    Since we only consider over-weighted devices, x >= 0, so phi(x)
                    lies in [0.5, 1).  The modification above brings the range of
                    F(x) to [0, 1).

                    In general, we need a function F(x), where x = (adjusted - avg)/avg,
                    such that:
                    1. F(x) is bounded between 0 and 1, so that reweight_urgency is also bounded.
                    2. A larger value of x implies more urgency to reweight.
                    3. The difference between F(x) values for large x should be minimal.
                    4. F(x) should approach 1 (highest urgency to reweight) steeply.

                    F(x) = (1 - e^(-x)) would also work, but it converges to 1 more
                    slowly than the function currently in use.

                    cdf of standard normal distribution: https://stackoverflow.com/a/29273201
                    '''
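                    # Illustrative example (values made up): if avg = 0.25 and
                    # adjusted = 0.30 for some OSD, then x = 0.2 and
                    # erf(x / sqrt(2)) ~= 0.159, so this OSD contributes roughly
                    # 0.16 * target[k] to the raw (pre-normalization) score.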
                    score += target[k] * (math.erf(((adjusted - avg)/avg) / math.sqrt(2.0)))
                    sum_weight += target[k]
                dev += (avg - adjusted) * (avg - adjusted)
            stddev = math.sqrt(dev / float(max(num - 1, 1)))
            score = score / max(sum_weight, 1)
            r[t] = {
                'avg': avg,
                'stddev': stddev,
                'sum_weight': sum_weight,
                'score': score,
            }
        return r

class Module(MgrModule):
    MODULE_OPTIONS = [
        {
            'name': 'active',
            'type': 'bool',
            'default': False,
            'desc': 'automatically balance PGs across cluster',
            'runtime': True,
        },
        {
            'name': 'begin_time',
            'type': 'str',
            'default': '0000',
            'desc': 'beginning time of day to automatically balance',
            'long_desc': 'This is a time of day in the format HHMM.',
            'runtime': True,
        },
        {
            'name': 'end_time',
            'type': 'str',
            'default': '2400',
            'desc': 'ending time of day to automatically balance',
            'long_desc': 'This is a time of day in the format HHMM.',
            'runtime': True,
        },
        {
            'name': 'begin_weekday',
            'type': 'uint',
            'default': 0,
            'min': 0,
            'max': 7,
            'desc': 'Restrict automatic balancing to this day of the week or later',
            'long_desc': '0 or 7 = Sunday, 1 = Monday, etc.',
            'runtime': True,
        },
        {
            'name': 'end_weekday',
            'type': 'uint',
            'default': 7,
            'min': 0,
            'max': 7,
            'desc': 'Restrict automatic balancing to days of the week earlier than this',
            'long_desc': '0 or 7 = Sunday, 1 = Monday, etc.',
            'runtime': True,
        },
        {
            'name': 'crush_compat_max_iterations',
            'type': 'uint',
            'default': 25,
            'min': 1,
            'max': 250,
            'desc': 'maximum number of iterations to attempt optimization',
            'runtime': True,
        },
        {
            'name': 'crush_compat_metrics',
            'type': 'str',
            'default': 'pgs,objects,bytes',
            'desc': 'metrics with which to calculate OSD utilization',
            'long_desc': 'Value is a list of one or more of "pgs", "objects", or "bytes", and indicates which metrics to use to balance utilization.',
            'runtime': True,
        },
        {
            'name': 'crush_compat_step',
            'type': 'float',
            'default': .5,
            'min': .001,
            'max': .999,
            'desc': 'aggressiveness of optimization',
            'long_desc': '.99 is very aggressive, .01 is less aggressive',
            'runtime': True,
        },
        {
            'name': 'min_score',
            'type': 'float',
            'default': 0,
            'desc': 'minimum score, below which no optimization is attempted',
            'runtime': True,
        },
        {
            'name': 'mode',
            'desc': 'Balancer mode',
            'default': 'none',
            'enum_allowed': ['none', 'crush-compat', 'upmap'],
            'runtime': True,
        },
        {
            'name': 'sleep_interval',
            'type': 'secs',
            'default': 60,
            'desc': 'how frequently to wake up and attempt optimization',
            'runtime': True,
        },
        {
            'name': 'upmap_max_iterations',
            'type': 'uint',
            'default': 10,
            'desc': 'maximum upmap optimization iterations',
            'runtime': True,
        },
        {
            'name': 'upmap_max_deviation',
            'type': 'float',
            'default': .01,
            'min': 0,
            'max': 1,
            'desc': 'deviation below which no optimization is attempted',
            'long_desc': 'If the ratio between the fullest and least-full OSD is below this value then we stop trying to optimize placement.',
            'runtime': True,
        },
        {
            'name': 'pool_ids',
            'type': 'str',
            'default': '',
            'desc': 'pools which the automatic balancing will be limited to',
            'runtime': True,
        },
    ]

    COMMANDS = [
        {
            "cmd": "balancer status",
            "desc": "Show balancer status",
            "perm": "r",
        },
        {
            "cmd": "balancer mode name=mode,type=CephChoices,strings=none|crush-compat|upmap",
            "desc": "Set balancer mode",
            "perm": "rw",
        },
        {
            "cmd": "balancer on",
            "desc": "Enable automatic balancing",
            "perm": "rw",
        },
        {
            "cmd": "balancer off",
            "desc": "Disable automatic balancing",
            "perm": "rw",
        },
        {
            "cmd": "balancer pool ls",
            "desc": "List automatic balancing pools. "
                    "Note that an empty list means all existing pools will be automatic balancing targets, "
                    "which is the default behaviour of the balancer.",
            "perm": "r",
        },
        {
            "cmd": "balancer pool add name=pools,type=CephString,n=N",
            "desc": "Enable automatic balancing for specific pools",
            "perm": "rw",
        },
        {
            "cmd": "balancer pool rm name=pools,type=CephString,n=N",
            "desc": "Disable automatic balancing for specific pools",
            "perm": "rw",
        },
        {
            "cmd": "balancer eval name=option,type=CephString,req=false",
            "desc": "Evaluate data distribution for the current cluster, a specific pool, or a specific plan",
            "perm": "r",
        },
        {
            "cmd": "balancer eval-verbose name=option,type=CephString,req=false",
            "desc": "Evaluate data distribution for the current cluster, a specific pool, or a specific plan (verbosely)",
            "perm": "r",
        },
        {
            "cmd": "balancer optimize name=plan,type=CephString name=pools,type=CephString,n=N,req=false",
            "desc": "Run optimizer to create a new plan",
            "perm": "rw",
        },
        {
            "cmd": "balancer show name=plan,type=CephString",
            "desc": "Show details of an optimization plan",
            "perm": "r",
        },
        {
            "cmd": "balancer rm name=plan,type=CephString",
            "desc": "Discard an optimization plan",
            "perm": "rw",
        },
        {
            "cmd": "balancer reset",
            "desc": "Discard all optimization plans",
            "perm": "rw",
        },
        {
            "cmd": "balancer dump name=plan,type=CephString",
            "desc": "Show an optimization plan",
            "perm": "r",
        },
        {
            "cmd": "balancer ls",
            "desc": "List all plans",
            "perm": "r",
        },
        {
            "cmd": "balancer execute name=plan,type=CephString",
            "desc": "Execute an optimization plan",
            "perm": "rw",
        },
    ]
    active = False
    run = True
    plans = {}
    mode = ''

    def __init__(self, *args, **kwargs):
        super(Module, self).__init__(*args, **kwargs)
        self.event = Event()

    def handle_command(self, inbuf, command):
        self.log.warn("Handling command: '%s'" % str(command))
        if command['prefix'] == 'balancer status':
            s = {
                'plans': list(self.plans.keys()),
                'active': self.active,
                'mode': self.get_module_option('mode'),
            }
            return (0, json.dumps(s, indent=4), '')
        elif command['prefix'] == 'balancer mode':
            if command['mode'] == 'upmap':
                min_compat_client = self.get_osdmap().dump().get('require_min_compat_client', '')
                if min_compat_client < 'luminous':  # works well because version is alphabetized..
                    warn = 'min_compat_client "%s" ' \
                           '< "luminous", which is required for pg-upmap. ' \
                           'Try "ceph osd set-require-min-compat-client luminous" ' \
                           'before enabling this mode' % min_compat_client
                    return (-errno.EPERM, '', warn)
            elif command['mode'] == 'crush-compat':
                ms = MappingState(self.get_osdmap(),
                                  self.get("pg_dump"),
                                  'initialize compat weight-set')
                self.get_compat_weight_set_weights(ms)  # ignore error
            self.set_module_option('mode', command['mode'])
            return (0, '', '')
        elif command['prefix'] == 'balancer on':
            if not self.active:
                self.set_module_option('active', 'true')
                self.active = True
            self.event.set()
            return (0, '', '')
        elif command['prefix'] == 'balancer off':
            if self.active:
                self.set_module_option('active', 'false')
                self.active = False
            self.event.set()
            return (0, '', '')
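        # The 'pool_ids' module option is stored as a comma-separated string of
        # pool ids.  The pool commands below translate between pool names and
        # ids, and prune ids whose pools no longer exist.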
        elif command['prefix'] == 'balancer pool ls':
            pool_ids = self.get_module_option('pool_ids')
            if pool_ids == '':
                return (0, '', '')
            pool_ids = pool_ids.split(',')
            pool_ids = [int(p) for p in pool_ids]
            pool_name_by_id = dict((p['pool'], p['pool_name']) for p in self.get_osdmap().dump().get('pools', []))
            should_prune = False
            final_ids = []
            final_names = []
            for p in pool_ids:
                if p in pool_name_by_id:
                    final_ids.append(p)
                    final_names.append(pool_name_by_id[p])
                else:
                    should_prune = True
            if should_prune:  # some pools were gone, prune
                self.set_module_option('pool_ids', ','.join(str(p) for p in final_ids))
            return (0, json.dumps(final_names, indent=4), '')
        elif command['prefix'] == 'balancer pool add':
            raw_names = command['pools']
            pool_id_by_name = dict((p['pool_name'], p['pool']) for p in self.get_osdmap().dump().get('pools', []))
            invalid_names = [p for p in raw_names if p not in pool_id_by_name]
            if invalid_names:
                return (-errno.EINVAL, '', 'pool(s) %s not found' % invalid_names)
            to_add = [str(pool_id_by_name[p]) for p in raw_names if p in pool_id_by_name]
            existing = self.get_module_option('pool_ids')
            final = to_add
            if existing != '':
                existing = existing.split(',')
                final = set(to_add) | set(existing)
            self.set_module_option('pool_ids', ','.join(final))
            return (0, '', '')
        elif command['prefix'] == 'balancer pool rm':
            raw_names = command['pools']
            existing = self.get_module_option('pool_ids')
            if existing == '':  # for idempotence
                return (0, '', '')
            existing = existing.split(',')
            osdmap = self.get_osdmap()
            pool_ids = [str(p['pool']) for p in osdmap.dump().get('pools', [])]
            pool_id_by_name = dict((p['pool_name'], p['pool']) for p in osdmap.dump().get('pools', []))
            final = [p for p in existing if p in pool_ids]
            to_delete = [str(pool_id_by_name[p]) for p in raw_names if p in pool_id_by_name]
            final = set(final) - set(to_delete)
            self.set_module_option('pool_ids', ','.join(final))
            return (0, '', '')
        elif command['prefix'] == 'balancer eval' or command['prefix'] == 'balancer eval-verbose':
            verbose = command['prefix'] == 'balancer eval-verbose'
            pools = []
            if 'option' in command:
                plan = self.plans.get(command['option'])
                if not plan:
                    # not a plan, does it look like a pool?
                    osdmap = self.get_osdmap()
                    valid_pool_names = [p['pool_name'] for p in osdmap.dump().get('pools', [])]
                    option = command['option']
                    if option not in valid_pool_names:
                        return (-errno.EINVAL, '', 'option "%s" not a plan or a pool' % option)
                    pools.append(option)
                    ms = MappingState(osdmap, self.get("pg_dump"), 'pool "%s"' % option)
                else:
                    pools = plan.pools
                    ms = plan.final_state()
            else:
                ms = MappingState(self.get_osdmap(),
                                  self.get("pg_dump"),
                                  'current cluster')
            return (0, self.evaluate(ms, pools, verbose=verbose), '')
        elif command['prefix'] == 'balancer optimize':
            pools = []
            if 'pools' in command:
                pools = command['pools']
            osdmap = self.get_osdmap()
            valid_pool_names = [p['pool_name'] for p in osdmap.dump().get('pools', [])]
            invalid_pool_names = []
            for p in pools:
                if p not in valid_pool_names:
                    invalid_pool_names.append(p)
            if len(invalid_pool_names):
                return (-errno.EINVAL, '', 'pools %s not found' % invalid_pool_names)
            plan = self.plan_create(command['plan'], osdmap, pools)
            r, detail = self.optimize(plan)
            # remove plan if we are currently unable to find an optimization
            # or distribution is already perfect
            if r:
                self.plan_rm(command['plan'])
            return (r, '', detail)
        elif command['prefix'] == 'balancer rm':
            self.plan_rm(command['plan'])
            return (0, '', '')
        elif command['prefix'] == 'balancer reset':
            self.plans = {}
            return (0, '', '')
        elif command['prefix'] == 'balancer ls':
            return (0, json.dumps([p for p in self.plans], indent=4), '')
        elif command['prefix'] == 'balancer dump':
            plan = self.plans.get(command['plan'])
            if not plan:
                return (-errno.ENOENT, '', 'plan %s not found' % command['plan'])
            return (0, plan.dump(), '')
        elif command['prefix'] == 'balancer show':
            plan = self.plans.get(command['plan'])
            if not plan:
                return (-errno.ENOENT, '', 'plan %s not found' % command['plan'])
            return (0, plan.show(), '')
        elif command['prefix'] == 'balancer execute':
            plan = self.plans.get(command['plan'])
            if not plan:
                return (-errno.ENOENT, '', 'plan %s not found' % command['plan'])
            r, detail = self.execute(plan)
            self.plan_rm(command['plan'])
            return (r, '', detail)
        else:
            return (-errno.EINVAL, '',
                    "Command not found '{0}'".format(command['prefix']))

    def shutdown(self):
        self.log.info('Stopping')
        self.run = False
        self.event.set()

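    # time_permit() restricts automatic balancing to the configured time-of-day
    # and day-of-week window.  Both windows may wrap around: for example
    # (illustrative values), begin_time=2300 with end_time=0600 permits
    # balancing overnight, and begin_weekday=5 with end_weekday=1 permits
    # Friday, Saturday, and Sunday.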
    def time_permit(self):
        local_time = time.localtime()
        time_of_day = time.strftime('%H%M', local_time)
        weekday = (local_time.tm_wday + 1) % 7  # be compatible with C
        permit = False

        begin_time = self.get_module_option('begin_time')
        end_time = self.get_module_option('end_time')
        if begin_time <= end_time:
            permit = begin_time <= time_of_day < end_time
        else:
            permit = time_of_day >= begin_time or time_of_day < end_time
        if not permit:
            self.log.debug("should run between %s - %s, now %s, skipping",
                           begin_time, end_time, time_of_day)
            return False

        begin_weekday = self.get_module_option('begin_weekday')
        end_weekday = self.get_module_option('end_weekday')
        if begin_weekday <= end_weekday:
            permit = begin_weekday <= weekday < end_weekday
        else:
            permit = weekday >= begin_weekday or weekday < end_weekday
        if not permit:
            self.log.debug("should run between weekday %d - %d, now %d, skipping",
                           begin_weekday, end_weekday, weekday)
            return False

        return True

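    # serve() is the module's main loop: wake up every sleep_interval seconds,
    # and when automatic balancing is enabled and time_permit() allows it,
    # build a plan named auto_<timestamp> restricted to the configured pools,
    # optimize it, and execute it if the optimizer found something to do.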
    def serve(self):
        self.log.info('Starting')
        while self.run:
            self.active = self.get_module_option('active')
            sleep_interval = self.get_module_option('sleep_interval')
            self.log.debug('Waking up [%s, now %s]',
                           "active" if self.active else "inactive",
                           time.strftime(TIME_FORMAT, time.localtime()))
            if self.active and self.time_permit():
                self.log.debug('Running')
                name = 'auto_%s' % time.strftime(TIME_FORMAT, time.gmtime())
                osdmap = self.get_osdmap()
                allow = self.get_module_option('pool_ids')
                final = []
                if allow != '':
                    allow = allow.split(',')
                    valid = [str(p['pool']) for p in osdmap.dump().get('pools', [])]
                    final = set(allow) & set(valid)
                    if set(allow) - set(valid):  # some pools were gone, prune
                        self.set_module_option('pool_ids', ','.join(final))
                    pool_name_by_id = dict((p['pool'], p['pool_name']) for p in osdmap.dump().get('pools', []))
                    final = [int(p) for p in final]
                    final = [pool_name_by_id[p] for p in final if p in pool_name_by_id]
                plan = self.plan_create(name, osdmap, final)
                r, detail = self.optimize(plan)
                if r == 0:
                    self.execute(plan)
                self.plan_rm(name)
            self.log.debug('Sleeping for %d', sleep_interval)
            self.event.wait(sleep_interval)
            self.event.clear()

    def plan_create(self, name, osdmap, pools):
        plan = Plan(name,
                    MappingState(osdmap,
                                 self.get("pg_dump"),
                                 'plan %s initial' % name),
                    pools)
        self.plans[name] = plan
        return plan

    def plan_rm(self, name):
        if name in self.plans:
            del self.plans[name]

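    # calc_eval() builds an Eval for a MappingState: for every CRUSH root used
    # by the selected pools it computes the target distribution (normalized
    # CRUSH weights) and the actual distribution of pgs/objects/bytes per OSD,
    # then derives per-root stats and a single score where 0 means a perfect
    # distribution.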
    def calc_eval(self, ms, pools):
        pe = Eval(ms)
        pool_rule = {}
        pool_info = {}
        for p in ms.osdmap_dump.get('pools', []):
            if len(pools) and p['pool_name'] not in pools:
                continue
            # skip dead or not-yet-ready pools too
            if p['pool'] not in ms.poolids:
                continue
            pe.pool_name[p['pool']] = p['pool_name']
            pe.pool_id[p['pool_name']] = p['pool']
            pool_rule[p['pool_name']] = p['crush_rule']
            pe.pool_roots[p['pool_name']] = []
            pool_info[p['pool_name']] = p
        if len(pool_info) == 0:
            return pe
        self.log.debug('pool_name %s' % pe.pool_name)
        self.log.debug('pool_id %s' % pe.pool_id)
        self.log.debug('pools %s' % pools)
        self.log.debug('pool_rule %s' % pool_rule)

        osd_weight = { a['osd']: a['weight']
                       for a in ms.osdmap_dump.get('osds', []) if a['weight'] > 0 }

        # get expected distributions by root
        actual_by_root = {}
        rootids = ms.crush.find_takes()
        roots = []
        for rootid in rootids:
            ls = ms.osdmap.get_pools_by_take(rootid)
            want = []
            # find out roots associating with pools we are passed in
            for candidate in ls:
                if candidate in pe.pool_name:
                    want.append(candidate)
            if len(want) == 0:
                continue
            root = ms.crush.get_item_name(rootid)
            pe.root_pools[root] = []
            for poolid in want:
                pe.pool_roots[pe.pool_name[poolid]].append(root)
                pe.root_pools[root].append(pe.pool_name[poolid])
            pe.root_ids[root] = rootid
            roots.append(root)
            weight_map = ms.crush.get_take_weight_osd_map(rootid)
            adjusted_map = {
                osd: cw * osd_weight[osd]
                for osd, cw in six.iteritems(weight_map) if osd in osd_weight and cw > 0
            }
            sum_w = sum(adjusted_map.values())
            assert len(adjusted_map) == 0 or sum_w > 0
            pe.target_by_root[root] = { osd: w / sum_w
                                        for osd, w in six.iteritems(adjusted_map) }
            actual_by_root[root] = {
                'pgs': {},
                'objects': {},
                'bytes': {},
            }
            for osd in pe.target_by_root[root]:
                actual_by_root[root]['pgs'][osd] = 0
                actual_by_root[root]['objects'][osd] = 0
                actual_by_root[root]['bytes'][osd] = 0
            pe.total_by_root[root] = {
                'pgs': 0,
                'objects': 0,
                'bytes': 0,
            }
        self.log.debug('pool_roots %s' % pe.pool_roots)
        self.log.debug('root_pools %s' % pe.root_pools)
        self.log.debug('target_by_root %s' % pe.target_by_root)

        # pool and root actual
        for pool, pi in six.iteritems(pool_info):
            poolid = pi['pool']
            pm = ms.pg_up_by_poolid[poolid]
            pgs = 0
            objects = 0
            bytes = 0
            pgs_by_osd = {}
            objects_by_osd = {}
            bytes_by_osd = {}
            for root in pe.pool_roots[pool]:
                for osd in pe.target_by_root[root]:
                    pgs_by_osd[osd] = 0
                    objects_by_osd[osd] = 0
                    bytes_by_osd[osd] = 0
            for pgid, up in six.iteritems(pm):
                for osd in [int(osd) for osd in up]:
                    if osd == CRUSHMap.ITEM_NONE:
                        continue
                    pgs_by_osd[osd] += 1
                    objects_by_osd[osd] += ms.pg_stat[pgid]['num_objects']
                    bytes_by_osd[osd] += ms.pg_stat[pgid]['num_bytes']
                    # pick a root to associate this pg instance with.
                    # note that this is imprecise if the roots have
                    # overlapping children.
                    # FIXME: divide bytes by k for EC pools.
                    for root in pe.pool_roots[pool]:
                        if osd in pe.target_by_root[root]:
                            actual_by_root[root]['pgs'][osd] += 1
                            actual_by_root[root]['objects'][osd] += ms.pg_stat[pgid]['num_objects']
                            actual_by_root[root]['bytes'][osd] += ms.pg_stat[pgid]['num_bytes']
                            pgs += 1
                            objects += ms.pg_stat[pgid]['num_objects']
                            bytes += ms.pg_stat[pgid]['num_bytes']
                            pe.total_by_root[root]['pgs'] += 1
                            pe.total_by_root[root]['objects'] += ms.pg_stat[pgid]['num_objects']
                            pe.total_by_root[root]['bytes'] += ms.pg_stat[pgid]['num_bytes']
                            break
            pe.count_by_pool[pool] = {
                'pgs': {
                    k: v
                    for k, v in six.iteritems(pgs_by_osd)
                },
                'objects': {
                    k: v
                    for k, v in six.iteritems(objects_by_osd)
                },
                'bytes': {
                    k: v
                    for k, v in six.iteritems(bytes_by_osd)
                },
            }
            pe.actual_by_pool[pool] = {
                'pgs': {
                    k: float(v) / float(max(pgs, 1))
                    for k, v in six.iteritems(pgs_by_osd)
                },
                'objects': {
                    k: float(v) / float(max(objects, 1))
                    for k, v in six.iteritems(objects_by_osd)
                },
                'bytes': {
                    k: float(v) / float(max(bytes, 1))
                    for k, v in six.iteritems(bytes_by_osd)
                },
            }
            pe.total_by_pool[pool] = {
                'pgs': pgs,
                'objects': objects,
                'bytes': bytes,
            }
        for root in pe.total_by_root:
            pe.count_by_root[root] = {
                'pgs': {
                    k: float(v)
                    for k, v in six.iteritems(actual_by_root[root]['pgs'])
                },
                'objects': {
                    k: float(v)
                    for k, v in six.iteritems(actual_by_root[root]['objects'])
                },
                'bytes': {
                    k: float(v)
                    for k, v in six.iteritems(actual_by_root[root]['bytes'])
                },
            }
            pe.actual_by_root[root] = {
                'pgs': {
                    k: float(v) / float(max(pe.total_by_root[root]['pgs'], 1))
                    for k, v in six.iteritems(actual_by_root[root]['pgs'])
                },
                'objects': {
                    k: float(v) / float(max(pe.total_by_root[root]['objects'], 1))
                    for k, v in six.iteritems(actual_by_root[root]['objects'])
                },
                'bytes': {
                    k: float(v) / float(max(pe.total_by_root[root]['bytes'], 1))
                    for k, v in six.iteritems(actual_by_root[root]['bytes'])
                },
            }
        self.log.debug('actual_by_pool %s' % pe.actual_by_pool)
        self.log.debug('actual_by_root %s' % pe.actual_by_root)

        # average and stddev and score
        pe.stats_by_root = {
            a: pe.calc_stats(
                b,
                pe.target_by_root[a],
                pe.total_by_root[a]
            ) for a, b in six.iteritems(pe.count_by_root)
        }
        self.log.debug('stats_by_root %s' % pe.stats_by_root)

        # the scores are already normalized
        pe.score_by_root = {
            r: {
                'pgs': pe.stats_by_root[r]['pgs']['score'],
                'objects': pe.stats_by_root[r]['objects']['score'],
                'bytes': pe.stats_by_root[r]['bytes']['score'],
            } for r in pe.total_by_root.keys()
        }
        self.log.debug('score_by_root %s' % pe.score_by_root)

        # get the list of score metrics, comma separated
        metrics = self.get_module_option('crush_compat_metrics').split(',')

        # total score is just average of normalized stddevs
        pe.score = 0.0
        for r, vs in six.iteritems(pe.score_by_root):
            for k, v in six.iteritems(vs):
                if k in metrics:
                    pe.score += v
        pe.score /= len(metrics) * len(roots)
        return pe

    def evaluate(self, ms, pools, verbose=False):
        pe = self.calc_eval(ms, pools)
        return pe.show(verbose=verbose)

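    # optimize() is a guard around the per-mode optimizers: it refuses to plan
    # while PGs are unknown, degraded or inactive, or while the misplaced ratio
    # already exceeds target_max_misplaced_ratio, and otherwise dispatches to
    # do_upmap() or do_crush_compat() according to the configured mode.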
    def optimize(self, plan):
        self.log.info('Optimize plan %s' % plan.name)
        plan.mode = self.get_module_option('mode')
        max_misplaced = float(self.get_ceph_option('target_max_misplaced_ratio'))
        self.log.info('Mode %s, max misplaced %f' %
                      (plan.mode, max_misplaced))

        info = self.get('pg_status')
        unknown = info.get('unknown_pgs_ratio', 0.0)
        degraded = info.get('degraded_ratio', 0.0)
        inactive = info.get('inactive_pgs_ratio', 0.0)
        misplaced = info.get('misplaced_ratio', 0.0)
        self.log.debug('unknown %f degraded %f inactive %f misplaced %g',
                       unknown, degraded, inactive, misplaced)
        if unknown > 0.0:
            detail = 'Some PGs (%f) are unknown; try again later' % unknown
            self.log.info(detail)
            return -errno.EAGAIN, detail
        elif degraded > 0.0:
            detail = 'Some objects (%f) are degraded; try again later' % degraded
            self.log.info(detail)
            return -errno.EAGAIN, detail
        elif inactive > 0.0:
            detail = 'Some PGs (%f) are inactive; try again later' % inactive
            self.log.info(detail)
            return -errno.EAGAIN, detail
        elif misplaced >= max_misplaced:
            detail = 'Too many objects (%f > %f) are misplaced; ' \
                     'try again later' % (misplaced, max_misplaced)
            self.log.info(detail)
            return -errno.EAGAIN, detail
        else:
            if plan.mode == 'upmap':
                return self.do_upmap(plan)
            elif plan.mode == 'crush-compat':
                return self.do_crush_compat(plan)
            elif plan.mode == 'none':
                detail = 'Please do "ceph balancer mode" to choose a valid mode first'
                self.log.info('Idle')
                return -errno.ENOEXEC, detail
            else:
                detail = 'Unrecognized mode %s' % plan.mode
                self.log.info(detail)
                return -errno.EINVAL, detail
        ##

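    # do_upmap() asks the OSDMap to compute pg-upmap-items entries: pools are
    # grouped by CRUSH rule and visited in random order, and calc_pg_upmaps()
    # is invoked for each group until up to upmap_max_iterations changes have
    # been staged in the plan's incremental.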
    def do_upmap(self, plan):
        self.log.info('do_upmap')
        max_iterations = self.get_module_option('upmap_max_iterations')
        max_deviation = self.get_module_option('upmap_max_deviation')

        ms = plan.initial
        if len(plan.pools):
            pools = plan.pools
        else:  # all
            pools = [str(i['pool_name']) for i in ms.osdmap_dump.get('pools', [])]
        if len(pools) == 0:
            detail = 'No pools available'
            self.log.info(detail)
            return -errno.ENOENT, detail

        inc = plan.inc
        total_did = 0
        left = max_iterations
        osdmap_dump = self.get_osdmap().dump()
        pools_with_pg_merge = [p['pool_name'] for p in osdmap_dump.get('pools', [])
                               if p['pg_num'] > p['pg_num_target']]
        crush_rule_by_pool_name = dict((p['pool_name'], p['crush_rule']) for p in osdmap_dump.get('pools', []))
        pools_by_crush_rule = {}  # group pools by crush_rule
        for pool in pools:
            if pool not in crush_rule_by_pool_name:
                self.log.info('pool %s does not exist' % pool)
                continue
            if pool in pools_with_pg_merge:
                self.log.info('pool %s has pending PG(s) for merging, skipping for now' % pool)
                continue
            crush_rule = crush_rule_by_pool_name[pool]
            if crush_rule not in pools_by_crush_rule:
                pools_by_crush_rule[crush_rule] = []
            pools_by_crush_rule[crush_rule].append(pool)
        classified_pools = list(pools_by_crush_rule.values())
        # shuffle so all pools get equal (in)attention
        random.shuffle(classified_pools)
        for it in classified_pools:
            did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, left, it)
            total_did += did
            left -= did
            if left <= 0:
                break
        self.log.info('prepared %d/%d changes' % (total_did, max_iterations))
        if total_did == 0:
            return -errno.EALREADY, 'Unable to find further optimization, ' \
                                    'or pool(s)\' pg_num is decreasing, ' \
                                    'or distribution is already perfect'
        return 0, ''

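    # do_crush_compat() iteratively nudges the compat weight-set: each pass
    # moves every OSD's weight a fraction 'step' of the way toward the value
    # that would equalize its utilization, re-evaluates the score, and halves
    # the step (or rolls back) when a pass would misplace more than
    # target_max_misplaced_ratio or would make the score worse.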
    def do_crush_compat(self, plan):
        self.log.info('do_crush_compat')
        max_iterations = self.get_module_option('crush_compat_max_iterations')
        if max_iterations < 1:
            return -errno.EINVAL, '"crush_compat_max_iterations" must be >= 1'
        step = self.get_module_option('crush_compat_step')
        if step <= 0 or step >= 1.0:
            return -errno.EINVAL, '"crush_compat_step" must be in (0, 1)'
        max_misplaced = float(self.get_ceph_option('target_max_misplaced_ratio'))
        min_pg_per_osd = 2

        ms = plan.initial
        osdmap = ms.osdmap
        crush = osdmap.get_crush()
        pe = self.calc_eval(ms, plan.pools)
        min_score_to_optimize = self.get_module_option('min_score')
        if pe.score <= min_score_to_optimize:
            if pe.score == 0:
                detail = 'Distribution is already perfect'
            else:
                detail = 'score %f <= min_score %f, will not optimize' \
                         % (pe.score, min_score_to_optimize)
            self.log.info(detail)
            return -errno.EALREADY, detail

        # get current osd reweights
        orig_osd_weight = { a['osd']: a['weight']
                            for a in ms.osdmap_dump.get('osds', []) }
        reweighted_osds = [ a for a, b in six.iteritems(orig_osd_weight)
                            if b < 1.0 and b > 0.0 ]

        # get current compat weight-set weights
        orig_ws = self.get_compat_weight_set_weights(ms)
        if not orig_ws:
            return -errno.EAGAIN, 'compat weight-set not available'
        orig_ws = { a: b for a, b in six.iteritems(orig_ws) if a >= 0 }

        # Make sure roots don't overlap their devices.  If so, we
        # can't proceed.
        roots = list(pe.target_by_root.keys())
        self.log.debug('roots %s', roots)
        visited = {}
        overlap = {}
        root_ids = {}
        for root, wm in six.iteritems(pe.target_by_root):
            for osd in wm:
                if osd in visited:
                    if osd not in overlap:
                        overlap[osd] = [ visited[osd] ]
                    overlap[osd].append(root)
                visited[osd] = root
        if len(overlap) > 0:
            detail = 'Some osds belong to multiple subtrees: %s' % \
                     overlap
            self.log.error(detail)
            return -errno.EOPNOTSUPP, detail

        # rebalance by pgs, objects, or bytes
        metrics = self.get_module_option('crush_compat_metrics').split(',')
        key = metrics[0]  # balancing using the first score metric
        if key not in ['pgs', 'bytes', 'objects']:
            self.log.warn("Invalid crush_compat balancing key %s. Using 'pgs'." % key)
            key = 'pgs'

        # go
        best_ws = copy.deepcopy(orig_ws)
        best_ow = copy.deepcopy(orig_osd_weight)
        best_pe = pe
        left = max_iterations
        bad_steps = 0
        next_ws = copy.deepcopy(best_ws)
        next_ow = copy.deepcopy(best_ow)
        while left > 0:
            # adjust
            self.log.debug('best_ws %s' % best_ws)
            random.shuffle(roots)
            for root in roots:
                pools = best_pe.root_pools[root]
                osds = len(best_pe.target_by_root[root])
                min_pgs = osds * min_pg_per_osd
                if best_pe.total_by_root[root][key] < min_pgs:
                    self.log.info('Skipping root %s (pools %s), total pgs %d '
                                  '< minimum %d (%d per osd)',
                                  root, pools,
                                  best_pe.total_by_root[root][key],
                                  min_pgs, min_pg_per_osd)
                    continue
                self.log.info('Balancing root %s (pools %s) by %s' %
                              (root, pools, key))
                target = best_pe.target_by_root[root]
                actual = best_pe.actual_by_root[root][key]
                queue = sorted(actual.keys(),
                               key=lambda osd: -abs(target[osd] - actual[osd]))
                for osd in queue:
                    if orig_osd_weight[osd] == 0:
                        self.log.debug('skipping out osd.%d', osd)
                    else:
                        deviation = target[osd] - actual[osd]
                        if deviation == 0:
                            break
                        self.log.debug('osd.%d deviation %f', osd, deviation)
                        weight = best_ws[osd]
                        ow = orig_osd_weight[osd]
                        if actual[osd] > 0:
                            calc_weight = target[osd] / actual[osd] * weight * ow
                        else:
                            # for newly created osds, reset calc_weight to the target
                            # value; this way the weight-set absorbs *step* of its
                            # target (final) value at the very beginning and slowly
                            # catches up later.  note that if this turns out to cause
                            # too many misplaced pgs, we'll reduce step and retry.
                            calc_weight = target[osd]
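                        # Illustrative arithmetic (made-up numbers): with step=0.5,
                        # weight=1.0 and calc_weight=0.8, the new weight-set value is
                        # 1.0*0.5 + 0.8*0.5 = 0.9, i.e. the weight moves half-way
                        # toward its computed target.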
                        new_weight = weight * (1.0 - step) + calc_weight * step
                        self.log.debug('Reweight osd.%d %f -> %f', osd, weight,
                                       new_weight)
                        next_ws[osd] = new_weight
                        if ow < 1.0:
                            new_ow = min(1.0, max(step + (1.0 - step) * ow,
                                                  ow + .005))
                            self.log.debug('Reweight osd.%d reweight %f -> %f',
                                           osd, ow, new_ow)
                            next_ow[osd] = new_ow

                # normalize weights under this root
                root_weight = crush.get_item_weight(pe.root_ids[root])
                root_sum = sum(b for a, b in six.iteritems(next_ws)
                               if a in target.keys())
                if root_sum > 0 and root_weight > 0:
                    factor = root_sum / root_weight
                    self.log.debug('normalizing root %s %d, weight %f, '
                                   'ws sum %f, factor %f',
                                   root, pe.root_ids[root], root_weight,
                                   root_sum, factor)
                    for osd in actual.keys():
                        next_ws[osd] = next_ws[osd] / factor

            # recalc
            plan.compat_ws = copy.deepcopy(next_ws)
            next_ms = plan.final_state()
            next_pe = self.calc_eval(next_ms, plan.pools)
            next_misplaced = next_ms.calc_misplaced_from(ms)
            self.log.debug('Step result score %f -> %f, misplacing %f',
                           best_pe.score, next_pe.score, next_misplaced)

            if next_misplaced > max_misplaced:
                if best_pe.score < pe.score:
                    self.log.debug('Step misplaced %f > max %f, stopping',
                                   next_misplaced, max_misplaced)
                    break
                step /= 2.0
                next_ws = copy.deepcopy(best_ws)
                next_ow = copy.deepcopy(best_ow)
                self.log.debug('Step misplaced %f > max %f, reducing step to %f',
                               next_misplaced, max_misplaced, step)
            else:
                if next_pe.score > best_pe.score * 1.0001:
                    bad_steps += 1
                    if bad_steps < 5 and random.randint(0, 100) < 70:
                        self.log.debug('Score got worse, taking another step')
                    else:
                        step /= 2.0
                        next_ws = copy.deepcopy(best_ws)
                        next_ow = copy.deepcopy(best_ow)
                        self.log.debug('Score got worse, trying smaller step %f',
                                       step)
                else:
                    bad_steps = 0
                    best_pe = next_pe
                    best_ws = copy.deepcopy(next_ws)
                    best_ow = copy.deepcopy(next_ow)
                    if best_pe.score == 0:
                        break
            left -= 1

        # allow a small regression if we are phasing out osd weights
        fudge = 0
        if best_ow != orig_osd_weight:
            fudge = .001

        if best_pe.score < pe.score + fudge:
            self.log.info('Success, score %f -> %f', pe.score, best_pe.score)
            plan.compat_ws = best_ws
            for osd, w in six.iteritems(best_ow):
                if w != orig_osd_weight[osd]:
                    self.log.debug('osd.%d reweight %f', osd, w)
                    plan.osd_weights[osd] = w
            return 0, ''
        else:
            self.log.info('Failed to find further optimization, score %f',
                          pe.score)
            plan.compat_ws = {}
            return -errno.EDOM, 'Unable to find further optimization; ' \
                                'changing the balancer mode and retrying might help'

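    # get_compat_weight_set_weights() returns a map of OSD id -> compat
    # weight-set weight.  If the crush map passed in has no default choose_args
    # it first asks the monitor to create the compat weight-set and then
    # re-reads the crush map via 'osd crush dump'.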
    def get_compat_weight_set_weights(self, ms):
        if not CRUSHMap.have_default_choose_args(ms.crush_dump):
            # enable compat weight-set first
            self.log.debug('ceph osd crush weight-set create-compat')
            result = CommandResult('')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd crush weight-set create-compat',
                'format': 'json',
            }), '')
            r, outb, outs = result.wait()
            if r != 0:
                self.log.error('Error creating compat weight-set')
                return

            result = CommandResult('')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd crush dump',
                'format': 'json',
            }), '')
            r, outb, outs = result.wait()
            if r != 0:
                self.log.error('Error dumping crush map')
                return
            try:
                crushmap = json.loads(outb)
            except:
                raise RuntimeError('unable to parse crush map')
        else:
            crushmap = ms.crush_dump

        raw = CRUSHMap.get_default_choose_args(crushmap)
        weight_set = {}
        for b in raw:
            bucket = None
            for t in crushmap['buckets']:
                if t['id'] == b['bucket_id']:
                    bucket = t
                    break
            if not bucket:
                raise RuntimeError('could not find bucket %s' % b['bucket_id'])
            self.log.debug('bucket items %s' % bucket['items'])
            self.log.debug('weight set %s' % b['weight_set'][0])
            if len(bucket['items']) != len(b['weight_set'][0]):
                raise RuntimeError('weight-set size does not match bucket items')
            for pos in range(len(bucket['items'])):
                weight_set[bucket['items'][pos]['id']] = b['weight_set'][0][pos]

        self.log.debug('weight_set weights %s' % weight_set)
        return weight_set

    def do_crush(self):
        self.log.info('do_crush (not yet implemented)')

    def do_osd_weight(self):
        self.log.info('do_osd_weight (not yet implemented)')

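    # execute() turns a plan into monitor commands: compat weight-set
    # reweights, an 'osd reweightn' for changed OSD weights, and
    # pg-upmap-items additions/removals.  The commands are sent
    # asynchronously, collected, and waited on; the first failure aborts.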
    def execute(self, plan):
        self.log.info('Executing plan %s' % plan.name)

        commands = []

        # compat weight-set
        if len(plan.compat_ws) and \
           not CRUSHMap.have_default_choose_args(plan.initial.crush_dump):
            self.log.debug('ceph osd crush weight-set create-compat')
            result = CommandResult('')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd crush weight-set create-compat',
                'format': 'json',
            }), '')
            r, outb, outs = result.wait()
            if r != 0:
                self.log.error('Error creating compat weight-set')
                return r, outs

        for osd, weight in six.iteritems(plan.compat_ws):
            self.log.info('ceph osd crush weight-set reweight-compat osd.%d %f',
                          osd, weight)
            result = CommandResult('')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd crush weight-set reweight-compat',
                'format': 'json',
                'item': 'osd.%d' % osd,
                'weight': [weight],
            }), '')
            commands.append(result)

        # new_weight
        reweightn = {}
        for osd, weight in six.iteritems(plan.osd_weights):
            reweightn[str(osd)] = str(int(weight * float(0x10000)))
        if len(reweightn):
            self.log.info('ceph osd reweightn %s', reweightn)
            result = CommandResult('')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd reweightn',
                'format': 'json',
                'weights': json.dumps(reweightn),
            }), '')
            commands.append(result)

        # upmap
        incdump = plan.inc.dump()
        for pgid in incdump.get('old_pg_upmap_items', []):
            self.log.info('ceph osd rm-pg-upmap-items %s', pgid)
            result = CommandResult('foo')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd rm-pg-upmap-items',
                'format': 'json',
                'pgid': pgid,
            }), 'foo')
            commands.append(result)

        for item in incdump.get('new_pg_upmap_items', []):
            self.log.info('ceph osd pg-upmap-items %s mappings %s', item['pgid'],
                          item['mappings'])
            osdlist = []
            for m in item['mappings']:
                osdlist += [m['from'], m['to']]
            result = CommandResult('foo')
            self.send_command(result, 'mon', '', json.dumps({
                'prefix': 'osd pg-upmap-items',
                'format': 'json',
                'pgid': item['pgid'],
                'id': osdlist,
            }), 'foo')
            commands.append(result)

        # wait for commands
        self.log.debug('commands %s' % commands)
        for result in commands:
            r, outb, outs = result.wait()
            if r != 0:
                self.log.error('execute error: r = %d, detail = %s' % (r, outs))
                return r, outs
        self.log.debug('done')
        return 0, ''