ceph/src/pybind/mgr/pg_autoscaler/module.py @ commit 57d27655114483a8d7abf9056415cac4315a63b9
1 """
2 Automatically scale pg_num based on how much data is stored in each pool.
3 """
4
5 import json
6 import mgr_util
7 import threading
8 from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union
9 import uuid
10 from prettytable import PrettyTable
11 from mgr_module import HealthChecksT, CLIReadCommand, CLIWriteCommand, CRUSHMap, MgrModule, Option, OSDMap
12
13 """
14 Some terminology is made up for the purposes of this module:
15
16 - "raw pgs": pg count after applying replication, i.e. the real resource
17 consumption of a pool.
18 - "grow/shrink" - increase/decrease the pg_num in a pool
19 - "crush subtree" - non-overlapping domains in crush hierarchy: used as
20 units of resource management.
21 """
22
23 INTERVAL = 5
24
25 PG_NUM_MIN = 32 # unless specified on a per-pool basis
26
27 if TYPE_CHECKING:
28 import sys
29 if sys.version_info >= (3, 8):
30 from typing import Literal
31 else:
32 from typing_extensions import Literal
33
34 PassT = Literal['first', 'second', 'third']
35
36
37 def nearest_power_of_two(n: int) -> int:
38 v = int(n)
39
40 v -= 1
41 v |= v >> 1
42 v |= v >> 2
43 v |= v >> 4
44 v |= v >> 8
45 v |= v >> 16
46
47 # High bound power of two
48 v += 1
49
50    # Low bound power of two
51 x = v >> 1
52
53 return x if (v - n) > (n - x) else v
54
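# Illustrative sketch (not part of the module): nearest_power_of_two() rounds
# to whichever power of two is closer, preferring the upper bound on ties.
#
#   >>> nearest_power_of_two(1000)   # 1024 is 24 away, 512 is 488 away
#   1024
#   >>> nearest_power_of_two(45)     # 32 is 13 away, 64 is 19 away
#   32
#   >>> nearest_power_of_two(48)     # equidistant from 32 and 64 -> upper bound
#   64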
55
56 def effective_target_ratio(target_ratio: float,
57 total_target_ratio: float,
58 total_target_bytes: int,
59 capacity: int) -> float:
60 """
61 Returns the target ratio after normalizing for ratios across pools and
62 adjusting for capacity reserved by pools that have target_size_bytes set.
63 """
64 target_ratio = float(target_ratio)
65 if total_target_ratio:
66 target_ratio = target_ratio / total_target_ratio
67
68 if total_target_bytes and capacity:
69 fraction_available = 1.0 - min(1.0, float(total_target_bytes) / capacity)
70 target_ratio *= fraction_available
71
72 return target_ratio
73
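# Worked example (illustrative numbers, not from a real cluster): with
# target_ratio=1.0 on this pool, total_target_ratio=4.0 summed across the
# subtree's pools, total_target_bytes=25 TiB reserved by byte-sized pools,
# and a 100 TiB capacity:
#
#   normalized ratio   = 1.0 / 4.0                  = 0.25
#   fraction_available = 1.0 - min(1.0, 25 / 100)   = 0.75
#   effective ratio    = 0.25 * 0.75                = 0.1875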
74
75 class PgAdjustmentProgress(object):
76 """
77 Keeps the initial and target pg_num values
78 """
79
80 def __init__(self, pool_id: int, pg_num: int, pg_num_target: int) -> None:
81 self.ev_id = str(uuid.uuid4())
82 self.pool_id = pool_id
83 self.reset(pg_num, pg_num_target)
84
85 def reset(self, pg_num: int, pg_num_target: int) -> None:
86 self.pg_num = pg_num
87 self.pg_num_target = pg_num_target
88
89 def update(self, module: MgrModule, progress: float) -> None:
90 desc = 'increasing' if self.pg_num < self.pg_num_target else 'decreasing'
91 module.remote('progress', 'update', self.ev_id,
92 ev_msg="PG autoscaler %s pool %d PGs from %d to %d" %
93 (desc, self.pool_id, self.pg_num, self.pg_num_target),
94 ev_progress=progress,
95 refs=[("pool", self.pool_id)])
96
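# Usage sketch (illustrative only; mirrors how _maybe_adjust() and
# _update_progress_events() below drive this class):
#
#   ev = PgAdjustmentProgress(pool_id=3, pg_num=32, pg_num_target=128)
#   ev.update(module, 0.0)   # "PG autoscaler increasing pool 3 PGs from 32 to 128"
#   ...                      # later, once half of the pg_num change is applied
#   ev.update(module, 0.5)
#   module.remote('progress', 'complete', ev.ev_id)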
97
98 class CrushSubtreeResourceStatus:
99 def __init__(self) -> None:
100 self.root_ids: List[int] = []
101 self.osds: Set[int] = set()
102 self.osd_count: Optional[int] = None # Number of OSDs
103 self.pg_target: Optional[int] = None # Ideal full-capacity PG count?
104 self.pg_current = 0 # How many PGs already?
105 self.pg_left = 0
106 self.capacity: Optional[int] = None # Total capacity of OSDs in subtree
107 self.pool_ids: List[int] = []
108 self.pool_names: List[str] = []
109 self.pool_count: Optional[int] = None
110 self.pool_used = 0
111 self.total_target_ratio = 0.0
112 self.total_target_bytes = 0 # including replication / EC overhead
113
114
115 class PgAutoscaler(MgrModule):
116 """
117 PG autoscaler.
118 """
119 NATIVE_OPTIONS = [
120 'mon_target_pg_per_osd',
121 'mon_max_pg_per_osd',
122 ]
123
124 MODULE_OPTIONS = [
125 Option(
126 name='sleep_interval',
127 type='secs',
128 default=60),
129
130 Option(
131 name='threshold',
132 type='float',
133 desc='scaling threshold',
134             long_desc=('The factor by which the `NEW PG_NUM` must vary from the current '
135 '`PG_NUM` before being accepted. Cannot be less than 1.0'),
136 default=3.0,
137 min=1.0),
138 Option(
139 name='noautoscale',
140 type='bool',
141 desc='global autoscale flag',
142 long_desc=('Option to turn on/off the autoscaler for all pools'),
143 default=False),
144 ]
145
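    # Hedged configuration sketch (shell commands shown for reference; exact
    # syntax depends on the deployed Ceph release). Module options are set
    # under the mgr/pg_autoscaler/ prefix, native options globally:
    #
    #   ceph config set mgr mgr/pg_autoscaler/sleep_interval 120
    #   ceph config set mgr mgr/pg_autoscaler/noautoscale true
    #   ceph config set global mon_target_pg_per_osd 100
    #
    # The `threshold` option can also be changed at runtime via the
    # `osd pool set threshold` command implemented further down in this class.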
146 def __init__(self, *args: Any, **kwargs: Any) -> None:
147 super(PgAutoscaler, self).__init__(*args, **kwargs)
148 self._shutdown = threading.Event()
149 self._event: Dict[int, PgAdjustmentProgress] = {}
150
151 # So much of what we do peeks at the osdmap that it's easiest
152 # to just keep a copy of the pythonized version.
153 self._osd_map = None
154 if TYPE_CHECKING:
155 self.sleep_interval = 60
156 self.mon_target_pg_per_osd = 0
157 self.threshold = 3.0
158 self.noautoscale = False
159
160 def config_notify(self) -> None:
161 for opt in self.NATIVE_OPTIONS:
162 setattr(self,
163 opt,
164 self.get_ceph_option(opt))
165 self.log.debug(' native option %s = %s', opt, getattr(self, opt))
166 for opt in self.MODULE_OPTIONS:
167 setattr(self,
168 opt['name'],
169 self.get_module_option(opt['name']))
170 self.log.debug(' mgr option %s = %s',
171 opt['name'], getattr(self, opt['name']))
172
173 @CLIReadCommand('osd pool autoscale-status')
174 def _command_autoscale_status(self, format: str = 'plain') -> Tuple[int, str, str]:
175 """
176 report on pool pg_num sizing recommendation and intent
177 """
178 osdmap = self.get_osdmap()
179 pools = osdmap.get_pools_by_name()
180 ps, root_map = self._get_pool_status(osdmap, pools)
181
182 if format in ('json', 'json-pretty'):
183 return 0, json.dumps(ps, indent=4, sort_keys=True), ''
184 else:
185 table = PrettyTable(['POOL', 'SIZE', 'TARGET SIZE',
186 'RATE', 'RAW CAPACITY',
187 'RATIO', 'TARGET RATIO',
188 'EFFECTIVE RATIO',
189 'BIAS',
190 'PG_NUM',
191 # 'IDEAL',
192 'NEW PG_NUM', 'AUTOSCALE',
193 'BULK'],
194 border=False)
195 table.left_padding_width = 0
196 table.right_padding_width = 2
197 table.align['POOL'] = 'l'
198 table.align['SIZE'] = 'r'
199 table.align['TARGET SIZE'] = 'r'
200 table.align['RATE'] = 'r'
201 table.align['RAW CAPACITY'] = 'r'
202 table.align['RATIO'] = 'r'
203 table.align['TARGET RATIO'] = 'r'
204 table.align['EFFECTIVE RATIO'] = 'r'
205 table.align['BIAS'] = 'r'
206 table.align['PG_NUM'] = 'r'
207 # table.align['IDEAL'] = 'r'
208 table.align['NEW PG_NUM'] = 'r'
209 table.align['AUTOSCALE'] = 'l'
210 table.align['BULK'] = 'l'
211 for p in ps:
212 if p['would_adjust']:
213 final = str(p['pg_num_final'])
214 else:
215 final = ''
216 if p['target_bytes'] > 0:
217 ts = mgr_util.format_bytes(p['target_bytes'], 6)
218 else:
219 ts = ''
220 if p['target_ratio'] > 0.0:
221 tr = '%.4f' % p['target_ratio']
222 else:
223 tr = ''
224 if p['effective_target_ratio'] > 0.0:
225 etr = '%.4f' % p['effective_target_ratio']
226 else:
227 etr = ''
228 table.add_row([
229 p['pool_name'],
230 mgr_util.format_bytes(p['logical_used'], 6),
231 ts,
232 p['raw_used_rate'],
233 mgr_util.format_bytes(p['subtree_capacity'], 6),
234 '%.4f' % p['capacity_ratio'],
235 tr,
236 etr,
237 p['bias'],
238 p['pg_num_target'],
239 # p['pg_num_ideal'],
240 final,
241 p['pg_autoscale_mode'],
242 str(p['bulk'])
243 ])
244 return 0, table.get_string(), ''
245
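    # Example invocation (hedged sketch; the values below are illustrative,
    # not from a real cluster):
    #
    #   $ ceph osd pool autoscale-status
    #   POOL  SIZE   TARGET SIZE  RATE  RAW CAPACITY  RATIO   ...  PG_NUM  NEW PG_NUM  AUTOSCALE  BULK
    #   rbd   1228M               3.0         299.9G  0.0120  ...      32              on         False
    #
    #   $ ceph osd pool autoscale-status --format json-pretty   # the JSON branch above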
246 @CLIWriteCommand("osd pool set threshold")
247 def set_scaling_threshold(self, num: float) -> Tuple[int, str, str]:
248 """
249         Set the autoscaler threshold,
250         i.e. the factor by which the new PG_NUM must vary from the existing PG_NUM before it is accepted
251 """
252 if num < 1.0:
253 return 22, "", "threshold cannot be set less than 1.0"
254 self.set_module_option("threshold", num)
255 return 0, "threshold updated", ""
256
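    # Example (illustrative): lower the scaling threshold so pg_num is
    # adjusted on a 2x deviation instead of the default 3x:
    #
    #   $ ceph osd pool set threshold 2.0
    #   threshold updated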
257 def complete_all_progress_events(self) -> None:
258 for pool_id in list(self._event):
259 ev = self._event[pool_id]
260 self.remote('progress', 'complete', ev.ev_id)
261 del self._event[pool_id]
262
263 def set_autoscale_mode_all_pools(self, status: str) -> None:
264 osdmap = self.get_osdmap()
265 pools = osdmap.get_pools_by_name()
266 for pool_name, _ in pools.items():
267 self.mon_command({
268 'prefix': 'osd pool set',
269 'pool': pool_name,
270 'var': 'pg_autoscale_mode',
271 'val': status
272 })
273 @CLIWriteCommand("osd pool get noautoscale")
274 def get_noautoscale(self) -> Tuple[int, str, str]:
275 """
276         Get the noautoscale flag to see whether the autoscaler
277         is turned on or off for all pools, as well as for
278         newly created pools in the future.
279 """
280
281         if self.noautoscale is None:
282 raise TypeError("noautoscale cannot be None")
283 elif self.noautoscale:
284 return 0, "", "noautoscale is on"
285 else:
286 return 0, "", "noautoscale is off"
287
288 @CLIWriteCommand("osd pool unset noautoscale")
289 def unset_noautoscale(self) -> Tuple[int, str, str]:
290 """
291 Unset the noautoscale flag so all pools will
292 have autoscale enabled (including newly created
293 pools in the future).
294 """
295 if not self.noautoscale:
296 return 0, "", "noautoscale is already unset!"
297 else:
298 self.set_module_option("noautoscale", False)
299 self.mon_command({
300 'prefix': 'config set',
301 'who': 'global',
302 'name': 'osd_pool_default_pg_autoscale_mode',
303 'value': 'on'
304 })
305 self.set_autoscale_mode_all_pools("on")
306 return 0, "", "noautoscale is unset, all pools now have autoscale on"
307
308 @CLIWriteCommand("osd pool set noautoscale")
309 def set_noautoscale(self) -> Tuple[int, str, str]:
310 """
311         Set the noautoscale flag for all pools (including
312         newly created pools in the future)
313         and complete all ongoing progress events
314         related to PG autoscaling.
315 """
316 if self.noautoscale:
317 return 0, "", "noautoscale is already set!"
318 else:
319 self.set_module_option("noautoscale", True)
320 self.mon_command({
321 'prefix': 'config set',
322 'who': 'global',
323 'name': 'osd_pool_default_pg_autoscale_mode',
324 'value': 'off'
325 })
326 self.set_autoscale_mode_all_pools("off")
327 self.complete_all_progress_events()
328 return 0, "", "noautoscale is set, all pools now have autoscale off"
329
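    # Example usage of the three noautoscale commands defined above (output
    # strings follow the return values in this class):
    #
    #   $ ceph osd pool set noautoscale     # pause autoscaling everywhere
    #   noautoscale is set, all pools now have autoscale off
    #   $ ceph osd pool get noautoscale
    #   noautoscale is on
    #   $ ceph osd pool unset noautoscale   # resume autoscaling
    #   noautoscale is unset, all pools now have autoscale on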
330 def serve(self) -> None:
331 self.config_notify()
332 while not self._shutdown.is_set():
333 self._maybe_adjust()
334 self._update_progress_events()
335 self._shutdown.wait(timeout=self.sleep_interval)
336
337 def shutdown(self) -> None:
338 self.log.info('Stopping pg_autoscaler')
339 self._shutdown.set()
340
341 def identify_subtrees_and_overlaps(self,
342 osdmap: OSDMap,
343 crush: CRUSHMap,
344 result: Dict[int, CrushSubtreeResourceStatus],
345 overlapped_roots: Set[int],
346 roots: List[CrushSubtreeResourceStatus]) -> \
347 Tuple[List[CrushSubtreeResourceStatus],
348 Set[int]]:
349
350 # We identify subtrees and overlapping roots from osdmap
351 for pool_id, pool in osdmap.get_pools().items():
352 crush_rule = crush.get_rule_by_id(pool['crush_rule'])
353 assert crush_rule is not None
354 cr_name = crush_rule['rule_name']
355 root_id = crush.get_rule_root(cr_name)
356 assert root_id is not None
357 osds = set(crush.get_osds_under(root_id))
358
359 # Are there overlapping roots?
360 s = None
361 for prev_root_id, prev in result.items():
362 if osds & prev.osds:
363 s = prev
364 if prev_root_id != root_id:
365 overlapped_roots.add(prev_root_id)
366 overlapped_roots.add(root_id)
367 self.log.warning("pool %s won't scale due to overlapping roots: %s",
368 pool['pool_name'], overlapped_roots)
369                     self.log.warning("Please see: https://docs.ceph.com/en/"
370 "latest/rados/operations/placement-groups"
371 "/#automated-scaling")
372 break
373 if not s:
374 s = CrushSubtreeResourceStatus()
375 roots.append(s)
376 result[root_id] = s
377 s.root_ids.append(root_id)
378 s.osds |= osds
379 s.pool_ids.append(pool_id)
380 s.pool_names.append(pool['pool_name'])
381 s.pg_current += pool['pg_num_target'] * pool['size']
382 target_ratio = pool['options'].get('target_size_ratio', 0.0)
383 if target_ratio:
384 s.total_target_ratio += target_ratio
385 else:
386 target_bytes = pool['options'].get('target_size_bytes', 0)
387 if target_bytes:
388 s.total_target_bytes += target_bytes * osdmap.pool_raw_used_rate(pool_id)
389 return roots, overlapped_roots
390
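    # Illustrative scenario (assumed CRUSH names, not from the source): if one
    # pool's rule takes the root "default" (osd.0-7) while another pool's rule
    # takes a device-class shadow root such as "default~ssd" (osd.0-3), the two
    # OSD sets intersect while having different root ids, so both roots land in
    # overlapped_roots and the affected pools are skipped by the scaler.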
391 def get_subtree_resource_status(self,
392 osdmap: OSDMap,
393 crush: CRUSHMap) -> Tuple[Dict[int, CrushSubtreeResourceStatus],
394 Set[int]]:
395 """
396 For each CRUSH subtree of interest (i.e. the roots under which
397 we have pools), calculate the current resource usages and targets,
398 such as how many PGs there are, vs. how many PGs we would
399 like there to be.
400 """
401 result: Dict[int, CrushSubtreeResourceStatus] = {}
402 roots: List[CrushSubtreeResourceStatus] = []
403 overlapped_roots: Set[int] = set()
404 # identify subtrees and overlapping roots
405 roots, overlapped_roots = self.identify_subtrees_and_overlaps(osdmap,
406 crush, result, overlapped_roots, roots)
407 # finish subtrees
408 all_stats = self.get('osd_stats')
409 for s in roots:
410 assert s.osds is not None
411 s.osd_count = len(s.osds)
412 s.pg_target = s.osd_count * self.mon_target_pg_per_osd
413 s.pg_left = s.pg_target
414 s.pool_count = len(s.pool_ids)
415 capacity = 0
416 for osd_stats in all_stats['osd_stats']:
417 if osd_stats['osd'] in s.osds:
418 # Intentionally do not apply the OSD's reweight to
419 # this, because we want to calculate PG counts based
420 # on the physical storage available, not how it is
421 # reweighted right now.
422 capacity += osd_stats['kb'] * 1024
423
424 s.capacity = capacity
425 self.log.debug('root_ids %s pools %s with %d osds, pg_target %d',
426 s.root_ids,
427 s.pool_ids,
428 s.osd_count,
429 s.pg_target)
430
431 return result, overlapped_roots
432
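    # Worked example (illustrative; assumes the default mon_target_pg_per_osd
    # of 100): a subtree with 10 OSDs gets
    #
    #   pg_target = 10 * 100 = 1000
    #
    # raw (replica-counted) PGs to distribute across its pools, and pg_left
    # starts at the same value before the per-pool passes consume it.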
433 def _calc_final_pg_target(
434 self,
435 p: Dict[str, Any],
436 pool_name: str,
437 root_map: Dict[int, CrushSubtreeResourceStatus],
438 root_id: int,
439 capacity_ratio: float,
440 bias: float,
441 even_pools: Dict[str, Dict[str, Any]],
442 bulk_pools: Dict[str, Dict[str, Any]],
443 func_pass: 'PassT',
444 bulk: bool,
445 ) -> Union[Tuple[float, int, int], Tuple[None, None, None]]:
446 """
447         `func_pass` selects one of three passes: the 'first' pass sizes
448         pools without the bulk flag by their capacity ratio and defers bulk
449         pools; the 'second' pass sizes bulk pools whose used_ratio exceeds
450         the even_ratio (1 / remaining pool count) and defers the rest as
451         even pools; the 'third' pass splits the remaining pg budget evenly
452         across those even pools.
453 """
454 if func_pass == 'first':
455 # first pass to deal with small pools (no bulk flag)
456 # calculating final_pg_target based on capacity ratio
457 # we also keep track of bulk_pools to be used in second pass
458 if not bulk:
459 final_ratio = capacity_ratio
460 pg_left = root_map[root_id].pg_left
461 assert pg_left is not None
462 used_pg = final_ratio * pg_left
463 root_map[root_id].pg_left -= int(used_pg)
464 root_map[root_id].pool_used += 1
465 pool_pg_target = used_pg / p['size'] * bias
466 else:
467 bulk_pools[pool_name] = p
468 return None, None, None
469
470 elif func_pass == 'second':
471 # second pass we calculate the final_pg_target
472 # for pools that have used_ratio > even_ratio
473 # and we keep track of even pools to be used in third pass
474 pool_count = root_map[root_id].pool_count
475 assert pool_count is not None
476 even_ratio = 1 / (pool_count - root_map[root_id].pool_used)
477 used_ratio = capacity_ratio
478
479 if used_ratio > even_ratio:
480 root_map[root_id].pool_used += 1
481 else:
482 even_pools[pool_name] = p
483 return None, None, None
484
485 final_ratio = max(used_ratio, even_ratio)
486 pg_left = root_map[root_id].pg_left
487 assert pg_left is not None
488 used_pg = final_ratio * pg_left
489 root_map[root_id].pg_left -= int(used_pg)
490 pool_pg_target = used_pg / p['size'] * bias
491
492 else:
493 # third pass we just split the pg_left to all even_pools
494 pool_count = root_map[root_id].pool_count
495 assert pool_count is not None
496 final_ratio = 1 / (pool_count - root_map[root_id].pool_used)
497 pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias
498
499 min_pg = p.get('options', {}).get('pg_num_min', PG_NUM_MIN)
500 max_pg = p.get('options', {}).get('pg_num_max')
501 final_pg_target = max(min_pg, nearest_power_of_two(pool_pg_target))
502 if max_pg and max_pg < final_pg_target:
503 final_pg_target = max_pg
504 self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, "
505 "pg target {4} quantized to {5} (current {6})".format(
506 p['pool_name'],
507 root_id,
508 capacity_ratio,
509 bias,
510 pool_pg_target,
511 final_pg_target,
512 p['pg_num_target']
513 ))
514 return final_ratio, pool_pg_target, final_pg_target
515
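    # Worked example of the three passes (all numbers illustrative): take one
    # root with pool_count=3, pg_left=300 raw PGs, every pool size=3, bias=1.0.
    #
    #   first pass  - pool A (not bulk), capacity_ratio 0.1:
    #                   used_pg = 0.1 * 300 = 30, pg_left -> 270, pool_used -> 1
    #                   pool_pg_target = 30 / 3 = 10 -> final_pg_target = max(32, 8) = 32
    #                 pools B and C carry the bulk flag and are deferred.
    #   second pass - even_ratio = 1 / (3 - 1) = 0.5
    #                 pool B, capacity_ratio 0.6 > 0.5:
    #                   used_pg = 0.6 * 270 = 162, pg_left -> 108, pool_used -> 2
    #                   pool_pg_target = 162 / 3 = 54 -> final_pg_target = 64
    #                 pool C, capacity_ratio 0.2 <= 0.5, is deferred as an even pool.
    #   third pass  - final_ratio = 1 / (3 - 2) = 1.0
    #                 pool C: pool_pg_target = 108 / 3 = 36 -> final_pg_target = 32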
516 def _get_pool_pg_targets(
517 self,
518 osdmap: OSDMap,
519 pools: Dict[str, Dict[str, Any]],
520 crush_map: CRUSHMap,
521 root_map: Dict[int, CrushSubtreeResourceStatus],
522 pool_stats: Dict[int, Dict[str, int]],
523 ret: List[Dict[str, Any]],
524 threshold: float,
525 func_pass: 'PassT',
526 overlapped_roots: Set[int],
527 ) -> Tuple[List[Dict[str, Any]], Dict[str, Dict[str, Any]] , Dict[str, Dict[str, Any]]]:
528 """
529         Calculates the final_pg_target of each pool and determines whether it
530         needs scaling; this depends on the profile of the autoscaler. For
531         scale-down, we start out with a full complement of pgs and only decrease
532         it when other pools need more pgs due to increased usage. For scale-up,
533         we start out with the minimal amount of pgs and only scale up as usage increases.
534 """
535 even_pools: Dict[str, Dict[str, Any]] = {}
536 bulk_pools: Dict[str, Dict[str, Any]] = {}
537 for pool_name, p in pools.items():
538 pool_id = p['pool']
539 if pool_id not in pool_stats:
540 # race with pool deletion; skip
541 continue
542
543 # FIXME: we assume there is only one take per pool, but that
544 # may not be true.
545 crush_rule = crush_map.get_rule_by_id(p['crush_rule'])
546 assert crush_rule is not None
547 cr_name = crush_rule['rule_name']
548 root_id = crush_map.get_rule_root(cr_name)
549 assert root_id is not None
550 if root_id in overlapped_roots:
551 # skip pools
552 # with overlapping roots
553                 self.log.warning("pool %d contains an overlapping root %d"
554                                  "... skipping scaling", pool_id, root_id)
555 continue
556 capacity = root_map[root_id].capacity
557 assert capacity is not None
558 if capacity == 0:
559 self.log.debug('skipping empty subtree %s', cr_name)
560 continue
561
562 raw_used_rate = osdmap.pool_raw_used_rate(pool_id)
563
564 pool_logical_used = pool_stats[pool_id]['stored']
565 bias = p['options'].get('pg_autoscale_bias', 1.0)
566 target_bytes = 0
567 # ratio takes precedence if both are set
568 if p['options'].get('target_size_ratio', 0.0) == 0.0:
569 target_bytes = p['options'].get('target_size_bytes', 0)
570
571 # What proportion of space are we using?
572 actual_raw_used = pool_logical_used * raw_used_rate
573 actual_capacity_ratio = float(actual_raw_used) / capacity
574
575 pool_raw_used = max(pool_logical_used, target_bytes) * raw_used_rate
576 capacity_ratio = float(pool_raw_used) / capacity
577
578 self.log.info("effective_target_ratio {0} {1} {2} {3}".format(
579 p['options'].get('target_size_ratio', 0.0),
580 root_map[root_id].total_target_ratio,
581 root_map[root_id].total_target_bytes,
582 capacity))
583
584 target_ratio = effective_target_ratio(p['options'].get('target_size_ratio', 0.0),
585 root_map[root_id].total_target_ratio,
586 root_map[root_id].total_target_bytes,
587 capacity)
588
589 # determine if the pool is a bulk
590 bulk = False
591 flags = p['flags_names'].split(",")
592 if "bulk" in flags:
593 bulk = True
594
595 capacity_ratio = max(capacity_ratio, target_ratio)
596 final_ratio, pool_pg_target, final_pg_target = self._calc_final_pg_target(
597 p, pool_name, root_map, root_id,
598 capacity_ratio, bias, even_pools,
599 bulk_pools, func_pass, bulk)
600
601 if final_ratio is None:
602 continue
603
604 adjust = False
605 if (final_pg_target > p['pg_num_target'] * threshold or
606 final_pg_target < p['pg_num_target'] / threshold) and \
607 final_ratio >= 0.0 and \
608 final_ratio <= 1.0:
609 adjust = True
610
611 assert pool_pg_target is not None
612 ret.append({
613 'pool_id': pool_id,
614 'pool_name': p['pool_name'],
615 'crush_root_id': root_id,
616 'pg_autoscale_mode': p['pg_autoscale_mode'],
617 'pg_num_target': p['pg_num_target'],
618 'logical_used': pool_logical_used,
619 'target_bytes': target_bytes,
620 'raw_used_rate': raw_used_rate,
621 'subtree_capacity': capacity,
622 'actual_raw_used': actual_raw_used,
623 'raw_used': pool_raw_used,
624 'actual_capacity_ratio': actual_capacity_ratio,
625 'capacity_ratio': capacity_ratio,
626 'target_ratio': p['options'].get('target_size_ratio', 0.0),
627 'effective_target_ratio': target_ratio,
628 'pg_num_ideal': int(pool_pg_target),
629 'pg_num_final': final_pg_target,
630 'would_adjust': adjust,
631 'bias': p.get('options', {}).get('pg_autoscale_bias', 1.0),
632 'bulk': bulk,
633 })
634
635 return ret, bulk_pools, even_pools
636
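    # Example of the threshold check above (illustrative numbers): with the
    # default threshold of 3.0 and pg_num_target=32, would_adjust only becomes
    # True when final_pg_target > 32 * 3 = 96 or final_pg_target < 32 / 3 ~= 10.7,
    # so a recommendation of 64 is left alone while 128 (or 8) triggers a change.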
637 def _get_pool_status(
638 self,
639 osdmap: OSDMap,
640 pools: Dict[str, Dict[str, Any]],
641 ) -> Tuple[List[Dict[str, Any]],
642 Dict[int, CrushSubtreeResourceStatus]]:
643 threshold = self.threshold
644 assert threshold >= 1.0
645
646 crush_map = osdmap.get_crush()
647 root_map, overlapped_roots = self.get_subtree_resource_status(osdmap, crush_map)
648 df = self.get('df')
649 pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
650
651 ret: List[Dict[str, Any]] = []
652
653 # Iterate over all pools to determine how they should be sized.
654         # The first call to _get_pool_pg_targets() sizes pools without the bulk flag by
655         # their capacity ratio and collects the bulk pools.
656         # The second call sizes bulk pools whose usage exceeds the even share and collects
657         # the rest as even_pools.
658         # The third call splits the remaining pg budget evenly across those even_pools.
659
660 ret, bulk_pools, _ = self._get_pool_pg_targets(osdmap, pools, crush_map, root_map,
661 pool_stats, ret, threshold, 'first', overlapped_roots)
662
663 ret, _, even_pools = self._get_pool_pg_targets(osdmap, bulk_pools, crush_map, root_map,
664 pool_stats, ret, threshold, 'second', overlapped_roots)
665
666 ret, _, _ = self._get_pool_pg_targets(osdmap, even_pools, crush_map, root_map,
667 pool_stats, ret, threshold, 'third', overlapped_roots)
668
669 return (ret, root_map)
670
671 def _update_progress_events(self) -> None:
672 if self.noautoscale:
673 return
674 osdmap = self.get_osdmap()
675 pools = osdmap.get_pools()
676 for pool_id in list(self._event):
677 ev = self._event[pool_id]
678 pool_data = pools.get(pool_id)
679 if pool_data is None or pool_data['pg_num'] == pool_data['pg_num_target'] or ev.pg_num == ev.pg_num_target:
680 # pool is gone or we've reached our target
681 self.remote('progress', 'complete', ev.ev_id)
682 del self._event[pool_id]
683 continue
684 ev.update(self, (ev.pg_num - pool_data['pg_num']) / (ev.pg_num - ev.pg_num_target))
685
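    # Example of the progress fraction above (illustrative numbers): an event
    # created at pg_num=32 with pg_num_target=128 reports
    #
    #   (32 - 80) / (32 - 128) = 0.5
    #
    # once the pool's current pg_num has reached 80, i.e. half of the change
    # has been applied.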
686 def _maybe_adjust(self) -> None:
687 if self.noautoscale:
688 return
689 self.log.info('_maybe_adjust')
690 osdmap = self.get_osdmap()
691 if osdmap.get_require_osd_release() < 'nautilus':
692 return
693 pools = osdmap.get_pools_by_name()
694 self.log.debug("pool: {0}".format(json.dumps(pools, indent=4,
695 sort_keys=True)))
696 ps, root_map = self._get_pool_status(osdmap, pools)
697
698         # For any pool in 'warn' mode, set the health message for it and then
699         # drop it from consideration.
700 too_few = []
701 too_many = []
702 bytes_and_ratio = []
703 health_checks: Dict[str, Dict[str, Union[int, str, List[str]]]] = {}
704
705 total_bytes = dict([(r, 0) for r in iter(root_map)])
706 total_target_bytes = dict([(r, 0.0) for r in iter(root_map)])
707 target_bytes_pools: Dict[int, List[int]] = dict([(r, []) for r in iter(root_map)])
708
709 for p in ps:
710 pool_id = p['pool_id']
711 pool_opts = pools[p['pool_name']]['options']
712 if pool_opts.get('target_size_ratio', 0) > 0 and pool_opts.get('target_size_bytes', 0) > 0:
713 bytes_and_ratio.append(
714 'Pool %s has target_size_bytes and target_size_ratio set' % p['pool_name'])
715 total_bytes[p['crush_root_id']] += max(
716 p['actual_raw_used'],
717 p['target_bytes'] * p['raw_used_rate'])
718 if p['target_bytes'] > 0:
719 total_target_bytes[p['crush_root_id']] += p['target_bytes'] * p['raw_used_rate']
720 target_bytes_pools[p['crush_root_id']].append(p['pool_name'])
721 if not p['would_adjust']:
722 continue
723 if p['pg_autoscale_mode'] == 'warn':
724 msg = 'Pool %s has %d placement groups, should have %d' % (
725 p['pool_name'],
726 p['pg_num_target'],
727 p['pg_num_final'])
728 if p['pg_num_final'] > p['pg_num_target']:
729 too_few.append(msg)
730 else:
731 too_many.append(msg)
732
733 if p['pg_autoscale_mode'] == 'on':
734 # Note that setting pg_num actually sets pg_num_target (see
735 # OSDMonitor.cc)
736 r = self.mon_command({
737 'prefix': 'osd pool set',
738 'pool': p['pool_name'],
739 'var': 'pg_num',
740 'val': str(p['pg_num_final'])
741 })
742
743 # create new event or update existing one to reflect
744 # progress from current state to the new pg_num_target
745 pool_data = pools[p['pool_name']]
746 pg_num = pool_data['pg_num']
747 new_target = p['pg_num_final']
748 if pool_id in self._event:
749 self._event[pool_id].reset(pg_num, new_target)
750 else:
751 self._event[pool_id] = PgAdjustmentProgress(pool_id, pg_num, new_target)
752 self._event[pool_id].update(self, 0.0)
753
754 if r[0] != 0:
755 # FIXME: this is a serious and unexpected thing,
756 # we should expose it as a cluster log error once
757 # the hook for doing that from ceph-mgr modules is
758 # in.
759 self.log.error("pg_num adjustment on {0} to {1} failed: {2}"
760 .format(p['pool_name'],
761 p['pg_num_final'], r))
762
763 if too_few:
764 summary = "{0} pools have too few placement groups".format(
765 len(too_few))
766 health_checks['POOL_TOO_FEW_PGS'] = {
767 'severity': 'warning',
768 'summary': summary,
769 'count': len(too_few),
770 'detail': too_few
771 }
772 if too_many:
773 summary = "{0} pools have too many placement groups".format(
774 len(too_many))
775 health_checks['POOL_TOO_MANY_PGS'] = {
776 'severity': 'warning',
777 'summary': summary,
778 'count': len(too_many),
779 'detail': too_many
780 }
781
782 too_much_target_bytes = []
783 for root_id, total in total_bytes.items():
784 total_target = int(total_target_bytes[root_id])
785 capacity = root_map[root_id].capacity
786 assert capacity is not None
787 if total_target > 0 and total > capacity and capacity:
788 too_much_target_bytes.append(
789 'Pools %s overcommit available storage by %.03fx due to '
790 'target_size_bytes %s on pools %s' % (
791 root_map[root_id].pool_names,
792 total / capacity,
793 mgr_util.format_bytes(total_target, 5, colored=False),
794 target_bytes_pools[root_id]
795 )
796 )
797 elif total_target > capacity and capacity:
798 too_much_target_bytes.append(
799 'Pools %s overcommit available storage by %.03fx due to '
800 'collective target_size_bytes of %s' % (
801 root_map[root_id].pool_names,
802 total / capacity,
803 mgr_util.format_bytes(total_target, 5, colored=False),
804 )
805 )
806 if too_much_target_bytes:
807 health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
808 'severity': 'warning',
809 'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
810 'count': len(too_much_target_bytes),
811 'detail': too_much_target_bytes,
812 }
813
814 if bytes_and_ratio:
815 health_checks['POOL_HAS_TARGET_SIZE_BYTES_AND_RATIO'] = {
816 'severity': 'warning',
817 'summary': "%d pools have both target_size_bytes and target_size_ratio set" % len(bytes_and_ratio),
818 'count': len(bytes_and_ratio),
819 'detail': bytes_and_ratio,
820 }
821
822 self.set_health_checks(health_checks)
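# Worked example of the overcommit check in _maybe_adjust() (illustrative
# numbers): if a root has 100 TiB of raw capacity and its pools' combined
# max(actual_raw_used, target_bytes * raw_used_rate) adds up to 150 TiB while
# target_size_bytes is set on at least one pool, the module raises
# POOL_TARGET_SIZE_BYTES_OVERCOMMITTED with a detail line like
# "Pools [...] overcommit available storage by 1.500x due to
# target_size_bytes ... on pools [...]".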