1"""
2Automatically scale pg_num based on how much data is stored in each pool.
3"""
4
11fdf7f2
TL
5import json
6import mgr_util
7import threading
522d829b 8from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union
11fdf7f2 9import uuid
9f95a23c 10from prettytable import PrettyTable
522d829b 11from mgr_module import HealthChecksT, CLIReadCommand, CLIWriteCommand, CRUSHMap, MgrModule, Option, OSDMap
11fdf7f2
TL
12
13"""
14Some terminology is made up for the purposes of this module:
15
16 - "raw pgs": pg count after applying replication, i.e. the real resource
17 consumption of a pool.
18 - "grow/shrink" - increase/decrease the pg_num in a pool
19 - "crush subtree" - non-overlapping domains in crush hierarchy: used as
20 units of resource management.
21"""

INTERVAL = 5

PG_NUM_MIN = 32  # unless specified on a per-pool basis

if TYPE_CHECKING:
    import sys
    if sys.version_info >= (3, 8):
        from typing import Literal
    else:
        from typing_extensions import Literal

    PassT = Literal['first', 'second', 'third']


def nearest_power_of_two(n: int) -> int:
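    # Bit-twiddling sketch: OR-ing v with its right shifts fills in every bit
    # below the highest set bit of n - 1, so v + 1 is the power of two at or
    # above n and v >> 1 is the one below; we return whichever is closer.
    # Illustrative example: n = 100 gives v = 128 and x = 64, and since 128 is
    # closer to 100 than 64 is, the function returns 128.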
    v = int(n)

    v -= 1
    v |= v >> 1
    v |= v >> 2
    v |= v >> 4
    v |= v >> 8
    v |= v >> 16

    # High bound power of two
    v += 1

    # Low bound power of two
    x = v >> 1

    return x if (v - n) > (n - x) else v


def effective_target_ratio(target_ratio: float,
                           total_target_ratio: float,
                           total_target_bytes: int,
                           capacity: int) -> float:
    """
    Returns the target ratio after normalizing for ratios across pools and
    adjusting for capacity reserved by pools that have target_size_bytes set.
    """
    target_ratio = float(target_ratio)
    if total_target_ratio:
        target_ratio = target_ratio / total_target_ratio

    if total_target_bytes and capacity:
        fraction_available = 1.0 - min(1.0, float(total_target_bytes) / capacity)
        target_ratio *= fraction_available

    return target_ratio


class PgAdjustmentProgress(object):
    """
    Keeps the initial and target pg_num values
    """

    def __init__(self, pool_id: int, pg_num: int, pg_num_target: int) -> None:
        self.ev_id = str(uuid.uuid4())
        self.pool_id = pool_id
        self.reset(pg_num, pg_num_target)

    def reset(self, pg_num: int, pg_num_target: int) -> None:
        self.pg_num = pg_num
        self.pg_num_target = pg_num_target

    def update(self, module: MgrModule, progress: float) -> None:
        desc = 'increasing' if self.pg_num < self.pg_num_target else 'decreasing'
        module.remote('progress', 'update', self.ev_id,
                      ev_msg="PG autoscaler %s pool %d PGs from %d to %d" %
                      (desc, self.pool_id, self.pg_num, self.pg_num_target),
                      ev_progress=progress,
                      refs=[("pool", self.pool_id)])


class CrushSubtreeResourceStatus:
    def __init__(self) -> None:
        self.root_ids: List[int] = []
        self.osds: Set[int] = set()
        self.osd_count: Optional[int] = None  # Number of OSDs
        self.pg_target: Optional[int] = None  # Ideal full-capacity PG count?
        self.pg_current = 0  # How many PGs already?
        self.pg_left = 0
        self.capacity: Optional[int] = None  # Total capacity of OSDs in subtree
        self.pool_ids: List[int] = []
        self.pool_names: List[str] = []
        self.pool_count: Optional[int] = None
        self.pool_used = 0
        self.total_target_ratio = 0.0
        self.total_target_bytes = 0  # including replication / EC overhead


class PgAutoscaler(MgrModule):
    """
    PG autoscaler.
    """
    NATIVE_OPTIONS = [
        'mon_target_pg_per_osd',
        'mon_max_pg_per_osd',
    ]

    MODULE_OPTIONS = [
        Option(
            name='sleep_interval',
            type='secs',
            default=60),

        Option(
            name='threshold',
            type='float',
            desc='scaling threshold',
            long_desc=('The factor by which the `NEW PG_NUM` must vary from the current '
                       '`PG_NUM` before being accepted. Cannot be less than 1.0'),
            default=3.0,
            min=1.0),
        Option(
            name='noautoscale',
            type='bool',
            desc='global autoscale flag',
            long_desc=('Option to turn on/off the autoscaler for all pools'),
            default=False),
    ]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super(PgAutoscaler, self).__init__(*args, **kwargs)
        self._shutdown = threading.Event()
        self._event: Dict[int, PgAdjustmentProgress] = {}

        # So much of what we do peeks at the osdmap that it's easiest
        # to just keep a copy of the pythonized version.
        self._osd_map = None
        if TYPE_CHECKING:
            self.sleep_interval = 60
            self.mon_target_pg_per_osd = 0
            self.threshold = 3.0
            self.noautoscale = False

    def config_notify(self) -> None:
        for opt in self.NATIVE_OPTIONS:
            setattr(self,
                    opt,
                    self.get_ceph_option(opt))
            self.log.debug(' native option %s = %s', opt, getattr(self, opt))
        for opt in self.MODULE_OPTIONS:
            setattr(self,
                    opt['name'],
                    self.get_module_option(opt['name']))
            self.log.debug(' mgr option %s = %s',
                           opt['name'], getattr(self, opt['name']))

    @CLIReadCommand('osd pool autoscale-status')
    def _command_autoscale_status(self, format: str = 'plain') -> Tuple[int, str, str]:
        """
        report on pool pg_num sizing recommendation and intent
        """
        osdmap = self.get_osdmap()
        pools = osdmap.get_pools_by_name()
        ps, root_map = self._get_pool_status(osdmap, pools)

        if format in ('json', 'json-pretty'):
            return 0, json.dumps(ps, indent=4, sort_keys=True), ''
        else:
            table = PrettyTable(['POOL', 'SIZE', 'TARGET SIZE',
                                 'RATE', 'RAW CAPACITY',
                                 'RATIO', 'TARGET RATIO',
                                 'EFFECTIVE RATIO',
                                 'BIAS',
                                 'PG_NUM',
#                                 'IDEAL',
                                 'NEW PG_NUM', 'AUTOSCALE',
                                 'BULK'],
                                border=False)
            table.left_padding_width = 0
            table.right_padding_width = 2
            table.align['POOL'] = 'l'
            table.align['SIZE'] = 'r'
            table.align['TARGET SIZE'] = 'r'
            table.align['RATE'] = 'r'
            table.align['RAW CAPACITY'] = 'r'
            table.align['RATIO'] = 'r'
            table.align['TARGET RATIO'] = 'r'
            table.align['EFFECTIVE RATIO'] = 'r'
            table.align['BIAS'] = 'r'
            table.align['PG_NUM'] = 'r'
#            table.align['IDEAL'] = 'r'
            table.align['NEW PG_NUM'] = 'r'
            table.align['AUTOSCALE'] = 'l'
            table.align['BULK'] = 'l'
            for p in ps:
                if p['would_adjust']:
                    final = str(p['pg_num_final'])
                else:
                    final = ''
                if p['target_bytes'] > 0:
                    ts = mgr_util.format_bytes(p['target_bytes'], 6)
                else:
                    ts = ''
                if p['target_ratio'] > 0.0:
                    tr = '%.4f' % p['target_ratio']
                else:
                    tr = ''
                if p['effective_target_ratio'] > 0.0:
                    etr = '%.4f' % p['effective_target_ratio']
                else:
                    etr = ''
                table.add_row([
                    p['pool_name'],
                    mgr_util.format_bytes(p['logical_used'], 6),
                    ts,
                    p['raw_used_rate'],
                    mgr_util.format_bytes(p['subtree_capacity'], 6),
                    '%.4f' % p['capacity_ratio'],
                    tr,
                    etr,
                    p['bias'],
                    p['pg_num_target'],
#                    p['pg_num_ideal'],
                    final,
                    p['pg_autoscale_mode'],
                    str(p['bulk'])
                ])
            return 0, table.get_string(), ''

    @CLIWriteCommand("osd pool set threshold")
    def set_scaling_threshold(self, num: float) -> Tuple[int, str, str]:
        """
        set the autoscaler threshold
        A.K.A. the factor by which the new PG_NUM must vary from the existing PG_NUM
        """
        if num < 1.0:
            return 22, "", "threshold cannot be set less than 1.0"
        self.set_module_option("threshold", num)
        return 0, "threshold updated", ""

    def complete_all_progress_events(self) -> None:
        for pool_id in list(self._event):
            ev = self._event[pool_id]
            self.remote('progress', 'complete', ev.ev_id)
            del self._event[pool_id]

    def set_autoscale_mode_all_pools(self, status: str) -> None:
        osdmap = self.get_osdmap()
        pools = osdmap.get_pools_by_name()
        for pool_name, _ in pools.items():
            self.mon_command({
                'prefix': 'osd pool set',
                'pool': pool_name,
                'var': 'pg_autoscale_mode',
                'val': status
            })

    @CLIWriteCommand("osd pool get noautoscale")
    def get_noautoscale(self) -> Tuple[int, str, str]:
        """
        Get the noautoscale flag to see whether the autoscaler
        is turned on or off for all pools, as well as for
        newly created pools in the future.
        """

        if self.noautoscale is None:
            raise TypeError("noautoscale cannot be None")
        elif self.noautoscale:
            return 0, "", "noautoscale is on"
        else:
            return 0, "", "noautoscale is off"

    @CLIWriteCommand("osd pool unset noautoscale")
    def unset_noautoscale(self) -> Tuple[int, str, str]:
        """
        Unset the noautoscale flag so all pools will
        have autoscale enabled (including newly created
        pools in the future).
        """
        if not self.noautoscale:
            return 0, "", "noautoscale is already unset!"
        else:
            self.set_module_option("noautoscale", False)
            self.mon_command({
                'prefix': 'config set',
                'who': 'global',
                'name': 'osd_pool_default_pg_autoscale_mode',
                'value': 'on'
            })
            self.set_autoscale_mode_all_pools("on")
        return 0, "", "noautoscale is unset, all pools now have autoscale on"

    @CLIWriteCommand("osd pool set noautoscale")
    def set_noautoscale(self) -> Tuple[int, str, str]:
        """
        Set the noautoscale flag for all pools (including
        newly created pools in the future)
        and complete all on-going progress events
        regarding PG-autoscaling.
        """
        if self.noautoscale:
            return 0, "", "noautoscale is already set!"
        else:
            self.set_module_option("noautoscale", True)
            self.mon_command({
                'prefix': 'config set',
                'who': 'global',
                'name': 'osd_pool_default_pg_autoscale_mode',
                'value': 'off'
            })
            self.set_autoscale_mode_all_pools("off")
            self.complete_all_progress_events()
        return 0, "", "noautoscale is set, all pools now have autoscale off"

    def serve(self) -> None:
        self.config_notify()
        while not self._shutdown.is_set():
            self._maybe_adjust()
            self._update_progress_events()
            self._shutdown.wait(timeout=self.sleep_interval)

    def shutdown(self) -> None:
        self.log.info('Stopping pg_autoscaler')
        self._shutdown.set()

    def identify_subtrees_and_overlaps(self,
                                       osdmap: OSDMap,
                                       crush: CRUSHMap,
                                       result: Dict[int, CrushSubtreeResourceStatus],
                                       overlapped_roots: Set[int],
                                       roots: List[CrushSubtreeResourceStatus]) -> \
            Tuple[List[CrushSubtreeResourceStatus],
                  Set[int]]:

        # We identify subtrees and overlapping roots from osdmap
        for pool_id, pool in osdmap.get_pools().items():
            crush_rule = crush.get_rule_by_id(pool['crush_rule'])
            assert crush_rule is not None
            cr_name = crush_rule['rule_name']
            root_id = crush.get_rule_root(cr_name)
            assert root_id is not None
            osds = set(crush.get_osds_under(root_id))

            # Are there overlapping roots?
            s = None
            for prev_root_id, prev in result.items():
                if osds & prev.osds:
                    s = prev
                    if prev_root_id != root_id:
                        overlapped_roots.add(prev_root_id)
                        overlapped_roots.add(root_id)
                        self.log.error('pool %d has overlapping roots: %s',
                                       pool_id, overlapped_roots)
                    break
            if not s:
                s = CrushSubtreeResourceStatus()
                roots.append(s)
                result[root_id] = s
            s.root_ids.append(root_id)
            s.osds |= osds
            s.pool_ids.append(pool_id)
            s.pool_names.append(pool['pool_name'])
            s.pg_current += pool['pg_num_target'] * pool['size']
            target_ratio = pool['options'].get('target_size_ratio', 0.0)
            if target_ratio:
                s.total_target_ratio += target_ratio
            else:
                target_bytes = pool['options'].get('target_size_bytes', 0)
                if target_bytes:
                    s.total_target_bytes += target_bytes * osdmap.pool_raw_used_rate(pool_id)
        return roots, overlapped_roots

    def get_subtree_resource_status(self,
                                    osdmap: OSDMap,
                                    crush: CRUSHMap) -> Tuple[Dict[int, CrushSubtreeResourceStatus],
                                                              Set[int]]:
        """
        For each CRUSH subtree of interest (i.e. the roots under which
        we have pools), calculate the current resource usages and targets,
        such as how many PGs there are, vs. how many PGs we would
        like there to be.
        """
        result: Dict[int, CrushSubtreeResourceStatus] = {}
        roots: List[CrushSubtreeResourceStatus] = []
        overlapped_roots: Set[int] = set()
        # identify subtrees and overlapping roots
        roots, overlapped_roots = self.identify_subtrees_and_overlaps(osdmap,
                                                                      crush, result, overlapped_roots, roots)
        # finish subtrees
        all_stats = self.get('osd_stats')
        for s in roots:
            assert s.osds is not None
            s.osd_count = len(s.osds)
            s.pg_target = s.osd_count * self.mon_target_pg_per_osd
            s.pg_left = s.pg_target
            s.pool_count = len(s.pool_ids)
            capacity = 0
            for osd_stats in all_stats['osd_stats']:
                if osd_stats['osd'] in s.osds:
                    # Intentionally do not apply the OSD's reweight to
                    # this, because we want to calculate PG counts based
                    # on the physical storage available, not how it is
                    # reweighted right now.
                    capacity += osd_stats['kb'] * 1024

            s.capacity = capacity
            self.log.debug('root_ids %s pools %s with %d osds, pg_target %d',
                           s.root_ids,
                           s.pool_ids,
                           s.osd_count,
                           s.pg_target)

        return result, overlapped_roots

    def _calc_final_pg_target(
            self,
            p: Dict[str, Any],
            pool_name: str,
            root_map: Dict[int, CrushSubtreeResourceStatus],
            root_id: int,
            capacity_ratio: float,
            bias: float,
            even_pools: Dict[str, Dict[str, Any]],
            bulk_pools: Dict[str, Dict[str, Any]],
            func_pass: 'PassT',
            bulk: bool,
    ) -> Union[Tuple[float, int, int], Tuple[None, None, None]]:
        """
        `func_pass` selects one of three passes over the pools of a root:
        in the 'first' pass the caller sizes the non-bulk pools from their
        capacity ratio; in the 'second' pass it sizes the bulk pools whose
        used_ratio is greater than the even_ratio; in the 'third' pass the
        remaining (even) pools split what is left, each getting
        1 / pool_count of the PGs still available in the root we are
        currently looking at.
        """
        if func_pass == 'first':
            # First pass: deal with small pools (no bulk flag), calculating
            # final_pg_target based on the capacity ratio. We also keep track
            # of bulk_pools to be used in the second pass.
            if not bulk:
                final_ratio = capacity_ratio
                pg_left = root_map[root_id].pg_left
                assert pg_left is not None
                used_pg = final_ratio * pg_left
                root_map[root_id].pg_left -= int(used_pg)
                root_map[root_id].pool_used += 1
                pool_pg_target = used_pg / p['size'] * bias
            else:
                bulk_pools[pool_name] = p
                return None, None, None

        elif func_pass == 'second':
            # Second pass: calculate the final_pg_target for pools that have
            # used_ratio > even_ratio, and keep track of the even pools to be
            # used in the third pass.
            pool_count = root_map[root_id].pool_count
            assert pool_count is not None
            even_ratio = 1 / (pool_count - root_map[root_id].pool_used)
            used_ratio = capacity_ratio

            if used_ratio > even_ratio:
                root_map[root_id].pool_used += 1
            else:
                even_pools[pool_name] = p
                return None, None, None

            final_ratio = max(used_ratio, even_ratio)
            pg_left = root_map[root_id].pg_left
            assert pg_left is not None
            used_pg = final_ratio * pg_left
            root_map[root_id].pg_left -= int(used_pg)
            pool_pg_target = used_pg / p['size'] * bias

        else:
            # Third pass: just split the remaining pg_left across all even_pools.
            pool_count = root_map[root_id].pool_count
            assert pool_count is not None
            final_ratio = 1 / (pool_count - root_map[root_id].pool_used)
            pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias

        min_pg = p.get('options', {}).get('pg_num_min', PG_NUM_MIN)
        max_pg = p.get('options', {}).get('pg_num_max')
        final_pg_target = max(min_pg, nearest_power_of_two(pool_pg_target))
        if max_pg and max_pg < final_pg_target:
            final_pg_target = max_pg
        self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, "
                      "pg target {4} quantized to {5} (current {6})".format(
                          p['pool_name'],
                          root_id,
                          capacity_ratio,
                          bias,
                          pool_pg_target,
                          final_pg_target,
                          p['pg_num_target']
                      ))
        return final_ratio, pool_pg_target, final_pg_target

    def _get_pool_pg_targets(
            self,
            osdmap: OSDMap,
            pools: Dict[str, Dict[str, Any]],
            crush_map: CRUSHMap,
            root_map: Dict[int, CrushSubtreeResourceStatus],
            pool_stats: Dict[int, Dict[str, int]],
            ret: List[Dict[str, Any]],
            threshold: float,
            func_pass: 'PassT',
            overlapped_roots: Set[int],
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
        """
        Calculates the final_pg_target of each pool and determines whether it
        needs scaling; this depends on the profile of the autoscaler. For
        scale-down, we start out with a full complement of pgs and only
        decrease them when other pools need more pgs due to increased usage.
        For scale-up, we start out with the minimal amount of pgs and only
        scale up when there is an increase in usage.
        """
        even_pools: Dict[str, Dict[str, Any]] = {}
        bulk_pools: Dict[str, Dict[str, Any]] = {}
        for pool_name, p in pools.items():
            pool_id = p['pool']
            if pool_id not in pool_stats:
                # race with pool deletion; skip
                continue

            # FIXME: we assume there is only one take per pool, but that
            # may not be true.
            crush_rule = crush_map.get_rule_by_id(p['crush_rule'])
            assert crush_rule is not None
            cr_name = crush_rule['rule_name']
            root_id = crush_map.get_rule_root(cr_name)
            assert root_id is not None
            if root_id in overlapped_roots:
                # skip pools with overlapping roots
                self.log.warn("pool %d contains an overlapping root %d"
                              "... skipping scaling", pool_id, root_id)
                continue
            capacity = root_map[root_id].capacity
            assert capacity is not None
            if capacity == 0:
                self.log.debug('skipping empty subtree %s', cr_name)
                continue

            raw_used_rate = osdmap.pool_raw_used_rate(pool_id)

            pool_logical_used = pool_stats[pool_id]['stored']
            bias = p['options'].get('pg_autoscale_bias', 1.0)
            target_bytes = 0
            # ratio takes precedence if both are set
            if p['options'].get('target_size_ratio', 0.0) == 0.0:
                target_bytes = p['options'].get('target_size_bytes', 0)

            # What proportion of space are we using?
            actual_raw_used = pool_logical_used * raw_used_rate
            actual_capacity_ratio = float(actual_raw_used) / capacity

            pool_raw_used = max(pool_logical_used, target_bytes) * raw_used_rate
            capacity_ratio = float(pool_raw_used) / capacity

            self.log.info("effective_target_ratio {0} {1} {2} {3}".format(
                p['options'].get('target_size_ratio', 0.0),
                root_map[root_id].total_target_ratio,
                root_map[root_id].total_target_bytes,
                capacity))

            target_ratio = effective_target_ratio(p['options'].get('target_size_ratio', 0.0),
                                                  root_map[root_id].total_target_ratio,
                                                  root_map[root_id].total_target_bytes,
                                                  capacity)

            # determine if the pool is bulk
            bulk = False
            flags = p['flags_names'].split(",")
            if "bulk" in flags:
                bulk = True

            capacity_ratio = max(capacity_ratio, target_ratio)
            final_ratio, pool_pg_target, final_pg_target = self._calc_final_pg_target(
                p, pool_name, root_map, root_id,
                capacity_ratio, bias, even_pools,
                bulk_pools, func_pass, bulk)

            if final_ratio is None:
                continue

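            # Only propose a change when the suggested pg_num differs from the
            # current target by at least the `threshold` factor. Illustrative
            # example: with the default threshold of 3.0, a pool whose
            # pg_num_target is 32 is only adjusted once the suggestion exceeds
            # 96 or drops below roughly 11 (in practice the next power of two
            # in either direction, 128 or 8).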
            adjust = False
            if (final_pg_target > p['pg_num_target'] * threshold or
                    final_pg_target < p['pg_num_target'] / threshold) and \
                    final_ratio >= 0.0 and \
                    final_ratio <= 1.0:
                adjust = True

            assert pool_pg_target is not None
            ret.append({
                'pool_id': pool_id,
                'pool_name': p['pool_name'],
                'crush_root_id': root_id,
                'pg_autoscale_mode': p['pg_autoscale_mode'],
                'pg_num_target': p['pg_num_target'],
                'logical_used': pool_logical_used,
                'target_bytes': target_bytes,
                'raw_used_rate': raw_used_rate,
                'subtree_capacity': capacity,
                'actual_raw_used': actual_raw_used,
                'raw_used': pool_raw_used,
                'actual_capacity_ratio': actual_capacity_ratio,
                'capacity_ratio': capacity_ratio,
                'target_ratio': p['options'].get('target_size_ratio', 0.0),
                'effective_target_ratio': target_ratio,
                'pg_num_ideal': int(pool_pg_target),
                'pg_num_final': final_pg_target,
                'would_adjust': adjust,
                'bias': p.get('options', {}).get('pg_autoscale_bias', 1.0),
                'bulk': bulk,
            })

        return ret, bulk_pools, even_pools

    def _get_pool_status(
            self,
            osdmap: OSDMap,
            pools: Dict[str, Dict[str, Any]],
    ) -> Tuple[List[Dict[str, Any]],
               Dict[int, CrushSubtreeResourceStatus]]:
        threshold = self.threshold
        assert threshold >= 1.0

        crush_map = osdmap.get_crush()
        root_map, overlapped_roots = self.get_subtree_resource_status(osdmap, crush_map)
        df = self.get('df')
        pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])

        ret: List[Dict[str, Any]] = []

        # Iterate over all pools to determine how they should be sized.
        # The first call of _get_pool_pg_targets() sizes the non-bulk pools
        # and collects the bulk pools for the second pass.
        # The second call sizes the bulk pools that use more capacity than
        # the even_ratio of the other pools and collects the remaining
        # (even) pools for the third pass.
        # The third call iterates over those even pools and gives each of
        # them 1/pool_count of the PGs that are still left.

        ret, bulk_pools, _ = self._get_pool_pg_targets(osdmap, pools, crush_map, root_map,
                                                       pool_stats, ret, threshold, 'first', overlapped_roots)

        ret, _, even_pools = self._get_pool_pg_targets(osdmap, bulk_pools, crush_map, root_map,
                                                       pool_stats, ret, threshold, 'second', overlapped_roots)

        ret, _, _ = self._get_pool_pg_targets(osdmap, even_pools, crush_map, root_map,
                                              pool_stats, ret, threshold, 'third', overlapped_roots)

        return (ret, root_map)

    def _update_progress_events(self) -> None:
        if self.noautoscale:
            return
        osdmap = self.get_osdmap()
        pools = osdmap.get_pools()
        for pool_id in list(self._event):
            ev = self._event[pool_id]
            pool_data = pools.get(pool_id)
            if pool_data is None or pool_data['pg_num'] == pool_data['pg_num_target'] or ev.pg_num == ev.pg_num_target:
                # pool is gone or we've reached our target
                self.remote('progress', 'complete', ev.ev_id)
                del self._event[pool_id]
                continue
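            # Progress is the fraction of the pg_num distance already covered.
            # Illustrative example: an event going from pg_num 32 to 128 with
            # the pool currently at pg_num 64 reports (32 - 64) / (32 - 128),
            # i.e. one third.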
            ev.update(self, (ev.pg_num - pool_data['pg_num']) / (ev.pg_num - ev.pg_num_target))

    def _maybe_adjust(self) -> None:
        if self.noautoscale:
            return
        self.log.info('_maybe_adjust')
        osdmap = self.get_osdmap()
        if osdmap.get_require_osd_release() < 'nautilus':
            return
        pools = osdmap.get_pools_by_name()
        ps, root_map = self._get_pool_status(osdmap, pools)

        # Anyone in 'warn', set the health message for them and then
        # drop them from consideration.
        too_few = []
        too_many = []
        bytes_and_ratio = []
        health_checks: Dict[str, Dict[str, Union[int, str, List[str]]]] = {}

        total_bytes = dict([(r, 0) for r in iter(root_map)])
        total_target_bytes = dict([(r, 0.0) for r in iter(root_map)])
        target_bytes_pools: Dict[int, List[str]] = dict([(r, []) for r in iter(root_map)])

        for p in ps:
            pool_id = p['pool_id']
            pool_opts = pools[p['pool_name']]['options']
            if pool_opts.get('target_size_ratio', 0) > 0 and pool_opts.get('target_size_bytes', 0) > 0:
                bytes_and_ratio.append(
                    'Pool %s has target_size_bytes and target_size_ratio set' % p['pool_name'])
            total_bytes[p['crush_root_id']] += max(
                p['actual_raw_used'],
                p['target_bytes'] * p['raw_used_rate'])
            if p['target_bytes'] > 0:
                total_target_bytes[p['crush_root_id']] += p['target_bytes'] * p['raw_used_rate']
                target_bytes_pools[p['crush_root_id']].append(p['pool_name'])
            if not p['would_adjust']:
                continue
            if p['pg_autoscale_mode'] == 'warn':
                msg = 'Pool %s has %d placement groups, should have %d' % (
                    p['pool_name'],
                    p['pg_num_target'],
                    p['pg_num_final'])
                if p['pg_num_final'] > p['pg_num_target']:
                    too_few.append(msg)
                else:
                    too_many.append(msg)

            if p['pg_autoscale_mode'] == 'on':
                # Note that setting pg_num actually sets pg_num_target (see
                # OSDMonitor.cc)
                r = self.mon_command({
                    'prefix': 'osd pool set',
                    'pool': p['pool_name'],
                    'var': 'pg_num',
                    'val': str(p['pg_num_final'])
                })

                # create new event or update existing one to reflect
                # progress from current state to the new pg_num_target
                pool_data = pools[p['pool_name']]
                pg_num = pool_data['pg_num']
                new_target = p['pg_num_final']
                if pool_id in self._event:
                    self._event[pool_id].reset(pg_num, new_target)
                else:
                    self._event[pool_id] = PgAdjustmentProgress(pool_id, pg_num, new_target)
                self._event[pool_id].update(self, 0.0)

                if r[0] != 0:
                    # FIXME: this is a serious and unexpected thing,
                    # we should expose it as a cluster log error once
                    # the hook for doing that from ceph-mgr modules is
                    # in.
                    self.log.error("pg_num adjustment on {0} to {1} failed: {2}"
                                   .format(p['pool_name'],
                                           p['pg_num_final'], r))

        if too_few:
            summary = "{0} pools have too few placement groups".format(
                len(too_few))
            health_checks['POOL_TOO_FEW_PGS'] = {
                'severity': 'warning',
                'summary': summary,
                'count': len(too_few),
                'detail': too_few
            }
        if too_many:
            summary = "{0} pools have too many placement groups".format(
                len(too_many))
            health_checks['POOL_TOO_MANY_PGS'] = {
                'severity': 'warning',
                'summary': summary,
                'count': len(too_many),
                'detail': too_many
            }

        too_much_target_bytes = []
        for root_id, total in total_bytes.items():
            total_target = int(total_target_bytes[root_id])
            capacity = root_map[root_id].capacity
            assert capacity is not None
            if total_target > 0 and total > capacity and capacity:
                too_much_target_bytes.append(
                    'Pools %s overcommit available storage by %.03fx due to '
                    'target_size_bytes %s on pools %s' % (
                        root_map[root_id].pool_names,
                        total / capacity,
                        mgr_util.format_bytes(total_target, 5, colored=False),
                        target_bytes_pools[root_id]
                    )
                )
            elif total_target > capacity and capacity:
                too_much_target_bytes.append(
                    'Pools %s overcommit available storage by %.03fx due to '
                    'collective target_size_bytes of %s' % (
                        root_map[root_id].pool_names,
                        total / capacity,
                        mgr_util.format_bytes(total_target, 5, colored=False),
                    )
                )
        if too_much_target_bytes:
            health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
                'severity': 'warning',
                'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
                'count': len(too_much_target_bytes),
                'detail': too_much_target_bytes,
            }

        if bytes_and_ratio:
            health_checks['POOL_HAS_TARGET_SIZE_BYTES_AND_RATIO'] = {
                'severity': 'warning',
                'summary': "%d pools have both target_size_bytes and target_size_ratio set" % len(bytes_and_ratio),
                'count': len(bytes_and_ratio),
                'detail': bytes_and_ratio,
            }

        self.set_health_checks(health_checks)