ceph/src/pybind/mgr/pg_autoscaler/module.py @ commit 57d27655114483a8d7abf9056415cac4315a63b9
1 """
2 Automatically scale pg_num based on how much data is stored in each pool.
3 """
4
5 import json
6 import mgr_util
7 import threading
8 from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union
9 import uuid
10 from prettytable import PrettyTable
11 from mgr_module import HealthChecksT, CLIReadCommand, CLIWriteCommand, CRUSHMap, MgrModule, Option, OSDMap
12
13 """
14 Some terminology is made up for the purposes of this module:
15
16 - "raw pgs": pg count after applying replication, i.e. the real resource
17 consumption of a pool.
18 - "grow/shrink" - increase/decrease the pg_num in a pool
19 - "crush subtree" - non-overlapping domains in crush hierarchy: used as
20 units of resource management.
21 """
22
23 INTERVAL = 5
24
25 PG_NUM_MIN = 32 # unless specified on a per-pool basis
26
27 if TYPE_CHECKING:
28 import sys
29 if sys.version_info >= (3, 8):
30 from typing import Literal
31 else:
32 from typing_extensions import Literal
33
34 PassT = Literal['first', 'second', 'third']
35
36
37 def nearest_power_of_two(n: int) -> int:
38 v = int(n)
39
40 v -= 1
41 v |= v >> 1
42 v |= v >> 2
43 v |= v >> 4
44 v |= v >> 8
45 v |= v >> 16
46
47 # High bound power of two
48 v += 1
49
50    # Low bound power of two
51 x = v >> 1
52
53 return x if (v - n) > (n - x) else v
54
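# Illustrative sketch (not part of the module): nearest_power_of_two() rounds
# to whichever power of two is closer, preferring the upper bound on ties.
#
#   >>> nearest_power_of_two(1000)   # 1024 is 24 away, 512 is 488 away
#   1024
#   >>> nearest_power_of_two(45)     # 32 is 13 away, 64 is 19 away
#   32
#   >>> nearest_power_of_two(48)     # equidistant from 32 and 64 -> upper bound
#   64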
55
56 def effective_target_ratio(target_ratio: float,
57 total_target_ratio: float,
58 total_target_bytes: int,
59 capacity: int) -> float:
60 """
61 Returns the target ratio after normalizing for ratios across pools and
62 adjusting for capacity reserved by pools that have target_size_bytes set.
63 """
64 target_ratio = float(target_ratio)
65 if total_target_ratio:
66 target_ratio = target_ratio / total_target_ratio
67
68 if total_target_bytes and capacity:
69 fraction_available = 1.0 - min(1.0, float(total_target_bytes) / capacity)
70 target_ratio *= fraction_available
71
72 return target_ratio
73
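# Worked example (illustrative numbers, not from a real cluster): with
# target_ratio=1.0 on this pool, total_target_ratio=4.0 summed across the
# subtree's pools, total_target_bytes=25 TiB reserved by byte-sized pools,
# and a 100 TiB capacity:
#
#   normalized ratio   = 1.0 / 4.0                  = 0.25
#   fraction_available = 1.0 - min(1.0, 25 / 100)   = 0.75
#   effective ratio    = 0.25 * 0.75                = 0.1875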
74
75 class PgAdjustmentProgress(object):
76 """
77 Keeps the initial and target pg_num values
78 """
79
80 def __init__(self, pool_id: int, pg_num: int, pg_num_target: int) -> None:
81 self.ev_id = str(uuid.uuid4())
82 self.pool_id = pool_id
83 self.reset(pg_num, pg_num_target)
84
85 def reset(self, pg_num: int, pg_num_target: int) -> None:
86 self.pg_num = pg_num
87 self.pg_num_target = pg_num_target
88
89 def update(self, module: MgrModule, progress: float) -> None:
90 desc = 'increasing' if self.pg_num < self.pg_num_target else 'decreasing'
91 module.remote('progress', 'update', self.ev_id,
92 ev_msg="PG autoscaler %s pool %d PGs from %d to %d" %
93 (desc, self.pool_id, self.pg_num, self.pg_num_target),
94 ev_progress=progress,
95 refs=[("pool", self.pool_id)])
96
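# Usage sketch (illustrative only; mirrors how _maybe_adjust() and
# _update_progress_events() below drive this class):
#
#   ev = PgAdjustmentProgress(pool_id=3, pg_num=32, pg_num_target=128)
#   ev.update(module, 0.0)   # "PG autoscaler increasing pool 3 PGs from 32 to 128"
#   ...                      # later, once half of the pg_num change is applied
#   ev.update(module, 0.5)
#   module.remote('progress', 'complete', ev.ev_id)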
97
98 class CrushSubtreeResourceStatus:
99 def __init__(self) -> None:
100 self.root_ids: List[int] = []
101 self.osds: Set[int] = set()
102 self.osd_count: Optional[int] = None # Number of OSDs
103 self.pg_target: Optional[int] = None # Ideal full-capacity PG count?
104 self.pg_current = 0 # How many PGs already?
105 self.pg_left = 0
106 self.capacity: Optional[int] = None # Total capacity of OSDs in subtree
107 self.pool_ids: List[int] = []
108 self.pool_names: List[str] = []
109 self.pool_count: Optional[int] = None
110 self.pool_used = 0
111 self.total_target_ratio = 0.0
112 self.total_target_bytes = 0 # including replication / EC overhead
113
114
115 class PgAutoscaler(MgrModule):
116 """
117 PG autoscaler.
118 """
119 NATIVE_OPTIONS = [
120 'mon_target_pg_per_osd',
121 'mon_max_pg_per_osd',
122 ]
123
124 MODULE_OPTIONS = [
125 Option(
126 name='sleep_interval',
127 type='secs',
128 default=60),
129
130 Option(
131 name='threshold',
132 type='float',
133 desc='scaling threshold',
134             long_desc=('The factor by which the `NEW PG_NUM` must vary from the current '
135 '`PG_NUM` before being accepted. Cannot be less than 1.0'),
136 default=3.0,
137 min=1.0),
138 Option(
139 name='noautoscale',
140 type='bool',
141 desc='global autoscale flag',
142 long_desc=('Option to turn on/off the autoscaler for all pools'),
143 default=False),
144 ]
145
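    # Hedged configuration sketch (shell commands shown for reference; exact
    # syntax depends on the deployed Ceph release). Module options are set
    # under the mgr/pg_autoscaler/ prefix, native options globally:
    #
    #   ceph config set mgr mgr/pg_autoscaler/sleep_interval 120
    #   ceph config set mgr mgr/pg_autoscaler/noautoscale true
    #   ceph config set global mon_target_pg_per_osd 100
    #
    # The `threshold` option can also be changed at runtime via the
    # `osd pool set threshold` command implemented further down in this class.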
146 def __init__(self, *args: Any, **kwargs: Any) -> None:
147 super(PgAutoscaler, self).__init__(*args, **kwargs)
148 self._shutdown = threading.Event()
149 self._event: Dict[int, PgAdjustmentProgress] = {}
150
151 # So much of what we do peeks at the osdmap that it's easiest
152 # to just keep a copy of the pythonized version.
153 self._osd_map = None
154 if TYPE_CHECKING:
155 self.sleep_interval = 60
156 self.mon_target_pg_per_osd = 0
157 self.threshold = 3.0
158 self.noautoscale = False
159
160 def config_notify(self) -> None:
161 for opt in self.NATIVE_OPTIONS:
162 setattr(self,
163 opt,
164 self.get_ceph_option(opt))
165 self.log.debug(' native option %s = %s', opt, getattr(self, opt))
166 for opt in self.MODULE_OPTIONS:
167 setattr(self,
168 opt['name'],
169 self.get_module_option(opt['name']))
170 self.log.debug(' mgr option %s = %s',
171 opt['name'], getattr(self, opt['name']))
172
173 @CLIReadCommand('osd pool autoscale-status')
174 def _command_autoscale_status(self, format: str = 'plain') -> Tuple[int, str, str]:
175 """
176 report on pool pg_num sizing recommendation and intent
177 """
178 osdmap = self.get_osdmap()
179 pools = osdmap.get_pools_by_name()
180 ps, root_map = self._get_pool_status(osdmap, pools)
181
182 if format in ('json', 'json-pretty'):
183 return 0, json.dumps(ps, indent=4, sort_keys=True), ''
184 else:
185 table = PrettyTable(['POOL', 'SIZE', 'TARGET SIZE',
186 'RATE', 'RAW CAPACITY',
187 'RATIO', 'TARGET RATIO',
188 'EFFECTIVE RATIO',
189 'BIAS',
190 'PG_NUM',
191 # 'IDEAL',
192 'NEW PG_NUM', 'AUTOSCALE',
193 'BULK'],
194 border=False)
195 table.left_padding_width = 0
196 table.right_padding_width = 2
197 table.align['POOL'] = 'l'
198 table.align['SIZE'] = 'r'
199 table.align['TARGET SIZE'] = 'r'
200 table.align['RATE'] = 'r'
201 table.align['RAW CAPACITY'] = 'r'
202 table.align['RATIO'] = 'r'
203 table.align['TARGET RATIO'] = 'r'
204 table.align['EFFECTIVE RATIO'] = 'r'
205 table.align['BIAS'] = 'r'
206 table.align['PG_NUM'] = 'r'
207 # table.align['IDEAL'] = 'r'
208 table.align['NEW PG_NUM'] = 'r'
209 table.align['AUTOSCALE'] = 'l'
210 table.align['BULK'] = 'l'
211 for p in ps:
212 if p['would_adjust']:
213 final = str(p['pg_num_final'])
214 else:
215 final = ''
216 if p['target_bytes'] > 0:
217 ts = mgr_util.format_bytes(p['target_bytes'], 6)
218 else:
219 ts = ''
220 if p['target_ratio'] > 0.0:
221 tr = '%.4f' % p['target_ratio']
222 else:
223 tr = ''
224 if p['effective_target_ratio'] > 0.0:
225 etr = '%.4f' % p['effective_target_ratio']
226 else:
227 etr = ''
228 table.add_row([
229 p['pool_name'],
230 mgr_util.format_bytes(p['logical_used'], 6),
231 ts,
232 p['raw_used_rate'],
233 mgr_util.format_bytes(p['subtree_capacity'], 6),
234 '%.4f' % p['capacity_ratio'],
235 tr,
236 etr,
237 p['bias'],
238 p['pg_num_target'],
239 # p['pg_num_ideal'],
240 final,
241 p['pg_autoscale_mode'],
242 str(p['bulk'])
243 ])
244 return 0, table.get_string(), ''
245
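    # Example invocation (hedged sketch; the values below are illustrative,
    # not from a real cluster):
    #
    #   $ ceph osd pool autoscale-status
    #   POOL  SIZE   TARGET SIZE  RATE  RAW CAPACITY  RATIO   ...  PG_NUM  NEW PG_NUM  AUTOSCALE  BULK
    #   rbd   1228M               3.0         299.9G  0.0120  ...      32              on         False
    #
    #   $ ceph osd pool autoscale-status --format json-pretty   # the JSON branch above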
246 @CLIWriteCommand("osd pool set threshold")
247 def set_scaling_threshold(self, num: float) -> Tuple[int, str, str]:
248 """
249         Set the autoscaler threshold,
250         i.e. the factor by which the new PG_NUM must vary from the existing PG_NUM before it is accepted
251 """
252 if num < 1.0:
253 return 22, "", "threshold cannot be set less than 1.0"
254 self.set_module_option("threshold", num)
255 return 0, "threshold updated", ""
256
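    # Example (illustrative): lower the scaling threshold so pg_num is
    # adjusted on a 2x deviation instead of the default 3x:
    #
    #   $ ceph osd pool set threshold 2.0
    #   threshold updated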
257 def complete_all_progress_events(self) -> None:
258 for pool_id in list(self._event):
259 ev = self._event[pool_id]
260 self.remote('progress', 'complete', ev.ev_id)
261 del self._event[pool_id]
262
263 def set_autoscale_mode_all_pools(self, status: str) -> None:
264 osdmap = self.get_osdmap()
265 pools = osdmap.get_pools_by_name()
266 for pool_name, _ in pools.items():
267 self.mon_command({
268 'prefix': 'osd pool set',
269 'pool': pool_name,
270 'var': 'pg_autoscale_mode',
271 'val': status
272 })
273 @CLIWriteCommand("osd pool get noautoscale")
274 def get_noautoscale(self) -> Tuple[int, str, str]:
275 """
276         Get the noautoscale flag to see whether the autoscaler
277         is turned on or off for all pools, as well as for
278         newly created pools in the future.
279 """
280
281         if self.noautoscale is None:
282 raise TypeError("noautoscale cannot be None")
283 elif self.noautoscale:
284 return 0, "", "noautoscale is on"
285 else:
286 return 0, "", "noautoscale is off"
287
288 @CLIWriteCommand("osd pool unset noautoscale")
289 def unset_noautoscale(self) -> Tuple[int, str, str]:
290 """
291 Unset the noautoscale flag so all pools will
292 have autoscale enabled (including newly created
293 pools in the future).
294 """
295 if not self.noautoscale:
296 return 0, "", "noautoscale is already unset!"
297 else:
298 self.set_module_option("noautoscale", False)
299 self.mon_command({
300 'prefix': 'config set',
301 'who': 'global',
302 'name': 'osd_pool_default_pg_autoscale_mode',
303 'value': 'on'
304 })
305 self.set_autoscale_mode_all_pools("on")
306 return 0, "", "noautoscale is unset, all pools now have autoscale on"
307
308 @CLIWriteCommand("osd pool set noautoscale")
309 def set_noautoscale(self) -> Tuple[int, str, str]:
310 """
311         Set the noautoscale flag for all pools (including
312         newly created pools in the future)
313         and complete all ongoing progress events
314         related to PG autoscaling.
315 """
316 if self.noautoscale:
317 return 0, "", "noautoscale is already set!"
318 else:
319 self.set_module_option("noautoscale", True)
320 self.mon_command({
321 'prefix': 'config set',
322 'who': 'global',
323 'name': 'osd_pool_default_pg_autoscale_mode',
324 'value': 'off'
325 })
326 self.set_autoscale_mode_all_pools("off")
327 self.complete_all_progress_events()
328 return 0, "", "noautoscale is set, all pools now have autoscale off"
329
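    # Example usage of the three noautoscale commands defined above (output
    # strings follow the return values in this class):
    #
    #   $ ceph osd pool set noautoscale     # pause autoscaling everywhere
    #   noautoscale is set, all pools now have autoscale off
    #   $ ceph osd pool get noautoscale
    #   noautoscale is on
    #   $ ceph osd pool unset noautoscale   # resume autoscaling
    #   noautoscale is unset, all pools now have autoscale on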
330 def serve(self) -> None:
331 self.config_notify()
332 while not self._shutdown.is_set():
333 self._maybe_adjust()
334 self._update_progress_events()
335 self._shutdown.wait(timeout=self.sleep_interval)
336
337 def shutdown(self) -> None:
338 self.log.info('Stopping pg_autoscaler')
339 self._shutdown.set()
340
341 def identify_subtrees_and_overlaps(self,
342 osdmap: OSDMap,
343 crush: CRUSHMap,
344 result: Dict[int, CrushSubtreeResourceStatus],
345 overlapped_roots: Set[int],
346 roots: List[CrushSubtreeResourceStatus]) -> \
347 Tuple[List[CrushSubtreeResourceStatus],
348 Set[int]]:
349
350 # We identify subtrees and overlapping roots from osdmap
351 for pool_id, pool in osdmap.get_pools().items():
352 crush_rule = crush.get_rule_by_id(pool['crush_rule'])
353 assert crush_rule is not None
354 cr_name = crush_rule['rule_name']
355 root_id = crush.get_rule_root(cr_name)
356 assert root_id is not None
357 osds = set(crush.get_osds_under(root_id))
358
359 # Are there overlapping roots?
360 s = None
361 for prev_root_id, prev in result.items():
362 if osds & prev.osds:
363 s = prev
364 if prev_root_id != root_id:
365 overlapped_roots.add(prev_root_id)
366 overlapped_roots.add(root_id)
367 self.log.warning("pool %s won't scale due to overlapping roots: %s",
368 pool['pool_name'], overlapped_roots)
369                     self.log.warning("Please see: https://docs.ceph.com/en/"
370 "latest/rados/operations/placement-groups"
371 "/#automated-scaling")
372 break
373 if not s:
374 s = CrushSubtreeResourceStatus()
375 roots.append(s)
376 result[root_id] = s
377 s.root_ids.append(root_id)
378 s.osds |= osds
379 s.pool_ids.append(pool_id)
380 s.pool_names.append(pool['pool_name'])
381 s.pg_current += pool['pg_num_target'] * pool['size']
382 target_ratio = pool['options'].get('target_size_ratio', 0.0)
383 if target_ratio:
384 s.total_target_ratio += target_ratio
385 else:
386 target_bytes = pool['options'].get('target_size_bytes', 0)
387 if target_bytes:
388 s.total_target_bytes += target_bytes * osdmap.pool_raw_used_rate(pool_id)
389 return roots, overlapped_roots
390
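    # Illustrative scenario (assumed CRUSH names, not from the source): if one
    # pool's rule takes the root "default" (osd.0-7) while another pool's rule
    # takes a device-class shadow root such as "default~ssd" (osd.0-3), the two
    # OSD sets intersect while having different root ids, so both roots land in
    # overlapped_roots and the affected pools are skipped by the scaler.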
391 def get_subtree_resource_status(self,
392 osdmap: OSDMap,
393 crush: CRUSHMap) -> Tuple[Dict[int, CrushSubtreeResourceStatus],
394 Set[int]]:
395 """
396 For each CRUSH subtree of interest (i.e. the roots under which
397 we have pools), calculate the current resource usages and targets,
398 such as how many PGs there are, vs. how many PGs we would
399 like there to be.
400 """
401 result: Dict[int, CrushSubtreeResourceStatus] = {}
402 roots: List[CrushSubtreeResourceStatus] = []
403 overlapped_roots: Set[int] = set()
404 # identify subtrees and overlapping roots
405 roots, overlapped_roots = self.identify_subtrees_and_overlaps(osdmap,
406 crush, result, overlapped_roots, roots)
407 # finish subtrees
408 all_stats = self.get('osd_stats')
409 for s in roots:
410 assert s.osds is not None
411 s.osd_count = len(s.osds)
412 s.pg_target = s.osd_count * self.mon_target_pg_per_osd
413 s.pg_left = s.pg_target
414 s.pool_count = len(s.pool_ids)
415 capacity = 0
416 for osd_stats in all_stats['osd_stats']:
417 if osd_stats['osd'] in s.osds:
418 # Intentionally do not apply the OSD's reweight to
419 # this, because we want to calculate PG counts based
420 # on the physical storage available, not how it is
421 # reweighted right now.
422 capacity += osd_stats['kb'] * 1024
423
424 s.capacity = capacity
425 self.log.debug('root_ids %s pools %s with %d osds, pg_target %d',
426 s.root_ids,
427 s.pool_ids,
428 s.osd_count,
429 s.pg_target)
430
431 return result, overlapped_roots
432
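    # Worked example (illustrative; assumes the default mon_target_pg_per_osd
    # of 100): a subtree with 10 OSDs gets
    #
    #   pg_target = 10 * 100 = 1000
    #
    # raw (replica-counted) PGs to distribute across its pools, and pg_left
    # starts at the same value before the per-pool passes consume it.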
433 def _calc_final_pg_target(
434 self,
435 p: Dict[str, Any],
436 pool_name: str,
437 root_map: Dict[int, CrushSubtreeResourceStatus],
438 root_id: int,
439 capacity_ratio: float,
440 bias: float,
441 even_pools: Dict[str, Dict[str, Any]],
442 bulk_pools: Dict[str, Dict[str, Any]],
443 func_pass: 'PassT',
444 bulk: bool,
445 ) -> Union[Tuple[float, int, int], Tuple[None, None, None]]:
446 """
447         `func_pass` selects one of three passes: the 'first' pass sizes
448         pools without the bulk flag by their capacity ratio and defers bulk
449         pools; the 'second' pass sizes bulk pools whose used_ratio exceeds
450         the even_ratio (1 / remaining pool count) and defers the rest as
451         even pools; the 'third' pass splits the remaining pg budget evenly
452         across those even pools.
453 """
454 if func_pass == 'first':
455 # first pass to deal with small pools (no bulk flag)
456 # calculating final_pg_target based on capacity ratio
457 # we also keep track of bulk_pools to be used in second pass
458 if not bulk:
459 final_ratio = capacity_ratio
460 pg_left = root_map[root_id].pg_left
461 assert pg_left is not None
462 used_pg = final_ratio * pg_left
463 root_map[root_id].pg_left -= int(used_pg)
464 root_map[root_id].pool_used += 1
465 pool_pg_target = used_pg / p['size'] * bias
466 else:
467 bulk_pools[pool_name] = p
468 return None, None, None
469
470 elif func_pass == 'second':
471 # second pass we calculate the final_pg_target
472 # for pools that have used_ratio > even_ratio
473 # and we keep track of even pools to be used in third pass
474 pool_count = root_map[root_id].pool_count
475 assert pool_count is not None
476 even_ratio = 1 / (pool_count - root_map[root_id].pool_used)
477 used_ratio = capacity_ratio
478
479 if used_ratio > even_ratio:
480 root_map[root_id].pool_used += 1
481 else:
482 even_pools[pool_name] = p
483 return None, None, None
484
485 final_ratio = max(used_ratio, even_ratio)
486 pg_left = root_map[root_id].pg_left
487 assert pg_left is not None
488 used_pg = final_ratio * pg_left
489 root_map[root_id].pg_left -= int(used_pg)
490 pool_pg_target = used_pg / p['size'] * bias
491
492 else:
493 # third pass we just split the pg_left to all even_pools
494 pool_count = root_map[root_id].pool_count
495 assert pool_count is not None
496 final_ratio = 1 / (pool_count - root_map[root_id].pool_used)
497 pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias
498
499 min_pg = p.get('options', {}).get('pg_num_min', PG_NUM_MIN)
500 max_pg = p.get('options', {}).get('pg_num_max')
501 final_pg_target = max(min_pg, nearest_power_of_two(pool_pg_target))
502 if max_pg and max_pg < final_pg_target:
503 final_pg_target = max_pg
504 self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, "
505 "pg target {4} quantized to {5} (current {6})".format(
506 p['pool_name'],
507 root_id,
508 capacity_ratio,
509 bias,
510 pool_pg_target,
511 final_pg_target,
512 p['pg_num_target']
513 ))
514 return final_ratio, pool_pg_target, final_pg_target
515
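    # Worked example of the three passes (all numbers illustrative): take one
    # root with pool_count=3, pg_left=300 raw PGs, every pool size=3, bias=1.0.
    #
    #   first pass  - pool A (not bulk), capacity_ratio 0.1:
    #                   used_pg = 0.1 * 300 = 30, pg_left -> 270, pool_used -> 1
    #                   pool_pg_target = 30 / 3 = 10 -> final_pg_target = max(32, 8) = 32
    #                 pools B and C carry the bulk flag and are deferred.
    #   second pass - even_ratio = 1 / (3 - 1) = 0.5
    #                 pool B, capacity_ratio 0.6 > 0.5:
    #                   used_pg = 0.6 * 270 = 162, pg_left -> 108, pool_used -> 2
    #                   pool_pg_target = 162 / 3 = 54 -> final_pg_target = 64
    #                 pool C, capacity_ratio 0.2 <= 0.5, is deferred as an even pool.
    #   third pass  - final_ratio = 1 / (3 - 2) = 1.0
    #                 pool C: pool_pg_target = 108 / 3 = 36 -> final_pg_target = 32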
516 def _get_pool_pg_targets(
517 self,
518 osdmap: OSDMap,
519 pools: Dict[str, Dict[str, Any]],
520 crush_map: CRUSHMap,
521 root_map: Dict[int, CrushSubtreeResourceStatus],
522 pool_stats: Dict[int, Dict[str, int]],
523 ret: List[Dict[str, Any]],
524 threshold: float,
525 func_pass: 'PassT',
526 overlapped_roots: Set[int],
527 ) -> Tuple[List[Dict[str, Any]], Dict[str, Dict[str, Any]] , Dict[str, Dict[str, Any]]]:
528 """
529         Calculates the final_pg_target of each pool and determines whether it
530         needs scaling; this depends on the profile of the autoscaler. For
531         scale-down, we start out with a full complement of pgs and only decrease
532         it when other pools need more pgs due to increased usage. For scale-up,
533         we start out with the minimal amount of pgs and only scale up as usage increases.
534 """
535 even_pools: Dict[str, Dict[str, Any]] = {}
536 bulk_pools: Dict[str, Dict[str, Any]] = {}
537 for pool_name, p in pools.items():
538 pool_id = p['pool']
539 if pool_id not in pool_stats:
540 # race with pool deletion; skip
541 continue
542
543 # FIXME: we assume there is only one take per pool, but that
544 # may not be true.
545 crush_rule = crush_map.get_rule_by_id(p['crush_rule'])
546 assert crush_rule is not None
547 cr_name = crush_rule['rule_name']
548 root_id = crush_map.get_rule_root(cr_name)
549 assert root_id is not None
550 if root_id in overlapped_roots:
551 # skip pools
552 # with overlapping roots
553                 self.log.warning("pool %d contains an overlapping root %d"
554                                  "... skipping scaling", pool_id, root_id)
555 continue
556 capacity = root_map[root_id].capacity
557 assert capacity is not None
558 if capacity == 0:
559 self.log.debug('skipping empty subtree %s', cr_name)
560 continue
561
562 raw_used_rate = osdmap.pool_raw_used_rate(pool_id)
563
564 pool_logical_used = pool_stats[pool_id]['stored']
565 bias = p['options'].get('pg_autoscale_bias', 1.0)
566 target_bytes = 0
567 # ratio takes precedence if both are set
568 if p['options'].get('target_size_ratio', 0.0) == 0.0:
569 target_bytes = p['options'].get('target_size_bytes', 0)
570
571 # What proportion of space are we using?
572 actual_raw_used = pool_logical_used * raw_used_rate
573 actual_capacity_ratio = float(actual_raw_used) / capacity
574
575 pool_raw_used = max(pool_logical_used, target_bytes) * raw_used_rate
576 capacity_ratio = float(pool_raw_used) / capacity
577
578 self.log.info("effective_target_ratio {0} {1} {2} {3}".format(
579 p['options'].get('target_size_ratio', 0.0),
580 root_map[root_id].total_target_ratio,
581 root_map[root_id].total_target_bytes,
582 capacity))
583
584 target_ratio = effective_target_ratio(p['options'].get('target_size_ratio', 0.0),
585 root_map[root_id].total_target_ratio,
586 root_map[root_id].total_target_bytes,
587 capacity)
588
589 # determine if the pool is a bulk
590 bulk = False
591 flags = p['flags_names'].split(",")
592 if "bulk" in flags:
593 bulk = True
594
595 capacity_ratio = max(capacity_ratio, target_ratio)
596 final_ratio, pool_pg_target, final_pg_target = self._calc_final_pg_target(
597 p, pool_name, root_map, root_id,
598 capacity_ratio, bias, even_pools,
599 bulk_pools, func_pass, bulk)
600
601 if final_ratio is None:
602 continue
603
604 adjust = False
605 if (final_pg_target > p['pg_num_target'] * threshold or
606 final_pg_target < p['pg_num_target'] / threshold) and \
607 final_ratio >= 0.0 and \
608 final_ratio <= 1.0:
609 adjust = True
610
611 assert pool_pg_target is not None
612 ret.append({
613 'pool_id': pool_id,
614 'pool_name': p['pool_name'],
615 'crush_root_id': root_id,
616 'pg_autoscale_mode': p['pg_autoscale_mode'],
617 'pg_num_target': p['pg_num_target'],
618 'logical_used': pool_logical_used,
619 'target_bytes': target_bytes,
620 'raw_used_rate': raw_used_rate,
621 'subtree_capacity': capacity,
622 'actual_raw_used': actual_raw_used,
623 'raw_used': pool_raw_used,
624 'actual_capacity_ratio': actual_capacity_ratio,
625 'capacity_ratio': capacity_ratio,
626 'target_ratio': p['options'].get('target_size_ratio', 0.0),
627 'effective_target_ratio': target_ratio,
628 'pg_num_ideal': int(pool_pg_target),
629 'pg_num_final': final_pg_target,
630 'would_adjust': adjust,
631 'bias': p.get('options', {}).get('pg_autoscale_bias', 1.0),
632 'bulk': bulk,
633 })
634
635 return ret, bulk_pools, even_pools
636
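    # Example of the threshold check above (illustrative numbers): with the
    # default threshold of 3.0 and pg_num_target=32, would_adjust only becomes
    # True when final_pg_target > 32 * 3 = 96 or final_pg_target < 32 / 3 ~= 10.7,
    # so a recommendation of 64 is left alone while 128 (or 8) triggers a change.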
637 def _get_pool_status(
638 self,
639 osdmap: OSDMap,
640 pools: Dict[str, Dict[str, Any]],
641 ) -> Tuple[List[Dict[str, Any]],
642 Dict[int, CrushSubtreeResourceStatus]]:
643 threshold = self.threshold
644 assert threshold >= 1.0
645
646 crush_map = osdmap.get_crush()
647 root_map, overlapped_roots = self.get_subtree_resource_status(osdmap, crush_map)
648 df = self.get('df')
649 pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
650
651 ret: List[Dict[str, Any]] = []
652
653 # Iterate over all pools to determine how they should be sized.
654         # The first call to _get_pool_pg_targets() sizes pools without the bulk flag by
655         # their capacity ratio and collects the bulk pools.
656         # The second call sizes bulk pools whose usage exceeds the even share and collects
657         # the rest as even_pools.
658         # The third call splits the remaining pg budget evenly across those even_pools.
659
660 ret, bulk_pools, _ = self._get_pool_pg_targets(osdmap, pools, crush_map, root_map,
661 pool_stats, ret, threshold, 'first', overlapped_roots)
662
663 ret, _, even_pools = self._get_pool_pg_targets(osdmap, bulk_pools, crush_map, root_map,
664 pool_stats, ret, threshold, 'second', overlapped_roots)
665
666 ret, _, _ = self._get_pool_pg_targets(osdmap, even_pools, crush_map, root_map,
667 pool_stats, ret, threshold, 'third', overlapped_roots)
668
669 return (ret, root_map)
670
671 def _update_progress_events(self) -> None:
672 if self.noautoscale:
673 return
674 osdmap = self.get_osdmap()
675 pools = osdmap.get_pools()
676 for pool_id in list(self._event):
677 ev = self._event[pool_id]
678 pool_data = pools.get(pool_id)
679 if pool_data is None or pool_data['pg_num'] == pool_data['pg_num_target'] or ev.pg_num == ev.pg_num_target:
680 # pool is gone or we've reached our target
681 self.remote('progress', 'complete', ev.ev_id)
682 del self._event[pool_id]
683 continue
684 ev.update(self, (ev.pg_num - pool_data['pg_num']) / (ev.pg_num - ev.pg_num_target))
685
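    # Example of the progress fraction above (illustrative numbers): an event
    # created at pg_num=32 with pg_num_target=128 reports
    #
    #   (32 - 80) / (32 - 128) = 0.5
    #
    # once the pool's current pg_num has reached 80, i.e. half of the change
    # has been applied.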
686 def _maybe_adjust(self) -> None:
687 if self.noautoscale:
688 return
689 self.log.info('_maybe_adjust')
690 osdmap = self.get_osdmap()
691 if osdmap.get_require_osd_release() < 'nautilus':
692 return
693 pools = osdmap.get_pools_by_name()
694 self.log.debug("pool: {0}".format(json.dumps(pools, indent=4,
695 sort_keys=True)))
696 ps, root_map = self._get_pool_status(osdmap, pools)
697
698         # For any pool in 'warn' mode, set the health message for it and then
699         # drop it from consideration.
700 too_few = []
701 too_many = []
702 bytes_and_ratio = []
703 health_checks: Dict[str, Dict[str, Union[int, str, List[str]]]] = {}
704
705 total_bytes = dict([(r, 0) for r in iter(root_map)])
706 total_target_bytes = dict([(r, 0.0) for r in iter(root_map)])
707 target_bytes_pools: Dict[int, List[int]] = dict([(r, []) for r in iter(root_map)])
708
709 for p in ps:
710 pool_id = p['pool_id']
711 pool_opts = pools[p['pool_name']]['options']
712 if pool_opts.get('target_size_ratio', 0) > 0 and pool_opts.get('target_size_bytes', 0) > 0:
713 bytes_and_ratio.append(
714 'Pool %s has target_size_bytes and target_size_ratio set' % p['pool_name'])
715 total_bytes[p['crush_root_id']] += max(
716 p['actual_raw_used'],
717 p['target_bytes'] * p['raw_used_rate'])
718 if p['target_bytes'] > 0:
719 total_target_bytes[p['crush_root_id']] += p['target_bytes'] * p['raw_used_rate']
720 target_bytes_pools[p['crush_root_id']].append(p['pool_name'])
721 if not p['would_adjust']:
722 continue
723 if p['pg_autoscale_mode'] == 'warn':
724 msg = 'Pool %s has %d placement groups, should have %d' % (
725 p['pool_name'],
726 p['pg_num_target'],
727 p['pg_num_final'])
728 if p['pg_num_final'] > p['pg_num_target']:
729 too_few.append(msg)
730 else:
731 too_many.append(msg)
732
733 if p['pg_autoscale_mode'] == 'on':
734 # Note that setting pg_num actually sets pg_num_target (see
735 # OSDMonitor.cc)
736 r = self.mon_command({
737 'prefix': 'osd pool set',
738 'pool': p['pool_name'],
739 'var': 'pg_num',
740 'val': str(p['pg_num_final'])
741 })
742
743 # create new event or update existing one to reflect
744 # progress from current state to the new pg_num_target
745 pool_data = pools[p['pool_name']]
746 pg_num = pool_data['pg_num']
747 new_target = p['pg_num_final']
748 if pool_id in self._event:
749 self._event[pool_id].reset(pg_num, new_target)
750 else:
751 self._event[pool_id] = PgAdjustmentProgress(pool_id, pg_num, new_target)
752 self._event[pool_id].update(self, 0.0)
753
754 if r[0] != 0:
755 # FIXME: this is a serious and unexpected thing,
756 # we should expose it as a cluster log error once
757 # the hook for doing that from ceph-mgr modules is
758 # in.
759 self.log.error("pg_num adjustment on {0} to {1} failed: {2}"
760 .format(p['pool_name'],
761 p['pg_num_final'], r))
762
763 if too_few:
764 summary = "{0} pools have too few placement groups".format(
765 len(too_few))
766 health_checks['POOL_TOO_FEW_PGS'] = {
767 'severity': 'warning',
768 'summary': summary,
769 'count': len(too_few),
770 'detail': too_few
771 }
772 if too_many:
773 summary = "{0} pools have too many placement groups".format(
774 len(too_many))
775 health_checks['POOL_TOO_MANY_PGS'] = {
776 'severity': 'warning',
777 'summary': summary,
778 'count': len(too_many),
779 'detail': too_many
780 }
781
782 too_much_target_bytes = []
783 for root_id, total in total_bytes.items():
784 total_target = int(total_target_bytes[root_id])
785 capacity = root_map[root_id].capacity
786 assert capacity is not None
787 if total_target > 0 and total > capacity and capacity:
788 too_much_target_bytes.append(
789 'Pools %s overcommit available storage by %.03fx due to '
790 'target_size_bytes %s on pools %s' % (
791 root_map[root_id].pool_names,
792 total / capacity,
793 mgr_util.format_bytes(total_target, 5, colored=False),
794 target_bytes_pools[root_id]
795 )
796 )
797 elif total_target > capacity and capacity:
798 too_much_target_bytes.append(
799 'Pools %s overcommit available storage by %.03fx due to '
800 'collective target_size_bytes of %s' % (
801 root_map[root_id].pool_names,
802 total / capacity,
803 mgr_util.format_bytes(total_target, 5, colored=False),
804 )
805 )
806 if too_much_target_bytes:
807 health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
808 'severity': 'warning',
809 'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
810 'count': len(too_much_target_bytes),
811 'detail': too_much_target_bytes,
812 }
813
814 if bytes_and_ratio:
815 health_checks['POOL_HAS_TARGET_SIZE_BYTES_AND_RATIO'] = {
816 'severity': 'warning',
817 'summary': "%d pools have both target_size_bytes and target_size_ratio set" % len(bytes_and_ratio),
818 'count': len(bytes_and_ratio),
819 'detail': bytes_and_ratio,
820 }
821
822 self.set_health_checks(health_checks)
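# Worked example of the overcommit check in _maybe_adjust() (illustrative
# numbers): if a root has 100 TiB of raw capacity and its pools' combined
# max(actual_raw_used, target_bytes * raw_used_rate) adds up to 150 TiB while
# target_size_bytes is set on at least one pool, the module raises
# POOL_TARGET_SIZE_BYTES_OVERCOMMITTED with a detail line like
# "Pools [...] overcommit available storage by 1.500x due to
# target_size_bytes ... on pools [...]".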