update ceph source to reef 18.2.1
diff --git a/ceph/src/pybind/mgr/pg_autoscaler/module.py b/ceph/src/pybind/mgr/pg_autoscaler/module.py
index b7a95fc7b97b64e146ccdfcf8ac652a500ed5743..ea7c4b00b4c68e4c5c6189bde7df7785dd83230f 100644
--- a/ceph/src/pybind/mgr/pg_autoscaler/module.py
+++ b/ceph/src/pybind/mgr/pg_autoscaler/module.py
@@ -135,12 +135,6 @@ class PgAutoscaler(MgrModule):
                        '`PG_NUM` before being accepted. Cannot be less than 1.0'),
             default=3.0,
             min=1.0),
-        Option(
-            name='noautoscale',
-            type='bool',
-            desc='global autoscale flag',
-            long_desc=('Option to turn on/off the autoscaler for all pools'),
-            default=False),
     ]
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -155,7 +149,6 @@ class PgAutoscaler(MgrModule):
             self.sleep_interval = 60
             self.mon_target_pg_per_osd = 0
             self.threshold = 3.0
-            self.noautoscale = False
 
     def config_notify(self) -> None:
         for opt in self.NATIVE_OPTIONS:
@@ -238,7 +231,7 @@ class PgAutoscaler(MgrModule):
                     p['pg_num_target'],
 #                    p['pg_num_ideal'],
                     final,
-                    p['pg_autoscale_mode'],
+                    'off' if self.has_noautoscale_flag() else p['pg_autoscale_mode'],
                     str(p['bulk'])
                 ])
             return 0, table.get_string(), ''
@@ -260,16 +253,13 @@ class PgAutoscaler(MgrModule):
             self.remote('progress', 'complete', ev.ev_id)
             del self._event[pool_id]
 
-    def set_autoscale_mode_all_pools(self, status: str) -> None:
-        osdmap = self.get_osdmap()
-        pools = osdmap.get_pools_by_name()
-        for pool_name, _ in pools.items():
-            self.mon_command({
-                'prefix': 'osd pool set',
-                'pool': pool_name,
-                'var': 'pg_autoscale_mode',
-                'val': status
-            })
+    def has_noautoscale_flag(self) -> bool:
+        flags = self.get_osdmap().dump().get('flags', '')
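+        # The osdmap dump reports cluster flags as a single comma-separated
+        # string (e.g. "noautoscale,sortbitwise"), so a substring check is
+        # enough to detect the flag.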
+        return 'noautoscale' in flags
+
     @CLIWriteCommand("osd pool get noautoscale")
     def get_noautoscale(self) -> Tuple[int, str, str]:
         """
@@ -277,10 +267,7 @@ class PgAutoscaler(MgrModule):
         are setting the autoscaler on or off as well
         as newly created pools in the future.
         """
-
-        if self.noautoscale == None:
-            raise TypeError("noautoscale cannot be None")
-        elif self.noautoscale:
+        if self.has_noautoscale_flag():
             return 0, "", "noautoscale is on"
         else:
             return 0, "", "noautoscale is off"
@@ -289,21 +276,23 @@ class PgAutoscaler(MgrModule):
     def unset_noautoscale(self) -> Tuple[int, str, str]:
         """
         Unset the noautoscale flag so all pools will
-        have autoscale enabled (including newly created
-        pools in the future).
+        go back to their previous mode. Newly created
+        pools will have the autoscaler on by default.
         """
-        if not self.noautoscale:
+        if not self.has_noautoscale_flag():
             return 0, "", "noautoscale is already unset!"
         else:
-            self.set_module_option("noautoscale", False)
             self.mon_command({
                 'prefix': 'config set',
                 'who': 'global',
                 'name': 'osd_pool_default_pg_autoscale_mode',
                 'value': 'on'
             })
-            self.set_autoscale_mode_all_pools("on")
-            return 0, "", "noautoscale is unset, all pools now have autoscale on"
+            self.mon_command({
+                'prefix': 'osd unset',
+                'key': 'noautoscale'
+            })
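+            # The flag lives in the osdmap, so has_noautoscale_flag() picks up
+            # the change on the next check; per-pool pg_autoscale_mode values
+            # are left untouched, unlike the old set_autoscale_mode_all_pools().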
+            return 0, "", "noautoscale is unset, all pools now back to its previous mode"
 
     @CLIWriteCommand("osd pool set noautoscale")
     def set_noautoscale(self) -> Tuple[int, str, str]:
@@ -313,25 +302,30 @@ class PgAutoscaler(MgrModule):
         and complete all on-going progress events
         regarding PG-autoscaling.
         """
-        if self.noautoscale:
+        if self.has_noautoscale_flag():
             return 0, "", "noautoscale is already set!"
         else:
-            self.set_module_option("noautoscale", True)
             self.mon_command({
                 'prefix': 'config set',
                 'who': 'global',
                 'name': 'osd_pool_default_pg_autoscale_mode',
                 'value': 'off'
             })
-            self.set_autoscale_mode_all_pools("off")
+            self.mon_command({
+                'prefix': 'osd set',
+                'key': 'noautoscale'
+            })
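+            # With the osdmap flag set, serve() skips _maybe_adjust() and
+            # _update_progress_events() entirely; newly created pools still
+            # default to 'off' via osd_pool_default_pg_autoscale_mode above.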
             self.complete_all_progress_events()
             return 0, "", "noautoscale is set, all pools now have autoscale off"
 
     def serve(self) -> None:
         self.config_notify()
         while not self._shutdown.is_set():
-            self._maybe_adjust()
-            self._update_progress_events()
+            if not self.has_noautoscale_flag():
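+                # Fetch the osdmap and the name-keyed pool dict once per
+                # iteration and pass them to both helpers, instead of each
+                # helper re-fetching them.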
+                osdmap = self.get_osdmap()
+                pools = osdmap.get_pools_by_name()
+                self._maybe_adjust(osdmap, pools)
+                self._update_progress_events(osdmap, pools)
             self._shutdown.wait(timeout=self.sleep_interval)
 
     def shutdown(self) -> None:
@@ -340,6 +334,7 @@ class PgAutoscaler(MgrModule):
 
     def identify_subtrees_and_overlaps(self,
                                        osdmap: OSDMap,
+                                       pools: Dict[str, Dict[str, Any]],
                                        crush: CRUSHMap,
                                        result: Dict[int, CrushSubtreeResourceStatus],
                                        overlapped_roots: Set[int],
@@ -348,7 +343,7 @@ class PgAutoscaler(MgrModule):
               Set[int]]:
 
         # We identify subtrees and overlapping roots from osdmap
-        for pool_id, pool in osdmap.get_pools().items():
+        for pool_name, pool in pools.items():
             crush_rule = crush.get_rule_by_id(pool['crush_rule'])
             assert crush_rule is not None
             cr_name = crush_rule['rule_name']
@@ -365,7 +360,7 @@ class PgAutoscaler(MgrModule):
                         overlapped_roots.add(prev_root_id)
                         overlapped_roots.add(root_id)
                         self.log.warning("pool %s won't scale due to overlapping roots: %s",
-                                       pool['pool_name'], overlapped_roots)
+                                      pool_name, overlapped_roots)
                         self.log.warning("Please See: https://docs.ceph.com/en/"
                                          "latest/rados/operations/placement-groups"
                                          "/#automated-scaling")
@@ -376,8 +371,8 @@ class PgAutoscaler(MgrModule):
             result[root_id] = s
             s.root_ids.append(root_id)
             s.osds |= osds
-            s.pool_ids.append(pool_id)
-            s.pool_names.append(pool['pool_name'])
+            s.pool_ids.append(pool['pool'])
+            s.pool_names.append(pool_name)
             s.pg_current += pool['pg_num_target'] * pool['size']
             target_ratio = pool['options'].get('target_size_ratio', 0.0)
             if target_ratio:
@@ -385,11 +380,12 @@ class PgAutoscaler(MgrModule):
             else:
                 target_bytes = pool['options'].get('target_size_bytes', 0)
                 if target_bytes:
-                    s.total_target_bytes += target_bytes * osdmap.pool_raw_used_rate(pool_id)
+                    s.total_target_bytes += target_bytes * osdmap.pool_raw_used_rate(pool['pool'])
         return roots, overlapped_roots
 
     def get_subtree_resource_status(self,
                                     osdmap: OSDMap,
+                                    pools: Dict[str, Dict[str, Any]],
                                     crush: CRUSHMap) -> Tuple[Dict[int, CrushSubtreeResourceStatus],
                                                               Set[int]]:
         """
@@ -402,8 +398,9 @@ class PgAutoscaler(MgrModule):
         roots: List[CrushSubtreeResourceStatus] = []
         overlapped_roots: Set[int] = set()
         # identify subtrees and overlapping roots
-        roots, overlapped_roots = self.identify_subtrees_and_overlaps(osdmap,
-                                                                      crush, result, overlapped_roots, roots)
+        roots, overlapped_roots = self.identify_subtrees_and_overlaps(
+            osdmap, pools, crush, result, overlapped_roots, roots
+        )
         # finish subtrees
         all_stats = self.get('osd_stats')
         for s in roots:
@@ -561,7 +558,6 @@ class PgAutoscaler(MgrModule):
 
             raw_used_rate = osdmap.pool_raw_used_rate(pool_id)
 
-            pool_logical_used = pool_stats[pool_id]['stored']
             bias = p['options'].get('pg_autoscale_bias', 1.0)
             target_bytes = 0
             # ratio takes precedence if both are set
@@ -569,10 +565,10 @@ class PgAutoscaler(MgrModule):
                 target_bytes = p['options'].get('target_size_bytes', 0)
 
             # What proportion of space are we using?
-            actual_raw_used = pool_logical_used * raw_used_rate
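+            # 'bytes_used' from the df pool stats is raw consumption
+            # (replication / EC overhead included), so no raw_used_rate
+            # multiplication is needed here.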
+            actual_raw_used = pool_stats[pool_id]['bytes_used']
             actual_capacity_ratio = float(actual_raw_used) / capacity
 
-            pool_raw_used = max(pool_logical_used, target_bytes) * raw_used_rate
+            pool_raw_used = max(actual_raw_used, target_bytes * raw_used_rate)
             capacity_ratio = float(pool_raw_used) / capacity
 
             self.log.info("effective_target_ratio {0} {1} {2} {3}".format(
@@ -616,7 +612,7 @@ class PgAutoscaler(MgrModule):
                 'crush_root_id': root_id,
                 'pg_autoscale_mode': p['pg_autoscale_mode'],
                 'pg_num_target': p['pg_num_target'],
-                'logical_used': pool_logical_used,
+                'logical_used': float(actual_raw_used)/raw_used_rate,
                 'target_bytes': target_bytes,
                 'raw_used_rate': raw_used_rate,
                 'subtree_capacity': capacity,
@@ -645,7 +641,7 @@ class PgAutoscaler(MgrModule):
         assert threshold >= 1.0
 
         crush_map = osdmap.get_crush()
-        root_map, overlapped_roots = self.get_subtree_resource_status(osdmap, crush_map)
+        root_map, overlapped_roots = self.get_subtree_resource_status(osdmap, pools, crush_map)
         df = self.get('df')
         pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
 
@@ -669,31 +665,51 @@ class PgAutoscaler(MgrModule):
 
         return (ret, root_map)
 
-    def _update_progress_events(self) -> None:
-        if self.noautoscale:
+    def _get_pool_by_id(self,
+                     pools: Dict[str, Dict[str, Any]],
+                     pool_id: int) -> Optional[Dict[str, Any]]:
+        # Helper for getting pool data by pool_id
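+        # 'pools' comes from osdmap.get_pools_by_name(), so it is keyed by
+        # name; scan for the entry whose 'pool' field matches the numeric id.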
+        for pool_name, p in pools.items():
+            if p['pool'] == pool_id:
+                return p
+        self.log.debug('pool %d not found', pool_id)
+        return None
+
+    def _update_progress_events(self,
+                                osdmap: OSDMap,
+                                pools: Dict[str, Dict[str, Any]]) -> None:
+        # Update progress events if necessary
+        if self.has_noautoscale_flag():
+            self.log.debug("noautoscale_flag is set.")
             return
-        osdmap = self.get_osdmap()
-        pools = osdmap.get_pools()
         for pool_id in list(self._event):
             ev = self._event[pool_id]
-            pool_data = pools.get(pool_id)
-            if pool_data is None or pool_data['pg_num'] == pool_data['pg_num_target'] or ev.pg_num == ev.pg_num_target:
+            pool_data = self._get_pool_by_id(pools, pool_id)
+            if (
+                pool_data is None
+                or pool_data["pg_num"] == pool_data["pg_num_target"]
+                or ev.pg_num == ev.pg_num_target
+            ):
                 # pool is gone or we've reached our target
                 self.remote('progress', 'complete', ev.ev_id)
                 del self._event[pool_id]
                 continue
             ev.update(self, (ev.pg_num - pool_data['pg_num']) / (ev.pg_num - ev.pg_num_target))
 
-    def _maybe_adjust(self) -> None:
-        if self.noautoscale:
-            return
+    def _maybe_adjust(self,
+                      osdmap: OSDMap,
+                      pools: Dict[str, Dict[str, Any]]) -> None:
+        # Figure out which pool needs pg adjustments
         self.log.info('_maybe_adjust')
-        osdmap = self.get_osdmap()
+        if self.has_noautoscale_flag():
+            self.log.debug("noautoscale_flag is set.")
+            return
         if osdmap.get_require_osd_release() < 'nautilus':
             return
-        pools = osdmap.get_pools_by_name()
+
         self.log.debug("pool: {0}".format(json.dumps(pools, indent=4,
                                 sort_keys=True)))
+
         ps, root_map = self._get_pool_status(osdmap, pools)
 
         # Anyone in 'warn', set the health message for them and then
@@ -719,8 +735,6 @@ class PgAutoscaler(MgrModule):
             if p['target_bytes'] > 0:
                 total_target_bytes[p['crush_root_id']] += p['target_bytes'] * p['raw_used_rate']
                 target_bytes_pools[p['crush_root_id']].append(p['pool_name'])
-            if not p['would_adjust']:
-                continue
             if p['pg_autoscale_mode'] == 'warn':
                 msg = 'Pool %s has %d placement groups, should have %d' % (
                     p['pool_name'],
@@ -728,9 +742,10 @@ class PgAutoscaler(MgrModule):
                     p['pg_num_final'])
                 if p['pg_num_final'] > p['pg_num_target']:
                     too_few.append(msg)
-                else:
+                elif p['pg_num_final'] < p['pg_num_target']:
                     too_many.append(msg)
-
+            if not p['would_adjust']:
+                continue
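+            # Warn-mode health messages above are now emitted regardless of
+            # would_adjust; the 'continue' above only skips the actual pg_num
+            # change applied below for pools in 'on' mode.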
             if p['pg_autoscale_mode'] == 'on':
                 # Note that setting pg_num actually sets pg_num_target (see
                 # OSDMonitor.cc)