]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/mds_autoscaler/module.py
import quincy beta 17.1.0
[ceph.git] / ceph / src / pybind / mgr / mds_autoscaler / module.py
1 """
2 Automatically scale MDSs based on status of the file-system using the FSMap
3 """
4
5 import logging
6 from typing import Any, Optional
7 from mgr_module import MgrModule, NotifyType
8 from ceph.deployment.service_spec import ServiceSpec
9 import orchestrator
10 import copy
11
12 log = logging.getLogger(__name__)
13
14
class MDSAutoscaler(orchestrator.OrchestratorClientMixin, MgrModule):
    """
    MDS autoscaler.

    On every FSMap notification, walks all file systems in the map and, for
    each one whose ``mds.<fs_name>`` orchestrator service specifies a
    placement count, re-applies the service so that the count equals
    ``max_mds + standby_count_wanted`` as reported by that file system's
    MDSMap.
    """
    NOTIFY_TYPES = [NotifyType.fs_map]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Only MgrModule's initializer is invoked explicitly; set_mgr() is
        # not defined in this class (presumably provided by the orchestrator
        # client mixin -- confirm against the base classes).
        MgrModule.__init__(self, *args, **kwargs)
        self.set_mgr(self)

    def get_service(self, fs_name: str) -> Optional[orchestrator.ServiceDescription]:
        """
        Look up the orchestrator service named ``mds.<fs_name>``.

        :param fs_name: name of the file system
        :return: the first entry of the describe_service() result, or None
                 when the result is empty
        :raises: whatever orchestrator.raise_if_exception() raises for the
                 completion
        """
        service = f"mds.{fs_name}"
        completion = self.describe_service(service_type='mds',
                                           service_name=service,
                                           refresh=True)
        orchestrator.raise_if_exception(completion)
        if completion.result:
            return completion.result[0]
        return None

    def update_daemon_count(self, spec: ServiceSpec, fs_name: str, abscount: int) -> ServiceSpec:
        """
        Build a new service spec whose placement count is ``abscount``.

        The placement of ``spec`` is deep-copied, so ``spec`` itself is not
        modified.

        :param spec: the current service spec
        :param fs_name: unused; kept for interface compatibility
        :param abscount: absolute daemon count to request
        :return: a new ServiceSpec with the adjusted placement
        """
        ps = copy.deepcopy(spec.placement)
        ps.count = abscount
        # NOTE(review): only service_type, service_id and placement are
        # carried over into the new spec; any other attributes of ``spec``
        # are not copied. Confirm this is intended.
        newspec = ServiceSpec(service_type=spec.service_type,
                              service_id=spec.service_id,
                              placement=ps)
        return newspec

    def _get_mdsmap(self, fs_map: dict, fs_name: str) -> dict:
        """
        Return the ``mdsmap`` dict of the file system named ``fs_name``.

        :raises AssertionError: if ``fs_map`` is None or contains no file
                                system with that name
        """
        # Raised explicitly rather than via ``assert`` so that the check is
        # not stripped when Python runs with -O.
        if fs_map is None:
            raise AssertionError("fs_map must not be None")
        for fs in fs_map['filesystems']:
            mdsmap = fs['mdsmap']
            if mdsmap['fs_name'] == fs_name:
                return mdsmap
        raise AssertionError(f"fs {fs_name}: not found in fs_map")

    def get_required_standby_count(self, fs_map: dict, fs_name: str) -> int:
        """
        Return ``standby_count_wanted`` from the MDSMap of ``fs_name``.

        :raises AssertionError: if ``fs_map`` is None or the file system is
                                not present in it
        """
        return self._get_mdsmap(fs_map, fs_name)['standby_count_wanted']

    def get_required_max_mds(self, fs_map: dict, fs_name: str) -> int:
        """
        Return ``max_mds`` from the MDSMap of ``fs_name``.

        :raises AssertionError: if ``fs_map`` is None or the file system is
                                not present in it
        """
        return self._get_mdsmap(fs_map, fs_name)['max_mds']

    def verify_and_manage_mds_instance(self, fs_map: dict, fs_name: str) -> None:
        """
        Make the MDS service count of ``fs_name`` match what its MDSMap asks for.

        Does nothing when no ``mds.<fs_name>`` service is found, when the
        service's placement has no (or a zero) count, or when the count
        already equals ``max_mds + standby_count_wanted``. Otherwise a spec
        with the new count is applied through the orchestrator.

        orchestrator.OrchestratorError is logged and swallowed; other
        exceptions propagate to the caller.

        :raises AssertionError: if ``fs_map`` is None or does not contain
                                ``fs_name``
        """
        if fs_map is None:
            raise AssertionError("fs_map must not be None")

        try:
            svc = self.get_service(fs_name)
            if not svc:
                self.log.info(f"fs {fs_name}: no service defined; skipping")
                return
            if not svc.spec.placement.count:
                self.log.info(f"fs {fs_name}: service does not specify a count; skipping")
                return

            standbys_required = self.get_required_standby_count(fs_map, fs_name)
            max_mds = self.get_required_max_mds(fs_map, fs_name)
            want = max_mds + standbys_required

            self.log.info(f"fs {fs_name}: "
                          f"max_mds={max_mds} "
                          f"standbys_required={standbys_required}, "
                          f"count={svc.spec.placement.count}")

            if want == svc.spec.placement.count:
                return

            self.log.info(f"fs {fs_name}: adjusting daemon count from {svc.spec.placement.count} to {want}")
            newspec = self.update_daemon_count(svc.spec, fs_name, want)
            completion = self.apply_mds(newspec)
            orchestrator.raise_if_exception(completion)
        except orchestrator.OrchestratorError as e:
            # Best effort: log (with traceback) and carry on so that the
            # remaining file systems are still processed by notify().
            self.log.exception(f"fs {fs_name}: exception while updating service: {e}")

    def notify(self, notify_type: NotifyType, notify_id: str) -> None:
        """
        Handle a notification; only ``NotifyType.fs_map`` is acted upon.

        Fetches the current ``fs_map`` and runs
        verify_and_manage_mds_instance() for every file system in it.

        :param notify_type: kind of notification received
        :param notify_id: unused
        """
        if notify_type != NotifyType.fs_map:
            return
        fs_map = self.get('fs_map')
        if not fs_map:
            return

        # we don't know for which fs config has been changed
        for fs in fs_map['filesystems']:
            fs_name = fs['mdsmap']['fs_name']
            self.verify_and_manage_mds_instance(fs_map, fs_name)