]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/osd.py
2ead0cb8077f6508f82c8866e2e22397cfe93818
6 from typing
import List
, Dict
, Any
, Set
, Union
9 from orchestrator
import OrchestratorError
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
14 class OSDRemoval(object):
21 start_at
: datetime
.datetime
,
24 self
.replace
= replace
26 self
.nodename
= nodename
27 self
.fullname
= fullname
28 self
.started_at
= start_at
29 self
.pg_count
= pg_count
# needed due to changing 'started_at' attr
def __eq__(self, other):
    """Compare two removal entries by their ``osd_id`` only.

    Other attributes are deliberately ignored, so two entries for the
    same OSD compare equal even when they differ elsewhere.

    :param other: object to compare against.
    :return: ``True``/``False`` for objects exposing ``osd_id``;
        ``NotImplemented`` otherwise, so that comparing against an
        unrelated object (e.g. ``None``) yields ``False`` instead of
        raising ``AttributeError``.
    """
    if not hasattr(other, 'osd_id'):
        return NotImplemented
    return self.osd_id == other.osd_id
36 return hash(self
.osd_id
)
39 return ('<OSDRemoval>(osd_id={}, replace={}, force={}, nodename={}'
40 ', fullname={}, started_at={}, pg_count={})').format(
41 self
.osd_id
, self
.replace
, self
.force
, self
.nodename
,
42 self
.fullname
, self
.started_at
, self
.pg_count
)
def pg_count_str(self) -> str:
    """Return ``pg_count`` as text for display.

    :return: ``'n/a'`` when ``self.pg_count`` is negative, otherwise
        the count converted with ``str()``.
    """
    if self.pg_count < 0:
        return 'n/a'
    return str(self.pg_count)
49 class RemoveUtil(object):
def __init__(self, mgr):
    """Create an empty removal queue bound to *mgr*.

    :param mgr: manager object; the other methods of this class reach
        it through ``self.mgr`` (``mon_command``, ``log``,
        ``_remove_daemon``).
    """
    # Must be stored: every command helper goes through ``self.mgr``.
    self.mgr = mgr
    # OSDs currently queued for removal.
    self.to_remove_osds: Set[OSDRemoval] = set()
    # Per-OSD report values (int or str), initially empty.
    self.osd_removal_report: Dict[OSDRemoval, Union[int, str]] = dict()
def report(self) -> Set[OSDRemoval]:
    """Return a shallow copy of the set of OSDs queued for removal.

    A copy is handed out so callers can iterate over it without being
    affected by later changes to the queue itself.
    """
    return self.to_remove_osds.copy()
def queue_osds_for_removal(self, osds: Set[OSDRemoval]) -> None:
    """Add *osds* to the set of OSDs queued for removal.

    :param osds: entries to queue; merged into ``self.to_remove_osds``
        with ``set.update``.
    """
    self.to_remove_osds.update(osds)
62 def _remove_osds_bg(self
) -> None:
64 Performs actions in the _serve() loop to remove an OSD
68 f
"{len(self.to_remove_osds)} OSDs are scheduled for removal: {list(self.to_remove_osds)}")
69 self
._update
_osd
_removal
_status
()
70 remove_osds
: set = self
.to_remove_osds
.copy()
71 for osd
in remove_osds
:
73 self
.drain_osd(osd
.osd_id
)
75 if not self
.is_empty(osd
.osd_id
):
76 logger
.info(f
"OSD <{osd.osd_id}> is not empty yet. Waiting a bit more")
79 if not self
.ok_to_destroy([osd
.osd_id
]):
81 f
"OSD <{osd.osd_id}> is not safe-to-destroy yet. Waiting a bit more")
85 if not self
.down_osd([osd
.osd_id
]):
86 # also remove it from the remove_osd list and set a health_check warning?
87 raise orchestrator
.OrchestratorError(
88 f
"Could not set OSD <{osd.osd_id}> to 'down'")
91 if not self
.destroy_osd(osd
.osd_id
):
92 # also remove it from the remove_osd list and set a health_check warning?
93 raise orchestrator
.OrchestratorError(
94 f
"Could not destroy OSD <{osd.osd_id}>")
96 if not self
.purge_osd(osd
.osd_id
):
97 # also remove it from the remove_osd list and set a health_check warning?
98 raise orchestrator
.OrchestratorError(f
"Could not purge OSD <{osd.osd_id}>")
100 self
.mgr
._remove
_daemon
(osd
.fullname
, osd
.nodename
)
101 logger
.info(f
"Successfully removed OSD <{osd.osd_id}> on {osd.nodename}")
102 logger
.debug(f
"Removing {osd.osd_id} from the queue.")
103 self
.to_remove_osds
.remove(osd
)
def _update_osd_removal_status(self) -> None:
    """
    Generate an OSD report that can be printed to the CLI

    Refreshes ``pg_count`` on every queued OSD by calling
    ``get_pg_count`` with the OSD id as a string, then logs the queue.
    """
    logger.debug("Update OSD removal status")
    for osd in self.to_remove_osds:
        osd.pg_count = self.get_pg_count(str(osd.osd_id))
    logger.debug(f"OSD removal status: {self.to_remove_osds}")
def drain_osd(self, osd_id: str) -> bool:
    """
    Uses `osd_support` module to schedule a drain operation of an OSD

    :param osd_id: id of the OSD to drain; converted with ``int()``
        before being sent.
    :return: whatever ``_run_mon_cmd`` returns for the ``osd drain``
        command.
    """
    cmd_args = {
        'prefix': 'osd drain',
        'osd_ids': [int(osd_id)]
    }
    return self._run_mon_cmd(cmd_args)
124 def get_pg_count(self
, osd_id
: str) -> int:
125 """ Queries for PG count of an OSD """
126 self
.mgr
.log
.debug("Querying for drain status")
127 ret
, out
, err
= self
.mgr
.mon_command({
128 'prefix': 'osd drain status',
131 self
.mgr
.log
.error(f
"Calling osd drain status failed with {err}")
132 raise OrchestratorError("Could not query `osd drain status`")
133 out
= json
.loads(out
)
135 if str(o
.get('osd_id', '')) == str(osd_id
):
136 return int(o
.get('pgs', -1))
def is_empty(self, osd_id: str) -> bool:
    """ Checks if an OSD is empty

    :param osd_id: id of the OSD, passed unchanged to ``get_pg_count``.
    :return: ``True`` only when ``get_pg_count`` reports exactly 0;
        any other value (including a negative one) gives ``False``.
    """
    return self.get_pg_count(osd_id) == 0
143 def ok_to_destroy(self
, osd_ids
: List
[int]) -> bool:
144 """ Queries the safe-to-destroy flag for OSDs """
145 cmd_args
= {'prefix': 'osd safe-to-destroy',
147 return self
._run
_mon
_cmd
(cmd_args
)
149 def destroy_osd(self
, osd_id
: int) -> bool:
150 """ Destroys an OSD (forcefully) """
151 cmd_args
= {'prefix': 'osd destroy-actual',
153 'yes_i_really_mean_it': True}
154 return self
._run
_mon
_cmd
(cmd_args
)
156 def down_osd(self
, osd_ids
: List
[int]) -> bool:
157 """ Sets `down` flag to OSDs """
159 'prefix': 'osd down',
162 return self
._run
_mon
_cmd
(cmd_args
)
164 def purge_osd(self
, osd_id
: int) -> bool:
165 """ Purges an OSD from the cluster (forcefully) """
167 'prefix': 'osd purge-actual',
169 'yes_i_really_mean_it': True
171 return self
._run
_mon
_cmd
(cmd_args
)
173 def out_osd(self
, osd_ids
: List
[int]) -> bool:
174 """ Sets `out` flag to OSDs (NOTE(review): inferred from the method name; confirm against the command prefix) """
179 return self
._run
_mon
_cmd
(cmd_args
)
181 def _run_mon_cmd(self
, cmd_args
: dict) -> bool:
183 Generic command to run mon_command and evaluate/log the results
185 ret
, out
, err
= self
.mgr
.mon_command(cmd_args
)
187 self
.mgr
.log
.debug(f
"ran {cmd_args} with mon_command")
188 self
.mgr
.log
.error(f
"cmd: {cmd_args.get('prefix')} failed with: {err}. (errno:{ret})")
190 self
.mgr
.log
.debug(f
"cmd: {cmd_args.get('prefix')} returns: {out}")