]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/nfs.py
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / pybind / mgr / cephadm / services / nfs.py
1 import errno
2 import logging
3 import os
4 import subprocess
5 import tempfile
6 from typing import Dict, Tuple, Any, List, cast, Optional
7
8 from mgr_module import HandleCommandResult
9 from mgr_module import NFS_POOL_NAME as POOL_NAME
10
11 from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec
12
13 from orchestrator import DaemonDescription
14
15 from cephadm.services.cephadmservice import AuthEntity, CephadmDaemonDeploySpec, CephService
16
17 logger = logging.getLogger(__name__)
18
19
20 class NFSService(CephService):
21 TYPE = 'nfs'
22
def ranked(self) -> bool:
    """Report that NFS daemons are ranked (this implementation always returns True)."""
    return True
25
def fence(self, daemon_id: str) -> None:
    """Fence an old NFS daemon by removing its cephx entity.

    Issues ``auth rm`` for ``client.nfs.<daemon_id>`` through the mgr's mon
    command interface. Fencing is kept best-effort: a non-zero return code
    is logged as a warning instead of being raised.

    :param daemon_id: id of the nfs daemon whose auth entity is removed
    """
    entity = f'client.nfs.{daemon_id}'
    logger.info(f'Fencing old nfs.{daemon_id}')
    ret, _out, err = self.mgr.mon_command({
        'prefix': 'auth rm',
        'entity': entity,
    })
    if ret:
        # the result used to be dropped, which made a failed fence invisible
        logger.warning(
            f'Unable to remove {entity} while fencing nfs.{daemon_id}: {err}'
        )

    # TODO: block/fence this entity (in case it is still running somewhere)
34
def fence_old_ranks(self,
                    spec: ServiceSpec,
                    rank_map: Dict[int, Dict[int, Optional[str]]],
                    num_ranks: int) -> None:
    """Fence daemons of stale ranks/generations and prune them from ``rank_map``.

    ``rank_map`` maps rank -> generation -> daemon id (or ``None``). It is
    modified in place and persisted through ``spec_store.save_rank_map``
    after every removal.

    * Ranks ``>= num_ranks`` are dropped entirely: every daemon recorded for
      them is fenced and the rank's node id is removed from the ganesha
      grace table.
    * For the remaining ranks only the highest generation is kept; daemons
      of lower generations are fenced and their entries deleted.

    :param spec: service spec; cast to ``NFSServiceSpec`` for the grace tool
    :param rank_map: rank/generation map to prune (mutated in place)
    :param num_ranks: number of ranks that should remain
    """
    service_name = spec.service_name()
    # iterate over snapshots because entries are deleted while looping
    for rank, generations in list(rank_map.items()):
        if rank >= num_ranks:
            for daemon_id in generations.values():
                if daemon_id is not None:
                    self.fence(daemon_id)
            del rank_map[rank]
            nodeid = f'{service_name}.{rank}'
            self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table')
            self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid)
            self.mgr.spec_store.save_rank_map(service_name, rank_map)
            continue

        if not generations:
            # nothing recorded for this rank; max() of an empty map would raise
            continue

        max_gen = max(generations)
        for gen, daemon_id in list(generations.items()):
            if gen >= max_gen:
                continue
            if daemon_id is not None:
                self.fence(daemon_id)
            del rank_map[rank][gen]
            self.mgr.spec_store.save_rank_map(service_name, rank_map)
57
def config(self, spec: NFSServiceSpec) -> None:  # type: ignore
    """Service-level setup for NFS: create the ganesha pool via ``nfs.cluster``.

    :param spec: NFS service spec; its ``service_type`` must equal ``TYPE``
    """
    # function-scope import, kept local to this method as in the original
    from nfs.cluster import create_ganesha_pool

    assert self.TYPE == spec.service_type
    mgr = self.mgr
    create_ganesha_pool(mgr)
63
def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
    """Fill in ``final_config`` and ``deps`` on ``daemon_spec`` and return it.

    :param daemon_spec: deploy spec of an nfs daemon (type is asserted)
    :return: the same ``daemon_spec`` object, updated in place
    """
    assert self.TYPE == daemon_spec.daemon_type
    final_config, deps = self.generate_config(daemon_spec)
    daemon_spec.final_config = final_config
    daemon_spec.deps = deps
    return daemon_spec
68
def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
    """Build the cephadm config-json for one NFS (ganesha) daemon.

    Besides assembling the config this creates the daemon's RADOS and RGW
    keyrings, adds the daemon's node id to the ganesha grace table and makes
    sure the service's rados config object exists.

    :param daemon_spec: deploy spec of the daemon being configured
    :return: ``(config, deps)``; ``deps`` is always an empty list here
    """
    assert self.TYPE == daemon_spec.daemon_type

    daemon_type = daemon_spec.daemon_type
    daemon_id = daemon_spec.daemon_id
    host = daemon_spec.host
    spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)

    deps: List[str] = []

    nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}'

    # keyring for the RADOS recovery pool
    rados_user = f'{daemon_type}.{daemon_id}'
    rados_keyring = self.create_keyring(daemon_spec)

    # the rank has to be present in the ganesha grace table
    self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table')
    self.run_grace_tool(spec, 'add', nodeid)

    # the rados config object has to exist
    self.create_rados_config_obj(spec)

    # keyring for RGW access
    rgw_user = f'{rados_user}-rgw'
    rgw_keyring = self.create_rgw_keyring(daemon_spec)

    # prefer the service's virtual IP, then the daemon's IP, else empty
    bind_addr = spec.virtual_ip or daemon_spec.ip or ''
    if not bind_addr:
        logger.warning(f"Bind address in {daemon_type}.{daemon_id}'s ganesha conf is defaulting to empty")

    # render ganesha.conf from its template
    template_context: Dict[str, Any] = {
        "user": rados_user,
        "nodeid": nodeid,
        "pool": POOL_NAME,
        "namespace": spec.service_id,
        "rgw_user": rgw_user,
        "url": f'rados://{POOL_NAME}/{spec.service_id}/{spec.rados_config_name()}',
        # fall back to default NFS port if not present in daemon_spec
        "port": daemon_spec.ports[0] if daemon_spec.ports else 2049,
        "bind_addr": bind_addr,
    }
    ganesha_conf = self.mgr.template.render('services/nfs/ganesha.conf.j2', template_context)

    # assemble the cephadm config json; key order matches the original build
    config: Dict[str, Any] = {
        'pool': POOL_NAME,
        'namespace': spec.service_id,
        'userid': rados_user,
        'extra_args': ['-N', 'NIV_EVENT'],
        'files': {
            'ganesha.conf': ganesha_conf,
        },
    }
    config.update(
        self.get_config_and_keyring(
            daemon_type, daemon_id,
            keyring=rados_keyring,
            host=host
        )
    )
    config['rgw'] = {
        'cluster': 'ceph',
        'user': rgw_user,
        'keyring': rgw_keyring,
    }
    # NOTE(review): this debug message contains the keyrings stored in config
    logger.debug('Generated cephadm config-json: %s' % config)

    return config, deps
140
def create_rados_config_obj(self,
                            spec: NFSServiceSpec,
                            clobber: bool = False) -> None:
    """Make sure the service's rados config object exists (empty) in the NFS pool.

    The ``rados`` CLI is run as this mgr, against ``POOL_NAME`` and the
    namespace named after the service id. A ``get`` probes for the object;
    it is (re)written with ``put`` when the probe fails or ``clobber`` is set.

    :param spec: NFS service spec providing the object name and namespace
    :param clobber: overwrite the object even if it already exists
    :raises RuntimeError: if ``rados put`` exits non-zero
    :raises subprocess.TimeoutExpired: if a ``rados`` call exceeds 10 seconds
    """
    objname = spec.rados_config_name()
    cmd = [
        'rados',
        '-n', f"mgr.{self.mgr.get_mgr_id()}",
        '-k', str(self.mgr.get_ceph_option('keyring')),
        '-p', POOL_NAME,
        '--namespace', cast(str, spec.service_id),
    ]
    probe = subprocess.run(
        cmd + ['get', objname, '-'],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        timeout=10)
    if probe.returncode == 0 and not clobber:
        logger.info('Rados config object exists: %s' % objname)
        return

    logger.info('Creating rados config object: %s' % objname)
    # 'put <obj> -' takes the payload from stdin; pass explicit empty input
    # rather than relying on whatever stdin the mgr process inherited
    result = subprocess.run(
        cmd + ['put', objname, '-'],
        input=b'',
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        timeout=10)
    if result.returncode:
        err = result.stderr.decode('utf-8', errors='replace')
        self.mgr.log.warning(
            f'Unable to create rados config object {objname}: {err}'
        )
        raise RuntimeError(err)
169
def create_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
    """Return a keyring for the daemon's RADOS user.

    The key gets ``mon 'allow r'`` plus read/write OSD access limited to the
    NFS pool and the namespace named after the service id.

    :param daemon_spec: deploy spec of the nfs daemon
    :return: keyring as returned by ``get_keyring_with_caps``
    """
    nfs_spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
    entity: AuthEntity = self.get_auth_entity(daemon_spec.daemon_id)

    caps = [
        'mon', 'allow r',
        'osd', 'allow rw pool=%s namespace=%s' % (POOL_NAME, nfs_spec.service_id),
    ]

    logger.info('Creating key for %s' % entity)
    return self.get_keyring_with_caps(entity, caps)
183
def create_rgw_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
    """Return a keyring for the daemon's ``<daemon_id>-rgw`` auth entity.

    :param daemon_spec: deploy spec of the nfs daemon
    :return: keyring as returned by ``get_keyring_with_caps``
    """
    entity: AuthEntity = self.get_auth_entity(f'{daemon_spec.daemon_id}-rgw')
    rgw_caps = [
        'mon', 'allow r',
        'osd', 'allow rwx tag rgw *=*',
    ]

    logger.info('Creating key for %s' % entity)
    return self.get_keyring_with_caps(entity, rgw_caps)
194
def run_grace_tool(self,
                   spec: NFSServiceSpec,
                   action: str,
                   nodeid: str) -> None:
    """Run ``ganesha-rados-grace <action> <nodeid>`` for the given NFS service.

    A temporary ``client.mgr.nfs.grace.<service>`` key is created for the
    call and removed again afterwards, whether or not the tool succeeded.

    :param spec: NFS service spec (provides service name and namespace)
    :param action: grace tool action; callers in this file pass 'add'/'remove'
    :param nodeid: node id the action applies to
    :raises RuntimeError: if the tool exits with a non-zero status
    :raises subprocess.TimeoutExpired: if the tool runs longer than 10 seconds
    """
    # write a temp keyring and referencing config file.  this is a kludge
    # because the ganesha-grace-tool can only authenticate as a client (and
    # not a mgr).  Also, it doesn't allow you to pass a keyring location via
    # the command line, nor does it parse the CEPH_ARGS env var.
    tmp_id = f'mgr.nfs.grace.{spec.service_name()}'
    entity = AuthEntity(f'client.{tmp_id}')
    keyring = self.get_keyring_with_caps(
        entity,
        ['mon', 'allow r', 'osd', f'allow rwx pool {POOL_NAME}']
    )
    # the try starts right after the key exists, so the temporary entity is
    # removed even if writing the temp files fails
    try:
        # context managers close (and thereby delete) both temp files as soon
        # as the tool has run, instead of waiting for garbage collection
        with tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring') as tmp_keyring, \
                tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf') as tmp_conf:
            os.fchmod(tmp_keyring.fileno(), 0o600)
            tmp_keyring.write(keyring)
            tmp_keyring.flush()
            tmp_conf.write(self.mgr.get_minimal_ceph_conf())
            tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n')
            tmp_conf.flush()

            cmd: List[str] = [
                'ganesha-rados-grace',
                '--cephconf', tmp_conf.name,
                '--userid', tmp_id,
                '--pool', POOL_NAME,
                '--ns', cast(str, spec.service_id),
                action, nodeid,
            ]
            self.mgr.log.debug(cmd)
            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                    timeout=10)

        if result.returncode:
            err = result.stderr.decode("utf-8")
            self.mgr.log.warning(f'ganesha-rados-grace tool failed: {err}')
            raise RuntimeError(f'grace tool failed: {err}')
    finally:
        self.mgr.check_mon_command({
            'prefix': 'auth rm',
            'entity': entity,
        })
240
def remove_rgw_keyring(self, daemon: DaemonDescription) -> None:
    """Remove the ``<daemon_id>-rgw`` auth entity belonging to ``daemon``.

    :param daemon: daemon description; its ``daemon_id`` must not be None
    """
    assert daemon.daemon_id is not None
    entity: AuthEntity = self.get_auth_entity(f'{daemon.daemon_id}-rgw')

    logger.info(f'Removing key for {entity}')
    auth_rm = {
        'prefix': 'auth rm',
        'entity': entity,
    }
    self.mgr.check_mon_command(auth_rm)
251
def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
    """Run the parent class's post-remove step, then drop the daemon's RGW key.

    :param daemon: description of the daemon that was removed
    :param is_failed_deploy: passed through unchanged to the parent class
    """
    super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
    # the RGW key is created by this service, so it is cleaned up here
    self.remove_rgw_keyring(daemon)
255
def ok_to_stop(self,
               daemon_ids: List[str],
               force: bool = False,
               known: Optional[List[str]] = None) -> HandleCommandResult:
    """Decide whether the given NFS daemons may be stopped.

    :param daemon_ids: ids of the daemons that would be stopped
    :param force: allow stopping when more than one nfs daemon exists
    :param known: accepted for interface compatibility; not used here
    :return: ``-EBUSY`` with a message when stopping is refused, else ``0``
    """
    # if only 1 nfs, alert user (this is not passable with --force)
    blocked, message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
    if blocked:
        return HandleCommandResult(-errno.EBUSY, '', message)

    # from here on there is > 1 nfs daemon; without force only warn
    if not force:
        return HandleCommandResult(
            -errno.EBUSY,
            '',
            "WARNING: Removing NFS daemons can cause clients to lose connectivity. ",
        )

    # > 1 nfs daemon and the force flag was given
    return HandleCommandResult(0, message, '')
273
def purge(self, service_name: str) -> None:
    """Remove the ``grace`` object of an NFS service from the NFS pool.

    Does nothing when ``service_name`` is not in the spec store. Removal is
    best-effort: a non-zero exit of ``rados rm`` is logged as a warning and
    not raised.

    :param service_name: name of the NFS service being purged
    :raises subprocess.TimeoutExpired: if ``rados`` runs longer than 10 seconds
    """
    if service_name not in self.mgr.spec_store:
        return
    spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec)

    logger.info(f'Removing grace file for {service_name}')
    cmd = [
        'rados',
        '-n', f"mgr.{self.mgr.get_mgr_id()}",
        '-k', str(self.mgr.get_ceph_option('keyring')),
        '-p', POOL_NAME,
        '--namespace', cast(str, spec.service_id),
        'rm', 'grace',
    ]
    result = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=10
    )
    if result.returncode:
        # the exit status used to be discarded; surface it without failing
        err = result.stderr.decode('utf-8', errors='replace')
        logger.warning(f'Unable to remove grace file for {service_name}: {err}')