]> git.proxmox.com Git - ceph.git/blame - ceph/src/pybind/mgr/nfs/cluster.py
update ceph source to reef 18.2.1
[ceph.git] / ceph / src / pybind / mgr / nfs / cluster.py
CommitLineData
2a845540 1import ipaddress
b3b6e05e 2import logging
b3b6e05e 3import re
a4b75251 4import socket
1e59de90 5from typing import cast, Dict, List, Any, Union, Optional, TYPE_CHECKING
b3b6e05e 6
a4b75251 7from mgr_module import NFS_POOL_NAME as POOL_NAME
b3b6e05e 8from ceph.deployment.service_spec import NFSServiceSpec, PlacementSpec, IngressSpec
1e59de90 9from object_format import ErrorResponse
b3b6e05e
TL
10
11import orchestrator
1e59de90 12from orchestrator.module import IngressType
b3b6e05e
TL
13
14from .exception import NFSInvalidOperation, ClusterNotFound
1e59de90
TL
15from .utils import (
16 ManualRestartRequired,
17 NonFatalError,
18 available_clusters,
19 conf_obj_name,
20 restart_nfs_service,
21 user_conf_obj_name)
22from .export import NFSRados
b3b6e05e 23
a4b75251
TL
24if TYPE_CHECKING:
25 from nfs.module import Module
26 from mgr_module import MgrModule
27
28
b3b6e05e
TL
29log = logging.getLogger(__name__)
30
31
def resolve_ip(hostname: str) -> str:
    """Resolve *hostname* to an IP address string, preferring IPv4.

    The name is looked up with ``socket.getaddrinfo`` (stream sockets,
    canonical-name flag set).  The address of the first ``AF_INET`` entry is
    returned; if the lookup produced no IPv4 entry, the address of the very
    first entry is returned instead.

    :param hostname: host name or address literal to resolve
    :return: the selected address as a string
    :raises NFSInvalidOperation: if the lookup fails with ``socket.gaierror``
    """
    try:
        candidates = socket.getaddrinfo(
            hostname,
            None,
            type=socket.SOCK_STREAM,
            flags=socket.AI_CANONNAME,
        )
    except socket.gaierror as e:
        raise NFSInvalidOperation(f"Cannot resolve IP for host {hostname}: {e}")

    # Each entry is (family, type, proto, canonname, sockaddr); sockaddr[0]
    # is the address.  Take the first IPv4 entry when there is one.
    first_v4 = next(
        (entry for entry in candidates if entry[0] == socket.AF_INET),
        None,
    )
    chosen = candidates[0] if first_v4 is None else first_v4
    return chosen[4][0]
43
44
def create_ganesha_pool(mgr: 'MgrModule') -> None:
    """Create the pool named ``POOL_NAME`` unless the OSD map already lists it.

    When the pool is missing, two mon commands are issued in order:
    ``osd pool create`` followed by ``osd pool application enable`` with the
    ``nfs`` application.  Nothing is done (and nothing is logged) when the
    pool is already present.

    :param mgr: mgr module used to read the OSD map and send mon commands
    """
    known_pools = [
        entry['pool_name']
        for entry in mgr.get_osdmap().dump().get('pools', [])
    ]
    if POOL_NAME in known_pools:
        return

    mon_commands = (
        {
            'prefix': 'osd pool create',
            'pool': POOL_NAME,
            'yes_i_really_mean_it': True,
        },
        {
            'prefix': 'osd pool application enable',
            'pool': POOL_NAME,
            'app': 'nfs',
        },
    )
    # Order matters: the pool must exist before the application is enabled.
    for command in mon_commands:
        mgr.check_mon_command(command)
    log.debug("Successfully created nfs-ganesha pool %s", POOL_NAME)
b3b6e05e
TL
55
56
class NFSCluster:
    """Create, delete, inspect and configure NFS clusters through the mgr module.

    Every public method wraps its work in ``try``/``except``: failures are
    logged with ``log.exception`` and re-raised via ``ErrorResponse.wrap``.
    """

    def __init__(self, mgr: 'Module') -> None:
        """Remember the mgr module used for orchestrator and RADOS access."""
        self.mgr = mgr

    def _call_orch_apply_nfs(
            self,
            cluster_id: str,
            placement: Optional[str] = None,
            virtual_ip: Optional[str] = None,
            ingress_mode: Optional[IngressType] = None,
            port: Optional[int] = None,
    ) -> None:
        """Apply the NFS service spec and, with a virtual IP, an ingress spec.

        :param cluster_id: service id of the NFS service
        :param placement: placement string parsed by ``PlacementSpec.from_string``
        :param virtual_ip: when set, an ingress service is applied as well
        :param ingress_mode: ingress flavour; ``IngressType.default`` if unset
        :param port: client-facing port; a falsy value means 2049
        :raises: whatever ``orchestrator.raise_if_exception`` raises for a
            failed completion
        """
        if not port:
            port = 2049   # default nfs port
        if virtual_ip:
            # nfs + ingress
            # run NFS on non-standard port
            if not ingress_mode:
                ingress_mode = IngressType.default
            ingress_mode = ingress_mode.canonicalize()
            pspec = PlacementSpec.from_string(placement)
            if ingress_mode == IngressType.keepalive_only:
                # enforce count=1 for nfs over keepalive only
                pspec.count = 1

            # NOTE(review): no range check here -- a port above 55535 makes
            # 10000 + port exceed 65535.
            ganesha_port = 10000 + port  # semi-arbitrary, fix me someday
            frontend_port: Optional[int] = port
            virtual_ip_for_ganesha: Optional[str] = None
            keepalive_only: bool = False
            enable_haproxy_protocol: bool = False
            if ingress_mode == IngressType.haproxy_protocol:
                enable_haproxy_protocol = True
            elif ingress_mode == IngressType.keepalive_only:
                # Ganesha is given the virtual IP (prefix length stripped) and
                # the client-facing port itself; no frontend port is set.
                keepalive_only = True
                virtual_ip_for_ganesha = virtual_ip.split('/')[0]
                ganesha_port = port
                frontend_port = None

            spec = NFSServiceSpec(service_type='nfs', service_id=cluster_id,
                                  placement=pspec,
                                  # use non-default port so we don't conflict with ingress
                                  port=ganesha_port,
                                  virtual_ip=virtual_ip_for_ganesha,
                                  enable_haproxy_protocol=enable_haproxy_protocol)
            completion = self.mgr.apply_nfs(spec)
            orchestrator.raise_if_exception(completion)
            ispec = IngressSpec(service_type='ingress',
                                service_id='nfs.' + cluster_id,
                                backend_service='nfs.' + cluster_id,
                                placement=pspec,
                                frontend_port=frontend_port,
                                monitor_port=7000 + port,   # semi-arbitrary, fix me someday
                                virtual_ip=virtual_ip,
                                keepalive_only=keepalive_only,
                                enable_haproxy_protocol=enable_haproxy_protocol)
            completion = self.mgr.apply_ingress(ispec)
            orchestrator.raise_if_exception(completion)
        else:
            # standalone nfs
            spec = NFSServiceSpec(service_type='nfs', service_id=cluster_id,
                                  placement=PlacementSpec.from_string(placement),
                                  port=port)
            completion = self.mgr.apply_nfs(spec)
            orchestrator.raise_if_exception(completion)
        log.debug("Successfully deployed nfs daemons with cluster id %s and placement %s",
                  cluster_id, placement)

    def create_empty_rados_obj(self, cluster_id: str) -> None:
        """Write an empty string to the cluster's common config object."""
        common_conf = conf_obj_name(cluster_id)
        self._rados(cluster_id).write_obj('', common_conf)
        log.info("Created empty object:%s", common_conf)

    def delete_config_obj(self, cluster_id: str) -> None:
        """Remove all RADOS objects of the cluster via ``remove_all_obj``."""
        self._rados(cluster_id).remove_all_obj()
        log.info("Deleted %s object and all objects in %s",
                 conf_obj_name(cluster_id), cluster_id)

    def create_nfs_cluster(
            self,
            cluster_id: str,
            placement: Optional[str],
            virtual_ip: Optional[str],
            ingress: Optional[bool] = None,
            ingress_mode: Optional[IngressType] = None,
            port: Optional[int] = None,
    ) -> None:
        """Validate the request and deploy a new NFS cluster.

        Validation covers the virtual IP syntax, the ``virtual_ip`` /
        ``ingress`` / ``ingress_mode`` combination and the characters allowed
        in ``cluster_id`` (letters, digits, ``-``, ``_``, ``.``).

        :raises: the value of ``ErrorResponse.wrap(e)`` for any exception
            raised while validating or deploying, including the
            ``NonFatalError`` used for an already existing cluster
        """
        try:
            if virtual_ip:
                # validate virtual_ip value: ip_address throws a ValueError
                # exception in case it's not a valid ipv4 or ipv6 address
                ip = virtual_ip.split('/')[0]
                ipaddress.ip_address(ip)
            if virtual_ip and not ingress:
                raise NFSInvalidOperation('virtual_ip can only be provided with ingress enabled')
            if not virtual_ip and ingress:
                raise NFSInvalidOperation('ingress currently requires a virtual_ip')
            if ingress_mode and not ingress:
                raise NFSInvalidOperation('--ingress-mode must be passed along with --ingress')
            invalid_str = re.search('[^A-Za-z0-9-_.]', cluster_id)
            if invalid_str:
                raise NFSInvalidOperation(f"cluster id {cluster_id} is invalid. "
                                          f"{invalid_str.group()} is char not permitted")

            # Reject duplicates before any side effect: creating the pool or
            # issuing a write of an empty string to the common config object
            # must not happen for a cluster that is already deployed.
            if cluster_id in available_clusters(self.mgr):
                raise NonFatalError(f"{cluster_id} cluster already exists")

            create_ganesha_pool(self.mgr)
            self.create_empty_rados_obj(cluster_id)
            self._call_orch_apply_nfs(cluster_id, placement, virtual_ip, ingress_mode, port)
        except Exception as e:
            log.exception(f"NFS Cluster {cluster_id} could not be created")
            raise ErrorResponse.wrap(e)

    def delete_nfs_cluster(self, cluster_id: str) -> None:
        """Delete the exports, ingress and NFS services and config objects.

        :raises: the value of ``ErrorResponse.wrap(e)`` on any failure,
            including the ``NonFatalError`` used for an unknown cluster
        """
        try:
            cluster_list = available_clusters(self.mgr)
            if cluster_id in cluster_list:
                self.mgr.export_mgr.delete_all_exports(cluster_id)
                # The ingress service is removed first, then the NFS service.
                completion = self.mgr.remove_service('ingress.nfs.' + cluster_id)
                orchestrator.raise_if_exception(completion)
                completion = self.mgr.remove_service('nfs.' + cluster_id)
                orchestrator.raise_if_exception(completion)
                self.delete_config_obj(cluster_id)
                return
            raise NonFatalError("Cluster does not exist")
        except Exception as e:
            log.exception(f"Failed to delete NFS Cluster {cluster_id}")
            raise ErrorResponse.wrap(e)

    def list_nfs_cluster(self) -> List[str]:
        """Return the result of ``available_clusters`` for this mgr."""
        try:
            return available_clusters(self.mgr)
        except Exception as e:
            log.exception("Failed to list NFS Cluster")
            raise ErrorResponse.wrap(e)

    def _show_nfs_cluster_info(self, cluster_id: str) -> Dict[str, Any]:
        """Collect backend daemons and ingress details for one cluster.

        :return: dict with ``virtual_ip`` and ``backend`` keys, plus ``port``
            and ``monitor_port`` when a matching ingress service reports ports
        """
        completion = self.mgr.list_daemons(daemon_type='nfs')
        # Here completion.result is a list DaemonDescription objects
        daemons = orchestrator.raise_if_exception(completion)
        backends: List[Dict[str, Any]] = []
        # Host inventory: fetched lazily, at most once, and only when some
        # daemon does not report an IP of its own.
        inventory: Optional[List[Any]] = None

        for daemon in daemons:
            if cluster_id != daemon.service_id():
                continue
            assert daemon.hostname
            try:
                if daemon.ip:
                    ip = daemon.ip
                else:
                    if inventory is None:
                        c = self.mgr.get_hosts()
                        orchestrator.raise_if_exception(c)
                        inventory = list(c.result or [])
                    hosts = [h for h in inventory
                             if h.hostname == daemon.hostname]
                    if hosts:
                        ip = resolve_ip(hosts[0].addr)
                    else:
                        # sigh
                        ip = resolve_ip(daemon.hostname)
                backends.append({
                    "hostname": daemon.hostname,
                    "ip": ip,
                    "port": daemon.ports[0] if daemon.ports else None
                })
            except orchestrator.OrchestratorError:
                # Best effort: a daemon whose address lookup fails with an
                # orchestrator error is left out of the backend list.
                continue

        r: Dict[str, Any] = {
            'virtual_ip': None,
            'backend': backends,
        }
        sc = self.mgr.describe_service(service_type='ingress')
        services = orchestrator.raise_if_exception(sc)
        for i in services:
            spec = cast(IngressSpec, i.spec)
            if spec.backend_service == f'nfs.{cluster_id}':
                r['virtual_ip'] = i.virtual_ip.split('/')[0] if i.virtual_ip else None
                if i.ports:
                    r['port'] = i.ports[0]
                    if len(i.ports) > 1:
                        r['monitor_port'] = i.ports[1]
        log.debug("Successfully fetched %s info: %s", cluster_id, r)
        return r

    def show_nfs_cluster_info(self, cluster_id: Optional[str] = None) -> Dict[str, Any]:
        """Return info for one cluster, or for all clusters when none is named.

        :return: mapping of cluster id to the dict built by
            ``_show_nfs_cluster_info``
        :raises: the value of ``ErrorResponse.wrap(e)`` on any failure,
            including ``ClusterNotFound`` for an unknown ``cluster_id``
        """
        try:
            if cluster_id and cluster_id not in available_clusters(self.mgr):
                raise ClusterNotFound()
            info_res = {}
            if cluster_id:
                cluster_ls = [cluster_id]
            else:
                cluster_ls = available_clusters(self.mgr)

            # A distinct loop name keeps the parameter from being rebound.
            for cid in cluster_ls:
                res = self._show_nfs_cluster_info(cid)
                if res:
                    info_res[cid] = res
            return info_res
        except Exception as e:
            log.exception("Failed to show info for cluster")
            raise ErrorResponse.wrap(e)

    def get_nfs_cluster_config(self, cluster_id: str) -> str:
        """Return the cluster's user config object content, or ``""`` if falsy.

        :raises: the value of ``ErrorResponse.wrap(e)`` on any failure,
            including ``ClusterNotFound`` for an unknown cluster
        """
        try:
            if cluster_id in available_clusters(self.mgr):
                rados_obj = self._rados(cluster_id)
                conf = rados_obj.read_obj(user_conf_obj_name(cluster_id))
                return conf or ""
            raise ClusterNotFound()
        except Exception as e:
            log.exception(f"Fetching NFS-Ganesha Config failed for {cluster_id}")
            raise ErrorResponse.wrap(e)

    def set_nfs_cluster_config(self, cluster_id: str, nfs_config: str) -> None:
        """Store a user config for the cluster and restart its NFS service.

        :raises ManualRestartRequired: when a ``NotImplementedError`` escapes
            the ``try`` body (NOTE(review): presumably from
            ``restart_nfs_service`` -- confirm)
        :raises: the value of ``ErrorResponse.wrap(e)`` for any other failure,
            including an already existing user config or an unknown cluster
        """
        try:
            if cluster_id in available_clusters(self.mgr):
                rados_obj = self._rados(cluster_id)
                if rados_obj.check_user_config():
                    raise NonFatalError("NFS-Ganesha User Config already exists")
                rados_obj.write_obj(nfs_config, user_conf_obj_name(cluster_id),
                                    conf_obj_name(cluster_id))
                log.debug("Successfully saved %s's user config: \n %s", cluster_id, nfs_config)
                restart_nfs_service(self.mgr, cluster_id)
                return
            raise ClusterNotFound()
        except NotImplementedError:
            raise ManualRestartRequired("NFS-Ganesha Config Added Successfully")
        except Exception as e:
            log.exception(f"Setting NFS-Ganesha Config failed for {cluster_id}")
            raise ErrorResponse.wrap(e)

    def reset_nfs_cluster_config(self, cluster_id: str) -> None:
        """Remove the cluster's user config and restart its NFS service.

        :raises ManualRestartRequired: when a ``NotImplementedError`` escapes
            the ``try`` body (NOTE(review): presumably from
            ``restart_nfs_service`` -- confirm)
        :raises: the value of ``ErrorResponse.wrap(e)`` for any other failure,
            including a missing user config or an unknown cluster
        """
        try:
            if cluster_id in available_clusters(self.mgr):
                rados_obj = self._rados(cluster_id)
                if not rados_obj.check_user_config():
                    raise NonFatalError("NFS-Ganesha User Config does not exist")
                rados_obj.remove_obj(user_conf_obj_name(cluster_id),
                                     conf_obj_name(cluster_id))
                restart_nfs_service(self.mgr, cluster_id)
                return
            raise ClusterNotFound()
        except NotImplementedError:
            raise ManualRestartRequired("NFS-Ganesha Config Removed Successfully")
        except Exception as e:
            log.exception(f"Resetting NFS-Ganesha Config failed for {cluster_id}")
            raise ErrorResponse.wrap(e)

    def _rados(self, cluster_id: str) -> NFSRados:
        """Return a new NFSRados object for the given cluster id."""
        return NFSRados(self.mgr.rados, cluster_id)