]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/dashboard/module.py
362faddad11df865fbc1660373f1b90bbb3ccd29
[ceph.git] / ceph / src / pybind / mgr / dashboard / module.py
1
2 """
3 Demonstrate writing a Ceph web interface inside a mgr module.
4 """
5
6 # We must share a global reference to this instance, because it is the
7 # gatekeeper to all accesses to data from the C++ side (e.g. the REST API
8 # request handlers need to see it)
9 from collections import defaultdict
10 import collections
11
12 _global_instance = {'plugin': None}
13 def global_instance():
14 assert _global_instance['plugin'] is not None
15 return _global_instance['plugin']
16
17
18 import os
19 import logging
20 import logging.config
21 import json
22 import sys
23 import time
24 import threading
25
26 import cherrypy
27 import jinja2
28
29 from mgr_module import MgrModule, CommandResult
30
31 from types import OsdMap, NotFound, Config, FsMap, MonMap, \
32 PgSummary, Health, MonStatus
33
34 import rados
35 from rbd_ls import RbdLs
36 from cephfs_clients import CephFSClients
37
38
# Module-level logger, shared by free functions and the web app.
log = logging.getLogger("dashboard")


# How many cluster log lines shall we hold onto in our
# python module for the convenience of the GUI?
LOG_BUFFER_SIZE = 30
45
# cherrypy likes to sys.exit on error. don't let it take us down too!
def os_exit_noop(*args, **kwargs):
    """No-op stand-in for os._exit.

    The real os._exit takes a status code, so accept (and ignore) any
    arguments: a zero-argument replacement would raise TypeError the
    moment cherrypy actually called os._exit(status).
    """
    pass


os._exit = os_exit_noop
51
52
def recurse_refs(root, path):
    """Debug helper: log the refcount of *root* and of every value
    nested inside it (recursing through dicts and lists), labelling
    each entry with a readable path like "root->key[3]".
    """
    if isinstance(root, dict):
        for k, v in root.items():
            recurse_refs(v, path + "->%s" % k)
    elif isinstance(root, list):
        for n, i in enumerate(root):
            recurse_refs(i, path + "[%d]" % n)

    # Lazy %-style logging args: formatting only happens if INFO is enabled.
    log.info("%s %d (%s)", path, sys.getrefcount(root), root.__class__)
62
63
64 class Module(MgrModule):
    def __init__(self, *args, **kwargs):
        """Register this instance globally and initialise the caches
        that the web request handlers read from."""
        super(Module, self).__init__(*args, **kwargs)
        # Publish ourselves so module-level code (the CherryPy handlers)
        # can reach the one live plugin via global_instance().
        _global_instance['plugin'] = self
        self.log.info("Constructing module {0}: instance {1}".format(
            __name__, _global_instance))

        # Set True once the initial "log last" load in serve() completes;
        # until then, incoming clog notifications are dropped (see notify()).
        self.log_primed = False
        self.log_buffer = collections.deque(maxlen=LOG_BUFFER_SIZE)
        self.audit_buffer = collections.deque(maxlen=LOG_BUFFER_SIZE)

        # Keep a librados instance for those that need it.
        # Created lazily by the `rados` property; torn down in shutdown().
        self._rados = None

        # Stateful instances of RbdLs, hold cached results. Key to dict
        # is pool name.
        self.rbd_ls = {}

        # Stateful instances of CephFSClients, hold cached results. Key to
        # dict is FSCID
        self.cephfs_clients = {}

        # A short history of pool df stats: pool id -> stat name -> deque
        # of (timestamp, value), newest first, capped at 10 samples.
        self.pool_stats = defaultdict(lambda: defaultdict(
            lambda: collections.deque(maxlen=10)))
89
    @property
    def rados(self):
        """
        A librados instance to be shared by any classes within
        this mgr module that want one.

        Created lazily on first access, connected via the mgr's own
        CephContext capsule, and shut down in shutdown().
        """
        if self._rados:
            return self._rados

        # Deferred import: ceph_state is only importable when running
        # embedded in the mgr.
        from mgr_module import ceph_state
        ctx_capsule = ceph_state.get_context()
        self._rados = rados.Rados(context=ctx_capsule)
        self._rados.connect()

        return self._rados
105
106 def get_localized_config(self, key):
107 r = self.get_config(self.get_mgr_id() + '/' + key)
108 if r is None:
109 r = self.get_config(key)
110 return r
111
112 def update_pool_stats(self):
113 df = global_instance().get("df")
114 pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
115 now = time.time()
116 for pool_id, stats in pool_stats.items():
117 for stat_name, stat_val in stats.items():
118 self.pool_stats[pool_id][stat_name].appendleft((now, stat_val))
119
120 def notify(self, notify_type, notify_val):
121 if notify_type == "clog":
122 # Only store log messages once we've done our initial load,
123 # so that we don't end up duplicating.
124 if self.log_primed:
125 if notify_val['channel'] == "audit":
126 self.audit_buffer.appendleft(notify_val)
127 else:
128 self.log_buffer.appendleft(notify_val)
129 elif notify_type == "pg_summary":
130 self.update_pool_stats()
131 else:
132 pass
133
134 def get_sync_object(self, object_type, path=None):
135 if object_type == OsdMap:
136 data = self.get("osd_map")
137
138 assert data is not None
139
140 data['tree'] = self.get("osd_map_tree")
141 data['crush'] = self.get("osd_map_crush")
142 data['crush_map_text'] = self.get("osd_map_crush_map_text")
143 data['osd_metadata'] = self.get("osd_metadata")
144 obj = OsdMap(data)
145 elif object_type == Config:
146 data = self.get("config")
147 obj = Config( data)
148 elif object_type == MonMap:
149 data = self.get("mon_map")
150 obj = MonMap(data)
151 elif object_type == FsMap:
152 data = self.get("fs_map")
153 obj = FsMap(data)
154 elif object_type == PgSummary:
155 data = self.get("pg_summary")
156 self.log.debug("JSON: {0}".format(data))
157 obj = PgSummary(data)
158 elif object_type == Health:
159 data = self.get("health")
160 obj = Health(json.loads(data['json']))
161 elif object_type == MonStatus:
162 data = self.get("mon_status")
163 obj = MonStatus(json.loads(data['json']))
164 else:
165 raise NotImplementedError(object_type)
166
167 # TODO: move 'path' handling up into C++ land so that we only
168 # Pythonize the part we're interested in
169 if path:
170 try:
171 for part in path:
172 if isinstance(obj, dict):
173 obj = obj[part]
174 else:
175 obj = getattr(obj, part)
176 except (AttributeError, KeyError):
177 raise NotFound(object_type, path)
178
179 return obj
180
    def shutdown(self):
        """Stop the CherryPy engine (unblocking serve()) and then
        release the shared librados handle, if one was ever created."""
        log.info("Stopping server...")
        cherrypy.engine.exit()
        log.info("Stopped server")

        log.info("Stopping librados...")
        # _rados is lazily created by the `rados` property, so it may
        # still be None here.
        if self._rados:
            self._rados.shutdown()
        log.info("Stopped librados.")
190
191 def get_latest(self, daemon_type, daemon_name, stat):
192 data = self.get_counter(daemon_type, daemon_name, stat)[stat]
193 if data:
194 return data[-1][1]
195 else:
196 return 0
197
198 def get_rate(self, daemon_type, daemon_name, stat):
199 data = self.get_counter(daemon_type, daemon_name, stat)[stat]
200
201 if data and len(data) > 1:
202 return (data[-1][1] - data[-2][1]) / float(data[-1][0] - data[-2][0])
203 else:
204 return 0
205
206 def format_dimless(self, n, width, colored=True):
207 """
208 Format a number without units, so as to fit into `width` characters, substituting
209 an appropriate unit suffix.
210 """
211 units = [' ', 'k', 'M', 'G', 'T', 'P']
212 unit = 0
213 while len("%s" % (int(n) // (1000**unit))) > width - 1:
214 unit += 1
215
216 if unit > 0:
217 truncated_float = ("%f" % (n / (1000.0 ** unit)))[0:width - 1]
218 if truncated_float[-1] == '.':
219 truncated_float = " " + truncated_float[0:-1]
220 else:
221 truncated_float = "%{wid}d".format(wid=width-1) % n
222 formatted = "%s%s" % (truncated_float, units[unit])
223
224 if colored:
225 # TODO: html equivalent
226 # if n == 0:
227 # color = self.BLACK, False
228 # else:
229 # color = self.YELLOW, False
230 # return self.bold(self.colorize(formatted[0:-1], color[0], color[1])) \
231 # + self.bold(self.colorize(formatted[-1], self.BLACK, False))
232 return formatted
233 else:
234 return formatted
235
    def fs_status(self, fs_id):
        """Assemble the status payload for one filesystem: per-rank MDS
        table (including standby-replays), pool usage table, standby
        daemons and a map of ceph version -> daemon names.

        NOTE(review): if *fs_id* matches no filesystem, `filesystem`
        stays None and the mdsmap lookup below raises TypeError —
        presumably callers only pass ids from fs_map; confirm.
        """
        mds_versions = defaultdict(list)

        fsmap = self.get("fs_map")
        filesystem = None
        for fs in fsmap['filesystems']:
            if fs['id'] == fs_id:
                filesystem = fs
                break

        rank_table = []

        mdsmap = filesystem['mdsmap']

        client_count = 0

        # One row per configured rank: live daemons get real stats,
        # unfilled ranks get a "failed" placeholder row.
        for rank in mdsmap["in"]:
            up = "mds_{0}".format(rank) in mdsmap["up"]
            if up:
                gid = mdsmap['up']["mds_{0}".format(rank)]
                info = mdsmap['info']['gid_{0}'.format(gid)]
                dns = self.get_latest("mds", info['name'], "mds.inodes")
                inos = self.get_latest("mds", info['name'], "mds_mem.ino")

                if rank == 0:
                    client_count = self.get_latest("mds", info['name'],
                                                   "mds_sessions.session_count")
                elif client_count == 0:
                    # In case rank 0 was down, look at another rank's
                    # sessionmap to get an indication of clients.
                    client_count = self.get_latest("mds", info['name'],
                                                   "mds_sessions.session_count")

                laggy = "laggy_since" in info

                # info['state'] looks like "up:active"; keep the part
                # after the colon.
                state = info['state'].split(":")[1]
                if laggy:
                    state += "(laggy)"

                # if state == "active" and not laggy:
                #     c_state = self.colorize(state, self.GREEN)
                # else:
                #     c_state = self.colorize(state, self.YELLOW)

                # Populate based on context of state, e.g. client
                # ops for an active daemon, replay progress, reconnect
                # progress
                activity = ""

                if state == "active":
                    activity = "Reqs: " + self.format_dimless(
                        self.get_rate("mds", info['name'], "mds_server.handle_client_request"),
                        5
                    ) + "/s"

                metadata = self.get_metadata('mds', info['name'])
                mds_versions[metadata['ceph_version']].append(info['name'])
                rank_table.append(
                    {
                        "rank": rank,
                        "state": state,
                        "mds": info['name'],
                        "activity": activity,
                        "dns": dns,
                        "inos": inos
                    }
                )

            else:
                rank_table.append(
                    {
                        "rank": rank,
                        "state": "failed",
                        "mds": "",
                        "activity": "",
                        "dns": 0,
                        "inos": 0
                    }
                )

        # Find the standby replays
        # (Python 2 iteritems; rows are appended after the in/failed rows.)
        for gid_str, daemon_info in mdsmap['info'].iteritems():
            if daemon_info['state'] != "up:standby-replay":
                continue

            inos = self.get_latest("mds", daemon_info['name'], "mds_mem.ino")
            dns = self.get_latest("mds", daemon_info['name'], "mds.inodes")

            activity = "Evts: " + self.format_dimless(
                self.get_rate("mds", daemon_info['name'], "mds_log.replay"),
                5
            ) + "/s"

            rank_table.append(
                {
                    "rank": "{0}-s".format(daemon_info['rank']),
                    "state": "standby-replay",
                    "mds": daemon_info['name'],
                    "activity": activity,
                    "dns": dns,
                    "inos": inos
                }
            )

        # Usage for the metadata pool plus every data pool of this fs.
        df = self.get("df")
        pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
        osdmap = self.get("osd_map")
        pools = dict([(p['pool'], p) for p in osdmap['pools']])
        metadata_pool_id = mdsmap['metadata_pool']
        data_pool_ids = mdsmap['data_pools']

        pools_table = []
        for pool_id in [metadata_pool_id] + data_pool_ids:
            pool_type = "metadata" if pool_id == metadata_pool_id else "data"
            stats = pool_stats[pool_id]
            pools_table.append({
                "pool": pools[pool_id]['pool_name'],
                "type": pool_type,
                "used": stats['bytes_used'],
                "avail": stats['max_avail']
            })

        standby_table = []
        for standby in fsmap['standbys']:
            metadata = self.get_metadata('mds', standby['name'])
            mds_versions[metadata['ceph_version']].append(standby['name'])

            standby_table.append({
                'name': standby['name']
            })

        return {
            "filesystem": {
                "id": fs_id,
                "name": mdsmap['fs_name'],
                "client_count": client_count,
                "clients_url": "/clients/{0}/".format(fs_id),
                "ranks": rank_table,
                "pools": pools_table
            },
            "standbys": standby_table,
            "versions": mds_versions
        }
379
380 def serve(self):
381 current_dir = os.path.dirname(os.path.abspath(__file__))
382
383 jinja_loader = jinja2.FileSystemLoader(current_dir)
384 env = jinja2.Environment(loader=jinja_loader)
385
386 result = CommandResult("")
387 self.send_command(result, "mon", "", json.dumps({
388 "prefix":"log last",
389 "format": "json"
390 }), "")
391 r, outb, outs = result.wait()
392 if r != 0:
393 # Oh well. We won't let this stop us though.
394 self.log.error("Error fetching log history (r={0}, \"{1}\")".format(
395 r, outs))
396 else:
397 try:
398 lines = json.loads(outb)
399 except ValueError:
400 self.log.error("Error decoding log history")
401 else:
402 for l in lines:
403 if l['channel'] == 'audit':
404 self.audit_buffer.appendleft(l)
405 else:
406 self.log_buffer.appendleft(l)
407
408 self.log_primed = True
409
410 class Root(object):
411 def _toplevel_data(self):
412 """
413 Data consumed by the base.html template
414 """
415 fsmap = global_instance().get_sync_object(FsMap)
416 filesystems = [
417 {
418 "id": f['id'],
419 "name": f['mdsmap']['fs_name'],
420 "url": "/filesystem/{0}/".format(f['id'])
421 }
422 for f in fsmap.data['filesystems']
423 ]
424
425 return {
426 'health': global_instance().get_sync_object(Health).data,
427 'filesystems': filesystems
428 }
429
430 @cherrypy.expose
431 def filesystem(self, fs_id):
432 template = env.get_template("filesystem.html")
433
434 toplevel_data = self._toplevel_data()
435
436 content_data = {
437 "fs_status": global_instance().fs_status(int(fs_id))
438 }
439
440 return template.render(
441 ceph_version=global_instance().version,
442 toplevel_data=json.dumps(toplevel_data, indent=2),
443 content_data=json.dumps(content_data, indent=2)
444 )
445
446 @cherrypy.expose
447 @cherrypy.tools.json_out()
448 def filesystem_data(self, fs_id):
449 return global_instance().fs_status(int(fs_id))
450
451 def _osd(self, osd_id):
452 #global_instance().fs_status(int(fs_id))
453 osd_id = int(osd_id)
454
455 osd_map = global_instance().get("osd_map")
456
457 osd = None
458 for o in osd_map['osds']:
459 if o['osd'] == osd_id:
460 osd = o
461 break
462
463 assert osd is not None # TODO 400
464
465 osd_spec = "{0}".format(osd_id)
466
467 osd_metadata = global_instance().get_metadata(
468 "osd", osd_spec)
469
470 result = CommandResult("")
471 global_instance().send_command(result, "osd", osd_spec,
472 json.dumps({
473 "prefix": "perf histogram dump",
474 }),
475 "")
476 r, outb, outs = result.wait()
477 assert r == 0
478 histogram = json.loads(outb)
479
480 return {
481 "osd": osd,
482 "osd_metadata": osd_metadata,
483 "osd_histogram": histogram
484 }
485
486 @cherrypy.expose
487 def osd_perf(self, osd_id):
488 template = env.get_template("osd_perf.html")
489 toplevel_data = self._toplevel_data()
490
491 return template.render(
492 ceph_version=global_instance().version,
493 toplevel_data=json.dumps(toplevel_data, indent=2),
494 content_data=json.dumps(self._osd(osd_id), indent=2)
495 )
496
497 @cherrypy.expose
498 @cherrypy.tools.json_out()
499 def osd_perf_data(self, osd_id):
500 return self._osd(osd_id)
501
502 def _clients(self, fs_id):
503 cephfs_clients = global_instance().cephfs_clients.get(fs_id, None)
504 if cephfs_clients is None:
505 cephfs_clients = CephFSClients(global_instance(), fs_id)
506 global_instance().cephfs_clients[fs_id] = cephfs_clients
507
508 status, clients = cephfs_clients.get()
509 #TODO do something sensible with status
510
511 # Decorate the metadata with some fields that will be
512 # indepdendent of whether it's a kernel or userspace
513 # client, so that the javascript doesn't have to grok that.
514 for client in clients:
515 if "ceph_version" in client['client_metadata']:
516 client['type'] = "userspace"
517 client['version'] = client['client_metadata']['ceph_version']
518 client['hostname'] = client['client_metadata']['hostname']
519 elif "kernel_version" in client['client_metadata']:
520 client['type'] = "kernel"
521 client['version'] = client['kernel_version']
522 client['hostname'] = client['client_metadata']['hostname']
523 else:
524 client['type'] = "unknown"
525 client['version'] = ""
526 client['hostname'] = ""
527
528 return clients
529
530 @cherrypy.expose
531 def clients(self, fs_id):
532 template = env.get_template("clients.html")
533
534 toplevel_data = self._toplevel_data()
535
536 clients = self._clients(int(fs_id))
537 global_instance().log.debug(json.dumps(clients, indent=2))
538 content_data = {
539 "clients": clients,
540 "fscid": fs_id
541 }
542
543 return template.render(
544 ceph_version=global_instance().version,
545 toplevel_data=json.dumps(toplevel_data, indent=2),
546 content_data=json.dumps(content_data, indent=2)
547 )
548
549 @cherrypy.expose
550 @cherrypy.tools.json_out()
551 def clients_data(self, fs_id):
552 return self._clients(int(fs_id))
553
554 def _rbd(self, pool_name):
555 rbd_ls = global_instance().rbd_ls.get(pool_name, None)
556 if rbd_ls is None:
557 rbd_ls = RbdLs(global_instance(), pool_name)
558 global_instance().rbd_ls[pool_name] = rbd_ls
559
560 status, value = rbd_ls.get()
561
562 interval = 5
563
564 wait = interval - rbd_ls.latency
565 def wait_and_load():
566 time.sleep(wait)
567 rbd_ls.get()
568
569 threading.Thread(target=wait_and_load).start()
570
571 assert status != RbdLs.VALUE_NONE # FIXME bubble status up to UI
572 return value
573
574 @cherrypy.expose
575 def rbd(self, pool_name):
576 template = env.get_template("rbd.html")
577
578 toplevel_data = self._toplevel_data()
579
580 images = self._rbd(pool_name)
581 content_data = {
582 "images": images,
583 "pool_name": pool_name
584 }
585
586 return template.render(
587 ceph_version=global_instance().version,
588 toplevel_data=json.dumps(toplevel_data, indent=2),
589 content_data=json.dumps(content_data, indent=2)
590 )
591
592 @cherrypy.expose
593 @cherrypy.tools.json_out()
594 def rbd_data(self, pool_name):
595 return self._rbd(pool_name)
596
597 @cherrypy.expose
598 def health(self):
599 template = env.get_template("health.html")
600 return template.render(
601 ceph_version=global_instance().version,
602 toplevel_data=json.dumps(self._toplevel_data(), indent=2),
603 content_data=json.dumps(self._health(), indent=2)
604 )
605
606 @cherrypy.expose
607 def servers(self):
608 template = env.get_template("servers.html")
609 return template.render(
610 ceph_version=global_instance().version,
611 toplevel_data=json.dumps(self._toplevel_data(), indent=2),
612 content_data=json.dumps(self._servers(), indent=2)
613 )
614
615 def _servers(self):
616 servers = global_instance().list_servers()
617 return {
618 'servers': global_instance().list_servers()
619 }
620
621 @cherrypy.expose
622 @cherrypy.tools.json_out()
623 def servers_data(self):
624 return self._servers()
625
626 def _health(self):
627 # Fuse osdmap with pg_summary to get description of pools
628 # including their PG states
629 osd_map = global_instance().get_sync_object(OsdMap).data
630 pg_summary = global_instance().get_sync_object(PgSummary).data
631 pools = []
632
633 if len(global_instance().pool_stats) == 0:
634 global_instance().update_pool_stats()
635
636 for pool in osd_map['pools']:
637 pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()]
638 stats = global_instance().pool_stats[pool['pool']]
639 s = {}
640
641 def get_rate(series):
642 if len(series) >= 2:
643 return (float(series[0][1]) - float(series[1][1])) / (float(series[0][0]) - float(series[1][0]))
644 else:
645 return 0
646
647 for stat_name, stat_series in stats.items():
648 s[stat_name] = {
649 'latest': stat_series[0][1],
650 'rate': get_rate(stat_series),
651 'series': [i for i in stat_series]
652 }
653 pool['stats'] = s
654 pools.append(pool)
655
656 # Not needed, skip the effort of transmitting this
657 # to UI
658 del osd_map['pg_temp']
659
660 return {
661 "health": global_instance().get_sync_object(Health).data,
662 "mon_status": global_instance().get_sync_object(
663 MonStatus).data,
664 "osd_map": osd_map,
665 "clog": list(global_instance().log_buffer),
666 "audit_log": list(global_instance().audit_buffer),
667 "pools": pools
668 }
669
670 @cherrypy.expose
671 @cherrypy.tools.json_out()
672 def health_data(self):
673 return self._health()
674
675 @cherrypy.expose
676 def index(self):
677 return self.health()
678
679 @cherrypy.expose
680 @cherrypy.tools.json_out()
681 def toplevel_data(self):
682 return self._toplevel_data()
683
684 def _get_mds_names(self, filesystem_id=None):
685 names = []
686
687 fsmap = global_instance().get("fs_map")
688 for fs in fsmap['filesystems']:
689 if filesystem_id is not None and fs['id'] != filesystem_id:
690 continue
691 names.extend([info['name'] for _, info in fs['mdsmap']['info'].items()])
692
693 if filesystem_id is None:
694 names.extend(info['name'] for info in fsmap['standbys'])
695
696 return names
697
698 @cherrypy.expose
699 @cherrypy.tools.json_out()
700 def mds_counters(self, fs_id):
701 """
702 Result format: map of daemon name to map of counter to list of datapoints
703 """
704
705 # Opinionated list of interesting performance counters for the GUI --
706 # if you need something else just add it. See how simple life is
707 # when you don't have to write general purpose APIs?
708 counters = [
709 "mds_server.handle_client_request",
710 "mds_log.ev",
711 "mds_cache.num_strays",
712 "mds.exported",
713 "mds.exported_inodes",
714 "mds.imported",
715 "mds.imported_inodes",
716 "mds.inodes",
717 "mds.caps",
718 "mds.subtrees"
719 ]
720
721 result = {}
722 mds_names = self._get_mds_names(int(fs_id))
723
724 for mds_name in mds_names:
725 result[mds_name] = {}
726 for counter in counters:
727 data = global_instance().get_counter("mds", mds_name, counter)
728 if data is not None:
729 result[mds_name][counter] = data[counter]
730 else:
731 result[mds_name][counter] = []
732
733 return dict(result)
734
735 server_addr = self.get_localized_config('server_addr')
736 server_port = self.get_localized_config('server_port') or '7000'
737 if server_addr is None:
738 raise RuntimeError('no server_addr configured; try "ceph config-key put mgr/dashboard/server_addr <ip>"')
739 log.info("server_addr: %s server_port: %s" % (server_addr, server_port))
740 cherrypy.config.update({
741 'server.socket_host': server_addr,
742 'server.socket_port': int(server_port),
743 'engine.autoreload.on': False
744 })
745
746 static_dir = os.path.join(current_dir, 'static')
747 conf = {
748 "/static": {
749 "tools.staticdir.on": True,
750 'tools.staticdir.dir': static_dir
751 }
752 }
753 log.info("Serving static from {0}".format(static_dir))
754 cherrypy.tree.mount(Root(), "/", conf)
755
756 log.info("Starting engine...")
757 cherrypy.engine.start()
758 log.info("Waiting for engine...")
759 cherrypy.engine.block()
760 log.info("Engine done.")