# ceph/src/pybind/mgr/dashboard/module.py
"""
Demonstrate writing a Ceph web interface inside a mgr module.
"""
6 # We must share a global reference to this instance, because it is the
7 # gatekeeper to all accesses to data from the C++ side (e.g. the REST API
8 # request handlers need to see it)
import time

from collections import defaultdict
# We must share a global reference to this instance, because it is the
# gatekeeper to all accesses to data from the C++ side (e.g. the REST API
# request handlers need to see it).
_global_instance = {'plugin': None}


def global_instance():
    """Return the registered Module instance; asserts one has been set."""
    inst = _global_instance['plugin']
    assert inst is not None
    return inst
29 from mgr_module
import MgrModule
, CommandResult
31 from types
import OsdMap
, NotFound
, Config
, FsMap
, MonMap
, \
32 PgSummary
, Health
, MonStatus
35 from rbd_ls
import RbdLs
36 from cephfs_clients
import CephFSClients
39 log
= logging
.getLogger("dashboard")
42 # How many cluster log lines shall we hold onto in our
43 # python module for the convenience of the GUI?
46 # cherrypy likes to sys.exit on error. don't let it take us down too!
50 os
._exit
= os_exit_noop
def recurse_refs(root, path):
    """Log the refcount of ``root`` and, recursively, of every value it
    contains (dict values and list elements), labelling each entry with a
    ``path`` expression describing how it was reached."""
    if isinstance(root, dict):
        for key, value in root.items():
            recurse_refs(value, "%s->%s" % (path, key))
    elif isinstance(root, list):
        for index, element in enumerate(root):
            recurse_refs(element, "%s[%d]" % (path, index))

    log.info("%s %d (%s)" % (path, sys.getrefcount(root), root.__class__))
64 class Module(MgrModule
):
def __init__(self, *args, **kwargs):
    """Register this instance as the global plugin singleton and set up
    the buffers and per-pool stat history used by the request handlers."""
    super(Module, self).__init__(*args, **kwargs)
    _global_instance['plugin'] = self
    self.log.info("Constructing module {0}: instance {1}".format(
        __name__, _global_instance))

    # Cluster log lines are only buffered after the initial backlog load
    # (see notify()), to avoid duplicating entries.
    self.log_primed = False
    self.log_buffer = collections.deque(maxlen=LOG_BUFFER_SIZE)
    self.audit_buffer = collections.deque(maxlen=LOG_BUFFER_SIZE)

    # Keep a librados instance for those that need it.
    # NOTE(review): created lazily elsewhere; starts unset.
    self._rados = None

    # Stateful instances of RbdLs, hold cached results. Key to dict
    # is the pool name (read by _rbd()).
    self.rbd_ls = {}

    # Stateful instances of CephFSClients, hold cached results. Key to
    # dict is the filesystem id (read by _clients()).
    self.cephfs_clients = {}

    # A short history of pool df stats
    self.pool_stats = defaultdict(lambda: defaultdict(
        lambda: collections.deque(maxlen=10)))
93 A librados instance to be shared by any classes within
94 this mgr module that want one.
99 from mgr_module
import ceph_state
100 ctx_capsule
= ceph_state
.get_context()
101 self
._rados
= rados
.Rados(context
=ctx_capsule
)
102 self
._rados
.connect()
def get_localized_config(self, key):
    """Look up ``key`` prefixed with this mgr's id ("<mgr_id>/<key>"),
    falling back to the unprefixed global value when no localized
    setting exists. Returns None when neither is set."""
    r = self.get_config(self.get_mgr_id() + '/' + key)
    if r is None:
        r = self.get_config(key)
    return r
def update_pool_stats(self):
    """Sample every pool's 'stats' section from ``df`` and append a
    timestamped datapoint to the rolling history in self.pool_stats."""
    df = global_instance().get("df")
    pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
    # One timestamp for the whole sample so all series stay aligned.
    now = time.time()
    for pool_id, stats in pool_stats.items():
        for stat_name, stat_val in stats.items():
            self.pool_stats[pool_id][stat_name].appendleft((now, stat_val))
def notify(self, notify_type, notify_val):
    """Handle a mgr notification: buffer cluster log lines by channel,
    and refresh pool stats when the PG summary changes."""
    if notify_type == "clog":
        # Only store log messages once we've done our initial load,
        # so that we don't end up duplicating.
        if self.log_primed:
            if notify_val['channel'] == "audit":
                self.audit_buffer.appendleft(notify_val)
            else:
                self.log_buffer.appendleft(notify_val)
    elif notify_type == "pg_summary":
        self.update_pool_stats()
def get_sync_object(self, object_type, path=None):
    """Fetch a native mgr object, wrap it in its ``object_type`` class,
    and optionally drill into it along ``path`` (a sequence of dict keys
    or attribute names).

    Raises NotFound when the path cannot be resolved and
    NotImplementedError for an unknown object_type.
    """
    if object_type == OsdMap:
        data = self.get("osd_map")

        assert data is not None

        # Augment the OSD map with related dumps so consumers get one
        # coherent object.
        data['tree'] = self.get("osd_map_tree")
        data['crush'] = self.get("osd_map_crush")
        data['crush_map_text'] = self.get("osd_map_crush_map_text")
        data['osd_metadata'] = self.get("osd_metadata")
        obj = OsdMap(data)
    elif object_type == Config:
        data = self.get("config")
        obj = Config(data)
    elif object_type == MonMap:
        data = self.get("mon_map")
        obj = MonMap(data)
    elif object_type == FsMap:
        data = self.get("fs_map")
        obj = FsMap(data)
    elif object_type == PgSummary:
        data = self.get("pg_summary")
        self.log.debug("JSON: {0}".format(data))
        obj = PgSummary(data)
    elif object_type == Health:
        data = self.get("health")
        obj = Health(json.loads(data['json']))
    elif object_type == MonStatus:
        data = self.get("mon_status")
        obj = MonStatus(json.loads(data['json']))
    else:
        raise NotImplementedError(object_type)

    # TODO: move 'path' handling up into C++ land so that we only
    # Pythonize the part we're interested in
    if path:
        try:
            for part in path:
                if isinstance(obj, dict):
                    obj = obj[part]
                else:
                    obj = getattr(obj, part)
        except (AttributeError, KeyError):
            raise NotFound(object_type, path)

    return obj
182 log
.info("Stopping server...")
183 cherrypy
.engine
.exit()
184 log
.info("Stopped server")
186 log
.info("Stopping librados...")
188 self
._rados
.shutdown()
189 log
.info("Stopped librados.")
def get_latest(self, daemon_type, daemon_name, stat):
    """Return the most recent value of counter ``stat`` for the given
    daemon, or 0 when no datapoints have been collected yet.

    Datapoints are (timestamp, value) pairs, newest last.
    """
    data = self.get_counter(daemon_type, daemon_name, stat)[stat]
    if data:
        return data[-1][1]
    else:
        return 0
def get_rate(self, daemon_type, daemon_name, stat):
    """Return the per-second rate of change between the two most recent
    datapoints of counter ``stat``, or 0 when fewer than two datapoints
    exist (callers feed the result straight into format_dimless, so a
    numeric fallback is required)."""
    data = self.get_counter(daemon_type, daemon_name, stat)[stat]

    if data and len(data) > 1:
        return (data[-1][1] - data[-2][1]) / float(data[-1][0] - data[-2][0])
    else:
        return 0
def format_dimless(self, n, width, colored=True):
    """
    Format a number without units, so as to fit into `width` characters,
    substituting an appropriate unit suffix (' ', 'k', 'M', 'G', 'T', 'P').
    """
    units = [' ', 'k', 'M', 'G', 'T', 'P']
    unit = 0
    # Pick the smallest unit whose integer part fits in width-1 chars
    # (the last column is reserved for the unit suffix).
    while len("%s" % (int(n) // (1000 ** unit))) > width - 1:
        unit += 1

    if unit > 0:
        truncated_float = ("%f" % (n / (1000.0 ** unit)))[0:width - 1]
        if truncated_float[-1] == '.':
            # Don't end on a dangling decimal point; right-pad instead.
            truncated_float = " " + truncated_float[0:-1]
    else:
        truncated_float = "%{wid}d".format(wid=width - 1) % n
    formatted = "%s%s" % (truncated_float, units[unit])

    # TODO: html equivalent of the CLI colorization (the colorize code
    # is commented out in the original); for now return plain text
    # regardless of `colored`.
    return formatted
236 def fs_status(self
, fs_id
):
237 mds_versions
= defaultdict(list)
239 fsmap
= self
.get("fs_map")
241 for fs
in fsmap
['filesystems']:
242 if fs
['id'] == fs_id
:
248 mdsmap
= filesystem
['mdsmap']
252 for rank
in mdsmap
["in"]:
253 up
= "mds_{0}".format(rank
) in mdsmap
["up"]
255 gid
= mdsmap
['up']["mds_{0}".format(rank
)]
256 info
= mdsmap
['info']['gid_{0}'.format(gid
)]
257 dns
= self
.get_latest("mds", info
['name'], "mds.inodes")
258 inos
= self
.get_latest("mds", info
['name'], "mds_mem.ino")
261 client_count
= self
.get_latest("mds", info
['name'],
262 "mds_sessions.session_count")
263 elif client_count
== 0:
264 # In case rank 0 was down, look at another rank's
265 # sessionmap to get an indication of clients.
266 client_count
= self
.get_latest("mds", info
['name'],
267 "mds_sessions.session_count")
269 laggy
= "laggy_since" in info
271 state
= info
['state'].split(":")[1]
275 # if state == "active" and not laggy:
276 # c_state = self.colorize(state, self.GREEN)
278 # c_state = self.colorize(state, self.YELLOW)
280 # Populate based on context of state, e.g. client
281 # ops for an active daemon, replay progress, reconnect
285 if state
== "active":
286 activity
= "Reqs: " + self
.format_dimless(
287 self
.get_rate("mds", info
['name'], "mds_server.handle_client_request"),
291 metadata
= self
.get_metadata('mds', info
['name'])
292 mds_versions
[metadata
['ceph_version']].append(info
['name'])
298 "activity": activity
,
316 # Find the standby replays
317 for gid_str
, daemon_info
in mdsmap
['info'].iteritems():
318 if daemon_info
['state'] != "up:standby-replay":
321 inos
= self
.get_latest("mds", daemon_info
['name'], "mds_mem.ino")
322 dns
= self
.get_latest("mds", daemon_info
['name'], "mds.inodes")
324 activity
= "Evts: " + self
.format_dimless(
325 self
.get_rate("mds", daemon_info
['name'], "mds_log.replay"),
331 "rank": "{0}-s".format(daemon_info
['rank']),
332 "state": "standby-replay",
333 "mds": daemon_info
['name'],
334 "activity": activity
,
341 pool_stats
= dict([(p
['id'], p
['stats']) for p
in df
['pools']])
342 osdmap
= self
.get("osd_map")
343 pools
= dict([(p
['pool'], p
) for p
in osdmap
['pools']])
344 metadata_pool_id
= mdsmap
['metadata_pool']
345 data_pool_ids
= mdsmap
['data_pools']
348 for pool_id
in [metadata_pool_id
] + data_pool_ids
:
349 pool_type
= "metadata" if pool_id
== metadata_pool_id
else "data"
350 stats
= pool_stats
[pool_id
]
352 "pool": pools
[pool_id
]['pool_name'],
354 "used": stats
['bytes_used'],
355 "avail": stats
['max_avail']
359 for standby
in fsmap
['standbys']:
360 metadata
= self
.get_metadata('mds', standby
['name'])
361 mds_versions
[metadata
['ceph_version']].append(standby
['name'])
363 standby_table
.append({
364 'name': standby
['name']
370 "name": mdsmap
['fs_name'],
371 "client_count": client_count
,
372 "clients_url": "/clients/{0}/".format(fs_id
),
376 "standbys": standby_table
,
377 "versions": mds_versions
381 current_dir
= os
.path
.dirname(os
.path
.abspath(__file__
))
383 jinja_loader
= jinja2
.FileSystemLoader(current_dir
)
384 env
= jinja2
.Environment(loader
=jinja_loader
)
386 result
= CommandResult("")
387 self
.send_command(result
, "mon", "", json
.dumps({
391 r
, outb
, outs
= result
.wait()
393 # Oh well. We won't let this stop us though.
394 self
.log
.error("Error fetching log history (r={0}, \"{1}\")".format(
398 lines
= json
.loads(outb
)
400 self
.log
.error("Error decoding log history")
403 if l
['channel'] == 'audit':
404 self
.audit_buffer
.appendleft(l
)
406 self
.log_buffer
.appendleft(l
)
408 self
.log_primed
= True
411 def _toplevel_data(self
):
413 Data consumed by the base.html template
415 fsmap
= global_instance().get_sync_object(FsMap
)
419 "name": f
['mdsmap']['fs_name'],
420 "url": "/filesystem/{0}/".format(f
['id'])
422 for f
in fsmap
.data
['filesystems']
426 'health': global_instance().get_sync_object(Health
).data
,
427 'filesystems': filesystems
431 def filesystem(self
, fs_id
):
432 template
= env
.get_template("filesystem.html")
434 toplevel_data
= self
._toplevel
_data
()
437 "fs_status": global_instance().fs_status(int(fs_id
))
440 return template
.render(
441 ceph_version
=global_instance().version
,
442 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
443 content_data
=json
.dumps(content_data
, indent
=2)
@cherrypy.tools.json_out()
def filesystem_data(self, fs_id):
    # JSON variant of the filesystem view: the raw fs_status structure.
    # fs_id arrives as a string from the URL, hence the int() conversion.
    return global_instance().fs_status(int(fs_id))
451 def _osd(self
, osd_id
):
452 #global_instance().fs_status(int(fs_id))
455 osd_map
= global_instance().get("osd_map")
458 for o
in osd_map
['osds']:
459 if o
['osd'] == osd_id
:
463 assert osd
is not None # TODO 400
465 osd_spec
= "{0}".format(osd_id
)
467 osd_metadata
= global_instance().get_metadata(
470 result
= CommandResult("")
471 global_instance().send_command(result
, "osd", osd_spec
,
473 "prefix": "perf histogram dump",
476 r
, outb
, outs
= result
.wait()
478 histogram
= json
.loads(outb
)
482 "osd_metadata": osd_metadata
,
483 "osd_histogram": histogram
487 def osd_perf(self
, osd_id
):
488 template
= env
.get_template("osd_perf.html")
489 toplevel_data
= self
._toplevel
_data
()
491 return template
.render(
492 ceph_version
=global_instance().version
,
493 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
494 content_data
=json
.dumps(self
._osd
(osd_id
), indent
=2)
@cherrypy.tools.json_out()
def osd_perf_data(self, osd_id):
    # JSON endpoint: the raw _osd() payload for the given OSD id.
    return self._osd(osd_id)
502 def _clients(self
, fs_id
):
503 cephfs_clients
= global_instance().cephfs_clients
.get(fs_id
, None)
504 if cephfs_clients
is None:
505 cephfs_clients
= CephFSClients(global_instance(), fs_id
)
506 global_instance().cephfs_clients
[fs_id
] = cephfs_clients
508 status
, clients
= cephfs_clients
.get()
509 #TODO do something sensible with status
511 # Decorate the metadata with some fields that will be
512 # indepdendent of whether it's a kernel or userspace
513 # client, so that the javascript doesn't have to grok that.
514 for client
in clients
:
515 if "ceph_version" in client
['client_metadata']:
516 client
['type'] = "userspace"
517 client
['version'] = client
['client_metadata']['ceph_version']
518 client
['hostname'] = client
['client_metadata']['hostname']
519 elif "kernel_version" in client
['client_metadata']:
520 client
['type'] = "kernel"
521 client
['version'] = client
['kernel_version']
522 client
['hostname'] = client
['client_metadata']['hostname']
524 client
['type'] = "unknown"
525 client
['version'] = ""
526 client
['hostname'] = ""
531 def clients(self
, fs_id
):
532 template
= env
.get_template("clients.html")
534 toplevel_data
= self
._toplevel
_data
()
536 clients
= self
._clients
(int(fs_id
))
537 global_instance().log
.debug(json
.dumps(clients
, indent
=2))
543 return template
.render(
544 ceph_version
=global_instance().version
,
545 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
546 content_data
=json
.dumps(content_data
, indent
=2)
@cherrypy.tools.json_out()
def clients_data(self, fs_id):
    # JSON endpoint: the decorated client list from _clients().
    # fs_id arrives as a string from the URL, hence the int() conversion.
    return self._clients(int(fs_id))
554 def _rbd(self
, pool_name
):
555 rbd_ls
= global_instance().rbd_ls
.get(pool_name
, None)
557 rbd_ls
= RbdLs(global_instance(), pool_name
)
558 global_instance().rbd_ls
[pool_name
] = rbd_ls
560 status
, value
= rbd_ls
.get()
564 wait
= interval
- rbd_ls
.latency
569 threading
.Thread(target
=wait_and_load
).start()
571 assert status
!= RbdLs
.VALUE_NONE
# FIXME bubble status up to UI
575 def rbd(self
, pool_name
):
576 template
= env
.get_template("rbd.html")
578 toplevel_data
= self
._toplevel
_data
()
580 images
= self
._rbd
(pool_name
)
583 "pool_name": pool_name
586 return template
.render(
587 ceph_version
=global_instance().version
,
588 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
589 content_data
=json
.dumps(content_data
, indent
=2)
@cherrypy.tools.json_out()
def rbd_data(self, pool_name):
    # JSON endpoint: the RBD image list for the given pool, from _rbd().
    return self._rbd(pool_name)
599 template
= env
.get_template("health.html")
600 return template
.render(
601 ceph_version
=global_instance().version
,
602 toplevel_data
=json
.dumps(self
._toplevel
_data
(), indent
=2),
603 content_data
=json
.dumps(self
._health
(), indent
=2)
608 template
= env
.get_template("servers.html")
609 return template
.render(
610 ceph_version
=global_instance().version
,
611 toplevel_data
=json
.dumps(self
._toplevel
_data
(), indent
=2),
612 content_data
=json
.dumps(self
._servers
(), indent
=2)
616 servers
= global_instance().list_servers()
618 'servers': global_instance().list_servers()
@cherrypy.tools.json_out()
def servers_data(self):
    # JSON endpoint: the server list as produced by _servers().
    return self._servers()
627 # Fuse osdmap with pg_summary to get description of pools
628 # including their PG states
629 osd_map
= global_instance().get_sync_object(OsdMap
).data
630 pg_summary
= global_instance().get_sync_object(PgSummary
).data
633 if len(global_instance().pool_stats
) == 0:
634 global_instance().update_pool_stats()
636 for pool
in osd_map
['pools']:
637 pool
['pg_status'] = pg_summary
['by_pool'][pool
['pool'].__str
__()]
638 stats
= global_instance().pool_stats
[pool
['pool']]
641 def get_rate(series
):
643 return (float(series
[0][1]) - float(series
[1][1])) / (float(series
[0][0]) - float(series
[1][0]))
647 for stat_name
, stat_series
in stats
.items():
649 'latest': stat_series
[0][1],
650 'rate': get_rate(stat_series
),
651 'series': [i
for i
in stat_series
]
656 # Not needed, skip the effort of transmitting this
658 del osd_map
['pg_temp']
661 "health": global_instance().get_sync_object(Health
).data
,
662 "mon_status": global_instance().get_sync_object(
665 "clog": list(global_instance().log_buffer
),
666 "audit_log": list(global_instance().audit_buffer
),
@cherrypy.tools.json_out()
def health_data(self):
    # JSON endpoint: the same payload the health HTML view embeds.
    return self._health()
@cherrypy.tools.json_out()
def toplevel_data(self):
    # JSON endpoint: the data consumed by the base.html template.
    return self._toplevel_data()
684 def _get_mds_names(self
, filesystem_id
=None):
687 fsmap
= global_instance().get("fs_map")
688 for fs
in fsmap
['filesystems']:
689 if filesystem_id
is not None and fs
['id'] != filesystem_id
:
691 names
.extend([info
['name'] for _
, info
in fs
['mdsmap']['info'].items()])
693 if filesystem_id
is None:
694 names
.extend(info
['name'] for info
in fsmap
['standbys'])
699 @cherrypy.tools
.json_out()
700 def mds_counters(self
, fs_id
):
702 Result format: map of daemon name to map of counter to list of datapoints
705 # Opinionated list of interesting performance counters for the GUI --
706 # if you need something else just add it. See how simple life is
707 # when you don't have to write general purpose APIs?
709 "mds_server.handle_client_request",
711 "mds_cache.num_strays",
713 "mds.exported_inodes",
715 "mds.imported_inodes",
722 mds_names
= self
._get
_mds
_names
(int(fs_id
))
724 for mds_name
in mds_names
:
725 result
[mds_name
] = {}
726 for counter
in counters
:
727 data
= global_instance().get_counter("mds", mds_name
, counter
)
729 result
[mds_name
][counter
] = data
[counter
]
731 result
[mds_name
][counter
] = []
735 server_addr
= self
.get_localized_config('server_addr')
736 server_port
= self
.get_localized_config('server_port') or '7000'
737 if server_addr
is None:
738 raise RuntimeError('no server_addr configured; try "ceph config-key put mgr/dashboard/server_addr <ip>"')
739 log
.info("server_addr: %s server_port: %s" % (server_addr
, server_port
))
740 cherrypy
.config
.update({
741 'server.socket_host': server_addr
,
742 'server.socket_port': int(server_port
),
743 'engine.autoreload.on': False
746 static_dir
= os
.path
.join(current_dir
, 'static')
749 "tools.staticdir.on": True,
750 'tools.staticdir.dir': static_dir
753 log
.info("Serving static from {0}".format(static_dir
))
754 cherrypy
.tree
.mount(Root(), "/", conf
)
756 log
.info("Starting engine...")
757 cherrypy
.engine
.start()
758 log
.info("Waiting for engine...")
759 cherrypy
.engine
.block()
760 log
.info("Engine done.")