]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/dashboard/module.py
3 Demonstrate writing a Ceph web interface inside a mgr module.
6 # We must share a global reference to this instance, because it is the
7 # gatekeeper to all accesses to data from the C++ side (e.g. the REST API
8 # request handlers need to see it)
9 from collections
import defaultdict
12 _global_instance
= {'plugin': None}
13 def global_instance():
14 assert _global_instance
['plugin'] is not None
15 return _global_instance
['plugin']
29 from mgr_module
import MgrModule
, CommandResult
31 from types
import OsdMap
, NotFound
, Config
, FsMap
, MonMap
, \
32 PgSummary
, Health
, MonStatus
35 from rbd_ls
import RbdLs
, RbdPoolLs
36 from cephfs_clients
import CephFSClients
39 log
= logging
.getLogger("dashboard")
42 # How many cluster log lines shall we hold onto in our
43 # python module for the convenience of the GUI?
46 # cherrypy likes to sys.exit on error. don't let it take us down too!
50 os
._exit
= os_exit_noop
def recurse_refs(root, path):
    """Debug helper: walk a nested dict/list structure and log each
    node's refcount and type, labelling it with its access path.
    """
    children = []
    if isinstance(root, dict):
        children = [(path + "->%s" % k, v) for k, v in root.items()]
    elif isinstance(root, list):
        children = [(path + "[%d]" % n, i) for n, i in enumerate(root)]

    for child_path, child in children:
        recurse_refs(child, child_path)

    # Each call logs after recursing, so leaves are reported before
    # their containers.
    log.info("%s %d (%s)" % (path, sys.getrefcount(root), root.__class__))
64 class Module(MgrModule
):
65 def __init__(self
, *args
, **kwargs
):
66 super(Module
, self
).__init
__(*args
, **kwargs
)
67 _global_instance
['plugin'] = self
68 self
.log
.info("Constructing module {0}: instance {1}".format(
69 __name__
, _global_instance
))
71 self
.log_primed
= False
72 self
.log_buffer
= collections
.deque(maxlen
=LOG_BUFFER_SIZE
)
73 self
.audit_buffer
= collections
.deque(maxlen
=LOG_BUFFER_SIZE
)
75 # Keep a librados instance for those that need it.
78 # Stateful instances of RbdLs, hold cached results. Key to dict
82 # Stateful instance of RbdPoolLs, hold cached list of RBD
84 self
.rbd_pool_ls
= RbdPoolLs(self
)
86 # Stateful instances of CephFSClients, hold cached results. Key to
88 self
.cephfs_clients
= {}
90 # A short history of pool df stats
91 self
.pool_stats
= defaultdict(lambda: defaultdict(
92 lambda: collections
.deque(maxlen
=10)))
97 A librados instance to be shared by any classes within
98 this mgr module that want one.
103 from mgr_module
import ceph_state
104 ctx_capsule
= ceph_state
.get_context()
105 self
._rados
= rados
.Rados(context
=ctx_capsule
)
106 self
._rados
.connect()
110 def update_pool_stats(self
):
111 df
= global_instance().get("df")
112 pool_stats
= dict([(p
['id'], p
['stats']) for p
in df
['pools']])
114 for pool_id
, stats
in pool_stats
.items():
115 for stat_name
, stat_val
in stats
.items():
116 self
.pool_stats
[pool_id
][stat_name
].appendleft((now
, stat_val
))
118 def notify(self
, notify_type
, notify_val
):
119 if notify_type
== "clog":
120 # Only store log messages once we've done our initial load,
121 # so that we don't end up duplicating.
123 if notify_val
['channel'] == "audit":
124 self
.audit_buffer
.appendleft(notify_val
)
126 self
.log_buffer
.appendleft(notify_val
)
127 elif notify_type
== "pg_summary":
128 self
.update_pool_stats()
132 def get_sync_object(self
, object_type
, path
=None):
133 if object_type
== OsdMap
:
134 data
= self
.get("osd_map")
136 assert data
is not None
138 data
['tree'] = self
.get("osd_map_tree")
139 data
['crush'] = self
.get("osd_map_crush")
140 data
['crush_map_text'] = self
.get("osd_map_crush_map_text")
141 data
['osd_metadata'] = self
.get("osd_metadata")
143 elif object_type
== Config
:
144 data
= self
.get("config")
146 elif object_type
== MonMap
:
147 data
= self
.get("mon_map")
149 elif object_type
== FsMap
:
150 data
= self
.get("fs_map")
152 elif object_type
== PgSummary
:
153 data
= self
.get("pg_summary")
154 self
.log
.debug("JSON: {0}".format(data
))
155 obj
= PgSummary(data
)
156 elif object_type
== Health
:
157 data
= self
.get("health")
158 obj
= Health(json
.loads(data
['json']))
159 elif object_type
== MonStatus
:
160 data
= self
.get("mon_status")
161 obj
= MonStatus(json
.loads(data
['json']))
163 raise NotImplementedError(object_type
)
165 # TODO: move 'path' handling up into C++ land so that we only
166 # Pythonize the part we're interested in
170 if isinstance(obj
, dict):
173 obj
= getattr(obj
, part
)
174 except (AttributeError, KeyError):
175 raise NotFound(object_type
, path
)
180 log
.info("Stopping server...")
181 cherrypy
.engine
.exit()
182 log
.info("Stopped server")
184 log
.info("Stopping librados...")
186 self
._rados
.shutdown()
187 log
.info("Stopped librados.")
189 def get_latest(self
, daemon_type
, daemon_name
, stat
):
190 data
= self
.get_counter(daemon_type
, daemon_name
, stat
)[stat
]
196 def get_rate(self
, daemon_type
, daemon_name
, stat
):
197 data
= self
.get_counter(daemon_type
, daemon_name
, stat
)[stat
]
199 if data
and len(data
) > 1:
200 return (data
[-1][1] - data
[-2][1]) / float(data
[-1][0] - data
[-2][0])
204 def format_dimless(self
, n
, width
, colored
=True):
206 Format a number without units, so as to fit into `width` characters, substituting
207 an appropriate unit suffix.
209 units
= [' ', 'k', 'M', 'G', 'T', 'P']
211 while len("%s" % (int(n
) // (1000**unit
))) > width
- 1:
215 truncated_float
= ("%f" % (n
/ (1000.0 ** unit
)))[0:width
- 1]
216 if truncated_float
[-1] == '.':
217 truncated_float
= " " + truncated_float
[0:-1]
219 truncated_float
= "%{wid}d".format(wid
=width
-1) % n
220 formatted
= "%s%s" % (truncated_float
, units
[unit
])
223 # TODO: html equivalent
225 # color = self.BLACK, False
227 # color = self.YELLOW, False
228 # return self.bold(self.colorize(formatted[0:-1], color[0], color[1])) \
229 # + self.bold(self.colorize(formatted[-1], self.BLACK, False))
234 def fs_status(self
, fs_id
):
235 mds_versions
= defaultdict(list)
237 fsmap
= self
.get("fs_map")
239 for fs
in fsmap
['filesystems']:
240 if fs
['id'] == fs_id
:
246 mdsmap
= filesystem
['mdsmap']
250 for rank
in mdsmap
["in"]:
251 up
= "mds_{0}".format(rank
) in mdsmap
["up"]
253 gid
= mdsmap
['up']["mds_{0}".format(rank
)]
254 info
= mdsmap
['info']['gid_{0}'.format(gid
)]
255 dns
= self
.get_latest("mds", info
['name'], "mds.inodes")
256 inos
= self
.get_latest("mds", info
['name'], "mds_mem.ino")
259 client_count
= self
.get_latest("mds", info
['name'],
260 "mds_sessions.session_count")
261 elif client_count
== 0:
262 # In case rank 0 was down, look at another rank's
263 # sessionmap to get an indication of clients.
264 client_count
= self
.get_latest("mds", info
['name'],
265 "mds_sessions.session_count")
267 laggy
= "laggy_since" in info
269 state
= info
['state'].split(":")[1]
273 # if state == "active" and not laggy:
274 # c_state = self.colorize(state, self.GREEN)
276 # c_state = self.colorize(state, self.YELLOW)
278 # Populate based on context of state, e.g. client
279 # ops for an active daemon, replay progress, reconnect
283 if state
== "active":
284 activity
= "Reqs: " + self
.format_dimless(
285 self
.get_rate("mds", info
['name'], "mds_server.handle_client_request"),
289 metadata
= self
.get_metadata('mds', info
['name'])
290 mds_versions
[metadata
['ceph_version']].append(info
['name'])
296 "activity": activity
,
314 # Find the standby replays
315 for gid_str
, daemon_info
in mdsmap
['info'].iteritems():
316 if daemon_info
['state'] != "up:standby-replay":
319 inos
= self
.get_latest("mds", daemon_info
['name'], "mds_mem.ino")
320 dns
= self
.get_latest("mds", daemon_info
['name'], "mds.inodes")
322 activity
= "Evts: " + self
.format_dimless(
323 self
.get_rate("mds", daemon_info
['name'], "mds_log.replay"),
329 "rank": "{0}-s".format(daemon_info
['rank']),
330 "state": "standby-replay",
331 "mds": daemon_info
['name'],
332 "activity": activity
,
339 pool_stats
= dict([(p
['id'], p
['stats']) for p
in df
['pools']])
340 osdmap
= self
.get("osd_map")
341 pools
= dict([(p
['pool'], p
) for p
in osdmap
['pools']])
342 metadata_pool_id
= mdsmap
['metadata_pool']
343 data_pool_ids
= mdsmap
['data_pools']
346 for pool_id
in [metadata_pool_id
] + data_pool_ids
:
347 pool_type
= "metadata" if pool_id
== metadata_pool_id
else "data"
348 stats
= pool_stats
[pool_id
]
350 "pool": pools
[pool_id
]['pool_name'],
352 "used": stats
['bytes_used'],
353 "avail": stats
['max_avail']
357 for standby
in fsmap
['standbys']:
358 metadata
= self
.get_metadata('mds', standby
['name'])
359 mds_versions
[metadata
['ceph_version']].append(standby
['name'])
361 standby_table
.append({
362 'name': standby
['name']
368 "name": mdsmap
['fs_name'],
369 "client_count": client_count
,
370 "clients_url": "/clients/{0}/".format(fs_id
),
374 "standbys": standby_table
,
375 "versions": mds_versions
379 current_dir
= os
.path
.dirname(os
.path
.abspath(__file__
))
381 jinja_loader
= jinja2
.FileSystemLoader(current_dir
)
382 env
= jinja2
.Environment(loader
=jinja_loader
)
384 result
= CommandResult("")
385 self
.send_command(result
, "mon", "", json
.dumps({
389 r
, outb
, outs
= result
.wait()
391 # Oh well. We won't let this stop us though.
392 self
.log
.error("Error fetching log history (r={0}, \"{1}\")".format(
396 lines
= json
.loads(outb
)
398 self
.log
.error("Error decoding log history")
401 if l
['channel'] == 'audit':
402 self
.audit_buffer
.appendleft(l
)
404 self
.log_buffer
.appendleft(l
)
406 self
.log_primed
= True
409 def _toplevel_data(self
):
411 Data consumed by the base.html template
413 status
, data
= global_instance().rbd_pool_ls
.get()
415 log
.warning("Failed to get RBD pool list")
421 "url": "/rbd/{0}/".format(name
)
424 ], key
=lambda k
: k
['name'])
426 fsmap
= global_instance().get_sync_object(FsMap
)
430 "name": f
['mdsmap']['fs_name'],
431 "url": "/filesystem/{0}/".format(f
['id'])
433 for f
in fsmap
.data
['filesystems']
437 'rbd_pools': rbd_pools
,
438 'health_status': self
._health
_data
()['status'],
439 'filesystems': filesystems
443 def filesystem(self
, fs_id
):
444 template
= env
.get_template("filesystem.html")
446 toplevel_data
= self
._toplevel
_data
()
449 "fs_status": global_instance().fs_status(int(fs_id
))
452 return template
.render(
453 ceph_version
=global_instance().version
,
454 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
455 content_data
=json
.dumps(content_data
, indent
=2)
459 @cherrypy.tools
.json_out()
460 def filesystem_data(self
, fs_id
):
461 return global_instance().fs_status(int(fs_id
))
463 def _osd(self
, osd_id
):
464 #global_instance().fs_status(int(fs_id))
467 osd_map
= global_instance().get("osd_map")
470 for o
in osd_map
['osds']:
471 if o
['osd'] == osd_id
:
475 assert osd
is not None # TODO 400
477 osd_spec
= "{0}".format(osd_id
)
479 osd_metadata
= global_instance().get_metadata(
482 result
= CommandResult("")
483 global_instance().send_command(result
, "osd", osd_spec
,
485 "prefix": "perf histogram dump",
488 r
, outb
, outs
= result
.wait()
490 histogram
= json
.loads(outb
)
494 "osd_metadata": osd_metadata
,
495 "osd_histogram": histogram
499 def osd_perf(self
, osd_id
):
500 template
= env
.get_template("osd_perf.html")
501 toplevel_data
= self
._toplevel
_data
()
503 return template
.render(
504 ceph_version
=global_instance().version
,
505 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
506 content_data
=json
.dumps(self
._osd
(osd_id
), indent
=2)
510 @cherrypy.tools
.json_out()
511 def osd_perf_data(self
, osd_id
):
512 return self
._osd
(osd_id
)
514 def _clients(self
, fs_id
):
515 cephfs_clients
= global_instance().cephfs_clients
.get(fs_id
, None)
516 if cephfs_clients
is None:
517 cephfs_clients
= CephFSClients(global_instance(), fs_id
)
518 global_instance().cephfs_clients
[fs_id
] = cephfs_clients
520 status
, clients
= cephfs_clients
.get()
521 #TODO do something sensible with status
523 # Decorate the metadata with some fields that will be
524 # independent of whether it's a kernel or userspace
525 # client, so that the javascript doesn't have to grok that.
526 for client
in clients
:
527 if "ceph_version" in client
['client_metadata']:
528 client
['type'] = "userspace"
529 client
['version'] = client
['client_metadata']['ceph_version']
530 client
['hostname'] = client
['client_metadata']['hostname']
531 elif "kernel_version" in client
['client_metadata']:
532 client
['type'] = "kernel"
533 client
['version'] = client
['client_metadata']['kernel_version']
534 client
['hostname'] = client
['client_metadata']['hostname']
536 client
['type'] = "unknown"
537 client
['version'] = ""
538 client
['hostname'] = ""
543 def clients(self
, fscid_str
):
545 fscid
= int(fscid_str
)
547 raise cherrypy
.HTTPError(400,
548 "Invalid filesystem id {0}".format(fscid_str
))
551 fs_name
= FsMap(global_instance().get(
552 "fs_map")).get_filesystem(fscid
)['mdsmap']['fs_name']
554 log
.warning("Missing FSCID, dumping fsmap:\n{0}".format(
555 json
.dumps(global_instance().get("fs_map"), indent
=2)
557 raise cherrypy
.HTTPError(404,
558 "No filesystem with id {0}".format(fscid
))
560 clients
= self
._clients
(fscid
)
561 global_instance().log
.debug(json
.dumps(clients
, indent
=2))
566 "fs_url": "/filesystem/" + fscid_str
+ "/"
569 template
= env
.get_template("clients.html")
570 return template
.render(
571 ceph_version
=global_instance().version
,
572 toplevel_data
=json
.dumps(self
._toplevel
_data
(), indent
=2),
573 content_data
=json
.dumps(content_data
, indent
=2)
577 @cherrypy.tools
.json_out()
578 def clients_data(self
, fs_id
):
579 return self
._clients
(int(fs_id
))
581 def _rbd(self
, pool_name
):
582 rbd_ls
= global_instance().rbd_ls
.get(pool_name
, None)
584 rbd_ls
= RbdLs(global_instance(), pool_name
)
585 global_instance().rbd_ls
[pool_name
] = rbd_ls
587 status
, value
= rbd_ls
.get()
591 wait
= interval
- rbd_ls
.latency
596 threading
.Thread(target
=wait_and_load
).start()
598 assert status
!= RbdLs
.VALUE_NONE
# FIXME bubble status up to UI
602 def rbd(self
, pool_name
):
603 template
= env
.get_template("rbd.html")
605 toplevel_data
= self
._toplevel
_data
()
607 images
= self
._rbd
(pool_name
)
610 "pool_name": pool_name
613 return template
.render(
614 ceph_version
=global_instance().version
,
615 toplevel_data
=json
.dumps(toplevel_data
, indent
=2),
616 content_data
=json
.dumps(content_data
, indent
=2)
620 @cherrypy.tools
.json_out()
621 def rbd_data(self
, pool_name
):
622 return self
._rbd
(pool_name
)
626 template
= env
.get_template("health.html")
627 return template
.render(
628 ceph_version
=global_instance().version
,
629 toplevel_data
=json
.dumps(self
._toplevel
_data
(), indent
=2),
630 content_data
=json
.dumps(self
._health
(), indent
=2)
635 template
= env
.get_template("servers.html")
636 return template
.render(
637 ceph_version
=global_instance().version
,
638 toplevel_data
=json
.dumps(self
._toplevel
_data
(), indent
=2),
639 content_data
=json
.dumps(self
._servers
(), indent
=2)
644 'servers': global_instance().list_servers()
648 @cherrypy.tools
.json_out()
649 def servers_data(self
):
650 return self
._servers
()
652 def _health_data(self
):
653 health
= global_instance().get_sync_object(Health
).data
654 # Transform the `checks` dict into a list for the convenience
655 # of rendering from javascript.
657 for k
, v
in health
['checks'].iteritems():
661 checks
= sorted(checks
, cmp=lambda a
, b
: a
['severity'] > b
['severity'])
663 health
['checks'] = checks
668 # Fuse osdmap with pg_summary to get description of pools
669 # including their PG states
670 osd_map
= global_instance().get_sync_object(OsdMap
).data
671 pg_summary
= global_instance().get_sync_object(PgSummary
).data
674 if len(global_instance().pool_stats
) == 0:
675 global_instance().update_pool_stats()
677 for pool
in osd_map
['pools']:
678 pool
['pg_status'] = pg_summary
['by_pool'][pool
['pool'].__str
__()]
679 stats
= global_instance().pool_stats
[pool
['pool']]
682 def get_rate(series
):
684 return (float(series
[0][1]) - float(series
[1][1])) / (float(series
[0][0]) - float(series
[1][0]))
688 for stat_name
, stat_series
in stats
.items():
690 'latest': stat_series
[0][1],
691 'rate': get_rate(stat_series
),
692 'series': [i
for i
in stat_series
]
697 # Not needed, skip the effort of transmitting this
699 del osd_map
['pg_temp']
701 df
= global_instance().get("df")
702 df
['stats']['total_objects'] = sum(
703 [p
['stats']['objects'] for p
in df
['pools']])
706 "health": self
._health
_data
(),
707 "mon_status": global_instance().get_sync_object(
709 "fs_map": global_instance().get_sync_object(FsMap
).data
,
711 "clog": list(global_instance().log_buffer
),
712 "audit_log": list(global_instance().audit_buffer
),
714 "mgr_map": global_instance().get("mgr_map"),
719 @cherrypy.tools
.json_out()
720 def health_data(self
):
721 return self
._health
()
728 @cherrypy.tools
.json_out()
729 def toplevel_data(self
):
730 return self
._toplevel
_data
()
732 def _get_mds_names(self
, filesystem_id
=None):
735 fsmap
= global_instance().get("fs_map")
736 for fs
in fsmap
['filesystems']:
737 if filesystem_id
is not None and fs
['id'] != filesystem_id
:
739 names
.extend([info
['name'] for _
, info
in fs
['mdsmap']['info'].items()])
741 if filesystem_id
is None:
742 names
.extend(info
['name'] for info
in fsmap
['standbys'])
747 @cherrypy.tools
.json_out()
748 def mds_counters(self
, fs_id
):
750 Result format: map of daemon name to map of counter to list of datapoints
753 # Opinionated list of interesting performance counters for the GUI --
754 # if you need something else just add it. See how simple life is
755 # when you don't have to write general purpose APIs?
757 "mds_server.handle_client_request",
759 "mds_cache.num_strays",
761 "mds.exported_inodes",
763 "mds.imported_inodes",
770 mds_names
= self
._get
_mds
_names
(int(fs_id
))
772 for mds_name
in mds_names
:
773 result
[mds_name
] = {}
774 for counter
in counters
:
775 data
= global_instance().get_counter("mds", mds_name
, counter
)
777 result
[mds_name
][counter
] = data
[counter
]
779 result
[mds_name
][counter
] = []
783 server_addr
= self
.get_localized_config('server_addr', '::')
784 server_port
= self
.get_localized_config('server_port', '7000')
785 if server_addr
is None:
786 raise RuntimeError('no server_addr configured; try "ceph config-key put mgr/dashboard/server_addr <ip>"')
787 log
.info("server_addr: %s server_port: %s" % (server_addr
, server_port
))
788 cherrypy
.config
.update({
789 'server.socket_host': server_addr
,
790 'server.socket_port': int(server_port
),
791 'engine.autoreload.on': False
794 static_dir
= os
.path
.join(current_dir
, 'static')
797 "tools.staticdir.on": True,
798 'tools.staticdir.dir': static_dir
801 log
.info("Serving static from {0}".format(static_dir
))
802 cherrypy
.tree
.mount(Root(), "/", conf
)
804 log
.info("Starting engine...")
805 cherrypy
.engine
.start()
806 log
.info("Waiting for engine...")
807 cherrypy
.engine
.block()
808 log
.info("Engine done.")