1 local g = import 'grafonnet/grafana.libsonnet';
2 local u = import 'utils.libsonnet';
7 local PoolOverviewSingleStatPanel(format,
17 u.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'],
28 .addTarget(u.addTargetSchema(expr, 1, targetFormat, '')) + { gridPos: { x: x, y: y, w: w, h: h } };
30 local PoolOverviewStyle(alias,
40 'rgba(245, 54, 54, 0.9)',
41 'rgba(237, 129, 40, 0.89)',
42 'rgba(50, 172, 45, 0.97)',
44 'YYYY-MM-DD HH:mm:ss',
53 local PoolOverviewGraphPanel(title,
64 u.graphPanelSchema({},
77 [u.addTargetSchema(expr,
81 ) + { gridPos: { x: x, y: y, w: w, h: h } };
84 'Ceph Pools Overview',
92 { refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'], time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'] }
95 u.addAnnotationSchema(
100 'rgba(0, 211, 255, 1)',
101 'Annotations & Alerts',
106 g.template.datasource('datasource',
112 g.template.custom(label='TopK',
118 PoolOverviewSingleStatPanel(
123 'count(ceph_pool_metadata)',
130 PoolOverviewSingleStatPanel(
132 'Pools with Compression',
133 'Count of the pools that have compression enabled',
135 'count(ceph_pool_metadata{compression_mode!="none"})',
142 PoolOverviewSingleStatPanel(
144 'Total Raw Capacity',
145 'Total raw capacity available to the cluster',
147 'sum(ceph_osd_stat_bytes)',
154 PoolOverviewSingleStatPanel(
156 'Raw Capacity Consumed',
157 'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)',
159 'sum(ceph_pool_bytes_used)',
166 PoolOverviewSingleStatPanel(
169 'Total of client data stored in the cluster',
171 'sum(ceph_pool_stored)',
178 PoolOverviewSingleStatPanel(
180 'Compression Savings',
181 'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression',
183 'sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)',
190 PoolOverviewSingleStatPanel(
192 'Compression Eligibility',
193 'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n',
195 '(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100',
202 PoolOverviewSingleStatPanel(
204 'Compression Factor',
205 'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)',
207 'sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)',
217 { col: 5, desc: true },
219 PoolOverviewStyle('', 'Time', 'hidden', 'short', null, [], []),
220 PoolOverviewStyle('', 'instance', 'hidden', 'short', null, [], []),
221 PoolOverviewStyle('', 'job', 'hidden', 'short', null, [], []),
222 PoolOverviewStyle('Pool Name', 'name', 'string', 'short', null, [], []),
223 PoolOverviewStyle('Pool ID', 'pool_id', 'hidden', 'none', null, [], []),
224 PoolOverviewStyle('Compression Factor', 'Value #A', 'number', 'none', null, [], []),
225 PoolOverviewStyle('% Used', 'Value #D', 'number', 'percentunit', 'value', ['70', '85'], []),
226 PoolOverviewStyle('Usable Free', 'Value #B', 'number', 'bytes', null, [], []),
227 PoolOverviewStyle('Compression Eligibility', 'Value #C', 'number', 'percent', null, [], []),
228 PoolOverviewStyle('Compression Savings', 'Value #E', 'number', 'bytes', null, [], []),
229 PoolOverviewStyle('Growth (5d)', 'Value #F', 'number', 'bytes', 'value', ['0', '0'], []),
230 PoolOverviewStyle('IOPS', 'Value #G', 'number', 'none', null, [], []),
231 PoolOverviewStyle('Bandwidth', 'Value #H', 'number', 'Bps', null, [], []),
232 PoolOverviewStyle('', '__name__', 'hidden', 'short', null, [], []),
233 PoolOverviewStyle('', 'type', 'hidden', 'short', null, [], []),
234 PoolOverviewStyle('', 'compression_mode', 'hidden', 'short', null, [], []),
235 PoolOverviewStyle('Type', 'description', 'string', 'short', null, [], []),
236 PoolOverviewStyle('Stored', 'Value #J', 'number', 'bytes', null, [], []),
237 PoolOverviewStyle('', 'Value #I', 'hidden', 'short', null, [], []),
238 PoolOverviewStyle('Compression', 'Value #K', 'string', 'short', null, [], [{ text: 'ON', value: '1' }]),
246 '(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)',
252 'ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata',
258 '((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100',
264 '(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)',
270 '(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)',
276 'delta(ceph_pool_stored[5d])', 1, 'table', 'F'
279 'rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])',
285 'rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])',
291 'ceph_pool_metadata', 1, 'table', 'I'
294 'ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata',
300 'ceph_pool_metadata{compression_mode!="none"}', 1, 'table', 'K'
302 u.addTargetSchema('', '', '', 'L'),
304 ) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } },
305 PoolOverviewGraphPanel(
306 'Top $topk Client IOPS by Pool',
307 'This chart shows the sum of read and write IOPS from all clients by pool',
310 'topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
// Join against ceph_pool_metadata with `*`, not `+`: the join exists only to attach the instance/name labels. Adding would offset every write rate by the metadata sample's value, and the combined read+write query for this panel already uses `*`.
320 'topk($topk,rate(ceph_pool_wr[30s]) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
326 PoolOverviewGraphPanel(
327 'Top $topk Client Bandwidth by Pool',
328 'The chart shows the sum of read and write bytes from all clients, by pool',
331 'topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)',
339 PoolOverviewGraphPanel(
340 'Pool Capacity Usage (RAW)',
341 'Historical view of capacity usage, to help identify growth and trends in pool consumption',
344 'ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata',
354 local PoolDetailSingleStatPanel(format,
369 u.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'],
380 .addTarget(u.addTargetSchema(expr, 1, targetFormat, '')) + { gridPos: { x: x, y: y, w: w, h: h } };
382 local PoolDetailGraphPanel(alias,
394 u.graphPanelSchema(alias,
407 [u.addTargetSchema(expr, 1, 'time_series', legendFormat)]
408 ) + { gridPos: { x: x, y: y, w: w, h: h } };
420 refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
421 time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
425 type='grafana', id='grafana', name='Grafana', version='5.3.2'
428 type='panel', id='graph', name='Graph', version='5.0.0'
431 type='panel', id='singlestat', name='Singlestat', version='5.0.0'
434 u.addAnnotationSchema(
439 'rgba(0, 211, 255, 1)',
440 'Annotations & Alerts',
445 g.template.datasource('datasource',
447 'Prometheus admin.virt1.home.fajerski.name:9090',
451 u.addTemplateSchema('pool_name',
453 'label_values(ceph_pool_metadata,name)',
461 PoolDetailSingleStatPanel(
471 '(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
478 PoolDetailSingleStatPanel(
481 'Time till pool is full assuming the average fill rate of the last 6 hours',
488 '(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} > 0',
495 PoolDetailGraphPanel(
499 write_op_per_sec: '#E5AC0E',
501 '$pool_name Object Ingress/Egress',
504 'Objects out(-) / in(+) ',
505 'deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
507 'Objects per second',
513 PoolDetailGraphPanel(
515 read_op_per_sec: '#3F6833',
516 write_op_per_sec: '#E5AC0E',
517 }, '$pool_name Client IOPS', '', 'iops', 'Read (-) / Write (+)', 'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'time_series', 'reads', 0, 7, 12, 7
519 .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
522 'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 1, 'time_series', 'writes'
525 PoolDetailGraphPanel(
527 read_op_per_sec: '#3F6833',
528 write_op_per_sec: '#E5AC0E',
530 '$pool_name Client Throughput',
533 'Read (-) / Write (+)',
// Join against ceph_pool_metadata with `*`, not `+`: the join only filters by $pool_name and attaches instance/name labels. Adding would shift the read throughput by the metadata sample's value; the Client IOPS queries use `*` for the same join.
534 'irate(ceph_pool_rd_bytes[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
542 .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
// Join against ceph_pool_metadata with `*`, not `+`: the join only filters by $pool_name and attaches instance/name labels. Adding would shift the write throughput by the metadata sample's value; the Client IOPS queries use `*` for the same join.
545 'irate(ceph_pool_wr_bytes[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
551 PoolDetailGraphPanel(
553 read_op_per_sec: '#3F6833',
554 write_op_per_sec: '#E5AC0E',
556 '$pool_name Objects',
560 'ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',