// Source: git.proxmox.com Git - ceph.git/blob - ceph/monitoring/ceph-mixin/dashboards/pool.libsonnet
// Commit: import quincy beta 17.1.0
// Path: [ceph.git] / ceph / monitoring / ceph-mixin / dashboards / pool.libsonnet
1 local g = import 'grafonnet/grafana.libsonnet';
2 local u = import 'utils.libsonnet';
3
4 {
5 grafanaDashboards+:: {
6 'pool-overview.json':
// Builds one singlestat tile for the 'Ceph Pools Overview' dashboard.
//   format       - format argument forwarded to u.addSingleStatSchema
//                  (callers in this file pass 'none', 'bytes' or 'percent')
//   title        - panel title
//   description  - panel description
//   valueName    - value selector forwarded to u.addSingleStatSchema
//                  (callers pass 'avg' or 'current')
//   expr         - query expression of the tile's only target
//   targetFormat - format argument of that target (callers pass 'table' or '')
//   x, y, w, h   - copied verbatim into the panel's gridPos
// The five trailing u.addSingleStatSchema arguments are the same for every
// overview tile: false, 100, false, false, ''. Going by the parameter names
// PoolDetailSingleStatPanel uses for the same positions, these are presumably
// colorValue, gaugeMaxValue, gaugeShow, sparkLineShow and thresholds.
local PoolOverviewSingleStatPanel(format, title, description, valueName, expr, targetFormat, x, y, w, h) =
  local statColors = ['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'];
  local queryTarget = u.addTargetSchema(expr, 1, targetFormat, '');
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local tile = u.addSingleStatSchema(
    statColors,
    '$datasource',
    format,
    title,
    description,
    valueName,
    false,
    100,
    false,
    false,
    ''
  );
  tile.addTarget(queryTarget) + placement;
29
// Builds one column style for the 'Pool Overview' table by delegating to
// u.addStyle.
//   alias      - first u.addStyle argument (callers pass the column header
//                text, or '' for hidden columns)
//   pattern    - column/field name the style is applied to
//   type       - callers pass 'hidden', 'string' or 'number'
//   unit       - callers pass e.g. 'short', 'none', 'bytes', 'percent', 'Bps'
//   colorMode  - null, or 'value' for the columns that also pass thresholds
//   thresholds - list of threshold strings (may be empty)
//   valueMaps  - list of { text, value } mappings (may be empty)
// The colour list, the date format string and the two numeric arguments
// (2 and 1) are identical for every column, so they are fixed here.
local PoolOverviewStyle(alias, pattern, type, unit, colorMode, thresholds, valueMaps) =
  local cellColors = [
    'rgba(245, 54, 54, 0.9)',
    'rgba(237, 129, 40, 0.89)',
    'rgba(50, 172, 45, 0.97)',
  ];
  local dateFormat = 'YYYY-MM-DD HH:mm:ss';
  u.addStyle(alias, colorMode, cellColors, dateFormat, 2, 1, pattern, thresholds, type, unit, valueMaps);
52
// Builds one graph panel for the 'Ceph Pools Overview' dashboard with a single
// initial target; callers may chain further .addTarget() calls on the result.
//   title, description - panel title and description
//   formatY1, labelY1  - forwarded to u.graphPanelSchema (first Y axis format
//                        and label, going by the parameter names)
//   expr               - query expression of the initial target
//   targetFormat       - NOTE: accepted but never read; the target format is
//                        always 'time_series' regardless of what is passed
//   legendFormat       - legend template of the initial target
//   x, y, w, h         - copied verbatim into the panel's gridPos
// All other u.graphPanelSchema arguments are fixed:
// {}, 'null as zero', false, 'short', null, 0, 1 and '$datasource'.
local PoolOverviewGraphPanel(title, description, formatY1, labelY1, expr, targetFormat, legendFormat, x, y, w, h) =
  local initialTarget = u.addTargetSchema(expr, 1, 'time_series', legendFormat);
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local graph = u.graphPanelSchema(
    {},
    title,
    description,
    'null as zero',
    false,
    formatY1,
    'short',
    labelY1,
    null,
    0,
    1,
    '$datasource'
  );
  graph.addTargets([initialTarget]) + placement;
82
// 'Ceph Pools Overview' dashboard: a row of eight singlestat tiles (y=0), the
// 'Pool Overview' table (y=3), two top-k graphs (y=9) and a raw capacity
// history graph (y=17).
u.dashboardSchema(
  'Ceph Pools Overview',
  '',
  'z99hzWtmk',
  'now-1h',
  '15s',
  22,
  [],
  '',
  {
    refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  }
)
.addAnnotation(
  u.addAnnotationSchema(
    1,
    '-- Grafana --',
    true,
    true,
    'rgba(0, 211, 255, 1)',
    'Annotations & Alerts',
    'dashboard'
  )
)
// $datasource: Prometheus data source selector used by every panel.
.addTemplate(
  g.template.datasource('datasource', 'prometheus', 'Dashboard1', label='Data Source')
)
// $topk: number of pools shown in the two 'Top $topk ...' graphs.
.addTemplate(
  g.template.custom(label='TopK', name='topk', current='15', query='15')
)
.addPanels([
  // ---- Row of singlestat tiles, each 3 wide and 3 high ----
  PoolOverviewSingleStatPanel(
    'none',
    'Pools',
    '',
    'avg',
    'count(ceph_pool_metadata)',
    'table',
    0,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'none',
    'Pools with Compression',
    'Count of the pools that have compression enabled',
    'current',
    'count(ceph_pool_metadata{compression_mode!="none"})',
    '',
    3,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'bytes',
    'Total Raw Capacity',
    'Total raw capacity available to the cluster',
    'current',
    'sum(ceph_osd_stat_bytes)',
    '',
    6,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'bytes',
    'Raw Capacity Consumed',
    'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)',
    'current',
    'sum(ceph_pool_bytes_used)',
    '',
    9,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'bytes',
    'Logical Stored ',
    'Total of client data stored in the cluster',
    'current',
    'sum(ceph_pool_stored)',
    '',
    12,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'bytes',
    'Compression Savings',
    'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression',
    'current',
    'sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)',
    '',
    15,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'percent',
    'Compression Eligibility',
    'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n',
    'current',
    '(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100',
    'table',
    18,
    0,
    3,
    3
  ),
  PoolOverviewSingleStatPanel(
    'none',
    'Compression Factor',
    'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)',
    'current',
    'sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)',
    '',
    21,
    0,
    3,
    3
  ),

  // ---- 'Pool Overview' table ----
  // Each 'Value #X' style below formats the result of the target whose last
  // addTargetSchema argument is 'X'.
  u.addTableSchema(
    '$datasource',
    '',
    { col: 5, desc: true },
    [
      PoolOverviewStyle('', 'Time', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'instance', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'job', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('Pool Name', 'name', 'string', 'short', null, [], []),
      PoolOverviewStyle('Pool ID', 'pool_id', 'hidden', 'none', null, [], []),
      PoolOverviewStyle('Compression Factor', 'Value #A', 'number', 'none', null, [], []),
      PoolOverviewStyle('% Used', 'Value #D', 'number', 'percentunit', 'value', ['70', '85'], []),
      PoolOverviewStyle('Usable Free', 'Value #B', 'number', 'bytes', null, [], []),
      PoolOverviewStyle('Compression Eligibility', 'Value #C', 'number', 'percent', null, [], []),
      PoolOverviewStyle('Compression Savings', 'Value #E', 'number', 'bytes', null, [], []),
      PoolOverviewStyle('Growth (5d)', 'Value #F', 'number', 'bytes', 'value', ['0', '0'], []),
      PoolOverviewStyle('IOPS', 'Value #G', 'number', 'none', null, [], []),
      PoolOverviewStyle('Bandwidth', 'Value #H', 'number', 'Bps', null, [], []),
      PoolOverviewStyle('', '__name__', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'type', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'compression_mode', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('Type', 'description', 'string', 'short', null, [], []),
      PoolOverviewStyle('Stored', 'Value #J', 'number', 'bytes', null, [], []),
      PoolOverviewStyle('', 'Value #I', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('Compression', 'Value #K', 'string', 'short', null, [], [{ text: 'ON', value: '1' }]),
    ],
    'Pool Overview',
    'table'
  )
  .addTargets(
    [
      // A: compression factor
      u.addTargetSchema(
        '(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)',
        1,
        'table',
        'A'
      ),
      // B: usable free
      u.addTargetSchema(
        'ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata',
        1,
        'table',
        'B'
      ),
      // C: compression eligibility
      u.addTargetSchema(
        '((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100',
        1,
        'table',
        'C'
      ),
      // D: % used
      u.addTargetSchema(
        '(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)',
        1,
        'table',
        'D'
      ),
      // E: compression savings
      u.addTargetSchema(
        '(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)',
        1,
        'table',
        'E'
      ),
      // F: growth over 5 days
      u.addTargetSchema(
        'delta(ceph_pool_stored[5d])', 1, 'table', 'F'
      ),
      // G: IOPS
      u.addTargetSchema(
        'rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])',
        1,
        'table',
        'G'
      ),
      // H: bandwidth
      u.addTargetSchema(
        'rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])',
        1,
        'table',
        'H'
      ),
      // I: raw metadata rows (its value column is hidden by the 'Value #I' style)
      u.addTargetSchema(
        'ceph_pool_metadata', 1, 'table', 'I'
      ),
      // J: stored
      u.addTargetSchema(
        'ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata',
        1,
        'table',
        'J'
      ),
      // K: compression flag (value '1' is mapped to 'ON' by the 'Value #K' style)
      u.addTargetSchema(
        'ceph_pool_metadata{compression_mode!="none"}', 1, 'table', 'K'
      ),
      // L: empty target kept as-is; no style references it.
      u.addTargetSchema('', '', '', 'L'),
    ]
  ) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } },

  // ---- Top-k graphs ----
  PoolOverviewGraphPanel(
    'Top $topk Client IOPS by Pool',
    'This chart shows the sum of read and write IOPS from all clients by pool',
    'short',
    'IOPS',
    'topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
    'time_series',
    '{{name}} ',
    0,
    9,
    12,
    8
  )
  .addTarget(
    u.addTargetSchema(
      // Fixed: the join with ceph_pool_metadata used '+', which adds the
      // metadata sample value to every write rate. '*' is what every other
      // query in this dashboard uses to attach the name/instance labels.
      'topk($topk,rate(ceph_pool_wr[30s]) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
      1,
      'time_series',
      '{{name}} - write'
    )
  ),
  PoolOverviewGraphPanel(
    'Top $topk Client Bandwidth by Pool',
    'The chart shows the sum of read and write bytes from all clients, by pool',
    'Bps',
    'Throughput',
    'topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)',
    'time_series',
    '{{name}}',
    12,
    9,
    12,
    8
  ),

  // ---- Capacity history ----
  PoolOverviewGraphPanel(
    'Pool Capacity Usage (RAW)',
    'Historical view of capacity usage, to help identify growth and trends in pool consumption',
    'bytes',
    'Capacity Used',
    'ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata',
    '',
    '{{name}}',
    0,
    17,
    24,
    7
  ),
]),
353 'pool-detail.json':
// Builds one singlestat panel for the 'Ceph Pool Details' dashboard.
// Arguments are forwarded to u.addSingleStatSchema in the order given here:
//   format, title, description - unit/format, panel title, panel description
//   valueName                  - value selector (e.g. 'current')
//   colorValue                 - forwarded as-is
//   gaugeMaxValue, gaugeShow   - gauge settings
//   sparkLineShow              - sparkline toggle
//   thresholds                 - threshold string (e.g. '.7,.8', or '')
//   expr, targetFormat         - query expression and format of the only target
//   x, y, w, h                 - copied verbatim into the panel's gridPos
// The colour list and the '$datasource' data source are fixed.
local PoolDetailSingleStatPanel(format, title, description, valueName, colorValue, gaugeMaxValue, gaugeShow, sparkLineShow, thresholds, expr, targetFormat, x, y, w, h) =
  local statColors = ['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'];
  local queryTarget = u.addTargetSchema(expr, 1, targetFormat, '');
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local stat = u.addSingleStatSchema(
    statColors,
    '$datasource',
    format,
    title,
    description,
    valueName,
    colorValue,
    gaugeMaxValue,
    gaugeShow,
    sparkLineShow,
    thresholds
  );
  stat.addTarget(queryTarget) + placement;
381
// Builds one graph panel for the 'Ceph Pool Details' dashboard with a single
// initial target; callers may chain .addSeriesOverride()/.addTarget() on the
// result.
//   alias              - first u.graphPanelSchema argument (callers pass an
//                        object mapping series names to colours)
//   title, description - panel title and description
//   formatY1, labelY1  - forwarded to u.graphPanelSchema
//   expr               - query expression of the initial target
//   targetFormat       - NOTE: accepted but never read; the target format is
//                        always 'time_series' regardless of what is passed
//   legendFormat       - legend template of the initial target
//   x, y, w, h         - copied verbatim into the panel's gridPos
// All other u.graphPanelSchema arguments are fixed:
// 'null as zero', false, 'short', null, null, 1 and '$datasource'.
local PoolDetailGraphPanel(alias, title, description, formatY1, labelY1, expr, targetFormat, legendFormat, x, y, w, h) =
  local initialTarget = u.addTargetSchema(expr, 1, 'time_series', legendFormat);
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local graph = u.graphPanelSchema(
    alias,
    title,
    description,
    'null as zero',
    false,
    formatY1,
    'short',
    labelY1,
    null,
    null,
    1,
    '$datasource'
  );
  graph.addTargets([initialTarget]) + placement;
409
// 'Ceph Pool Details' dashboard: two singlestat panels and an object
// ingress/egress graph on the first row (y=0), client IOPS and throughput
// graphs on the second (y=7), and an object-count graph on the third (y=14).
// Every query is restricted to the pool(s) selected through $pool_name.
u.dashboardSchema(
  'Ceph Pool Details',
  '',
  '-xyV8KCiz',
  'now-1h',
  '15s',
  22,
  [],
  '',
  {
    refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  }
)
.addRequired(
  type='grafana', id='grafana', name='Grafana', version='5.3.2'
)
.addRequired(
  type='panel', id='graph', name='Graph', version='5.0.0'
)
.addRequired(
  type='panel', id='singlestat', name='Singlestat', version='5.0.0'
)
.addAnnotation(
  u.addAnnotationSchema(
    1,
    '-- Grafana --',
    true,
    true,
    'rgba(0, 211, 255, 1)',
    'Annotations & Alerts',
    'dashboard'
  )
)
// $datasource: Prometheus data source selector used by every panel.
.addTemplate(
  g.template.datasource(
    'datasource',
    'prometheus',
    'Prometheus admin.virt1.home.fajerski.name:9090',
    label='Data Source'
  )
)
// $pool_name: pool selector populated from the name label of ceph_pool_metadata.
.addTemplate(
  u.addTemplateSchema(
    'pool_name',
    '$datasource',
    'label_values(ceph_pool_metadata,name)',
    1,
    false,
    1,
    'Pool Name',
    ''
  )
)
.addPanels([
  PoolDetailSingleStatPanel(
    'percentunit',
    'Capacity used',
    '',
    'current',
    true,
    1,
    true,
    true,
    '.7,.8',
    '(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    'time_series',
    0,
    0,
    7,
    7
  ),
  // Fixed: the arguments were shifted by one against the helper's signature
  // (false, 100, false, false, '', 'current'), so valueName received false and
  // thresholds received 'current'. They now follow the declared order:
  // valueName, colorValue, gaugeMaxValue, gaugeShow, sparkLineShow, thresholds.
  PoolDetailSingleStatPanel(
    's',
    'Time till full',
    'Time till pool is full assuming the average fill rate of the last 6 hours',
    'current',
    false,
    100,
    false,
    false,
    '',
    '(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} > 0',
    'time_series',
    7,
    0,
    5,
    7
  ),
  PoolDetailGraphPanel(
    {
      read_op_per_sec: '#3F6833',
      write_op_per_sec: '#E5AC0E',
    },
    '$pool_name Object Ingress/Egress',
    '',
    'ops',
    'Objects out(-) / in(+) ',
    'deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    'time_series',
    'Objects per second',
    12,
    0,
    12,
    7
  ),
  // Reads are drawn below the axis (negative-Y override), writes above it.
  PoolDetailGraphPanel(
    {
      read_op_per_sec: '#3F6833',
      write_op_per_sec: '#E5AC0E',
    },
    '$pool_name Client IOPS',
    '',
    'iops',
    'Read (-) / Write (+)',
    'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    'time_series',
    'reads',
    0,
    7,
    12,
    7
  )
  .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
  .addTarget(
    u.addTargetSchema(
      'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
      1,
      'time_series',
      'writes'
    )
  ),
  // Fixed: both throughput queries joined ceph_pool_metadata with '+', which
  // adds the metadata sample value to every rate. They now use '*', the same
  // label-attaching join as the IOPS and Objects panels.
  PoolDetailGraphPanel(
    {
      read_op_per_sec: '#3F6833',
      write_op_per_sec: '#E5AC0E',
    },
    '$pool_name Client Throughput',
    '',
    'Bps',
    'Read (-) / Write (+)',
    'irate(ceph_pool_rd_bytes[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    'time_series',
    'reads',
    12,
    7,
    12,
    7
  )
  .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
  .addTarget(
    u.addTargetSchema(
      'irate(ceph_pool_wr_bytes[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
      1,
      'time_series',
      'writes'
    )
  ),
  PoolDetailGraphPanel(
    {
      read_op_per_sec: '#3F6833',
      write_op_per_sec: '#E5AC0E',
    },
    '$pool_name Objects',
    '',
    'short',
    'Objects',
    'ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    'time_series',
    'Number of Objects',
    0,
    14,
    12,
    7
  ),
]),
569 },
570 }