]> git.proxmox.com Git - ceph.git/blame - ceph/monitoring/ceph-mixin/dashboards/pool.libsonnet
import quincy 17.2.0
[ceph.git] / ceph / monitoring / ceph-mixin / dashboards / pool.libsonnet
CommitLineData
20effc67
TL
1local g = import 'grafonnet/grafana.libsonnet';
2local u = import 'utils.libsonnet';
3
4{
5 grafanaDashboards+:: {
6 'pool-overview.json':
// Builds one single-stat tile for the pool overview dashboard.
//
//   format, title, description, valueName - forwarded unchanged to
//       u.addSingleStatSchema.
//   expr         - query string for the tile's only target.
//   instant      - forwarded as the fifth argument of u.addTargetSchema.
//   targetFormat - forwarded as the third argument of u.addTargetSchema.
//   x, y, w, h   - grid placement, emitted as the panel's gridPos.
//
// The threshold colours, the '$datasource' reference and the trailing
// (false, 100, false, false, '') arguments are fixed for every overview tile.
// NOTE(review): those five positions presumably map to colorValue,
// gaugeMaxValue, gaugeShow, sparkLineShow and thresholds - confirm against
// utils.libsonnet.
local PoolOverviewSingleStatPanel(format,
                                    title,
                                    description,
                                    valueName,
                                    expr,
                                    instant,
                                    targetFormat,
                                    x,
                                    y,
                                    w,
                                    h) =
  local thresholdColors = ['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'];
  local query = u.addTargetSchema(expr, '', targetFormat, 1, instant);
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local tile = u.addSingleStatSchema(
    thresholdColors, '$datasource', format, title, description, valueName, false, 100, false, false, ''
  );
  tile.addTarget(query) + placement;
20effc67
TL
30
// Builds one column style for the "Pool Overview" table.
//
// Only alias, pattern, type, unit, colorMode, thresholds and valueMaps vary
// per column. The three-colour palette, the date format string and the two
// numeric settings (2 and 1) are the same for every column; the meaning of
// those two numbers is defined by u.addStyle in utils.libsonnet.
local PoolOverviewStyle(alias, pattern, type, unit, colorMode, thresholds, valueMaps) =
  local palette = [
    'rgba(245, 54, 54, 0.9)',
    'rgba(237, 129, 40, 0.89)',
    'rgba(50, 172, 45, 0.97)',
  ];
  local dateFormat = 'YYYY-MM-DD HH:mm:ss';
  u.addStyle(alias, colorMode, palette, dateFormat, 2, 1, pattern, thresholds, type, unit, valueMaps);
53
// Builds one graph panel for the pool overview dashboard.
//
//   title, description - forwarded to u.graphPanelSchema.
//   formatY1, labelY1  - forwarded to u.graphPanelSchema (the other format
//                        argument is fixed to 'short').
//   expr, legendFormat - the panel's initial target; callers may chain
//                        .addTarget(...) on the result to add more.
//   x, y, w, h         - grid placement, emitted as the panel's gridPos.
//
// Every overview graph uses an empty alias-colour map, 'null as zero' and the
// '$datasource' reference.
local PoolOverviewGraphPanel(title, description, formatY1, labelY1, expr, legendFormat, x, y, w, h) =
  local series = u.addTargetSchema(expr, legendFormat);
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local graph = u.graphPanelSchema(
    {}, title, description, 'null as zero', false, formatY1, 'short', labelY1, null, 0, 1, '$datasource'
  );
  graph.addTargets([series]) + placement;
80
// "Ceph Pools Overview" dashboard: a row of eight single-stat tiles (y=0),
// the per-pool overview table (y=3), two top-k graphs (y=9) and a raw
// capacity history graph (y=17).
//
// ceph_pool_metadata is used throughout as a label-join series:
// `<metric> * on(pool_id) group_left(...) ceph_pool_metadata` attaches the
// pool name to <metric>. The write-IOPS target previously used `+` for this
// join, which adds the metadata sample value to every rate instead of only
// attaching labels; it now uses `*` like every other join in this dashboard.
// NOTE(review): assumes ceph_pool_metadata samples are always 1 - confirm
// against the exporter.
u.dashboardSchema(
  'Ceph Pools Overview',
  '',
  'z99hzWtmk',
  'now-1h',
  '15s',
  22,
  [],
  '',
  {
    refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  }
)
.addAnnotation(
  u.addAnnotationSchema(
    1,
    '-- Grafana --',
    true,
    true,
    'rgba(0, 211, 255, 1)',
    'Annotations & Alerts',
    'dashboard'
  )
)
.addTemplate(
  g.template.datasource('datasource', 'prometheus', 'Dashboard1', label='Data Source')
)
.addTemplate(
  // Backs the $topk variable referenced by the two "Top $topk ..." graphs.
  g.template.custom(label='TopK', name='topk', current='15', query='15')
)
.addPanels([
  // ---- Row 1: cluster-wide single-stat tiles, each 3 wide and 3 high ----
  PoolOverviewSingleStatPanel(
    format='none',
    title='Pools',
    description='',
    valueName='avg',
    expr='count(ceph_pool_metadata)',
    instant=true,
    targetFormat='table',
    x=0,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='none',
    title='Pools with Compression',
    description='Count of the pools that have compression enabled',
    valueName='current',
    expr='count(ceph_pool_metadata{compression_mode!="none"})',
    instant=null,
    targetFormat='',
    x=3,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='bytes',
    title='Total Raw Capacity',
    description='Total raw capacity available to the cluster',
    valueName='current',
    expr='sum(ceph_osd_stat_bytes)',
    instant=null,
    targetFormat='',
    x=6,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='bytes',
    title='Raw Capacity Consumed',
    description='Total raw capacity consumed by user data and associated overheads (metadata + redundancy)',
    valueName='current',
    expr='sum(ceph_pool_bytes_used)',
    instant=true,
    targetFormat='',
    x=9,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='bytes',
    title='Logical Stored ',
    description='Total of client data stored in the cluster',
    valueName='current',
    expr='sum(ceph_pool_stored)',
    instant=true,
    targetFormat='',
    x=12,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='bytes',
    title='Compression Savings',
    description='A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression',
    valueName='current',
    expr='sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)',
    instant=null,
    targetFormat='',
    x=15,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='percent',
    title='Compression Eligibility',
    description='Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n',
    valueName='current',
    expr='(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100',
    instant=null,
    targetFormat='table',
    x=18,
    y=0,
    w=3,
    h=3
  ),
  PoolOverviewSingleStatPanel(
    format='none',
    title='Compression Factor',
    description='This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)',
    valueName='current',
    expr='sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)',
    instant=null,
    targetFormat='',
    x=21,
    y=0,
    w=3,
    h=3
  ),

  // ---- Row 2: per-pool table ----
  // The 'Value #X' style patterns below pair up with the targets by letter:
  // the target tagged 'A' feeds the 'Value #A' column, and so on.
  u.addTableSchema(
    '$datasource',
    '',
    { col: 5, desc: true },
    [
      PoolOverviewStyle('', 'Time', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'instance', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'job', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('Pool Name', 'name', 'string', 'short', null, [], []),
      PoolOverviewStyle('Pool ID', 'pool_id', 'hidden', 'none', null, [], []),
      PoolOverviewStyle('Compression Factor', 'Value #A', 'number', 'none', null, [], []),
      PoolOverviewStyle('% Used', 'Value #D', 'number', 'percentunit', 'value', ['70', '85'], []),
      PoolOverviewStyle('Usable Free', 'Value #B', 'number', 'bytes', null, [], []),
      PoolOverviewStyle('Compression Eligibility', 'Value #C', 'number', 'percent', null, [], []),
      PoolOverviewStyle('Compression Savings', 'Value #E', 'number', 'bytes', null, [], []),
      PoolOverviewStyle('Growth (5d)', 'Value #F', 'number', 'bytes', 'value', ['0', '0'], []),
      PoolOverviewStyle('IOPS', 'Value #G', 'number', 'none', null, [], []),
      PoolOverviewStyle('Bandwidth', 'Value #H', 'number', 'Bps', null, [], []),
      PoolOverviewStyle('', '__name__', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'type', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('', 'compression_mode', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('Type', 'description', 'string', 'short', null, [], []),
      PoolOverviewStyle('Stored', 'Value #J', 'number', 'bytes', null, [], []),
      PoolOverviewStyle('', 'Value #I', 'hidden', 'short', null, [], []),
      PoolOverviewStyle('Compression', 'Value #K', 'string', 'short', null, [], [{ text: 'ON', value: '1' }]),
    ],
    'Pool Overview',
    'table'
  )
  .addTargets(
    // Targets A..K share the same ('table', 1, true) settings, so they are
    // generated from a list; order is preserved.
    [
      u.addTargetSchema(t.expr, t.tag, 'table', 1, true)
      for t in [
        { tag: 'A', expr: '(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)' },
        { tag: 'B', expr: 'ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata' },
        { tag: 'C', expr: '((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100' },
        { tag: 'D', expr: '(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)' },
        { tag: 'E', expr: '(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)' },
        { tag: 'F', expr: 'delta(ceph_pool_stored[5d])' },
        { tag: 'G', expr: 'rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])' },
        { tag: 'H', expr: 'rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])' },
        { tag: 'I', expr: 'ceph_pool_metadata' },
        { tag: 'J', expr: 'ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata' },
        { tag: 'K', expr: 'ceph_pool_metadata{compression_mode!="none"}' },
      ]
    ] + [
      // NOTE(review): empty placeholder target kept so the generated JSON is
      // unchanged; it has no query and no matching column style - confirm
      // whether it can be dropped.
      u.addTargetSchema('', 'L', '', '', null),
    ]
  ) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } },

  // ---- Row 3: top-k graphs ----
  PoolOverviewGraphPanel(
    title='Top $topk Client IOPS by Pool',
    description='This chart shows the sum of read and write IOPS from all clients by pool',
    formatY1='short',
    labelY1='IOPS',
    expr='topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
    legendFormat='{{name}} ',
    x=0,
    y=9,
    w=12,
    h=8
  )
  .addTarget(
    u.addTargetSchema(
      // Fixed: joined with `*` (was `+`) so the write rate is not offset by
      // the metadata sample value.
      'topk($topk,rate(ceph_pool_wr[30s]) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
      '{{name}} - write'
    )
  ),
  PoolOverviewGraphPanel(
    title='Top $topk Client Bandwidth by Pool',
    description='The chart shows the sum of read and write bytes from all clients, by pool',
    formatY1='Bps',
    labelY1='Throughput',
    expr='topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)',
    legendFormat='{{name}}',
    x=12,
    y=9,
    w=12,
    h=8
  ),

  // ---- Row 4: capacity history ----
  PoolOverviewGraphPanel(
    title='Pool Capacity Usage (RAW)',
    description='Historical view of capacity usage, to help identify growth and trends in pool consumption',
    formatY1='bytes',
    labelY1='Capacity Used',
    expr='ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata',
    legendFormat='{{name}}',
    x=0,
    y=17,
    w=24,
    h=7
  ),
]),
362 'pool-detail.json':
// Builds one single-stat panel for the pool detail dashboard.
//
// Unlike the overview helper, the colour/gauge/sparkline/threshold settings
// are caller-supplied and passed straight through to u.addSingleStatSchema:
//
//   format, title, description, valueName          - display settings.
//   colorValue, gaugeMaxValue, gaugeShow,
//   sparkLineShow, thresholds                      - forwarded in this order.
//   expr, targetFormat                             - the panel's only target
//                                                    (empty legend).
//   x, y, w, h                                     - grid placement (gridPos).
local PoolDetailSingleStatPanel(format,
                                  title,
                                  description,
                                  valueName,
                                  colorValue,
                                  gaugeMaxValue,
                                  gaugeShow,
                                  sparkLineShow,
                                  thresholds,
                                  expr,
                                  targetFormat,
                                  x,
                                  y,
                                  w,
                                  h) =
  local thresholdColors = ['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'];
  local query = u.addTargetSchema(expr, '', targetFormat);
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local stat = u.addSingleStatSchema(
    thresholdColors,
    '$datasource',
    format,
    title,
    description,
    valueName,
    colorValue,
    gaugeMaxValue,
    gaugeShow,
    sparkLineShow,
    thresholds
  );
  stat.addTarget(query) + placement;
20effc67
TL
390
// Builds one graph panel for the pool detail dashboard.
//
//   alias              - forwarded as the first argument of
//                        u.graphPanelSchema (callers pass a map of series
//                        name -> colour).
//   title, description - forwarded to u.graphPanelSchema.
//   formatY1, labelY1  - forwarded to u.graphPanelSchema (the other format
//                        argument is fixed to 'short').
//   expr, legendFormat - the panel's initial target; callers may chain
//                        .addTarget(...) / .addSeriesOverride(...) on the
//                        result.
//   x, y, w, h         - grid placement, emitted as the panel's gridPos.
local PoolDetailGraphPanel(alias, title, description, formatY1, labelY1, expr, legendFormat, x, y, w, h) =
  local series = u.addTargetSchema(expr, legendFormat);
  local placement = { gridPos: { x: x, y: y, w: w, h: h } };
  local graph = u.graphPanelSchema(
    alias, title, description, 'null as zero', false, formatY1, 'short', labelY1, null, null, 1, '$datasource'
  );
  graph.addTargets([series]) + placement;
417
// "Ceph Pool Details" dashboard: per-pool drill-down driven by the
// $pool_name template variable.
//
// Fixes relative to the previous revision:
//  * "Time till full" passed its arguments one position late
//    (valueName=false, colorValue=100, ..., thresholds='current'). The
//    PoolDetailSingleStatPanel calls now use named arguments so values
//    cannot drift against the helper's signature.
//  * The Client Throughput read/write queries joined ceph_pool_metadata with
//    `+`, which adds the metadata sample value to the byte rate; they now use
//    `*`, matching the IOPS and object queries in this dashboard.
//    NOTE(review): assumes ceph_pool_metadata samples are always 1 - confirm
//    against the exporter.

// Alias-colour map shared by every graph below.
// NOTE(review): no legend in this dashboard is named read_op_per_sec or
// write_op_per_sec (legends are 'reads', 'writes', ...), so these colours
// look unused - confirm before changing.
local opColors = {
  read_op_per_sec: '#3F6833',
  write_op_per_sec: '#E5AC0E',
};

u.dashboardSchema(
  'Ceph Pool Details',
  '',
  '-xyV8KCiz',
  'now-1h',
  '15s',
  22,
  [],
  '',
  {
    refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  }
)
.addRequired(
  type='grafana', id='grafana', name='Grafana', version='5.3.2'
)
.addRequired(
  type='panel', id='graph', name='Graph', version='5.0.0'
)
.addRequired(
  type='panel', id='singlestat', name='Singlestat', version='5.0.0'
)
.addAnnotation(
  u.addAnnotationSchema(
    1,
    '-- Grafana --',
    true,
    true,
    'rgba(0, 211, 255, 1)',
    'Annotations & Alerts',
    'dashboard'
  )
)
.addTemplate(
  // NOTE(review): the default datasource name embeds what looks like a
  // developer's personal host; left unchanged to keep the generated JSON
  // stable, but the overview dashboard uses 'Dashboard1' here - confirm
  // which default is intended.
  g.template.datasource('datasource',
                        'prometheus',
                        'Prometheus admin.virt1.home.fajerski.name:9090',
                        label='Data Source')
)
.addTemplate(
  // Backs the $pool_name variable used in every query below.
  u.addTemplateSchema('pool_name',
                      '$datasource',
                      'label_values(ceph_pool_metadata,name)',
                      1,
                      false,
                      1,
                      'Pool Name',
                      '')
)
.addPanels([
  PoolDetailSingleStatPanel(
    format='percentunit',
    title='Capacity used',
    description='',
    valueName='current',
    colorValue=true,
    gaugeMaxValue=1,
    gaugeShow=true,
    sparkLineShow=true,
    thresholds='.7,.8',
    expr='(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    targetFormat='time_series',
    x=0,
    y=0,
    w=7,
    h=7
  ),
  PoolDetailSingleStatPanel(
    format='s',
    title='Time till full',
    description='Time till pool is full assuming the average fill rate of the last 6 hours',
    // Fixed ordering: 'current' is the value name; the plain (non-gauge,
    // non-sparkline, no-threshold) settings follow.
    valueName='current',
    colorValue=false,
    gaugeMaxValue=100,
    gaugeShow=false,
    sparkLineShow=false,
    thresholds='',
    expr='(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} > 0',
    targetFormat='time_series',
    x=7,
    y=0,
    w=5,
    h=7
  ),
  PoolDetailGraphPanel(
    alias=opColors,
    title='$pool_name Object Ingress/Egress',
    description='',
    formatY1='ops',
    labelY1='Objects out(-) / in(+) ',
    expr='deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    legendFormat='Objects per second',
    x=12,
    y=0,
    w=12,
    h=7
  ),
  PoolDetailGraphPanel(
    alias=opColors,
    title='$pool_name Client IOPS',
    description='',
    formatY1='iops',
    labelY1='Read (-) / Write (+)',
    expr='irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    legendFormat='reads',
    x=0,
    y=7,
    w=12,
    h=7
  )
  // Reads are drawn below the axis, writes above (see the Y-axis label).
  .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
  .addTarget(
    u.addTargetSchema(
      'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
      'writes'
    )
  ),
  PoolDetailGraphPanel(
    alias=opColors,
    title='$pool_name Client Throughput',
    description='',
    formatY1='Bps',
    labelY1='Read (-) / Write (+)',
    // Fixed: `*` join (was `+`).
    expr='irate(ceph_pool_rd_bytes[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    legendFormat='reads',
    x=12,
    y=7,
    w=12,
    h=7
  )
  .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
  .addTarget(
    u.addTargetSchema(
      // Fixed: `*` join (was `+`).
      'irate(ceph_pool_wr_bytes[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
      'writes'
    )
  ),
  PoolDetailGraphPanel(
    alias=opColors,
    title='$pool_name Objects',
    description='',
    formatY1='short',
    labelY1='Objects',
    expr='ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
    legendFormat='Number of Objects',
    x=0,
    y=14,
    w=12,
    h=7
  ),
]),
572 },
573}