]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/kvm/kvm_stat/kvm_stat
tools/kvm_stat: add new command line switch '-i'
[mirror_ubuntu-bionic-kernel.git] / tools / kvm / kvm_stat / kvm_stat
1 #!/usr/bin/python
2 #
3 # top-like utility for displaying kvm statistics
4 #
5 # Copyright 2006-2008 Qumranet Technologies
6 # Copyright 2008-2011 Red Hat, Inc.
7 #
8 # Authors:
9 # Avi Kivity <avi@redhat.com>
10 #
11 # This work is licensed under the terms of the GNU GPL, version 2. See
12 # the COPYING file in the top-level directory.
13 """The kvm_stat module outputs statistics about running KVM VMs
14
15 Three different ways of output formatting are available:
16 - as a top-like text ui
17 - in a key -> value format
18 - in an all keys, all values format
19
20 The data is sampled from the KVM's debugfs entries and its perf events.
21 """
22
23 import curses
24 import sys
25 import os
26 import time
27 import optparse
28 import ctypes
29 import fcntl
30 import resource
31 import struct
32 import re
33 import subprocess
34 from collections import defaultdict
35
# Exit reason names and their numeric codes used when /proc/cpuinfo
# advertises the 'vmx' flag (see Arch.get_arch()). The codes end up in
# 'exit_reason==<code>' tracepoint filter strings for the kvm_exit event.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}
77
# Exit reason names and their numeric codes used when /proc/cpuinfo
# advertises the 'svm' flag (see Arch.get_arch()). The codes end up in
# 'exit_reason==<code>' tracepoint filter strings for the kvm_exit event.
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}
152
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Used as the exit reason table by ArchA64, i.e. when the machine name
# reported by os.uname() starts with 'aarch64'.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
191
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Used on every architecture as the value table for the 'reason' filter of
# the kvm_userspace_exit tracepoint (see TracepointProvider.get_filters()).
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}
220
# Default ioctl request numbers issued on perf event file descriptors
# (see Event.setup_event/enable/disable/reset). ArchPPC replaces all four
# values with its own numbers.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
227
228
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Return the Arch subclass instance matching the running machine.

        The machine name from os.uname() selects PPC, aarch64 or s390.
        Everything else is treated as x86, where the first 'flags' line
        of /proc/cpuinfo decides between the VMX and SVM exit reasons.

        Returns None when that line carries neither 'vmx' nor 'svm', or
        when /proc/cpuinfo has no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()
        else:
            # X86_64
            # The context manager closes /proc/cpuinfo on every exit
            # path instead of leaving that to the garbage collector.
            with open('/proc/cpuinfo') as cpuinfo:
                for line in cpuinfo:
                    if not line.startswith('flags'):
                        continue

                    flags = line.split()
                    if 'vmx' in flags:
                        return ArchX86(VMX_EXIT_REASONS)
                    if 'svm' in flags:
                        return ArchX86(SVM_EXIT_REASONS)
                    # Only the first 'flags' line is inspected.
                    return
258
259
class ArchX86(Arch):
    """Architecture data for x86 hosts.

    exit_reasons is the table chosen by the caller (Arch.get_arch()
    passes either the VMX or the SVM one).

    """
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to libc's syscall() for perf_event_open.
        self.sc_perf_evt_open = 298
265
266
class ArchPPC(Arch):
    """Architecture data for PPC hosts.

    PPC uses different ioctl request numbers than the module-wide
    defaults and provides no exit reason table.

    """
    def __init__(self):
        # Syscall number handed to libc's syscall() for perf_event_open.
        self.sc_perf_evt_open = 319

        # Work on a private copy so that the module-level IOCTL_NUMBERS
        # table keeps its default values.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        self.exit_reasons = {}
280
281
class ArchA64(Arch):
    """Architecture data for aarch64 hosts."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to libc's syscall() for perf_event_open.
        self.sc_perf_evt_open = 241
287
288
class ArchS390(Arch):
    """Architecture data for s390 hosts; no exit reasons are defined."""
    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to libc's syscall() for perf_event_open.
        self.sc_perf_evt_open = 331
294
# Architecture description of the host, resolved once at import time. The
# event and provider code reads the syscall number, the ioctl numbers and
# the exit reasons from it. May be None (see Arch.get_arch()).
ARCH = Arch.get_arch()
296
297
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this struct is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
325
326
# Values stored by perf_event_attr.__init__() into the 'type' and
# 'read_format' fields; see perf_event_open(2) for their meaning.
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3

# debugfs locations: tracepoint ids are read from below the tracing
# directory, KVM's per-VM statistics from below the kvm directory.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
332
333
class Group(object):
    """Collects the events of one perf event group."""

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends event to the group; the first one added is the leader."""
        self.events.append(event)

    def read(self):
        """Returns a dict mapping each event name to its current value.

        A single read on the group leader's file descriptor delivers the
        counters of all events in the group, see perf_event_open(2).

        With the event configuration used here the kernel answers with:
        struct read_format {
            u64 nr;            /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        leader_fd = self.events[0].fd
        raw = os.read(leader_fd, 8 * (1 + count))
        # The eight pad bytes skip the leading 'nr' member.
        counters = struct.unpack('xxxxxxxx' + 'Q' * count, raw)
        names = [event.name for event in self.events]
        return dict(zip(names, counters))
365
366
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Opens the perf event for trace_set/trace_point.

        group is the Group the event is going to join; if it already
        holds events, its first event acts as the group leader. Raises
        OSError when the perf_event_open syscall fails.

        """
        # Set first, so that __del__ always finds the attribute.
        self.fd = None
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self.name = name
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # Compare against None: 0 is a valid descriptor. getattr() covers
        # objects whose construction failed before 'fd' was assigned.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            self.fd = None
            os.close(fd)

    def perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct."""

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        # Close the id file right away; one is opened per event.
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall reports an error.

        """

        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self.perf_event_open(event_attr, trace_pid,
                                  trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Take ownership before applying the filter, so that the
        # descriptor is closed by __del__ even if the ioctl below raises.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
470
471
class Provider(object):
    """Base class with helpers shared by the data providers."""
    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Tell whether field passes fields_filter.

        An empty or missing filter accepts every field, otherwise the
        filter is used as a regular expression matched at the start of
        the field name.
        """
        if fields_filter:
            return re.match(fields_filter, field) is not None
        return True

    @staticmethod
    def walkdir(path):
        """Return the top-level os.walk() entry of path.

        The result is the usual 3-tuple (dirpath, dirnames, filenames)
        for path itself; subdirectories are not descended into.
        """
        walker = os.walk(path)
        return next(walker)
489
490
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self, pid, fields_filter):
        self.group_leaders = []
        self.filters = self.get_filters()
        self.update_fields(fields_filter)
        # Assigning the pid creates the perf events (see the pid setter).
        self.pid = pid

    @staticmethod
    def get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        Trace events can be filtered for special values by setting a
        filter string via an ioctl. The string normally has the format
        identifier==value. For each filter a new event will be created, to
        be able to distinguish the events.

        """
        filters = {}
        filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
        if ARCH.exit_reasons:
            filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
        return filters

    def get_available_fields(self):
        """Returns a list of available event's of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        self._fields = [field for field in self.get_available_fields()
                        if self.is_field_wanted(fields_filter, field)]

    @staticmethod
    def get_online_cpus():
        """Returns a list of cpu id integers."""
        def parse_int_list(list_string):
            """Returns an int list from a string of comma separated integers and
            integer ranges."""
            integers = []
            members = list_string.split(',')

            for member in members:
                if '-' not in member:
                    integers.append(int(member))
                else:
                    int_range = member.split('-')
                    integers.extend(range(int(int_range[0]),
                                          int(int_range[1]) + 1))

            return integers

        with open('/sys/devices/system/cpu/online') as cpu_list:
            cpu_string = cpu_list.readline()
            return parse_int_list(cpu_string)

    def setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data.

        One group is created per thread of the monitored pid, or per
        online cpu when no pid is monitored. Exits the program if the
        NOFILE rlimit cannot be raised far enough.

        """
        fields = self.get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(path)[1]
        else:
            groupids = self.get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()

            # From perf_event_open(2):
            # pid > 0 and cpu == -1
            # This measures the specified process/thread on any CPU.
            #
            # pid == -1 and cpu >= 0
            # This measures all processes/threads on the specified CPU.
            #
            # Both values only depend on the group, so they are computed
            # once per group rather than once per field.
            trace_cpu = groupid if self._pid == 0 else -1
            trace_pid = int(groupid) if self._pid != 0 else -1

            for name in fields:
                tracepoint = name
                tracefilter = None
                # Fields of the form 'event(value)' get a filter string.
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events."""
        ret = defaultdict(int)
        for group in self.group_leaders:
            # items() exists on Python 2 and 3, iteritems() only on 2.
            for name, val in group.read().items():
                if name in self._fields:
                    ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
679
680
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        """Sets up the provider for pid (0 selects all VMs).

        With include_past set, the values already accumulated before the
        start are part of the reported counts instead of being used as
        baseline.

        """
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        self.pid = pid
        if include_past:
            self.restore()

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return self.walkdir(PATH_DEBUGFS_KVM)[2]

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        self._fields = [field for field in self.get_available_fields()
                        if self.is_field_wanted(fields_filter, field)]

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Selects the debugfs directories belonging to pid and resets
        the counters; pid 0 selects all VMs."""
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            if len(vms) == 0:
                self.do_read = False

            # The per-VM directories are expected to be named
            # '<pid>-<suffix>'. Anchor the match at the start of the
            # name, otherwise pid 12 would also select '112-3'.
            # A real list is stored, as read() iterates it repeatedly.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True
        self.reset()

    def read(self, reset=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        paths = self.paths
        if self._pid == 0:
            # No pid selected: collect every directory below the kvm
            # debugfs folder.
            paths = [subdir
                     for entry in os.walk(PATH_DEBUGFS_KVM)
                     for subdir in entry[1]]
        for path in paths:
            for field in self._fields:
                value = self.read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                # First sighting of this counter: start counting from
                # its current value.
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                results[field] = (results.get(field, 0) + value -
                                  self._baseline.get(key, 0))

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Files that cannot be opened or read count as 0.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def restore(self):
        """Restore the original field counts, i.e. count from 0 again"""
        self._baseline = {}
        self.read(2)
789
790
class Stats(object):
    """Owns the data providers and merges what they report.

    Filter changes (fields regex, pid) are forwarded to every provider,
    and get() combines the provider readings into one dict.

    """
    def __init__(self, options):
        self.providers = self.get_providers(options)
        self._pid_filter = options.pid
        self._fields_filter = options.fields
        self.values = {}

    @staticmethod
    def get_providers(options):
        """Builds the provider list selected by the passed options.

        The tracepoint provider is also used as fallback when nothing
        else was selected.
        """
        selected = []

        if options.debugfs:
            debugfs = DebugfsProvider(options.pid, options.fields,
                                      options.dbgfs_include_past)
            selected.append(debugfs)
        if options.tracepoints or not selected:
            tracepoints = TracepointProvider(options.pid, options.fields)
            selected.append(tracepoints)

        return selected

    def update_provider_filters(self):
        """Hands the current fields filter to all providers."""
        # Updating the fields resets the counters, so the cached old
        # values are dropped as well.
        self.values = {}
        for provider in self.providers:
            provider.update_fields(self._fields_filter)

    def reset(self):
        """Drops the cached values and resets every provider."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        # Nothing to do if the filter stays the same.
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        # Nothing to do if the pid stays the same.
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        for provider in self.providers:
            provider.pid = self._pid_filter

    def get(self):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data."""
        for provider in self.providers:
            sample = provider.read()
            for field in provider.fields:
                previous = self.values.get(field, (0, 0))[0]
                current = sample.get(field, 0)
                self.values[field] = (current, current - previous)
        return self.values
863
# Regular refresh delay of the text ui in seconds.
DELAY_DEFAULT = 3.0
# Longer guest names and regexes are cut off and suffixed with '...'
# when shown in the header.
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
# Fields filter that only lets through field names without braces.
DEFAULT_REGEX = r'^[^\(]*$'
# Value of Tui._sorting that selects sorting by current events.
SORT_DEFAULT = 0
869
870
871 class Tui(object):
872 """Instruments curses to draw a nice text ui."""
873 def __init__(self, stats):
874 self.stats = stats
875 self.screen = None
876 self._delay_initial = 0.25
877 self._delay_regular = DELAY_DEFAULT
878 self._sorting = SORT_DEFAULT
879
def __enter__(self):
    """Puts the terminal into curses mode and returns the Tui object.

    Modelled after the curses.wrapper implementation from the Python
    standard library."""
    self.screen = curses.initscr()
    curses.noecho()
    curses.cbreak()

    # Both calls may fail harmlessly:
    # - the error return from C start_color() is ignorable, the curses
    #   module is merely a bit over-conscientious here,
    # - hiding the cursor is tried separately, as some monochrome
    #   terminals might support hiding but not colors.
    optional_setup = (curses.start_color, lambda: curses.curs_set(0))
    for setup in optional_setup:
        try:
            setup()
        except curses.error:
            pass

    curses.use_default_colors()
    return self
904
905 def __exit__(self, *exception):
906 """Resets the terminal to its normal state. Based on curses.wrapper
907 implementation from the Python standard library."""
908 if self.screen:
909 self.screen.keypad(0)
910 curses.echo()
911 curses.nocbreak()
912 curses.endwin()
913
def get_all_gnames(self):
    """Returns a list of (pid, gname) tuples of all running guests

    The candidates are taken from the output of 'ps'; every process
    whose arguments contain ' -name ' is reported. Errors while
    starting or reading 'ps' are passed on to the caller.

    """
    res = []
    child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                             stdout=subprocess.PIPE)
    try:
        for line in child.stdout:
            entry = line.lstrip().split(' ', 1)
            # Skip lines that do not carry an args column at all.
            if len(entry) < 2:
                continue
            # perform a sanity check before calling the more expensive
            # function to possibly extract the guest name
            if ' -name ' in entry[1]:
                res.append((entry[0], self.get_gname_from_pid(entry[0])))
    finally:
        child.stdout.close()
        # Reap the child, otherwise every call leaves a zombie behind.
        child.wait()

    return res
931
932 def print_all_gnames(self, row):
933 """Print a list of all running guests along with their pids."""
934 self.screen.addstr(row, 2, '%8s %-60s' %
935 ('Pid', 'Guest Name (fuzzy list, might be '
936 'inaccurate!)'),
937 curses.A_UNDERLINE)
938 row += 1
939 try:
940 for line in self.get_all_gnames():
941 self.screen.addstr(row, 2, '%8s %-60s' % (line[0], line[1]))
942 row += 1
943 if row >= self.screen.getmaxyx()[0]:
944 break
945 except Exception:
946 self.screen.addstr(row + 1, 2, 'Not available')
947
948 def get_pid_from_gname(self, gname):
949 """Fuzzy function to convert guest name to QEMU process pid.
950
951 Returns a list of potential pids, can be empty if no match found.
952 Throws an exception on processing errors.
953
954 """
955 pids = []
956 for line in self.get_all_gnames():
957 if gname == line[1]:
958 pids.append(int(line[0]))
959
960 return pids
961
962 @staticmethod
963 def get_gname_from_pid(pid):
964 """Returns the guest name for a QEMU process pid.
965
966 Extracts the guest name from the QEMU comma line by processing the
967 '-name' option. Will also handle names specified out of sequence.
968
969 """
970 name = ''
971 try:
972 line = open('/proc/{}/cmdline'
973 .format(pid), 'rb').read().split('\0')
974 parms = line[line.index('-name') + 1].split(',')
975 while '' in parms:
976 # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
977 # in # ['foo', '', 'bar'], which we revert here
978 idx = parms.index('')
979 parms[idx - 1] += ',' + parms[idx + 1]
980 del parms[idx:idx+2]
981 # the '-name' switch allows for two ways to specify the guest name,
982 # where the plain name overrides the name specified via 'guest='
983 for arg in parms:
984 if '=' not in arg:
985 name = arg
986 break
987 if arg[:6] == 'guest=':
988 name = arg[6:]
989 except (ValueError, IOError, IndexError):
990 pass
991
992 return name
993
994 def update_drilldown(self):
995 """Sets or removes a filter that only allows fields without braces."""
996 if not self.stats.fields_filter:
997 self.stats.fields_filter = DEFAULT_REGEX
998
999 elif self.stats.fields_filter == DEFAULT_REGEX:
1000 self.stats.fields_filter = None
1001
1002 def update_pid(self, pid):
1003 """Propagates pid selection to stats object."""
1004 self.stats.pid_filter = pid
1005
1006 def refresh_header(self, pid=None):
1007 """Refreshes the header."""
1008 if pid is None:
1009 pid = self.stats.pid_filter
1010 self.screen.erase()
1011 gname = self.get_gname_from_pid(pid)
1012 if gname:
1013 gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
1014 if len(gname) > MAX_GUEST_NAME_LEN
1015 else gname))
1016 if pid > 0:
1017 self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
1018 .format(pid, gname), curses.A_BOLD)
1019 else:
1020 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
1021 if self.stats.fields_filter and self.stats.fields_filter \
1022 != DEFAULT_REGEX:
1023 regex = self.stats.fields_filter
1024 if len(regex) > MAX_REGEX_LEN:
1025 regex = regex[:MAX_REGEX_LEN] + '...'
1026 self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
1027 self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
1028 ('Event', 'Total', '%Total', 'CurAvg/s'),
1029 curses.A_STANDOUT)
1030 self.screen.addstr(4, 1, 'Collecting data...')
1031 self.screen.refresh()
1032
1033 def refresh_body(self, sleeptime):
1034 row = 3
1035 self.screen.move(row, 0)
1036 self.screen.clrtobot()
1037 stats = self.stats.get()
1038
1039 def sortCurAvg(x):
1040 # sort by current events if available
1041 if stats[x][1]:
1042 return (-stats[x][1], -stats[x][0])
1043 else:
1044 return (0, -stats[x][0])
1045
1046 def sortTotal(x):
1047 # sort by totals
1048 return (0, -stats[x][0])
1049 total = 0.
1050 for val in stats.values():
1051 total += val[0]
1052 if self._sorting == SORT_DEFAULT:
1053 sortkey = sortCurAvg
1054 else:
1055 sortkey = sortTotal
1056 for key in sorted(stats.keys(), key=sortkey):
1057
1058 if row >= self.screen.getmaxyx()[0]:
1059 break
1060 values = stats[key]
1061 if not values[0] and not values[1]:
1062 break
1063 if values[0] is not None:
1064 cur = int(round(values[1] / sleeptime)) if values[1] else ''
1065 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
1066 (key, values[0], values[0] * 100 / total,
1067 cur))
1068 row += 1
1069 if row == 3:
1070 self.screen.addstr(4, 1, 'No matching events reported yet')
1071 self.screen.refresh()
1072
def show_help_interactive(self):
    """Shows the interactive commands reference until a key is pressed.

    The header is redrawn afterwards.
    """
    msg = (' c clear filter',
           ' f filter by regular expression',
           ' g filter by guest name',
           ' h display interactive commands reference',
           ' o toggle sorting order (Total vs CurAvg/s)',
           ' p filter by PID',
           ' q quit',
           ' r reset stats',
           ' s set update interval',
           ' x toggle reporting of stats for individual child trace'
           ' events',
           'Any other key refreshes statistics immediately')
    curses.cbreak()
    self.screen.erase()
    self.screen.addstr(0, 0, "Interactive commands reference",
                       curses.A_BOLD)
    self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
    # The command list starts at row 4, one command per row.
    for row, line in enumerate(msg, 4):
        self.screen.addstr(row, 0, line)
    self.screen.getkey()
    self.refresh_header()
1098
def show_filter_selection(self):
    """Draws the filter selection mask.

    Keeps asking until a valid regex has been entered and makes it the
    new fields filter. Empty input selects the default filter.

    """
    while True:
        self.screen.erase()
        self.screen.addstr(0, 0,
                           "Show statistics for events matching a regex.",
                           curses.A_BOLD)
        current = "Current regex: {0}".format(self.stats.fields_filter)
        self.screen.addstr(2, 0, current)
        self.screen.addstr(3, 0, "New regex: ")
        curses.echo()
        regex = self.screen.getstr()
        curses.noecho()

        if len(regex) == 0:
            new_filter = DEFAULT_REGEX
        else:
            # Ask again if the input is not a valid regex.
            try:
                re.compile(regex)
            except re.error:
                continue
            new_filter = regex

        self.stats.fields_filter = new_filter
        self.refresh_header()
        return
1128
def show_vm_selection_by_pid(self):
    """Draws the PID selection mask.

    Keeps asking until the pid of a running process or 0 has been
    entered; empty input counts as 0. The selection is then handed to
    the header and to the stats object.

    """
    msg = ''
    while True:
        self.screen.erase()
        self.screen.addstr(0, 0,
                           'Show statistics for specific pid.',
                           curses.A_BOLD)
        self.screen.addstr(1, 0,
                           'This might limit the shown data to the trace '
                           'statistics.')
        # Complaint about the previous input, if there was one.
        self.screen.addstr(5, 0, msg)
        self.print_all_gnames(7)

        # The prompt is drawn last, so the input is echoed next to it.
        curses.echo()
        self.screen.addstr(3, 0, "Pid [0 or pid]: ")
        pid = self.screen.getstr()
        curses.noecho()

        try:
            if len(pid) == 0:
                pid = 0
            else:
                pid = int(pid)
                proc_dir = os.path.join('/proc/', str(pid))
                if pid != 0 and not os.path.isdir(proc_dir):
                    msg = '"' + str(pid) + '": Not a running process'
                    continue
            self.refresh_header(pid)
            self.update_pid(pid)
            break
        except ValueError:
            msg = '"' + str(pid) + '": Not a valid pid'
1166
def show_set_update_interval(self):
    """Draws update interval selection mask.

    Prompts until the input is either empty, which selects
    DELAY_DEFAULT, or a number in the range 0.1 to 25.5 seconds.  The
    accepted value is stored in self._delay_regular and the header is
    refreshed afterwards.

    """
    msg = ''
    while True:
        self.screen.erase()
        self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
                           DELAY_DEFAULT, curses.A_BOLD)
        self.screen.addstr(4, 0, msg)
        self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
                           self._delay_regular)
        curses.echo()
        val = self.screen.getstr()
        curses.noecho()

        try:
            if len(val) > 0:
                delay = float(val)
                # float() accepts 'nan', and NaN fails neither range
                # check below.  Storing it would make the later
                # int(delay * 10) conversion for curses.halfdelay()
                # raise, so treat it like any other invalid input.
                if delay != delay:
                    raise ValueError(val)
                if delay < 0.1:
                    msg = '"' + str(val) + '": Value must be >=0.1'
                    continue
                if delay > 25.5:
                    msg = '"' + str(val) + '": Value must be <=25.5'
                    continue
            else:
                delay = DELAY_DEFAULT
            self._delay_regular = delay
            break

        except ValueError:
            msg = '"' + str(val) + '": Invalid value'
    self.refresh_header()
1198
def show_vm_selection_by_guest_name(self):
    """Draws guest selection mask.

    Asks for a guest name until a valid guest name or '' is entered.

    An empty input selects pid 0.  A name is accepted only if
    get_pid_from_gname() returns exactly one pid for it; otherwise an
    explanatory message is shown and the user is asked again.

    """
    msg = ''
    while True:
        self.screen.erase()
        self.screen.addstr(0, 0,
                           'Show statistics for specific guest.',
                           curses.A_BOLD)
        self.screen.addstr(1, 0,
                           'This might limit the shown data to the trace '
                           'statistics.')
        self.screen.addstr(5, 0, msg)
        self.print_all_gnames(7)
        curses.echo()
        self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
        gname = self.screen.getstr()
        curses.noecho()

        if not gname:
            self.refresh_header(0)
            self.update_pid(0)
            break

        try:
            pids = self.get_pid_from_gname(gname)
        except Exception:
            # Only genuine lookup failures are reported here;
            # KeyboardInterrupt and SystemExit keep propagating.
            msg = '"' + gname + '": Internal error while searching, ' \
                  'use pid filter instead'
            continue
        if len(pids) == 0:
            msg = '"' + gname + '": Not an active guest'
            continue
        if len(pids) > 1:
            msg = '"' + gname + '": Multiple matches found, use pid ' \
                  'filter instead'
            continue
        self.refresh_header(pids[0])
        self.update_pid(pids[0])
        break
1243
def show_stats(self):
    """Refreshes the screen and processes user input.

    Runs until 'q' is pressed or a KeyboardInterrupt arrives.  Each
    round redraws the body, then waits for a key for at most the
    current delay before redrawing again.

    """
    delay = self._delay_initial
    # Keys that open an input mask.  They all need a visible cursor
    # while the mask is shown and restart with the initial delay.
    masks = {
        'f': self.show_filter_selection,
        'g': self.show_vm_selection_by_guest_name,
        'p': self.show_vm_selection_by_pid,
        's': self.show_set_update_interval,
    }
    self.refresh_header()
    last = 0.0  # result based on init value never appears on screen
    while True:
        self.refresh_body(time.time() - last)
        curses.halfdelay(int(delay * 10))
        last = time.time()
        delay = self._delay_regular
        try:
            key = self.screen.getkey()
            if key == 'q':
                break
            if key in masks:
                curses.curs_set(1)
                masks[key]()
                curses.curs_set(0)
                delay = self._delay_initial
            elif key == 'c':
                self.stats.fields_filter = DEFAULT_REGEX
                self.refresh_header(0)
                self.update_pid(0)
            elif key == 'h':
                self.show_help_interactive()
            elif key == 'o':
                self._sorting = not self._sorting
            elif key == 'r':
                self.stats.reset()
            elif key == 'x':
                self.update_drilldown()
                # prevents display of current values on next refresh
                self.stats.get()
        except KeyboardInterrupt:
            break
        except curses.error:
            # Raised by getkey() when the wait expires without input
            # (and by any curses call above); just redraw.
            continue
1296
1297
def batch(stats, interval=1):
    """Prints statistics in a key, value format.

    Takes one sample, waits, takes a second sample and prints one line
    per key of the second sample, sorted by key: the key followed by
    the first two entries of its value.

    Arguments:
    stats -- object whose get() returns a mapping of key to a sequence
             with at least two numeric entries
    interval -- seconds to wait between the two samples (default 1)

    A KeyboardInterrupt ends the function quietly.

    """
    try:
        # The first sample is taken only for its effect on the stats
        # object (presumably the baseline for the second sample); its
        # result is not used.
        stats.get()
        time.sleep(interval)
        sample = stats.get()
        for key in sorted(sample):
            values = sample[key]
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%-42s%10d%10d' % (key, values[0], values[1]))
    except KeyboardInterrupt:
        pass
1309
1310
def log(stats, interval=1):
    """Prints statistics as reiterating key block, multiple value blocks.

    The set of keys is taken from one initial sample and kept for the
    whole run.  After each wait a line with the second entry of every
    key's value is printed; a banner line with the key names precedes
    every 20th value line.

    Arguments:
    stats -- object whose get() returns a mapping of key to a sequence
             with at least two entries
    interval -- seconds to wait before each value line (default 1)

    Runs until a KeyboardInterrupt arrives.

    """
    keys = sorted(stats.get())

    def banner():
        # Key names separated by single blanks, as one line.
        print(' '.join('%s' % k for k in keys))

    def statline():
        s = stats.get()
        # Fields are ' %9d' joined by a blank, which reproduces the
        # layout of the former "print item," statements.
        print(' '.join(' %9d' % s[k][1] for k in keys))

    line = 0
    banner_repeat = 20
    while True:
        try:
            time.sleep(interval)
            if line % banner_repeat == 0:
                banner()
            statline()
            line += 1
        except KeyboardInterrupt:
            break
1336
1337
def get_options():
    """Returns processed program arguments.

    Builds the option parser, parses sys.argv and returns the resulting
    options object.  Positional arguments are ignored.  The options
    '-p' and '-g' both store into options.pid; '-g' resolves the guest
    name to a pid through Tui.get_pid_from_gname().

    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/trace/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   c     clear filter
   f     filter by regular expression
   g     filter by guest name
   h     display interactive commands reference
   o     toggle sorting order (Total vs CurAvg/s)
   p     filter by PID
   q     quit
   r     reset stats
   s     set update interval
   x     toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that emits the description text unchanged."""

        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    def cb_guest_to_pid(option, opt, val, parser):
        """optparse callback for '-g': stores the pid of guest 'val'.

        Raises optparse.OptionValueError if the lookup fails or does
        not yield exactly one pid.

        """
        try:
            pids = Tui.get_pid_from_gname(val)
        except Exception:
            # Lookup failures only; KeyboardInterrupt and SystemExit
            # are not turned into an option error.
            raise optparse.OptionValueError('Error while searching for guest '
                                            '"{}", use "-p" to specify a pid '
                                            'instead'.format(val))
        if len(pids) == 0:
            raise optparse.OptionValueError('No guest by the name "{}" '
                                            'found'.format(val))
        if len(pids) > 1:
            # str() each entry: the pids appear to be integers (they
            # share a dest with the int-typed '-p' option), and
            # str.join() rejects non-string items.
            raise optparse.OptionValueError('Multiple processes found (pids: '
                                            '{}) - use "-p" to specify a pid '
                                            'instead'
                                            .format(" ".join(str(pid)
                                                             for pid in pids)))
        parser.values.pid = pids[0]

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-i', '--debugfs-include-past',
                         action='store_true',
                         default=False,
                         dest='dbgfs_include_past',
                         help='include all available data on past events for '
                              'debugfs',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=DEFAULT_REGEX,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    optparser.add_option('-g', '--guest',
                         action='callback',
                         type='string',
                         dest='pid',
                         metavar='GUEST',
                         help='restrict statistics to guest by name',
                         callback=cb_guest_to_pid,
                         )
    (options, _) = optparser.parse_args(sys.argv)
    return options
1451
1452
def check_access(options):
    """Exits if the current user can't access all needed directories.

    Checks for /sys/kernel/debug, PATH_DEBUGFS_KVM and, when tracepoint
    statistics would be used, PATH_DEBUGFS_TRACING.  A missing tracing
    directory is fatal only if '-t' was requested explicitly; otherwise
    options.debugfs is switched on as a fallback.

    Returns the (possibly modified) options object.

    """
    if not os.path.exists('/sys/kernel/debug'):
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    # Tracepoints are used when requested with '-t' and also by default,
    # i.e. whenever '-d' was not given.
    wants_tracing = options.tracepoints or not options.debugfs
    if not os.path.exists(PATH_DEBUGFS_TRACING) and wants_tracing:
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # presumably leaves the notice on screen long enough to be read
        # before the caller starts its own output -- TODO confirm
        time.sleep(5)

    return options
1481
1482
def main():
    """Parses the options, validates them and starts the chosen mode.

    Exits if a pid was given that has no directory below /proc/.
    Otherwise runs logging mode for '-l', batch mode for '-1' and the
    text ui when neither was requested.

    """
    options = check_access(get_options())

    if options.pid > 0:
        proc_entry = os.path.join('/proc/', str(options.pid))
        if not os.path.isdir(proc_entry):
            sys.stderr.write('Did you use a (unsupported) tid instead of a '
                             'pid?\n')
            sys.exit('Specified pid does not exist.')

    stats = Stats(options)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()


if __name__ == "__main__":
    main()