]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/kvm/kvm_stat/kvm_stat
tools/kvm_stat: add option '--guest'
[mirror_ubuntu-bionic-kernel.git] / tools / kvm / kvm_stat / kvm_stat
1 #!/usr/bin/python
2 #
3 # top-like utility for displaying kvm statistics
4 #
5 # Copyright 2006-2008 Qumranet Technologies
6 # Copyright 2008-2011 Red Hat, Inc.
7 #
8 # Authors:
9 # Avi Kivity <avi@redhat.com>
10 #
11 # This work is licensed under the terms of the GNU GPL, version 2. See
12 # the COPYING file in the top-level directory.
13 """The kvm_stat module outputs statistics about running KVM VMs
14
15 Three different ways of output formatting are available:
16 - as a top-like text ui
17 - in a key -> value format
18 - in an all keys, all values format
19
20 The data is sampled from the KVM's debugfs entries and its perf events.
21 """
22
23 import curses
24 import sys
25 import os
26 import time
27 import optparse
28 import ctypes
29 import fcntl
30 import resource
31 import struct
32 import re
33 import subprocess
34 from collections import defaultdict
35
# Exit reason name -> numeric code.  Arch.get_arch() hands this table to
# ArchX86 when /proc/cpuinfo lists the 'vmx' flag, and get_filters() then
# offers each entry as an 'exit_reason' filter value for the kvm_exit event.
# NOTE(review): the numbers presumably mirror the kernel's VMX exit reason
# definitions -- confirm against the kernel headers before extending.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI': 0,
    'EXTERNAL_INTERRUPT': 1,
    'TRIPLE_FAULT': 2,
    'PENDING_INTERRUPT': 7,
    'NMI_WINDOW': 8,
    'TASK_SWITCH': 9,
    'CPUID': 10,
    'HLT': 12,
    'INVLPG': 14,
    'RDPMC': 15,
    'RDTSC': 16,
    'VMCALL': 18,
    'VMCLEAR': 19,
    'VMLAUNCH': 20,
    'VMPTRLD': 21,
    'VMPTRST': 22,
    'VMREAD': 23,
    'VMRESUME': 24,
    'VMWRITE': 25,
    'VMOFF': 26,
    'VMON': 27,
    'CR_ACCESS': 28,
    'DR_ACCESS': 29,
    'IO_INSTRUCTION': 30,
    'MSR_READ': 31,
    'MSR_WRITE': 32,
    'INVALID_STATE': 33,
    'MWAIT_INSTRUCTION': 36,
    'MONITOR_INSTRUCTION': 39,
    'PAUSE_INSTRUCTION': 40,
    'MCE_DURING_VMENTRY': 41,
    'TPR_BELOW_THRESHOLD': 43,
    'APIC_ACCESS': 44,
    'EPT_VIOLATION': 48,
    'EPT_MISCONFIG': 49,
    'WBINVD': 54,
    'XSETBV': 55,
    'APIC_WRITE': 56,
    'INVPCID': 58,
}
77
# Exit reason name -> numeric code.  Arch.get_arch() hands this table to
# ArchX86 when /proc/cpuinfo lists the 'svm' flag (and not 'vmx'), and
# get_filters() then offers each entry as an 'exit_reason' filter value for
# the kvm_exit event.
# NOTE(review): the numbers presumably mirror the kernel's SVM exit code
# definitions -- confirm against the kernel headers before extending.
SVM_EXIT_REASONS = {
    'READ_CR0': 0x000,
    'READ_CR3': 0x003,
    'READ_CR4': 0x004,
    'READ_CR8': 0x008,
    'WRITE_CR0': 0x010,
    'WRITE_CR3': 0x013,
    'WRITE_CR4': 0x014,
    'WRITE_CR8': 0x018,
    'READ_DR0': 0x020,
    'READ_DR1': 0x021,
    'READ_DR2': 0x022,
    'READ_DR3': 0x023,
    'READ_DR4': 0x024,
    'READ_DR5': 0x025,
    'READ_DR6': 0x026,
    'READ_DR7': 0x027,
    'WRITE_DR0': 0x030,
    'WRITE_DR1': 0x031,
    'WRITE_DR2': 0x032,
    'WRITE_DR3': 0x033,
    'WRITE_DR4': 0x034,
    'WRITE_DR5': 0x035,
    'WRITE_DR6': 0x036,
    'WRITE_DR7': 0x037,
    'EXCP_BASE': 0x040,
    'INTR': 0x060,
    'NMI': 0x061,
    'SMI': 0x062,
    'INIT': 0x063,
    'VINTR': 0x064,
    'CR0_SEL_WRITE': 0x065,
    'IDTR_READ': 0x066,
    'GDTR_READ': 0x067,
    'LDTR_READ': 0x068,
    'TR_READ': 0x069,
    'IDTR_WRITE': 0x06a,
    'GDTR_WRITE': 0x06b,
    'LDTR_WRITE': 0x06c,
    'TR_WRITE': 0x06d,
    'RDTSC': 0x06e,
    'RDPMC': 0x06f,
    'PUSHF': 0x070,
    'POPF': 0x071,
    'CPUID': 0x072,
    'RSM': 0x073,
    'IRET': 0x074,
    'SWINT': 0x075,
    'INVD': 0x076,
    'PAUSE': 0x077,
    'HLT': 0x078,
    'INVLPG': 0x079,
    'INVLPGA': 0x07a,
    'IOIO': 0x07b,
    'MSR': 0x07c,
    'TASK_SWITCH': 0x07d,
    'FERR_FREEZE': 0x07e,
    'SHUTDOWN': 0x07f,
    'VMRUN': 0x080,
    'VMMCALL': 0x081,
    'VMLOAD': 0x082,
    'VMSAVE': 0x083,
    'STGI': 0x084,
    'CLGI': 0x085,
    'SKINIT': 0x086,
    'RDTSCP': 0x087,
    'ICEBP': 0x088,
    'WBINVD': 0x089,
    'MONITOR': 0x08a,
    'MWAIT': 0x08b,
    'MWAIT_COND': 0x08c,
    'XSETBV': 0x08d,
    'NPF': 0x400,
}
152
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Exit reason name -> numeric code; ArchA64 exposes this table as its
# exit_reasons, which get_filters() turns into 'exit_reason' filter values
# for the kvm_exit event.
AARCH64_EXIT_REASONS = {
    'UNKNOWN': 0x00,
    'WFI': 0x01,
    'CP15_32': 0x03,
    'CP15_64': 0x04,
    'CP14_MR': 0x05,
    'CP14_LS': 0x06,
    'FP_ASIMD': 0x07,
    'CP10_ID': 0x08,
    'CP14_64': 0x0C,
    'ILL_ISS': 0x0E,
    'SVC32': 0x11,
    'HVC32': 0x12,
    'SMC32': 0x13,
    'SVC64': 0x15,
    'HVC64': 0x16,
    'SMC64': 0x17,
    'SYS64': 0x18,
    'IABT': 0x20,
    'IABT_HYP': 0x21,
    'PC_ALIGN': 0x22,
    'DABT': 0x24,
    'DABT_HYP': 0x25,
    'SP_ALIGN': 0x26,
    'FP_EXC32': 0x28,
    'FP_EXC64': 0x2C,
    'SERROR': 0x2F,
    'BREAKPT': 0x30,
    'BREAKPT_HYP': 0x31,
    'SOFTSTP': 0x32,
    'SOFTSTP_HYP': 0x33,
    'WATCHPT': 0x34,
    'WATCHPT_HYP': 0x35,
    'BKPT32': 0x38,
    'VECTOR32': 0x3A,
    'BRK64': 0x3C,
}
191
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Exit reason name -> numeric code; get_filters() always offers these as
# 'reason' filter values for the kvm_userspace_exit event, independent of
# the detected architecture.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN': 0,
    'EXCEPTION': 1,
    'IO': 2,
    'HYPERCALL': 3,
    'DEBUG': 4,
    'HLT': 5,
    'MMIO': 6,
    'IRQ_WINDOW_OPEN': 7,
    'SHUTDOWN': 8,
    'FAIL_ENTRY': 9,
    'INTR': 10,
    'SET_TPR': 11,
    'TPR_ACCESS': 12,
    'S390_SIEIC': 13,
    'S390_RESET': 14,
    'DCR': 15,
    'NMI': 16,
    'INTERNAL_ERROR': 17,
    'OSI': 18,
    'PAPR_HCALL': 19,
    'S390_UCONTROL': 20,
    'WATCHDOG': 21,
    'S390_TSCH': 22,
    'EPR': 23,
    'SYSTEM_EVENT': 24,
}
220
# Default ioctl request numbers issued on perf event file descriptors by
# the Event class (SET_FILTER in setup_event(), the others in enable(),
# disable() and reset()).  Every Arch subclass starts from this table;
# ArchPPC replaces all four values with its own.
IOCTL_NUMBERS = {
    'SET_FILTER': 0x40082406,
    'ENABLE': 0x00002400,
    'DISABLE': 0x00002401,
    'RESET': 0x00002403,
}
227
228
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance matching the running machine.

        The machine name reported by os.uname() selects the PPC, aarch64
        and s390 variants.  Every other machine is treated as x86, where
        the first 'flags' line of /proc/cpuinfo decides between the VMX
        and the SVM exit reason table.

        Returns None if that line advertises neither 'vmx' nor 'svm', or
        if /proc/cpuinfo contains no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager closes /proc/cpuinfo on every return path.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # No hardware virtualization flag on this CPU.
                return None
        return None
258
259
class ArchX86(Arch):
    """Architecture data for x86 hosts.

    The exit reason table differs between the two x86 virtualization
    flavours, so the caller passes in the one to use.

    """
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number that perf_event_open() passes to syscall().
        self.sc_perf_evt_open = 298
265
266
class ArchPPC(Arch):
    """Architecture data for PPC hosts.

    PPC uses its own values for all four perf ioctl numbers and offers no
    exit reasons to filter on (exit_reasons is an empty dict).

    """
    def __init__(self):
        # Syscall number that perf_event_open() passes to syscall().
        self.sc_perf_evt_open = 319

        # Work on a private copy: patching IOCTL_NUMBERS itself would
        # silently change the numbers seen by every other user of the
        # module-level table.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        self.exit_reasons = {}
280
281
class ArchA64(Arch):
    """Architecture data for aarch64 hosts."""

    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number that perf_event_open() passes to syscall().
        self.sc_perf_evt_open = 241
287
288
class ArchS390(Arch):
    """Architecture data for s390 hosts.

    exit_reasons is None, so get_filters() adds no kvm_exit filter here.

    """
    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number that perf_event_open() passes to syscall().
        self.sc_perf_evt_open = 331
294
# Architecture description of the host, resolved once when the module is
# loaded.  Can be None: the x86 path of Arch.get_arch() has a bare return.
ARCH = Arch.get_arch()
296
297
def walkdir(path):
    """Returns the os.walk() result for the top level of a directory.

    Only the first entry produced by os.walk() is used, i.e. the 3-tuple
    (dirpath, dirnames, filenames) describing 'path' itself.
    """
    walker = os.walk(path)
    top_level = next(walker)
    return top_level
305
306
def parse_int_list(list_string):
    """Converts a comma separated list of integers and ranges to ints.

    A plain member such as '5' contributes that single integer, a member
    such as '0-3' contributes every integer from 0 to 3 inclusive.  The
    result keeps the order of the members in the string.
    """
    result = []

    for token in list_string.split(','):
        if '-' in token:
            bounds = token.split('-')
            first = int(bounds[0])
            last = int(bounds[1])
            # The upper bound is part of the range.
            result.extend(range(first, last + 1))
        else:
            result.append(int(token))

    return result
322
323
def get_pid_from_gname(gname):
    """Fuzzy function to convert guest name to QEMU process pid.

    Scans the 'ps' listing of all processes and collects the pid of every
    process whose command line contains ' -name ' and whose guest name, as
    extracted by get_gname_from_pid(), equals gname.

    Returns a list of potential pids, can be empty if no match found.
    Throws an exception on processing errors.

    """
    pids = []
    try:
        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                 stdout=subprocess.PIPE)
    except OSError:
        # Callers only care that *an* exception signals the failure, so
        # keep raising the generic type they have always seen.
        raise Exception('unable to run ps')

    try:
        for line in child.stdout:
            fields = line.lstrip().split(' ', 1)
            if len(fields) < 2:
                # A pid without any arguments cannot carry '-name'.
                continue
            pid, args = fields
            # perform a sanity check before calling the more expensive
            # function to possibly extract the guest name
            if ' -name ' in args and gname == get_gname_from_pid(pid):
                pids.append(int(pid))
    finally:
        # Always release the pipe and reap the child, otherwise every
        # lookup leaves a zombie 'ps' process behind.
        child.stdout.close()
        child.wait()

    return pids
346
347
def get_gname_from_pid(pid):
    """Returns the guest name for a QEMU process pid.

    Extracts the guest name from the QEMU command line by processing the
    '-name' option. Will also handle names specified out of sequence.

    Returns an empty string if /proc/<pid>/cmdline cannot be read, if the
    command line has no '-name' option, or if the option cannot be parsed.

    """
    name = ''
    try:
        # The context manager closes the file even if read() fails.
        with open('/proc/{}/cmdline'.format(pid), 'rb') as cmdline:
            line = cmdline.read().split('\0')
        parms = line[line.index('-name') + 1].split(',')
        while '' in parms:
            # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in
            # ['foo', '', 'bar'], which we revert here
            idx = parms.index('')
            parms[idx - 1] += ',' + parms[idx + 1]
            del parms[idx:idx+2]
        # the '-name' switch allows for two ways to specify the guest name,
        # where the plain name overrides the name specified via 'guest='
        for arg in parms:
            if '=' not in arg:
                name = arg
                break
            if arg[:6] == 'guest=':
                name = arg[6:]
    except (ValueError, IOError, IndexError):
        # Best effort only: an unreadable or unparsable command line simply
        # means "no guest name known".
        pass

    return name
377
378
def get_online_cpus():
    """Returns the ids of all online cpus as a list of integers.

    The ids come from the first line of /sys/devices/system/cpu/online,
    which is handed to parse_int_list().
    """
    with open('/sys/devices/system/cpu/online') as online_file:
        online_spec = online_file.readline()
    return parse_int_list(online_spec)
384
385
def get_filters():
    """Returns the filterable trace events and their filter values.

    The result maps a trace event name to a tuple of (filter identifier,
    dict of value name -> value).  A filter string set via ioctl normally
    has the format identifier==value, and a separate event is created for
    each filter value so that the events can be told apart.

    kvm_userspace_exit is always filterable; kvm_exit only if the detected
    architecture provides exit reasons.

    """
    filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}
    arch_reasons = ARCH.exit_reasons
    if arch_reasons:
        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
    return filters
401
# Handle on the C library.  use_errno=True makes ctypes keep the errno of
# calls made through this handle, which setup_event() retrieves with
# ctypes.get_errno().
libc = ctypes.CDLL('libc.so.6', use_errno=True)
# Generic syscall entry point, used by perf_event_open().
syscall = libc.syscall
404
405
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Initializes a tracepoint attribute with group read format."""
        # Name the class explicitly: super(self.__class__, self) would
        # recurse forever as soon as somebody subclasses this struct.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        # The struct carries its own size in the 'size' field.
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
433
434
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Invokes the sys_perf_evt_open() syscall via libc's syscall().

    Sets up a performance event and returns its file descriptor, or -1
    on error.

    The syscall receives, in this order:
    - the architecture's syscall number
    - a pointer to attr (struct perf_event_attr *)
    - pid, or -1 to monitor all pids
    - cpu number, or -1 to monitor all cpus
    - the file descriptor of the group leader, or -1 to create a group
    - flags

    """
    syscall_nr = ARCH.sc_perf_evt_open
    attr_ptr = ctypes.pointer(attr)
    c_pid = ctypes.c_int(pid)
    c_cpu = ctypes.c_int(cpu)
    c_group_fd = ctypes.c_int(group_fd)
    c_flags = ctypes.c_long(flags)
    return syscall(syscall_nr, attr_ptr, c_pid, c_cpu, c_group_fd, c_flags)
453
# Values perf_event_attr.__init__() stores in the struct's 'type' and
# 'read_format' fields.
# NOTE(review): presumably taken from include/uapi/linux/perf_event.h,
# which the perf_event_attr docstring refers to -- confirm.
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3

# Directory holding the trace event definitions (events/<set>/<name>/id).
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
# Directory holding the KVM statistics files read by DebugfsProvider.
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
459
460
class Group(object):
    """A perf event group: an ordered list of events read as one unit.

    The first event added acts as the group leader.
    """

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends an event to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict of 'event name: value' for every group member.

        All values are obtained with a single read from the file
        descriptor of the group leader, see perf_event_open(2).

        With the event configuration in use the kernel returns:
        struct read_format {
            u64 nr;            /* The number of events */
            struct {
                u64 value;     /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [member.name for member in self.events]
        # 'nr' is skipped as eight pad bytes, followed by one u64 per event.
        layout = 'xxxxxxxx' + 'Q' * count
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        values = struct.unpack(layout, raw)
        return dict(zip(names, values))
492
493
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Registers the event in the kernel.

        name         -- name under which the event's value is reported
        group        -- Group the event will belong to; its first event,
                        if any, is used as group leader
        trace_cpu    -- cpu to trace, or -1 for any
        trace_pid    -- pid to trace, or -1 for any
        trace_point  -- name of the trace point below the trace set
        trace_filter -- filter string to set on the event, or None
        trace_set    -- trace event subsystem, 'kvm' by default

        Raises OSError if the event could not be opened.
        """
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        fd = getattr(self, 'fd', None)
        # Compare against None: 0 is a valid descriptor.
        if fd is not None:
            # Forget the descriptor first so that a second call can never
            # close a number the OS has meanwhile handed out again.
            self.fd = None
            os.close(fd)

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct."""

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.

        """

        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, trace_pid,
                             trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Take ownership before touching the descriptor again, so that
        # __del__ closes it even if setting the filter fails below.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
576
577
class TracepointProvider(object):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self._pid = 0

    def get_available_fields(self):
        """Returns a list of available event's of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data.

        One group is created per thread of the monitored pid or, without a
        pid, per online cpu.  The NOFILE rlimit is raised beforehand to fit
        one descriptor per event; the program exits if that fails.

        """
        fields = self.get_available_fields()
        # Decide once whether tracing is per pid, so that the choice of
        # group ids and the perf_event_open() arguments always agree.
        per_pid = self._pid > 0
        if per_pid:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = walkdir(path)[1]
        else:
            groupids = get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        field_pattern = re.compile(r'(.*)\((.*)\)')
        for groupid in groupids:
            group = Group()

            # From perf_event_open(2):
            # pid > 0 and cpu == -1
            # This measures the specified process/thread on any CPU.
            #
            # pid == -1 and cpu >= 0
            # This measures all processes/threads on the specified CPU.
            trace_cpu = -1 if per_pid else groupid
            trace_pid = int(groupid) if per_pid else -1

            for name in fields:
                tracepoint = name
                tracefilter = None
                match = field_pattern.match(name)
                if match:
                    # 'event(sub)' fields trace 'event' filtered for the
                    # value registered under 'sub'.
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    def available_fields(self):
        """Returns the same list as get_available_fields()."""
        return self.get_available_fields()

    @property
    def fields(self):
        """The list of currently wanted field names."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        wanted = set(fields)
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in wanted:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """The monitored pid, 0 meaning no pid restriction."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events.

        Values of the same event are summed up over all groups.
        """
        ret = defaultdict(int)
        # Build the lookup set once instead of scanning the list for
        # every value of every group.
        wanted = set(self._fields)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name in wanted:
                    ret[name] += val
        return ret
718
719
class DebugfsProvider(object):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self):
        self._fields = self.get_available_fields()
        self._pid = 0
        self.do_read = True
        self.paths = []

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @staticmethod
    def _match_vm_dirs(pid, vms):
        """Returns the entries of vms whose name starts with '<pid>-'."""
        # Anchor the match at the start of the name: a plain substring
        # test would let pid 12 also select the directory of pid 312.
        prefix = '{}-'.format(pid)
        return [vm for vm in vms if vm.startswith(prefix)]

    @property
    def fields(self):
        """The list of field (file) names to read."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields

    @property
    def pid(self):
        """The pid the data is restricted to, 0 meaning no restriction."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Restricts reading to the debugfs directories of one pid.

        A pid of 0 removes the restriction and reads the files located
        directly in the kvm debugfs folder.
        """
        # Remember the pid in both cases so that the getter does not keep
        # reporting a stale pid after the restriction was removed.
        self._pid = pid
        if pid != 0:
            vms = walkdir(PATH_DEBUGFS_KVM)[1]
            # Without any per VM directory there is nothing to filter on.
            # Re-evaluate on every change instead of latching False.
            self.do_read = bool(vms)
            self.paths = self._match_vm_dirs(pid, vms)
        else:
            self.paths = ['']
            self.do_read = True

    def read(self):
        """Returns a dict with format:'file name / field -> current value'.

        Values of the same field are summed up over all selected paths.
        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        for path in self.paths:
            for field in self._fields:
                results[field] = (results.get(field, 0) +
                                  self.read_field(field, path))

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.
        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0
788
789
class Stats(object):
    """Front end to the data providers.

    Hands the pid and fields filters down to every provider and merges
    the data the providers deliver.

    """
    def __init__(self, providers, pid, fields=None):
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Hands each provider the subset of its fields matching the filter.

        Without a fields filter every available field is selected.
        """
        # The counters are reset when the fields are updated, so the
        # cached values are of no use anymore either.
        self.values = {}
        for provider in self.providers:
            selected = []
            for key in provider.get_available_fields():
                pattern = self._fields_filter
                if not pattern or re.match(pattern, key) is not None:
                    selected.append(key)
            provider.fields = selected

    def update_provider_pid(self):
        """Hands the pid filter to every provider."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        self._pid_filter = pid
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Returns a dict of field -> (value, delta to the previous value)
        covering the data of all providers."""
        for provider in self.providers:
            sample = provider.read()
            for key in provider.fields:
                # A field seen for the first time starts out from zero.
                previous_total = self.values.get(key, (0, 0))[0]
                current_total = sample.get(key, 0)
                self.values[key] = (current_total,
                                    current_total - previous_total)
        return self.values
857
# Column widths the Tui uses to position the event name and the
# 'Total' column.
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
# Refresh delays used by Tui.show_stats(): the initial delay applies to the
# first refresh and after every handled key press, the regular one
# otherwise.  The values are multiplied by ten for curses.halfdelay().
DELAY_INITIAL = 0.25
DELAY_REGULAR = 3.0
# Guest names and regex filters longer than this are cut off and suffixed
# with '...' in the Tui header.
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
864
865
class Tui(object):
    """Instruments curses to draw a nice text ui."""

    # Fields filter that only lets fields without an opening brace pass,
    # i.e. hides the 'event(filter)' child events.  Kept in one place as a
    # raw string so that all comparisons use the very same pattern.
    _NO_DRILLDOWN_REGEX = r'^[^\(]*$'

    def __init__(self, stats):
        """Stores the stats object and applies the default fields filter."""
        self.stats = stats
        self.screen = None
        self.update_drilldown()

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
           implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
           implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    def update_drilldown(self):
        """Sets or removes a filter that only allows fields without braces."""
        if not self.stats.fields_filter:
            self.stats.fields_filter = self._NO_DRILLDOWN_REGEX

        elif self.stats.fields_filter == self._NO_DRILLDOWN_REGEX:
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.stats.pid_filter = pid

    def refresh_header(self, pid=None):
        """Refreshes the header.

        Uses the pid filter of the stats object if no pid is given.
        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()
        if pid > 0:
            # The guest name is only shown next to a pid, so only look it
            # up when there is one.
            gname = get_gname_from_pid(pid)
            if gname:
                gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                       if len(gname) > MAX_GUEST_NAME_LEN
                                       else gname))
            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
                               .format(pid, gname), curses.A_BOLD)
        else:
            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
        regex = self.stats.fields_filter
        # The default filter is an implementation detail, only show
        # filters entered by the user.
        if regex and regex != self._NO_DRILLDOWN_REGEX:
            if len(regex) > MAX_REGEX_LEN:
                regex = regex[:MAX_REGEX_LEN] + '...'
            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
        self.screen.addstr(2, 1, 'Event')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
                           len('Total'), 'Total')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
                           len('Current'), 'Current')
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def refresh_body(self, sleeptime):
        """Redraws the statistics below the header.

        Fields are listed by descending delta, then descending total,
        until the screen is full or a field with neither total nor delta
        is reached.  The 'Current' column shows the delta divided by
        sleeptime.
        """
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get()

        def sortkey(x):
            if stats[x][1]:
                return (-stats[x][1], -stats[x][0])
            else:
                return (0, -stats[x][0])
        for key in sorted(stats.keys(), key=sortkey):

            if row >= self.screen.getmaxyx()[0]:
                break
            values = stats[key]
            if not values[0] and not values[1]:
                break
            col = 1
            self.screen.addstr(row, col, key)
            col += LABEL_WIDTH
            self.screen.addstr(row, col, '%10d' % (values[0],))
            col += NUMBER_WIDTH
            if values[1] is not None:
                self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
            row += 1
        self.screen.refresh()

    def show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.
        An empty input restores the default filter.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr()
            curses.noecho()
            if len(regex) == 0:
                self.stats.fields_filter = self._NO_DRILLDOWN_REGEX
                self.refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self.refresh_header()
                return
            except re.error:
                # Not a valid regex, ask again.
                continue

    def show_vm_selection_by_pid(self):
        """Draws PID selection mask.

        Asks for a pid until a valid pid or 0 has been entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)

            curses.echo()
            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
            pid = self.screen.getstr()
            curses.noecho()

            try:
                if len(pid) > 0:
                    pid = int(pid)
                    if pid != 0 and not os.path.isdir(os.path.join('/proc/',
                                                                   str(pid))):
                        msg = '"' + str(pid) + '": Not a running process'
                        continue
                else:
                    pid = 0
                self.refresh_header(pid)
                self.update_pid(pid)
                break

            except ValueError:
                msg = '"' + str(pid) + '": Not a valid pid'
                continue

    def show_vm_selection_by_guest_name(self):
        """Draws guest selection mask.

        Asks for a guest name until a valid guest name or '' is entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific guest.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)
            curses.echo()
            self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
            gname = self.screen.getstr()
            curses.noecho()

            if not gname:
                self.refresh_header(0)
                self.update_pid(0)
                break
            else:
                pids = []
                try:
                    pids = get_pid_from_gname(gname)
                except Exception:
                    # Catch Exception, not everything: a Ctrl-C pressed
                    # during the search must not be reported as a search
                    # error.
                    msg = '"' + gname + '": Internal error while searching, ' \
                          'use pid filter instead'
                    continue
                if len(pids) == 0:
                    msg = '"' + gname + '": Not an active guest'
                    continue
                if len(pids) > 1:
                    msg = '"' + gname + '": Multiple matches found, use pid ' \
                          'filter instead'
                    continue
                self.refresh_header(pids[0])
                self.update_pid(pids[0])
                break

    def show_stats(self):
        """Refreshes the screen and processes user input."""
        sleeptime = DELAY_INITIAL
        self.refresh_header()
        while True:
            self.refresh_body(sleeptime)
            # halfdelay() makes getkey() give up after the given number of
            # tenths of a second, which is what paces the refresh.
            curses.halfdelay(int(sleeptime * 10))
            sleeptime = DELAY_REGULAR
            try:
                char = self.screen.getkey()
                if char == 'x':
                    self.refresh_header()
                    self.update_drilldown()
                    sleeptime = DELAY_INITIAL
                if char == 'q':
                    break
                if char == 'f':
                    self.show_filter_selection()
                    sleeptime = DELAY_INITIAL
                if char == 'g':
                    self.show_vm_selection_by_guest_name()
                    sleeptime = DELAY_INITIAL
                if char == 'p':
                    self.show_vm_selection_by_pid()
                    sleeptime = DELAY_INITIAL
            except KeyboardInterrupt:
                break
            except curses.error:
                # No key pressed within the delay, just refresh.
                continue
1117
1118
def batch(stats):
    """Prints one line of 'key, value, delta' per field.

    The statistics are sampled twice, one second apart, and the second
    sample is printed sorted by key.  A keyboard interrupt ends the
    function silently.
    """
    try:
        stats.get()
        time.sleep(1)
        snapshot = stats.get()
        for key in sorted(snapshot.keys()):
            values = snapshot[key]
            print('%-42s%10d%10d' % (key, values[0], values[1]))
    except KeyboardInterrupt:
        pass
1130
1131
def log(stats):
    """Prints the statistics as a block of keys followed by value lines.

    One line of deltas is printed per second; the line of keys is repeated
    in front of every 20th value line.  Runs until interrupted from the
    keyboard.
    """
    keys = sorted(stats.get())

    def banner():
        print(' '.join(['%s' % k for k in keys]))

    def statline():
        s = stats.get()
        print(' '.join([' %9d' % s[k][1] for k in keys]))

    banner_repeat = 20
    line = 0
    try:
        while True:
            time.sleep(1)
            if line % banner_repeat == 0:
                banner()
            statline()
            line += 1
    except KeyboardInterrupt:
        pass
1157
1158
def get_options():
    """Returns processed program arguments.

    Parses sys.argv with optparse and returns the resulting values object
    carrying the attributes once, log, tracepoints, debugfs, fields and
    pid.  Positional arguments are ignored.  Invalid options, or a guest
    name passed to '-g' that cannot be mapped to exactly one pid, make
    optparse print a usage error and exit.
    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/trace/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   f     filter by regular expression
   g     filter by guest name
   p     filter by PID
   q     quit
   x     toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that emits the description text unchanged."""

        def format_description(self, description):
            # Return the text as written instead of letting the base
            # class reformat it, so the layout above is kept.
            if description:
                return description + "\n"
            return ""

    def cb_guest_to_pid(option, opt, val, parser):
        """optparse callback for '-g': stores the pid of guest val."""
        try:
            pids = get_pid_from_gname(val)
        except Exception:
            # Only real lookup errors are reported as a usage problem;
            # KeyboardInterrupt and SystemExit still propagate.
            raise optparse.OptionValueError('Error while searching for guest '
                                            '"{}", use "-p" to specify a pid '
                                            'instead'.format(val))
        if not pids:
            raise optparse.OptionValueError('No guest by the name "{}" '
                                            'found'.format(val))
        if len(pids) > 1:
            # The pids may be integers (assumption, the lookup helper is
            # defined elsewhere); stringify them so join() cannot fail.
            raise optparse.OptionValueError('Multiple processes found (pids: '
                                            '{}) - use "-p" to specify a pid '
                                            'instead'.format(
                                                " ".join(str(pid)
                                                         for pid in pids)))
        parser.values.pid = pids[0]

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    # '-g' shares dest 'pid' with '-p': the callback resolves the guest
    # name and writes the matching pid into the same attribute.
    optparser.add_option('-g', '--guest',
                         action='callback',
                         type='string',
                         dest='pid',
                         metavar='GUEST',
                         help='restrict statistics to guest by name',
                         callback=cb_guest_to_pid,
                         )
    (options, _) = optparser.parse_args(sys.argv)
    return options
1260
1261
def get_providers(options):
    """Returns a list of data providers depending on the passed options.

    A TracepointProvider is included when options.tracepoints is set and
    a DebugfsProvider when options.debugfs is set.  If neither is set,
    a single TracepointProvider is returned.
    """
    selected = []

    if options.tracepoints:
        selected.append(TracepointProvider())
    if options.debugfs:
        selected.append(DebugfsProvider())

    # Nothing requested explicitly: fall back to tracepoints.
    return selected or [TracepointProvider()]
1274
1275
def check_access(options):
    """Exits if the current user can't access all needed directories.

    Checks for /sys/kernel/debug, PATH_DEBUGFS_KVM and, when tracepoints
    are going to be used, PATH_DEBUGFS_TRACING.  A missing path is
    reported on stderr and the program exits with status 1, except when
    tracing is unavailable and '-t' was not requested explicitly: then
    options.debugfs is switched on as a fallback.

    Returns the (possibly modified) options object.
    """
    if not os.path.exists('/sys/kernel/debug'):
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    # Tracepoints are in use when asked for with -t, or by default when
    # -d was not given.
    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                     not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Presumably pauses so the notice can be read before any further
        # output replaces it.
        time.sleep(5)

    return options
1304
1305
def main():
    """Parses the options, sets up the statistics and runs one output mode.

    Exits early when a pid was given that has no directory below /proc.
    Otherwise runs the logging mode, the batch mode or the text ui,
    depending on the options.
    """
    options = check_access(get_options())

    pid = options.pid
    if pid > 0 and not os.path.isdir(os.path.join('/proc/', str(pid))):
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), options.pid, fields=options.fields)

    # Logging mode wins over batch mode; the text ui is the default.
    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()

if __name__ == "__main__":
    main()