]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/kvm/kvm_stat/kvm_stat
a527b2fc66850fc108f8fc9401a833f025625c20
[mirror_ubuntu-bionic-kernel.git] / tools / kvm / kvm_stat / kvm_stat
1 #!/usr/bin/python
2 #
3 # top-like utility for displaying kvm statistics
4 #
5 # Copyright 2006-2008 Qumranet Technologies
6 # Copyright 2008-2011 Red Hat, Inc.
7 #
8 # Authors:
9 # Avi Kivity <avi@redhat.com>
10 #
11 # This work is licensed under the terms of the GNU GPL, version 2. See
12 # the COPYING file in the top-level directory.
13 """The kvm_stat module outputs statistics about running KVM VMs
14
15 Three different ways of output formatting are available:
16 - as a top-like text ui
17 - in a key -> value format
18 - in an all keys, all values format
19
20 The data is sampled from the KVM's debugfs entries and its perf events.
21 """
22
23 import curses
24 import sys
25 import os
26 import time
27 import optparse
28 import ctypes
29 import fcntl
30 import resource
31 import struct
32 import re
33 import subprocess
34 from collections import defaultdict
35
# Maps VMX exit reason names to their numeric codes.  Arch.get_arch()
# hands this table to ArchX86 when /proc/cpuinfo lists the 'vmx' flag;
# get_filters() then uses the values to build 'exit_reason==<code>'
# filters for the kvm_exit tracepoint.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI': 0,
    'EXTERNAL_INTERRUPT': 1,
    'TRIPLE_FAULT': 2,
    'PENDING_INTERRUPT': 7,
    'NMI_WINDOW': 8,
    'TASK_SWITCH': 9,
    'CPUID': 10,
    'HLT': 12,
    'INVLPG': 14,
    'RDPMC': 15,
    'RDTSC': 16,
    'VMCALL': 18,
    'VMCLEAR': 19,
    'VMLAUNCH': 20,
    'VMPTRLD': 21,
    'VMPTRST': 22,
    'VMREAD': 23,
    'VMRESUME': 24,
    'VMWRITE': 25,
    'VMOFF': 26,
    'VMON': 27,
    'CR_ACCESS': 28,
    'DR_ACCESS': 29,
    'IO_INSTRUCTION': 30,
    'MSR_READ': 31,
    'MSR_WRITE': 32,
    'INVALID_STATE': 33,
    'MWAIT_INSTRUCTION': 36,
    'MONITOR_INSTRUCTION': 39,
    'PAUSE_INSTRUCTION': 40,
    'MCE_DURING_VMENTRY': 41,
    'TPR_BELOW_THRESHOLD': 43,
    'APIC_ACCESS': 44,
    'EPT_VIOLATION': 48,
    'EPT_MISCONFIG': 49,
    'WBINVD': 54,
    'XSETBV': 55,
    'APIC_WRITE': 56,
    'INVPCID': 58,
}
77
# Maps SVM exit reason names to their numeric codes.  Arch.get_arch()
# hands this table to ArchX86 when /proc/cpuinfo lists the 'svm' flag;
# get_filters() then uses the values to build 'exit_reason==<code>'
# filters for the kvm_exit tracepoint.
SVM_EXIT_REASONS = {
    'READ_CR0': 0x000,
    'READ_CR3': 0x003,
    'READ_CR4': 0x004,
    'READ_CR8': 0x008,
    'WRITE_CR0': 0x010,
    'WRITE_CR3': 0x013,
    'WRITE_CR4': 0x014,
    'WRITE_CR8': 0x018,
    'READ_DR0': 0x020,
    'READ_DR1': 0x021,
    'READ_DR2': 0x022,
    'READ_DR3': 0x023,
    'READ_DR4': 0x024,
    'READ_DR5': 0x025,
    'READ_DR6': 0x026,
    'READ_DR7': 0x027,
    'WRITE_DR0': 0x030,
    'WRITE_DR1': 0x031,
    'WRITE_DR2': 0x032,
    'WRITE_DR3': 0x033,
    'WRITE_DR4': 0x034,
    'WRITE_DR5': 0x035,
    'WRITE_DR6': 0x036,
    'WRITE_DR7': 0x037,
    'EXCP_BASE': 0x040,
    'INTR': 0x060,
    'NMI': 0x061,
    'SMI': 0x062,
    'INIT': 0x063,
    'VINTR': 0x064,
    'CR0_SEL_WRITE': 0x065,
    'IDTR_READ': 0x066,
    'GDTR_READ': 0x067,
    'LDTR_READ': 0x068,
    'TR_READ': 0x069,
    'IDTR_WRITE': 0x06a,
    'GDTR_WRITE': 0x06b,
    'LDTR_WRITE': 0x06c,
    'TR_WRITE': 0x06d,
    'RDTSC': 0x06e,
    'RDPMC': 0x06f,
    'PUSHF': 0x070,
    'POPF': 0x071,
    'CPUID': 0x072,
    'RSM': 0x073,
    'IRET': 0x074,
    'SWINT': 0x075,
    'INVD': 0x076,
    'PAUSE': 0x077,
    'HLT': 0x078,
    'INVLPG': 0x079,
    'INVLPGA': 0x07a,
    'IOIO': 0x07b,
    'MSR': 0x07c,
    'TASK_SWITCH': 0x07d,
    'FERR_FREEZE': 0x07e,
    'SHUTDOWN': 0x07f,
    'VMRUN': 0x080,
    'VMMCALL': 0x081,
    'VMLOAD': 0x082,
    'VMSAVE': 0x083,
    'STGI': 0x084,
    'CLGI': 0x085,
    'SKINIT': 0x086,
    'RDTSCP': 0x087,
    'ICEBP': 0x088,
    'WBINVD': 0x089,
    'MONITOR': 0x08a,
    'MWAIT': 0x08b,
    'MWAIT_COND': 0x08c,
    'XSETBV': 0x08d,
    'NPF': 0x400,
}
152
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Used by ArchA64 as its exit reason table, i.e. as the values for the
# 'exit_reason==<code>' filters that get_filters() sets up for kvm_exit.
AARCH64_EXIT_REASONS = {
    'UNKNOWN': 0x00,
    'WFI': 0x01,
    'CP15_32': 0x03,
    'CP15_64': 0x04,
    'CP14_MR': 0x05,
    'CP14_LS': 0x06,
    'FP_ASIMD': 0x07,
    'CP10_ID': 0x08,
    'CP14_64': 0x0C,
    'ILL_ISS': 0x0E,
    'SVC32': 0x11,
    'HVC32': 0x12,
    'SMC32': 0x13,
    'SVC64': 0x15,
    'HVC64': 0x16,
    'SMC64': 0x17,
    'SYS64': 0x18,
    'IABT': 0x20,
    'IABT_HYP': 0x21,
    'PC_ALIGN': 0x22,
    'DABT': 0x24,
    'DABT_HYP': 0x25,
    'SP_ALIGN': 0x26,
    'FP_EXC32': 0x28,
    'FP_EXC64': 0x2C,
    'SERROR': 0x2F,
    'BREAKPT': 0x30,
    'BREAKPT_HYP': 0x31,
    'SOFTSTP': 0x32,
    'SOFTSTP_HYP': 0x33,
    'WATCHPT': 0x34,
    'WATCHPT_HYP': 0x35,
    'BKPT32': 0x38,
    'VECTOR32': 0x3A,
    'BRK64': 0x3C,
}
191
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# get_filters() uses the values to build 'reason==<code>' filters for the
# kvm_userspace_exit tracepoint on every architecture.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN': 0,
    'EXCEPTION': 1,
    'IO': 2,
    'HYPERCALL': 3,
    'DEBUG': 4,
    'HLT': 5,
    'MMIO': 6,
    'IRQ_WINDOW_OPEN': 7,
    'SHUTDOWN': 8,
    'FAIL_ENTRY': 9,
    'INTR': 10,
    'SET_TPR': 11,
    'TPR_ACCESS': 12,
    'S390_SIEIC': 13,
    'S390_RESET': 14,
    'DCR': 15,
    'NMI': 16,
    'INTERNAL_ERROR': 17,
    'OSI': 18,
    'PAPR_HCALL': 19,
    'S390_UCONTROL': 20,
    'WATCHDOG': 21,
    'S390_TSCH': 22,
    'EPR': 23,
    'SYSTEM_EVENT': 24,
}
220
# Default ioctl request numbers that the Event class issues on perf event
# file descriptors (set a filter, enable, disable and reset a counter).
# The Arch subclasses expose them as 'ioctl_numbers'; ArchPPC replaces
# all four values with its own encodings.
IOCTL_NUMBERS = {
    'SET_FILTER': 0x40082406,
    'ENABLE': 0x00002400,
    'DISABLE': 0x00002401,
    'RESET': 0x00002403,
}
227
228
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance matching the host machine.

        The machine name reported by os.uname() selects the PPC, AArch64
        and s390 variants.  Everything else is treated as x86, where the
        first 'flags' line of /proc/cpuinfo decides between the VMX and
        SVM exit reason tables.

        Returns None if /proc/cpuinfo has no 'flags' line or if that line
        lists neither 'vmx' nor 'svm'.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The with-statement closes /proc/cpuinfo on every exit path.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is inspected.
                return None
        return None
258
259
class ArchX86(Arch):
    """Architecture data for x86 hosts.

    The exit reason table (VMX or SVM) is chosen by the caller and passed
    in as exit_reasons.

    """
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number passed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 298
265
266
class ArchPPC(Arch):
    """Architecture data for PPC hosts.

    PPC uses its own ioctl number encodings and provides no exit reason
    table (exit_reasons is an empty dict).

    """
    def __init__(self):
        # Number passed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 319

        # Work on a private copy so that the module-wide IOCTL_NUMBERS
        # defaults are not overwritten as a side effect.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        self.exit_reasons = {}
280
281
class ArchA64(Arch):
    """Architecture data for AArch64 hosts."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number passed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 241
287
288
class ArchS390(Arch):
    """Architecture data for s390 hosts; no exit reason table is set."""
    def __init__(self):
        # None makes get_filters() skip the kvm_exit filter entirely.
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number passed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 331
294
# Architecture description of the host, determined once at import time.
# It supplies the perf syscall number, the ioctl numbers and the exit
# reason table used by the rest of the module.
ARCH = Arch.get_arch()
296
297
def walkdir(path):
    """Returns the top-level os.walk() entry for the specified directory.

    The result is the usual 3-tuple of (dirpath, dirnames, filenames);
    sub-directories are not descended into.
    """
    walker = os.walk(path)
    return next(walker)
305
306
def parse_int_list(list_string):
    """Converts a comma separated string of integers and inclusive
    integer ranges (e.g. '0-3,5') into a flat list of ints."""
    integers = []

    for chunk in list_string.split(','):
        if '-' in chunk:
            # 'first-last' denotes an inclusive range.
            bounds = chunk.split('-')
            first = int(bounds[0])
            last = int(bounds[1])
            integers += range(first, last + 1)
            continue
        integers.append(int(chunk))

    return integers
322
323
def get_pid_from_gname(gname):
    """Fuzzy function to convert guest name to QEMU process pid.

    Lists all processes with 'ps' and keeps those whose command line
    contains ' -name ' and whose guest name, as extracted by
    get_gname_from_pid(), equals gname.

    Returns a list of potential pids, can be empty if no match found.
    Throws an exception on processing errors.

    """
    pids = []
    try:
        # universal_newlines makes the pipe deliver text lines, so the
        # string comparisons below work on Python 2 and 3 alike.
        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
    except OSError as err:
        raise Exception('Failed to run ps: {0}'.format(err))

    try:
        for line in child.stdout:
            columns = line.lstrip().split(' ', 1)
            if len(columns) < 2:
                # No args column, so this cannot be a guest.
                continue
            pid, args = columns
            # perform a sanity check before calling the more expensive
            # function to possibly extract the guest name
            if ' -name ' in args and gname == get_gname_from_pid(pid):
                pids.append(int(pid))
    finally:
        child.stdout.close()
        # Reap the child so that it does not linger as a zombie.
        child.wait()

    return pids
346
347
def get_gname_from_pid(pid):
    """Returns the guest name for a QEMU process pid.

    Extracts the guest name from the QEMU command line by processing the
    '-name' option. Will also handle names specified out of sequence.

    Returns an empty string if /proc/<pid>/cmdline cannot be read, if
    there is no '-name' option or if its value cannot be parsed.

    """
    name = ''
    try:
        # Text mode keeps the '\0' split working on Python 2 and 3; the
        # with-statement closes the file right after reading.
        with open('/proc/{}/cmdline'.format(pid), 'r') as cmdline:
            line = cmdline.read().split('\0')
        parms = line[line.index('-name') + 1].split(',')
        while '' in parms:
            # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in
            # ['foo', '', 'bar'], which we revert here
            idx = parms.index('')
            parms[idx - 1] += ',' + parms[idx + 1]
            del parms[idx:idx+2]
        # the '-name' switch allows for two ways to specify the guest name,
        # where the plain name overrides the name specified via 'guest='
        for arg in parms:
            if '=' not in arg:
                name = arg
                break
            if arg[:6] == 'guest=':
                name = arg[6:]
    except (ValueError, IOError, IndexError):
        # Best effort: an unreadable or unparsable command line simply
        # means that no guest name is known.
        pass

    return name
377
378
def get_online_cpus():
    """Returns the ids of all online cpus as a list of integers.

    The ids are parsed from the first line of
    /sys/devices/system/cpu/online.
    """
    with open('/sys/devices/system/cpu/online') as online:
        first_line = online.readline()
    return parse_int_list(first_line)
384
385
def get_filters():
    """Returns a dict that maps a trace event name to a tuple of the
    filter identifier and the dict of values that can be filtered for.

    A trace event can be restricted to a special value by setting a
    filter string of the format identifier==value via an ioctl. One
    additional event is created per filter value, so that the values
    can be told apart.

    """
    filters = {
        'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS),
    }
    # Architectures without an exit reason table get no kvm_exit filter.
    if ARCH.exit_reasons:
        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
    return filters
401
# Handle to the C library.  use_errno=True makes ctypes keep a private
# copy of errno, which Event.setup_event() reads via ctypes.get_errno().
libc = ctypes.CDLL('libc.so.6', use_errno=True)
# Generic syscall entry point of libc, called by perf_event_open().
syscall = libc.syscall
404
405
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Presets type, size and read_format for a grouped tracepoint
        event; 'config' is left for the caller to fill in."""
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this class is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
433
434
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Wrapper for the sys_perf_evt_open() syscall.

    Used to set up performance events, returns a file descriptor or -1
    on error.

    The arguments handed to the syscall are:
    - syscall number (taken from ARCH)
    - struct perf_event_attr *
    - pid or -1 to monitor all pids
    - cpu number or -1 to monitor all cpus
    - The file descriptor of the group leader or -1 to create a group.
    - flags

    """
    syscall_args = (ctypes.pointer(attr),
                    ctypes.c_int(pid),
                    ctypes.c_int(cpu),
                    ctypes.c_int(group_fd),
                    ctypes.c_long(flags))
    return syscall(ARCH.sc_perf_evt_open, *syscall_args)
453
# Value for perf_event_attr.type, set by the perf_event_attr constructor.
PERF_TYPE_TRACEPOINT = 2
# Value for perf_event_attr.read_format; Group.read() relies on it to get
# the values of all events of a group with one read on the group leader.
PERF_FORMAT_GROUP = 1 << 3

# Root of the tracing debugfs tree; the event ids are read from below it.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
# Directory with the KVM statistics files read by DebugfsProvider.
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
459
460
class Group(object):
    """Represents a perf event group.

    The first event that is added acts as the group leader.

    """

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends an event to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        All values are fetched with a single read on the file descriptor
        of the group leader event. See perf_event_open(2) for details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [event.name for event in self.events]
        # One u64 for 'nr' plus one u64 per event.
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        # The eight pad bytes skip 'nr'.
        values = struct.unpack('xxxxxxxx' + 'Q' * count, raw)
        return dict(zip(names, values))
492
493
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Opens the perf event and optionally sets a filter on it.

        name is the key under which the event is reported, group the
        Group the event will belong to, trace_cpu/trace_pid are passed to
        perf_event_open() and trace_set/trace_point select the tracepoint
        directory below the tracing 'events' directory. trace_filter is
        an optional filter string.

        Raises OSError if the perf event cannot be opened.

        """
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        fd = getattr(self, 'fd', None)
        # Compare against None: 0 is a valid file descriptor.
        if fd is not None:
            # Forget the descriptor first, so that a second call can
            # never close an unrelated, reused descriptor.
            self.fd = None
            os.close(fd)

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct."""

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        """

        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, trace_pid,
                             trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Remember the descriptor before touching it any further, so that
        # __del__ closes it even if setting the filter fails.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
576
577
class TracepointProvider(object):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self._pid = 0

    def get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self.get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = walkdir(path)[1]
        else:
            groupids = get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        # The tracepoint and filter of a field do not depend on the group,
        # so they are worked out once instead of once per group.
        traces = []
        for name in fields:
            tracepoint = name
            tracefilter = None
            match = re.match(r'(.*)\((.*)\)', name)
            if match:
                tracepoint, sub = match.groups()
                tracefilter = ('%s==%d\0' %
                               (self.filters[tracepoint][0],
                                self.filters[tracepoint][1][sub]))
            traces.append((name, tracepoint, tracefilter))

        for groupid in groupids:
            # From perf_event_open(2):
            # pid > 0 and cpu == -1
            # This measures the specified process/thread on any CPU.
            #
            # pid == -1 and cpu >= 0
            # This measures all processes/threads on the specified CPU.
            trace_cpu = groupid if self._pid == 0 else -1
            trace_pid = int(groupid) if self._pid != 0 else -1

            group = Group()
            for name, tracepoint, tracefilter in traces:
                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    def available_fields(self):
        """Returns the same list as get_available_fields()."""
        return self.get_available_fields()

    @property
    def fields(self):
        """The list of currently wanted event names."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """The monitored pid, 0 if all cpus are monitored instead."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events.

        The values of the same event are summed up over all groups.

        """
        ret = defaultdict(int)
        for group in self.group_leaders:
            # items() works on Python 2 as well as on Python 3.
            for name, val in group.read().items():
                if name in self._fields:
                    ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
724
725
class DebugfsProvider(object):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self):
        self._fields = self.get_available_fields()
        self._baseline = {}
        self._pid = 0
        self.do_read = True
        self.paths = []
        self.reset()

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @property
    def fields(self):
        """The list of file names that read() evaluates."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Sets the wanted fields and resets the counters."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        """The pid the data is limited to, 0 for no limitation."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Limits the data to the VM directories of pid, or removes the
        limitation if pid is 0, and resets the counters."""
        # Always record the new pid; a stale value would keep read() from
        # returning to the 'all VMs' mode after the filter was cleared.
        self._pid = pid
        if pid != 0:
            vms = walkdir(PATH_DEBUGFS_KVM)[1]
            if len(vms) == 0:
                self.do_read = False

            # The directories are named '<pid>-<fd>'. Match the prefix, as
            # a substring test would also select e.g. '112-4' for pid 12.
            # A real list is needed, since read() iterates repeatedly.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True
        self.reset()

    def read(self, reset=0):
        """Returns a dict with format:'file name / field -> current value'.

        The values are relative to the baseline taken at the last reset
        and summed up over all considered VM directories. A true 'reset'
        takes a new baseline from the values that are read.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        paths = self.paths
        if self._pid == 0:
            paths = []
            for entry in os.walk(PATH_DEBUGFS_KVM):
                for vm_dir in entry[1]:
                    paths.append(vm_dir)
        for path in paths:
            for field in self._fields:
                value = self.read_field(field, path)
                key = path + field
                if reset:
                    self._baseline[key] = value
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                results[field] = (results.get(field, 0) + value -
                                  self._baseline.get(key, 0))

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path,
                                   field)) as field_file:
                return int(field_file.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)
815
816
class Stats(object):
    """Manages the data providers and the data they provide.

    It hands the pid and fields filters down to the providers and
    gathers the data of all providers.

    """
    def __init__(self, providers, pid, fields=None):
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Propagates fields filters to providers."""
        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            available = provider.get_available_fields()
            if self._fields_filter:
                selected = [key for key in available
                            if re.match(self._fields_filter, key) is not None]
            else:
                # Without a filter every available field is wanted.
                selected = list(available)
            provider.fields = selected

    def update_provider_pid(self):
        """Propagates pid filters to providers."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    def reset(self):
        """Drops the cached values and resets all providers."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        """The regex that selects the fields, or None."""
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        """Applies a new fields filter, if it differs from the old one."""
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        """The pid the providers are limited to."""
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        """Applies a new pid filter, if it differs from the old one."""
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data."""
        for provider in self.providers:
            current = provider.read()
            for key in provider.fields:
                previous = self.values.get(key, (0, 0))[0]
                value = current.get(key, 0)
                self.values[key] = (value, value - previous)
        return self.values
889
# Refresh delays of the text ui in seconds: Tui.show_stats() uses the
# short one for the first refresh and after a filter/pid/guest dialog,
# and the regular one otherwise.
DELAY_INITIAL = 0.25
DELAY_REGULAR = 3.0
# Longer guest names and regexes are cut off and marked with '...' in the
# header of the text ui.
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
# Matches names without an opening parenthesis, i.e. it hides the
# 'event name(filter name)' fields.
DEFAULT_REGEX = r'^[^\(]*$'
895
896
class Tui(object):
    """Instruments curses to draw a nice text ui."""
    def __init__(self, stats):
        self.stats = stats
        self.screen = None
        self.update_drilldown()

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
           implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
           implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    def update_drilldown(self):
        """Sets or removes a filter that only allows fields without braces."""
        if not self.stats.fields_filter:
            self.stats.fields_filter = DEFAULT_REGEX

        elif self.stats.fields_filter == DEFAULT_REGEX:
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.stats.pid_filter = pid

    def refresh_header(self, pid=None):
        """Refreshes the header.

        Uses the current pid filter of the stats object if no pid is
        given.

        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()
        gname = get_gname_from_pid(pid)
        if gname:
            gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                   if len(gname) > MAX_GUEST_NAME_LEN
                                   else gname))
        if pid > 0:
            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
                               .format(pid, gname), curses.A_BOLD)
        else:
            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
        if self.stats.fields_filter and self.stats.fields_filter \
           != DEFAULT_REGEX:
            regex = self.stats.fields_filter
            if len(regex) > MAX_REGEX_LEN:
                regex = regex[:MAX_REGEX_LEN] + '...'
            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
        self.screen.addstr(2, 1, '%-40s %10s%7s %7s' %
                           ('Event', 'Total', '%Total', 'Current'))
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def refresh_body(self, sleeptime):
        """Redraws the statistics rows below the header.

        sleeptime is the time in seconds since the previous refresh; the
        'Current' column shows the delta divided by it. Rows are sorted
        by descending delta, then by descending total, and the output
        stops at the first row whose total and delta are both zero.

        """
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get()

        def sortkey(x):
            if stats[x][1]:
                return (-stats[x][1], -stats[x][0])
            else:
                return (0, -stats[x][0])
        total = 0.
        for val in stats.values():
            total += val[0]
        for key in sorted(stats.keys(), key=sortkey):

            if row >= self.screen.getmaxyx()[0]:
                break
            values = stats[key]
            if not values[0] and not values[1]:
                break
            if values[0] is not None:
                cur = int(round(values[1] / sleeptime)) if values[1] else ''
                # The totals can sum up to zero while a row still has a
                # delta to show; do not divide by zero in that case.
                percent = values[0] * 100 / total if total else 0.0
                self.screen.addstr(row, 1, '%-40s %10d%7.1f %7s' %
                                   (key, values[0], percent, cur))
            row += 1
        self.screen.refresh()

    def show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr()
            curses.noecho()
            if len(regex) == 0:
                self.stats.fields_filter = DEFAULT_REGEX
                self.refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self.refresh_header()
                return
            except re.error:
                continue

    def show_vm_selection_by_pid(self):
        """Draws PID selection mask.

        Asks for a pid until a valid pid or 0 has been entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)

            curses.echo()
            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
            pid = self.screen.getstr()
            curses.noecho()

            try:
                if len(pid) > 0:
                    pid = int(pid)
                    if pid != 0 and not os.path.isdir(os.path.join('/proc/',
                                                                   str(pid))):
                        msg = '"' + str(pid) + '": Not a running process'
                        continue
                else:
                    pid = 0
                self.refresh_header(pid)
                self.update_pid(pid)
                break

            except ValueError:
                msg = '"' + str(pid) + '": Not a valid pid'
                continue

    def show_vm_selection_by_guest_name(self):
        """Draws guest selection mask.

        Asks for a guest name until a valid guest name or '' is entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific guest.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)
            curses.echo()
            self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
            gname = self.screen.getstr()
            curses.noecho()

            if not gname:
                self.refresh_header(0)
                self.update_pid(0)
                break
            else:
                pids = []
                try:
                    pids = get_pid_from_gname(gname)
                except Exception:
                    # Only real errors are reported here; a ctrl-c still
                    # reaches the handler in show_stats().
                    msg = '"' + gname + '": Internal error while searching, ' \
                          'use pid filter instead'
                    continue
                if len(pids) == 0:
                    msg = '"' + gname + '": Not an active guest'
                    continue
                if len(pids) > 1:
                    msg = '"' + gname + '": Multiple matches found, use pid ' \
                          'filter instead'
                    continue
                self.refresh_header(pids[0])
                self.update_pid(pids[0])
                break

    def show_stats(self):
        """Refreshes the screen and processes user input."""
        sleeptime = DELAY_INITIAL
        self.refresh_header()
        start = 0.0  # result based on init value never appears on screen
        while True:
            self.refresh_body(time.time() - start)
            curses.halfdelay(int(sleeptime * 10))
            start = time.time()
            sleeptime = DELAY_REGULAR
            try:
                char = self.screen.getkey()
                if char == 'x':
                    self.update_drilldown()
                if char == 'q':
                    break
                if char == 'c':
                    self.stats.fields_filter = DEFAULT_REGEX
                    self.refresh_header(0)
                    self.update_pid(0)
                if char == 'f':
                    self.show_filter_selection()
                    sleeptime = DELAY_INITIAL
                if char == 'g':
                    self.show_vm_selection_by_guest_name()
                    sleeptime = DELAY_INITIAL
                if char == 'p':
                    self.show_vm_selection_by_pid()
                    sleeptime = DELAY_INITIAL
                if char == 'r':
                    self.stats.reset()
            except KeyboardInterrupt:
                break
            except curses.error:
                # No key was pressed within the delay, just refresh.
                continue
1153
1154
def batch(stats):
    """Prints statistics in a key, value format.

    Samples the stats twice, one second apart, and prints one line per
    key with the value and the delta of the second sample. A keyboard
    interrupt ends the function silently.

    """
    try:
        # The first sample only serves as the baseline for the deltas.
        stats.get()
        time.sleep(1)
        s = stats.get()
        for key in sorted(s.keys()):
            values = s[key]
            # A parenthesised single argument prints the same on Python 2
            # and on Python 3.
            print('%-42s%10d%10d' % (key, values[0], values[1]))
    except KeyboardInterrupt:
        pass
1166
1167
def log(stats):
    """Prints statistics as reiterating key block, multiple value blocks.

    Once per second a line with the deltas of all keys is printed; the
    line with the key names is repeated every 20 lines. The loop runs
    until it is stopped by a keyboard interrupt.

    """
    keys = sorted(stats.get())

    # Each line is built in one piece and printed with a parenthesised
    # single argument, which gives the same output on Python 2 and 3.
    def banner():
        print(' '.join('%s' % k for k in keys))

    def statline():
        s = stats.get()
        print(' '.join(' %9d' % s[k][1] for k in keys))
    line = 0
    banner_repeat = 20
    while True:
        try:
            time.sleep(1)
            if line % banner_repeat == 0:
                banner()
            statline()
            line += 1
        except KeyboardInterrupt:
            break
1193
1194
def get_options():
    """Returns processed program arguments.

    Parses sys.argv with optparse and returns the resulting values
    object with the attributes once, log, tracepoints, debugfs (all
    booleans, default False), fields (regex string or None) and pid
    (integer, default 0).  Positional arguments are discarded.

    Invalid options, as well as a guest name given via -g/--guest that
    matches no or more than one process, make optparse print an error
    message and exit.

    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/trace/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   c     clear filter
   f     filter by regular expression
   g     filter by guest name
   p     filter by PID
   q     quit
   x     toggle reporting of stats for individual child trace events
   r     reset stats
Press any other key to refresh statistics immediately.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that emits the description text unwrapped."""

        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    def cb_guest_to_pid(option, opt, val, parser):
        """optparse callback: stores the pid of the guest named val."""
        try:
            pids = get_pid_from_gname(val)
        except Exception:
            # Only genuine lookup failures are turned into an option
            # error; KeyboardInterrupt and SystemExit pass through.
            raise optparse.OptionValueError('Error while searching for guest '
                                            '"{}", use "-p" to specify a pid '
                                            'instead'.format(val))
        if not pids:
            raise optparse.OptionValueError('No guest by the name "{}" '
                                            'found'.format(val))
        if len(pids) > 1:
            # str() every pid so that the message can be assembled no
            # matter whether the lookup returns integers or strings.
            found = " ".join(str(pid) for pid in pids)
            raise optparse.OptionValueError('Multiple processes found (pids: '
                                            '{}) - use "-p" to specify a pid '
                                            'instead'.format(found))
        parser.values.pid = pids[0]

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    # Shares dest 'pid' with -p: the callback resolves the guest name and
    # stores the resulting pid itself.
    optparser.add_option('-g', '--guest',
                         action='callback',
                         type='string',
                         dest='pid',
                         metavar='GUEST',
                         help='restrict statistics to guest by name',
                         callback=cb_guest_to_pid,
                         )
    (options, _) = optparser.parse_args(sys.argv)
    return options
1298
1299
def get_providers(options):
    """Returns a list of data providers depending on the passed options.

    A TracepointProvider is included when options.tracepoints is set, a
    DebugfsProvider when options.debugfs is set.  If neither is set, the
    list holds a single TracepointProvider.

    """
    selected = []

    if options.tracepoints:
        selected.append(TracepointProvider())
    if options.debugfs:
        selected.append(DebugfsProvider())

    # Without an explicit choice, tracepoints are the data source.
    return selected or [TracepointProvider()]
1312
1313
def check_access(options):
    """Exits if the current user can't access all needed directories.

    Checks, in this order, for /sys/kernel/debug, PATH_DEBUGFS_KVM and,
    if tracepoints are requested explicitly or implied by the absence
    of -d, PATH_DEBUGFS_TRACING.  A missing directory results in a
    message on stderr and exit status 1, with one exception: if tracing
    is unavailable and tracepoints were only implied, options.debugfs
    is set to True as a fallback after a pause of five seconds.

    Arguments:
    options -- the parsed program arguments; modified in place when
               falling back to debugfs

    Returns the (possibly modified) options.

    """
    if not os.path.exists('/sys/kernel/debug'):
        # Terminate the message with a newline like all other messages.
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                     not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            # Tracepoints were requested explicitly: no fallback.
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Presumably leaves the messages visible for a moment before the
        # program continues with its regular output.
        time.sleep(5)

    return options
1342
1343
def main():
    """Parses the options, checks access and runs the chosen output mode.

    Exits with an error message if a pid was specified for which no
    directory exists below /proc.  Otherwise the statistics are shown
    in logging mode (-l), in batch mode (-1) or, by default, in the
    interactive text ui.

    """
    options = check_access(get_options())

    pid_missing = (options.pid > 0 and
                   not os.path.isdir(os.path.join('/proc/', str(options.pid))))
    if pid_missing:
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), options.pid, fields=options.fields)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()


if __name__ == "__main__":
    main()