]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/kvm/kvm_stat/kvm_stat
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[mirror_ubuntu-bionic-kernel.git] / tools / kvm / kvm_stat / kvm_stat
1 #!/usr/bin/python
2 #
3 # top-like utility for displaying kvm statistics
4 #
5 # Copyright 2006-2008 Qumranet Technologies
6 # Copyright 2008-2011 Red Hat, Inc.
7 #
8 # Authors:
9 # Avi Kivity <avi@redhat.com>
10 #
11 # This work is licensed under the terms of the GNU GPL, version 2. See
12 # the COPYING file in the top-level directory.
13 """The kvm_stat module outputs statistics about running KVM VMs
14
15 Three different ways of output formatting are available:
16 - as a top-like text ui
17 - in a key -> value format
18 - in an all keys, all values format
19
20 The data is sampled from the KVM's debugfs entries and its perf events.
21 """
22
23 import curses
24 import sys
25 import os
26 import time
27 import optparse
28 import ctypes
29 import fcntl
30 import resource
31 import struct
32 import re
33 import subprocess
34 from collections import defaultdict
35
# Exit reason name -> number used to filter the kvm_exit trace event on
# x86 machines whose cpu flags contain 'vmx'.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}
77
# Exit reason name -> number used to filter the kvm_exit trace event on
# x86 machines whose cpu flags contain 'svm'.
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}
152
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Exit reason name -> number used to filter the kvm_exit trace event on
# aarch64 machines.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
191
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Reason name -> number used to filter the kvm_userspace_exit trace event.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}
220
# Request numbers passed to fcntl.ioctl() on perf event file descriptors
# (set filter string, enable, disable, reset counter).
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
227
228
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance for the running machine.

        The machine name reported by os.uname() selects the PPC,
        aarch64 or s390 variant.  Every other machine is treated as
        x86, where the first 'flags' line of /proc/cpuinfo decides
        between the VMX and the SVM exit reasons.

        Returns None on x86 if that line carries neither 'vmx' nor
        'svm', or if /proc/cpuinfo has no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager closes /proc/cpuinfo on every return path.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first flags line is inspected.
                return None
        return None
258
259
class ArchX86(Arch):
    """Architecture data for x86 machines.

    The caller supplies the exit reason table (VMX or SVM), as both
    variants share the syscall and ioctl numbers.

    """
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 298
265
266
class ArchPPC(Arch):
    """Architecture data for PPC machines.

    PPC uses different perf ioctl numbers than the defaults and has no
    exit reason table.

    """
    def __init__(self):
        self.sc_perf_evt_open = 319
        # Work on a private copy: writing through the shared
        # IOCTL_NUMBERS dict would change the numbers for every other
        # user of that module-level table.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        # '<<' binds tighter than '|', the parentheses only make the
        # evaluation order explicit.
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        self.exit_reasons = {}
280
281
class ArchA64(Arch):
    """Architecture data for aarch64 machines."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 241
287
288
class ArchS390(Arch):
    """Architecture data for s390 machines (no exit reason table)."""
    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 331
294
# Architecture data of the running machine, looked up once at import time.
ARCH = Arch.get_arch()
296
297
def walkdir(path):
    """Returns the first os.walk() entry for the specified directory.

    This is the (dirpath, dirnames, filenames) 3-tuple describing
    'path' itself; deeper levels are not visited.
    """
    walker = os.walk(path)
    top_level = next(walker)
    return top_level
305
306
def parse_int_list(list_string):
    """Converts a comma separated string of integers and integer ranges
    (e.g. '0-2,5') into a flat list of ints, ranges being inclusive."""
    result = []

    for item in list_string.split(','):
        if '-' in item:
            bounds = item.split('-')
            first = int(bounds[0])
            last = int(bounds[1])
            result.extend(range(first, last + 1))
        else:
            result.append(int(item))

    return result
322
323
def get_pid_from_gname(gname):
    """Fuzzy function to convert guest name to QEMU process pid.

    Runs 'ps' and checks every process whose arguments contain
    ' -name ' with get_gname_from_pid().

    Returns a list of potential pids, can be empty if no match found.
    Throws an exception on processing errors.

    """
    pids = []
    try:
        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                 stdout=subprocess.PIPE)
    except (OSError, ValueError):
        raise Exception('could not run ps to search for guest names')
    try:
        for line in child.stdout:
            if not isinstance(line, str):
                # Python 3 pipes deliver bytes.
                line = line.decode('utf-8', 'replace')
            fields = line.lstrip().split(' ', 1)
            if len(fields) < 2:
                # no argument column, cannot be a QEMU process
                continue
            # perform a sanity check before calling the more expensive
            # function to possibly extract the guest name
            if (' -name ' in fields[1] and
                    gname == get_gname_from_pid(fields[0])):
                pids.append(int(fields[0]))
    finally:
        child.stdout.close()
        # Reap the child so it does not linger as a zombie.
        child.wait()

    return pids
346
347
def get_gname_from_pid(pid):
    """Returns the guest name for a QEMU process pid.

    Extracts the guest name from the QEMU command line by processing the
    '-name' option. Will also handle names specified out of sequence.

    Returns an empty string if /proc/<pid>/cmdline cannot be read or
    holds no usable '-name' option.

    """
    name = ''
    try:
        with open('/proc/{}/cmdline'.format(pid), 'rb') as cmdline:
            data = cmdline.read()
        if not isinstance(data, str):
            # Python 3 returns bytes for files opened in binary mode.
            data = data.decode('utf-8', 'replace')
        line = data.split('\0')
        parms = line[line.index('-name') + 1].split(',')
        while '' in parms:
            # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in
            # ['foo', '', 'bar'], which we revert here
            idx = parms.index('')
            parms[idx - 1] += ',' + parms[idx + 1]
            del parms[idx:idx+2]
        # the '-name' switch allows for two ways to specify the guest name,
        # where the plain name overrides the name specified via 'guest='
        for arg in parms:
            if '=' not in arg:
                name = arg
                break
            if arg[:6] == 'guest=':
                name = arg[6:]
    except (ValueError, IOError, IndexError):
        # No such process, no '-name' option or a malformed one: the
        # lookup is best effort, so report "no name".
        pass

    return name
377
378
def get_online_cpus():
    """Returns the ids of all online cpus as a list of integers.

    The ids come from the first line of /sys/devices/system/cpu/online.
    """
    with open('/sys/devices/system/cpu/online') as online:
        first_line = online.readline()
        cpus = parse_int_list(first_line)
    return cpus
384
385
def get_filters():
    """Returns a dict that maps each filterable trace event to a tuple of
    its filter identifier and the dict of values that can be filtered.

    Trace events can be filtered for special values by setting a
    filter string via an ioctl. The string normally has the format
    identifier==value. For each filter a new event will be created, to
    be able to distinguish the events.

    The kvm_exit entry is only present if the architecture provides a
    non-empty exit reason table.

    """
    filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}
    exit_reasons = ARCH.exit_reasons
    if exit_reasons:
        filters['kvm_exit'] = ('exit_reason', exit_reasons)
    return filters
401
# Handle to the C library; use_errno=True makes the errno of a failed
# call available through ctypes.get_errno().
libc = ctypes.CDLL('libc.so.6', use_errno=True)
# Generic syscall entry point, used to issue perf_event_open.
syscall = libc.syscall
404
405
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this struct gets subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
433
434
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Wrapper for the sys_perf_evt_open() syscall.

    Used to set up performance events, returns a file descriptor or -1
    on error.

    The arguments handed to the syscall are:
    - syscall number (taken from ARCH)
    - struct perf_event_attr * (pointer to attr)
    - pid or -1 to monitor all pids
    - cpu number or -1 to monitor all cpus
    - The file descriptor of the group leader or -1 to create a group.
    - flags

    """
    syscall_args = (
        ctypes.pointer(attr),
        ctypes.c_int(pid),
        ctypes.c_int(cpu),
        ctypes.c_int(group_fd),
        ctypes.c_long(flags),
    )
    return syscall(ARCH.sc_perf_evt_open, *syscall_args)
453
# Values stored in perf_event_attr.type and perf_event_attr.read_format.
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3

# debugfs locations of the trace event data and the KVM statistics.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
459
460
class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends an event; the first one added is the group leader."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        Values are read by reading from the file descriptor of the
        event that is the group leader. See perf_event_open(2) for
        details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        # One u64 for 'nr' plus one u64 per event value.
        length = 8 * (1 + count)
        # The eight pad bytes skip 'nr', every 'Q' decodes one value.
        read_format = 'xxxxxxxx' + 'Q' * count
        leader_fd = self.events[0].fd
        raw = os.read(leader_fd, length)
        values = struct.unpack(read_format, raw)
        names = [event.name for event in self.events]
        return dict(zip(names, values))
492
493
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # Compare against None: 0 is a valid descriptor but is falsy.
        if self.fd is not None:
            os.close(self.fd)
            self.fd = None

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The struct's config field is set to the trace point id read
        from the trace point's 'id' file below PATH_DEBUGFS_TRACING.

        """

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.  An error while setting
        the filter is passed on after the new descriptor was closed.

        """

        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, trace_pid,
                             trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        if trace_filter:
            try:
                fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                            trace_filter)
            except Exception:
                # self.fd is not set yet, so __del__ would never close
                # this descriptor.
                os.close(fd)
                raise

        self.fd = fd

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
576
577
class TracepointProvider(object):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self._pid = 0

    def get_available_fields(self):
        """Returns a list of available event's of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self.get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = walkdir(path)[1]
        else:
            groupids = get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                trace_cpu = groupid if self._pid == 0 else -1
                trace_pid = int(groupid) if self._pid != 0 else -1

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    def available_fields(self):
        """Returns the same list as get_available_fields()."""
        return self.get_available_fields()

    @property
    def fields(self):
        """The list of field names that are currently reported."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        # A set keeps the per-event membership test cheap.
        wanted = set(fields)
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in wanted:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """The monitored pid, 0 meaning all processes."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events.

        Values of equally named events are summed up over all groups.

        """
        ret = defaultdict(int)
        wanted = set(self._fields)
        for group in self.group_leaders:
            # items() works on Python 2 and 3, iteritems() only on 2.
            for name, val in group.read().items():
                if name in wanted:
                    ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
724
725
class DebugfsProvider(object):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self):
        self._fields = self.get_available_fields()
        self._baseline = {}
        self._pid = 0
        self.do_read = True
        self.paths = []
        self.reset()

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @property
    def fields(self):
        """The list of field names that are currently reported."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Sets the reported fields and restarts counting from zero."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        """The monitored pid, 0 meaning all VMs."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Restricts the data to the VM directories of pid, 0 lifts the
        restriction."""
        # Always remember the pid: if 0 were not stored, read() would
        # keep using the (then empty) path list of the former pid.
        self._pid = pid
        if pid != 0:
            vms = walkdir(PATH_DEBUGFS_KVM)[1]
            if len(vms) == 0:
                self.do_read = False

            # Match the leading '<pid>-' only, so that e.g. pid 12 does
            # not select the directories of pid 112.  A real list (no
            # lazy filter object) is needed as read() iterates it again
            # and again.
            # NOTE(review): assumes the kernel names the per-VM
            # directories '<pid>-<fd>' -- confirm.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True
        self.reset()

    def read(self, reset=0):
        """Returns a dict with format:'file name / field -> current value'.

        The values of all selected VM directories are summed up per
        field, minus the baseline remembered per directory and field.
        A true 'reset' stores the current values as new baseline.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        paths = self.paths
        if self._pid == 0:
            paths = []
            for entry in os.walk(PATH_DEBUGFS_KVM):
                for dirname in entry[1]:
                    paths.append(dirname)
        for path in paths:
            for field in self._fields:
                value = self.read_field(field, path)
                key = path + field
                if reset:
                    self._baseline[key] = value
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                results[field] = (results.get(field, 0) + value -
                                  self._baseline.get(key, 0))

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path,
                                   field)) as field_file:
                return int(field_file.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)
815
816
class Stats(object):
    """Manages the data providers and the data they provide.

    It is used to set filters on the provider's data and collect all
    provider data.

    """
    def __init__(self, providers, pid, fields=None):
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Propagates fields filters to providers."""
        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            selected = []
            for key in provider.get_available_fields():
                # An unset filter lets every field through.
                if (not self._fields_filter or
                        re.match(self._fields_filter, key) is not None):
                    selected.append(key)
            provider.fields = selected

    def update_provider_pid(self):
        """Propagates pid filters to providers."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    def reset(self):
        """Drops the cached values and resets every provider."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        """The regex that selects the reported fields, or None."""
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        """The pid the providers are restricted to."""
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data."""
        for provider in self.providers:
            current = provider.read()
            for key in provider.fields:
                previous = self.values.get(key, (0, 0))[0]
                latest = current.get(key, 0)
                self.values[key] = (latest, latest - previous)
        return self.values
889
# Column widths of the event name and the total value in the text ui.
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
# Seconds to wait for a key press before the first and all later refreshes.
DELAY_INITIAL = 0.25
DELAY_REGULAR = 3.0
# Longer guest names and regexes are cut off in the header.
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
# Matches only field names without an opening brace.
DEFAULT_REGEX = r'^[^\(]*$'
897
898
class Tui(object):
    """Instruments curses to draw a nice text ui."""
    def __init__(self, stats):
        self.stats = stats
        self.screen = None
        self.update_drilldown()

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
           implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
           implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    def _getstr(self):
        """Reads one line of echoed user input and returns it as str."""
        curses.echo()
        text = self.screen.getstr()
        curses.noecho()
        if not isinstance(text, str):
            # Python 3 curses hands out bytes.
            text = text.decode('utf-8', 'replace')
        return text

    def update_drilldown(self):
        """Sets or removes a filter that only allows fields without braces."""
        if not self.stats.fields_filter:
            self.stats.fields_filter = DEFAULT_REGEX

        elif self.stats.fields_filter == DEFAULT_REGEX:
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.stats.pid_filter = pid

    def refresh_header(self, pid=None):
        """Refreshes the header.

        Shows the given pid, or the currently filtered pid if none is
        given, together with the guest name if one can be found.

        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()
        gname = get_gname_from_pid(pid)
        if gname:
            gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                   if len(gname) > MAX_GUEST_NAME_LEN
                                   else gname))
        if pid > 0:
            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
                               .format(pid, gname), curses.A_BOLD)
        else:
            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
        if self.stats.fields_filter and self.stats.fields_filter \
           != DEFAULT_REGEX:
            regex = self.stats.fields_filter
            if len(regex) > MAX_REGEX_LEN:
                regex = regex[:MAX_REGEX_LEN] + '...'
            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
        self.screen.addstr(2, 1, 'Event')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
                           len('Total'), 'Total')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 -
                           len('%Total'), '%Total')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 + 8 -
                           len('Current'), 'Current')
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def refresh_body(self, sleeptime):
        """Redraws the statistics rows below the header.

        Rows are sorted by descending delta, then descending total.
        Drawing stops at the bottom of the screen or at the first row
        whose total and delta are both zero.  'sleeptime' is the length
        of the last interval and scales the delta to a per second rate.

        """
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get()

        def sortkey(x):
            if stats[x][1]:
                return (-stats[x][1], -stats[x][0])
            else:
                return (0, -stats[x][0])
        total = 0.
        for val in stats.values():
            total += val[0]
        for key in sorted(stats.keys(), key=sortkey):

            if row >= self.screen.getmaxyx()[0]:
                break
            values = stats[key]
            if not values[0] and not values[1]:
                break
            col = 1
            self.screen.addstr(row, col, key)
            col += LABEL_WIDTH
            self.screen.addstr(row, col, '%10d' % (values[0],))
            col += NUMBER_WIDTH
            # The totals can add up to zero while a row still has a
            # delta to show; do not divide by zero then.
            percent = values[0] * 100 / total if total else 0.0
            self.screen.addstr(row, col, '%7.1f' % (percent,))
            col += 7
            if values[1] is not None:
                self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
            row += 1
        self.screen.refresh()

    def show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(3, 0, "New regex: ")
            regex = self._getstr()
            if len(regex) == 0:
                self.stats.fields_filter = DEFAULT_REGEX
                self.refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self.refresh_header()
                return
            except re.error:
                continue

    def show_vm_selection_by_pid(self):
        """Draws PID selection mask.

        Asks for a pid until a valid pid or 0 has been entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)

            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
            pid = self._getstr()

            try:
                if len(pid) > 0:
                    pid = int(pid)
                    if pid != 0 and not os.path.isdir(os.path.join('/proc/',
                                                                   str(pid))):
                        msg = '"' + str(pid) + '": Not a running process'
                        continue
                else:
                    pid = 0
                self.refresh_header(pid)
                self.update_pid(pid)
                break

            except ValueError:
                msg = '"' + str(pid) + '": Not a valid pid'
                continue

    def show_vm_selection_by_guest_name(self):
        """Draws guest selection mask.

        Asks for a guest name until a valid guest name or '' is entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific guest.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)
            self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
            gname = self._getstr()

            if not gname:
                self.refresh_header(0)
                self.update_pid(0)
                break
            else:
                pids = []
                try:
                    pids = get_pid_from_gname(gname)
                except Exception:
                    # Not a bare except: Ctrl-C must stay able to leave
                    # this dialog.
                    msg = '"' + gname + '": Internal error while searching, ' \
                          'use pid filter instead'
                    continue
                if len(pids) == 0:
                    msg = '"' + gname + '": Not an active guest'
                    continue
                if len(pids) > 1:
                    msg = '"' + gname + '": Multiple matches found, use pid ' \
                          'filter instead'
                    continue
                self.refresh_header(pids[0])
                self.update_pid(pids[0])
                break

    def show_stats(self):
        """Refreshes the screen and processes user input."""
        sleeptime = DELAY_INITIAL
        self.refresh_header()
        while True:
            self.refresh_body(sleeptime)
            # halfdelay() makes getkey() give up after the given tenths
            # of a second, which paces the refresh.
            curses.halfdelay(int(sleeptime * 10))
            sleeptime = DELAY_REGULAR
            try:
                char = self.screen.getkey()
                if char == 'x':
                    self.refresh_header()
                    self.update_drilldown()
                    sleeptime = DELAY_INITIAL
                if char == 'q':
                    break
                if char == 'c':
                    self.stats.fields_filter = DEFAULT_REGEX
                    self.refresh_header(0)
                    self.update_pid(0)
                    sleeptime = DELAY_INITIAL
                if char == 'f':
                    self.show_filter_selection()
                    sleeptime = DELAY_INITIAL
                if char == 'g':
                    self.show_vm_selection_by_guest_name()
                    sleeptime = DELAY_INITIAL
                if char == 'p':
                    self.show_vm_selection_by_pid()
                    sleeptime = DELAY_INITIAL
                if char == 'r':
                    self.refresh_header()
                    self.stats.reset()
                    sleeptime = DELAY_INITIAL
            except KeyboardInterrupt:
                break
            except curses.error:
                # No key arrived within the delay: redraw.
                continue
1167
1168
def batch(stats):
    """Prints statistics in a key, value format.

    Samples the stats twice, one second apart, and prints for every
    field its value and the delta between the two samples.

    """
    try:
        s = stats.get()
        time.sleep(1)
        s = stats.get()
        for key in sorted(s.keys()):
            values = s[key]
            # A single parenthesised argument prints the same on
            # Python 2 and 3.
            print('%-42s%10d%10d' % (key, values[0], values[1]))
    except KeyboardInterrupt:
        pass
1180
1181
def log(stats):
    """Prints statistics as reiterating key block, multiple value blocks.

    Prints one line of deltas per second until interrupted and repeats
    the line of field names every 20 lines.

    """
    # Iterating the dict yields its keys on Python 2 and 3 alike.
    keys = sorted(stats.get())

    def banner():
        # One space separated line, as a run of "print x," followed by
        # a bare print produced.
        print(' '.join('%s' % k for k in keys))

    def statline():
        s = stats.get()
        print(' '.join(' %9d' % s[k][1] for k in keys))
    line = 0
    banner_repeat = 20
    while True:
        try:
            time.sleep(1)
            if line % banner_repeat == 0:
                banner()
            statline()
            line += 1
        except KeyboardInterrupt:
            break
1207
1208
def get_options():
    """Returns processed program arguments.

    Parses sys.argv with optparse and returns the resulting values object
    with these attributes:

    once        -- True for batch mode (-1, --once, --batch)
    log         -- True for logging mode (-l, --log)
    tracepoints -- True to read statistics from tracepoints (-t)
    debugfs     -- True to read statistics from debugfs (-d)
    fields      -- regex string selecting the fields, or None (-f)
    pid         -- pid to restrict statistics to, 0 for none (-p, or
                   looked up from a guest name with -g)

    Invalid arguments, including a guest name that cannot be resolved to
    exactly one pid, are reported through optparse's own error handling.
    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/trace/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   c     clear filter
   f     filter by regular expression
   g     filter by guest name
   p     filter by PID
   q     quit
   x     toggle reporting of stats for individual child trace events
   r     reset stats
Press any other key to refresh statistics immediately.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that emits the description text unmodified."""

        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    def cb_guest_to_pid(option, opt, val, parser):
        """optparse callback storing the pid of the guest named ``val``."""
        try:
            pids = get_pid_from_gname(val)
        except Exception:
            # Only real lookup failures are converted into an option error;
            # KeyboardInterrupt and SystemExit are left to propagate.
            raise optparse.OptionValueError('Error while searching for guest '
                                            '"{}", use "-p" to specify a pid '
                                            'instead'.format(val))
        if len(pids) == 0:
            raise optparse.OptionValueError('No guest by the name "{}" '
                                            'found'.format(val))
        if len(pids) > 1:
            # str.join() only accepts strings; convert each pid so that
            # numeric pids do not raise a TypeError while building the
            # message.
            pid_list = " ".join(str(pid) for pid in pids)
            raise optparse.OptionValueError('Multiple processes found (pids: '
                                            '{}) - use "-p" to specify a pid '
                                            'instead'.format(pid_list))
        parser.values.pid = pids[0]

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    # Shares dest='pid' with -p; declared after it so the default of 0
    # registered by -p stays in effect.
    optparser.add_option('-g', '--guest',
                         action='callback',
                         type='string',
                         dest='pid',
                         metavar='GUEST',
                         help='restrict statistics to guest by name',
                         callback=cb_guest_to_pid,
                         )
    # Positional arguments (which include the program name here) are
    # discarded.
    (options, _) = optparser.parse_args(sys.argv)
    return options
1312
1313
def get_providers(options):
    """Builds the list of data providers selected by the options.

    A TracepointProvider is added when options.tracepoints is set and a
    DebugfsProvider when options.debugfs is set, in that order. If neither
    is requested, a single TracepointProvider is returned.
    """
    selected = []
    if options.tracepoints:
        selected.append(TracepointProvider())
    if options.debugfs:
        selected.append(DebugfsProvider())

    # Nothing requested explicitly: default to tracepoints.
    return selected or [TracepointProvider()]
1326
1327
def check_access(options):
    """Exits if the current user can't access all needed directories.

    Checks, in order, that /sys/kernel/debug, PATH_DEBUGFS_KVM and (when
    tracepoints are requested explicitly or implied by not requesting
    debugfs) PATH_DEBUGFS_TRACING exist. A missing path is reported on
    stderr and terminates the program with exit status 1, with one
    exception: if the tracing path is missing and tracepoints were not
    requested explicitly, options.debugfs is set to True instead and the
    function continues after a five second pause.

    Returns the (possibly modified) options object.
    """
    if not os.path.exists('/sys/kernel/debug'):
        # Terminate the message with a newline like every other message
        # below, so the shell prompt does not end up on the same line.
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                     not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Presumably gives the user time to read the messages above before
        # the output mode takes over the terminal.
        time.sleep(5)

    return options
1356
1357
def main():
    """Parses the options, validates them and runs the selected mode.

    Exits with an error if a pid was given that has no directory below
    /proc. Otherwise runs logging mode, batch mode or, by default, the
    interactive text ui on the statistics of the selected providers.
    """
    options = check_access(get_options())

    pid = options.pid
    if pid > 0:
        proc_dir = os.path.join('/proc/', str(pid))
        if not os.path.isdir(proc_dir):
            sys.stderr.write(
                'Did you use a (unsupported) tid instead of a pid?\n')
            sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), pid, fields=options.fields)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()
1378
# Start the tool only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()