]>
git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/kvm/kvm_stat/kvm_stat
3 # top-like utility for displaying kvm statistics
5 # Copyright 2006-2008 Qumranet Technologies
6 # Copyright 2008-2011 Red Hat, Inc.
9 # Avi Kivity <avi@redhat.com>
11 # This work is licensed under the terms of the GNU GPL, version 2. See
12 # the COPYING file in the top-level directory.
13 """The kvm_stat module outputs statistics about running KVM VMs
15 Three different ways of output formatting are available:
16 - as a top-like text ui
17 - in a key -> value format
18 - in an all keys, all values format
20 The data is sampled from the KVM's debugfs entries and its perf events.
33 from collections
import defaultdict
34 from time
import sleep
38 'EXTERNAL_INTERRUPT': 1,
40 'PENDING_INTERRUPT': 7,
64 'MWAIT_INSTRUCTION': 36,
65 'MONITOR_INSTRUCTION': 39,
66 'PAUSE_INSTRUCTION': 40,
67 'MCE_DURING_VMENTRY': 41,
68 'TPR_BELOW_THRESHOLD': 43,
109 'CR0_SEL_WRITE': 0x065,
133 'TASK_SWITCH': 0x07d,
134 'FERR_FREEZE': 0x07e,
153 # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
154 AARCH64_EXIT_REASONS
= {
192 # From include/uapi/linux/kvm.h, KVM_EXIT_xxx
193 USERSPACE_EXIT_REASONS
= {
201 'IRQ_WINDOW_OPEN': 7,
211 'INTERNAL_ERROR': 17,
222 'SET_FILTER': 0x40082406,
223 'ENABLE': 0x00002400,
224 'DISABLE': 0x00002401,
229 """Encapsulates global architecture specific data.
231 Contains the performance event open syscall and ioctl numbers, as
232 well as the VM exit reasons for the architecture it runs on.
237 machine
= os
.uname()[4]
239 if machine
.startswith('ppc'):
241 elif machine
.startswith('aarch64'):
243 elif machine
.startswith('s390'):
247 for line
in open('/proc/cpuinfo'):
248 if not line
.startswith('flags'):
253 return ArchX86(VMX_EXIT_REASONS
)
255 return ArchX86(SVM_EXIT_REASONS
)
def __init__(self, exit_reasons):
    """Stores the architecture specific values for this machine.

    NOTE(review): the enclosing class header is not visible here;
    presumably this is ArchX86, which get_arch() instantiates with
    either the VMX or the SVM exit reasons -- confirm.

    exit_reasons -- dict of VM exit reasons to expose via
                    self.exit_reasons

    """
    # Syscall number handed to syscall() by perf_event_open().
    self.sc_perf_evt_open = 298
    self.ioctl_numbers = IOCTL_NUMBERS
    self.exit_reasons = exit_reasons
266 self
.sc_perf_evt_open
= 319
267 self
.ioctl_numbers
= IOCTL_NUMBERS
268 self
.ioctl_numbers
['ENABLE'] = 0x20002400
269 self
.ioctl_numbers
['DISABLE'] = 0x20002401
270 self
.ioctl_numbers
['RESET'] = 0x20002403
272 # PPC comes in 32 and 64 bit and some generated ioctl
273 # numbers depend on the wordsize.
274 char_ptr_size
= ctypes
.sizeof(ctypes
.c_char_p
)
275 self
.ioctl_numbers
['SET_FILTER'] = 0x80002406 | char_ptr_size
<< 16
276 self
.exit_reasons
= {}
280 self
.sc_perf_evt_open
= 241
281 self
.ioctl_numbers
= IOCTL_NUMBERS
282 self
.exit_reasons
= AARCH64_EXIT_REASONS
284 class ArchS390(Arch
):
286 self
.sc_perf_evt_open
= 331
287 self
.ioctl_numbers
= IOCTL_NUMBERS
288 self
.exit_reasons
= None
# Architecture specific data (perf syscall number, ioctl numbers and
# exit reasons) that the rest of the module reads through ARCH.
ARCH = Arch.get_arch()
294 """Returns os.walk() data for specified directory.
296 As it is only a wrapper it returns the same 3-tuple of (dirpath,
297 dirnames, filenames).
299 return next(os
.walk(path
))
302 def parse_int_list(list_string
):
303 """Returns an int list from a string of comma separated integers and
306 members
= list_string
.split(',')
308 for member
in members
:
309 if '-' not in member
:
310 integers
.append(int(member
))
312 int_range
= member
.split('-')
313 integers
.extend(range(int(int_range
[0]),
314 int(int_range
[1]) + 1))
def get_online_cpus():
    """Returns a list of cpu id integers.

    The first line of /sys/devices/system/cpu/online is read and handed
    to parse_int_list() for conversion.

    """
    with open('/sys/devices/system/cpu/online') as cpu_list:
        cpu_string = cpu_list.readline()
    return parse_int_list(cpu_string)
327 """Returns a dict of trace events, their filter ids and
328 the values that can be filtered.
330 Trace events can be filtered for special values by setting a
331 filter string via an ioctl. The string normally has the format
332 identifier==value. For each filter a new event will be created, to
333 be able to distinguish the events.
337 filters
['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS
)
338 if ARCH
.exit_reasons
:
339 filters
['kvm_exit'] = ('exit_reason', ARCH
.exit_reasons
)
# Handle to the C library. use_errno=True makes the errno of a failed
# call available through ctypes.get_errno().
libc = ctypes.CDLL('libc.so.6', use_errno=True)
# Generic syscall entry point, used by perf_event_open().
syscall = libc.syscall
345 class perf_event_attr(ctypes
.Structure
):
346 """Struct that holds the necessary data to set up a trace event.
348 For an extensive explanation see perf_event_open(2) and
349 include/uapi/linux/perf_event.h, struct perf_event_attr
351 All fields that are not initialized in the constructor are 0.
354 _fields_
= [('type', ctypes
.c_uint32
),
355 ('size', ctypes
.c_uint32
),
356 ('config', ctypes
.c_uint64
),
357 ('sample_freq', ctypes
.c_uint64
),
358 ('sample_type', ctypes
.c_uint64
),
359 ('read_format', ctypes
.c_uint64
),
360 ('flags', ctypes
.c_uint64
),
361 ('wakeup_events', ctypes
.c_uint32
),
362 ('bp_type', ctypes
.c_uint32
),
363 ('bp_addr', ctypes
.c_uint64
),
364 ('bp_len', ctypes
.c_uint64
),
368 super(self
.__class
__, self
).__init
__()
369 self
.type = PERF_TYPE_TRACEPOINT
370 self
.size
= ctypes
.sizeof(self
)
371 self
.read_format
= PERF_FORMAT_GROUP
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Wrapper for the sys_perf_evt_open() syscall.

    Used to set up performance events, returns a file descriptor or -1.

    The syscall number is taken from ARCH.sc_perf_evt_open, the
    remaining arguments are converted to C types and passed on:
    - attr: struct perf_event_attr, handed over as a pointer
    - pid: pid or -1 to monitor all pids
    - cpu: cpu number or -1 to monitor all cpus
    - group_fd: the file descriptor of the group leader or -1 to
      create a group
    - flags: passed as a C long

    """
    return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                   ctypes.c_int(pid), ctypes.c_int(cpu),
                   ctypes.c_int(group_fd), ctypes.c_long(flags))
# Value assigned to perf_event_attr.type.
PERF_TYPE_TRACEPOINT = 2
# Value assigned to perf_event_attr.read_format.
PERF_FORMAT_GROUP = 1 << 3

# Directories from which the tracing and KVM data is read.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
399 """Represents a perf event group."""
def add_event(self, event):
    """Appends event to the list of events of this group."""
    self.events.append(event)
408 """Returns a dict with 'event name: value' for all events in the
411 Values are read by reading from the file descriptor of the
412 event that is the group leader. See perf_event_open(2) for
415 Read format for the used event configuration is:
417 u64 nr; /* The number of events */
419 u64 value; /* The value of the event */
424 length
= 8 * (1 + len(self
.events
))
425 read_format
= 'xxxxxxxx' + 'Q' * len(self
.events
)
426 return dict(zip([event
.name
for event
in self
.events
],
427 struct
.unpack(read_format
,
428 os
.read(self
.events
[0].fd
, length
))))
431 """Represents a performance event and manages its life cycle."""
432 def __init__(self
, name
, group
, trace_cpu
, trace_pid
, trace_point
,
433 trace_filter
, trace_set
='kvm'):
436 self
.setup_event(group
, trace_cpu
, trace_pid
, trace_point
,
437 trace_filter
, trace_set
)
440 """Closes the event's file descriptor.
442 As no python file object was created for the file descriptor,
443 python will not reference count the descriptor and will not
444 close it itself automatically, so we do it.
450 def setup_event_attribute(self
, trace_set
, trace_point
):
451 """Returns an initialized ctype perf_event_attr struct."""
453 id_path
= os
.path
.join(PATH_DEBUGFS_TRACING
, 'events', trace_set
,
456 event_attr
= perf_event_attr()
457 event_attr
.config
= int(open(id_path
).read())
460 def setup_event(self
, group
, trace_cpu
, trace_pid
, trace_point
,
461 trace_filter
, trace_set
):
462 """Sets up the perf event in Linux.
464 Issues the syscall to register the event in the kernel and
465 then sets the optional filter.
469 event_attr
= self
.setup_event_attribute(trace_set
, trace_point
)
471 # First event will be group leader.
474 # All others have to pass the leader's descriptor instead.
476 group_leader
= group
.events
[0].fd
478 fd
= perf_event_open(event_attr
, trace_pid
,
479 trace_cpu
, group_leader
, 0)
481 err
= ctypes
.get_errno()
482 raise OSError(err
, os
.strerror(err
),
483 'while calling sys_perf_event_open().')
486 fcntl
.ioctl(fd
, ARCH
.ioctl_numbers
['SET_FILTER'],
492 """Enables the trace event in the kernel.
494 Enabling the group leader makes reading counters from it and the
495 events under it possible.
498 fcntl
.ioctl(self
.fd
, ARCH
.ioctl_numbers
['ENABLE'], 0)
501 """Disables the trace event in the kernel.
503 Disabling the group leader makes reading all counters under it
507 fcntl
.ioctl(self
.fd
, ARCH
.ioctl_numbers
['DISABLE'], 0)
510 """Resets the count of the trace event in the kernel."""
511 fcntl
.ioctl(self
.fd
, ARCH
.ioctl_numbers
['RESET'], 0)
513 class TracepointProvider(object):
514 """Data provider for the stats class.
516 Manages the events/groups from which it acquires its data.
520 self
.group_leaders
= []
521 self
.filters
= get_filters()
522 self
._fields
= self
.get_available_fields()
525 def get_available_fields(self
):
526 """Returns a list of available events of format 'event name(filter
529 All available events have directories under
530 /sys/kernel/debug/tracing/events/ which export information
531 about the specific event. Therefore, listing the dirs gives us
532 a list of all available events.
534 Some events like the vm exit reasons can be filtered for
535 specific values. To take account for that, the routine below
536 creates special fields with the following format:
537 event name(filter name)
540 path
= os
.path
.join(PATH_DEBUGFS_TRACING
, 'events', 'kvm')
541 fields
= walkdir(path
)[1]
544 if field
in self
.filters
:
545 filter_name_
, filter_dicts
= self
.filters
[field
]
546 for name
in filter_dicts
:
547 extra
.append(field
+ '(' + name
+ ')')
551 def setup_traces(self
):
552 """Creates all event and group objects needed to be able to retrieve
555 # Fetch list of all threads of the monitored pid, as qemu
556 # starts a thread for each vcpu.
557 path
= os
.path
.join('/proc', str(self
._pid
), 'task')
558 groupids
= walkdir(path
)[1]
560 groupids
= get_online_cpus()
562 # The constant is needed as a buffer for python libs, std
563 # streams and other files that the script opens.
564 newlim
= len(groupids
) * len(self
._fields
) + 50
566 softlim_
, hardlim
= resource
.getrlimit(resource
.RLIMIT_NOFILE
)
569 # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
570 resource
.setrlimit(resource
.RLIMIT_NOFILE
, (newlim
, newlim
))
572 # Raising the soft limit is sufficient.
573 resource
.setrlimit(resource
.RLIMIT_NOFILE
, (newlim
, hardlim
))
576 sys
.exit("NOFILE rlimit could not be raised to {0}".format(newlim
))
578 for groupid
in groupids
:
580 for name
in self
._fields
:
583 match
= re
.match(r
'(.*)\((.*)\)', name
)
585 tracepoint
, sub
= match
.groups()
586 tracefilter
= ('%s==%d\0' %
587 (self
.filters
[tracepoint
][0],
588 self
.filters
[tracepoint
][1][sub
]))
590 # From perf_event_open(2):
591 # pid > 0 and cpu == -1
592 # This measures the specified process/thread on any CPU.
594 # pid == -1 and cpu >= 0
595 # This measures all processes/threads on the specified CPU.
596 trace_cpu
= groupid
if self
._pid
== 0 else -1
597 trace_pid
= int(groupid
) if self
._pid
!= 0 else -1
599 group
.add_event(Event(name
=name
,
603 trace_point
=tracepoint
,
604 trace_filter
=tracefilter
))
606 self
.group_leaders
.append(group
)
def available_fields(self):
    """Returns the result of get_available_fields()."""
    return self.get_available_fields()
616 def fields(self
, fields
):
617 """Enables/disables the (un)wanted events"""
618 self
._fields
= fields
619 for group
in self
.group_leaders
:
620 for index
, event
in enumerate(group
.events
):
621 if event
.name
in fields
:
625 # Do not disable the group leader.
626 # It would disable all of its events.
636 """Changes the monitored pid by setting new traces."""
638 # The garbage collector will get rid of all Event/Group
639 # objects and open files after removing the references.
640 self
.group_leaders
= []
642 self
.fields
= self
._fields
645 """Returns 'event name: current value' for all enabled events."""
646 ret
= defaultdict(int)
647 for group
in self
.group_leaders
:
648 for name
, val
in group
.read().iteritems():
649 if name
in self
._fields
:
653 class DebugfsProvider(object):
654 """Provides data from the files that KVM creates in the kvm debugfs
657 self
._fields
= self
.get_available_fields()
def get_available_fields(self):
    """Returns a list of available fields.

    The fields are all available KVM debugfs files, i.e. the file
    names found directly in PATH_DEBUGFS_KVM.

    """
    # walkdir() returns (dirpath, dirnames, filenames); index 2 selects
    # the file names.
    return walkdir(PATH_DEBUGFS_KVM)[2]
def fields(self, fields):
    """Stores the fields this provider should work with."""
    self._fields = fields
686 vms
= walkdir(PATH_DEBUGFS_KVM
)[1]
690 self
.paths
= filter(lambda x
: "{}-".format(pid
) in x
, vms
)
697 """Returns a dict with format:'file name / field -> current value'."""
700 # If no debugfs filtering support is available, then don't read.
704 for path
in self
.paths
:
705 for field
in self
._fields
:
706 results
[field
] = results
.get(field
, 0) \
707 + self
.read_field(field
, path
)
711 def read_field(self
, field
, path
):
712 """Returns the value of a single field from a specific VM."""
714 return int(open(os
.path
.join(PATH_DEBUGFS_KVM
,
722 """Manages the data providers and the data they provide.
724 It is used to set filters on the provider's data and collect all
728 def __init__(self
, providers
, pid
, fields
=None):
729 self
.providers
= providers
730 self
._pid
_filter
= pid
731 self
._fields
_filter
= fields
733 self
.update_provider_pid()
734 self
.update_provider_filters()
736 def update_provider_filters(self
):
737 """Propagates fields filters to providers."""
739 if not self
._fields
_filter
:
741 return re
.match(self
._fields
_filter
, key
) is not None
743 # As we reset the counters when updating the fields we can
744 # also clear the cache of old values.
746 for provider
in self
.providers
:
747 provider_fields
= [key
for key
in provider
.get_available_fields()
749 provider
.fields
= provider_fields
def update_provider_pid(self):
    """Propagates pid filters to providers.

    Every provider in self.providers gets the current pid filter
    assigned to its pid attribute.

    """
    for provider in self.providers:
        provider.pid = self._pid_filter
def fields_filter(self):
    """Returns the currently set fields filter."""
    return self._fields_filter
760 @fields_filter.setter
def fields_filter(self, fields_filter):
    """Stores a new fields filter and propagates it to the providers."""
    self._fields_filter = fields_filter
    self.update_provider_filters()
def pid_filter(self):
    """Returns the currently set pid filter."""
    return self._pid_filter
770 def pid_filter(self
, pid
):
771 self
._pid
_filter
= pid
773 self
.update_provider_pid()
776 """Returns a dict with field -> (value, delta to last value) of all
778 for provider
in self
.providers
:
779 new
= provider
.read()
780 for key
in provider
.fields
:
781 oldval
= self
.values
.get(key
, (0, 0))
782 newval
= new
.get(key
, 0)
784 if oldval
is not None:
785 newdelta
= newval
- oldval
[0]
786 self
.values
[key
] = (newval
, newdelta
)
793 """Instruments curses to draw a nice text ui."""
794 def __init__(self
, stats
):
797 self
.drilldown
= False
798 self
.update_drilldown()
801 """Initialises curses for later use. Based on curses.wrapper
802 implementation from the Python standard library."""
803 self
.screen
= curses
.initscr()
807 # The try/catch works around a minor bit of
808 # over-conscientiousness in the curses module, the error
809 # return from C start_color() is ignorable.
815 # Hide cursor in extra statement as some monochrome terminals
816 # might support hiding but not colors.
822 curses
.use_default_colors()
825 def __exit__(self
, *exception
):
826 """Resets the terminal to its normal state. Based on curses.wrapper
827 implementation from the Python standard library."""
829 self
.screen
.keypad(0)
def update_drilldown(self):
    """Sets or removes a filter that only allows fields without braces.

    If no fields filter is set, the 'no opening brace' regex is
    installed. If exactly that regex is set, the filter is removed.
    Any other filter is left untouched.

    """
    if not self.stats.fields_filter:
        self.stats.fields_filter = r'^[^\(]*$'

    elif self.stats.fields_filter == r'^[^\(]*$':
        self.stats.fields_filter = None
def update_pid(self, pid):
    """Propagates pid selection to stats object."""
    self.stats.pid_filter = pid
846 def refresh(self
, sleeptime
):
847 """Refreshes on-screen data."""
849 if self
.stats
.pid_filter
> 0:
850 self
.screen
.addstr(0, 0, 'kvm statistics - pid {0}'
851 .format(self
.stats
.pid_filter
),
854 self
.screen
.addstr(0, 0, 'kvm statistics - summary', curses
.A_BOLD
)
855 self
.screen
.addstr(2, 1, 'Event')
856 self
.screen
.addstr(2, 1 + LABEL_WIDTH
+ NUMBER_WIDTH
-
857 len('Total'), 'Total')
858 self
.screen
.addstr(2, 1 + LABEL_WIDTH
+ NUMBER_WIDTH
+ 8 -
859 len('Current'), 'Current')
861 stats
= self
.stats
.get()
864 return (-stats
[x
][1], -stats
[x
][0])
866 return (0, -stats
[x
][0])
867 for key
in sorted(stats
.keys(), key
=sortkey
):
869 if row
>= self
.screen
.getmaxyx()[0]:
872 if not values
[0] and not values
[1]:
875 self
.screen
.addstr(row
, col
, key
)
877 self
.screen
.addstr(row
, col
, '%10d' % (values
[0],))
879 if values
[1] is not None:
880 self
.screen
.addstr(row
, col
, '%8d' % (values
[1] / sleeptime
,))
882 self
.screen
.refresh()
884 def show_filter_selection(self
):
885 """Draws filter selection mask.
887 Asks for a valid regex and sets the fields filter accordingly.
892 self
.screen
.addstr(0, 0,
893 "Show statistics for events matching a regex.",
895 self
.screen
.addstr(2, 0,
897 .format(self
.stats
.fields_filter
))
898 self
.screen
.addstr(3, 0, "New regex: ")
900 regex
= self
.screen
.getstr()
906 self
.stats
.fields_filter
= regex
911 def show_vm_selection(self
):
912 """Draws PID selection mask.
914 Asks for a pid until a valid pid or 0 has been entered.
919 self
.screen
.addstr(0, 0,
920 'Show statistics for specific pid.',
922 self
.screen
.addstr(1, 0,
923 'This might limit the shown data to the trace '
927 self
.screen
.addstr(3, 0, "Pid [0 or pid]: ")
928 pid
= self
.screen
.getstr()
938 if not os
.path
.isdir(os
.path
.join('/proc/', str(pid
))):
947 def show_stats(self
):
948 """Refreshes the screen and processes user input."""
951 self
.refresh(sleeptime
)
952 curses
.halfdelay(int(sleeptime
* 10))
955 char
= self
.screen
.getkey()
957 self
.drilldown
= not self
.drilldown
958 self
.update_drilldown()
962 self
.show_filter_selection()
964 self
.show_vm_selection()
965 except KeyboardInterrupt:
971 """Prints statistics in a key, value format."""
975 for key
in sorted(s
.keys()):
977 print '%-42s%10d%10d' % (key
, values
[0], values
[1])
980 """Prints statistics as reiterating key block, multiple value blocks."""
981 keys
= sorted(stats
.get().iterkeys())
989 print ' %9d' % s
[k
][1],
995 if line
% banner_repeat
== 0:
1001 """Returns processed program arguments."""
1002 description_text
= """
1003 This script displays various statistics about VMs running under KVM.
1004 The statistics are gathered from the KVM debugfs entries and / or the
1005 currently available perf traces.
1007 The monitoring takes additional cpu cycles and might affect the VM's
1012 /sys/kernel/debug/kvm
1013 /sys/kernel/debug/trace/events/*
1015 - /proc/sys/kernel/perf_event_paranoid < 1 if user has no
1016 CAP_SYS_ADMIN and perf events are used.
1017 - CAP_SYS_RESOURCE if the hard limit is not high enough to allow
1018 the large number of files that are possibly opened.
1021 class PlainHelpFormatter(optparse
.IndentedHelpFormatter
):
1022 def format_description(self
, description
):
1024 return description
+ "\n"
1028 optparser
= optparse
.OptionParser(description
=description_text
,
1029 formatter
=PlainHelpFormatter())
1030 optparser
.add_option('-1', '--once', '--batch',
1031 action
='store_true',
1034 help='run in batch mode for one second',
1036 optparser
.add_option('-l', '--log',
1037 action
='store_true',
1040 help='run in logging mode (like vmstat)',
1042 optparser
.add_option('-t', '--tracepoints',
1043 action
='store_true',
1046 help='retrieve statistics from tracepoints',
1048 optparser
.add_option('-d', '--debugfs',
1049 action
='store_true',
1052 help='retrieve statistics from debugfs',
1054 optparser
.add_option('-f', '--fields',
1058 help='fields to display (regex)',
1060 optparser
.add_option('-p', '--pid',
1065 help='restrict statistics to pid',
1067 (options
, _
) = optparser
.parse_args(sys
.argv
)
1070 def get_providers(options
):
1071 """Returns a list of data providers depending on the passed options."""
1074 if options
.tracepoints
:
1075 providers
.append(TracepointProvider())
1077 providers
.append(DebugfsProvider())
1078 if len(providers
) == 0:
1079 providers
.append(TracepointProvider())
1083 def check_access(options
):
1084 """Exits if the current user can't access all needed directories."""
1085 if not os
.path
.exists('/sys/kernel/debug'):
1086 sys
.stderr
.write('Please enable CONFIG_DEBUG_FS in your kernel.')
1089 if not os
.path
.exists(PATH_DEBUGFS_KVM
):
1090 sys
.stderr
.write("Please make sure, that debugfs is mounted and "
1091 "readable by the current user:\n"
1092 "('mount -t debugfs debugfs /sys/kernel/debug')\n"
1093 "Also ensure, that the kvm modules are loaded.\n")
1096 if not os
.path
.exists(PATH_DEBUGFS_TRACING
) and (options
.tracepoints
1097 or not options
.debugfs
):
1098 sys
.stderr
.write("Please enable CONFIG_TRACING in your kernel "
1099 "when using the option -t (default).\n"
1100 "If it is enabled, make {0} readable by the "
1102 .format(PATH_DEBUGFS_TRACING
))
1103 if options
.tracepoints
:
1106 sys
.stderr
.write("Falling back to debugfs statistics!\n")
1107 options
.debugfs
= True
1113 options
= get_options()
1114 options
= check_access(options
)
1116 if (options
.pid
> 0 and
1117 not os
.path
.isdir(os
.path
.join('/proc/',
1118 str(options
.pid
)))):
1119 sys
.stderr
.write('Did you use a (unsupported) tid instead of a pid?\n')
1120 sys
.exit('Specified pid does not exist.')
1122 providers
= get_providers(options
)
1123 stats
= Stats(providers
, options
.pid
, fields
=options
.fields
)
1127 elif not options
.once
:
1128 with
Tui(stats
) as tui
:
1133 if __name__
== "__main__":